From 5269162599a6b9e64598b468e99b20a5a0110f28 Mon Sep 17 00:00:00 2001 From: Dong Yu Date: Wed, 29 Oct 2014 02:36:30 -0700 Subject: [PATCH 01/31] Fixed bugs in ScaleNode's forward computation and L1RegNode's gradient computation. --- MachineLearning/cn/ComputationNode.h | 2 +- MachineLearning/cn/TrainingCriterionNode.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/MachineLearning/cn/ComputationNode.h b/MachineLearning/cn/ComputationNode.h index abfe60c14..fcfd01d95 100644 --- a/MachineLearning/cn/ComputationNode.h +++ b/MachineLearning/cn/ComputationNode.h @@ -2879,7 +2879,7 @@ namespace Microsoft { namespace MSR { namespace CNTK { static void WINAPI EvaluateThisNodeS(Matrix& functionValues, const Matrix& input0, const Matrix& input1) { - functionValues.AssignProductOf(input0, false, input1, false); + functionValues.AssignProductOf(input0.Get00Element(), input1); #if NANCHECK functionValues.HasNan("Scale"); #endif diff --git a/MachineLearning/cn/TrainingCriterionNode.h b/MachineLearning/cn/TrainingCriterionNode.h index 31102b596..b9766f5f0 100644 --- a/MachineLearning/cn/TrainingCriterionNode.h +++ b/MachineLearning/cn/TrainingCriterionNode.h @@ -752,7 +752,7 @@ namespace Microsoft { namespace MSR { namespace CNTK { Matrix& inputGradientValues, const Matrix& gradientValues, const Matrix& inputFunctionValues) { gradientOfL1Norm.AssignSignOf(inputFunctionValues); - inputGradientValues.AddElementProductOf(gradientValues, gradientOfL1Norm); + inputGradientValues.AddWithScaleOf(gradientValues.Get00Element(), gradientOfL1Norm); } // GetTaskDescriptor - Get a task descriptor for this node From 67f40ac28509f5c27eb8782bddfc5a5080457653 Mon Sep 17 00:00:00 2001 From: Frank Seide Date: Thu, 30 Oct 2014 09:28:27 -0700 Subject: [PATCH 02/31] removed some unused Windows-specific code from basetypes.h; created a sub-structure inside projects that matches the actual directory names for shared files to make those easy to spot; disabled some warnings in the 
test projects since we don't care much for those; some warnings in test projects fixed --- Common/Include/basetypes.h | 21 +- Common/Include/fileutil.h | 8 +- Common/fileutil.cpp | 1 + DataReader/HTKMLFReader/ReadMe.txt | 45 +- .../LUSequenceReader/LUSequenceReader.vcxproj | 5 + .../SequenceReader/SequenceReader.vcxproj | 5 + MachineLearning/cn/SGD.h | 1 + MachineLearning/cn/cn.vcxproj | 565 +++++++++--------- Math/CNTKMathTest/CNTKMathTest.vcxproj | 4 +- Math/CNTKMathTest/CPUMatrixUnitTests.cpp | 8 +- Math/CNTKMathTest/GPUMatrixUnitTests.cpp | 2 + Math/CNTKMathTest/GPUMatrixcuBLASTests.cpp | 6 +- .../CNTKMathTest/GPUSparseMatrixUnitTests.cpp | 6 +- Math/CNTKMathTest/MatrixBLASTests.cpp | 6 +- .../MatrixSparseDenseInteractionsTests.cpp | 2 + Math/CNTKMathTest/MatrixUnitTests.cpp | 9 +- Math/Math/Math.vcxproj | 2 + 17 files changed, 354 insertions(+), 342 deletions(-) diff --git a/Common/Include/basetypes.h b/Common/Include/basetypes.h index a43160987..7369ba5fe 100644 --- a/Common/Include/basetypes.h +++ b/Common/Include/basetypes.h @@ -76,7 +76,9 @@ OACR_WARNING_DISABLE(POTENTIAL_ARGUMENT_TYPE_MISMATCH, "Not level1 or level2_sec using namespace std; #include #include -#include // for CRITICAL_SECTION +#ifdef _MSC_VER +#include // for CRITICAL_SECTION and Unicode conversion functions --TODO: is there a portable alternative? +#endif // CRT error handling seems to not be included in wince headers // so we define our own imports @@ -272,7 +274,6 @@ template inline void swap (fixed_vector<_T> & L, fixed_vector<_T> & R) // TODO: get rid of these typedef std::string STRING; typedef std::wstring WSTRING; -typedef std::basic_string TSTRING; // wide/narrow character string // derive from this for noncopyable classes (will get you private unimplemented copy constructors) // ... 
TODO: change all of basetypes classes/structs to use this @@ -285,15 +286,25 @@ public: }; // class CCritSec and CAutoLock -- simple critical section handling +// TODO: Currently only working under Windows; BROKEN otherwise, to be fixed class CCritSec { CCritSec (const CCritSec &); CCritSec & operator= (const CCritSec &); +#ifdef _MSC_VER CRITICAL_SECTION m_CritSec; +#endif public: +#ifdef _MSC_VER CCritSec() { InitializeCriticalSection(&m_CritSec); }; ~CCritSec() { DeleteCriticalSection(&m_CritSec); }; void Lock() { EnterCriticalSection(&m_CritSec); }; void Unlock() { LeaveCriticalSection(&m_CritSec); }; +#else // POSIX --TODO: need to figure this out + CCritSec() { }; + ~CCritSec() { };; + void Lock() { }; + void Unlock() { }; +#endif }; // locks a critical section, and unlocks it automatically @@ -307,6 +318,7 @@ public: ~CAutoLock() { m_rLock.Unlock(); }; }; +#if 0 // an efficient way to write COM code // usage examples: // COM_function() || throw_hr ("message"); @@ -387,6 +399,7 @@ public: operator void * () { return TlsGetValue (tlsSlot); } void *operator = (void *val) { if (!TlsSetValue (tlsSlot,val)) throw std::runtime_error ("tls: TlsSetValue failed"); return val; } }; +#endif };}; // namespace @@ -731,6 +744,7 @@ public: }; inline int fclose (auto_file_ptr & af) { return af.fclose(); } +#ifdef _MSC_VER // auto-closing container for Win32 handles. // Pass close function if not CloseHandle(), e.g. 
// auto_handle h (FindFirstFile(...), FindClose); @@ -747,6 +761,7 @@ public: operator _H () const { return h; } }; typedef auto_handle_t auto_handle; +#endif // like auto_ptr but calls freeFunc_p (type free_func_t) instead of delete to clean up // minor difference - wrapped object is T, not T *, so to wrap a @@ -768,6 +783,7 @@ public: T detach () { T tmp = it; it = 0; return tmp; } // release ownership of object }; +#if 0 // simple timer // auto_timer timer; run(); double seconds = timer; // now can abandon the object class auto_timer @@ -793,6 +809,7 @@ public: fprintf (stderr, "%s: %.6f ms\n", msg.c_str(), elapsed * 1000.0/*to ms*/); } }; +#endif };}; diff --git a/Common/Include/fileutil.h b/Common/Include/fileutil.h index e55b4e867..3ef7c1bb3 100644 --- a/Common/Include/fileutil.h +++ b/Common/Include/fileutil.h @@ -229,7 +229,7 @@ #define _FILEUTIL_ #include -#include // for mmreg.h and FILETIME +#include // for mmreg.h and FILETIME --TODO: we should be able to remove this (for portability; currently CUDA chokes) #include #include // for std::find #include @@ -674,8 +674,10 @@ namespace msra { namespace files { // getfiletime(), setfiletime(): access modification time // ---------------------------------------------------------------------------- -bool getfiletime (const std::wstring & path, FILETIME & time); -void setfiletime (const std::wstring & path, const FILETIME & time); +// Note: we use struct _FILETIME instead of FILETIME to avoid having to include Windows.h, for increased portability. +// As a next step, we shall make these two functions local to fileutil.cpp, and move all code that uses it in there as well. 
+bool getfiletime (const std::wstring & path, struct _FILETIME & time); +void setfiletime (const std::wstring & path, const struct _FILETIME & time); // ---------------------------------------------------------------------------- // expand_wildcards() -- expand a path with wildcards (also intermediate ones) diff --git a/Common/fileutil.cpp b/Common/fileutil.cpp index 6bec42842..2210d670b 100644 --- a/Common/fileutil.cpp +++ b/Common/fileutil.cpp @@ -6,6 +6,7 @@ #define _CRT_SECURE_NO_WARNINGS // "secure" CRT not available on all platforms --add this at the top of all CPP files that give "function or variable may be unsafe" warnings #define _CRT_NONSTDC_NO_DEPRECATE // make VS accept POSIX functions without _ +#pragma warning (disable: 4996) // ^^ this does not seem to work--TODO: make it work #ifndef UNDER_CE // fixed-buffer overloads not available for wince #ifdef _CRT_SECURE_CPP_OVERLOAD_STANDARD_NAMES // fixed-buffer overloads for strcpy() etc. diff --git a/DataReader/HTKMLFReader/ReadMe.txt b/DataReader/HTKMLFReader/ReadMe.txt index a042f87bc..d3398d581 100644 --- a/DataReader/HTKMLFReader/ReadMe.txt +++ b/DataReader/HTKMLFReader/ReadMe.txt @@ -1,40 +1,5 @@ -======================================================================== - DYNAMIC LINK LIBRARY : HTKMLFReader Project Overview -======================================================================== - -AppWizard has created this HTKMLFReader DLL for you. - -This file contains a summary of what you will find in each of the files that -make up your HTKMLFReader application. - - -HTKMLFReader.vcxproj - This is the main project file for VC++ projects generated using an Application Wizard. - It contains information about the version of Visual C++ that generated the file, and - information about the platforms, configurations, and project features selected with the - Application Wizard. - -HTKMLFReader.vcxproj.filters - This is the filters file for VC++ projects generated using an Application Wizard. 
- It contains information about the association between the files in your project - and the filters. This association is used in the IDE to show grouping of files with - similar extensions under a specific node (for e.g. ".cpp" files are associated with the - "Source Files" filter). - -HTKMLFReader.cpp - This is the main DLL source file. - -///////////////////////////////////////////////////////////////////////////// -Other standard files: - -StdAfx.h, StdAfx.cpp - These files are used to build a precompiled header (PCH) file - named HTKMLFReader.pch and a precompiled types file named StdAfx.obj. - -///////////////////////////////////////////////////////////////////////////// -Other notes: - -AppWizard uses "TODO:" comments to indicate parts of the source code you -should add to or customize. - -///////////////////////////////////////////////////////////////////////////// +This is the reader for HTK-formatted speech files. + +Large parts of this code deviate from the coding style of the rest of CNTK and also include some unused code and complexity not really needed for CNTK. + +The reason is that this code was ported from another, older tool. We hope to gradually clean this out. 
diff --git a/DataReader/LUSequenceReader/LUSequenceReader.vcxproj b/DataReader/LUSequenceReader/LUSequenceReader.vcxproj index 04d359116..ad0bc7613 100644 --- a/DataReader/LUSequenceReader/LUSequenceReader.vcxproj +++ b/DataReader/LUSequenceReader/LUSequenceReader.vcxproj @@ -100,6 +100,11 @@ + + + + + diff --git a/DataReader/SequenceReader/SequenceReader.vcxproj b/DataReader/SequenceReader/SequenceReader.vcxproj index 0ee0f0b8a..7c7913d36 100644 --- a/DataReader/SequenceReader/SequenceReader.vcxproj +++ b/DataReader/SequenceReader/SequenceReader.vcxproj @@ -100,6 +100,11 @@ + + + + + diff --git a/MachineLearning/cn/SGD.h b/MachineLearning/cn/SGD.h index 40c8bc0a7..1c067ccf2 100644 --- a/MachineLearning/cn/SGD.h +++ b/MachineLearning/cn/SGD.h @@ -1265,6 +1265,7 @@ protected: } //up to date if resultFile is older than srcFile or missing + // TODO: move this to fileutil.h to allow for portable implementation bool IsResultFileUpdateToDate (const wstring & resultFile, const wstring & srcFile, const bool IsSrcFileNeeded) { FILETIME resultFileTime; diff --git a/MachineLearning/cn/cn.vcxproj b/MachineLearning/cn/cn.vcxproj index 0e02235ab..42e20126a 100644 --- a/MachineLearning/cn/cn.vcxproj +++ b/MachineLearning/cn/cn.vcxproj @@ -1,282 +1,285 @@ - - - - - Debug - Win32 - - - Debug - x64 - - - Release - Win32 - - - Release - x64 - - - - {E6F26F9A-FF64-4F0A-B749-CD309EE357EE} - - - - - - - - - Win32Proj - cn - - - - Application - true - v120 - Unicode - - - Application - true - v120 - Unicode - - - Application - false - v120 - true - Unicode - - - Application - false - v120 - true - Unicode - - - - - - - - - - - - - - - - - - - - true - - - true - ..\..\Math\Math;..\..\Common\;..\..\Common\include;..\..\Common\PTask\include;$(VCInstallDir)include;$(VCInstallDir)atlmfc\include;$(WindowsSDK_IncludePath); - 
$(SolutionDir)$(Platform)\$(Configuration);$(SolutionDir)..\Common\lib;$(SolutionDir)..\Common\PTask\lib\$(Configuration)\;$(VCInstallDir)lib\amd64;$(VCInstallDir)atlmfc\lib\amd64;$(WindowsSDK_LibraryPath_x64);$(CUDA_PATH)\lib\$(Platform) - Build - - - false - - - false - ..\..\Math\Math;..\..\Common\;..\..\Common\include;..\..\Common\PTask\include;$(VCInstallDir)include;$(VCInstallDir)atlmfc\include;$(WindowsSDK_IncludePath); - $(SolutionDir)$(Platform)\$(Configuration);$(SolutionDir)..\Common\lib;$(SolutionDir)..\Common\PTask\lib\$(Configuration)\;$(VCInstallDir)lib\amd64;$(VCInstallDir)atlmfc\lib\amd64;$(WindowsSDK_LibraryPath_x64);$(CUDA_PATH)\lib\$(Platform) - Build - $(SolutionDir)..\Common\PTask\bin\;$(ExecutablePath) - - - - - - Level3 - Disabled - WIN32;_DEBUG;_CONSOLE;%(PreprocessorDefinitions) - true - - - Console - true - - - - - - - Level4 - Disabled - WIN32;_DEBUG;_CONSOLE;%(PreprocessorDefinitions) - true - true - true - - - Console - true - Delayimp.lib;nvml.lib;cudart.lib;cntkMath.lib;kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;%(AdditionalDependencies) - $(SolutionDir)$(Platform)\$(Configuration)\ - CNTKMath.dll;nvml.dll - - - - - - - - - - - $(TargetDir)config.txt;$(TargetDir)labels.txt;$(TargetDir)network.txt;$(TargetDir)NdlScript.txt - - - true - Copy content files to target directory - - - true - compute_20,sm_20;compute_30,sm_30;compute_35,sm_35;compute_50,sm_50; - - - - - Level3 - - - MaxSpeed - true - true - WIN32;NDEBUG;_CONSOLE;%(PreprocessorDefinitions) - true - - - Console - true - true - true - true - - - - - Level4 - - - Disabled - true - true - WIN32;NDEBUG;_CONSOLE;%(PreprocessorDefinitions) - true - Speed - /d2Zi+ %(AdditionalOptions) - true - - - Console - true - true - true - 
Delayimp.lib;nvml.lib;cudart.lib;cntkMath.lib;kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;%(AdditionalDependencies) - true - CNTKMath.dll;nvml.dll - - - copy $(SolutionDir)..\Common\PTask\bin\*.dll $(TargetDir) - Copy over the NVidia tools extention DLL - - - - - - - - - - - true - - - - - true - - - - - true - true - false - false - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - NotUsing - - - NotUsing - - - - - - - - - - - - - {60bdb847-d0c4-4fd3-a947-0c15c08bcdb5} - - - - - - + + + + + Debug + Win32 + + + Debug + x64 + + + Release + Win32 + + + Release + x64 + + + + {E6F26F9A-FF64-4F0A-B749-CD309EE357EE} + + + + + + + + + Win32Proj + cn + + + + Application + true + v120 + Unicode + + + Application + true + v120 + Unicode + + + Application + false + v120 + true + Unicode + + + Application + false + v120 + true + Unicode + + + + + + + + + + + + + + + + + + + + true + + + true + ..\..\Math\Math;..\..\Common\;..\..\Common\include;..\..\Common\PTask\include;$(VCInstallDir)include;$(VCInstallDir)atlmfc\include;$(WindowsSDK_IncludePath); + $(SolutionDir)$(Platform)\$(Configuration);$(SolutionDir)..\Common\lib;$(SolutionDir)..\Common\PTask\lib\$(Configuration)\;$(VCInstallDir)lib\amd64;$(VCInstallDir)atlmfc\lib\amd64;$(WindowsSDK_LibraryPath_x64);$(CUDA_PATH)\lib\$(Platform) + Build + + + false + + + false + ..\..\Math\Math;..\..\Common\;..\..\Common\include;..\..\Common\PTask\include;$(VCInstallDir)include;$(VCInstallDir)atlmfc\include;$(WindowsSDK_IncludePath); + $(SolutionDir)$(Platform)\$(Configuration);$(SolutionDir)..\Common\lib;$(SolutionDir)..\Common\PTask\lib\$(Configuration)\;$(VCInstallDir)lib\amd64;$(VCInstallDir)atlmfc\lib\amd64;$(WindowsSDK_LibraryPath_x64);$(CUDA_PATH)\lib\$(Platform) + Build + $(SolutionDir)..\Common\PTask\bin\;$(ExecutablePath) + + + + + + Level3 + Disabled + WIN32;_DEBUG;_CONSOLE;%(PreprocessorDefinitions) 
+ true + + + Console + true + + + + + + + Level4 + Disabled + WIN32;_DEBUG;_CONSOLE;%(PreprocessorDefinitions) + true + true + true + + + Console + true + Delayimp.lib;nvml.lib;cudart.lib;cntkMath.lib;kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;%(AdditionalDependencies) + $(SolutionDir)$(Platform)\$(Configuration)\ + CNTKMath.dll;nvml.dll + + + + + + + + + + + $(TargetDir)config.txt;$(TargetDir)labels.txt;$(TargetDir)network.txt;$(TargetDir)NdlScript.txt + + + true + Copy content files to target directory + + + true + compute_20,sm_20;compute_30,sm_30;compute_35,sm_35;compute_50,sm_50; + + + + + Level3 + + + MaxSpeed + true + true + WIN32;NDEBUG;_CONSOLE;%(PreprocessorDefinitions) + true + + + Console + true + true + true + true + + + + + Level4 + + + Disabled + true + true + WIN32;NDEBUG;_CONSOLE;%(PreprocessorDefinitions) + true + Speed + /d2Zi+ %(AdditionalOptions) + true + + + Console + true + true + true + Delayimp.lib;nvml.lib;cudart.lib;cntkMath.lib;kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;%(AdditionalDependencies) + true + CNTKMath.dll;nvml.dll + + + copy $(SolutionDir)..\Common\PTask\bin\*.dll $(TargetDir) + Copy over the NVidia tools extention DLL + + + + + + + + + + + true + + + + + true + + + + + true + true + false + false + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + NotUsing + + + NotUsing + + + + + + + + + + + + + {60bdb847-d0c4-4fd3-a947-0c15c08bcdb5} + + + + + + \ No newline at end of file diff --git a/Math/CNTKMathTest/CNTKMathTest.vcxproj b/Math/CNTKMathTest/CNTKMathTest.vcxproj index 76b05184f..7ac2e3ff0 100644 --- a/Math/CNTKMathTest/CNTKMathTest.vcxproj +++ b/Math/CNTKMathTest/CNTKMathTest.vcxproj @@ -117,7 +117,7 @@ WIN32;_DEBUG;%(PreprocessorDefinitions) true true - false + true Windows @@ -159,7 +159,7 
@@ WIN32;NDEBUG;%(PreprocessorDefinitions) true true - false + true false /d2Zi+ %(AdditionalOptions) diff --git a/Math/CNTKMathTest/CPUMatrixUnitTests.cpp b/Math/CNTKMathTest/CPUMatrixUnitTests.cpp index 61fa7e46a..f744d8130 100644 --- a/Math/CNTKMathTest/CPUMatrixUnitTests.cpp +++ b/Math/CNTKMathTest/CPUMatrixUnitTests.cpp @@ -24,14 +24,13 @@ namespace CNTKMathTest static void DebugPrint(FILE* gpuDebugFile, Matrix M, const char* str, const bool colwiseVec = true) { fprintf(gpuDebugFile, "\n %s\n", str); - const int matNumRow = M.GetNumRows(); - const int matNumCol = M.GetNumCols(); - const int elemNum = M.GetNumElements(); + const size_t matNumCol = M.GetNumCols(); + const size_t elemNum = M.GetNumElements(); Matrix M1 = M.Transpose(); double* pArray = M1.GetArray(); if (colwiseVec) { - for (int i = 0; i < elemNum; i++) + for (size_t i = 0; i < elemNum; i++) { fprintf(gpuDebugFile, "%3d ", (int)pArray[i]); @@ -39,6 +38,7 @@ namespace CNTKMathTest fprintf(gpuDebugFile, "\n"); } } + //const size_t matNumRow = M.GetNumRows(); //for (int i = 0; i < matNumRow; i++) //{ // for (int j = 0; j < matNumCol; j++) diff --git a/Math/CNTKMathTest/GPUMatrixUnitTests.cpp b/Math/CNTKMathTest/GPUMatrixUnitTests.cpp index 837a222c3..744e3b13d 100644 --- a/Math/CNTKMathTest/GPUMatrixUnitTests.cpp +++ b/Math/CNTKMathTest/GPUMatrixUnitTests.cpp @@ -12,6 +12,8 @@ #define epsilon 0.00001 #define IDX2C(i,j,ld) (((j)*(ld))+(i)) // 0 based indexing +#pragma warning (disable: 4244 4245 4305) // conversions and truncations; we don't care in this test project + #define DEBUG_FLAG 1 using namespace Microsoft::VisualStudio::CppUnitTestFramework; using namespace Microsoft::MSR::CNTK; diff --git a/Math/CNTKMathTest/GPUMatrixcuBLASTests.cpp b/Math/CNTKMathTest/GPUMatrixcuBLASTests.cpp index 7b0fe927e..0c87d0c22 100644 --- a/Math/CNTKMathTest/GPUMatrixcuBLASTests.cpp +++ b/Math/CNTKMathTest/GPUMatrixcuBLASTests.cpp @@ -9,6 +9,8 @@ #include "..\Math\CPUMatrix.h" #include "..\Math\GPUMatrix.cuh" 
+#pragma warning (disable: 4244 4245 4305) // conversions and truncations; we don't care in this test project + using namespace Microsoft::VisualStudio::CppUnitTestFramework; using namespace Microsoft::MSR::CNTK; @@ -23,11 +25,11 @@ namespace CNTKMathTest TEST_METHOD(GPU_MultiplyAndWeightedAdd_NoExceptionOnly_Test) { - float alpha = 0.435; + float alpha = 0.4; GPUMatrix M0_GPU(12,5); GPUMatrix M1_GPU(5,11); GPUMatrix M2_GPU(12,11); - GPUMatrix::MultiplyAndWeightedAdd(0.1,M0_GPU,false,M1_GPU,false,0.4,M2_GPU); + GPUMatrix::MultiplyAndWeightedAdd(0.1,M0_GPU,false,M1_GPU,false,alpha,M2_GPU); } diff --git a/Math/CNTKMathTest/GPUSparseMatrixUnitTests.cpp b/Math/CNTKMathTest/GPUSparseMatrixUnitTests.cpp index 0c4661d86..8275411c4 100644 --- a/Math/CNTKMathTest/GPUSparseMatrixUnitTests.cpp +++ b/Math/CNTKMathTest/GPUSparseMatrixUnitTests.cpp @@ -13,6 +13,8 @@ using namespace Microsoft::VisualStudio::CppUnitTestFramework; using namespace Microsoft::MSR::CNTK; +#pragma warning (disable: 4244 4245 4305) // conversions and truncations; we don't care in this test project + #define ID_2C(i,j,ld) (((i)*(ld))+(j)) // 0 based indexing namespace CNTKMathTest @@ -163,7 +165,7 @@ namespace CNTKMathTest GPUSparseMatrix ATs = A.Transpose(); GPUMatrix ATd = ATs.CopyToDenseMatrix(); - float* arrTd = ATd.CopyToArray(); + float* arrTd = ATd.CopyToArray(); arrTd; float arrA_times_AT[19] = {17,8,5,0,8,13,0,27,5,0,138,48,0,27,48,117}; GPUMatrix Cet(4,4,arrA_times_AT,matrixFlagNormal); @@ -176,7 +178,7 @@ namespace CNTKMathTest GPUMatrix Cres1(5,5); GPUSparseMatrix::Multiply(ATd,A,Cres1); //Dense times sparse - float* arr = Cres1.CopyToArray(); + float* arr = Cres1.CopyToArray(); arr; Assert::IsTrue(Cres1.IsEqualTo(Cet1)); diff --git a/Math/CNTKMathTest/MatrixBLASTests.cpp b/Math/CNTKMathTest/MatrixBLASTests.cpp index 4b351132c..1425db31e 100644 --- a/Math/CNTKMathTest/MatrixBLASTests.cpp +++ b/Math/CNTKMathTest/MatrixBLASTests.cpp @@ -7,6 +7,8 @@ #include "CppUnitTest.h" #include 
"..\Math\Matrix.h" +#pragma warning (disable: 4244 4245 4305) // conversions and truncations; we don't care in this test project + #define epsilon 0.000001 #define IDX2C(i,j,ld) (((j)*(ld))+(i)) // 0 based indexing @@ -137,7 +139,7 @@ namespace CNTKMathTest { float x = C2(i,j); float y = (alpha*A2(i,0)+beta*B2(i,j)); - Assert::IsTrue(fabsf(C2(i,j)-(alpha*A2(i,0)+beta*B2(i,j)))(0,A1(i,j)); + Assert::AreEqual(0,x); } else { float x = A1(i,j); - Assert::AreEqual(1,A1(i,j)); + Assert::AreEqual(1,x); } } @@ -128,7 +130,6 @@ namespace CNTKMathTest bool has_big=false; foreach_coord(i,j,A4) { - float x = A4(i,j); Assert::IsTrue((A4(i,j)>=-26.3)&&(A4(i,j)<30.2)); if (A4(i,j)<-3) has_small=true; @@ -577,7 +578,7 @@ namespace CNTKMathTest C.AssignElementProductOf(A,B); foreach_coord(i,j,C) { - Assert::IsTrue(C(i,j)=A(i,j)*B(i,j)); + Assert::IsTrue(C(i,j)==A(i,j)*B(i,j)); } //AddElementProductOf diff --git a/Math/Math/Math.vcxproj b/Math/Math/Math.vcxproj index d4c0d6c90..45e8e8cd2 100644 --- a/Math/Math/Math.vcxproj +++ b/Math/Math/Math.vcxproj @@ -147,6 +147,8 @@ + + From a4bc3b64f03d6d23b1bfc720858673dafee08891 Mon Sep 17 00:00:00 2001 From: Frank Seide Date: Thu, 30 Oct 2014 09:45:02 -0700 Subject: [PATCH 03/31] removed dependency of File.cpp on message.h --- Common/File.cpp | 19 ++++++------------- DataReader/BinaryReader/BinaryReader.vcxproj | 5 +++++ .../LUSequenceReader/LUSequenceReader.vcxproj | 1 + .../SequenceReader/SequenceReader.vcxproj | 1 + .../UCIFastReader/UCIFastReader.vcxproj | 6 ++++++ MachineLearning/CNTKEval/CNTKEval.vcxproj | 4 ++++ Math/Math/Math.vcxproj | 2 ++ 7 files changed, 25 insertions(+), 13 deletions(-) diff --git a/Common/File.cpp b/Common/File.cpp index 1493c7fd3..9d679f8b1 100644 --- a/Common/File.cpp +++ b/Common/File.cpp @@ -9,7 +9,6 @@ #include "basetypes.h" #define FORMAT_SPECIALIZE // to get the specialized version of the format routines #include "fileutil.h" -#include "message.h" #include "File.h" #include #include @@ -77,7 +76,7 @@ 
void File::goToDelimiter(int delim) ch=fgetc(m_file); if (feof(m_file)) { printf("Unexpected end of file\n"); - throw std::logic_error("Unexpected end of file\n"); + throw std::logic_error("Unexpected end of file\n"); } } } @@ -461,9 +460,7 @@ File& File::operator>>(FileMarker marker) break; case fileMarkerEndFile: // end of file marker, should we throw if it's not the end of the file? if (!IsEOF()) - { - ERROR("fileMarkerEndFile not found"); - } + throw std::runtime_error("fileMarkerEndFile not found"); break; case fileMarkerBeginList: // Beginning of list marker // no marker written unless an list with a count header @@ -477,7 +474,7 @@ File& File::operator>>(FileMarker marker) { int found = EndOfLineOrEOF(true); if (found != (int)true) // EOF can also be returned - ERROR("Newline not found"); + throw std::runtime_error("Newline not found"); } break; case fileMarkerBeginSection: // beginning of section @@ -550,9 +547,7 @@ File& File::GetMarker(FileMarker marker, const std::string& section) string str; *this >> str; if (str != section) - { - ERROR("section name mismatch %s != %s", str.c_str(), section.c_str()); - } + throw std::runtime_error(std::string("section name mismatch ") + str + " != " + section); return *this; } @@ -565,9 +560,7 @@ File& File::GetMarker(FileMarker marker, const std::wstring& section) wstring str; *this >> str; if (str != section) - { - ERROR("section name mismatch %ls != %ls", str.c_str(), section.c_str()); - } + throw std::runtime_error(std::string("section name mismatch ") + msra::strfun::utf8(str) + " != " + msra::strfun::utf8(section)); return *this; } @@ -627,4 +620,4 @@ void File::SetPosition(uint64_t pos) { fsetpos (m_file, pos); } -}}} \ No newline at end of file +}}} diff --git a/DataReader/BinaryReader/BinaryReader.vcxproj b/DataReader/BinaryReader/BinaryReader.vcxproj index 5b3b1f21b..1859fecd3 100644 --- a/DataReader/BinaryReader/BinaryReader.vcxproj +++ b/DataReader/BinaryReader/BinaryReader.vcxproj @@ -171,7 +171,12 @@ + + + 
+ + diff --git a/DataReader/LUSequenceReader/LUSequenceReader.vcxproj b/DataReader/LUSequenceReader/LUSequenceReader.vcxproj index ad0bc7613..8698ffb3f 100644 --- a/DataReader/LUSequenceReader/LUSequenceReader.vcxproj +++ b/DataReader/LUSequenceReader/LUSequenceReader.vcxproj @@ -105,6 +105,7 @@ + diff --git a/DataReader/SequenceReader/SequenceReader.vcxproj b/DataReader/SequenceReader/SequenceReader.vcxproj index 7c7913d36..893d8c69b 100644 --- a/DataReader/SequenceReader/SequenceReader.vcxproj +++ b/DataReader/SequenceReader/SequenceReader.vcxproj @@ -105,6 +105,7 @@ + diff --git a/DataReader/UCIFastReader/UCIFastReader.vcxproj b/DataReader/UCIFastReader/UCIFastReader.vcxproj index 668569f52..e894841e4 100644 --- a/DataReader/UCIFastReader/UCIFastReader.vcxproj +++ b/DataReader/UCIFastReader/UCIFastReader.vcxproj @@ -171,6 +171,12 @@ + + + + + + diff --git a/MachineLearning/CNTKEval/CNTKEval.vcxproj b/MachineLearning/CNTKEval/CNTKEval.vcxproj index ad2beb630..bad48d9b3 100644 --- a/MachineLearning/CNTKEval/CNTKEval.vcxproj +++ b/MachineLearning/CNTKEval/CNTKEval.vcxproj @@ -103,7 +103,11 @@ + + + + diff --git a/Math/Math/Math.vcxproj b/Math/Math/Math.vcxproj index 45e8e8cd2..6b1b386f3 100644 --- a/Math/Math/Math.vcxproj +++ b/Math/Math/Math.vcxproj @@ -147,8 +147,10 @@ + + From b678e9259619b13a67edd7eed6194dc90792aedb Mon Sep 17 00:00:00 2001 From: Frank Seide Date: Thu, 30 Oct 2014 10:33:51 -0700 Subject: [PATCH 04/31] msra_mgram.h no longer uses MESSAGE(_NOLF) macros from message.h; unified Error() and ERROR() functions to RuntimeError() defined in basetypes.h, and eliminated message.h altogether (it was only used for ERROR); new method LogicError()--note: some RuntimeError() calls really should be LogicError()s, need to fix as we notice them --- Common/ConfigFile.cpp | 4 +- Common/DataReader.cpp | 4 +- Common/DataWriter.cpp | 4 +- Common/Include/basetypes.h | 22 +- Common/Include/message.h | 676 +----------------- Common/fileutil.cpp | 115 ++- 
DataReader/HTKMLFReader/basetypes.h | 11 + DataReader/HTKMLFReader/fileutil.cpp | 117 +-- DataReader/HTKMLFReader/message.h | 474 +----------- DataReader/HTKMLFReader/msra_mgram.h | 133 ++-- .../LUSequenceReader/LUSequenceParser.cpp | 2 +- .../LUSequenceReader/LUSequenceParser.h | 6 +- .../LUSequenceReader/LUSequenceReader.cpp | 20 +- .../LUSequenceReader/LUSequenceWriter.cpp | 6 +- DataReader/SequenceReader/SequenceParser.cpp | 6 +- DataReader/SequenceReader/SequenceParser.h | 2 +- DataReader/SequenceReader/SequenceReader.cpp | 44 +- DataReader/UCIFastReader/UCIFastReader.cpp | 14 +- MachineLearning/CNTKEval/EvalReader.h | 4 +- MachineLearning/CNTKEval/EvalWriter.h | 4 +- MachineLearning/cn/ModelEditLanguage.cpp | 74 +- MachineLearning/cn/ModelEditLanguage.h | 36 +- MachineLearning/cn/NDLNetworkBuilder.h | 8 +- MachineLearning/cn/NDLUtil.h | 4 +- .../cn/NetworkDescriptionLanguage.h | 30 +- .../cn/SynchronousExecutionEngine.h | 42 +- 26 files changed, 365 insertions(+), 1497 deletions(-) diff --git a/Common/ConfigFile.cpp b/Common/ConfigFile.cpp index 814408f22..92a210a15 100644 --- a/Common/ConfigFile.cpp +++ b/Common/ConfigFile.cpp @@ -53,7 +53,7 @@ namespace Microsoft { namespace MSR { namespace CNTK { configString += config.ReadConfigFile(filePath); } else - Error("Cannot specify same config file multiple times at the command line."); + RuntimeError("Cannot specify same config file multiple times at the command line."); } } } @@ -85,7 +85,7 @@ namespace Microsoft { namespace MSR { namespace CNTK { std::string filePaths = line.substr(includeKeywordSize, line.size() - includeKeywordSize); if (filePaths.find(openBraceVar) != std::string::npos) { - Error("Variable usage (eg, \"$varName$\") not supported in \"include\" statements. Explicit path to config file must be provided"); + RuntimeError("Variable usage (eg, \"$varName$\") not supported in \"include\" statements. 
Explicit path to config file must be provided"); } std::vector filePathVec = msra::strfun::split (filePaths, "+"); diff --git a/Common/DataReader.cpp b/Common/DataReader.cpp index fa6ef9e8e..e067b989a 100644 --- a/Common/DataReader.cpp +++ b/Common/DataReader.cpp @@ -22,7 +22,7 @@ template<> std::string GetReaderName(double) {std::string name = "GetReaderD"; r template void DataReader::Init(const ConfigParameters& /*config*/) { - Error("Init shouldn't be called, use constructor"); + RuntimeError("Init shouldn't be called, use constructor"); // not implemented, calls the underlying class instead } @@ -54,7 +54,7 @@ void DataReader::GetDataReader(const ConfigParameters& config) { std::string message = "Reader not found: "; message += msra::strfun::utf8(m_dllName); - Error((char*) message.c_str()); + RuntimeError((char*) message.c_str()); } // create a variable of each type just to call the proper templated version diff --git a/Common/DataWriter.cpp b/Common/DataWriter.cpp index 4cec61fd3..846e70aab 100644 --- a/Common/DataWriter.cpp +++ b/Common/DataWriter.cpp @@ -22,7 +22,7 @@ template<> std::string GetWriterName(double) {std::string name = "GetWriterD"; r template void DataWriter::Init(const ConfigParameters& /*config*/) { - Error("Init shouldn't be called, use constructor"); + RuntimeError("Init shouldn't be called, use constructor"); // not implemented, calls the underlying class instead } @@ -68,7 +68,7 @@ void DataWriter::GetDataWriter(const ConfigParameters& config) { std::string message = "Writer not found: "; message += msra::strfun::utf8(m_dllName); - Error((char*)message.c_str()); + RuntimeError((char*)message.c_str()); } // create a variable of each type just to call the proper templated version diff --git a/Common/Include/basetypes.h b/Common/Include/basetypes.h index 7369ba5fe..eb64a834f 100644 --- a/Common/Include/basetypes.h +++ b/Common/Include/basetypes.h @@ -122,13 +122,8 @@ using namespace std; #define SAFE_DELETE(p) { if(p) { delete (p); 
(p)=NULL; } } #define SAFE_RELEASE(p) { if(p) { (p)->Release(); (p)=NULL; } } // nasty! use CComPtr<> #ifndef ASSERT -#ifdef _CHECKED // basetypes.h expects this function to be defined (it is in message.h) -extern void _CHECKED_ASSERT_error(const char * file, int line, const char * exp); -#define ASSERT(exp) ((exp)||(_CHECKED_ASSERT_error(__FILE__,__LINE__,#exp),0)) -#else #define ASSERT assert #endif -#endif // ---------------------------------------------------------------------------- // basic data types @@ -933,15 +928,26 @@ using namespace msra::basetypes; // for compatibility #pragma warning (pop) -// Error - throw an error after formatting a message -static inline void Error (const char * format, ...) +// RuntimeError - throw a std::runtime_error with a formatted error string +static inline void RuntimeError (const char * format, ...) { va_list args; char buffer[1024]; va_start (args, format); vsprintf (buffer, format, args); - throw runtime_error (buffer); + throw std::runtime_error(buffer); +}; + +// LogicError - throw a std::logic_error with a formatted error string +static inline void LogicError(const char * format, ...) +{ + va_list args; + char buffer[1024]; + + va_start(args, format); + vsprintf(buffer, format, args); + throw std::logic_error(buffer); }; #endif // _BASETYPES_ diff --git a/Common/Include/message.h b/Common/Include/message.h index 0b485d426..6138b65df 100644 --- a/Common/Include/message.h +++ b/Common/Include/message.h @@ -1,675 +1 @@ -// -// message.h - class for simple I/O of log messages -// -// Copyright (c) Microsoft Corporation. All rights reserved. 
-// -// $Log: /Speech_To_Speech_Translation/dbn/dbn/message.h $ -// -// 66 7/05/11 8:17 Fseide -// error() now prints the Win32 error code as well, maybe we can now track -// down the unreliable-server problem -// -// 65 11/30/09 1:33p Kit -// updated to compile under winCE -// -// 64 6/07/09 0:00 Fseide -// (added a comment) -// -// 63 5/18/09 15:28 Fseide -// minor bug fix in an error message in __flush() -// -// 62 1/08/09 16:14 Fseide -// moved timeDateStamp() here, as it is often used in logging -// -// 61 1/08/09 9:23 Fseide -// moved _CHECKED_ASSERT_error() to message.h, finally getting rid of -// message.cpp -// -// 60 12/09/08 6:59p Qiluo -// reverted stringerror => strerror -// -// 59 12/09/08 6:29p Qiluo -// strerror => stringerror -// -// 58 11/14/08 7:43p Qiluo -// mark banned APIs -// -// 57 11/11/08 18:19 Fseide -// no longer disables 4996 -// -// 56 11/11/08 17:56 Fseide -// (a comment added) -// -// 55 11/11/08 17:55 Fseide -// replaced strbXXX() calls with safe fixed-buffer overloads -// -// 54 11/11/08 17:47 Fseide -// fixed use of Xprintf() functions to use fixed-size overloads -// -// 53 11/11/08 15:08 Fseide -// replaced safe(r) _vsnprintf() by unsafer vsprintf() assuming there is -// an overload to make it safe again... 
-// -// 52 11/11/08 14:52 Fseide -// (added a comment) -// -// 51 6/18/08 11:41 Fseide -// added #pragma once -// -// 50 29/05/08 4:58p Kit -// G.muted semantics changed - now only controls whether logging goes to -// stderr or not -// -// 49 29/05/08 3:19p Kit -// changed semantics of G.muted - now G.muted only controls logging to -// stderr - so if G.muted is one but log file is specified, it will still -// log to the file -// -// 48 10/01/07 13:19 Fseide -// added setHeavyLogging() and noFlush flag for cases where a lot of info -// is written to the log file -// -// 47 27/06/07 5:11p Kit -// rolled back to version 45 -// -// 45 27/06/07 4:58p Kit -// changed a few more methods to inline to avoid linker errors -// -// 44 27/06/07 4:54p Kit -// changed a few methods to inline to avoid multiple include linker errors -// -// 43 5/08/07 16:29 Fseide -// increased output buffer size to 30K -// -// 42 4/11/07 17:24 Fseide -// fixed a bug for string overflow for vsnprintf(), 0-terminator missing -// -// 41 07-04-11 14:57 Qfyin -// added a std:: -// -// 40 3/28/07 11:57 Fseide -// fixed the C4702 problem with error() and mem_error() -// -// 39 3/27/07 20:54 Fseide -// silly compiler warning again (inconsistent warning between Release and -// Debug about unreachable code) -// -// 38 3/27/07 20:49 Fseide -// fixed a compiler warning -// -// 37 3/27/07 17:58 Fseide -// added namespace qualifiers -// -// 36 3/27/07 15:59 Fseide -// changed struct back to namespace, uniqueness problem of static g solved -// by moving _glob() into __globals -// -// 35 3/27/07 15:23 Fseide -// added private/public qualifiers back in -// -// 34 3/27/07 15:19 Fseide -// changed namespace into struct (namespace has problems with the shared -// state) -// -// 33 3/27/07 15:14 Fseide -// fixed compiler warnings -// -// 32 3/27/07 13:53 Fseide -// removed 'static' markers as they led to warnings -// -// 31 3/27/07 13:49 Fseide -// changed from class HDM_CLog to namespace msra::logging; -// now does 
not require message.cpp anymore -// -// 30 2/14/07 15:38 Fseide -// (fixed compiler warnings when compiling managed) -// -// 29 11/22/06 6:39p Rogeryu -// new function getLogFile -// -// 28 5/30/06 6:42p Rogeryu -// refine the log handle reconnection -// -// 27 5/24/06 2:51p Rogeryu -// -// 26 3/24/06 4:40p Rogeryu -// workaround a VC 2003 header bug (va_start macro for references) in -// MESSAGE/ERROR functions -// -// 25 3/24/06 13:33 Fseide -// cleaned up C4996 (back to file level to keep code tidy) -// -// 24 3/22/06 5:44p Rogeryu -// change to strbxxx macros -// -// 23 3/22/06 4:57p Rogeryu -// refine comments -// -// 22 3/21/06 5:21p Rogeryu -// review and fix level2_security OACR warnings -// -// 21 06-03-14 14:32 Yushli -// -// 20 06-03-14 11:58 Yushli -// Suppress C4996 Warning on strerror per function -// -// 19 06-03-14 11:44 Yushli -// Suppress C4996 Warning on strcpy per function -// -// 18 2/24/06 8:03p Kjchen -// depress oacr warnings -// -// 17 9/25/05 12:04p Kjchen -// merge OneNote's change -// -// 16 9/21/05 11:26 Fseide -// output changed from << to fwrite -// -// 15 5/09/05 11:09p Kjchen -// add: using namespace std; -// -// 14 2/17/05 10:32 Fseide -// added muted mode and new method shutUp() -// -// 13 2/03/05 19:37 Fseide -// removed unnecessary dependence on fileutil.h; -// removed dependence on critsec.h (CCritSec now in basetypes.h) -// -// 12 4/19/04 18:58 Fseide -// showbuf() now does not anymore use "-orDie" functions to avoid infinite -// recursion... 
-// -// 11 2/21/04 10:26 Fseide -// (compiler warnings eliminated) -// -// 10 7/31/03 12:37p Fseide -// ERROR() can now throw an exception instead of exit()-ing; -// new method HDM_CLog::setExceptionFlag(); -// new class message_exception -// -// 9 8/16/02 7:14p Fseide -// now thread-safe -// -// 8 8/01/02 7:48p Fseide -// new function (macro) memError (MEMERROR) to display an error in case of -// out-of-memory (ERROR allocates memory itself) -// -// 7 7/31/02 10:13a Fseide -// implemented logging to file (accessed through SETLOGFILE() macro) -// -// 6 4/03/02 3:58p Fseide -// VSS keyword and copyright added -// -// F. Seide 5 Mar 2002 -// - -#pragma once -#ifndef _MESSAGE_ -#define _MESSAGE_ - -#ifndef UNDER_CE // fixed-buffer overloads not available for wince -#ifdef _CRT_SECURE_CPP_OVERLOAD_STANDARD_NAMES // fixed-buffer overloads for strcpy() etc. -#undef _CRT_SECURE_CPP_OVERLOAD_STANDARD_NAMES -#endif -#define _CRT_SECURE_CPP_OVERLOAD_STANDARD_NAMES 1 -#endif - -#include "basetypes.h" - -#include -#include -#include -#include // for _time64 in timeDateStamp() - -#pragma warning (push) -#pragma warning (disable : 4793) // caused by varargs - -namespace msra { namespace logging -{ - // ----------------------------------------------------------------------- - // message_exception - exception thrown by this error module - // ----------------------------------------------------------------------- - - class message_exception : public std::runtime_error - { - char buf[1024]; // local buffer for message - const char * dup_what (const char * what) - { - strcpy (buf, "message_exception:"); - strcat (buf, what); - return &buf[0]; - } - public: - message_exception (const char * what) : runtime_error (dup_what (what)) - { - } - }; - - // ----------------------------------------------------------------------- - // global state (hidden as a static struct in a variable) - // ----------------------------------------------------------------------- - - struct __globals - { - 
msra::basetypes::CCritSec lock; - FILE * logFile; - bool noFlush; // heavy logging: don't flush - bool throwExceptionFlag; // true: don't exit but throw exception - - // G.muted semantics is as follows - // - G.muted && !G.logFile => don't show anything - // - G.muted && G.logFile => write to log file - // - !G.muted && !G.logFile => write to stderr only - // - !G.muted && G.logFile => write to log file and stderr - bool muted; - - std::string filename; // log file name - char buf[30000]; // for _vsnprintf() - - __globals() : logFile (NULL), throwExceptionFlag (true), muted (false), noFlush (false) - { buf[0] = 0; buf[sizeof (buf) / sizeof (*buf) -1] = 0; } - - static __globals & get() { static __globals g; return g; } - }; - -#pragma push_macro ("G") -#define G (__globals::get()) // access global XYZ as G.XYZ - - // ------------------------------------------------------------------------ - // setLogFile(): set the log file - // if non-NULL then every message will be written both to stderr and this - // log file. 
- // multi-threading: not thread-safe, set this before starting - // ------------------------------------------------------------------------ - - static inline FILE * setLogFile (FILE * newLogFile) - { - FILE * oldLogFile; - - oldLogFile = G.logFile; - - if (newLogFile != stderr) - { - G.logFile = newLogFile; - } - else - { - G.logFile = NULL; - } - - return oldLogFile; - } - - // ------------------------------------------------------------------------ - // setLogFileByName(): set the log file by file name - // in this mode, log file will be re-connected when disconnected - // filename == NULL indicates an attempt to reconnect - // WARNING: if the filename is invalid, it will try to reconnect every time - // ------------------------------------------------------------------------ - - static inline void setLogFileByName (const char * p_filename) - { - FILE * newLogFile = NULL; - if (p_filename == NULL) - { - // for reconnection - ASSERT (G.filename != ""); - newLogFile = fopen (G.filename.c_str (), "ab"); - } - else - { - ASSERT (p_filename[0]); - G.filename = p_filename; // remember filename - newLogFile = fopen (p_filename, "wb"); - } - - // handle open failure - if (newLogFile == NULL) - { - if (G.logFile != NULL) - { - fprintf (G.logFile, "ERROR: setLogFileByName: error opening log file %s: %s\n", - G.filename.c_str (), strerror (errno)); - // in case of a reconnect, this ^^ will obviously fail, we ignore this - } - fprintf (stderr, "ERROR: setLogFileByName: error opening log file %s: %s\n", - G.filename.c_str (), strerror (errno)); - return; - } - - // set new handle - FILE * oldLogFile = setLogFile (newLogFile); - - // close old handle - if (oldLogFile != NULL && oldLogFile != stderr && oldLogFile != stdin) - { - int rc = fclose (oldLogFile); - if (rc != 0) - { - if (G.logFile != NULL) - { // note: this goes to the new log file - fprintf (G.logFile, "ERROR: setLogFileByName: error closing old log file: %s\n", - strerror (errno)); - } - fprintf (stderr, "ERROR: 
setLogFileByName: error closing old log file: %s\n", - strerror (errno)); - } - } - } - - // ------------------------------------------------------------------------ - // setExceptionFlag(): set flag whether to throw an exception (true) or exit() (false, default) - // ------------------------------------------------------------------------ - - static inline bool setExceptionFlag (bool throwExceptionFlag = true) - { - bool oldFlag = G.throwExceptionFlag; - G.throwExceptionFlag = throwExceptionFlag; - return oldFlag; - } - - // ------------------------------------------------------------------------ - // timeDateStamp() -- often needed for logging - // ------------------------------------------------------------------------ - - static inline std::string timeDateStamp (void) - { -#ifdef _MSC_VER - __time64_t localtime; _time64 (&localtime); // get current time and date - struct tm now = *_localtime64 (&localtime); // convert - char buf[20]; - sprintf (buf, "%04d/%02d/%02d %02d:%02d:%02d", - now.tm_year + 1900, now.tm_mon + 1, now.tm_mday, - now.tm_hour, now.tm_min, now.tm_sec); - return buf; -#else - return "(now)"; // TODO: fix this for GCC builds -#endif - } - - // ------------------------------------------------------------------------ - // __flush(): flush output - // ------------------------------------------------------------------------ - - static inline void __flush() - { - int rc = fflush (G.logFile); - if (rc != 0) - { - fprintf (stderr, "ERROR: __flush: error flushing to log file %s\n", - strerror (errno)); - } - } - - // ------------------------------------------------------------------------ - // setHeavyLogging(): we are heavily logging: don't flush & increase out buf - // ------------------------------------------------------------------------ - - static inline void setHeavyLogging (bool isHeavy) - { - __flush(); // flush the current buffer - if (!isHeavy) - { - G.noFlush = false; - } - else - { - G.noFlush = true; - if (G.logFile) - setvbuf (G.logFile, 
NULL, _IOFBF, 16384); // flush every 16K - } - } - - // ------------------------------------------------------------------------ - // shutUp(): set muted mode (true: no output will be generated anymore) - // - // multi-threading: retrieving the previous state is not thread-safe, - // if you want, do this before starting - // ------------------------------------------------------------------------ - - static inline bool shutUp (bool quiet = true) - { - bool oldFlag = G.muted; - G.muted = quiet; - return oldFlag; - } - - // ------------------------------------------------------------------------ - // getLogFile(): get log file handle - // ------------------------------------------------------------------------ - - static inline FILE * getLogFile (void) - { - return G.logFile; - } - - // ------------------------------------------------------------------------ - // __showbuf(): output contents of buf[] with prefix prepended - // multi-threading: must be called from within critical section - // ------------------------------------------------------------------------ - - static inline void __showbuf (const std::string & prefix, bool nl) - { - ASSERT (strlen (G.buf) < sizeof (G.buf) / sizeof (*G.buf)); - std::string outtext = prefix + G.buf; - if (nl) outtext += "\n"; - - // write out; first to screen in case we can't write to log file - -#ifndef ONENOTE_COMPILER - // OneNote treats it as an error if stderr is not empty. 
- // and in OneNote, we can't see message printed to stderr - // So, in OneNote, don't put it into stderr - - // G.muted semantics is as follows - // - G.muted && !G.logFile => don't show anything - // - G.muted && G.logFile => write to log file - // - !G.muted && !G.logFile => write to stderr only - // - !G.muted && G.logFile => write to log file and stderr - if (!G.muted) - { - fwrite ((void*) outtext.c_str(), sizeof (*outtext.c_str()), - outtext.length(), stderr); - if (!G.noFlush) - fflush (stderr); - } -#endif - - // write to log file - - // check whether the log file has been disconnected or not - if (G.filename != "") // with known filename, suppose to reconnect - { - if (G.logFile == NULL || ferror (G.logFile) != 0) - { - setLogFileByName (NULL); // attempt to re-open the log file - - if (G.logFile) - { - fprintf (G.logFile, "ERROR: __showbuf: log file handle lost, reconnected\n"); - } - } - } - - if (G.logFile) - { - size_t n = fwrite ((void*) outtext.c_str(), sizeof (*outtext.c_str()), - outtext.length(), G.logFile); - if (n != outtext.length() * sizeof (*outtext.c_str())) - { // write error - fprintf (stderr, "ERROR: __showbuf: error writing this to log file: %s\n", strerror (errno)); - fwrite ((void*) outtext.c_str(), sizeof (*outtext.c_str()), - outtext.length(), stderr); - } - else if (!G.noFlush) // flush logFile - { - __flush(); - } - } - } - - // ------------------------------------------------------------------------ - // noLoggingReqd: function to determine if any logging reqd - // at all - used so that we can exit early if none reqd - // ------------------------------------------------------------------------ - - static inline bool noLoggingReqd() - { - return G.muted && !G.logFile; - } - - // ------------------------------------------------------------------------ - // message(): like printf(), writing to log output - // multi-threading: this function is thread-safe - // ------------------------------------------------------------------------ - 
- static inline void message (const char * fmt, ...) - { - if (noLoggingReqd()) return; // muted: all output is suppressed - - msra::basetypes::CAutoLock autoLock (G.lock); - va_list arg_ptr; - va_start (arg_ptr, fmt); - vsprintf (G.buf, fmt, arg_ptr); - __showbuf ("", true); - } - - static void message_nolf (const char * fmt, ...) - { - if (noLoggingReqd()) return; // muted: all output is suppressed - - msra::basetypes::CAutoLock autoLock (G.lock); - va_list arg_ptr; - va_start (arg_ptr, fmt); - vsprintf (G.buf, fmt, arg_ptr); - __showbuf ("", false); - } - - // ------------------------------------------------------------------------ - // warning(): like message(), with text "WARNING: " prepended - // multi-threading: this function is thread-safe - // ------------------------------------------------------------------------ - - static void warning (const char * fmt, ...) - { - if (noLoggingReqd()) return; // muted: all output is suppressed - - msra::basetypes::CAutoLock autoLock (G.lock); - va_list arg_ptr; - va_start (arg_ptr, fmt); - vsprintf (G.buf, fmt, arg_ptr); - __showbuf ("WARNING: ", true); - __flush(); - } - - // ------------------------------------------------------------------------ - // __throw_or_exit(): exit() or throw exception depending on throwExceptionFlag - // ------------------------------------------------------------------------ - - static inline void __throw_or_exit (void) - { - __flush(); - if (G.throwExceptionFlag) - { - throw message_exception (G.buf); - } - exit (1); - } - - // ------------------------------------------------------------------------ - // error(): like warning() but terminates program afterwards - // multi-threading: this function is thread-safe - // ------------------------------------------------------------------------ - -#pragma warning (push) -#pragma warning (disable : 4702) // the 'return 0;' causes this in Release - static int error (const char * fmt, ...) 
- { -#if 0 // special test code to determine the Windows error in case of a network error - DWORD winErr = GetLastError(); - try - { - msra::basetypes::CAutoLock autoLock (G.lock); - sprintf (G.buf, "%d (\"%S\")", winErr, FormatWin32Error(winErr).c_str()); - if (!noLoggingReqd()) - __showbuf ("Win32 error of subsequent error message: ", true); - } - catch(...){} -#endif - msra::basetypes::CAutoLock autoLock (G.lock); - va_list arg_ptr; - va_start (arg_ptr, fmt); - vsprintf (G.buf, fmt, arg_ptr); - if (!noLoggingReqd()) - { // if muted, we format the msg (for __throw_or_exit) but don't print it - __showbuf ("ERROR: ", true); - } - __throw_or_exit(); - return 0; - } - - // ------------------------------------------------------------------------ - // mem_error(): similar to error() but without any memory allocations - // (only one string argument allowed) - // multi-threading: this function is thread-safe - // ------------------------------------------------------------------------ - - static int mem_error (const char * fmt, int arg) - { - msra::basetypes::CAutoLock autoLock (G.lock); - if (!noLoggingReqd()) - { // if muted, we format the msg (for __throw_or_exit) but don't print it - fprintf (stderr, fmt, arg); - fprintf (stderr, "\n"); - - if (G.logFile) - { - fprintf (G.logFile, fmt, arg); - fprintf (G.logFile, "\n"); - int rc = fflush (G.logFile); - if (rc != 0) - { - fprintf (stderr, "error flushing log message to file: %s\n", - strerror (errno)); - } - } - } - - // format msg for __throw_or_exit() - sprintf (G.buf, fmt, arg); - strcat (G.buf, "\n"); - __throw_or_exit(); - return 0; - } -#pragma warning (pop) - - static inline void __avoid_C4505 (void) - { message (""); message_nolf (""); warning (""); error (""); mem_error ("", 0); } -#pragma pop_macro ("G") -};}; - -#pragma warning(pop) - -// =========================================================================== -// compatibility macros (for older source code) -// 
=========================================================================== - -#undef ERROR // defined in wingdi.h... aargh! -#define WARNING msra::logging::warning -#define ERROR msra::logging::error -#define MESSAGE msra::logging::message -#define MESSAGE_NOLF msra::logging::message_nolf -#define MEMERROR msra::logging::mem_error -#define SETLOGFILE msra::logging::setLogFile - -// =========================================================================== -// special function for basetypes.h's ASSERT() macro -// =========================================================================== - -#ifdef _CHECKED -void inline _CHECKED_ASSERT_error(const char * file, int line, const char * exp) -{ ERROR ("%s:%d:assertion failure: %s", file, line, exp); } -#endif - -#endif // _MESSAGE_ - +// no longer used \ No newline at end of file diff --git a/Common/fileutil.cpp b/Common/fileutil.cpp index 2210d670b..24637923f 100644 --- a/Common/fileutil.cpp +++ b/Common/fileutil.cpp @@ -16,7 +16,6 @@ #endif #include "basetypes.h" #include "fileutil.h" -#include "message.h" #ifdef __unix__ #include #endif @@ -78,9 +77,9 @@ void fgetText(FILE * f, char& v) const wchar_t* formatString = GetFormatString(v); int rc = fwscanf(f, formatString, &v); if (rc == 0) - ERROR ("error reading value from file (invalid format): %s", formatString); + RuntimeError ("error reading value from file (invalid format): %s", formatString); else if (rc == EOF) - ERROR ("error reading from file: %s", strerror (errno)); + RuntimeError ("error reading from file: %s", strerror (errno)); assert(rc == 1); } void fgetText(FILE * f, wchar_t& v) @@ -88,9 +87,9 @@ void fgetText(FILE * f, wchar_t& v) const wchar_t* formatString = GetFormatString(v); int rc = fwscanf(f, formatString, &v); if (rc == 0) - ERROR ("error reading value from file (invalid format): %s", formatString); + RuntimeError ("error reading value from file (invalid format): %s", formatString); else if (rc == EOF) - ERROR ("error reading from file: %s", 
strerror (errno)); + RuntimeError ("error reading from file: %s", strerror (errno)); assert(rc == 1); } @@ -117,7 +116,7 @@ FILE * fopenOrDie (const string & pathname, const char * mode) FILE * f = (pathname[0] == '-') ? fopenStdHandle (mode) : fopen (pathname.c_str(), mode); if (f == NULL) { - ERROR ("error opening file '%s': %s", pathname.c_str(), strerror (errno)); + RuntimeError ("error opening file '%s': %s", pathname.c_str(), strerror (errno)); return NULL; // keep OACR happy } if (strchr (mode, 'S')) @@ -132,7 +131,7 @@ FILE * fopenOrDie (const wstring & pathname, const wchar_t * mode) FILE * f = (pathname[0] == '-') ? fopenStdHandle (mode) : _wfopen (pathname.c_str(), mode); if (f == NULL) { - ERROR ("error opening file '%S': %s", pathname.c_str(), strerror (errno)); + RuntimeError ("error opening file '%S': %s", pathname.c_str(), strerror (errno)); return NULL; // keep OACR happy } if (strchr (mode, 'S')) @@ -154,7 +153,7 @@ void fsetmode(FILE * f, char type) { if (type != 'b' && type != 't') { - ERROR ("fsetmode: invalid type '%c'"); + RuntimeError ("fsetmode: invalid type '%c'", type); } #ifdef UNDER_CE // winCE and win32 have different return types for _fileno FILE *fd = fileno (f); // note: no error check possible @@ -165,7 +164,7 @@ void fsetmode(FILE * f, char type) int rc = setmode (fd, mode); if (rc == -1) { - ERROR ("error changing file mode: %s", strerror (errno)); + RuntimeError ("error changing file mode: %s", strerror (errno)); } } @@ -181,7 +180,7 @@ void freadOrDie (void * ptr, size_t size, size_t count, FILE * f) size_t chunkn = min (count, (size_t)15*1024*1024); // BUGBUG: I surely meant this limit to be bytes, not units of 'size'... 
size_t n = fread (ptr, size, chunkn, f); if (n != chunkn) - ERROR ("error reading from file: %s", strerror (errno)); + RuntimeError ("error reading from file: %s", strerror (errno)); count -= n; ptr = n * size + (char*) ptr; } @@ -208,7 +207,7 @@ void fwriteOrDie (const void * ptr, size_t size, size_t count, FILE * f) size_t n = fwrite ((const void *) p1, 1, wantWrite, f); if (n != wantWrite) { - ERROR ("error writing to file (ptr=0x%08lx, size=%d," + RuntimeError ("error writing to file (ptr=0x%08lx, size=%d," " count=%d, writing %d bytes after %d): %s", ptr, size, count, (int) wantWrite, (int) (size * count - totalBytes), @@ -233,7 +232,7 @@ void fprintfOrDie (FILE * f, const char * fmt, ...) int rc = vfprintf (f, fmt, arg_ptr); if (rc < 0) { - ERROR ("error writing to file: %s", strerror (errno)); + RuntimeError ("error writing to file: %s", strerror (errno)); } } #pragma warning(pop) @@ -247,7 +246,7 @@ void fflushOrDie (FILE * f) int rc = fflush (f); if (rc != 0) { - ERROR ("error flushing to file: %s", strerror (errno)); + RuntimeError ("error flushing to file: %s", strerror (errno)); } } @@ -259,22 +258,22 @@ size_t filesize (FILE * f) size_t curPos = _ftelli64(f); if (curPos == -1L) { - ERROR ("error determining file position: %s", strerror (errno)); + RuntimeError ("error determining file position: %s", strerror (errno)); } int rc = _fseeki64 (f, 0, SEEK_END); if (rc != 0) { - ERROR ("error seeking to end of file: %s", strerror (errno)); + RuntimeError ("error seeking to end of file: %s", strerror (errno)); } size_t len = _ftelli64 (f); if (len == -1L) { - ERROR ("error determining file position: %s", strerror (errno)); + RuntimeError ("error determining file position: %s", strerror (errno)); } rc = _fseeki64 (f, curPos, SEEK_SET); if (rc != 0) { - ERROR ("error resetting file position: %s", strerror (errno)); + RuntimeError ("error resetting file position: %s", strerror (errno)); } return len; } @@ -318,12 +317,12 @@ size_t fseekOrDie (FILE * f, size_t 
offset, int mode) size_t curPos = _ftelli64 (f); if (curPos == -1L) { - ERROR ("error seeking: %s", strerror (errno)); + RuntimeError ("error seeking: %s", strerror (errno)); } int rc = _fseeki64 (f, offset, mode); if (rc != 0) { - ERROR ("error seeking: %s", strerror (errno)); + RuntimeError ("error seeking: %s", strerror (errno)); } return curPos; } @@ -333,7 +332,7 @@ uint64_t fgetpos (FILE * f) fpos_t post; int rc = ::fgetpos (f, &post); if (rc != 0) - ERROR ("error getting file position: %s", strerror (errno)); + RuntimeError ("error getting file position: %s", strerror (errno)); return post; } @@ -368,7 +367,7 @@ void fsetpos (FILE * f, uint64_t reqpos) fpos_t post = reqpos; int rc = ::fsetpos (f, &post); if (rc != 0) - ERROR ("error setting file position: %s", strerror (errno)); + RuntimeError ("error setting file position: %s", strerror (errno)); } // ---------------------------------------------------------------------------- @@ -378,7 +377,7 @@ void fsetpos (FILE * f, uint64_t reqpos) void unlinkOrDie (const std::string & pathname) { if (unlink (pathname.c_str()) != 0 && errno != ENOENT) // if file is missing that's what we want - ERROR ("error deleting file '%s': %s", pathname.c_str(), strerror (errno)); + RuntimeError ("error deleting file '%s': %s", pathname.c_str(), strerror (errno)); } #ifndef _MSC_VER static int _wunlink (const wchar_t * p) { return unlink (msra::strfun::wcstombs (p).c_str()); } @@ -386,7 +385,7 @@ static int _wunlink (const wchar_t * p) { return unlink (msra::strfun::wcstombs void unlinkOrDie (const std::wstring & pathname) { if (_wunlink (pathname.c_str()) != 0 && errno != ENOENT) // if file is missing that's what we want - ERROR ("error deleting file '%S': %s", pathname.c_str(), strerror (errno)); + RuntimeError ("error deleting file '%S': %s", pathname.c_str(), strerror (errno)); } // ---------------------------------------------------------------------------- @@ -396,13 +395,13 @@ void unlinkOrDie (const std::wstring & 
pathname) void renameOrDie (const std::string & from, const std::string & to) { if (!MoveFileA (from.c_str(),to.c_str())) - ERROR ("error renaming: %s", GetLastError()); + RuntimeError ("error renaming: %s", GetLastError()); } void renameOrDie (const std::wstring & from, const std::wstring & to) { if (!MoveFileW (from.c_str(),to.c_str())) - ERROR ("error renaming: %s", GetLastError()); + RuntimeError ("error renaming: %s", GetLastError()); } // ---------------------------------------------------------------------------- @@ -492,7 +491,7 @@ CHAR * fgetline (FILE * f, CHAR * buf, int size) if (p == NULL) // EOF reached: next time feof() = true { if (ferror (f)) - ERROR ("error reading line: %s", strerror (errno)); + RuntimeError ("error reading line: %s", strerror (errno)); buf[0] = 0; return buf; } @@ -504,7 +503,7 @@ CHAR * fgetline (FILE * f, CHAR * buf, int size) { basic_string example (p, n < 100 ? n : 100); uint64_t filepos = fgetpos(f); // (for error message only) - ERROR("input line too long at file offset %I64d (max. %d characters allowed) [%s ...]", + RuntimeError ("input line too long at file offset %I64d (max. %d characters allowed) [%s ...]", filepos, size -1, _utf8 (example).c_str()); } @@ -536,7 +535,7 @@ const wchar_t * fgetline (FILE * f, wchar_t * buf, int size) if (p == NULL) // EOF reached: next time feof() = true { if (ferror (f)) - ERROR ("error reading line: %s", strerror (errno)); + RuntimeError ("error reading line: %s", strerror (errno)); buf[0] = 0; return buf; } @@ -547,7 +546,7 @@ const wchar_t * fgetline (FILE * f, wchar_t * buf, int size) if (n >= (size_t) size -1) { wstring example (buf, min (n, 100)); - ERROR ("input line too long at file offset %U64d (max. %d characters allowed) [%S ...]", + RuntimeError ("input line too long at file offset %U64d (max. 
%d characters allowed) [%S ...]", fgetpos (f), size -1, example.c_str()); } @@ -627,12 +626,10 @@ const char * fgetstring (FILE * f, __out_z_cap(size) char * buf, int size) { int c = fgetc(f); if (c == EOF) - ERROR("error reading string or missing 0: %s", strerror(errno)); + RuntimeError ("error reading string or missing 0: %s", strerror(errno)); if (c == 0) break; if (i >= size - 1) - { - ERROR("input line too long (max. %d characters allowed)", size - 1); - } + RuntimeError ("input line too long (max. %d characters allowed)", size - 1); buf[i] = (char)c; } assert (i < size); @@ -648,7 +645,7 @@ string fgetstring (FILE * f) { char c = (char)fgetc (f); if (c == EOF) - ERROR ("error reading string or missing 0: %s", strerror (errno)); + RuntimeError ("error reading string or missing 0: %s", strerror (errno)); if (c == 0) break; res.push_back (c); } @@ -664,11 +661,11 @@ const wchar_t * fgetstring (FILE * f, __out_z_cap(size) wchar_t * buf, int size) // TODO: we should redefine this to write UTF-16 (which matters on GCC which defines wchar_t as 32 bit) wint_t c = fgetwc(f); if (c == WEOF) - ERROR("error reading string or missing 0: %s", strerror(errno)); + RuntimeError ("error reading string or missing 0: %s", strerror(errno)); if (c == 0) break; if (i >= size - 1) { - ERROR("input line too long (max. %d wchar_tacters allowed)", size - 1); + RuntimeError ("input line too long (max. 
%d wchar_tacters allowed)", size - 1); } buf[i] = (wchar_t)c; } @@ -699,7 +696,7 @@ wstring fgetwstring (FILE * f) // note the order below works only for little endian wint_t c = (wint_t)((c2 << 8) | c1); if (c == WEOF) - ERROR ("error reading string or missing 0: %s", strerror (errno)); + RuntimeError ("error reading string or missing 0: %s", strerror (errno)); if (c == 0) break; res.push_back ((wchar_t) c); } @@ -716,7 +713,7 @@ wstring fgetwstring (FILE * f) { wint_t c = fgetwc(f); if (c == WEOF) - ERROR("error reading string or missing 0: %s", strerror(errno)); + RuntimeError ("error reading string or missing 0: %s", strerror(errno)); if (c == 0) break; res.push_back((wchar_t)c); } @@ -733,14 +730,14 @@ bool fskipspace (FILE * f) if (c == EOF) // hit the end { if (ferror(f)) - ERROR("error reading from file: %s", strerror(errno)); + RuntimeError ("error reading from file: %s", strerror(errno)); break; } if (!isspace (c)) // end of space: undo getting that character { int rc = ungetc(c, f); if (rc != c) - ERROR("error in ungetc(): %s", strerror(errno)); + RuntimeError ("error in ungetc(): %s", strerror(errno)); break; } } @@ -757,14 +754,14 @@ bool fskipwspace (FILE * f) if (c == WEOF) // hit the end { if (ferror (f)) - ERROR ("error reading from file: %s", strerror (errno)); + RuntimeError ("error reading from file: %s", strerror (errno)); break; } if (!iswspace (c)) // end of space: undo getting that character { wint_t rc = ungetwc (c, f); if (rc != c) - ERROR ("error in ungetc(): %s", strerror (errno)); + RuntimeError ("error in ungetc(): %s", strerror (errno)); break; } } @@ -800,7 +797,7 @@ int fskipNewline (FILE * f, bool skip) return found?(int)true:EOF; int rc = ungetc (c, f); if (rc != c) - ERROR ("error in ungetc(): %s", strerror (errno)); + RuntimeError ("error in ungetc(): %s", strerror (errno)); return (int)found; } // if we get here we saw a newline @@ -835,7 +832,7 @@ int fskipwNewline (FILE * f, bool skip) return found?(int)true:EOF; wint_t rc = 
ungetwc (c, f); if (rc != c) - ERROR ("error in ungetwc(): %s", strerror (errno)); + RuntimeError ("error in ungetwc(): %s", strerror (errno)); return (int)found; } // if we get here we saw a double newline @@ -855,7 +852,7 @@ const char * fgettoken (FILE * f, __out_z_cap(size) char * buf, int size) if (c == EOF) break; if (isspace (c)) break; if (i >= size -1) - ERROR ("input token too long (max. %d characters allowed)", size -1); + RuntimeError ("input token too long (max. %d characters allowed)", size -1); buf[i] = (char) c; } // ... TODO: while (IsWhiteSpace (c)) c = fgetc (f); // skip trailing space @@ -863,7 +860,7 @@ const char * fgettoken (FILE * f, __out_z_cap(size) char * buf, int size) { int rc = ungetc (c, f); if (rc != c) - ERROR ("error in ungetc(): %s", strerror (errno)); + RuntimeError ("error in ungetc(): %s", strerror (errno)); } assert (i < size); buf[i] = 0; @@ -889,7 +886,7 @@ const wchar_t * fgettoken (FILE * f, __out_z_cap(size) wchar_t * buf, int size) if (c == WEOF) break; if (iswspace (c)) break; if (i >= size -1) - ERROR ("input token too long (max. %d wchar_tacters allowed)", size -1); + RuntimeError ("input token too long (max. %d wchar_tacters allowed)", size -1); buf[i] = (wchar_t) c; } // ... 
TODO: while (IsWhiteSpace (c)) c = fgetc (f); // skip trailing space @@ -897,7 +894,7 @@ const wchar_t * fgettoken (FILE * f, __out_z_cap(size) wchar_t * buf, int size) { int rc = ungetwc (c, f); if (rc != c) - ERROR ("error in ungetwc(): %s", strerror (errno)); + RuntimeError ("error in ungetwc(): %s", strerror (errno)); } assert (i < size); buf[i] = 0; @@ -982,7 +979,7 @@ void fcheckTag_ascii (FILE * f, const string & expectedTag) fgettoken (f, buf, sizeof(buf)/sizeof(*buf)); if (expectedTag != buf) { - ERROR ("invalid tag '%s' found; expected '%s'", buf, expectedTag.c_str()); + RuntimeError ("invalid tag '%s' found; expected '%s'", buf, expectedTag.c_str()); } } @@ -994,7 +991,7 @@ void fcompareTag (const string & readTag, const string & expectedTag) { if (readTag != expectedTag) { - ERROR ("invalid tag '%s' found; expected '%s'", + RuntimeError ("invalid tag '%s' found; expected '%s'", readTag.c_str(), expectedTag.c_str()); } } @@ -1034,7 +1031,7 @@ void fpad (FILE * f, int n) int pos = ftell (f); if (pos == -1) { - ERROR ("error in ftell(): %s", strerror (errno)); + RuntimeError ("error in ftell(): %s", strerror (errno)); } // determine how many bytes are needed (at least 1 for the 0-terminator) // and create a dummy string of that length incl. 
terminator @@ -1120,7 +1117,7 @@ int fgetint_ascii (FILE * f) int rc = ungetc (c, f); if (rc != c) { - ERROR ("error in ungetc(): %s", strerror (errno)); + RuntimeError ("error in ungetc(): %s", strerror (errno)); } return res; } @@ -1159,9 +1156,9 @@ float fgetfloat_ascii (FILE * f) fskipspace (f); int rc = fscanf (f, "%f", &val); // security hint: safe overloads if (rc == 0) - ERROR ("error reading float value from file (invalid format): %s"); + RuntimeError ("error reading float value from file (invalid format): %s"); else if (rc == EOF) - ERROR ("error reading from file: %s", strerror (errno)); + RuntimeError ("error reading from file: %s", strerror (errno)); assert (rc == 1); return val; } @@ -1326,7 +1323,7 @@ void fgetfile (FILE * f, std::vector & buffer) size_t n = fread (&inbuf[0], sizeof (inbuf[0]), inbuf.size(), f); if (ferror (f)) { - ERROR ("fgetfile: error reading from file: %s", strerror (errno)); + RuntimeError ("fgetfile: error reading from file: %s", strerror (errno)); } buffer.insert (buffer.end(), inbuf.begin(), inbuf.begin() + n); } @@ -1400,12 +1397,12 @@ void setfiletime (const wstring & path, const FILETIME & time) OPEN_EXISTING, FILE_ATTRIBUTE_NORMAL, NULL)); if (h == INVALID_HANDLE_VALUE) { - ERROR ("setfiletime: error opening file: %d", GetLastError()); + RuntimeError ("setfiletime: error opening file: %d", GetLastError()); } BOOL rc = SetFileTime (h, NULL, NULL, &time); if (!rc) { - ERROR ("setfiletime: error setting file time information: %d", GetLastError()); + RuntimeError ("setfiletime: error setting file time information: %d", GetLastError()); } } @@ -1484,7 +1481,7 @@ void expand_wildcards (const wstring & path, vector & paths) { BOOL rc = ExpandWildcards (path, paths); if (!rc) - ERROR("error in expanding wild cards '%S': %S", path.c_str(), FormatWin32Error(::GetLastError()).c_str()); + RuntimeError ("error in expanding wild cards '%S': %S", path.c_str(), FormatWin32Error(::GetLastError()).c_str()); } #endif @@ -1511,7 +1508,7 @@ 
static void mkdir (const wstring & path) if (att != INVALID_FILE_ATTRIBUTES || (att & FILE_ATTRIBUTE_DIRECTORY) != 0) return; // ok } - ERROR ("make_intermediate_dirs: error creating intermediate directory %S", path.c_str()); + RuntimeError ("make_intermediate_dirs: error creating intermediate directory %S", path.c_str()); } #ifndef _MSC_VER diff --git a/DataReader/HTKMLFReader/basetypes.h b/DataReader/HTKMLFReader/basetypes.h index 1a8d39daf..47330185e 100644 --- a/DataReader/HTKMLFReader/basetypes.h +++ b/DataReader/HTKMLFReader/basetypes.h @@ -959,4 +959,15 @@ using namespace msra::basetypes; // for compatibility #pragma warning (pop) +// RuntimeError - throw a std::runtime_error with a formatted error string +static inline void RuntimeError(const char * format, ...) +{ + va_list args; + char buffer[1024]; + + va_start(args, format); + vsprintf(buffer, format, args); + throw std::runtime_error(buffer); +}; + #endif // _BASETYPES_ diff --git a/DataReader/HTKMLFReader/fileutil.cpp b/DataReader/HTKMLFReader/fileutil.cpp index 55e0d0263..e4f687768 100644 --- a/DataReader/HTKMLFReader/fileutil.cpp +++ b/DataReader/HTKMLFReader/fileutil.cpp @@ -16,7 +16,6 @@ #include "basetypes.h" #include "fileutil.h" -#include "message.h" #include #include #include @@ -50,7 +49,7 @@ template FILE * fopenStdHandle (const _T * mode) // switch to binary mode if not yet (in case it is stdin) int rc = _setmode (_fileno (f), strchr (mode, 'b') ? _O_BINARY : _O_TEXT); if (rc == -1) - ERROR ("error switching stream to binary mode: %s", strerror (errno)); + RuntimeError ("error switching stream to binary mode: %s", strerror (errno)); } return f; } @@ -60,7 +59,7 @@ FILE * fopenOrDie (const STRING & pathname, const char * mode) FILE * f = (pathname[0] == '-') ? 
fopenStdHandle (mode) : fopen (pathname.c_str(), mode); if (f == NULL) { - ERROR ("error opening file '%s': %s", pathname.c_str(), strerror (errno)); + RuntimeError ("error opening file '%s': %s", pathname.c_str(), strerror (errno)); return NULL; // keep OACR happy } if (strchr (mode, 'S')) @@ -75,7 +74,7 @@ FILE * fopenOrDie (const WSTRING & pathname, const wchar_t * mode) FILE * f = (pathname[0] == '-') ? fopenStdHandle (mode) : _wfopen (pathname.c_str(), mode); if (f == NULL) { - ERROR ("error opening file '%S': %s", pathname.c_str(), strerror (errno)); + RuntimeError ("error opening file '%S': %s", pathname.c_str(), strerror (errno)); return NULL; // keep OACR happy } if (strchr (mode, 'S')) @@ -93,7 +92,7 @@ void fsetmode (FILE * f, char type) { if (type != 'b' && type != 't') { - ERROR ("fsetmode: invalid type '%c'"); + RuntimeError ("fsetmode: invalid type '%c'"); } #ifdef UNDER_CE // winCE and win32 have different return types for _fileno FILE *fd = _fileno (f); // note: no error check possible @@ -104,7 +103,7 @@ void fsetmode (FILE * f, char type) int rc = _setmode (fd, mode); if (rc == -1) { - ERROR ("error changing file mode: %s", strerror (errno)); + RuntimeError ("error changing file mode: %s", strerror (errno)); } } @@ -120,7 +119,7 @@ void freadOrDie (void * ptr, size_t size, size_t count, FILE * f) size_t chunkn = min (count, 15*1024*1024); // BUGBUG: I surely meant this limit to be bytes, not units of 'size'... 
size_t n = fread (ptr, size, chunkn, f); if (n != chunkn) - ERROR ("error reading from file: %s", strerror (errno)); + RuntimeError ("error reading from file: %s", strerror (errno)); count -= n; ptr = n * size + (char*) ptr; } @@ -135,7 +134,7 @@ void freadOrDie (void * ptr, size_t size, size_t count, const HANDLE f) DWORD n ; ReadFile(f, ptr, (DWORD) chunkn, &n, NULL); if (n != chunkn) - ERROR ("error number for reading from file: %s", GetLastError()); + RuntimeError ("error number for reading from file: %s", GetLastError()); count -= (size_t) (n / size); ptr = n + (char*) ptr; } @@ -162,7 +161,7 @@ void fwriteOrDie (const void * ptr, size_t size, size_t count, FILE * f) size_t n = fwrite ((const void *) p1, 1, wantWrite, f); if (n != wantWrite) { - ERROR ("error writing to file (ptr=0x%08lx, size=%d," + RuntimeError ("error writing to file (ptr=0x%08lx, size=%d," " count=%d, writing %d bytes after %d): %s", ptr, size, count, (int) wantWrite, (int) (size * count - totalBytes), @@ -188,7 +187,7 @@ void fwriteOrDie (const void * ptr, size_t size, size_t count, const HANDLE f) DWORD byteWritten = 0 ; if (WriteFile(f, (const void *) p1, wantWrite, &byteWritten, NULL) == false) { - ERROR ("error writing to file (ptr=0x%08lx, size=%d," + RuntimeError ("error writing to file (ptr=0x%08lx, size=%d," " count=%d, writing %d bytes after %d): %s", ptr, size, count, (int) wantWrite, (int) (size * count - totalBytes), @@ -213,7 +212,7 @@ void fprintfOrDie (FILE * f, const char * fmt, ...) 
int rc = vfprintf (f, fmt, arg_ptr); if (rc < 0) { - ERROR ("error writing to file: %s", strerror (errno)); + RuntimeError ("error writing to file: %s", strerror (errno)); } } #pragma warning(pop) @@ -227,7 +226,7 @@ void fflushOrDie (FILE * f) int rc = fflush (f); if (rc != 0) { - ERROR ("error flushing to file: %s", strerror (errno)); + RuntimeError ("error flushing to file: %s", strerror (errno)); } } @@ -240,22 +239,22 @@ size_t filesize (FILE * f) long curPos = ftell (f); if (curPos == -1L) { - ERROR ("error determining file position: %s", strerror (errno)); + RuntimeError ("error determining file position: %s", strerror (errno)); } int rc = fseek (f, 0, SEEK_END); if (rc != 0) { - ERROR ("error seeking to end of file: %s", strerror (errno)); + RuntimeError ("error seeking to end of file: %s", strerror (errno)); } long len = ftell (f); if (len == -1L) { - ERROR ("error determining file position: %s", strerror (errno)); + RuntimeError ("error determining file position: %s", strerror (errno)); } rc = fseek (f, curPos, SEEK_SET); if (rc != 0) { - ERROR ("error resetting file position: %s", strerror (errno)); + RuntimeError ("error resetting file position: %s", strerror (errno)); } return (size_t) len; } @@ -299,12 +298,12 @@ long fseekOrDie (FILE * f, long offset, int mode) long curPos = ftell (f); if (curPos == -1L) { - ERROR ("error seeking: %s", strerror (errno)); + RuntimeError ("error seeking: %s", strerror (errno)); } int rc = fseek (f, offset, mode); if (rc != 0) { - ERROR ("error seeking: %s", strerror (errno)); + RuntimeError ("error seeking: %s", strerror (errno)); } return curPos; } @@ -314,7 +313,7 @@ uint64_t fgetpos (FILE * f) fpos_t post; int rc = ::fgetpos (f, &post); if (rc != 0) - ERROR ("error getting file position: %s", strerror (errno)); + RuntimeError ("error getting file position: %s", strerror (errno)); return post; } @@ -347,7 +346,7 @@ void fsetpos (FILE * f, uint64_t reqpos) fpos_t post = reqpos; int rc = ::fsetpos (f, &post); if (rc != 
0) - ERROR ("error setting file position: %s", strerror (errno)); + RuntimeError ("error setting file position: %s", strerror (errno)); } // ---------------------------------------------------------------------------- @@ -357,12 +356,12 @@ void fsetpos (FILE * f, uint64_t reqpos) void unlinkOrDie (const std::string & pathname) { if (_unlink (pathname.c_str()) != 0 && errno != ENOENT) // if file is missing that's what we want - ERROR ("error deleting file '%s': %s", pathname.c_str(), strerror (errno)); + RuntimeError ("error deleting file '%s': %s", pathname.c_str(), strerror (errno)); } void unlinkOrDie (const std::wstring & pathname) { if (_wunlink (pathname.c_str()) != 0 && errno != ENOENT) // if file is missing that's what we want - ERROR ("error deleting file '%S': %s", pathname.c_str(), strerror (errno)); + RuntimeError ("error deleting file '%S': %s", pathname.c_str(), strerror (errno)); } // ---------------------------------------------------------------------------- @@ -373,14 +372,14 @@ void unlinkOrDie (const std::wstring & pathname) void renameOrDie (const std::string & from, const std::string & to) { if (!MoveFileA (from.c_str(),to.c_str())) - ERROR ("error renaming: %s", GetLastError()); + RuntimeError ("error renaming: %s", GetLastError()); } #endif void renameOrDie (const std::wstring & from, const std::wstring & to) { if (!MoveFileW (from.c_str(),to.c_str())) - ERROR ("error renaming: %s", GetLastError()); + RuntimeError ("error renaming: %s", GetLastError()); } // ---------------------------------------------------------------------------- @@ -458,7 +457,7 @@ CHAR * fgetline (FILE * f, CHAR * buf, int size) if (p == NULL) // EOF reached: next time feof() = true { if (ferror (f)) - ERROR ("error reading line: %s", strerror (errno)); + RuntimeError ("error reading line: %s", strerror (errno)); buf[0] = 0; return buf; } @@ -469,7 +468,7 @@ CHAR * fgetline (FILE * f, CHAR * buf, int size) if (n >= (size_t) size -1) { basic_string example (p, n < 100 ? 
n : 100); - ERROR ("input line too long at file offset %I64d (max. %d characters allowed) [%s ...]", + RuntimeError ("input line too long at file offset %I64d (max. %d characters allowed) [%s ...]", filepos, size -1, _utf8 (example).c_str()); } @@ -501,7 +500,7 @@ const wchar_t * fgetline (FILE * f, wchar_t * buf, int size) if (p == NULL) // EOF reached: next time feof() = true { if (ferror (f)) - ERROR ("error reading line: %s", strerror (errno)); + RuntimeError ("error reading line: %s", strerror (errno)); buf[0] = 0; return buf; } @@ -512,7 +511,7 @@ const wchar_t * fgetline (FILE * f, wchar_t * buf, int size) if (n >= (size_t) size -1) { wstring example (buf, min (n, 100)); - ERROR ("input line too long at file offset %U64d (max. %d characters allowed) [%S ...]", + RuntimeError ("input line too long at file offset %U64d (max. %d characters allowed) [%S ...]", fgetpos (f), size -1, example.c_str()); } @@ -592,11 +591,11 @@ const char * fgetstring (FILE * f, __out_z_cap(size) char * buf, int size) { int c = fgetc (f); if (c == EOF) - ERROR ("error reading string or missing 0: %s", strerror (errno)); + RuntimeError ("error reading string or missing 0: %s", strerror (errno)); if (c == 0) break; if (i >= size -1) { - ERROR ("input line too long (max. %d characters allowed)", size -1); + RuntimeError ("input line too long (max. %d characters allowed)", size -1); } buf[i] = (char) c; } @@ -615,7 +614,7 @@ const char * fgetstring (const HANDLE f, __out_z_cap(size) char * buf, int size) if (c == (char) 0) break; if (i >= size -1) { - ERROR ("input line too long (max. %d characters allowed)", size -1); + RuntimeError ("input line too long (max. 
%d characters allowed)", size -1); } buf[i] = (char) c; } @@ -632,7 +631,7 @@ wstring fgetwstring (FILE * f) { int c = fgetwc (f); if (c == EOF) - ERROR ("error reading string or missing 0: %s", strerror (errno)); + RuntimeError ("error reading string or missing 0: %s", strerror (errno)); if (c == 0) break; res.push_back ((wchar_t) c); } @@ -647,14 +646,14 @@ void fskipspace (FILE * f) if (c == EOF) // hit the end { if (ferror (f)) - ERROR ("error reading from file: %s", strerror (errno)); + RuntimeError ("error reading from file: %s", strerror (errno)); break; } if (!isspace (c)) // end of space: undo getting that character { int rc = ungetc (c, f); if (rc != c) - ERROR ("error in ungetc(): %s", strerror (errno)); + RuntimeError ("error in ungetc(): %s", strerror (errno)); break; } } @@ -679,7 +678,7 @@ void fskipNewline (FILE * f) if (c != '\n') { - ERROR ("unexpected garbage at end of line"); + RuntimeError ("unexpected garbage at end of line"); } } @@ -696,7 +695,7 @@ const char * fgettoken (FILE * f, __out_z_cap(size) char * buf, int size) if (c == EOF) break; if (isspace (c)) break; if (i >= size -1) - ERROR ("input token too long (max. %d characters allowed)", size -1); + RuntimeError ("input token too long (max. %d characters allowed)", size -1); buf[i] = (char) c; } // ... 
TODO: while (isspace (c)) c = fgetc (f); // skip trailing space @@ -704,7 +703,7 @@ const char * fgettoken (FILE * f, __out_z_cap(size) char * buf, int size) { int rc = ungetc (c, f); if (rc != c) - ERROR ("error in ungetc(): %s", strerror (errno)); + RuntimeError ("error in ungetc(): %s", strerror (errno)); } ASSERT (i < size); buf[i] = 0; @@ -789,7 +788,7 @@ void fcheckTag_ascii (FILE * f, const STRING & expectedTag) fgettoken (f, buf, sizeof(buf)/sizeof(*buf)); if (expectedTag != buf) { - ERROR ("invalid tag '%s' found; expected '%s'", buf, expectedTag.c_str()); + RuntimeError ("invalid tag '%s' found; expected '%s'", buf, expectedTag.c_str()); } } @@ -801,7 +800,7 @@ void fcompareTag (const STRING & readTag, const STRING & expectedTag) { if (readTag != expectedTag) { - ERROR ("invalid tag '%s' found; expected '%s'", + RuntimeError ("invalid tag '%s' found; expected '%s'", readTag.c_str(), expectedTag.c_str()); } } @@ -848,7 +847,7 @@ void fpad (FILE * f, int n) int pos = ftell (f); if (pos == -1) { - ERROR ("error in ftell(): %s", strerror (errno)); + RuntimeError ("error in ftell(): %s", strerror (errno)); } // determine how many bytes are needed (at least 1 for the 0-terminator) // and create a dummy string of that length incl. 
terminator @@ -940,7 +939,7 @@ int fgetint_ascii (FILE * f) int rc = ungetc (c, f); if (rc != c) { - ERROR ("error in ungetc(): %s", strerror (errno)); + RuntimeError ("error in ungetc(): %s", strerror (errno)); } return res; } @@ -968,9 +967,9 @@ float fgetfloat_ascii (FILE * f) fskipspace (f); int rc = fscanf (f, "%f", &val); // security hint: safe overloads if (rc == 0) - ERROR ("error reading float value from file (invalid format): %s"); + RuntimeError ("error reading float value from file (invalid format): %s"); else if (rc == EOF) - ERROR ("error reading from file: %s", strerror (errno)); + RuntimeError ("error reading from file: %s", strerror (errno)); ASSERT (rc == 1); return val; } @@ -1073,7 +1072,7 @@ void WAVEHEADER::write (FILE * f) long curPos = ftell (f); if (curPos == -1L) { - ERROR ("error determining file position: %s", strerror (errno)); + RuntimeError ("error determining file position: %s", strerror (errno)); } unsigned int len = (unsigned int) filesize (f); unsigned int RiffLength = len - 8; @@ -1085,6 +1084,7 @@ void WAVEHEADER::write (FILE * f) fseekOrDie (f, curPos, SEEK_SET); } +#if 0 unsigned int WAVEHEADER::read (FILE * f, signed short & wRealFormatTag, int & bytesPerSample) { // read header @@ -1099,17 +1099,17 @@ unsigned int WAVEHEADER::read (FILE * f, signed short & wRealFormatTag, int & by wRealFormatTag = 1; // Workaround: pretend it is 1 (seems safe) } (wRealFormatTag == 1 || wRealFormatTag == 7) - || ERROR ("WAVEHEADER::read: wFormatTag=%d not supported for now", wRealFormatTag); + || RuntimeError ("WAVEHEADER::read: wFormatTag=%d not supported for now", wRealFormatTag); unsigned short wChannels = fgetshort (f); unsigned long dwSamplesPerSec = fgetint (f); unsigned int sampleRate = dwSamplesPerSec; /*unsigned long dwAvgBytesPerSec = */ fgetint (f); unsigned short wBlockAlign = fgetshort (f); unsigned short wBitsPerSample = fgetshort (f); - (wBitsPerSample <= 16) || ERROR ("WAVEHEADER::read: invalid wBitsPerSample %d", 
wBitsPerSample); + (wBitsPerSample <= 16) || RuntimeError ("WAVEHEADER::read: invalid wBitsPerSample %d", wBitsPerSample); bytesPerSample = wBitsPerSample / 8; (wBlockAlign == wChannels * bytesPerSample) - || ERROR ("WAVEHEADER::read: wBlockAlign != wChannels*bytesPerSample not supported"); + || RuntimeError ("WAVEHEADER::read: wBlockAlign != wChannels*bytesPerSample not supported"); while (fmtLen > 16) // unused extra garbage in header { fgetbyte (f); @@ -1117,7 +1117,7 @@ unsigned int WAVEHEADER::read (FILE * f, signed short & wRealFormatTag, int & by } if (wRealFormatTag == 7) { - (bytesPerSample == 1) || ERROR ("WAVEHEADER::read: invalid wBitsPerSample %d for mulaw", wBitsPerSample); + (bytesPerSample == 1) || RuntimeError ("WAVEHEADER::read: invalid wBitsPerSample %d for mulaw", wBitsPerSample); fcheckTag (f, "fact"); unsigned int factLen = fgetint (f); while (factLen > 0) @@ -1160,7 +1160,7 @@ static void fgetwavraw(FILE * f, ARRAY & wav, const WAVEHEADER & wavhd) wav.resize (wavhd.DataLength / bytesPerSample); if (wavhd.wFormatTag == 7) // mulaw { - (wavhd.nChannels == 1) || ERROR ("fgetwav: wChannels=%d not supported for mulaw", wavhd.nChannels); + (wavhd.nChannels == 1) || RuntimeError ("fgetwav: wChannels=%d not supported for mulaw", wavhd.nChannels); ARRAY data; int numSamples = wavhd.DataLength/wavhd.nBlockAlign; data.resize (numSamples); @@ -1177,7 +1177,7 @@ static void fgetwavraw(FILE * f, ARRAY & wav, const WAVEHEADER & wavhd) // ... 
TODO: support 8 bit linear PCM samples (implement when needed; samples scaled to 'short') else { - ERROR ("bytesPerSample != 2 is not supported except mulaw format!\n"); + RuntimeError ("bytesPerSample != 2 is not supported except mulaw format!\n"); } } @@ -1218,7 +1218,7 @@ void fgetwav (FILE * f, ARRAY & wav, int & sampleRate) } else { - ERROR ("bytesPerSample/wChannels != 2 needs to be implemented"); + RuntimeError ("bytesPerSample/wChannels != 2 needs to be implemented"); } } @@ -1276,7 +1276,7 @@ unsigned int fgetwfx (FILE * f, WAVEFORMATEX & wfx) wfx.wFormatTag = 1; // Workaround: pretend it is 1 (seems safe) } (wfx.wFormatTag == 1 || wfx.wFormatTag == 3 || wfx.wFormatTag == 7) - || ERROR ("WAVEHEADER::read: wFormatTag=%d not supported for now", wfx.wFormatTag); + || RuntimeError ("WAVEHEADER::read: wFormatTag=%d not supported for now", wfx.wFormatTag); wfx.nChannels = fgetshort (f); wfx.nSamplesPerSec = fgetint (f); wfx.nAvgBytesPerSec = fgetint (f); @@ -1294,7 +1294,7 @@ void fputwfx (FILE *f, const WAVEFORMATEX & wfx, unsigned int numSamples) { unsigned int DataLength = numSamples * wfx.nBlockAlign; (DataLength / wfx.nBlockAlign == numSamples) - || ERROR ("fputwfx: data size exceeds WAV header 32-bit range"); + || RuntimeError ("fputwfx: data size exceeds WAV header 32-bit range"); unsigned int RiffLength = 36 + DataLength; unsigned int FmtLength = 16; // file header @@ -1345,6 +1345,7 @@ void fputwav (const wstring & fn, const vector & wav, int sampleRate, int fputwav (f, wav, sampleRate, nChannels); fflushOrDie (f); // after this, fclose() (in destructor of f) cannot fail } +#endif // ---------------------------------------------------------------------------- // fputbyte(): write a byte value @@ -1491,7 +1492,7 @@ void fgetfile (FILE * f, ARRAY & buffer) size_t n = fread (&inbuf[0], sizeof (inbuf[0]), inbuf.size(), f); if (ferror (f)) { - ERROR ("fgetfile: error reading from file: %s", strerror (errno)); + RuntimeError ("fgetfile: error reading from 
file: %s", strerror (errno)); } buffer.insert (buffer.end(), inbuf.begin(), inbuf.begin() + n); } @@ -1566,12 +1567,12 @@ void setfiletime (const wstring & path, const FILETIME & time) OPEN_EXISTING, FILE_ATTRIBUTE_NORMAL, NULL)); if (h == INVALID_HANDLE_VALUE) { - ERROR ("setfiletime: error opening file: %d", GetLastError()); + RuntimeError ("setfiletime: error opening file: %d", GetLastError()); } BOOL rc = SetFileTime (h, NULL, NULL, &time); if (!rc) { - ERROR ("setfiletime: error setting file time information: %d", GetLastError()); + RuntimeError ("setfiletime: error setting file time information: %d", GetLastError()); } } @@ -1649,7 +1650,7 @@ void expand_wildcards (const wstring & path, vector & paths) { BOOL rc = ExpandWildcards (path, paths); if (!rc) - ERROR ("error in expanding wild cards '%S': %S", path.c_str(), FormatWin32Error (::GetLastError()).c_str()); + RuntimeError ("error in expanding wild cards '%S': %S", path.c_str(), FormatWin32Error (::GetLastError()).c_str()); } // ---------------------------------------------------------------------------- @@ -1668,7 +1669,7 @@ static void mkdir (const wstring & path) if (att != INVALID_FILE_ATTRIBUTES || (att & FILE_ATTRIBUTE_DIRECTORY) != 0) return; // ok } - ERROR ("make_intermediate_dirs: error creating intermediate directory %S", path.c_str()); + RuntimeError ("make_intermediate_dirs: error creating intermediate directory %S", path.c_str()); } // make subdir of a file including parents diff --git a/DataReader/HTKMLFReader/message.h b/DataReader/HTKMLFReader/message.h index 0cafff151..98586bc2f 100644 --- a/DataReader/HTKMLFReader/message.h +++ b/DataReader/HTKMLFReader/message.h @@ -1,473 +1 @@ -// -// message.h - class for simple I/O of log messages -// -// Copyright (c) Microsoft Corporation. All rights reserved. 
-// - -#pragma once -#ifndef _MESSAGE_ -#define _MESSAGE_ - -#ifndef UNDER_CE // fixed-buffer overloads not available for wince -#ifdef _CRT_SECURE_CPP_OVERLOAD_STANDARD_NAMES // fixed-buffer overloads for strcpy() etc. -#undef _CRT_SECURE_CPP_OVERLOAD_STANDARD_NAMES -#endif -#define _CRT_SECURE_CPP_OVERLOAD_STANDARD_NAMES 1 -#endif - -#include "basetypes.h" - -#include -#include -#include -#include // for _time64 in timeDateStamp() - -#pragma warning (push) -#pragma warning (disable : 4793) // caused by varargs -#pragma warning(disable : 4996) // strcpy and other safety stuff disabled - -namespace msra { namespace logging -{ - // ----------------------------------------------------------------------- - // message_exception - exception thrown by this error module - // ----------------------------------------------------------------------- - - class message_exception : public std::exception - { - char buf[1024]; // local buffer for message - const char * dup_what (const char * what) - { - strcpy (buf, "message_exception:"); // security hint: safe overloads - strcat (buf, what); - return &buf[0]; - } - public: - message_exception (const char * what) : exception (dup_what (what)) - { - } - }; - - // ----------------------------------------------------------------------- - // global state (hidden as a static struct in a variable) - // ----------------------------------------------------------------------- - - struct __globals - { - msra::basetypes::CCritSec lock; - FILE * logFile; - bool noFlush; // heavy logging: don't flush - bool throwExceptionFlag; // true: don't exit but throw exception - - // G.muted semantics is as follows - // - G.muted && !G.logFile => don't show anything - // - G.muted && G.logFile => write to log file - // - !G.muted && !G.logFile => write to stderr only - // - !G.muted && G.logFile => write to log file and stderr - bool muted; - - std::string filename; // log file name - char buf[30000]; // for _vsnprintf() - - __globals() : logFile (NULL), 
throwExceptionFlag (false), muted (false), noFlush (false) - { buf[0] = 0; buf[sizeof (buf) / sizeof (*buf) -1] = 0; } - - static __globals & get() { static __globals g; return g; } - }; - -#pragma push_macro ("G") -#define G (__globals::get()) // access global XYZ as G.XYZ - - // ------------------------------------------------------------------------ - // setLogFile(): set the log file - // if non-NULL then every message will be written both to stderr and this - // log file. - // multi-threading: not thread-safe, set this before starting - // ------------------------------------------------------------------------ - - static inline FILE * setLogFile (FILE * newLogFile) - { - FILE * oldLogFile; - - oldLogFile = G.logFile; - - if (newLogFile != stderr) - { - G.logFile = newLogFile; - } - else - { - G.logFile = NULL; - } - - return oldLogFile; - } - - // ------------------------------------------------------------------------ - // setLogFileByName(): set the log file by file name - // in this mode, log file will be re-connected when disconnected - // filename == NULL indicates an attempt to reconnect - // WARNING: if the filename is invalid, it will try to reconnect every time - // ------------------------------------------------------------------------ - - static inline void setLogFileByName (const char * p_filename) - { - FILE * newLogFile = NULL; - if (p_filename == NULL) - { - // for reconnection - ASSERT (G.filename != ""); - newLogFile = fopen (G.filename.c_str (), "ab"); - } - else - { - ASSERT (p_filename[0]); - G.filename = p_filename; // remember filename - newLogFile = fopen (p_filename, "wb"); - } - - // handle open failure - if (newLogFile == NULL) - { - if (G.logFile != NULL) - { - fprintf (G.logFile, "ERROR: setLogFileByName: error opening log file %s: %s\n", - G.filename.c_str (), strerror (errno)); - // in case of a reconnect, this ^^ will obviously fail, we ignore this - } - fprintf (stderr, "ERROR: setLogFileByName: error opening log file %s: 
%s\n", - G.filename.c_str (), strerror (errno)); - return; - } - - // set new handle - FILE * oldLogFile = setLogFile (newLogFile); - - // close old handle - if (oldLogFile != NULL && oldLogFile != stderr && oldLogFile != stdin) - { - int rc = fclose (oldLogFile); - if (rc != 0) - { - if (G.logFile != NULL) - { // note: this goes to the new log file - fprintf (G.logFile, "ERROR: setLogFileByName: error closing old log file: %s\n", - strerror (errno)); - } - fprintf (stderr, "ERROR: setLogFileByName: error closing old log file: %s\n", - strerror (errno)); - } - } - } - - // ------------------------------------------------------------------------ - // setExceptionFlag(): set flag whether to throw an exception (true) or exit() (false, default) - // ------------------------------------------------------------------------ - - static inline bool setExceptionFlag (bool throwExceptionFlag = true) - { - bool oldFlag = G.throwExceptionFlag; - G.throwExceptionFlag = throwExceptionFlag; - return oldFlag; - } - - // ------------------------------------------------------------------------ - // timeDateStamp() -- often needed for logging - // ------------------------------------------------------------------------ - - static inline std::string timeDateStamp (void) - { - __time64_t localtime; _time64 (&localtime); // get current time and date - struct tm now; _localtime64_s (&now, &localtime); // convert - char buf[20]; - sprintf (buf, "%04d/%02d/%02d %02d:%02d:%02d", // security hint: this is an overload - now.tm_year + 1900, now.tm_mon + 1, now.tm_mday, - now.tm_hour, now.tm_min, now.tm_sec); - return buf; - } - - // ------------------------------------------------------------------------ - // __flush(): flush output - // ------------------------------------------------------------------------ - - static inline void __flush() - { - int rc = fflush (G.logFile); - if (rc != 0) - { - fprintf (stderr, "ERROR: __flush: error flushing to log file %s\n", - strerror (errno)); - } - } - 
- // ------------------------------------------------------------------------ - // setHeavyLogging(): we are heavily logging: don't flush & increase out buf - // ------------------------------------------------------------------------ - - static inline void setHeavyLogging (bool isHeavy) - { - __flush(); // flush the current buffer - if (!isHeavy) - { - G.noFlush = false; - } - else - { - G.noFlush = true; - if (G.logFile) - setvbuf (G.logFile, NULL, _IOFBF, 16384); // flush every 16K - } - } - - // ------------------------------------------------------------------------ - // shutUp(): set muted mode (true: no output will be generated anymore) - // - // multi-threading: retrieving the previous state is not thread-safe, - // if you want, do this before starting - // ------------------------------------------------------------------------ - - static inline bool shutUp (bool quiet = true) - { - bool oldFlag = G.muted; - G.muted = quiet; - return oldFlag; - } - - // ------------------------------------------------------------------------ - // getLogFile(): get log file handle - // ------------------------------------------------------------------------ - - static inline FILE * getLogFile (void) - { - return G.logFile; - } - - // ------------------------------------------------------------------------ - // __showbuf(): output contents of buf[] with prefix prepended - // multi-threading: must be called from within critical section - // ------------------------------------------------------------------------ - - static inline void __showbuf (const std::string & prefix, bool nl) - { - ASSERT (strlen (G.buf) < sizeof (G.buf) / sizeof (*G.buf)); // security hint: safe overloads - std::string outtext = prefix + G.buf; - if (nl) outtext += "\n"; - - // write out; first to screen in case we can't write to log file - -#ifndef ONENOTE_COMPILER - // OneNote treats it as an error if stderr is not empty. 
- // and in OneNote, we can't see message printed to stderr - // So, in OneNote, don't put it into stderr - - // G.muted semantics is as follows - // - G.muted && !G.logFile => don't show anything - // - G.muted && G.logFile => write to log file - // - !G.muted && !G.logFile => write to stderr only - // - !G.muted && G.logFile => write to log file and stderr - if (!G.muted) - { - fwrite ((void*) outtext.c_str(), sizeof (*outtext.c_str()), - outtext.length(), stderr); - if (!G.noFlush) - fflush (stderr); - } -#endif - - // write to log file - - // check whether the log file has been disconnected or not - if (G.filename != "") // with known filename, suppose to reconnect - { - if (G.logFile == NULL || ferror (G.logFile) != 0) - { - setLogFileByName (NULL); // attempt to re-open the log file - - if (G.logFile) - { - fprintf (G.logFile, "ERROR: __showbuf: log file handle lost, reconnected\n"); - } - } - } - - if (G.logFile) - { - size_t n = fwrite ((void*) outtext.c_str(), sizeof (*outtext.c_str()), - outtext.length(), G.logFile); - if (n != outtext.length() * sizeof (*outtext.c_str())) - { // write error - fprintf (stderr, "ERROR: __showbuf: error writing this to log file: %s\n", strerror (errno)); - fwrite ((void*) outtext.c_str(), sizeof (*outtext.c_str()), - outtext.length(), stderr); - } - else if (!G.noFlush) // flush logFile - { - __flush(); - } - } - } - - // ------------------------------------------------------------------------ - // noLoggingReqd: function to determine if any logging reqd - // at all - used so that we can exit early if none reqd - // ------------------------------------------------------------------------ - - static inline bool noLoggingReqd() - { - return G.muted && !G.logFile; - } - - // ------------------------------------------------------------------------ - // message(): like printf(), writing to log output - // multi-threading: this function is thread-safe - // ------------------------------------------------------------------------ - 
- static inline void message (const char * fmt, ...) - { - if (noLoggingReqd()) return; // muted: all output is suppressed - - msra::basetypes::CAutoLock autoLock (G.lock); - va_list arg_ptr; - va_start (arg_ptr, fmt); - vsprintf (G.buf, fmt, arg_ptr); // security hint: this is an overload - __showbuf ("", true); - } - - static void message_nolf (const char * fmt, ...) - { - if (noLoggingReqd()) return; // muted: all output is suppressed - - msra::basetypes::CAutoLock autoLock (G.lock); - va_list arg_ptr; - va_start (arg_ptr, fmt); - vsprintf (G.buf, fmt, arg_ptr); // security hint: this is an overload - __showbuf ("", false); - } - - // ------------------------------------------------------------------------ - // warning(): like message(), with text "WARNING: " prepended - // multi-threading: this function is thread-safe - // ------------------------------------------------------------------------ - - static void warning (const char * fmt, ...) - { - if (noLoggingReqd()) return; // muted: all output is suppressed - - msra::basetypes::CAutoLock autoLock (G.lock); - va_list arg_ptr; - va_start (arg_ptr, fmt); - vsprintf (G.buf, fmt, arg_ptr); // security hint: this is an overload - __showbuf ("WARNING: ", true); - __flush(); - } - - // ------------------------------------------------------------------------ - // __throw_or_exit(): exit() or throw exception depending on throwExceptionFlag - // ------------------------------------------------------------------------ - - static inline void __throw_or_exit (void) - { - __flush(); - if (G.throwExceptionFlag) - { - throw message_exception (G.buf); - } - exit (1); - } - - // ------------------------------------------------------------------------ - // error(): like warning() but terminates program afterwards - // multi-threading: this function is thread-safe - // ------------------------------------------------------------------------ - -#pragma warning (push) -#pragma warning (disable : 4702) // the 'return 0;' causes 
this in Release - static int error (const char * fmt, ...) - { -#if 1 // special test code to determine the Windows error in case of a network error - DWORD winErr = GetLastError(); - try - { - msra::basetypes::CAutoLock autoLock (G.lock); - sprintf (G.buf, "%d (\"%S\")", winErr, FormatWin32Error(winErr).c_str()); - if (!noLoggingReqd()) - __showbuf ("Win32 error of subsequent error message: ", true); - } - catch(...){} -#endif - msra::basetypes::CAutoLock autoLock (G.lock); - va_list arg_ptr; - va_start (arg_ptr, fmt); - vsprintf (G.buf, fmt, arg_ptr); // security hint: this is an overload - if (!noLoggingReqd()) - { // if muted, we format the msg (for __throw_or_exit) but don't print it - __showbuf ("ERROR: ", true); - } - __throw_or_exit(); - return 0; - } - - // ------------------------------------------------------------------------ - // mem_error(): similar to error() but without any memory allocations - // (only one string argument allowed) - // multi-threading: this function is thread-safe - // ------------------------------------------------------------------------ - - static int mem_error (const char * fmt, int arg) - { - msra::basetypes::CAutoLock autoLock (G.lock); - if (!noLoggingReqd()) - { // if muted, we format the msg (for __throw_or_exit) but don't print it - fprintf (stderr, fmt, arg); - fprintf (stderr, "\n"); - - if (G.logFile) - { - fprintf (G.logFile, fmt, arg); - fprintf (G.logFile, "\n"); - int rc = fflush (G.logFile); - if (rc != 0) - { - fprintf (stderr, "error flushing log message to file: %s\n", - strerror (errno)); - } - } - } - - // format msg for __throw_or_exit() - sprintf (G.buf, fmt, arg); // security hint: this is an overload - strcat (G.buf, "\n"); // security hint: this is an overload - __throw_or_exit(); - return 0; - } -#pragma warning (pop) - - static inline void __avoid_C4505 (void) - { message (""); message_nolf (""); warning (""); error (""); mem_error ("", 0); } -#pragma pop_macro ("G") -};}; - -#pragma warning(pop) - 
-// =========================================================================== -// compatibility macros (for older source code) -// =========================================================================== - -#undef ERROR // defined in wingdi.h... aargh! -#define WARNING msra::logging::warning -#define ERROR msra::logging::error -#define MESSAGE msra::logging::message -#define MESSAGE_NOLF msra::logging::message_nolf -#define MEMERROR msra::logging::mem_error -#define SETLOGFILE msra::logging::setLogFile - -// =========================================================================== -// special function for basetypes.h's ASSERT() macro -// =========================================================================== - -#ifdef _CHECKED -void inline _CHECKED_ASSERT_error(const char * file, int line, const char * exp) -{ ERROR ("%s:%d:assertion failure: %s", file, line, exp); } -#endif - -#endif // _MESSAGE_ - +// removed \ No newline at end of file diff --git a/DataReader/HTKMLFReader/msra_mgram.h b/DataReader/HTKMLFReader/msra_mgram.h index 7d37f260c..b8f85ff30 100644 --- a/DataReader/HTKMLFReader/msra_mgram.h +++ b/DataReader/HTKMLFReader/msra_mgram.h @@ -9,7 +9,6 @@ #pragma once #include "basetypes.h" -#include "message.h" // for logging and throwing #include "fileutil.h" // for opening/reading the ARPA file #include #include @@ -173,7 +172,7 @@ public: fgetstring (f, buf); int id = (*this)[buf]; if (id != k) - throw logic_error ("plsa: sequence error while reading vocabulary"); + RuntimeError ("plsa: sequence error while reading vocabulary"); } } }; @@ -1027,7 +1026,7 @@ public: const double log10 = log (10.0); for (int m = 1; m <= M; m++) { - MESSAGE_NOLF ("estimate: writing %d %d-grams..", map.size (m), m); + fprintf (stderr, "estimate: writing %d %d-grams..", map.size (m), m); int step = (int) logP.size (m) / 100; if (step == 0) step = 1; int numMGramsWritten = 0; @@ -1061,13 +1060,13 @@ public: // progress if (numMGramsWritten % step == 0) { - MESSAGE_NOLF 
("."); + fprintf (stderr, "."); } numMGramsWritten++; } fflushOrDie (outf); ASSERT (numMGramsWritten == map.size (m)); - MESSAGE (""); + fprintf (stderr, "\n"); } fprintfOrDie (outf, "\n\\end\\\n"); @@ -1126,7 +1125,7 @@ public: { int lineNo = 0; msra::basetypes::auto_file_ptr f = fopenOrDie (pathname, L"rbS"); - MESSAGE_NOLF ("read: reading %S", pathname.c_str()); + fprintf (stderr, "read: reading %S", pathname.c_str()); filename = pathname; // (keep this info for debugging) // --- read header information @@ -1154,7 +1153,7 @@ public: M = (int) dims.size() -1; if (M == 0) - ERROR ("read: mal-formed LM file, no dimension information (%d): %S", lineNo, pathname.c_str()); + RuntimeError ("read: mal-formed LM file, no dimension information (%d): %S", lineNo, pathname.c_str()); int fileM = M; if (M > maxM) M = maxM; @@ -1188,7 +1187,7 @@ public: lineNo++, fgetline (f, buf); if (sscanf (buf, "\\%d-grams:", &n) != 1 || n != m) - ERROR ("read: mal-formed LM file, bad section header (%d): %S", lineNo, pathname.c_str()); + RuntimeError ("read: mal-formed LM file, bad section header (%d): %S", lineNo, pathname.c_str()); lineNo++, fgetline (f, buf); std::vector mgram (m +1, -1); // current mgram being read ([0]=dummy) @@ -1207,7 +1206,7 @@ public: // -- parse the line tokens = &buf[0]; if ((int) tokens.size() != ((m < fileM) ? m + 2 : m + 1)) - ERROR ("read: mal-formed LM file, incorrect number of tokens (%d): %S", lineNo, pathname.c_str()); + RuntimeError ("read: mal-formed LM file, incorrect number of tokens (%d): %S", lineNo, pathname.c_str()); double scoreVal = atof (tokens[0]); // ... use sscanf() instead for error checking? 
double thisLogP = scoreVal * ln10xLMF; // convert to natural log @@ -1239,7 +1238,7 @@ public: { id = symbolToId (tok); if (id == -1) - ERROR ("read: mal-formed LM file, m-gram contains unknown word (%d): %S", lineNo, pathname.c_str()); + RuntimeError ("read: mal-formed LM file, m-gram contains unknown word (%d): %S", lineNo, pathname.c_str()); } } mgram[n] = id; // that's our id @@ -1289,9 +1288,9 @@ skipMGram: } } - MESSAGE_NOLF (", %d %d-grams", map.size (m), m); + fprintf (stderr, ", %d %d-grams", map.size (m), m); } - MESSAGE (""); + fprintf (stderr, "\n"); // check end tag if (M == fileM) @@ -1299,7 +1298,7 @@ skipMGram: while (buf[0] == 0 && !feof (f)) lineNo++, fgetline (f, buf); if (strcmp (buf, "\\end\\") != 0) - ERROR ("read: mal-formed LM file, no \\end\\ tag (%d): %S", lineNo, pathname.c_str()); + RuntimeError ("read: mal-formed LM file, no \\end\\ tag (%d): %S", lineNo, pathname.c_str()); } // update zerogram score by one appropriate for OOVs @@ -1534,7 +1533,7 @@ protected: if (seenMass > 1.0) { if (seenMass > 1.0001) // (a minor round-off error is acceptable) - WARNING ("estimate: seen mass > 1.0: %8.5f --oops??", seenMass); + fprintf (stderr, "estimate: seen mass > 1.0: %8.5f --oops??\n", seenMass); seenMass = 1.0; // oops? } @@ -1543,7 +1542,7 @@ protected: if (coveredBackoffMass > 1.0) { if (coveredBackoffMass > 1.0001) // 1.0 for unigrams, sometimes flags this - WARNING ("estimate: unseen backoff mass < 0: %8.5f --oops??", 1.0 - coveredBackoffMass); + fprintf (stderr, "estimate: unseen backoff mass < 0: %8.5f --oops??\n", 1.0 - coveredBackoffMass); coveredBackoffMass = 1.0; // oops? 
} @@ -1640,11 +1639,11 @@ public: {// first time initial minObs.resize(M, 0); if (M > 2) minObs[2] = 2; // GangLi: prune trigram if Obs < 2, this is default value - MESSAGE("Set miniObs to 0 0 2."); + fprintf (stderr, "Set miniObs to 0 0 2.\n"); } else { - MESSAGE("Not reset miniObs because it has already been set."); + fprintf (stderr, "Not reset miniObs because it has already been set.\n"); } for (int m = 1; m <= M; m++) counts.reserve (m, 1000000); // something to start with @@ -1654,7 +1653,7 @@ public: void setMinObs(const std::vector & setMinObs) { if (minObs.size() != setMinObs.size()) - ERROR("In setMinObs: setMinObs size (%d) is not for %d-gram.", setMinObs.size(), minObs.size()); + RuntimeError("In setMinObs: setMinObs size (%d) is not for %d-gram.", setMinObs.size(), minObs.size()); minObs = setMinObs; } @@ -1688,7 +1687,7 @@ protected: mcounts.push_back (mgram_map::coord(), ntoks); // zerogram count std::vector keybuf (M+1); // do one order after another (to save memory) - MESSAGE_NOLF ("merge: adding %d tokens...", ntoks); + fprintf (stderr, "merge: adding %d tokens...", ntoks); for (int m = 1; m <= M; m++) { mgram_map::cache_t mmapCache; @@ -1770,7 +1769,7 @@ protected: mcounts.push_back (mmap.create (newkey, mmapCache), count); // store 'count' under 'key' } } - MESSAGE_NOLF (" %d %d-grams", mcounts.size (m), m); + fprintf (stderr, " %d %d-grams", mcounts.size (m), m); } // remove used up tokens from the buffer @@ -1788,7 +1787,7 @@ protected: map.swap (mmap); counts.swap (mcounts); - MESSAGE (""); + fprintf (stderr, "\n"); // destructor will delete previous counts and map (now in mcount/mmap) } @@ -1889,7 +1888,7 @@ protected: int id = dropId == -1 ? userSymMap[p] : constSymMap[p]; ids.push_back (id); - if (totalTokens++ % 100000 == 0) MESSAGE_NOLF ("."); + if (totalTokens++ % 100000 == 0) fprintf (stderr, "."); } ids.push_back (endId); totalTokens += 2; @@ -1928,7 +1927,7 @@ public: int dropId = filterVocabulary ? unkId != -1 ? 
unkId : userSymMap.size() : -1; if (filterVocabulary) - ERROR ("CMGramLMEstimator::read() not tested for filterVocabulary==true"); + RuntimeError ("CMGramLMEstimator::read() not tested for filterVocabulary==true"); // reset adaptation adapt (NULL, maxM); // pass dimension here @@ -1947,14 +1946,14 @@ public: string thispath = fgetline (f); if (thispath.empty() || thispath[0] == '#') continue; // comment msra::basetypes::auto_file_ptr thisf = fopenOrDie (thispath, "rbS"); - MESSAGE_NOLF ("read: ingesting training text from %s ..", thispath.c_str()); + fprintf (stderr, "read: ingesting training text from %s ..", thispath.c_str()); int numTokens = read (thisf, userSymMap, startId, endId, dropId); - MESSAGE ("%d tokens", numTokens); + fprintf (stderr, "%d tokens\n", numTokens); } } else if (!tag.empty() && tag[0] == '#') { - ERROR ("read: unknown tag '%s'", tag.c_str()); + RuntimeError ("read: unknown tag '%s'", tag.c_str()); } else // no tag: just load the file directly { @@ -2028,7 +2027,7 @@ public: while (M > 0 && counts.size (M) == 0) resize (M-1); for (int m = 1; m <= M; m++) - MESSAGE ("estimate: read %d %d-grams", counts.size (m), m); + fprintf (stderr, "estimate: read %d %d-grams\n", counts.size (m), m); // === Kneser-Ney smoothing // This is a strange algorithm. 
@@ -2043,14 +2042,14 @@ public: mgram_data KNTotalCounts; // [shifted, shortened m-gram] (*,v,*) if (M >= 2) { - MESSAGE ("estimate: allocating Kneser-Ney counts..."); + fprintf (stderr, "estimate: allocating Kneser-Ney counts...\n"); KNCounts.init (M-1); for (int m = 0; m <= M-1; m++) KNCounts.assign (m, counts.size (m), 0); KNTotalCounts.init (M-2); for (int m = 0; m <= M-2; m++) KNTotalCounts.assign (m, counts.size (m), 0); - MESSAGE ("estimate: computing Kneser-Ney counts..."); + fprintf (stderr, "estimate: computing Kneser-Ney counts...\n"); // loop over all m-grams to determine KN counts for (mgram_map::deep_iterator iter (map); iter; ++iter) @@ -2082,7 +2081,7 @@ public: std::vector d1 (M+1, 0.0); std::vector d2 (M+1, 0.0); std::vector d3 (M+1, 0.0); - MESSAGE_NOLF ("estimate: discounting values:"); + fprintf (stderr, "estimate: discounting values:"); { // actually estimate discounting values @@ -2122,11 +2121,11 @@ public: { if (n1[m] == 0) throw runtime_error (msra::strfun::strprintf ("estimate: error estimating discounting values: n1[%d] == 0", m)); if (n2[m] == 0) throw runtime_error (msra::strfun::strprintf ("estimate: error estimating discounting values: n2[%d] == 0", m)); - //if (n3[m] == 0) ERROR ("estimate: error estimating discounting values: n3[%d] == 0", m); + //if (n3[m] == 0) RuntimeError ("estimate: error estimating discounting values: n3[%d] == 0", m); double Y = n1[m] / (n1[m] + 2.0 * n2[m]); if (n3[m] ==0 || n4[m] == 0) { - WARNING ("estimate: n3[%d] or n4[%d] is 0, falling back to unmodified discounting", m, m); + fprintf (stderr, "estimate: n3[%d] or n4[%d] is 0, falling back to unmodified discounting\n", m, m); d1[m] = Y; d2[m] = Y; d3[m] = Y; @@ -2138,16 +2137,16 @@ public: d3[m] = 3.0 - 4.0 * Y * n4[m] / n3[m]; } // ... can these be negative?? 
- MESSAGE_NOLF (" (%.3f, %.3f, %.3f)", d1[m], d2[m], d3[m]); + fprintf (stderr, " (%.3f, %.3f, %.3f)", d1[m], d2[m], d3[m]); } - MESSAGE (""); + fprintf (stderr, "\n"); } // === threshold against minimum counts (set counts to 0) // this is done to save memory, but it has no impact on the seen probabilities // ...well, it does, as pruned mass get pushed to back-off distribution... ugh! - MESSAGE ("estimate: pruning against minimum counts..."); + fprintf (stderr, "estimate: pruning against minimum counts...\n"); // prune unigrams first (unigram cut-off can be higher than m-gram cut-offs, // as a means to decimate the vocabulary) @@ -2161,7 +2160,7 @@ public: dropWord[wid] = true; // will throw out all related m-grams removedWords++; } - MESSAGE ("estimate: removing %d too rare vocabulary entries", removedWords); + fprintf (stderr, "estimate: removing %d too rare vocabulary entries\n", removedWords); // now prune m-grams against count cut-off @@ -2189,7 +2188,7 @@ public: if (m < M) histCoord[m] = iter; mgram_map::coord j = histCoord[m-1]; // parent if (counts[j] == 0) - ERROR ("estimate: invalid pruning: a parent m-gram got pruned away"); + RuntimeError ("estimate: invalid pruning: a parent m-gram got pruned away"); //throw runtime_error ("estimate: invalid pruning: a parent m-gram got pruned away"); numMGrams[m]++; } @@ -2197,7 +2196,7 @@ public: for (int m = 1; m <= M; m++) { - MESSAGE ("estimate: %d-grams after pruning: %d out of %d (%.1f%%)", m, + fprintf (stderr, "estimate: %d-grams after pruning: %d out of %d (%.1f%%)\n", m, numMGrams[m], counts.size (m), 100.0 * numMGrams[m] / max (counts.size (m), 1)); } @@ -2212,7 +2211,7 @@ public: // === estimate M-gram - MESSAGE ("estimate: estimating probabilities..."); + fprintf (stderr, "estimate: estimating probabilities...\n"); // dimension the m-gram store mgram_data P (M); // [M+1][i] probabilities @@ -2231,7 +2230,7 @@ public: P.push_back (mgram_map::coord(), 0.0f); // will be updated later for (int m = 1; m <= M; 
m++) { - MESSAGE ("estimate: estimating %d %d-gram probabilities...", numMGrams[m], m); + fprintf (stderr, "estimate: estimating %d %d-gram probabilities...\n", numMGrams[m], m); // loop over all m-grams of level 'm' msra::basetypes::fixed_vector histCoord (m); @@ -2270,7 +2269,7 @@ public: { count = KNCounts[iter]; // (u,v,w) -> count (*,v,w) if (count == 0) // must exist - ERROR ("estimate: malformed data: back-off value not found (numerator)"); + RuntimeError ("estimate: malformed data: back-off value not found (numerator)"); const mgram_map::key key_h = key.pop_w(); mgram_map::foundcoord c_h = map[key_h]; @@ -2278,7 +2277,7 @@ public: throw runtime_error ("estimate: invalid shortened KN history"); histCount = KNTotalCounts[c_h]; // (u,v,w) -> count (*,v,*) if (histCount == 0) // must exist - ERROR ("estimate: malformed data: back-off value not found (denominator)"); + RuntimeError ("estimate: malformed data: back-off value not found (denominator)"); ASSERT (histCount >= count); } } @@ -2323,7 +2322,7 @@ public: throw runtime_error ("estimate: negative discounted count value"); if (histCount == 0) - ERROR ("estimate: unexpected 0 denominator"); + RuntimeError ("estimate: unexpected 0 denominator"); double dP = dcount / histCount; // and this is the discounted probability value { @@ -2339,7 +2338,7 @@ skippruned:; // m-gram was pruned } } // the distributions are not normalized --discount mass is missing - MESSAGE ("estimate: freeing memory for counts..."); + fprintf (stderr, "estimate: freeing memory for counts...\n"); KNCounts.clear(); // free some memory KNTotalCounts.clear(); @@ -2372,7 +2371,7 @@ skippruned:; // m-gram was pruned if (missingUnigramMass > 0.0) { float missingUnigramProb = (float) (missingUnigramMass * P[mgram_map::coord()]); - MESSAGE ("estimate: distributing missing unigram mass of %.2f to %d unigrams", + fprintf (stderr, "estimate: distributing missing unigram mass of %.2f to %d unigrams\n", missingUnigramMass, vocabSize); for 
(mgram_map::iterator iter (map, 1); iter; ++iter) { @@ -2383,7 +2382,7 @@ skippruned:; // m-gram was pruned // --- M-gram sections --back-off weights - MESSAGE ("estimate: determining back-off weights..."); + fprintf (stderr, "estimate: determining back-off weights...\n"); computeBackoff (map, M, P, logB, false); // now the LM is normalized assuming the ARPA back-off computation @@ -2412,9 +2411,9 @@ skippruned:; // m-gram was pruned // desired OOV score. updateOOVScore(); - MESSAGE_NOLF ("estimate: done"); - for (int m = 1; m <= M; m++) MESSAGE_NOLF (", %d %d-grams", logP.size (m), m); - MESSAGE (""); + fprintf (stderr, "estimate: done"); + for (int m = 1; m <= M; m++) fprintf (stderr, ", %d %d-grams", logP.size (m), m); + fprintf (stderr, "\n"); } }; @@ -2509,10 +2508,10 @@ skipMGram: template static void write (const ILM & lm, int M, FILE * outf, const SYMMAP & symbols) { - MESSAGE ("write: cloning..."); + fprintf (stderr, "write: cloning...\n"); CMGramLMClone outlm; outlm.clone (lm, M); - MESSAGE ("write: saving..."); + fprintf (stderr, "write: saving...\n"); ((const CMGramLM&) outlm).write (outf, symbols); } @@ -2808,7 +2807,7 @@ public: { int lineNo = 0; msra::basetypes::auto_file_ptr f = fopenOrDie (pathname, L"rbS"); - MESSAGE_NOLF ("read: reading %S", pathname.c_str()); + fprintf (stderr, "read: reading %S", pathname.c_str()); filename = pathname; // (keep this info for debugging) // --- read header information @@ -2836,7 +2835,7 @@ public: M = (int) dims.size() -1; if (M == 0) - ERROR ("read: mal-formed LM file, no dimension information (%d): %S", lineNo, pathname.c_str()); + RuntimeError ("read: mal-formed LM file, no dimension information (%d): %S", lineNo, pathname.c_str()); int fileM = M; if (M > maxM) M = maxM; @@ -2866,7 +2865,7 @@ public: lineNo++, fgetline (f, buf); if (sscanf (buf, "\\%d-grams:", &n) != 1 || n != m) - ERROR ("read: mal-formed LM file, bad section header (%d): %S", lineNo, pathname.c_str()); + RuntimeError ("read: mal-formed LM 
file, bad section header (%d): %S", lineNo, pathname.c_str()); lineNo++, fgetline (f, buf); std::vector mgram (m +1); // current mgram being read @@ -2888,7 +2887,7 @@ public: const char * delim = " \t\n\r"; const char * score = strtok (&buf[0], delim); if (score == NULL || score[0] == 0) // not checking whether it is numeric - ERROR ("read: mal-formed LM file, no score (%d): %S", lineNo, pathname.c_str()); + RuntimeError ("read: mal-formed LM file, no score (%d): %S", lineNo, pathname.c_str()); double scoreVal = atof (score); double logP = scoreVal * ln10xLMF; // convert to natural log @@ -2897,7 +2896,7 @@ public: { /*const*/ char * tok = strtok (NULL, delim); if (tok == NULL) - ERROR ("read: mal-formed LM file, not enough words in mgram (%d): %S", lineNo, pathname.c_str()); + RuntimeError ("read: mal-formed LM file, not enough words in mgram (%d): %S", lineNo, pathname.c_str()); // map to id int id; if (m == 1) // unigram: build vocab table @@ -2922,7 +2921,7 @@ public: { id = symbolToId (tok); if (id == -1) - ERROR ("read: mal-formed LM file, m-gram contains unknown word (%d): %S", lineNo, pathname.c_str()); + RuntimeError ("read: mal-formed LM file, m-gram contains unknown word (%d): %S", lineNo, pathname.c_str()); } } mgram[n] = id; // that's our id @@ -2934,7 +2933,7 @@ public: { const char * bo = strtok (NULL, delim); if (score == NULL || score[0] == 0) // not checking whether it is numeric - ERROR ("read: mal-formed LM file, no score (%d): %S", lineNo, pathname.c_str()); + RuntimeError ("read: mal-formed LM file, no score (%d): %S", lineNo, pathname.c_str()); double boVal = atof (bo); logB = boVal * ln10xLMF; // convert to natural log } @@ -2956,7 +2955,7 @@ public: continue; if (prevValid && mgram[n] < prevmgram[n]) - ERROR ("read: mal-formed LM file, m-gram out of order (%d): %S", lineNo, pathname.c_str()); + RuntimeError ("read: mal-formed LM file, m-gram out of order (%d): %S", lineNo, pathname.c_str()); // a history token differs from previous mgram. 
That history must exist. const std::vector & entries_n = entries[n]; @@ -2965,14 +2964,14 @@ public: int end = refs_h[histEntry[n -1] +1].firstEntry; int i = findEntry (entries_n, beg, end, mgram[n]); if (i == -1) // unknown history: fall back - ERROR ("read: mal-formed LM file, m-gram history not defined (%d): %S", lineNo, pathname.c_str()); + RuntimeError ("read: mal-formed LM file, m-gram history not defined (%d): %S", lineNo, pathname.c_str()); // found it: narrow down search range histEntry[n] = i; prevValid = false; } if (prevValid && mgram[m] <= prevmgram[m]) - ERROR ("read: mal-formed LM file, m-gram out of order (%d): %S", lineNo, pathname.c_str()); + RuntimeError ("read: mal-formed LM file, m-gram out of order (%d): %S", lineNo, pathname.c_str()); if (m < M) // create history entry refs[m].push_back (LMHIST (0, logB)); @@ -3015,9 +3014,9 @@ skipMGram: } } - MESSAGE_NOLF (", %d %d-grams", entries[m].size(), m); + fprintf (stderr, ", %d %d-grams", entries[m].size(), m); } - MESSAGE (""); + fprintf (stderr, "\n"); // check end tag if (M == fileM) @@ -3025,7 +3024,7 @@ skipMGram: while (buf[0] == 0 && !feof (f)) lineNo++, fgetline (f, buf); if (strcmp (buf, "\\end\\") != 0) - ERROR ("read: mal-formed LM file, no \\end\\ tag (%d): %S", lineNo, pathname.c_str()); + RuntimeError ("read: mal-formed LM file, no \\end\\ tag (%d): %S", lineNo, pathname.c_str()); } // update zerogram score @@ -3107,7 +3106,7 @@ public: if (logP <= -1e20) { #if 0 // should really not happen - MESSAGE ("skipping poor-scoring %s (%.2f)", symMap[buf[i]], logP); + fprintf (stderr, "skipping poor-scoring %s (%.2f)\n", symMap[buf[i]], logP); #endif numOOVTokens++; continue; @@ -3134,14 +3133,14 @@ public: strcat (seq, "_"); strcat (seq, symMap[buf[i1]]); } - MESSAGE ("=%-22s\t%6.2f\t%s\t%s %s", seq+1, logP, pbuf +1, smseenhist, smseen); + fprintf (stderr, "=%-22s\t%6.2f\t%s\t%s %s\n", seq+1, logP, pbuf +1, smseenhist, smseen); #else symMap; #endif #if 0 // testing of optimization double 
logP1 = lm.score_unoptimized (&buf[0], i +1); // use full history if (fabs (logP - logP1) > 1e-3) - ERROR ("bug in optimized score()"); + RuntimeError ("bug in optimized score()"); #endif logPAcc += logP; numTokensAcc++; diff --git a/DataReader/LUSequenceReader/LUSequenceParser.cpp b/DataReader/LUSequenceReader/LUSequenceParser.cpp index 799529bad..e55f71bd0 100644 --- a/DataReader/LUSequenceReader/LUSequenceParser.cpp +++ b/DataReader/LUSequenceReader/LUSequenceParser.cpp @@ -272,7 +272,7 @@ size_t LUSequenceParser::UpdateBuffer() size_t bytesToRead = min(m_bufferSize, m_fileSize-m_bufferStart)-saveBytes; size_t bytesRead = fread(m_fileBuffer+saveBytes, 1, bytesToRead, m_pFile); if (bytesRead == 0 && ferror(m_pFile)) - Error("LUSequenceParser::UpdateBuffer - error reading file"); + RuntimeError("LUSequenceParser::UpdateBuffer - error reading file"); return bytesRead; } diff --git a/DataReader/LUSequenceReader/LUSequenceParser.h b/DataReader/LUSequenceReader/LUSequenceParser.h index 3d367ce84..bc3804eff 100644 --- a/DataReader/LUSequenceReader/LUSequenceParser.h +++ b/DataReader/LUSequenceReader/LUSequenceParser.h @@ -220,10 +220,10 @@ public: errno_t err = _wfopen_s( &m_pFile, fileName, L"rb" ); if (err) - Error("LUSequenceParser::ParseInit - error opening file"); + RuntimeError("LUSequenceParser::ParseInit - error opening file"); int rc = _fseeki64(m_pFile, 0, SEEK_END); if (rc) - Error("LUSequenceParser::ParseInit - error seeking in file"); + RuntimeError("LUSequenceParser::ParseInit - error seeking in file"); m_fileBuffer = new BYTE[m_bufferSize]; } @@ -271,7 +271,7 @@ public: if (mFile) fclose(mFile); if (_wfopen_s(&mFile, fileName, L"rt") != 0) - Error("cannot open file %s", fileName); + RuntimeError("cannot open file %s", fileName); } void ParseReset() diff --git a/DataReader/LUSequenceReader/LUSequenceReader.cpp b/DataReader/LUSequenceReader/LUSequenceReader.cpp index 78bbd0cd8..405fbe6a1 100644 --- a/DataReader/LUSequenceReader/LUSequenceReader.cpp +++ 
b/DataReader/LUSequenceReader/LUSequenceReader.cpp @@ -90,7 +90,7 @@ bool LUSequenceReader::GetIdFromLabel(const vector& labelValue val.push_back(found->second); } else - Error("LUSequenceReader::GetIdFromLabel: cannot find value"); + RuntimeError("LUSequenceReader::GetIdFromLabel: cannot find value"); } return true; } @@ -262,7 +262,7 @@ void LUSequenceReader::Init(const ConfigParameters& readerConfig) } } else - Error("two label definitions (in and out) required for Sequence Reader"); + RuntimeError("two label definitions (in and out) required for Sequence Reader"); ConfigParameters featureConfig = readerConfig(m_featuresName,""); ConfigParameters labelConfig[2] = {readerConfig(m_labelsName[0],""),readerConfig(m_labelsName[1],"")}; @@ -440,7 +440,7 @@ void LUSequenceReader::ChangeMaping(const map& maplist { if (punk == word4idx.end()) { - Error("check unk list is missing "); + RuntimeError("check unk list is missing "); } idx = punk->second; } @@ -468,7 +468,7 @@ void LUSequenceReader::ReadLabelInfo(const wstring & vocfile, if (vin == nullptr) { - Error("cannot open word class file"); + RuntimeError("cannot open word class file"); } b = 0; while (!feof(vin)){ @@ -720,7 +720,7 @@ bool LUSequenceReader::SentenceEnd() { LabelIdType index ; if (CheckIdFromLabel(labelInfo.endSequence, labelInfo, index) == false) - Error("cannot find sentence begining label"); + RuntimeError("cannot find sentence begining label"); if (m_labelIdData[jEnd] == index ) return true; @@ -752,7 +752,7 @@ void LUSequenceReader::SetLabelMapping(const std::wstring& /*sectionNa { if (m_cachingReader) { - Error("Cannot set mapping table when the caching reader is being used"); + RuntimeError("Cannot set mapping table when the caching reader is being used"); } LabelInfo& labelInfo = m_labelInfo[( m_labelInfo[labelInfoOut].type == labelNextWord)?labelInfoIn:labelInfoOut]; @@ -776,7 +776,7 @@ template bool LUSequenceReader::GetData(const std::wstring& sectionName, size_t numRecords, void* data, 
size_t& dataBufferSize, size_t recordStart) { if (!m_cachingReader) - Error("GetData not supported in LUSequenceReader"); + RuntimeError("GetData not supported in LUSequenceReader"); return m_cachingReader->GetData(sectionName, numRecords, data, dataBufferSize, recordStart); } @@ -832,7 +832,7 @@ void BatchLUSequenceReader::Init(const ConfigParameters& readerConfig) } } else - Error("two label definitions (in and out) required for Sequence Reader"); + RuntimeError("two label definitions (in and out) required for Sequence Reader"); ConfigParameters featureConfig = readerConfig(m_featuresName,""); ConfigParameters labelConfig[2] = {readerConfig(m_labelsName[0],""),readerConfig(m_labelsName[1],"")}; @@ -1185,7 +1185,7 @@ bool BatchLUSequenceReader::EnsureDataAvailable(size_t /*mbStartSample } else { - Error("Input label expected to be a category label"); + RuntimeError("Input label expected to be a category label"); } } @@ -1238,7 +1238,7 @@ bool BatchLUSequenceReader::GetMinibatch(std::map m_mbSize * mToProcess.size()){ - Error("specified minibatch size %d is smaller than the actual minibatch size %d. memory can crash!", m_mbSize, actualmbsize); + RuntimeError("specified minibatch size %d is smaller than the actual minibatch size %d. 
memory can crash!", m_mbSize, actualmbsize); } // now get the labels diff --git a/DataReader/LUSequenceReader/LUSequenceWriter.cpp b/DataReader/LUSequenceReader/LUSequenceWriter.cpp index e4290ba08..48b65cc7c 100644 --- a/DataReader/LUSequenceReader/LUSequenceWriter.cpp +++ b/DataReader/LUSequenceReader/LUSequenceWriter.cpp @@ -39,7 +39,7 @@ namespace Microsoft { namespace MSR { namespace CNTK { ConfigArray outputNames = writerConfig("outputNodeNames",""); if (outputNames.size()<1) - Error("writer needs at least one outputNodeName specified in config"); + RuntimeError("writer needs at least one outputNodeName specified in config"); foreach_index(i, outputNames) // inputNames should map to node names @@ -75,7 +75,7 @@ namespace Microsoft { namespace MSR { namespace CNTK { if (vin == nullptr) { - Error("cannot open word class file"); + RuntimeError("cannot open word class file"); } b = 0; while (!feof(vin)){ @@ -128,7 +128,7 @@ namespace Microsoft { namespace MSR { namespace CNTK { string str(outputFile.begin(), outputFile.end()); ofs = fopen(str.c_str(), "wt"); if (ofs == nullptr) - Error("Cannot open open %s for writing", str.c_str()); + RuntimeError("Cannot open open %s for writing", str.c_str()); outputFileIds[outputFile] = ofs; fp = ofs; } diff --git a/DataReader/SequenceReader/SequenceParser.cpp b/DataReader/SequenceReader/SequenceParser.cpp index 6d7cdc358..e31350a08 100644 --- a/DataReader/SequenceReader/SequenceParser.cpp +++ b/DataReader/SequenceReader/SequenceParser.cpp @@ -341,7 +341,7 @@ int64_t SequenceParser::GetFilePosition() { int64_t position = _ftelli64(m_pFile); if (position == -1L) - Error("SequenceParser::GetFilePosition - error retrieving file position in file"); + RuntimeError("SequenceParser::GetFilePosition - error retrieving file position in file"); return position; } @@ -354,7 +354,7 @@ void SequenceParser::SetFilePosition(int64_t position) { int rc = _fseeki64(m_pFile, position, SEEK_SET); if (rc) - Error("SequenceParser::SetFilePosition 
- error seeking in file"); + RuntimeError("SequenceParser::SetFilePosition - error seeking in file"); // setup state machine to start at this position PrepareStartPosition(position); @@ -407,7 +407,7 @@ size_t SequenceParser::UpdateBuffer() size_t bytesToRead = min(m_bufferSize, m_fileSize-m_bufferStart)-saveBytes; size_t bytesRead = fread(m_fileBuffer+saveBytes, 1, bytesToRead, m_pFile); if (bytesRead == 0 && ferror(m_pFile)) - Error("SequenceParser::UpdateBuffer - error reading file"); + RuntimeError("SequenceParser::UpdateBuffer - error reading file"); return bytesRead; } diff --git a/DataReader/SequenceReader/SequenceParser.h b/DataReader/SequenceReader/SequenceParser.h index b3a8a93b2..4fd562174 100644 --- a/DataReader/SequenceReader/SequenceParser.h +++ b/DataReader/SequenceReader/SequenceParser.h @@ -522,7 +522,7 @@ public: if (mFile) fclose(mFile); if (_wfopen_s(&mFile, fileName, L"rt") != 0) - Error("cannot open file %s", fileName); + RuntimeError("cannot open file %s", fileName); } void ParseReset() diff --git a/DataReader/SequenceReader/SequenceReader.cpp b/DataReader/SequenceReader/SequenceReader.cpp index 3592fd195..c3b59bd15 100644 --- a/DataReader/SequenceReader/SequenceReader.cpp +++ b/DataReader/SequenceReader/SequenceReader.cpp @@ -178,7 +178,7 @@ bool SequenceReader::EnsureDataAvailable(size_t mbStartSample, bool /* if ((m_sequence.size() == 1 ? epochSample : epochSample - m_sequence[m_sequence.size()-2]) > m_mbSize) { fprintf(stderr, "read sentence length is longer than the minibatch size. should be smaller. increase the minibatch size to at least %d", epochSample); - Error("read sentence length is longer than the minibatch size. should be smaller. increase the minibatch size to at least %d", epochSample); + RuntimeError("read sentence length is longer than the minibatch size. should be smaller. 
increase the minibatch size to at least %d", epochSample); } if (!_strcmpi(labelValue.c_str(), m_labelInfo[labelInfoIn].endSequence.c_str())) @@ -198,25 +198,25 @@ bool SequenceReader::EnsureDataAvailable(size_t mbStartSample, bool /* } else { - Error("Input label expected to be a category label"); + RuntimeError("Input label expected to be a category label"); } // if we have potential features if (m_featureDim > 0) { - Error("to-do. Assume sparse input feature. need to change the code from dense matrix"); + RuntimeError("to-do. Assume sparse input feature. need to change the code from dense matrix"); // move the position up to the start of the additional features section /* pos += labelIn.dim; assert(pos + m_featureDim == m_featureData.size()); // this has to be an even number, a pair of index and value if ((spos.numberPos&1) != 0) - Error("Features must be specified in pairs (index:value). Invalid features for label '%s'\n", labelValue); + RuntimeError("Features must be specified in pairs (index:value). 
Invalid features for label '%s'\n", labelValue); while (feature < spos.numberPos) { int index = (int)featureTemp[feature++]; if (index < 0 || index >= m_featureDim) - Error("Invalid feature index: %d for label '%s', feature max dimension = %lld\n", index, labelValue, m_featureDim); + RuntimeError("Invalid feature index: %d for label '%s', feature max dimension = %lld\n", index, labelValue, m_featureDim); ElemType value = featureTemp[feature++]; m_featureData[pos+index] = value; @@ -240,7 +240,7 @@ bool SequenceReader::EnsureDataAvailable(size_t mbStartSample, bool /* } else { - Error("Invalid output label type, expected Category, or Next Word"); + RuntimeError("Invalid output label type, expected Category, or Next Word"); } // get the ID from the label @@ -260,11 +260,11 @@ bool SequenceReader::EnsureDataAvailable(size_t mbStartSample, bool /* int jEnd = (int) m_labelIdData.size() - 1; LabelIdType index ; if (CheckIdFromLabel(labelInfo.endSequence, labelInfo, index) == false) - Error("cannot find sentence begining label"); + RuntimeError("cannot find sentence begining label"); if (m_labelIdData[jEnd] != index ) /// for language model, the first word/letter has to be - Error("SequenceReader: the last letter/word of a batch has to be the sentence ending symbol"); + RuntimeError("SequenceReader: the last letter/word of a batch has to be the sentence ending symbol"); } } @@ -436,7 +436,7 @@ void SequenceReader::Init(const ConfigParameters& readerConfig) } } else - Error("two label definitions (in and out) required for Sequence Reader"); + RuntimeError("two label definitions (in and out) required for Sequence Reader"); ConfigParameters featureConfig = readerConfig(m_featuresName,""); ConfigParameters labelConfig[2] = {readerConfig(m_labelsName[0],""),readerConfig(m_labelsName[1],"")}; @@ -616,7 +616,7 @@ void SequenceReader::ReadClassInfo(const wstring & vocfile, bool /*fla if (vin == nullptr) { - Error("cannot open word class file"); + RuntimeError("cannot open word 
class file"); } for (int a = 0; a < nwords; a++) { @@ -948,7 +948,7 @@ bool SequenceReader::SentenceEnd() { LabelIdType index ; if (CheckIdFromLabel(labelInfo.endSequence, labelInfo, index) == false) - Error("cannot find sentence begining label"); + RuntimeError("cannot find sentence begining label"); if (m_labelIdData[jEnd] == index ) return true; @@ -1088,7 +1088,7 @@ bool SequenceReader::GetMinibatch(std::map m_mbSize){ - Error("specified minibatch size %d is smaller than the actual minibatch size %d. memory can crash!", m_mbSize, actualmbsize); + RuntimeError("specified minibatch size %d is smaller than the actual minibatch size %d. memory can crash!", m_mbSize, actualmbsize); } // hit the end of the dataset, @@ -1184,7 +1184,7 @@ bool SequenceReader::GetMinibatch(std::map::GetMinibatch(std::map::OrganizeClass() for (i=0; i::SetLabelMapping(const std::wstring& /*sectionName { if (m_cachingReader) { - Error("Cannot set mapping table when the caching reader is being used"); + RuntimeError("Cannot set mapping table when the caching reader is being used"); } LabelInfo& labelInfo = m_labelInfo[( m_labelInfo[labelInfoOut].type == labelNextWord)?labelInfoIn:labelInfoOut]; @@ -1283,7 +1283,7 @@ template bool SequenceReader::GetData(const std::wstring& sectionName, size_t numRecords, void* data, size_t& dataBufferSize, size_t recordStart) { if (!m_cachingReader) - Error("GetData not supported in SequenceReader"); + RuntimeError("GetData not supported in SequenceReader"); return m_cachingReader->GetData(sectionName, numRecords, data, dataBufferSize, recordStart); } @@ -1324,7 +1324,7 @@ void BatchSequenceReader::Init(const ConfigParameters& readerConfig) } } else - Error("two label definitions (in and out) required for Sequence Reader"); + RuntimeError("two label definitions (in and out) required for Sequence Reader"); ConfigParameters featureConfig = readerConfig(m_featuresName,""); ConfigParameters labelConfig[2] = 
{readerConfig(m_labelsName[0],""),readerConfig(m_labelsName[1],"")}; @@ -1681,7 +1681,7 @@ bool BatchSequenceReader::EnsureDataAvailable(size_t /*mbStartSample*/ } else { - Error("Input label expected to be a category label"); + RuntimeError("Input label expected to be a category label"); } // now get the output label @@ -1700,7 +1700,7 @@ bool BatchSequenceReader::EnsureDataAvailable(size_t /*mbStartSample*/ } else { - Error("Invalid output label type, expected Category, or Next Word"); + RuntimeError("Invalid output label type, expected Category, or Next Word"); } // get the ID from the label @@ -1747,7 +1747,7 @@ bool BatchSequenceReader::GetMinibatch(std::map m_mbSize * mToProcess.size()){ - Error("specified minibatch size %d is smaller than the actual minibatch size %d. memory can crash!", m_mbSize, actualmbsize); + RuntimeError("specified minibatch size %d is smaller than the actual minibatch size %d. memory can crash!", m_mbSize, actualmbsize); } // now get the labels @@ -1834,7 +1834,7 @@ bool BatchSequenceReader::GetMinibatch(std::map::EnsureDataAvailable(size_t mbStartSample, bool end if (value == m_mapLabelToId.end()) { if (m_labelFileToWrite.empty()) - Error("label found in data not specified in label mapping file: %s", label.c_str()); + RuntimeError("label found in data not specified in label mapping file: %s", label.c_str()); // new label so add it to the mapping tables m_mapLabelToId[label] = m_labelIdMax; m_mapIdToLabel[m_labelIdMax] = label; @@ -301,11 +301,11 @@ void UCIFastReader::Init(const ConfigParameters& readerConfig) ConfigParameters configFeatures = readerConfig(m_featuresName,""); ConfigParameters configLabels = readerConfig(m_labelsName,"");; if (configFeatures.size() == 0) - Error("features file not found, required in configuration: i.e. 'features=[file=c:\\myfile.txt;start=1;dim=123]'"); + RuntimeError("features file not found, required in configuration: i.e. 
'features=[file=c:\\myfile.txt;start=1;dim=123]'"); if (configLabels.size() == 0) fprintf(stderr, "Warning: labels are not specified."); else if (configFeatures("file","") != configLabels("file","")) - Error("features and label files must be the same file, use separate readers to define single use files"); + RuntimeError("features and label files must be the same file, use separate readers to define single use files"); size_t vdim = configFeatures("dim"); string name = configFeatures.Name(); @@ -409,7 +409,7 @@ void UCIFastReader::Init(const ConfigParameters& readerConfig) if (allowLabelCreation) m_labelFileToWrite = labelPath; else - Error("label mapping file %ws not found, can be created with a 'createLabelMap' command/action\n", labelPath.c_str()); + RuntimeError("label mapping file %ws not found, can be created with a 'createLabelMap' command/action\n", labelPath.c_str()); } } @@ -612,7 +612,7 @@ void UCIFastReader::SetupEpoch() { fprintf(stderr, "WARNING: file %ws NOT written to disk, label file will only be written when starting epochs at the beginning of the dataset\n", m_labelFileToWrite.c_str()); m_labelFileToWrite.clear(); - Error("LabelMappingFile not provided in config, must be provided if not starting from epoch Zero (0)"); + RuntimeError("LabelMappingFile not provided in config, must be provided if not starting from epoch Zero (0)"); } } m_epochStartSample = m_mbStartSample = mbStartSample; @@ -700,7 +700,7 @@ void UCIFastReader::StartMinibatchLoop(size_t mbSize, size_t epoch, si if (m_randomizeRange != randomizeAuto) { if ((m_epochSize != requestDataSize && m_epochSize % m_randomizeRange != 0) || (m_randomizeRange % m_mbSize != 0)) - Error("randomizeRange must be an even multiple of mbSize and an integral factor of epochSize"); + RuntimeError("randomizeRange must be an even multiple of mbSize and an integral factor of epochSize"); m_randomordering.resize(m_randomizeRange, m_randomizeRange); } } @@ -741,7 +741,7 @@ bool 
UCIFastReader::GetMinibatch(std::map& features = *matrices[m_featuresName]; diff --git a/MachineLearning/CNTKEval/EvalReader.h b/MachineLearning/CNTKEval/EvalReader.h index 63957f401..e227509a5 100644 --- a/MachineLearning/CNTKEval/EvalReader.h +++ b/MachineLearning/CNTKEval/EvalReader.h @@ -45,7 +45,7 @@ public: { // record count must be the same for all the data if (recordCount != m_recordCount) - Error("Error: Record Count of %ls (%lux%lu) does not match the record count of previous entries (%lu).", val.c_str(), rows, recordCount, m_recordCount); + RuntimeError("Record Count of %ls (%lux%lu) does not match the record count of previous entries (%lu).", val.c_str(), rows, recordCount, m_recordCount); } else { @@ -136,7 +136,7 @@ public: // allocate the matrix if we don't have one yet if (iterIn == matrices.end()) { - Error("No matrix data found for key '%ls', cannot continue", val.c_str()); + RuntimeError("No matrix data found for key '%ls', cannot continue", val.c_str()); } Matrix* matrix = iterIn->second; diff --git a/MachineLearning/CNTKEval/EvalWriter.h b/MachineLearning/CNTKEval/EvalWriter.h index 58ef219ba..3c31e0e85 100644 --- a/MachineLearning/CNTKEval/EvalWriter.h +++ b/MachineLearning/CNTKEval/EvalWriter.h @@ -42,7 +42,7 @@ public: { // record count must be the same for all the data if (recordCount != m_recordCount) - Error("Error: Record Count of %ls (%lux%lu) does not match the record count of previous entries (%lu).", val.c_str(), rows, recordCount, m_recordCount); + RuntimeError("Record Count of %ls (%lux%lu) does not match the record count of previous entries (%lu).", val.c_str(), rows, recordCount, m_recordCount); } else { @@ -97,7 +97,7 @@ public: // allocate the matrix if we don't have one yet if (iterIn == matrices.end()) { - Error("No matrix data found for key '%ls', cannot continue", val.c_str()); + RuntimeError("No matrix data found for key '%ls', cannot continue", val.c_str()); } Matrix* matrix = (Matrix*)iterIn->second; diff --git 
a/MachineLearning/cn/ModelEditLanguage.cpp b/MachineLearning/cn/ModelEditLanguage.cpp index 5685227ee..5945ac359 100644 --- a/MachineLearning/cn/ModelEditLanguage.cpp +++ b/MachineLearning/cn/ModelEditLanguage.cpp @@ -110,7 +110,7 @@ void MELScript::CallFunction(const std::string& p_name, const ConfigPa { size_t numFixedParams = 0, numOptionalParams = 0; if (params.size() > numFixedParams + numOptionalParams || params.size() < numFixedParams) - Error("Invalid number of parameters. Valid parameters: CreateModel(). newly created model always becomes the new default."); + RuntimeError("Invalid number of parameters. Valid parameters: CreateModel(). newly created model always becomes the new default."); ComputationNetwork* cn = new ComputationNetwork(CPUDEVICE); OverrideModelNameAndSetDefaultModel(cn); @@ -119,7 +119,7 @@ void MELScript::CallFunction(const std::string& p_name, const ConfigPa { size_t numFixedParams = 1, numOptionalParams = 0; if (params.size() > numFixedParams + numOptionalParams || params.size() < numFixedParams) - Error("Invalid number of parameters. Valid parameters: CreateModelWithName(modelName). newly created model always becomes the new default."); + RuntimeError("Invalid number of parameters. Valid parameters: CreateModelWithName(modelName). newly created model always becomes the new default."); ComputationNetwork* cn = new ComputationNetwork(CPUDEVICE); OverrideModelNameAndSetDefaultModel(cn, params[0]); @@ -128,7 +128,7 @@ void MELScript::CallFunction(const std::string& p_name, const ConfigPa { size_t numFixedParams = 1, numOptionalParams = 1; if (params.size() > numFixedParams + numOptionalParams || params.size() < numFixedParams) - Error("Invalid number of parameters. Valid parameters: LoadModel(modelFileName, [format=cntk]). newly loaded model always becomes the new default."); + RuntimeError("Invalid number of parameters. Valid parameters: LoadModel(modelFileName, [format=cntk]). 
newly loaded model always becomes the new default."); std::wstring modelFormat = GetOptionalModelFormat(params, numFixedParams); @@ -140,7 +140,7 @@ void MELScript::CallFunction(const std::string& p_name, const ConfigPa { size_t numFixedParams = 2, numOptionalParams = 1; if (params.size() > numFixedParams + numOptionalParams || params.size() < numFixedParams) - Error("Invalid number of parameters. Valid parameters: LoadModelWithName(modelName, modelFileName, [format=cntk]). newly loaded model always becomes the new default."); + RuntimeError("Invalid number of parameters. Valid parameters: LoadModelWithName(modelName, modelFileName, [format=cntk]). newly loaded model always becomes the new default."); std::wstring modelFormat = GetOptionalModelFormat(params, numFixedParams); @@ -152,7 +152,7 @@ void MELScript::CallFunction(const std::string& p_name, const ConfigPa { size_t numFixedParams = 2, numOptionalParams = 1; if (params.size() > numFixedParams + numOptionalParams || params.size() < numFixedParams) - Error("Invalid number of parameters. Valid parameters: LoadNDLSnippet(modelName, ndlsnippet)."); + RuntimeError("Invalid number of parameters. 
Valid parameters: LoadNDLSnippet(modelName, ndlsnippet)."); string modelName = params[0]; wstring ndlSnippetFileName = params[1]; @@ -167,7 +167,7 @@ void MELScript::CallFunction(const std::string& p_name, const ConfigPa { if (!ndlScript.Exists(section)) { - Error("Section %s specified in optional parameter was not found in the %ls file\n", section.c_str(), ndlSnippetFileName.c_str()); + RuntimeError("Section %s specified in optional parameter was not found in the %ls file\n", section.c_str(), ndlSnippetFileName.c_str()); } ConfigValue ndlSnippet = ndlScript(section); EvaluateNDLSnippet(ndlSnippet, cn); @@ -183,7 +183,7 @@ void MELScript::CallFunction(const std::string& p_name, const ConfigPa { size_t numFixedParams = 1, numOptionalParams = 1; if (params.size() > numFixedParams + numOptionalParams || params.size() < numFixedParams) - Error("Invalid number of parameters. Valid parameters: SaveDefaultModel(modelFileName, [format=cntk])."); + RuntimeError("Invalid number of parameters. Valid parameters: SaveDefaultModel(modelFileName, [format=cntk])."); std::wstring modelFormat = GetOptionalModelFormat(params, numFixedParams); @@ -191,7 +191,7 @@ void MELScript::CallFunction(const std::string& p_name, const ConfigPa ComputationNetwork* cn = m_netNdlDefault->cn; if (cn == NULL) - Error("SaveDefaultModel can only be called after a default name exists (i.e., at least one model is loaded.)"); + RuntimeError("SaveDefaultModel can only be called after a default name exists (i.e., at least one model is loaded.)"); // validate the network before we save it out ProcessNDLScript(m_netNdlDefault, ndlPassAll, true); @@ -202,7 +202,7 @@ void MELScript::CallFunction(const std::string& p_name, const ConfigPa { size_t numFixedParams = 2, numOptionalParams = 1; if (params.size() > numFixedParams + numOptionalParams || params.size() < numFixedParams) - Error("Invalid number of parameters. 
Valid parameters: SaveModel(modelName, modelFileName, [format=cntk])."); + RuntimeError("Invalid number of parameters. Valid parameters: SaveModel(modelName, modelFileName, [format=cntk])."); std::wstring modelFormat = GetOptionalModelFormat(params, numFixedParams); @@ -211,7 +211,7 @@ void MELScript::CallFunction(const std::string& p_name, const ConfigPa NetNdl* netNdl = &m_mapNameToNetNdl[modelName]; if (netNdl->cn == NULL) - Error("SaveModel can only be called after a network has been setup, no active model named %ls.", modelName.c_str()); + RuntimeError("SaveModel can only be called after a network has been setup, no active model named %ls.", modelName.c_str()); // validate and finish the second pass through NDL if any in-line NDL was defined ProcessNDLScript(netNdl, ndlPassAll, true); @@ -221,7 +221,7 @@ void MELScript::CallFunction(const std::string& p_name, const ConfigPa { size_t numFixedParams = 1, numOptionalParams = 0; if (params.size() > numFixedParams + numOptionalParams || params.size() < numFixedParams) - Error("Invalid number of parameters. Valid parameters: SetDefaultModel(modelName)"); + RuntimeError("Invalid number of parameters. Valid parameters: SetDefaultModel(modelName)"); SetExistingModelAsDefault(params[0]); } @@ -248,7 +248,7 @@ void MELScript::CallFunction(const std::string& p_name, const ConfigPa { size_t numFixedParams = 2, numOptionalParams = 1; if (params.size() > numFixedParams + numOptionalParams || params.size() < numFixedParams) - Error("Invalid number of parameters. Valid parameters: DumpNetwork(modelName, fileName, [includeData=false|true])"); + RuntimeError("Invalid number of parameters. 
Valid parameters: DumpNetwork(modelName, fileName, [includeData=false|true])"); bool includeData = GetOptionalIncludeDataValue(params, numFixedParams); @@ -257,7 +257,7 @@ void MELScript::CallFunction(const std::string& p_name, const ConfigPa auto found = m_mapNameToNetNdl.find(modelName); if (found == m_mapNameToNetNdl.end()) - Error("Model %s does not exist. Cannot dump non-existant model.", modelName); + RuntimeError("Model %s does not exist. Cannot dump non-existant model.", modelName); else { NetNdl* netNdl = &found->second; @@ -269,7 +269,7 @@ void MELScript::CallFunction(const std::string& p_name, const ConfigPa { size_t numFixedParams = 2, numOptionalParams = 1; if (params.size() > numFixedParams + numOptionalParams || params.size() < numFixedParams) - Error("Invalid number of parameters. Valid parameters: DumpNode(nodeName, fileName, [includeData=false|true])"); + RuntimeError("Invalid number of parameters. Valid parameters: DumpNode(nodeName, fileName, [includeData=false|true])"); bool includeData = GetOptionalIncludeDataValue(params, numFixedParams); @@ -284,7 +284,7 @@ void MELScript::CallFunction(const std::string& p_name, const ConfigPa { size_t numFixedParams = 2, numOptionalParams = 1; if (params.size() > numFixedParams + numOptionalParams || params.size() < numFixedParams) - Error("Invalid number of parameters. Valid parameters are: CopyNode(fromNode, toNode, [copy=all|value])"); + RuntimeError("Invalid number of parameters. Valid parameters are: CopyNode(fromNode, toNode, [copy=all|value])"); CopyNodeFlags copyFlags = GetOptionalCopyNodeFlags(params, numFixedParams); @@ -296,7 +296,7 @@ void MELScript::CallFunction(const std::string& p_name, const ConfigPa { size_t numFixedParams = 3, numOptionalParams = 1; if (params.size() > numFixedParams + numOptionalParams || params.size() < numFixedParams) - Error("Invalid number of parameters. 
Valid parameters are: CopySubTree(fromNode, toNetwork, toNodeNamePrefix, [copy=all|value])"); + RuntimeError("Invalid number of parameters. Valid parameters are: CopySubTree(fromNode, toNetwork, toNodeNamePrefix, [copy=all|value])"); CopyNodeFlags copyFlags = GetOptionalCopyNodeFlags(params, numFixedParams); @@ -309,7 +309,7 @@ void MELScript::CallFunction(const std::string& p_name, const ConfigPa { size_t numFixedParams = 2, numOptionalParams = 0; if (params.size() > numFixedParams + numOptionalParams || params.size() < numFixedParams) - Error("Invalid number of parameters. Valid parameters are: CopyNodeInputs(fromNode, toNode)"); + RuntimeError("Invalid number of parameters. Valid parameters are: CopyNodeInputs(fromNode, toNode)"); // get the nodes NetNdl* netNdlTo; @@ -317,7 +317,7 @@ void MELScript::CallFunction(const std::string& p_name, const ConfigPa vector names = GenerateNames(params[0], params[1], netNdlFrom, netNdlTo); if (netNdlFrom != netNdlTo) - Error("CopyInputs requires two symbols from the same network, %s and %s belong to different networks", params[0], params[1]); + RuntimeError("CopyInputs requires two symbols from the same network, %s and %s belong to different networks", params[0], params[1]); ProcessNDLScript(netNdlFrom, ndlPassAll); for (GenNameValue name : names) @@ -333,7 +333,7 @@ void MELScript::CallFunction(const std::string& p_name, const ConfigPa { size_t numFixedParams = 3, numOptionalParams = 0; if (params.size() > numFixedParams + numOptionalParams || params.size() < numFixedParams) - Error("Invalid number of parameters. Valid parameters are: SetNodeInput(toNode, inputID(0-based), inputNodeName)"); + RuntimeError("Invalid number of parameters. 
Valid parameters are: SetNodeInput(toNode, inputID(0-based), inputNodeName)"); // get the nodes NetNdl* netNdlTo; @@ -343,12 +343,12 @@ void MELScript::CallFunction(const std::string& p_name, const ConfigPa int inputNum = params[1]; if (netNdlTo != netNdlFrom) - Error("SetNodeInput() requires two symbols from the same network, %s and %s belong to different networks", params[0], params[2]); + RuntimeError("SetNodeInput() requires two symbols from the same network, %s and %s belong to different networks", params[0], params[2]); if (nodeFrom.size() != 1) - Error("SetNodeInput() must have a single value input, %s doesn't represent one item",params[0]); + RuntimeError("SetNodeInput() must have a single value input, %s doesn't represent one item",params[0]); if (nodeTo.size() < 1) - Error("SetNodeInput() must have at least one target, %s doesn't represent any items",params[2]); + RuntimeError("SetNodeInput() must have at least one target, %s doesn't represent any items",params[2]); // process outstanding NDL scripts ensuring that the inputs have all been resolved ProcessNDLScript(netNdlFrom, ndlPassResolve); @@ -360,13 +360,13 @@ void MELScript::CallFunction(const std::string& p_name, const ConfigPa else if (EqualInsensitive(name, "SetNodeInputs", "SetInputs")) { if (params.size() > 4 || params.size() < 2) - Error("Invalid number of parameters. Valid parameters are: SetNodeInputs(toNode, inputNodeName1, [inputNodeName2, inputNodeName3])"); + RuntimeError("Invalid number of parameters. 
Valid parameters are: SetNodeInputs(toNode, inputNodeName1, [inputNodeName2, inputNodeName3])"); // get the nodes NetNdl* netNdlTo; vector*> nodeTo = FindSymbols(params[0], netNdlTo); if (nodeTo.size() != 1) - Error("SetNodeInputs() must have exactly one target, %s doesn't represent any node.",params[0]); + RuntimeError("SetNodeInputs() must have exactly one target, %s doesn't represent any node.",params[0]); vector*> inputNodes; inputNodes.resize(params.size()-1); @@ -380,10 +380,10 @@ void MELScript::CallFunction(const std::string& p_name, const ConfigPa vector*> nodeFrom = FindSymbols(params[i], netNdlFrom); if (netNdlTo != netNdlFrom) - Error("SetNodeInputs() requires all symbols from the same network, %s and %s belong to different networks", params[0], params[i]); + RuntimeError("SetNodeInputs() requires all symbols from the same network, %s and %s belong to different networks", params[0], params[i]); if (nodeFrom.size() != 1) - Error("SetNodeInputs() each input node should be translated to one node name. %s is translated to multiple node names.", params[i]); + RuntimeError("SetNodeInputs() each input node should be translated to one node name. 
%s is translated to multiple node names.", params[i]); inputNodes[i-1] = nodeFrom[0]; } @@ -395,12 +395,12 @@ void MELScript::CallFunction(const std::string& p_name, const ConfigPa else if (inputNodes.size() == 3) nodeTo[0]->AttachInputs(inputNodes[0], inputNodes[1], inputNodes[2]); else - Error("SetNodeInputs(): You specified more than 3 input nodes."); + RuntimeError("SetNodeInputs(): You specified more than 3 input nodes."); } else if (EqualInsensitive(name, "SetProperty")) { if (params.size() != 3) - Error("Invalid number of parameters: Valid parameters are: SetProperty(toNode, propertyName, propertyValue)"); + RuntimeError("Invalid number of parameters: Valid parameters are: SetProperty(toNode, propertyName, propertyValue)"); std::string propName = params[1]; MELProperty prop=melPropNull; @@ -434,7 +434,7 @@ void MELScript::CallFunction(const std::string& p_name, const ConfigPa } else { - Error("Invalid property, %s, is not supported", propName); + RuntimeError("Invalid property, %s, is not supported", propName); } // get the nodes @@ -491,7 +491,7 @@ void MELScript::CallFunction(const std::string& p_name, const ConfigPa } default: { - Error("Invalid property, %s, is not supported", propName); + RuntimeError("Invalid property, %s, is not supported", propName); break; } } @@ -501,7 +501,7 @@ void MELScript::CallFunction(const std::string& p_name, const ConfigPa { size_t numFixedParams = 3, numOptionalParams = 0; if (params.size() > numFixedParams + numOptionalParams || params.size() < numFixedParams) - Error("Invalid number of parameters. Valid parameters are: SetPropertyForSubTree(rootNodeName, propertyName, propertyValue)"); + RuntimeError("Invalid number of parameters. 
Valid parameters are: SetPropertyForSubTree(rootNodeName, propertyName, propertyValue)"); std::string propName = params[1]; MELProperty prop=melPropNull; @@ -511,7 +511,7 @@ void MELScript::CallFunction(const std::string& p_name, const ConfigPa } else { - Error("Invalid property, %s, is not supported", propName); + RuntimeError("Invalid property, %s, is not supported", propName); } // get the nodes @@ -533,7 +533,7 @@ void MELScript::CallFunction(const std::string& p_name, const ConfigPa } default: { - Error("Invalid property, %s, is not supported", propName); + RuntimeError("Invalid property, %s, is not supported", propName); break; } } @@ -558,7 +558,7 @@ void MELScript::CallFunction(const std::string& p_name, const ConfigPa } if (nodes.size() < 1) - Error("Delete must have at least one target, %s doesn't represent any items",params[i]); + RuntimeError("Delete must have at least one target, %s doesn't represent any items",params[i]); for (ComputationNode* node : nodes) { netNdl->cn->DeleteNode(node->NodeName()); @@ -569,7 +569,7 @@ void MELScript::CallFunction(const std::string& p_name, const ConfigPa { size_t numFixedParams = 2, numOptionalParams = 0; if (params.size() > numFixedParams + numOptionalParams || params.size() < numFixedParams) - Error("Invalid number of parameters. Valid parameters are Rename(oldNodeName, newNodeName)"); + RuntimeError("Invalid number of parameters. 
Valid parameters are Rename(oldNodeName, newNodeName)"); // get the nodes NetNdl* netNdlTo; @@ -577,7 +577,7 @@ void MELScript::CallFunction(const std::string& p_name, const ConfigPa vector nodeNames = GenerateNames(params[0], params[1], netNdlFrom, netNdlTo); if (netNdlFrom != netNdlTo) - Error("CopyInputs requires two symbols from the same network, %s and %s belong to different networks", params[0], params[1]); + RuntimeError("CopyInputs requires two symbols from the same network, %s and %s belong to different networks", params[0], params[1]); // process everything in case these nodes may have tags on them ProcessNDLScript(netNdlFrom, ndlPassAll); @@ -591,7 +591,7 @@ void MELScript::CallFunction(const std::string& p_name, const ConfigPa } else { - Error("Unknown Editor function %s", name.c_str()); + RuntimeError("Unknown Editor function %s", name.c_str()); } } diff --git a/MachineLearning/cn/ModelEditLanguage.h b/MachineLearning/cn/ModelEditLanguage.h index 3996d2e69..32f5f3080 100644 --- a/MachineLearning/cn/ModelEditLanguage.h +++ b/MachineLearning/cn/ModelEditLanguage.h @@ -160,7 +160,7 @@ public: else { if (ndlNode->GetType() != ndlTypeConstant) - Error("Matching NDL name found for %s, but no corresponding computation node found\n", symbol); + RuntimeError("Matching NDL name found for %s, but no corresponding computation node found\n", symbol); // probably a constant node, so make the ComputationNode that is equivalent ComputationNode* nodePtr = cn->CreateLearnableParameter(name, 1, 1); ndlNode->SetEvalValue(nodePtr); @@ -170,7 +170,7 @@ public: } } if (nodes.empty()) - Error("FindSymbols could not find a symbol for %s\n", symbol); + RuntimeError("FindSymbols could not find a symbol for %s\n", symbol); return nodes; } @@ -207,7 +207,7 @@ public: vector*> nodes = netNdlIn->cn->GetNodesFromName(name); if (!nodes.size()) //found - Error("GenerateNames: Node name does not exist %ls.", name.c_str()); + RuntimeError("GenerateNames: Node name does not exist %ls.", 
name.c_str()); size_t firstStartOut, firstCountOut, secondStartOut, secondCountOut; netNdlOut = ParseName(symbolOut, firstStartOut, firstCountOut, secondStartOut, secondCountOut); @@ -236,7 +236,7 @@ public: // make sure the patterns are the same if (!(singleInputMultiOutput || ((!firstCount == !firstCountOut) && (!secondCount == !secondCountOut)))) { - Error("The input symbols and output symbols must match, when the input matches has more than one element. %s = %s not allowed", symbolOut.c_str(), symbolIn.c_str()); + RuntimeError("The input symbols and output symbols must match, when the input matches has more than one element. %s = %s not allowed", symbolOut.c_str(), symbolIn.c_str()); } // get the first and last "unchanged" portions @@ -254,7 +254,7 @@ public: // make sure that there are some nodes to copy to if (nodesOut.size() == 0) - Error("Setting a single input to multiple outputs requires the multiple outputs to exist. In %ls = %ls, %ls does not match any nodes.", nameOut.c_str(), name.c_str(), nameOut.c_str()); + RuntimeError("Setting a single input to multiple outputs requires the multiple outputs to exist. In %ls = %ls, %ls does not match any nodes.", nameOut.c_str(), name.c_str(), nameOut.c_str()); // this is the *.W = L2.W case // We want to find all the destination existing matches and then assign the in node to all of them @@ -397,7 +397,7 @@ public: { auto found = m_mapNameToNetNdl.find(modelName); if (found == m_mapNameToNetNdl.end()) - Error("Model %s does not exist. Cannot set it to default.", modelName); + RuntimeError("Model %s does not exist. 
Cannot set it to default.", modelName); else m_netNdlDefault = &found->second; } @@ -417,7 +417,7 @@ public: } else { - Error("Invalid optional parameter %s, valid optional parameters: includeData=(false|true)", propName.c_str()); + RuntimeError("Invalid optional parameter %s, valid optional parameters: includeData=(false|true)", propName.c_str()); } } } @@ -441,12 +441,12 @@ public: } else { - Error("Invalid optional parameter value %s, valid values are: format=(cntk)", value.c_str()); + RuntimeError("Invalid optional parameter value %s, valid values are: format=(cntk)", value.c_str()); } } else { - Error("Invalid optional parameter %s, valid optional parameters: format=(cntk)", propName.c_str()); + RuntimeError("Invalid optional parameter %s, valid optional parameters: format=(cntk)", propName.c_str()); } } } @@ -466,12 +466,12 @@ public: { if (value.empty()) { - Error("Invalid optional parameter value , a section name must be specified: section=(sectionName)"); + RuntimeError("Invalid optional parameter value , a section name must be specified: section=(sectionName)"); } } else { - Error("Invalid optional parameter %s, valid optional parameters: section=(sectionName)", propName.c_str()); + RuntimeError("Invalid optional parameter %s, valid optional parameters: section=(sectionName)", propName.c_str()); } } } @@ -501,12 +501,12 @@ public: } else { - Error("Invalid optional parameter value %s in CopyNode(), valid values are copyFlag=(all|value)", value.c_str()); + RuntimeError("Invalid optional parameter value %s in CopyNode(), valid values are copyFlag=(all|value)", value.c_str()); } } else { - Error("Invalid optional parameter to Copy, %s\n valid optional parameters: copyFlag=(all|value)", propName.c_str()); + RuntimeError("Invalid optional parameter to Copy, %s\n valid optional parameters: copyFlag=(all|value)", propName.c_str()); } } } @@ -556,7 +556,7 @@ public: { auto paramStart = token.find_first_of(OPENBRACES); if (paramStart == npos) - Error("Invalid 
macro/function call can not be parsed: %s\n", token.c_str()); + RuntimeError("Invalid macro/function call can not be parsed: %s\n", token.c_str()); nameFunction = token.substr(0, paramStart); Trim(nameFunction); params = token.substr(paramStart); @@ -625,7 +625,7 @@ public: { size_t tokenStartNew = keyEnd+1; if (!(tokenStartNew < tokenEnd)) - Error("Equal at the end of line not allowed"); + RuntimeError("Equal at the end of line not allowed"); std::string rightValue = stringParse.substr(tokenStartNew,tokenEnd-tokenStartNew); Trim(rightValue); @@ -634,7 +634,7 @@ public: if (foundBrace == npos) { if (!m_netNdlDefault) - Error("NDL Command cannot be executed until default model is established, cannot set '%s' without a default mode\n Try calling SetDefaultModel(model) before any NDL statement are embedded\n", key.c_str()); + RuntimeError("NDL Command cannot be executed until default model is established, cannot set '%s' without a default mode\n Try calling SetDefaultModel(model) before any NDL statement are embedded\n", key.c_str()); HandleNDLInline(stringParse, tokenStart, tokenEnd); } else //createModel, loadModel, or loadNDL @@ -668,7 +668,7 @@ public: else { // not a MEL command, so pass it on to NDL if (!m_netNdlDefault) - Error("NDL Command cannot be executed until default model is established, cannot set '%s' without a default mode\n Try calling SetDefaultModel(model) before any NDL statement are embedded\n", key.c_str()); + RuntimeError("NDL Command cannot be executed until default model is established, cannot set '%s' without a default mode\n Try calling SetDefaultModel(model) before any NDL statement are embedded\n", key.c_str()); HandleNDLInline(stringParse, tokenStart, tokenEnd); } } @@ -680,7 +680,7 @@ public: { std::string value = stringParse.substr(tokenStart,tokenEnd-tokenStart); if (keyEnd > tokenEnd) - Error("Invalid line, expecting function call, %s", value); + RuntimeError("Invalid line, expecting function call, %s", value); std::string 
functionName; std::string paramList; // Function(x,y,z) - function with no return diff --git a/MachineLearning/cn/NDLNetworkBuilder.h b/MachineLearning/cn/NDLNetworkBuilder.h index 1f4d65555..d23f3447e 100644 --- a/MachineLearning/cn/NDLNetworkBuilder.h +++ b/MachineLearning/cn/NDLNetworkBuilder.h @@ -85,7 +85,7 @@ namespace Microsoft { namespace MSR { namespace CNTK { { std::string name = config("load"); if (!config.Exists(name)) - Error("the configuration parameter 'load=%s' doesn't specify another section in this configuration file.\n" + RuntimeError("the configuration parameter 'load=%s' doesn't specify another section in this configuration file.\n" "No 'networkDescription' variable was defined if specifying a separate file was desired.\n ", name.c_str()); newConfig.Insert(name, config(name)); @@ -93,11 +93,11 @@ namespace Microsoft { namespace MSR { namespace CNTK { } if (!config.Exists("run")) - Error("In NDLNetworkBuilder section either a 'networkDescription=filename' or 'run=sectionName' must exist."); + RuntimeError("In NDLNetworkBuilder section either a 'networkDescription=filename' or 'run=sectionName' must exist."); std::string name = config("run"); if (!config.Exists(name)) - Error("the configuration parameter 'run=%s' doesn't specify another section in this configuration file.\n" + RuntimeError("the configuration parameter 'run=%s' doesn't specify another section in this configuration file.\n" "No 'networkDescription' variable was defined if specifying a separate file was desired.\n ", name.c_str()); newConfig.Insert(name, config(name)); @@ -107,7 +107,7 @@ namespace Microsoft { namespace MSR { namespace CNTK { { std::string networkConfigString = networkConfig; if (networkConfigString.find_first_of("+") != std::string::npos) - Error("\"+\" not allowed in \"networkDescription\" value. Multiple files cannot be specified via \"networkDescription\" parameter. " + RuntimeError("\"+\" not allowed in \"networkDescription\" value. 
Multiple files cannot be specified via \"networkDescription\" parameter. " "In order to load multiple NDL files (eg, for loading several files of macros), use the \"ndlMacros\" parameter."); // find the "run" and "load" keys and add them diff --git a/MachineLearning/cn/NDLUtil.h b/MachineLearning/cn/NDLUtil.h index c5b967264..cced408ee 100644 --- a/MachineLearning/cn/NDLUtil.h +++ b/MachineLearning/cn/NDLUtil.h @@ -151,7 +151,7 @@ namespace Microsoft { namespace MSR { namespace CNTK { valid = (outputType == ndlTypeArray || outputType == ndlTypeFunction || outputType == ndlTypeMacroCall); } if (!valid) - Error("Invalid network node definition for '%s', nonexistant or wrong type", symbolName.c_str()); + RuntimeError("Invalid network node definition for '%s', nonexistant or wrong type", symbolName.c_str()); if (nodeArray) { vector*> nodes; @@ -168,7 +168,7 @@ namespace Microsoft { namespace MSR { namespace CNTK { // if no evaluation value exists throw an error if (cnNode == nullptr) { - Error("Invalid node '%s' as an output node, nonexistant or wrong type", nodes[i]->GetName().c_str()); + RuntimeError("Invalid node '%s' as an output node, nonexistant or wrong type", nodes[i]->GetName().c_str()); } // see if it's already in the collection diff --git a/MachineLearning/cn/NetworkDescriptionLanguage.h b/MachineLearning/cn/NetworkDescriptionLanguage.h index 7fe295c2c..2bd47b322 100644 --- a/MachineLearning/cn/NetworkDescriptionLanguage.h +++ b/MachineLearning/cn/NetworkDescriptionLanguage.h @@ -287,7 +287,7 @@ public: if (!node || node->GetType() != ndlTypeConstant) { std::string name = node->GetName(); - Error("Scalar expected, '%s' must be a constant or variable that resolves to a constant\n", name.c_str()); + RuntimeError("Scalar expected, '%s' must be a constant or variable that resolves to a constant\n", name.c_str()); } return node->GetValue(); } @@ -307,7 +307,7 @@ public: // make sure the actual parameters and expected parameters match if (m_parameters.size() < 
m_paramMacro.size()) { - Error("Parameter mismatch, %d parameters provided, %d expected in call to %s\n", + RuntimeError("Parameter mismatch, %d parameters provided, %d expected in call to %s\n", m_parameters.size(),m_paramMacro.size(),m_value.c_str()); } @@ -326,7 +326,7 @@ public: else if (nodeParam->GetType() == ndlTypeOptionalParameter) { if (i < m_paramMacro.size()) - Error("Parameter mismatch, parameter %d is an optional parameter, but should be a required parameter\n",i); + RuntimeError("Parameter mismatch, parameter %d is an optional parameter, but should be a required parameter\n",i); // if no symbol yet, add it if (!m_script->ExistsSymbol(paramName)) { @@ -420,7 +420,7 @@ public: m_scriptString = configValue; NDLNode* ndlNode = s_global.CheckName(macroName, true); if (ndlNode == NULL) - Error("Invalid macro definition, %s not found", macroName.c_str()); + RuntimeError("Invalid macro definition, %s not found", macroName.c_str()); // get and parse the parameters ConfigArray parameters = ndlNode->GetParamMacro(); @@ -435,7 +435,7 @@ public: // in which case 'functionName' will get the default node name returned if (CheckFunction(functionName)) { - Error("NDLScript: Macro %s includes a parameter %s, which is also the name of a function. Parameter names may not be the same as function names.", macroName.c_str(), param.c_str()); + RuntimeError("NDLScript: Macro %s includes a parameter %s, which is also the name of a function. 
Parameter names may not be the same as function names.", macroName.c_str(), param.c_str()); } NDLNode* paramNode = new NDLNode(param, param, this, ndlTypeParameter); @@ -549,7 +549,7 @@ public: if (script != NULL) { if (node->GetType() != ndlTypeMacroCall || script == NULL) - Error("Symbol name not valid, %s is not a macro, so %s cannot be interpretted",search.c_str(),symbol.c_str() ); + RuntimeError("Symbol name not valid, %s is not a macro, so %s cannot be interpretted",search.c_str(),symbol.c_str() ); return script->FindSymbol(symbol.substr(firstDot+1), searchForDotNames); } } @@ -598,7 +598,7 @@ public: if (nodeFound->GetType() != ndlTypeUndetermined && nodeFound->GetType() != ndlTypeParameter) { std::string value = found->second->GetValue(); - Error("Symbol '%s' currently assigned to '%s' reassigning to a different value not allowed\n", symbol.c_str(), value.c_str()); + RuntimeError("Symbol '%s' currently assigned to '%s' reassigning to a different value not allowed\n", symbol.c_str(), value.c_str()); } } m_symbols[symbol] = node; @@ -612,7 +612,7 @@ public: auto found = m_symbols.find(symbol); if (found == m_symbols.end()) { - Error("Symbol '%s' currently does not exist, attempting to assigned value '%s' AssignSymbol() requires existing symbol\n", symbol.c_str(), node->GetValue()); + RuntimeError("Symbol '%s' currently does not exist, attempting to assigned value '%s' AssignSymbol() requires existing symbol\n", symbol.c_str(), node->GetValue()); } m_symbols[symbol] = node; } @@ -726,7 +726,7 @@ public: { auto paramStart = token.find_first_of(OPENBRACES); if (paramStart == npos) - Error("Invalid macro/function call can not be parsed: %s\n", token.c_str()); + RuntimeError("Invalid macro/function call can not be parsed: %s\n", token.c_str()); nameFunction = token.substr(0, paramStart); Trim(nameFunction); params = token.substr(paramStart); @@ -763,7 +763,7 @@ public: } if (paramNode == NULL) { - Error("variable name '%s' not found, must be previously defined\n", 
param.c_str()); + RuntimeError("variable name '%s' not found, must be previously defined\n", param.c_str()); } else { @@ -832,7 +832,7 @@ public: std::string nameFunction, params; NDLNode* ndlNode = CallStringParse(token, nameFunction, params); if (ndlNode) - Error("function '%s' already defined\n", nameFunction.c_str()); + RuntimeError("function '%s' already defined\n", nameFunction.c_str()); ndlNode = new NDLNode(nameFunction, params, &s_global, ndlTypeMacro); // now set the variables/parameters which will be parsed when the body shows up @@ -854,7 +854,7 @@ public: NDLNode* ndlNode = CallStringParse(token, nameFunction, params); if (ndlNode == NULL) - Error("Undefined function or macro '%s' in %s\n", nameFunction.c_str(), token.c_str()); + RuntimeError("Undefined function or macro '%s' in %s\n", nameFunction.c_str(), token.c_str()); // now setup the variables/parameters ConfigValue value = ConfigValue(params, nameFunction); @@ -890,7 +890,7 @@ public: oneLineDefinition = true; tokenStart = stringParse.find_first_not_of(" \t", tokenStart+1); if (tokenStart == npos) - Error("Body of Macro missing"); + RuntimeError("Body of Macro missing"); } NDLScript* script = new NDLScript(ConfigValue(stringParse.substr(tokenStart, tokenEnd-tokenStart), macroNode->GetName()), macroNode->GetName(), oneLineDefinition); @@ -908,7 +908,7 @@ public: { keyEnd = stringParse.find_first_of(OPENBRACES, tokenStart); if (keyEnd == npos || keyEnd >= tokenEnd) - Error("Invalid statement, does not contain an '=' sign: %s\n", stringParse.substr(tokenStart, tokenEnd-tokenStart).c_str()); + RuntimeError("Invalid statement, does not contain an '=' sign: %s\n", stringParse.substr(tokenStart, tokenEnd-tokenStart).c_str()); m_macroNode = ParseDefinition(stringParse.substr(tokenStart, tokenEnd-tokenStart)); // the body of the macro will come through next time return tokenEnd; @@ -930,7 +930,7 @@ public: // check to make sure variable name isn't a valid function name as well string strTemp = key; if 
(CheckFunction(strTemp)) - Error("variable %s is invalid, it is reserved because it is also the name of a function", key.c_str()); + RuntimeError("variable %s is invalid, it is reserved because it is also the name of a function", key.c_str()); tokenStart = keyEnd; if (stringParse[keyEnd] == '=') diff --git a/MachineLearning/cn/SynchronousExecutionEngine.h b/MachineLearning/cn/SynchronousExecutionEngine.h index 534114705..675cb5c9e 100644 --- a/MachineLearning/cn/SynchronousExecutionEngine.h +++ b/MachineLearning/cn/SynchronousExecutionEngine.h @@ -67,7 +67,7 @@ public: if (InputValue::TypeName() == cnNodeType) { if (parameter.size() < 1 || parameter.size() > 2) - Error("%ws should have 1 or 2 parameters[rows, [cols=1]].", cnNodeType); + RuntimeError("%ws should have 1 or 2 parameters[rows, [cols=1]].", cnNodeType); if (pass == ndlPassInitial) { @@ -86,7 +86,7 @@ public: else if (SparseInputValue::TypeName() == cnNodeType) { if (parameter.size() < 1 || parameter.size() > 2) - Error("%ws should have 1 or 2 parameters[rows, [cols=1]].", cnNodeType); + RuntimeError("%ws should have 1 or 2 parameters[rows, [cols=1]].", cnNodeType); if (pass == ndlPassInitial) { @@ -105,7 +105,7 @@ public: else if (cnNodeType == L"ImageInput") { if (parameter.size() < 3 || parameter.size() > 4) - Error("%ws should have 3 or 4 parameters[imageWidth, imageHeight, imageChannels, [numImages=1]].", cnNodeType); + RuntimeError("%ws should have 3 or 4 parameters[imageWidth, imageHeight, imageChannels, [numImages=1]].", cnNodeType); if (pass == ndlPassInitial) { @@ -122,7 +122,7 @@ public: else if (LearnableParameter::TypeName() == cnNodeType) { if (parameter.size() < 1 || parameter.size() > 2) - Error("%ws should have 1 or 2 parameters[rows, [cols=1]] plus other optional parameters (needGradient=[true|false], init=[uniform|gaussian|fixedvalue], initValueScale=[1|float], value=[0|float]).", cnNodeType); + RuntimeError("%ws should have 1 or 2 parameters[rows, [cols=1]] plus other optional 
parameters (needGradient=[true|false], init=[uniform|gaussian|fixedvalue], initValueScale=[1|float], value=[0|float]).", cnNodeType); if (pass == ndlPassInitial) { @@ -155,22 +155,22 @@ public: { std::string initFromFilePath = node->GetOptionalParameter("initFromFilePath", ""); if (initFromFilePath == "") - Error("initFromFilePath must be set when using \"fromFile\" initialization method"); + RuntimeError("initFromFilePath must be set when using \"fromFile\" initialization method"); if(initFromFilePath[0] == '\"' && initFromFilePath[initFromFilePath.size()-1] == '\"') // remove the opening and closing double quotes initFromFilePath = initFromFilePath.substr(1, initFromFilePath.size()-2); if(!fexists(initFromFilePath)) - Error("File pointed to by initFromFilePath does not exist: %s", initFromFilePath); + RuntimeError("File pointed to by initFromFilePath does not exist: %s", initFromFilePath); m_net.InitLearnableParametersFromFile(nodePtr, initFromFilePath); } else - Error("init must be one of the values of [uniform|gaussian|fixedvalue]"); + RuntimeError("init must be one of the values of [uniform|gaussian|fixedvalue]"); } } else if (SparseLearnableParameter::TypeName() == cnNodeType) { if (parameter.size() < 1 || parameter.size() > 2) - Error("%ws should have 1 or 2 parameters[rows, [cols=1]] plus other optional parameters (needGradient=[true|false], init=[uniform|gaussian|fixedvalue], initValueScale=[1|float], value=[0|float]).", cnNodeType); + RuntimeError("%ws should have 1 or 2 parameters[rows, [cols=1]] plus other optional parameters (needGradient=[true|false], init=[uniform|gaussian|fixedvalue], initValueScale=[1|float], value=[0|float]).", cnNodeType); if (pass == ndlPassInitial) { @@ -203,22 +203,22 @@ public: { std::string initFromFilePath = node->GetOptionalParameter("initFromFilePath", ""); if (initFromFilePath == "") - Error("initFromFilePath must be set when using \"fromFile\" initialization method"); + RuntimeError("initFromFilePath must be set when 
using \"fromFile\" initialization method"); if(initFromFilePath[0] == '\"' && initFromFilePath[initFromFilePath.size()-1] == '\"') // remove the opening and closing double quotes initFromFilePath = initFromFilePath.substr(1, initFromFilePath.size()-2); if(!fexists(initFromFilePath)) - Error("File pointed to by initFromFilePath does not exist: %s", initFromFilePath); + RuntimeError("File pointed to by initFromFilePath does not exist: %s", initFromFilePath); m_net.InitLearnableParametersFromFile(nodePtr, initFromFilePath); } else - Error("init must be one of the values of [uniform|gaussian|fixedvalue]"); + RuntimeError("init must be one of the values of [uniform|gaussian|fixedvalue]"); } } else if (cnNodeType == L"Constant") { if (parameter.size() != 1) - Error("Constant should have 1 fixed parameter [val] and two optional parameters [rows=[1|yourvalue], cols=[1|yourvalue]]."); + RuntimeError("Constant should have 1 fixed parameter [val] and two optional parameters [rows=[1|yourvalue], cols=[1|yourvalue]]."); if (pass == ndlPassInitial) { @@ -281,7 +281,7 @@ public: else if (cnNodeType == ConvolutionNode::TypeName()) { if (parameter.size() != 7) - Error("%ws should have 7 fixed parameters[weightNodeName, inputValueNodeName, kernelWidth, kernelHeight, outputChannels,horizontalSubsample, verticalSubsample] and two optional parameters [zeroPadding = [false|yourvalue], maxTempMemSizeInSamples = [0|yourvalue]].", cnNodeType); + RuntimeError("%ws should have 7 fixed parameters[weightNodeName, inputValueNodeName, kernelWidth, kernelHeight, outputChannels,horizontalSubsample, verticalSubsample] and two optional parameters [zeroPadding = [false|yourvalue], maxTempMemSizeInSamples = [0|yourvalue]].", cnNodeType); // setup the parameter position of children so we can hook them up later nodeParamCount = 2; @@ -314,7 +314,7 @@ public: else if (cnNodeType == MaxPoolingNode::TypeName()) { if (parameter.size() != 5) - Error("%ws should have 5 parameters[inputValueNodeName, 
windowWidth, windowHeight, horizontalSubsample, verticalSubsample].", cnNodeType); + RuntimeError("%ws should have 5 parameters[inputValueNodeName, windowWidth, windowHeight, horizontalSubsample, verticalSubsample].", cnNodeType); // setup the parameter position of children so we can hook them up later nodeParamCount = 1; @@ -341,7 +341,7 @@ public: else if (cnNodeType == AveragePoolingNode::TypeName()) { if (parameter.size() != 5) - Error("%ws should have 5 parameters[inputValueNodeName, windowWidth, windowHeight, horizontalSubsample, verticalSubsample].", cnNodeType); + RuntimeError("%ws should have 5 parameters[inputValueNodeName, windowWidth, windowHeight, horizontalSubsample, verticalSubsample].", cnNodeType); // setup the parameter position of children so we can hook them up later nodeParamCount = 1; @@ -405,7 +405,7 @@ public: break; default: if (nodeParamCount > 0) - Error("Invalid number of parameters name = '%s' call = '%s'\n", node->GetName().c_str(), node->GetValue().c_str()); + RuntimeError("Invalid number of parameters name = '%s' call = '%s'\n", node->GetName().c_str(), node->GetValue().c_str()); break; } @@ -461,7 +461,7 @@ public: std::size_t firstDotPos = name.find_first_of("."); if (firstDotPos == std::string::npos) { - Error("Logic Error: nodeParam of type \"ndlTypeDotParameter\" doesn't have a dot in its name: %s", name.c_str()); + LogicError("nodeParam of type \"ndlTypeDotParameter\" doesn't have a dot in its name: %s", name.c_str()); } std::string nameBeforeDot = name.substr(0, firstDotPos); @@ -504,7 +504,7 @@ public: // if we still didn't get a value, throw an error if (nodeParam->GetEvalValue() == nullptr) { - Error("Logic Error: Dot name could not be resolved '%s': should have a node named '%ls' in computational network\n", nodeParam->GetName().c_str(), name.c_str()); + LogicError("Dot name could not be resolved '%s': should have a node named '%ls' in computational network\n", nodeParam->GetName().c_str(), name.c_str()); } } return 
nodeParam; @@ -525,7 +525,7 @@ public: if (script == NULL) { std::wstring name = baseName + L"." + msra::strfun::utf16(node->GetName()); - Error("Logic Error: no script for a parameter node in call to %ls\n", name.c_str()); + LogicError("no script for a parameter node in call to %ls\n", name.c_str()); } // evaluate the parameter if we haven't yet, or if we are in the resolve pass (need to set the inputs) @@ -567,7 +567,7 @@ public: } else { - Error("Parameter name could not be resolved '%s'\n", name.c_str()); + RuntimeError("Parameter name could not be resolved '%s'\n", name.c_str()); } } } @@ -590,7 +590,7 @@ public: default: { std::wstring name = baseName + L"." + msra::strfun::utf16(node->GetName()); - Error("Invalid parameter (macro definitions and arrays not allowed), see call to %ls\n", name.c_str()); + RuntimeError("Invalid parameter (macro definitions and arrays not allowed), see call to %ls\n", name.c_str()); } break; } From 29f5adff6eeedb9bdc1242e841b47a485d19791b Mon Sep 17 00:00:00 2001 From: Frank Seide Date: Thu, 30 Oct 2014 10:38:38 -0700 Subject: [PATCH 05/31] removed message.h from Solution --- Common/Include/message.h | 1 - DataReader/BinaryReader/BinaryReader.vcxproj | 1 - DataReader/HTKMLFReader/HTKMLFReader.vcxproj | 1 - DataReader/HTKMLFReader/message.h | 1 - DataReader/LUSequenceReader/LUSequenceReader.vcxproj | 1 - DataReader/SequenceReader/SequenceReader.vcxproj | 1 - DataReader/UCIFastReader/UCIFastReader.vcxproj | 1 - MachineLearning/CNTKEval/CNTKEval.vcxproj | 1 - MachineLearning/cn/cn.vcxproj | 1 - Math/Math/Math.vcxproj | 1 - 10 files changed, 10 deletions(-) delete mode 100644 Common/Include/message.h delete mode 100644 DataReader/HTKMLFReader/message.h diff --git a/Common/Include/message.h b/Common/Include/message.h deleted file mode 100644 index 6138b65df..000000000 --- a/Common/Include/message.h +++ /dev/null @@ -1 +0,0 @@ -// no longer used \ No newline at end of file diff --git a/DataReader/BinaryReader/BinaryReader.vcxproj 
b/DataReader/BinaryReader/BinaryReader.vcxproj index 1859fecd3..40f1aa78f 100644 --- a/DataReader/BinaryReader/BinaryReader.vcxproj +++ b/DataReader/BinaryReader/BinaryReader.vcxproj @@ -176,7 +176,6 @@ - diff --git a/DataReader/HTKMLFReader/HTKMLFReader.vcxproj b/DataReader/HTKMLFReader/HTKMLFReader.vcxproj index 5e1f55248..9f380a385 100644 --- a/DataReader/HTKMLFReader/HTKMLFReader.vcxproj +++ b/DataReader/HTKMLFReader/HTKMLFReader.vcxproj @@ -178,7 +178,6 @@ - diff --git a/DataReader/HTKMLFReader/message.h b/DataReader/HTKMLFReader/message.h deleted file mode 100644 index 98586bc2f..000000000 --- a/DataReader/HTKMLFReader/message.h +++ /dev/null @@ -1 +0,0 @@ -// removed \ No newline at end of file diff --git a/DataReader/LUSequenceReader/LUSequenceReader.vcxproj b/DataReader/LUSequenceReader/LUSequenceReader.vcxproj index 8698ffb3f..ad0bc7613 100644 --- a/DataReader/LUSequenceReader/LUSequenceReader.vcxproj +++ b/DataReader/LUSequenceReader/LUSequenceReader.vcxproj @@ -105,7 +105,6 @@ - diff --git a/DataReader/SequenceReader/SequenceReader.vcxproj b/DataReader/SequenceReader/SequenceReader.vcxproj index 893d8c69b..7c7913d36 100644 --- a/DataReader/SequenceReader/SequenceReader.vcxproj +++ b/DataReader/SequenceReader/SequenceReader.vcxproj @@ -105,7 +105,6 @@ - diff --git a/DataReader/UCIFastReader/UCIFastReader.vcxproj b/DataReader/UCIFastReader/UCIFastReader.vcxproj index e894841e4..9bb579937 100644 --- a/DataReader/UCIFastReader/UCIFastReader.vcxproj +++ b/DataReader/UCIFastReader/UCIFastReader.vcxproj @@ -176,7 +176,6 @@ - diff --git a/MachineLearning/CNTKEval/CNTKEval.vcxproj b/MachineLearning/CNTKEval/CNTKEval.vcxproj index bad48d9b3..54ad0edd3 100644 --- a/MachineLearning/CNTKEval/CNTKEval.vcxproj +++ b/MachineLearning/CNTKEval/CNTKEval.vcxproj @@ -107,7 +107,6 @@ - diff --git a/MachineLearning/cn/cn.vcxproj b/MachineLearning/cn/cn.vcxproj index 42e20126a..ebd270b41 100644 --- a/MachineLearning/cn/cn.vcxproj +++ b/MachineLearning/cn/cn.vcxproj @@ -229,7 
+229,6 @@ - diff --git a/Math/Math/Math.vcxproj b/Math/Math/Math.vcxproj index 6b1b386f3..03ad001e8 100644 --- a/Math/Math/Math.vcxproj +++ b/Math/Math/Math.vcxproj @@ -150,7 +150,6 @@ - From 8ac0d24f037f17cf93296bdce029e331ba11698f Mon Sep 17 00:00:00 2001 From: Frank Seide Date: Thu, 30 Oct 2014 10:50:04 -0700 Subject: [PATCH 06/31] Runtime/LogicError() now return 'bool' so that they can be used in a form DOSTUFF() || RuntimeError() if DOSTUFF() returns false upon failure. This is a common pattern in perl code and can improve readability by getting if statements out of your face. --- Common/Include/basetypes.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Common/Include/basetypes.h b/Common/Include/basetypes.h index eb64a834f..6e3226b39 100644 --- a/Common/Include/basetypes.h +++ b/Common/Include/basetypes.h @@ -929,7 +929,7 @@ using namespace msra::basetypes; // for compatibility #pragma warning (pop) // RuntimeError - throw a std::runtime_error with a formatted error string -static inline void RuntimeError (const char * format, ...) +static inline bool RuntimeError (const char * format, ...) { va_list args; char buffer[1024]; @@ -940,7 +940,7 @@ static inline void RuntimeError (const char * format, ...) }; // LogicError - throw a std::logic_error with a formatted error string -static inline void LogicError(const char * format, ...) +static inline bool LogicError(const char * format, ...) 
{ va_list args; char buffer[1024]; From 1e236a181f5b72c2d7345bcdb12295d4097ed109 Mon Sep 17 00:00:00 2001 From: Frank Seide Date: Thu, 30 Oct 2014 11:02:49 -0700 Subject: [PATCH 07/31] grouped projects into Solution folders; removed unused Win32 and Mixed configurations --- CNTKSolution/CNTKSolution.sln | 62 ++++++++--------------------------- 1 file changed, 13 insertions(+), 49 deletions(-) diff --git a/CNTKSolution/CNTKSolution.sln b/CNTKSolution/CNTKSolution.sln index f136d2858..51fc29bb2 100644 --- a/CNTKSolution/CNTKSolution.sln +++ b/CNTKSolution/CNTKSolution.sln @@ -14,7 +14,7 @@ Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "cn", "..\MachineLearning\cn {E6646FFE-3588-4276-8A15-8D65C22711C1} = {E6646FFE-3588-4276-8A15-8D65C22711C1} EndProjectSection EndProject -Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "UnitTests", "UnitTests", "{D45DF403-6781-444E-B654-A96868C5BE68}" +Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "Unit Tests", "Unit Tests", "{D45DF403-6781-444E-B654-A96868C5BE68}" EndProject Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "CNTKMathTest", "..\Math\CNTKMathTest\CNTKMathTest.vcxproj", "{6CEE834A-8104-46A8-8902-64C81BD7928F}" EndProject @@ -31,8 +31,6 @@ Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "UCIFastReader", "..\DataRea {1D5787D4-52E4-45DB-951B-82F220EE0C6A} = {1D5787D4-52E4-45DB-951B-82F220EE0C6A} EndProjectSection EndProject -Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "Solution Items", "Solution Items", "{2326995F-9ABE-4DEE-BAD0-147541548B5A}" -EndProject Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "BinaryReader", "..\DataReader\BinaryReader\BinaryReader.vcxproj", "{1D5787D4-52E4-45DB-951B-82F220EE0C6A}" ProjectSection(ProjectDependencies) = postProject {60BDB847-D0C4-4FD3-A947-0C15C08BCDB5} = {60BDB847-D0C4-4FD3-A947-0C15C08BCDB5} @@ -59,98 +57,56 @@ Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "CNTKEvalTest", "..\MachineL {482999D1-B7E2-466E-9F8D-2119F93EAFD9} = 
{482999D1-B7E2-466E-9F8D-2119F93EAFD9} EndProjectSection EndProject +Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "Reader Plugins", "Reader Plugins", "{33EBFE78-A1A8-4961-8938-92A271941F94}" +EndProject +Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "CNTK", "CNTK", "{DD043083-71A4-409A-AA91-F9C548DCF7EC}" +EndProject Global GlobalSection(SolutionConfigurationPlatforms) = preSolution - Debug|Mixed Platforms = Debug|Mixed Platforms Debug|x64 = Debug|x64 - Release|Mixed Platforms = Release|Mixed Platforms Release|x64 = Release|x64 EndGlobalSection GlobalSection(ProjectConfigurationPlatforms) = postSolution - {60BDB847-D0C4-4FD3-A947-0C15C08BCDB5}.Debug|Mixed Platforms.ActiveCfg = Debug|x64 - {60BDB847-D0C4-4FD3-A947-0C15C08BCDB5}.Debug|Mixed Platforms.Build.0 = Debug|x64 {60BDB847-D0C4-4FD3-A947-0C15C08BCDB5}.Debug|x64.ActiveCfg = Debug|x64 {60BDB847-D0C4-4FD3-A947-0C15C08BCDB5}.Debug|x64.Build.0 = Debug|x64 - {60BDB847-D0C4-4FD3-A947-0C15C08BCDB5}.Release|Mixed Platforms.ActiveCfg = Release|x64 - {60BDB847-D0C4-4FD3-A947-0C15C08BCDB5}.Release|Mixed Platforms.Build.0 = Release|x64 {60BDB847-D0C4-4FD3-A947-0C15C08BCDB5}.Release|x64.ActiveCfg = Release|x64 {60BDB847-D0C4-4FD3-A947-0C15C08BCDB5}.Release|x64.Build.0 = Release|x64 - {E6F26F9A-FF64-4F0A-B749-CD309EE357EE}.Debug|Mixed Platforms.ActiveCfg = Debug|x64 - {E6F26F9A-FF64-4F0A-B749-CD309EE357EE}.Debug|Mixed Platforms.Build.0 = Debug|x64 {E6F26F9A-FF64-4F0A-B749-CD309EE357EE}.Debug|x64.ActiveCfg = Debug|x64 {E6F26F9A-FF64-4F0A-B749-CD309EE357EE}.Debug|x64.Build.0 = Debug|x64 - {E6F26F9A-FF64-4F0A-B749-CD309EE357EE}.Release|Mixed Platforms.ActiveCfg = Release|x64 - {E6F26F9A-FF64-4F0A-B749-CD309EE357EE}.Release|Mixed Platforms.Build.0 = Release|x64 {E6F26F9A-FF64-4F0A-B749-CD309EE357EE}.Release|x64.ActiveCfg = Release|x64 {E6F26F9A-FF64-4F0A-B749-CD309EE357EE}.Release|x64.Build.0 = Release|x64 - {6CEE834A-8104-46A8-8902-64C81BD7928F}.Debug|Mixed Platforms.ActiveCfg = Debug|x64 - 
{6CEE834A-8104-46A8-8902-64C81BD7928F}.Debug|Mixed Platforms.Build.0 = Debug|x64 {6CEE834A-8104-46A8-8902-64C81BD7928F}.Debug|x64.ActiveCfg = Debug|x64 {6CEE834A-8104-46A8-8902-64C81BD7928F}.Debug|x64.Build.0 = Debug|x64 - {6CEE834A-8104-46A8-8902-64C81BD7928F}.Release|Mixed Platforms.ActiveCfg = Release|x64 - {6CEE834A-8104-46A8-8902-64C81BD7928F}.Release|Mixed Platforms.Build.0 = Release|x64 {6CEE834A-8104-46A8-8902-64C81BD7928F}.Release|x64.ActiveCfg = Release|x64 - {33D2FD22-DEF2-4507-A58A-368F641AEBE5}.Debug|Mixed Platforms.ActiveCfg = Debug|x64 - {33D2FD22-DEF2-4507-A58A-368F641AEBE5}.Debug|Mixed Platforms.Build.0 = Debug|x64 {33D2FD22-DEF2-4507-A58A-368F641AEBE5}.Debug|x64.ActiveCfg = Debug|x64 {33D2FD22-DEF2-4507-A58A-368F641AEBE5}.Debug|x64.Build.0 = Debug|x64 - {33D2FD22-DEF2-4507-A58A-368F641AEBE5}.Release|Mixed Platforms.ActiveCfg = Release|x64 - {33D2FD22-DEF2-4507-A58A-368F641AEBE5}.Release|Mixed Platforms.Build.0 = Release|x64 {33D2FD22-DEF2-4507-A58A-368F641AEBE5}.Release|x64.ActiveCfg = Release|x64 {33D2FD22-DEF2-4507-A58A-368F641AEBE5}.Release|x64.Build.0 = Release|x64 - {668BEED5-AC07-4F35-B3AE-EE65A7F9C976}.Debug|Mixed Platforms.ActiveCfg = Debug|x64 - {668BEED5-AC07-4F35-B3AE-EE65A7F9C976}.Debug|Mixed Platforms.Build.0 = Debug|x64 {668BEED5-AC07-4F35-B3AE-EE65A7F9C976}.Debug|x64.ActiveCfg = Debug|x64 {668BEED5-AC07-4F35-B3AE-EE65A7F9C976}.Debug|x64.Build.0 = Debug|x64 - {668BEED5-AC07-4F35-B3AE-EE65A7F9C976}.Release|Mixed Platforms.ActiveCfg = Release|x64 - {668BEED5-AC07-4F35-B3AE-EE65A7F9C976}.Release|Mixed Platforms.Build.0 = Release|x64 {668BEED5-AC07-4F35-B3AE-EE65A7F9C976}.Release|x64.ActiveCfg = Release|x64 - {E6646FFE-3588-4276-8A15-8D65C22711C1}.Debug|Mixed Platforms.ActiveCfg = Debug|x64 - {E6646FFE-3588-4276-8A15-8D65C22711C1}.Debug|Mixed Platforms.Build.0 = Debug|x64 {E6646FFE-3588-4276-8A15-8D65C22711C1}.Debug|x64.ActiveCfg = Debug|x64 {E6646FFE-3588-4276-8A15-8D65C22711C1}.Debug|x64.Build.0 = Debug|x64 - 
{E6646FFE-3588-4276-8A15-8D65C22711C1}.Release|Mixed Platforms.ActiveCfg = Release|x64 - {E6646FFE-3588-4276-8A15-8D65C22711C1}.Release|Mixed Platforms.Build.0 = Release|x64 {E6646FFE-3588-4276-8A15-8D65C22711C1}.Release|x64.ActiveCfg = Release|x64 {E6646FFE-3588-4276-8A15-8D65C22711C1}.Release|x64.Build.0 = Release|x64 - {1D5787D4-52E4-45DB-951B-82F220EE0C6A}.Debug|Mixed Platforms.ActiveCfg = Debug|x64 - {1D5787D4-52E4-45DB-951B-82F220EE0C6A}.Debug|Mixed Platforms.Build.0 = Debug|x64 {1D5787D4-52E4-45DB-951B-82F220EE0C6A}.Debug|x64.ActiveCfg = Debug|x64 {1D5787D4-52E4-45DB-951B-82F220EE0C6A}.Debug|x64.Build.0 = Debug|x64 - {1D5787D4-52E4-45DB-951B-82F220EE0C6A}.Release|Mixed Platforms.ActiveCfg = Release|x64 - {1D5787D4-52E4-45DB-951B-82F220EE0C6A}.Release|Mixed Platforms.Build.0 = Release|x64 {1D5787D4-52E4-45DB-951B-82F220EE0C6A}.Release|x64.ActiveCfg = Release|x64 {1D5787D4-52E4-45DB-951B-82F220EE0C6A}.Release|x64.Build.0 = Release|x64 - {9A2F2441-5972-4EA8-9215-4119FCE0FB68}.Debug|Mixed Platforms.ActiveCfg = Debug|x64 - {9A2F2441-5972-4EA8-9215-4119FCE0FB68}.Debug|Mixed Platforms.Build.0 = Debug|x64 {9A2F2441-5972-4EA8-9215-4119FCE0FB68}.Debug|x64.ActiveCfg = Debug|x64 {9A2F2441-5972-4EA8-9215-4119FCE0FB68}.Debug|x64.Build.0 = Debug|x64 - {9A2F2441-5972-4EA8-9215-4119FCE0FB68}.Release|Mixed Platforms.ActiveCfg = Release|x64 - {9A2F2441-5972-4EA8-9215-4119FCE0FB68}.Release|Mixed Platforms.Build.0 = Release|x64 {9A2F2441-5972-4EA8-9215-4119FCE0FB68}.Release|x64.ActiveCfg = Release|x64 {9A2F2441-5972-4EA8-9215-4119FCE0FB68}.Release|x64.Build.0 = Release|x64 - {62836DC1-DF77-4B98-BF2D-45C943B7DDC6}.Debug|Mixed Platforms.ActiveCfg = Debug|x64 - {62836DC1-DF77-4B98-BF2D-45C943B7DDC6}.Debug|Mixed Platforms.Build.0 = Debug|x64 {62836DC1-DF77-4B98-BF2D-45C943B7DDC6}.Debug|x64.ActiveCfg = Debug|x64 {62836DC1-DF77-4B98-BF2D-45C943B7DDC6}.Debug|x64.Build.0 = Debug|x64 - {62836DC1-DF77-4B98-BF2D-45C943B7DDC6}.Release|Mixed Platforms.ActiveCfg = Release|x64 - 
{62836DC1-DF77-4B98-BF2D-45C943B7DDC6}.Release|Mixed Platforms.Build.0 = Release|x64 {62836DC1-DF77-4B98-BF2D-45C943B7DDC6}.Release|x64.ActiveCfg = Release|x64 {62836DC1-DF77-4B98-BF2D-45C943B7DDC6}.Release|x64.Build.0 = Release|x64 - {482999D1-B7E2-466E-9F8D-2119F93EAFD9}.Debug|Mixed Platforms.ActiveCfg = Debug|x64 - {482999D1-B7E2-466E-9F8D-2119F93EAFD9}.Debug|Mixed Platforms.Build.0 = Debug|x64 {482999D1-B7E2-466E-9F8D-2119F93EAFD9}.Debug|x64.ActiveCfg = Debug|x64 {482999D1-B7E2-466E-9F8D-2119F93EAFD9}.Debug|x64.Build.0 = Debug|x64 - {482999D1-B7E2-466E-9F8D-2119F93EAFD9}.Release|Mixed Platforms.ActiveCfg = Release|x64 - {482999D1-B7E2-466E-9F8D-2119F93EAFD9}.Release|Mixed Platforms.Build.0 = Release|x64 {482999D1-B7E2-466E-9F8D-2119F93EAFD9}.Release|x64.ActiveCfg = Release|x64 {482999D1-B7E2-466E-9F8D-2119F93EAFD9}.Release|x64.Build.0 = Release|x64 - {0F30EBCF-09F3-4EED-BF54-4214BCE53FEC}.Debug|Mixed Platforms.ActiveCfg = Debug|x64 - {0F30EBCF-09F3-4EED-BF54-4214BCE53FEC}.Debug|Mixed Platforms.Build.0 = Debug|x64 {0F30EBCF-09F3-4EED-BF54-4214BCE53FEC}.Debug|x64.ActiveCfg = Debug|x64 {0F30EBCF-09F3-4EED-BF54-4214BCE53FEC}.Debug|x64.Build.0 = Debug|x64 - {0F30EBCF-09F3-4EED-BF54-4214BCE53FEC}.Release|Mixed Platforms.ActiveCfg = Release|Win32 - {0F30EBCF-09F3-4EED-BF54-4214BCE53FEC}.Release|Mixed Platforms.Build.0 = Release|Win32 {0F30EBCF-09F3-4EED-BF54-4214BCE53FEC}.Release|x64.ActiveCfg = Release|x64 EndGlobalSection GlobalSection(SolutionProperties) = preSolution @@ -160,5 +116,13 @@ Global {6CEE834A-8104-46A8-8902-64C81BD7928F} = {D45DF403-6781-444E-B654-A96868C5BE68} {668BEED5-AC07-4F35-B3AE-EE65A7F9C976} = {D45DF403-6781-444E-B654-A96868C5BE68} {0F30EBCF-09F3-4EED-BF54-4214BCE53FEC} = {D45DF403-6781-444E-B654-A96868C5BE68} + {1D5787D4-52E4-45DB-951B-82F220EE0C6A} = {33EBFE78-A1A8-4961-8938-92A271941F94} + {33D2FD22-DEF2-4507-A58A-368F641AEBE5} = {33EBFE78-A1A8-4961-8938-92A271941F94} + {62836DC1-DF77-4B98-BF2D-45C943B7DDC6} = 
{33EBFE78-A1A8-4961-8938-92A271941F94} + {9A2F2441-5972-4EA8-9215-4119FCE0FB68} = {33EBFE78-A1A8-4961-8938-92A271941F94} + {E6646FFE-3588-4276-8A15-8D65C22711C1} = {33EBFE78-A1A8-4961-8938-92A271941F94} + {E6F26F9A-FF64-4F0A-B749-CD309EE357EE} = {DD043083-71A4-409A-AA91-F9C548DCF7EC} + {482999D1-B7E2-466E-9F8D-2119F93EAFD9} = {DD043083-71A4-409A-AA91-F9C548DCF7EC} + {60BDB847-D0C4-4FD3-A947-0C15C08BCDB5} = {DD043083-71A4-409A-AA91-F9C548DCF7EC} EndGlobalSection EndGlobal From 69643b7a94dce92390cfe3341d5d82f38061ab7f Mon Sep 17 00:00:00 2001 From: Frank Seide Date: Thu, 30 Oct 2014 14:41:33 -0700 Subject: [PATCH 08/31] removed WAV reading from fileutil.h, to remove an unnecessary Win32 dependency; changed a 'byte*' to a 'char*' in GPUSparseMatrix.cu allowing to remove dependency on Windows.h; fixed a Sleep() call for Linux compat --- Common/File.cpp | 7 +++++++ Common/Include/File.h | 21 ++++++++++++++++----- Common/Include/basetypes.h | 6 +++++- Common/Include/fileutil.h | 4 ++-- Common/fileutil.cpp | 2 ++ Math/Math/GPUSparseMatrix.cu | 6 +++--- 6 files changed, 35 insertions(+), 11 deletions(-) diff --git a/Common/File.cpp b/Common/File.cpp index 9d679f8b1..44f057fb7 100644 --- a/Common/File.cpp +++ b/Common/File.cpp @@ -12,6 +12,12 @@ #include "File.h" #include #include +#ifdef _WIN32 +#include +#endif +#ifdef __unix__ +#include +#endif namespace Microsoft{ namespace MSR { namespace CNTK { @@ -620,4 +626,5 @@ void File::SetPosition(uint64_t pos) { fsetpos (m_file, pos); } + }}} diff --git a/Common/Include/File.h b/Common/Include/File.h index 39d855f30..ef00f5cc5 100644 --- a/Common/Include/File.h +++ b/Common/Include/File.h @@ -8,6 +8,12 @@ #include #include #include +#ifdef _WIN32 +#include +#endif +#ifdef __unix__ +#include +#endif #include "fileutil.h" // for f{ge,pu}t{,Text}() namespace Microsoft{ namespace MSR { namespace CNTK { @@ -40,8 +46,9 @@ enum FileMarker }; // attempt a given operation (lambda) and retry multiple times -// body - the lambda to retry 
-template static void attempt (int retries, const FUNCTION & body) +// body - the lambda to retry, must be restartable + +template static void attempt(int retries, const FUNCTION & body) { for (int attempt = 1; ; attempt++) { @@ -53,16 +60,20 @@ template static void attempt (int retries, const FUNCTION & b } catch (const std::exception & e) { + void sleep(size_t ms); if (attempt >= retries) throw; // failed N times --give up and rethrow the error fprintf (stderr, "attempt: %s, retrying %d-th time out of %d...\n", e.what(), attempt+1, retries); - ::Sleep (1000); // wait a little, then try again -#ifdef _DEBUG - DebugBreak(); + // wait a little, then try again +#ifdef _WIN32 + ::Sleep(1000); +#else // assuming __unix__ + sleep(1); #endif } } } + template static void attempt (const FUNCTION & body) { static const int retries = 5; diff --git a/Common/Include/basetypes.h b/Common/Include/basetypes.h index 6e3226b39..73e5cb450 100644 --- a/Common/Include/basetypes.h +++ b/Common/Include/basetypes.h @@ -73,13 +73,15 @@ OACR_WARNING_DISABLE(POTENTIAL_ARGUMENT_TYPE_MISMATCH, "Not level1 or level2_sec #include #include // for HUGE_VAL #include -using namespace std; +#include #include #include #ifdef _MSC_VER #include // for CRITICAL_SECTION and Unicode conversion functions --TODO: is there a portable alternative? #endif +using namespace std; + // CRT error handling seems to not be included in wince headers // so we define our own imports #ifdef UNDER_CE @@ -890,6 +892,7 @@ template static inline void byteswap (V & v) throw() bytereverse (v[i]); } +#if 0 // execute a block with retry // Block must be restartable. // Use this when writing small files to those unreliable Windows servers. 
@@ -913,6 +916,7 @@ template static void attempt (int retries, const FUNCTION & b } } } +#endif };}; // namespace diff --git a/Common/Include/fileutil.h b/Common/Include/fileutil.h index 3ef7c1bb3..9a331722d 100644 --- a/Common/Include/fileutil.h +++ b/Common/Include/fileutil.h @@ -229,8 +229,6 @@ #define _FILEUTIL_ #include -#include // for mmreg.h and FILETIME --TODO: we should be able to remove this (for portability; currently CUDA chokes) -#include #include // for std::find #include #include @@ -701,6 +699,7 @@ namespace msra { namespace files { bool fuptodate (const wstring & target, const wstring & input, bool inputrequired = true); };}; +#if 0 // ---------------------------------------------------------------------------- // simple support for WAV file I/O // ---------------------------------------------------------------------------- @@ -740,6 +739,7 @@ void fputwfx (FILE *f, const WAVEFORMATEX & wfx, unsigned int numSamples); // channel. j is sample index. // ---------------------------------------------------------------------------- void fgetraw (FILE *f,std::vector< std::vector > & data,const WAVEHEADER & wavhd); +#endif // ---------------------------------------------------------------------------- // temp functions -- clean these up diff --git a/Common/fileutil.cpp b/Common/fileutil.cpp index 24637923f..918ae7359 100644 --- a/Common/fileutil.cpp +++ b/Common/fileutil.cpp @@ -24,7 +24,9 @@ #include #include #include +#ifdef _WIN32 #include "Windows.h" // for FILETIME +#endif #include // for std::find #include diff --git a/Math/Math/GPUSparseMatrix.cu b/Math/Math/GPUSparseMatrix.cu index 5fd1c4fd5..96ccb5680 100644 --- a/Math/Math/GPUSparseMatrix.cu +++ b/Math/Math/GPUSparseMatrix.cu @@ -1054,7 +1054,7 @@ namespace Microsoft { namespace MSR { namespace CNTK { if (cSize >= rowBufferRequired && c.NzLocation() != NULL && canReuseBuffer) { // determine the final location if we reuse the buffer - csrRowPtrC = (int*)((byte*)c.NzLocation() + nzBufSize); + 
csrRowPtrC = (int*)((char*)c.NzLocation() + nzBufSize); } else { @@ -2151,7 +2151,7 @@ namespace Microsoft { namespace MSR { namespace CNTK { } // What we would like to do here, is transfer to CPUSparse and save, do that when the format is the same - byte* hostBuffer = new byte[us.BufferSize()]; + char* hostBuffer = new char[us.BufferSize()]; // TODO: use std::shared_ptr GPUSparseMatrix hostSide(us.GetNumRows(), us.GetNumCols(), us.NzCount(), (ElemType*)hostBuffer, us.GetFormat()); CUDACALL(cudaMemcpy(hostBuffer, us.NzLocation(),us.BufferSize(),cudaMemcpyDeviceToHost)); @@ -2186,4 +2186,4 @@ namespace Microsoft { namespace MSR { namespace CNTK { template MATH_API File& operator<<(File& stream, const GPUSparseMatrix& us); template MATH_API File& operator<<(File& stream, const GPUSparseMatrix& us); -}}} \ No newline at end of file +}}} From 9fe2259f157f8a5467791d4d644fc0b1d36a9026 Mon Sep 17 00:00:00 2001 From: Frank Seide Date: Thu, 30 Oct 2014 16:28:43 -0700 Subject: [PATCH 09/31] added two missing headers for compiling with GCC; dummy implementation utf8() and utf16() for GCC (which currently only works for 7-bit ASCII characters) --- Common/File.cpp | 1 + Common/Include/basetypes.h | 23 +++++++++++++++++++++++ 2 files changed, 24 insertions(+) diff --git a/Common/File.cpp b/Common/File.cpp index 44f057fb7..fc77c0af3 100644 --- a/Common/File.cpp +++ b/Common/File.cpp @@ -12,6 +12,7 @@ #include "File.h" #include #include +#include #ifdef _WIN32 #include #endif diff --git a/Common/Include/basetypes.h b/Common/Include/basetypes.h index 73e5cb450..ba90766a9 100644 --- a/Common/Include/basetypes.h +++ b/Common/Include/basetypes.h @@ -67,6 +67,7 @@ OACR_WARNING_DISABLE(POTENTIAL_ARGUMENT_TYPE_MISMATCH, "Not level1 or level2_sec #endif #include +#include #include // include here because we redefine some names later #include #include @@ -548,6 +549,7 @@ typedef strfun::_strprintf wstrprintf; // wchar_t version #endif // string-encoding conversion functions +#ifdef _WIN32 
struct utf8 : std::string { utf8 (const std::wstring & p) // utf-16 to -8 { size_t len = p.length(); @@ -573,6 +575,27 @@ struct utf16 : std::wstring { utf16 (const std::string & p) // utf-8 to -16 ASSERT (rc < buf.size ()); (*(std::wstring*)this) = &buf[0]; }}; +#else // TODO: complete this once we are building on actual Linux, currently using default locale instead of UTF-8 locale +static inline std::string utf8(const std::wstring & p) // output: UTF-8 +{ + size_t len = p.length(); + msra::basetypes::fixed_vector buf(2 * len + 1); // max: 1 wchar => 2 mb chars + std::fill(buf.begin(), buf.end(), 0); + // BUGBUG: We need to set the locale, so for now this only works for plain ASCII + ::wcstombs(&buf[0], p.c_str(), 2 * len + 1); + return std::string(&buf[0]); +} +static inline std::wstring utf16(const std::string & p) // input: UTF-8 +{ + size_t len = p.length(); + msra::basetypes::fixed_vector buf(len + 1); // max: >1 mb chars => 1 wchar + std::fill(buf.begin(), buf.end(), (wchar_t)0); + OACR_WARNING_SUPPRESS(UNSAFE_STRING_FUNCTION, "Reviewed OK. size checked. [rogeryu 2006/03/21]"); + // BUGBUG: We need to set the locale, so for now this only works for plain ASCII + ::mbstowcs(&buf[0], p.c_str(), len + 1); + return std::wstring(&buf[0]); +} +#endif #pragma warning(push) #pragma warning(disable : 4996) // Reviewed by Yusheng Li, March 14, 2006. depr. 
fn (wcstombs, mbstowcs) From 88125bec4c3db6b25f543b995f7eb5d12766820c Mon Sep 17 00:00:00 2001 From: Frank Seide Date: Thu, 30 Oct 2014 16:40:23 -0700 Subject: [PATCH 10/31] get/setfiletime() removed, as they are only used by functions identical to existing fuptodate() and fexists(), which we use instead --- Common/Include/fileutil.h | 9 --------- Common/fileutil.cpp | 10 ++++++---- MachineLearning/cn/SGD.h | 19 +------------------ MachineLearning/cn/cn.cpp | 7 +++---- 4 files changed, 10 insertions(+), 35 deletions(-) diff --git a/Common/Include/fileutil.h b/Common/Include/fileutil.h index 9a331722d..51ef8f82a 100644 --- a/Common/Include/fileutil.h +++ b/Common/Include/fileutil.h @@ -668,15 +668,6 @@ namespace msra { namespace files { vector fgetfilelines (const wstring & pathname, vector & readbuffer); };}; -// ---------------------------------------------------------------------------- -// getfiletime(), setfiletime(): access modification time -// ---------------------------------------------------------------------------- - -// Note: we use struct _FILETIME instead of FILETIME to avoid having to include Windows.h, for increased portability. -// As a next step, we shall make these two functions local to fileutil.cpp, and move all code that uses it in there as well. 
-bool getfiletime (const std::wstring & path, struct _FILETIME & time); -void setfiletime (const std::wstring & path, const struct _FILETIME & time); - // ---------------------------------------------------------------------------- // expand_wildcards() -- expand a path with wildcards (also intermediate ones) // ---------------------------------------------------------------------------- diff --git a/Common/fileutil.cpp b/Common/fileutil.cpp index 918ae7359..675c3d3c7 100644 --- a/Common/fileutil.cpp +++ b/Common/fileutil.cpp @@ -51,9 +51,7 @@ template <> const wchar_t* GetScanFormatString(unsigned int) {return L" %u"; template <> const wchar_t* GetScanFormatString(unsigned long) {return L" %lu";} template <> const wchar_t* GetScanFormatString(float) {return L" %g";} template <> const wchar_t* GetScanFormatString(double) {return L" %lg";} -#if (SIZE_MAX != UINT_MAX) // on 32 bit platforms, the following will be flagged as a redefinition template <> const wchar_t* GetScanFormatString(size_t) {return L" %llu";} -#endif template <> const wchar_t* GetScanFormatString(long long) {return L" %lli";} template <> const wchar_t* GetFormatString(char) {return L" %hc";} @@ -66,9 +64,7 @@ template <> const wchar_t* GetFormatString(unsigned int) {return L" %u";} template <> const wchar_t* GetFormatString(unsigned long) {return L" %lu";} template <> const wchar_t* GetFormatString(float) {return L" %.9g";} template <> const wchar_t* GetFormatString(double) {return L" %.17g";} -#if (SIZE_MAX != UINT_MAX) template <> const wchar_t* GetFormatString(size_t) { return L" %llu"; } -#endif template <> const wchar_t* GetFormatString(long long) {return L" %lli";} // ---------------------------------------------------------------------------- @@ -1559,10 +1555,16 @@ bool msra::files::fuptodate (const wstring & target, const wstring & input, bool if (!getfiletime (target, targettime)) return false; // target missing: need to update FILETIME inputtime; if (!getfiletime (input, inputtime)) 
return !inputrequired; // input missing: if required, pretend to be out of date as to force caller to fail +#if 1 // formerly called IsResultFileUpdateToDate() + // up to date if target has higher time stamp + return (targettime.dwHighDateTime > inputtime.dwHighDateTime) || + (targettime.dwHighDateTime == inputtime.dwHighDateTime && targettime.dwLowDateTime >= inputtime.dwLowDateTime); +#else ULARGE_INTEGER targett, inputt; memcpy (&targett, &targettime, sizeof (targett)); memcpy (&inputt, &inputtime, sizeof (inputt)); return !(targett.QuadPart < inputt.QuadPart); // up to date if target not older than input +#endif } /// separate string by separator diff --git a/MachineLearning/cn/SGD.h b/MachineLearning/cn/SGD.h index 1c067ccf2..0606a74c0 100644 --- a/MachineLearning/cn/SGD.h +++ b/MachineLearning/cn/SGD.h @@ -1252,7 +1252,7 @@ protected: { const wstring prevEpochFile = GetModelNameForEpoch (e-1); - if (IsResultFileUpdateToDate (curEpochFile, prevEpochFile, false)) + if (msra::files::fuptodate (curEpochFile, prevEpochFile, false)) { firstEpoch = size_t(e)+1; break; @@ -1264,23 +1264,6 @@ protected: return firstEpoch; } - //up to date if resultFile is older than srcFile or missing - // TODO: move this to fileutil.h to allow for portable implementation - bool IsResultFileUpdateToDate (const wstring & resultFile, const wstring & srcFile, const bool IsSrcFileNeeded) - { - FILETIME resultFileTime; - if (!getfiletime (resultFile, resultFileTime)) - return false; // not up to date is resultFile is missing - - FILETIME srcFileTime; - if (!getfiletime (srcFile, srcFileTime)) - return !IsSrcFileNeeded; // srcFile missing: if required, the result file is not up to date - - //up to date if resultFile has higher time stamp - return (resultFileTime.dwHighDateTime > srcFileTime.dwHighDateTime) || - (resultFileTime.dwHighDateTime == srcFileTime.dwHighDateTime && resultFileTime.dwLowDateTime >= srcFileTime.dwLowDateTime); - } - AdaptationRegType ParseAdaptationRegType(wstring s) 
{ transform(s.begin(), s.end(), s.begin(),tolower); diff --git a/MachineLearning/cn/cn.cpp b/MachineLearning/cn/cn.cpp index dbf4a1601..f9c12942e 100644 --- a/MachineLearning/cn/cn.cpp +++ b/MachineLearning/cn/cn.cpp @@ -177,12 +177,11 @@ void DoCrossValidate(const ConfigParameters& config) { wstring cvModelPath = msra::strfun::wstrprintf (L"%ws.%lld", modelPath.c_str(), i); - FILETIME resultFileTime; - if (!getfiletime (cvModelPath, resultFileTime)) + if (!fexists (cvModelPath)) { fprintf(stderr, "model %ws does not exist.\n", cvModelPath.c_str()); - if (finalModelEvaluated || !getfiletime (modelPath, resultFileTime)) - continue; //file missing + if (finalModelEvaluated || !fexists (modelPath)) + continue; // file missing else { cvModelPath = modelPath; From c848afb42be1aeb7a8fb9911c939767291cd3c44 Mon Sep 17 00:00:00 2001 From: Malcolm Slaney Date: Mon, 22 Sep 2014 12:10:36 -0700 Subject: [PATCH 11/31] Starting to fix things so they run under Linux --- Math/Math/Makefile | 32 ++++++++++++++++++++++++++++++++ 1 file changed, 32 insertions(+) create mode 100644 Math/Math/Makefile diff --git a/Math/Math/Makefile b/Math/Math/Makefile new file mode 100644 index 000000000..154f2ad83 --- /dev/null +++ b/Math/Math/Makefile @@ -0,0 +1,32 @@ +CSOURCES = CPUMatrix.cpp CPUSparseMatrix.cpp Matrix.cpp + +OBJECTS = CPUMatrix.o CPUSparseMatrix.o Matrix.o \ + GPUSparseMatrix.o GPUWatcher.o \ + GPUMatrixCUDAKernels.o GPUMatrix.o + +INCLUDES = -I../../Common/Include -I/opt/acml5.3.1/gfortran64_mp_int64/include + +DEPS = + +CFLAGS = $(INCLUDES) \ + -D BASETYPES_NO_UNSAFECRTOVERLOAD -DBASETYPES_NO_STRPRINTF \ + -DLINUX -D_FILEUTIL_ -Wnon-template-friend -std=c++11 + +NVCFLAGS = -DLINUX -I../../Common/Include -D_FILEUTIL_ -arch sm_11 + +CXX = gcc +NVCC = nvcc + +all: libmatrixmat.so + +libmatrixmat.so: $(OBJECTS) + $(CXX) -shared $(OBJECTS) -o libmatrixmat.so + +libmatrixmat.so: $(OBJECTS) + + +%.o: %.cpp $(DEPS) + $(CXX) -c $(CFLAGS) $< -o $@ + +%.o: %.cu $(DEPS) + $(NVCC) -c 
$(NVCFLAGS) $< -o $@ From 33e48b635fd92e11188f039afd3f696944633b4e Mon Sep 17 00:00:00 2001 From: Malcolm Slaney Date: Mon, 22 Sep 2014 12:21:34 -0700 Subject: [PATCH 12/31] First attempt at getting stuff to compile under Linux. Not everything in Math/Math works, but hopefully this is forward progress. --- Common/Include/File.h | 14 +- Common/Include/basetypes.h | 50 ++- Math/Math/CPUMatrix.cpp | 421 ++++++++++---------- Math/Math/CPUMatrix.h | 21 +- Math/Math/CPUSparseMatrix.cpp | 183 +++++---- Math/Math/CPUSparseMatrix.h | 13 +- Math/Math/CommonMatrix.h | 13 +- Math/Math/GPUMatrix.cu | 599 ++++++++++++++++------------- Math/Math/GPUMatrix.cuh | 26 +- Math/Math/GPUMatrixCUDAKernels.cu | 6 +- Math/Math/GPUSparseMatrix.cu | 527 ++++++++++++------------- Math/Math/GPUSparseMatrix.cuh | 45 ++- Math/Math/InstantiateTemplates.cpp | 6 +- Math/Math/Matrix.cpp | 92 +++-- Math/Math/stdafx.h | 6 +- Math/Math/targetver.h | 4 + 16 files changed, 1149 insertions(+), 877 deletions(-) diff --git a/Common/Include/File.h b/Common/Include/File.h index ef00f5cc5..306508c77 100644 --- a/Common/Include/File.h +++ b/Common/Include/File.h @@ -123,13 +123,18 @@ public: template File& operator<<(T val) { +#ifndef LINUX attempt([=]() +#endif { if (IsTextBased()) fputText(m_file, val); else fput(m_file, val); - }); + } +#ifndef LINUX + ); +#endif return *this; } File& operator<<(const std::wstring& val); @@ -156,13 +161,18 @@ public: template File& operator>>(T& val) { +#ifndef LINUX attempt([&]() +#endif { if (IsTextBased()) fgetText(m_file, val); else fget(m_file, val); - }); + } +#ifndef LINUX + ); +#endif return *this; } diff --git a/Common/Include/basetypes.h b/Common/Include/basetypes.h index ba90766a9..7b424502c 100644 --- a/Common/Include/basetypes.h +++ b/Common/Include/basetypes.h @@ -7,6 +7,12 @@ #ifndef _BASETYPES_ #define _BASETYPES_ +#ifdef LINUX +typedef char16_t TCHAR; +#include +#define vsprintf_s vsprintf /* Not sure this is right... 
Malcolm */ +#endif /* LINUX */ + #ifndef UNDER_CE // fixed-buffer overloads not available for wince #ifdef _CRT_SECURE_CPP_OVERLOAD_STANDARD_NAMES // fixed-buffer overloads for strcpy() etc. #undef _CRT_SECURE_CPP_OVERLOAD_STANDARD_NAMES @@ -72,7 +78,7 @@ OACR_WARNING_DISABLE(POTENTIAL_ARGUMENT_TYPE_MISMATCH, "Not level1 or level2_sec #include #include #include -#include // for HUGE_VAL +#include // for HUGE_VAL // potential double isnan definition #include #include #include @@ -283,6 +289,16 @@ public: noncopyable(){} }; + +#ifdef LINUX +#define CRITICAL_SECTION int +void InitializeCriticalSection(int *) {} +void DeleteCriticalSection(int *) {} +void EnterCriticalSection(int *) {} +void LeaveCriticalSection(int *) {} + +#endif + // class CCritSec and CAutoLock -- simple critical section handling // TODO: Currently only working under Windows; BROKEN otherwise, to be fixed class CCritSec @@ -305,6 +321,8 @@ public: #endif }; +#ifndef LINUX + // locks a critical section, and unlocks it automatically // when the lock goes out of scope class CAutoLock @@ -399,6 +417,8 @@ public: }; #endif +#endif /* LINUX */ + };}; // namespace #if 0 //ndef BASETYPES_NO_UNSAFECRTOVERLOAD // if on, no unsafe CRT overload functions @@ -552,6 +572,7 @@ typedef strfun::_strprintf wstrprintf; // wchar_t version #ifdef _WIN32 struct utf8 : std::string { utf8 (const std::wstring & p) // utf-16 to -8 { +#ifdef MALCOLM size_t len = p.length(); if (len == 0) { return;} // empty string msra::basetypes::fixed_vector buf (3 * len + 1); // max: 1 wchar => up to 3 mb chars @@ -561,9 +582,11 @@ struct utf8 : std::string { utf8 (const std::wstring & p) // utf-16 to -8 &buf[0], (int) buf.size(), NULL, NULL); if (rc == 0) throw std::runtime_error ("WideCharToMultiByte"); (*(std::string*)this) = &buf[0]; +#endif /* Malcolm */ }}; struct utf16 : std::wstring { utf16 (const std::string & p) // utf-8 to -16 { +#ifdef MALCOLM size_t len = p.length(); if (len == 0) { return;} // empty string 
msra::basetypes::fixed_vector buf (len + 1); @@ -574,6 +597,7 @@ struct utf16 : std::wstring { utf16 (const std::string & p) // utf-8 to -16 if (rc == 0) throw std::runtime_error ("MultiByteToWideChar"); ASSERT (rc < buf.size ()); (*(std::wstring*)this) = &buf[0]; +#endif /* Malcolm */ }}; #else // TODO: complete this once we are building on actual Linux, currently using default locale instead of UTF-8 locale static inline std::string utf8(const std::wstring & p) // output: UTF-8 @@ -603,8 +627,10 @@ static inline std::string wcstombs (const std::wstring & p) // output: MBCS { size_t len = p.length(); msra::basetypes::fixed_vector buf (2 * len + 1); // max: 1 wchar => 2 mb chars +#ifdef MALCOLM std::fill (buf.begin (), buf.end (), 0); ::wcstombs (&buf[0], p.c_str(), 2 * len + 1); +#endif /* Malcolm */ return std::string (&buf[0]); } static inline std::wstring mbstowcs (const std::string & p) // input: MBCS @@ -647,7 +673,7 @@ template static inline std::basic_string<_T> join (const std::vector auto_handle; #endif +#ifdef MALCOLM // like auto_ptr but calls freeFunc_p (type free_func_t) instead of delete to clean up // minor difference - wrapped object is T, not T *, so to wrap a // T *, use auto_clean @@ -802,6 +832,7 @@ public: operator const T () const { return it; } T detach () { T tmp = it; it = 0; return tmp; } // release ownership of object }; +#endif /* MALCOLM */ #if 0 // simple timer @@ -844,12 +875,23 @@ namespace msra { namespace files { class textreader { +#ifndef LINUX msra::basetypes::auto_file_ptr f; +#else + FILE *f; +#endif /* LINUX */ std::vector buf; // read buffer (will only grow, never shrink) int ch; // next character (we need to read ahead by one...) 
char getch() { char prevch = (char) ch; ch = fgetc (f); return prevch; } public: +#ifndef LINUX textreader (const std::wstring & path) : f (path.c_str(), "rb") { buf.reserve (10000); ch = fgetc (f); } +#else + textreader (const std::wstring & path) { + f = fopen((char *)path.c_str(), "rb"); + ch = fgetc(f); /* I Think this is right ... Malcolm */ + } +#endif /* LINUX */ operator bool() const { return ch != EOF; } // true if still a line to read std::string getline() // get and consume the next line { @@ -935,7 +977,11 @@ template static void attempt (int retries, const FUNCTION & b if (attempt >= retries) throw; // failed N times --give up and rethrow the error fprintf (stderr, "attempt: %s, retrying %d-th time out of %d...\n", e.what(), attempt+1, retries); +#ifndef LINUX ::Sleep (1000); // wait a little, then try again +#else + sleep(1); +#endif /* LINUX */ } } } diff --git a/Math/Math/CPUMatrix.cpp b/Math/Math/CPUMatrix.cpp index 23b1e5585..d76320f66 100644 --- a/Math/Math/CPUMatrix.cpp +++ b/Math/Math/CPUMatrix.cpp @@ -17,7 +17,19 @@ #include "CPUMatrix.h" #include #include +#include + +#ifndef LINUX #include +#else + +#ifndef max +#define max(a,b) (((a) > (b)) ? 
(a) : (b)) +#endif + +#include +#endif /* LINUX */ + #ifdef LEAKDETECT #include #endif @@ -83,14 +95,14 @@ namespace Microsoft { namespace MSR { namespace CNTK { template void CPUMatrix::ZeroInit() { - m_computeDevice = CPUDEVICE; - m_pArray = nullptr; - m_numRows = 0; - m_numCols = 0; - m_elemSizeAllocated = 0; - m_matrixName=NULL; - m_format = matrixFormatDense; - m_externalBuffer = false; + this->m_computeDevice = CPUDEVICE; + this->m_pArray = nullptr; + this->m_numRows = 0; + this->m_numCols = 0; + this->m_elemSizeAllocated = 0; + this->m_matrixName=NULL; + this->m_format = matrixFormatDense; + this->m_externalBuffer = false; } template @@ -112,13 +124,13 @@ namespace Microsoft { namespace MSR { namespace CNTK { { ZeroInit(); - m_numRows = numRows; - m_numCols = numCols; - m_elemSizeAllocated = GetNumElements(); + this->m_numRows = numRows; + this->m_numCols = numCols; + this->m_elemSizeAllocated = this->GetNumElements(); - if (m_elemSizeAllocated != 0) + if (this->m_elemSizeAllocated != 0) { - m_pArray = new ElemType[m_elemSizeAllocated]; + this->m_pArray = new ElemType[this->m_elemSizeAllocated]; SetValue(0); } } @@ -137,7 +149,7 @@ namespace Microsoft { namespace MSR { namespace CNTK { ZeroInit(); if (!deepCopyFrom.IsEmpty()) SetValue(deepCopyFrom); - SetMatrixName(deepCopyFrom.m_matrixName); + this->SetMatrixName(deepCopyFrom.m_matrixName); } //assignment operator, deep copy @@ -147,7 +159,7 @@ namespace Microsoft { namespace MSR { namespace CNTK { Clear(); if (!deepCopyFrom.IsEmpty()) SetValue(deepCopyFrom); - SetMatrixName(deepCopyFrom.m_matrixName); + this->SetMatrixName(deepCopyFrom.m_matrixName); return *this; } @@ -156,14 +168,14 @@ namespace Microsoft { namespace MSR { namespace CNTK { template CPUMatrix::CPUMatrix(CPUMatrix&& moveFrom) { - m_computeDevice = moveFrom.m_computeDevice; - m_numRows = moveFrom.m_numRows; - m_numCols = moveFrom.m_numCols; - m_elemSizeAllocated = moveFrom.m_elemSizeAllocated; - m_pArray = moveFrom.m_pArray; //shallow copy 
the pointer - m_matrixName = moveFrom.m_matrixName; - m_format = moveFrom.m_format; - m_externalBuffer = moveFrom.m_externalBuffer; + this->m_computeDevice = moveFrom.m_computeDevice; + this->m_numRows = moveFrom.m_numRows; + this->m_numCols = moveFrom.m_numCols; + this->m_elemSizeAllocated = moveFrom.m_elemSizeAllocated; + this->m_pArray = moveFrom.m_pArray; //shallow copy the pointer + this->m_matrixName = moveFrom.m_matrixName; + this->m_format = moveFrom.m_format; + this->m_externalBuffer = moveFrom.m_externalBuffer; //release the pointer from the source object so that the destructor won't release it twice moveFrom.ZeroInit(); } @@ -174,16 +186,16 @@ namespace Microsoft { namespace MSR { namespace CNTK { { if (this != &moveFrom) { - if (OwnBuffer() && m_pArray != nullptr) - delete[] m_pArray; //always delete the data pointer since we will use the pointer from moveFrom + if (this->OwnBuffer() && this->m_pArray != nullptr) + delete[] this->m_pArray; //always delete the data pointer since we will use the pointer from moveFrom - m_computeDevice = moveFrom.m_computeDevice; - m_numRows = moveFrom.m_numRows; - m_numCols = moveFrom.m_numCols; - m_elemSizeAllocated = moveFrom.m_elemSizeAllocated; - m_pArray = moveFrom.m_pArray; - m_format = moveFrom.m_format; - m_externalBuffer = moveFrom.m_externalBuffer; + this->m_computeDevice = moveFrom.m_computeDevice; + this->m_numRows = moveFrom.m_numRows; + this->m_numCols = moveFrom.m_numCols; + this->m_elemSizeAllocated = moveFrom.m_elemSizeAllocated; + this->m_pArray = moveFrom.m_pArray; + this->m_format = moveFrom.m_format; + this->m_externalBuffer = moveFrom.m_externalBuffer; //release the pointer from the source object so that the destructor won't release it twice moveFrom.ZeroInit(); @@ -200,11 +212,11 @@ namespace Microsoft { namespace MSR { namespace CNTK { template void CPUMatrix::Clear() { - if (m_pArray!=nullptr && OwnBuffer()) + if (this->m_pArray!=nullptr && this->OwnBuffer()) { - delete [] m_pArray; - m_pArray = 
nullptr; - m_elemSizeAllocated = 0; + delete [] this->m_pArray; + this->m_pArray = nullptr; + this->m_elemSizeAllocated = 0; } BaseMatrix::Clear(); @@ -221,17 +233,17 @@ namespace Microsoft { namespace MSR { namespace CNTK { if (numCols == 0) throw std::logic_error("The slice cannot have 0 columns."); - if (startColumn + numCols > m_numCols) + if (startColumn + numCols > this->m_numCols) throw std::logic_error("The slice is out of range of the source matrix."); CPUMatrix slice; slice.m_externalBuffer = true; //memory of a slice is managed externally. - slice.m_numRows = m_numRows; + slice.m_numRows = this->m_numRows; slice.m_numCols = numCols; slice.m_elemSizeAllocated = slice.GetNumElements(); - slice.m_pArray = m_pArray + startColumn * m_numRows; - slice.m_format = m_format; + slice.m_pArray = this->m_pArray + startColumn * this->m_numRows; + slice.m_format = this->m_format; return slice; } @@ -242,16 +254,16 @@ namespace Microsoft { namespace MSR { namespace CNTK { if (numCols == 0) throw std::logic_error("The slice cannot have 0 columns."); - if (startColumn + numCols > m_numCols) + if (startColumn + numCols > this->m_numCols) throw std::logic_error("The slice is out of range of the source matrix."); Clear(); - SetOwnBuffer(false); //memory of a slice is managed externally. - m_numRows = fromMatrix.m_numRows; - m_numCols = numCols; - m_elemSizeAllocated = GetNumElements(); - m_pArray = m_pArray + startColumn *m_numRows; + this->SetOwnBuffer(false); //memory of a slice is managed externally. 
+ this->m_numRows = fromMatrix.m_numRows; + this->m_numCols = numCols; + this->m_elemSizeAllocated = this->GetNumElements(); + this->m_pArray = this->m_pArray + startColumn *this->m_numRows; return *this; } @@ -276,7 +288,7 @@ namespace Microsoft { namespace MSR { namespace CNTK { for (long j=0; jm_pArray + j*numRows, a.m_pArray + j*k + startIndex, sizeof(ElemType) * numRows); ////four-way unrolling //for (long i=0, startRow = startIndex; i<(m & ~3); i+=4, startRow+=4) @@ -306,10 +318,10 @@ namespace Microsoft { namespace MSR { namespace CNTK { if (a.GetNumRows() != numRows) throw std::logic_error("AddToRowSliceValuesOf: a.GetNumRows() != numRows."); - if (startIndex + numRows > GetNumRows()) + if (startIndex + numRows > this->GetNumRows()) throw std::logic_error("AddToRowSliceValuesOf: startIndex + numRows exceeds GetNumRows()."); - if (a.GetNumCols() != GetNumCols()) + if (a.GetNumCols() != this->GetNumCols()) throw std::logic_error("AddToRowSliceValuesOf: columns does not match."); long n=(long)a.GetNumCols(), m=(long)numRows; @@ -425,7 +437,7 @@ namespace Microsoft { namespace MSR { namespace CNTK { template CPUMatrix CPUMatrix::Transpose() { - if (IsEmpty()) + if (this->IsEmpty()) throw std::logic_error("Transpose: Matrix is empty."); CPUMatrix c; @@ -471,29 +483,29 @@ namespace Microsoft { namespace MSR { namespace CNTK { template void CPUMatrix::SetValue(const ElemType v) { - if (IsEmpty()) + if (this->IsEmpty()) throw std::logic_error("SetValue: Matrix is empty."); if (v == 0) { - memset(m_pArray, 0, sizeof(ElemType) * GetNumElements()); + memset(this->m_pArray, 0, sizeof(ElemType) * this->GetNumElements()); } else { - long m=(long)GetNumElements(); + long m=(long)this->GetNumElements(); #pragma omp parallel for //four-way unrolling for (long i=0; i<(m & ~3); i+=4) { - m_pArray[i] = v; - m_pArray[i+1] = v; - m_pArray[i+2] = v; - m_pArray[i+3] = v; + this->m_pArray[i] = v; + this->m_pArray[i+1] = v; + this->m_pArray[i+2] = v; + this->m_pArray[i+3] = v; } 
//handle remaining stuffs for (long i=m & ~3; im_pArray[i] = v; } } } @@ -501,13 +513,13 @@ namespace Microsoft { namespace MSR { namespace CNTK { template void CPUMatrix::SetColumn(const ElemType* colPointer, size_t j) { - if (IsEmpty()) + if (this->IsEmpty()) throw std::logic_error("SetColumn: Matrix is empty."); if (colPointer==NULL) return; auto& us = *this; - long m=(long)GetNumRows(); + long m=(long)this->GetNumRows(); #pragma omp parallel for //four-way unrolling for (long i=0; i<(m & ~3); i+=4) @@ -528,11 +540,11 @@ namespace Microsoft { namespace MSR { namespace CNTK { template void CPUMatrix::SetColumn(const ElemType val, size_t j) { - if (IsEmpty()) + if (this->IsEmpty()) throw std::logic_error("SetColumn: Matrix is empty."); auto& us = *this; - long m=(long)GetNumRows(); + long m=(long)this->GetNumRows(); #pragma omp parallel for //four-way unrolling for (long i=0; i<(m & ~3); i+=4) @@ -552,12 +564,12 @@ namespace Microsoft { namespace MSR { namespace CNTK { template void CPUMatrix::SetColumn(const CPUMatrix& valMat, size_t j) { - if (IsEmpty()) + if (this->IsEmpty()) throw std::logic_error("SetColumn: Matrix is empty."); assert(valMat.GetNumRows() == this->GetNumRows() && valMat.GetNumCols() == 1) ; auto& us = *this; - long m=(long)GetNumRows(); + long m=(long)this->GetNumRows(); #pragma omp parallel for //four-way unrolling for (long i=0; i<(m & ~3); i+=4) @@ -583,7 +595,7 @@ namespace Microsoft { namespace MSR { namespace CNTK { Resize(deepCopyFrom.GetNumRows(), deepCopyFrom.GetNumCols()); size_t cpSize = deepCopyFrom.GetNumElements(); if (cpSize != 0) - memcpy(m_pArray, deepCopyFrom.m_pArray, cpSize*sizeof(ElemType)); + memcpy(this->m_pArray, deepCopyFrom.m_pArray, cpSize*sizeof(ElemType)); } template @@ -592,30 +604,30 @@ namespace Microsoft { namespace MSR { namespace CNTK { if (pArray == nullptr) throw std::invalid_argument("Invalid pArray."); - m_format = matrixFormatDense; - m_computeDevice = CPUDEVICE; + this->m_format = matrixFormatDense; + 
this->m_computeDevice = CPUDEVICE; // if it's externally managed, then populate the structure if (matrixFlags&matrixFlagDontOwnBuffer) { - if (m_pArray != nullptr) - delete [] m_pArray; + if (this->m_pArray != nullptr) + delete [] this->m_pArray; - m_pArray = pArray; - m_numRows = numRows; - m_numCols = numCols; + this->m_pArray = pArray; + this->m_numRows = numRows; + this->m_numCols = numCols; // free previous array allocation if any before overwriting - if (m_pArray != nullptr) - delete[] m_pArray; - m_pArray = pArray; - m_elemSizeAllocated = GetNumElements(); - m_externalBuffer = true; + if (this->m_pArray != nullptr) + delete[] this->m_pArray; + this->m_pArray = pArray; + this->m_elemSizeAllocated = this->GetNumElements(); + this->m_externalBuffer = true; } else { Resize(numRows, numCols); - if (IsEmpty()) + if (this->IsEmpty()) { throw std::invalid_argument("NumRows or NumCols is 0. Nothing to copy"); } @@ -623,7 +635,7 @@ namespace Microsoft { namespace MSR { namespace CNTK { { if (!(matrixFlags&matrixFormatRowMajor)) //compatible to internal structure { - memcpy(m_pArray, pArray, GetNumElements()*sizeof(ElemType)); + memcpy(this->m_pArray, pArray, this->GetNumElements()*sizeof(ElemType)); } else //need to transpose { @@ -634,9 +646,9 @@ namespace Microsoft { namespace MSR { namespace CNTK { foreach_column(j, us) { #ifndef USE_MKL - dcopy((int)numRows, reinterpret_cast (pArray+j), (int)numCols, reinterpret_cast (m_pArray + LocateColumn(j)), 1); + dcopy((int)numRows, reinterpret_cast (pArray+j), (int)numCols, reinterpret_cast (this->m_pArray + LocateColumn(j)), 1); #else - cblas_dcopy ((int)numRows, reinterpret_cast (pArray+j), (int)numCols, reinterpret_cast (m_pArray + LocateColumn(j)), 1); + cblas_dcopy ((int)numRows, reinterpret_cast (pArray+j), (int)numCols, reinterpret_cast (this->m_pArray + LocateColumn(j)), 1); #endif } } @@ -648,9 +660,9 @@ namespace Microsoft { namespace MSR { namespace CNTK { { #pragma warning (suppress: 4244) #ifndef USE_MKL - 
scopy((int)numRows, reinterpret_cast (pArray+j), (int)numCols, reinterpret_cast (m_pArray + LocateColumn(j)), 1); + scopy((int)numRows, reinterpret_cast (pArray+j), (int)numCols, reinterpret_cast (this->m_pArray + LocateColumn(j)), 1); #else - cblas_scopy ((int)numRows, reinterpret_cast (pArray+j), (int)numCols, reinterpret_cast (m_pArray + LocateColumn(j)), 1); + cblas_scopy ((int)numRows, reinterpret_cast (pArray+j), (int)numCols, reinterpret_cast (this->m_pArray + LocateColumn(j)), 1); #endif } } @@ -663,14 +675,14 @@ namespace Microsoft { namespace MSR { namespace CNTK { template void CPUMatrix::SetDiagonalValue(const ElemType v) { - if (IsEmpty()) + if (this->IsEmpty()) throw std::logic_error("SetDiagonalValue: Matrix is empty."); - if (GetNumRows() != GetNumCols()) + if (this->GetNumRows() != this->GetNumCols()) throw std::logic_error("SetDiagonalValue: NumRows and NumCols do not agree."); auto& us = *this; - long m=(long)GetNumRows(); + long m=(long)this->GetNumRows(); #pragma omp parallel for //four-way unrolling for (long i=0; i<(m & ~3); i+=4) @@ -690,10 +702,10 @@ namespace Microsoft { namespace MSR { namespace CNTK { template void CPUMatrix::SetDiagonalValue(CPUMatrix& vector) { - if (IsEmpty() || vector.IsEmpty()) + if (this->IsEmpty() || vector.IsEmpty()) throw std::logic_error("SetDiagonalValue: Matrix is empty."); - if (GetNumRows() != GetNumCols()) + if (this->GetNumRows() != this->GetNumCols()) throw std::logic_error("SetDiagonalValue: NumRows and NumCols do not agree."); if (vector.GetNumRows() != 1 && vector.GetNumCols() != 1) @@ -701,13 +713,13 @@ namespace Microsoft { namespace MSR { namespace CNTK { if (vector.GetNumElements() == 1) //reduce to simple form SetDiagonalValue(vector(0,0)); - else if (vector.GetNumRows() != GetNumRows()) + else if (vector.GetNumRows() != this->GetNumRows()) throw std::logic_error("SetDiagonalValue: input vector's dimension does not agree with [this]."); else { auto& us = *this; - long m=(long)GetNumRows(); + long 
m=(long)this->GetNumRows(); if (vector.GetNumRows() == 1) //row vector { #pragma omp parallel for @@ -748,26 +760,30 @@ namespace Microsoft { namespace MSR { namespace CNTK { template void CPUMatrix::SetUniformRandomValue(const ElemType low, const ElemType high, unsigned long seed) { - if (IsEmpty()) + if (this->IsEmpty()) throw std::logic_error("SetUniformRandomValue: Matrix is empty."); +#ifdef _MSC_VER // TODO: check if available under GCC/Linux std::ranlux64_base_01 generator; generator.seed(seed==USE_TIME_BASED_SEED ? (unsigned long) time(NULL) : seed); +#else + std::default_random_engine generator (seed); +#endif std::uniform_real_distribution r(low, high); - long m=(long)GetNumElements(); + long m=(long)this->GetNumElements(); //four-way unrolling for (long i=0; i<(m & ~3); i+=4) { - m_pArray[i] = r(generator); - m_pArray[i+1] = r(generator); - m_pArray[i+2] = r(generator); - m_pArray[i+3] = r(generator); + this->m_pArray[i] = r(generator); + this->m_pArray[i+1] = r(generator); + this->m_pArray[i+2] = r(generator); + this->m_pArray[i+3] = r(generator); } //handle remaining stuffs for (long i=m & ~3; im_pArray[i] = r(generator); } } @@ -778,12 +794,16 @@ namespace Microsoft { namespace MSR { namespace CNTK { if (sigma <= 0) throw std::invalid_argument("SetUniformRandomValue: sigma must be a positive value."); - if (IsEmpty()) + if (this->IsEmpty()) throw std::logic_error("SetUniformRandomValue: Matrix is empty."); auto& us = *this; - std::ranlux64_base_01 generator; +#ifndef LINUX + std::ranlux64_base_01 generator; generator.seed(seed==USE_TIME_BASED_SEED ? (unsigned long) time(NULL) : seed); +#else + std::default_random_engine generator (seed); +#endif /* LINUX */ std::normal_distribution r(mean, sigma); //#pragma omp parallel for //is it thread safe? 
foreach_coord(i,j,us) @@ -798,15 +818,19 @@ namespace Microsoft { namespace MSR { namespace CNTK { if (sigma <= 0) throw std::invalid_argument("SetUniformRandomValue: sigma must be a positive value."); - if (IsEmpty()) + if (this->IsEmpty()) throw std::logic_error("SetUniformRandomValue: Matrix is empty."); auto& us = *this; - std::ranlux64_base_01 generator; +#ifndef LINUX + std::ranlux64_base_01 generator; generator.seed(seed==USE_TIME_BASED_SEED ? (unsigned long) time(NULL) : seed); +#else + std::default_random_engine generator (seed); +#endif /* LINUX */ std::normal_distribution r(mean, sigma); - long m=(long)GetNumRows(), n=(long)GetNumCols(); + long m=(long)this->GetNumRows(), n=(long)this->GetNumCols(); for (long j=0; j - void CPUMatrix::SetUniformRandomMask(const ElemType maskRate, const ElemType scaleValue, unsigned long seed=USE_TIME_BASED_SEED) + void CPUMatrix::SetUniformRandomMask(const ElemType maskRate, const ElemType scaleValue, unsigned long seed) { - if (IsEmpty()) + if (this->IsEmpty()) throw std::logic_error("SetUniformRandomValue: Matrix is empty."); auto& us = *this; +#ifndef LINUX std::ranlux64_base_01 generator; generator.seed(seed==USE_TIME_BASED_SEED ? (unsigned long) time(NULL) : seed); +#else + std::default_random_engine generator (seed==USE_TIME_BASED_SEED ? 
(unsigned long) time(NULL) : seed); +#endif /* LINUX */ std::uniform_real_distribution r(0, 1); - long m=(long)GetNumRows(), n=(long)GetNumCols(); + long m=(long)this->GetNumRows(), n=(long)this->GetNumCols(); ElemType v; for (long j=0; jGetNumRows() == gradients.GetNumRows() && this->GetNumCols() == gradients.GetNumCols()); - ElemType *a=m_pArray, *d_v=gradients.m_pArray; - size_t n = GetNumElements(); + ElemType *a=this->m_pArray, *d_v=gradients.m_pArray; + size_t n = this->GetNumElements(); long nLoop = (long)n - n%4; const ElemType floor = 1e-16f; @@ -994,40 +1023,40 @@ namespace Microsoft { namespace MSR { namespace CNTK { template void CPUMatrix::Reshape(const size_t numRows, const size_t numCols) { - assert (numRows*numCols == GetNumElements()); - if (numRows*numCols != GetNumElements()) + assert (numRows*numCols == this->GetNumElements()); + if (numRows*numCols != this->GetNumElements()) throw std::invalid_argument("Reshape: total number of elements does not match."); - m_numRows = numRows; - m_numCols = numCols; + this->m_numRows = numRows; + this->m_numCols = numCols; } //if growONly is true, resize will not reallocate memory if the current memory is large enough (i.e., will not shrink) template void CPUMatrix::Resize(const size_t numRows, const size_t numCols, bool growOnly /*=true*/) { - m_numRows = numRows; - m_numCols = numCols; + this->m_numRows = numRows; + this->m_numCols = numCols; - size_t numElements = GetNumElements(); - if (numElements > m_elemSizeAllocated || (!growOnly && (numElements != m_elemSizeAllocated))) + size_t numElements = this->GetNumElements(); + if (numElements > this->m_elemSizeAllocated || (!growOnly && (numElements != this->m_elemSizeAllocated))) { - if (OwnBuffer() && m_pArray) + if (this->OwnBuffer() && this->m_pArray) { - delete[] m_pArray; //delete and reallocate - m_pArray = nullptr; + delete[] this->m_pArray; //delete and reallocate + this->m_pArray = nullptr; } - if (IsEmpty()) + if (this->IsEmpty()) { - 
m_elemSizeAllocated = 0; - m_pArray = nullptr; + this->m_elemSizeAllocated = 0; + this->m_pArray = nullptr; } else { - if (!OwnBuffer()) + if (!this->OwnBuffer()) throw runtime_error("Resizing an matrix you don't own is not supported."); - m_elemSizeAllocated = numElements; - m_pArray = new ElemType[m_elemSizeAllocated]; + this->m_elemSizeAllocated = numElements; + this->m_pArray = new ElemType[this->m_elemSizeAllocated]; SetValue(0); } } @@ -1037,11 +1066,11 @@ namespace Microsoft { namespace MSR { namespace CNTK { template ElemType* CPUMatrix::CopyToArray() const { - size_t numElements = GetNumElements(); + size_t numElements = this->GetNumElements(); if (numElements != 0) { ElemType* arrayCopyTo = new ElemType[numElements]; - memcpy(arrayCopyTo, m_pArray, sizeof(ElemType)*numElements); + memcpy(arrayCopyTo, this->m_pArray, sizeof(ElemType)*numElements); return arrayCopyTo; } else @@ -1055,7 +1084,7 @@ namespace Microsoft { namespace MSR { namespace CNTK { template size_t CPUMatrix::CopyToArray(ElemType*& arrayCopyTo, size_t& currentArraySize) const { - size_t numElements = GetNumElements(); + size_t numElements = this->GetNumElements(); if (numElements > currentArraySize) { @@ -1066,7 +1095,7 @@ namespace Microsoft { namespace MSR { namespace CNTK { if (numElements != 0) { - memcpy(arrayCopyTo, m_pArray, sizeof(ElemType)*numElements); + memcpy(arrayCopyTo, this->m_pArray, sizeof(ElemType)*numElements); } return numElements; @@ -1075,15 +1104,15 @@ namespace Microsoft { namespace MSR { namespace CNTK { template inline size_t CPUMatrix::LocateElement (const size_t row, const size_t col) const { - assert (row < m_numRows && col < m_numCols); - return col * m_numRows + row; // matrix in column-wise storage + assert (row < this->m_numRows && col < this->m_numCols); + return col * this->m_numRows + row; // matrix in column-wise storage } template size_t CPUMatrix::LocateColumn (const size_t col) const { - assert (col < m_numCols); - return col * m_numRows; // matrix 
in column-wise storage + assert (col < this->m_numCols); + return col * this->m_numRows; // matrix in column-wise storage } #pragma endregion Basic Operators @@ -1099,7 +1128,7 @@ namespace Microsoft { namespace MSR { namespace CNTK { template CPUMatrix CPUMatrix::operator+ (ElemType alpha) const { - CPUMatrix c(GetNumRows(), GetNumCols()); + CPUMatrix c(this->GetNumRows(), this->GetNumCols()); c.AssignSumOf(alpha, *this); return c; } @@ -1114,7 +1143,7 @@ namespace Microsoft { namespace MSR { namespace CNTK { if (this != &a) Resize(a.GetNumRows(), a.GetNumCols()); - long m=(long)GetNumRows(), n=(long)GetNumCols(); + long m=(long)this->GetNumRows(), n=(long)this->GetNumCols(); #pragma omp parallel for for (long j=0; j CPUMatrix CPUMatrix::operator+ (const CPUMatrix& a) const { - if (GetNumElements() == 1) + if (this->GetNumElements() == 1) { CPUMatrix c(a); c += (*this)(0,0); @@ -1203,7 +1232,7 @@ namespace Microsoft { namespace MSR { namespace CNTK { template CPUMatrix CPUMatrix::operator- (ElemType alpha) const { - CPUMatrix c(GetNumRows(), GetNumCols()); + CPUMatrix c(this->GetNumRows(), this->GetNumCols()); c.AssignDifferenceOf(*this, alpha); return c; } @@ -1218,7 +1247,7 @@ namespace Microsoft { namespace MSR { namespace CNTK { if (this != &a) Resize(a.GetNumRows(), a.GetNumCols()); - long m=(long)GetNumRows(), n=(long)GetNumCols(); + long m=(long)this->GetNumRows(), n=(long)this->GetNumCols(); #pragma omp parallel for for (long j=0; jGetNumRows(), n=(long)this->GetNumCols(); #pragma omp parallel for for (long j=0; j CPUMatrix CPUMatrix::operator* (ElemType alpha) const { - CPUMatrix c(GetNumRows(), GetNumCols()); + CPUMatrix c(this->GetNumRows(), this->GetNumCols()); Scale(alpha, *this, c); return c; } @@ -1353,7 +1382,7 @@ namespace Microsoft { namespace MSR { namespace CNTK { CPUMatrix CPUMatrix::operator* (const CPUMatrix& a) const { auto& us = *this; - if (GetNumElements() == 1) + if (this->GetNumElements() == 1) { CPUMatrix c; c.AssignProductOf(us(0,0), 
a); @@ -1399,7 +1428,7 @@ namespace Microsoft { namespace MSR { namespace CNTK { template CPUMatrix CPUMatrix::operator^ (ElemType alpha) const { - CPUMatrix c(GetNumRows(), GetNumCols()); + CPUMatrix c(this->GetNumRows(), this->GetNumCols()); ElementWisePower(alpha, *this, c); return c; } @@ -1441,7 +1470,7 @@ namespace Microsoft { namespace MSR { namespace CNTK { if (this != &a) Resize(a.GetNumRows(), a.GetNumCols()); - long m=(long)GetNumRows(), n=(long)GetNumCols(); + long m=(long)this->GetNumRows(), n=(long)this->GetNumCols(); #pragma omp parallel for for (long j=0; jGetNumRows() && a.GetNumCols() == this->GetNumCols())) throw std::invalid_argument("AddElementProductOf : The input matrix dimensions do not match [this]."); auto& us=*this; - long m=(long)GetNumRows(), n=(long)GetNumCols(); + long m=(long)this->GetNumRows(), n=(long)this->GetNumCols(); #pragma omp parallel for for (long j=0; j CPUMatrix& CPUMatrix::ColumnElementMultiplyWith(const CPUMatrix& a) { - if (a.IsEmpty() || IsEmpty()) + if (a.IsEmpty() || this->IsEmpty()) throw std::logic_error("ColumnElementMultiplyWith: Matrix is empty."); - assert (a.GetNumRows() == GetNumRows() && a.GetNumCols() == 1); - if (!(a.GetNumRows() == GetNumRows() && a.GetNumCols() == 1)) + assert (a.GetNumRows() == this->GetNumRows() && a.GetNumCols() == 1); + if (!(a.GetNumRows() == this->GetNumRows() && a.GetNumCols() == 1)) throw std::invalid_argument("ColumnElementMultiplyWith: The input matrix should be a col vector and match [this]'s rows."); auto& us=*this; - long m=(long)GetNumRows(), n=(long)GetNumCols(); + long m=(long)this->GetNumRows(), n=(long)this->GetNumCols(); #pragma omp parallel for for (long j=0; j CPUMatrix& CPUMatrix::RowElementMultiplyWith(const CPUMatrix& a) { - if (a.IsEmpty() || IsEmpty()) + if (a.IsEmpty() || this->IsEmpty()) throw std::logic_error("RowElementMultiplyWith: Matrix is empty."); - assert (a.GetNumRows() == 1 && a.GetNumCols() == GetNumCols()); - if (!(a.GetNumRows() == 1 && 
a.GetNumCols() == GetNumCols())) + assert (a.GetNumRows() == 1 && a.GetNumCols() == this->GetNumCols()); + if (!(a.GetNumRows() == 1 && a.GetNumCols() == this->GetNumCols())) throw std::invalid_argument("RowElementMultiplyWith: The input matrix should be a row vector and match [this]'s columns."); auto& us=*this; - long m=(long)GetNumRows(), n=(long)GetNumCols(); + long m=(long)this->GetNumRows(), n=(long)this->GetNumCols(); #pragma omp parallel for for (long j=0; jGetNumRows(), n=(long)this->GetNumCols(); #pragma omp parallel for for (long j=0; jGetNumRows(), n=(long)this->GetNumCols(); #pragma omp parallel for for (long j=0; jGetNumRows(), n=(long)this->GetNumCols(); #pragma omp parallel for for (long j=0; jGetNumRows(), n=(long)this->GetNumCols(); #pragma omp parallel for for (long j=0; jGetNumRows(), n=(long)this->GetNumCols(); #pragma omp parallel for for (long j=0; jGetNumRows(), n=(long)this->GetNumCols(); #pragma omp parallel for for (long j=0; j CPUMatrix& CPUMatrix::InplaceTruncateBottom (const ElemType threshold) { - if (IsEmpty()) + if (this->IsEmpty()) throw std::logic_error("InplaceTruncateBottom: Matrix is empty."); auto& us=*this; - long m=(long)GetNumRows(), n=(long)GetNumCols(); + long m=(long)this->GetNumRows(), n=(long)this->GetNumCols(); #pragma omp parallel for for (long j=0; j CPUMatrix& CPUMatrix::InplaceTruncate (const ElemType threshold) { - if (IsEmpty()) + if (this->IsEmpty()) throw std::logic_error("InplaceTruncateBottom: Matrix is empty."); auto& us=*this; ElemType locThresholdPos = abs(threshold); ElemType locTHresholdNeg = -locThresholdPos; - long m=(long)GetNumRows(), n=(long)GetNumCols(); + long m=(long)this->GetNumRows(), n=(long)this->GetNumCols(); #pragma omp parallel for for (long j=0; j CPUMatrix& CPUMatrix::InplaceTruncateTop (const ElemType threshold) { - if (IsEmpty()) + if (this->IsEmpty()) throw std::logic_error("InplaceTruncateTop: Matrix is empty."); auto& us=*this; @@ -2329,7 +2358,7 @@ namespace Microsoft { namespace 
MSR { namespace CNTK { template CPUMatrix& CPUMatrix::SetToZeroIfAbsLessThan (const ElemType threshold) { - if (IsEmpty()) + if (this->IsEmpty()) throw std::logic_error("SetToZeroIfAbsLessThan: Matrix is empty."); auto& us=*this; @@ -2348,24 +2377,24 @@ namespace Microsoft { namespace MSR { namespace CNTK { template ElemType CPUMatrix::SumOfAbsElements () const { - if (IsEmpty()) + if (this->IsEmpty()) throw std::logic_error("SumOfAbsElements: Matrix is empty."); if (sizeof(ElemType) == sizeof(double)) { #ifndef USE_MKL - return (ElemType)dasum((int)GetNumElements(), reinterpret_cast (m_pArray), 1); + return (ElemType)dasum((int)this->GetNumElements(), reinterpret_cast (this->m_pArray), 1); #else - return (ElemType)cblas_dasum((int)GetNumElements(), reinterpret_cast (m_pArray), 1); + return (ElemType)cblas_dasum((int)this->GetNumElements(), reinterpret_cast (this->m_pArray), 1); #endif } else { #pragma warning (suppress: 4244) #ifndef USE_MKL - return sasum((int)GetNumElements(), reinterpret_cast (m_pArray), 1); + return sasum((int)this->GetNumElements(), reinterpret_cast (this->m_pArray), 1); #else - return cblas_sasum ((int)GetNumElements(), reinterpret_cast (m_pArray), 1); + return cblas_sasum ((int)this->GetNumElements(), reinterpret_cast (m_pArray), 1); #endif } } @@ -2374,7 +2403,7 @@ namespace Microsoft { namespace MSR { namespace CNTK { template ElemType CPUMatrix::SumOfElements () const { - if (IsEmpty()) + if (this->IsEmpty()) throw std::logic_error("SumOfElements: Matrix is empty."); ElemType sum=0; @@ -2384,12 +2413,12 @@ namespace Microsoft { namespace MSR { namespace CNTK { #pragma omp parallel for reduction(+:sum) for (long i=0; i<(m & ~3); i+=4) { - sum += m_pArray[i] + m_pArray[i+1] + m_pArray[i+2] + m_pArray[i+3] ; + sum += this->m_pArray[i] + this->m_pArray[i+1] + this->m_pArray[i+2] + this->m_pArray[i+3] ; } //handle remaining stuffs for (long i=m & ~3; im_pArray[i]; } return sum; @@ -2418,7 +2447,7 @@ namespace Microsoft { namespace MSR { 
namespace CNTK { template void CPUMatrix::VectorNorm1(CPUMatrix& c, const bool isColWise) const { - if (IsEmpty()) + if (this->IsEmpty()) throw std::logic_error("VectorNormInf: Matrix is empty."); auto& us=*this; @@ -2476,7 +2505,7 @@ namespace Microsoft { namespace MSR { namespace CNTK { template void CPUMatrix::VectorNorm2(CPUMatrix& c, const bool isColWise) const { - if (IsEmpty()) + if (this->IsEmpty()) throw std::logic_error("VectorNorm2: Matrix is empty."); auto& us=*this; @@ -2559,7 +2588,7 @@ namespace Microsoft { namespace MSR { namespace CNTK { template void CPUMatrix::VectorNormInf(CPUMatrix& c, const bool isColWise) const { - if (IsEmpty()) + if (this->IsEmpty()) throw std::logic_error("VectorNormInf: Matrix is empty."); auto& us=*this; @@ -2676,7 +2705,7 @@ namespace Microsoft { namespace MSR { namespace CNTK { throw invalid_argument("AddColumnReshapeProductOf: number of rows in a should be multiples of that in b."); long rowsC = rowsA / rowsB; - if (rowsC != GetNumRows() || cols != GetNumCols()) + if (rowsC != this->GetNumRows() || cols != this->GetNumCols()) throw invalid_argument("AddColumnReshapeProductOf: This matrix does not have the right size."); auto & us = *this; @@ -2742,23 +2771,23 @@ namespace Microsoft { namespace MSR { namespace CNTK { template ElemType CPUMatrix::FrobeniusNorm() const { - if (IsEmpty()) + if (this->IsEmpty()) throw std::logic_error("FrobeniusNorm: Matrix is empty."); ElemType v = 0; - long m=(long)GetNumElements(); + long m=(long)this->GetNumElements(); //four-way unrolling #pragma omp parallel for reduction(+:v) for (long i=0; i<(m & ~3); i+=4) { - v += m_pArray[i] * m_pArray[i] + m_pArray[i+1] * m_pArray[i+1] + m_pArray[i+2] * m_pArray[i+2] + m_pArray[i+3] * m_pArray[i+3]; + v += this->m_pArray[i] * this->m_pArray[i] + this->m_pArray[i+1] * this->m_pArray[i+1] + this->m_pArray[i+2] * this->m_pArray[i+2] + this->m_pArray[i+3] * this->m_pArray[i+3]; } //handle remaining stuffs for (long i=m & ~3; im_pArray[i] * 
this->m_pArray[i]; } return sqrt(v); @@ -2780,7 +2809,7 @@ namespace Microsoft { namespace MSR { namespace CNTK { template ElemType CPUMatrix::MatrixNormInf() const { - if (IsEmpty()) + if (this->IsEmpty()) throw std::logic_error("MatrixNormInf: Matrix is empty."); auto& us=*this; @@ -2800,7 +2829,7 @@ namespace Microsoft { namespace MSR { namespace CNTK { template ElemType CPUMatrix::MatrixNorm0() const { - if (IsEmpty()) + if (this->IsEmpty()) throw std::logic_error("MatrixNorm0: Matrix is empty."); auto& us=*this; @@ -2823,7 +2852,7 @@ namespace Microsoft { namespace MSR { namespace CNTK { template ElemType CPUMatrix::MatrixNorm1() const { - if (IsEmpty()) + if (this->IsEmpty()) throw std::logic_error("MatrixNorm1: Matrix is empty."); auto& us=*this; @@ -2886,12 +2915,12 @@ namespace Microsoft { namespace MSR { namespace CNTK { template void CPUMatrix::VectorMax(CPUMatrix& maxIndexes, CPUMatrix& maxValues, const bool isColWise) const { - if (IsEmpty()) + if (this->IsEmpty()) throw std::logic_error("VectorMax: Matrix is empty."); auto& us=*this; - const int m = (int)GetNumRows(); - const int n = (int)GetNumCols(); + const int m = (int)this->GetNumRows(); + const int n = (int)this->GetNumCols(); assert (m>0 && n>0); //converting from size_t to int may cause overflow @@ -2945,12 +2974,12 @@ namespace Microsoft { namespace MSR { namespace CNTK { template void CPUMatrix::VectorMin(CPUMatrix& minIndexes, CPUMatrix& minValues, const bool isColWise) const { - if (IsEmpty()) + if (this->IsEmpty()) throw std::logic_error("VectorMin: Matrix is empty."); auto& us=*this; - const int m = (int)GetNumRows(); - const int n = (int)GetNumCols(); + const int m = (int)this->GetNumRows(); + const int n = (int)this->GetNumCols(); assert (m>0 && n>0); //converting from size_t to int may cause overflow @@ -3028,16 +3057,16 @@ namespace Microsoft { namespace MSR { namespace CNTK { template void CPUMatrix::Print(const char* matrixName, size_t rowStart, size_t rowEnd, size_t colStart, 
size_t colEnd) const { - if (IsEmpty()) + if (this->IsEmpty()) throw std::logic_error("Print: Matrix is empty."); - if (rowEnd >= GetNumRows() || colEnd >= GetNumCols()) + if (rowEnd >= this->GetNumRows() || colEnd >= this->GetNumCols()) throw std::invalid_argument("Index out of range."); if (matrixName != nullptr) - fprintf (stderr, "\n###### %s (%lu, %lu) ######\n", matrixName, GetNumRows(), GetNumCols()); + fprintf (stderr, "\n###### %s (%lu, %lu) ######\n", matrixName, this->GetNumRows(), this->GetNumCols()); else - fprintf (stderr, "\n###### Unnamed Matrix (%lu, %lu) ######\n", GetNumRows(), GetNumCols()); + fprintf (stderr, "\n###### Unnamed Matrix (%lu, %lu) ######\n", this->GetNumRows(), this->GetNumCols()); fprintf (stderr, "\n------ Print Range (%lu:%lu, %lu:%lu) ------\n", rowStart, rowEnd, colStart, colEnd); @@ -3053,7 +3082,7 @@ namespace Microsoft { namespace MSR { namespace CNTK { template void CPUMatrix::Print(const char* matrixName /*=nullptr*/) const { - Print(matrixName, 0, GetNumRows()-1, 0, GetNumCols()-1); + Print(matrixName, 0, this->GetNumRows()-1, 0, this->GetNumCols()-1); } // file I/O @@ -3933,7 +3962,11 @@ namespace Microsoft { namespace MSR { namespace CNTK { if (a.IsEmpty()) throw std::logic_error("Scale: Input matrix a is empty."); if (alpha.GetNumElements()!=1) +#ifndef LINUX throw std::exception("Matrix alpha must be 1x1"); +#else + throw std::exception(); +#endif /* LINUX */ CPUMatrix::Scale(alpha(0,0),a); } diff --git a/Math/Math/CPUMatrix.h b/Math/Math/CPUMatrix.h index d7142b055..cb026e4a9 100644 --- a/Math/Math/CPUMatrix.h +++ b/Math/Math/CPUMatrix.h @@ -7,16 +7,22 @@ #include #include #include +#include /* LINUX */ #include "File.h" #include "Helpers.h" #include "CommonMatrix.h" +#ifndef LINUX #ifdef MATH_EXPORTS #define MATH_API __declspec(dllexport) #else #define MATH_API __declspec(dllimport) #endif +#else /* LINUX */ +#define MATH_API +#endif /* LINUX */ + #ifndef USE_TIME_BASED_SEED #define USE_TIME_BASED_SEED ULONG_MAX 
#endif @@ -51,8 +57,8 @@ namespace Microsoft { namespace MSR { namespace CNTK { ~CPUMatrix(); public: - size_t BufferSize() const {return m_numRows*m_numCols*sizeof(ElemType);} - ElemType* BufferPointer() const {return m_pArray;} + size_t BufferSize() const {return this->m_numRows*this->m_numCols*sizeof(ElemType);} + ElemType* BufferPointer() const {return this->m_pArray;} CPUMatrix ColumnSlice(size_t startColumn, size_t numCols) const; CPUMatrix& AssignColumnSlice(const CPUMatrix& fromMatrix, size_t startColumn, size_t numCols); @@ -73,15 +79,15 @@ namespace Microsoft { namespace MSR { namespace CNTK { inline ElemType& operator() (const size_t row, const size_t col) { - return m_pArray[LocateElement(row, col)]; + return this->m_pArray[LocateElement(row, col)]; } inline const ElemType& operator() (const size_t row, const size_t col) const { - return m_pArray[LocateElement(row, col)]; + return this->m_pArray[LocateElement(row, col)]; } inline ElemType Get00Element() const { - return m_pArray[0]; + return this->m_pArray[0]; } void SetValue(const ElemType v); @@ -347,8 +353,9 @@ namespace Microsoft { namespace MSR { namespace CNTK { protected: - inline size_t LocateElement (const size_t i, const size_t j) const; - inline size_t LocateColumn (const size_t j) const; + // Was inline.. but without definition, it doesn't make sense. + size_t LocateElement (const size_t i, const size_t j) const; + size_t LocateColumn (const size_t j) const; private: void ZeroInit(); //should only be used by constructors. 
diff --git a/Math/Math/CPUSparseMatrix.cpp b/Math/Math/CPUSparseMatrix.cpp index fc421c138..a7dd44c95 100644 --- a/Math/Math/CPUSparseMatrix.cpp +++ b/Math/Math/CPUSparseMatrix.cpp @@ -15,7 +15,9 @@ #include "CPUSparseMatrix.h" #include #include +#ifndef LINUX #include +#endif /* LINUX */ #ifdef LEAKDETECT #include #endif @@ -90,70 +92,77 @@ namespace Microsoft { namespace MSR { namespace CNTK { template void CPUSparseMatrix::ZeroInit() { - m_numRows = 0; - m_numCols = 0; - m_elemSizeAllocated = 0; - m_externalBuffer = false; - m_pArray = NULL; - m_computeDevice = CPUDEVICE; - m_nz = 0; - m_matrixName = NULL; + this->m_numRows = 0; + this->m_numCols = 0; + this->m_elemSizeAllocated = 0; + this->m_externalBuffer = false; + this->m_pArray = NULL; + this->m_computeDevice = CPUDEVICE; + this->m_nz = 0; + this->m_matrixName = NULL; - if(m_format == MatrixFormat::matrixFormatSparseCSC || m_format == MatrixFormat::matrixFormatSparseCSR) + if(this->m_format == MatrixFormat::matrixFormatSparseCSC || this->m_format == MatrixFormat::matrixFormatSparseCSR) { - m_colIdx = -1; - m_val = NULL; - m_row = NULL; - m_pb = NULL; + this->m_colIdx = -1; + this->m_val = NULL; + this->m_row = NULL; + this->m_pb = NULL; } - else if (m_format == MatrixFormat::matrixFormatSparseBlockCol || m_format == MatrixFormat::matrixFormatSparseBlockRow) + else if (this->m_format == MatrixFormat::matrixFormatSparseBlockCol || this->m_format == MatrixFormat::matrixFormatSparseBlockRow) { - m_blockSize = 0; - m_blockVal = NULL; - m_blockIds = NULL; + this->m_blockSize = 0; + this->m_blockVal = NULL; + this->m_blockIds = NULL; } } template CPUSparseMatrix::CPUSparseMatrix(const MatrixFormat format) { + this->CheckInit(format); + } + + //should only be used by constructors. 
+ template + void CPUSparseMatrix::CheckInit(const MatrixFormat format) + { if(format != MatrixFormat::matrixFormatSparseCSC && format != MatrixFormat::matrixFormatSparseCSR && format != MatrixFormat::matrixFormatSparseBlockCol && format != MatrixFormat::matrixFormatSparseBlockRow) { throw std::logic_error("CPUSparseMatrix: unsupported sparse matrix format"); } - m_format = format; + this->m_format = format; ZeroInit(); } template CPUSparseMatrix::CPUSparseMatrix(const MatrixFormat format, const size_t numRows, const size_t numCols, const size_t size) - { CPUSparseMatrix::CPUSparseMatrix(format); + { this->CheckInit(format); Resize(numRows, numCols, size); } template CPUSparseMatrix::~CPUSparseMatrix() { - if (m_matrixName!=NULL) + if (this->m_matrixName!=NULL) { - delete[] m_matrixName; - m_matrixName = nullptr; + delete[] this->m_matrixName; + this->m_matrixName = nullptr; } - if(m_format == MatrixFormat::matrixFormatSparseCSC || m_format == MatrixFormat::matrixFormatSparseCSR) + if(this->m_format == MatrixFormat::matrixFormatSparseCSC || this->m_format == MatrixFormat::matrixFormatSparseCSR) { - if(m_val != NULL) - delete[] m_val; - if(m_row != NULL) - delete[] m_row; - if(m_pb != NULL) - delete[] m_pb; + if(this->m_val != NULL) + delete[] this->m_val; + if(this->m_row != NULL) + delete[] this->m_row; + if(this->m_pb != NULL) + delete[] this->m_pb; } - else if (m_format == MatrixFormat::matrixFormatSparseBlockCol || m_format == MatrixFormat::matrixFormatSparseBlockRow) + else if (this->m_format == MatrixFormat::matrixFormatSparseBlockCol || this->m_format == MatrixFormat::matrixFormatSparseBlockRow) { - if(m_blockVal != NULL) - delete[] m_blockVal; - if(m_blockIds != NULL) - delete[] m_blockIds; + if(this->m_blockVal != NULL) + delete[] this->m_blockVal; + if(this->m_blockIds != NULL) + delete[] this->m_blockIds; } } @@ -167,76 +176,76 @@ namespace Microsoft { namespace MSR { namespace CNTK { template void CPUSparseMatrix::SetValue(const size_t rIdx, const 
size_t cIdx, const ElemType v) { - if(m_format != MatrixFormat::matrixFormatSparseCSC && m_format != MatrixFormat::matrixFormatSparseCSR) + if(this->m_format != MatrixFormat::matrixFormatSparseCSC && this->m_format != MatrixFormat::matrixFormatSparseCSR) { throw std::logic_error("CPUSparseMatrix: unsupported SetValue() call."); } - if(m_elemSizeAllocated < m_nz +1) { + if(this->m_elemSizeAllocated < this->m_nz +1) { throw std::logic_error("CPUSparseMatrix: allocated size is too small."); } - if(rIdx < 0 || rIdx >= m_numRows) { + if(rIdx < 0 || rIdx >= this->m_numRows) { throw std::logic_error("CPUSparseMatrix: SetValue() invalid row id"); } - if(cIdx < 0 || cIdx >= m_numCols) { + if(cIdx < 0 || cIdx >= this->m_numCols) { throw std::logic_error("CPUSparseMatrix: SetValue() invalid column id"); } - size_t r = (m_format == matrixFormatSparseCSC) ? rIdx: cIdx; - size_t c = (m_format == matrixFormatSparseCSC) ? cIdx: rIdx; + size_t r = (this->m_format == matrixFormatSparseCSC) ? rIdx: cIdx; + size_t c = (this->m_format == matrixFormatSparseCSC) ? 
cIdx: rIdx; - m_val[m_nz] = v; - m_row[m_nz] = r; + this->m_val[this->m_nz] = v; + this->m_row[this->m_nz] = r; //consistency check - if(c == m_colIdx && r <= m_row[m_nz-1]) + if(c == this->m_colIdx && r <= this->m_row[this->m_nz-1]) { throw std::logic_error("CPUSparseMatrix: SetValue is not called properly"); } - if (c != m_colIdx) + if (c != this->m_colIdx) { m_pb[c] = m_nz; m_colIdx = (int) c; } - m_pb[c+1] = m_nz+1; - m_nz++; + this->m_pb[c+1] = this->m_nz+1; + this->m_nz++; } template ElemType* CPUSparseMatrix::BufferPointer() const { - if(m_format == MatrixFormat::matrixFormatSparseCSC || m_format == MatrixFormat::matrixFormatSparseCSR) + if(this->m_format == MatrixFormat::matrixFormatSparseCSC || this->m_format == MatrixFormat::matrixFormatSparseCSR) { - return m_val; + return this->m_val; } else { - return m_blockVal; + return this->m_blockVal; } } template void CPUSparseMatrix::Resize(const size_t numRows, const size_t numCols, size_t size) { - m_nz = 0; - m_colIdx = -1; - m_numRows = numRows; - m_numCols = numCols; + this->m_nz = 0; + this->m_colIdx = -1; + this->m_numRows = numRows; + this->m_numCols = numCols; - if(m_elemSizeAllocated < size) + if(this->m_elemSizeAllocated < size) { - m_elemSizeAllocated = size; - if(m_format == MatrixFormat::matrixFormatSparseCSC || m_format == MatrixFormat::matrixFormatSparseCSR) + this->m_elemSizeAllocated = size; + if(this->m_format == MatrixFormat::matrixFormatSparseCSC || this->m_format == MatrixFormat::matrixFormatSparseCSR) { - if(m_val != NULL) - delete[] m_val; - if(m_row != NULL) - delete[] m_row; - if(m_pb != NULL) - delete[] m_pb; + if(this->m_val != NULL) + delete[] this->m_val; + if(this->m_row != NULL) + delete[] this->m_row; + if(this->m_pb != NULL) + delete[] this->m_pb; //int len = m_format == MatrixFormat::matrixFormatSparseCSC ? numCols : numRows; size_t len = numCols > numRows ? 
numCols : numRows; @@ -245,12 +254,12 @@ namespace Microsoft { namespace MSR { namespace CNTK { m_pb = new size_t[len+1]; } - else if(m_format == MatrixFormat::matrixFormatSparseBlockCol || m_format == MatrixFormat::matrixFormatSparseBlockRow) + else if(this->m_format == MatrixFormat::matrixFormatSparseBlockCol || this->m_format == MatrixFormat::matrixFormatSparseBlockRow) { - if(m_blockVal != NULL) - delete[] m_blockVal; - if(m_blockIds != NULL) - delete[] m_blockIds; + if(this->m_blockVal != NULL) + delete[] this->m_blockVal; + if(this->m_blockIds != NULL) + delete[] this->m_blockIds; size_t max = numCols > numRows ? numCols : numRows; m_blockVal = new ElemType[size]; @@ -263,9 +272,9 @@ namespace Microsoft { namespace MSR { namespace CNTK { template void CPUSparseMatrix::Reset() { - m_nz = 0; - m_colIdx = -1; - m_blockSize = 0; + this->m_nz = 0; + this->m_colIdx = -1; + this->m_blockSize = 0; } //c = op(a) * op(this) or c += op(a) * op(this) @@ -489,7 +498,11 @@ namespace Microsoft { namespace MSR { namespace CNTK { } else { +#ifndef LINUX throw std::exception("CPUSparseMatrix:: ScaleAndAdd() Not implemented"); +#else + throw std::exception(); +#endif /* LINUX */ } } @@ -509,7 +522,11 @@ namespace Microsoft { namespace MSR { namespace CNTK { throw std::logic_error("AssignSoftmaxOf: Matrix a, class, idx2cls or label is empty."); if(etp.GetFormat() != MatrixFormat::matrixFormatSparseCSC) +#ifndef LINUX throw std::exception("CPUSparseMatrix:: ClassEntropy() only support CSC"); +#else + throw std::exception(); +#endif /* LINUX */ size_t nC = cls.GetNumCols(); size_t nV = label.GetNumRows() - nC; @@ -682,7 +699,7 @@ namespace Microsoft { namespace MSR { namespace CNTK { c.SetValue(0.0); } - if(m_format == MatrixFormat::matrixFormatSparseBlockCol || m_format == MatrixFormat::matrixFormatSparseBlockRow) + if(this->m_format == MatrixFormat::matrixFormatSparseBlockCol || this->m_format == MatrixFormat::matrixFormatSparseBlockRow) { for(size_t j = 0; j < m_blockSize; j++) 
{ @@ -701,7 +718,11 @@ namespace Microsoft { namespace MSR { namespace CNTK { } else { +#ifndef LINUX throw std::exception("CPUSparseMatrix:: NormalGrad() only support block sparse format"); +#else + throw std::exception(); +#endif /* LINUX */ } } @@ -716,7 +737,7 @@ namespace Microsoft { namespace MSR { namespace CNTK { } const ElemType floor = 1e-16f; - if(m_format == MatrixFormat::matrixFormatSparseCSC || m_format == MatrixFormat::matrixFormatSparseCSR) + if(this->m_format == MatrixFormat::matrixFormatSparseCSC || this->m_format == MatrixFormat::matrixFormatSparseCSR) { size_t col_num = (m_format == MatrixFormat::matrixFormatSparseCSC) ? GetNumCols() : GetNumRows(); for(size_t j = 0; j < col_num; j++) @@ -737,7 +758,7 @@ namespace Microsoft { namespace MSR { namespace CNTK { c(row, col) = adenorm; } } - } else if(m_format == MatrixFormat::matrixFormatSparseBlockCol || m_format == MatrixFormat::matrixFormatSparseBlockRow) + } else if(this->m_format == MatrixFormat::matrixFormatSparseBlockCol || this->m_format == MatrixFormat::matrixFormatSparseBlockRow) { for(size_t j = 0; j < m_blockSize; j++) { @@ -746,7 +767,7 @@ namespace Microsoft { namespace MSR { namespace CNTK { size_t start = j* len; for(size_t p = start; p < start+len; p++) { - ElemType val = m_blockVal[p]; + ElemType val = this->m_blockVal[p]; size_t row = (m_format == MatrixFormat::matrixFormatSparseBlockCol) ? (p - start) : i; size_t col = (m_format == MatrixFormat::matrixFormatSparseBlockCol) ? 
i : (p - start); @@ -763,7 +784,7 @@ namespace Microsoft { namespace MSR { namespace CNTK { template CPUSparseMatrix& CPUSparseMatrix::InplaceTruncate (const ElemType threshold) { - if(m_format == MatrixFormat::matrixFormatSparseBlockCol || m_format == MatrixFormat::matrixFormatSparseBlockRow) + if(this->m_format == MatrixFormat::matrixFormatSparseBlockCol || this->m_format == MatrixFormat::matrixFormatSparseBlockRow) { ElemType locThresholdPos = abs(threshold); ElemType locTHresholdNeg = -locThresholdPos; @@ -774,20 +795,24 @@ namespace Microsoft { namespace MSR { namespace CNTK { size_t start = j* len; for (size_t p = start; p < start+len; p++) { - if (m_blockVal[p] > locThresholdPos) + if (this->m_blockVal[p] > locThresholdPos) { - m_blockVal[p] = locThresholdPos; + this->m_blockVal[p] = locThresholdPos; } - else if (m_blockVal[p] < locTHresholdNeg) + else if (this->m_blockVal[p] < locTHresholdNeg) { - m_blockVal[p] = locTHresholdNeg; + this->m_blockVal[p] = locTHresholdNeg; } } } } else { +#ifndef LINUX throw std::exception("CPUSparseMatrix:: InplaceTruncate() only support block based sparse matrix"); +#else + throw std::exception(); +#endif /* LINUX */ } return *this; } diff --git a/Math/Math/CPUSparseMatrix.h b/Math/Math/CPUSparseMatrix.h index a73fb05b9..0de38c087 100644 --- a/Math/Math/CPUSparseMatrix.h +++ b/Math/Math/CPUSparseMatrix.h @@ -6,15 +6,17 @@ #pragma once #include -#include "cpumatrix.h" +#include "CPUMatrix.h" #include #include +#ifndef LINUX #ifdef MATH_EXPORTS #define MATH_API __declspec(dllexport) #else #define MATH_API __declspec(dllimport) #endif +#endif /* Linux - already defined in CPUMatrix.h */ namespace Microsoft { namespace MSR { namespace CNTK { @@ -24,6 +26,7 @@ namespace Microsoft { namespace MSR { namespace CNTK { private: void ZeroInit(); + void CheckInit(const MatrixFormat format); public: CPUSparseMatrix(const MatrixFormat format); @@ -37,7 +40,7 @@ namespace Microsoft { namespace MSR { namespace CNTK { void ShiftBy(int 
/*numShift*/) { NOT_IMPLEMENTED; } - size_t BufferSize() const {return m_elemSizeAllocated*sizeof(ElemType);} + size_t BufferSize() const {return this->m_elemSizeAllocated*sizeof(ElemType);} ElemType* BufferPointer() const; void SetGaussianRandomValue(const ElemType /*mean*/, const ElemType /*sigma*/, unsigned long /*seed*/) { NOT_IMPLEMENTED; } @@ -46,14 +49,14 @@ namespace Microsoft { namespace MSR { namespace CNTK { const CPUSparseMatrix & label, const CPUMatrix& cls, const CPUMatrix& idx2cls, CPUSparseMatrix& etp, CPUMatrix& entropyScore); - static void CPUSparseMatrix::ClassEntropyError(CPUSparseMatrix& a); + static void ClassEntropyError(CPUSparseMatrix& a); - static void CPUSparseMatrix::ClassEntropyGradientOfInput( + static void ClassEntropyGradientOfInput( const CPUSparseMatrix& error, const CPUMatrix& weight, CPUMatrix& grd); - static void CPUSparseMatrix::ClassEntropyGradientOfWeight( + static void ClassEntropyGradientOfWeight( const CPUSparseMatrix& error, const CPUMatrix& input, const CPUSparseMatrix & label, diff --git a/Math/Math/CommonMatrix.h b/Math/Math/CommonMatrix.h index 460d3dcd6..f0dd57d46 100644 --- a/Math/Math/CommonMatrix.h +++ b/Math/Math/CommonMatrix.h @@ -6,6 +6,11 @@ #pragma once #include +#include + +#ifdef LINUX +#define wcsnlen_s wcsnlen /* Not sure if this is best replacement... Malcolm */ +#endif #define AUTOPLACEMATRIX 1000 // used in parameters only #define MANAGEDEXTERN -2 // managed externally (i.e. 
PTask) @@ -77,7 +82,7 @@ namespace Microsoft { namespace MSR { namespace CNTK { void SetMatrixName(const wchar_t* s) { Clear(); - if (s!=nullptr) + if (s!=NULL) { size_t n = wcsnlen_s(s, SIZE_MAX); m_matrixName = new wchar_t[n+1]; @@ -102,10 +107,10 @@ namespace Microsoft { namespace MSR { namespace CNTK { protected: void Clear() { - if (m_matrixName!=nullptr) + if (m_matrixName!=NULL) { delete[] m_matrixName; - m_matrixName = nullptr; + m_matrixName = NULL; } } @@ -120,4 +125,4 @@ namespace Microsoft { namespace MSR { namespace CNTK { size_t m_nz; //Number of non-zero elements for sparse matrices (unused in other formats) wchar_t* m_matrixName; }; -}}} \ No newline at end of file +}}} diff --git a/Math/Math/GPUMatrix.cu b/Math/Math/GPUMatrix.cu index 6eae9f467..7374aee54 100644 --- a/Math/Math/GPUMatrix.cu +++ b/Math/Math/GPUMatrix.cu @@ -27,8 +27,13 @@ bool do_sync = false; bool do_sync = true; #endif +#ifndef LINUX // thread local storage to access the current stream, initalize to default stream -__declspec( thread ) cudaStream_t t_stream = cudaStreamDefault; +__declspec( thread ) +#endif /* LINUX */ + cudaStream_t t_stream = cudaStreamDefault; + +extern int _ConvertSMVer2Cores(int major, int minor); // SetStream - set the stream that will be used by the GPU routines void MATH_API SetStream(cudaStream_t stream) @@ -47,7 +52,11 @@ void CURAND_CALL(curandStatus x) { if(x!=CURAND_STATUS_SUCCESS) { +#ifndef LINUX throw std::exception("CURAND fail"); +#else /* LINUX */ + throw std::exception(); +#endif /* LINUX */ } } @@ -55,7 +64,11 @@ void CUBLAS_CALL(cublasStatus_t x) { if(x!=CUBLAS_STATUS_SUCCESS) { +#ifndef LINUX throw std::exception("CUBLAS fail"); +#else /* LINUX */ + throw std::exception(); +#endif /* LINUX */ } } @@ -66,7 +79,11 @@ void CUDA_CALL(cudaError_t x) const char* errmsg = cudaGetErrorString(x); std::cout<<"!!!!!!!!CUDA EXCEPTION: "< DeviceBoundNumber::DeviceBoundNumber(DeviceBoundNumber &&shallowCopy) { 
this->ShallowCopyFrom(shallowCopy.m_data,shallowCopy.m_computeDevice); shallowCopy.m_data=NULL; } +#endif template void DeviceBoundNumber::ShallowCopyFrom(ElemType* newVal,int newValsDevceId) { - m_computeDevice = newValsDevceId; - m_data = newVal; + this->m_computeDevice = newValsDevceId; + this->m_data = newVal; } template DeviceBoundNumber::~DeviceBoundNumber() { - if (m_data!=NULL) + if (this->m_data!=NULL) { - if (m_computeDevice<0) + if (this->m_computeDevice<0) { - delete m_data; - m_data = NULL; + delete this->m_data; + this->m_data = NULL; } - else if (m_computeDevice != MANAGEDEXTERN) - CUDA_CALL(cudaFree(m_data)); + else if (this->m_computeDevice != MANAGEDEXTERN) + CUDA_CALL(cudaFree(this->m_data)); } } @@ -147,7 +166,7 @@ namespace Microsoft { namespace MSR { namespace CNTK { if (chosenDeviceId != AUTOPLACEMATRIX) return chosenDeviceId; - __try + try { // stash previous device state // if there was one on entry: @@ -188,7 +207,7 @@ namespace Microsoft { namespace MSR { namespace CNTK { chosenDeviceId = curDev; return curDev; } - __except(1) + catch (int e) { return -1; // CPU } @@ -209,12 +228,12 @@ namespace Microsoft { namespace MSR { namespace CNTK { template ElemType* GPUMatrix::CopyToArray() const { - size_t numElements = GetNumElements(); + size_t numElements = this->GetNumElements(); if (numElements != 0) { PrepareDevice(); ElemType* pArray = new ElemType[numElements]; - CUDA_CALL(cudaMemcpy(pArray,m_pArray,sizeof(ElemType)*m_numRows*m_numCols,cudaMemcpyDeviceToHost)); + CUDA_CALL(cudaMemcpy(pArray,this->m_pArray,sizeof(ElemType)*this->m_numRows*this->m_numCols,cudaMemcpyDeviceToHost)); return pArray; } else @@ -228,7 +247,7 @@ namespace Microsoft { namespace MSR { namespace CNTK { template size_t GPUMatrix::CopyToArray(ElemType*& arrayCopyTo, size_t& currentArraySize) const { - size_t numElements = GetNumElements(); + size_t numElements = this->GetNumElements(); if (numElements > currentArraySize) { @@ -240,7 +259,7 @@ namespace Microsoft { 
namespace MSR { namespace CNTK { if (numElements != 0) { PrepareDevice(); - CUDA_CALL(cudaMemcpy(arrayCopyTo, m_pArray, sizeof(ElemType)*numElements, cudaMemcpyDeviceToHost)); + CUDA_CALL(cudaMemcpy(arrayCopyTo, this->m_pArray, sizeof(ElemType)*numElements, cudaMemcpyDeviceToHost)); } return numElements; @@ -249,29 +268,29 @@ namespace Microsoft { namespace MSR { namespace CNTK { template void GPUMatrix::ChangeDeviceTo(int to_id) { - if (!OwnBuffer()) + if (!this->OwnBuffer()) throw std::logic_error("Cannot change device on Managed external matrix"); if (to_id == CPUDEVICE) throw std::logic_error("to_id must be valid GPU"); - if (m_computeDevice==to_id) + if (this->m_computeDevice==to_id) return; PrepareDevice((short)to_id); ElemType* d_dst=NULL; - CUDA_CALL(cudaMalloc((void**)&d_dst,sizeof(ElemType)*m_numRows*m_numCols)); + CUDA_CALL(cudaMalloc((void**)&d_dst,sizeof(ElemType)*this->m_numRows*this->m_numCols)); - m_elemSizeAllocated = m_numRows*m_numCols; + this->m_elemSizeAllocated = this->m_numRows*this->m_numCols; // check to make sure we have something to copy (on init we often have zero sized allocations) - if (m_elemSizeAllocated > 0) + if (this->m_elemSizeAllocated > 0) { // first try peer access int canAccessPeer = false; - CUDA_CALL(cudaDeviceCanAccessPeer(&canAccessPeer, to_id, m_computeDevice)); + CUDA_CALL(cudaDeviceCanAccessPeer(&canAccessPeer, to_id, this->m_computeDevice)); if (canAccessPeer) { - CUDA_CALL(cudaDeviceEnablePeerAccess(m_computeDevice, 0)); - CUDA_CALL(cudaMemcpyPeer(d_dst,to_id,m_pArray,m_computeDevice,sizeof(ElemType)*m_numRows*m_numCols)); + CUDA_CALL(cudaDeviceEnablePeerAccess(this->m_computeDevice, 0)); + CUDA_CALL(cudaMemcpyPeer(d_dst,to_id,this->m_pArray,this->m_computeDevice,sizeof(ElemType)*this->m_numRows*this->m_numCols)); } else { @@ -287,8 +306,8 @@ namespace Microsoft { namespace MSR { namespace CNTK { } } PrepareDevice(); - CUDA_CALL(cudaFree(m_pArray)); - m_pArray=d_dst; + CUDA_CALL(cudaFree(this->m_pArray)); + 
this->m_pArray=d_dst; PrepareDevice((short)to_id); m_computeDevice=to_id; @@ -298,38 +317,38 @@ namespace Microsoft { namespace MSR { namespace CNTK { void GPUMatrix::performInplaceFunction(int kind) { PrepareDevice(); - LONG64 N= (LONG64) GetNumElements(); + LONG64 N= (LONG64) this->GetNumElements(); int blocksPerGrid =(int)ceil(1.0*N/threadsPerBlock); cudaEvent_t done = nullptr; if (do_sync) CUDA_CALL(cudaEventCreate(&done)); switch (kind) { case 0: - _inplaceSigmoidOnCuda<<>>(m_pArray,N); + _inplaceSigmoidOnCuda<<>>(this->m_pArray,N); break; case 1: - _inplaceTanhOnCuda<<>>(m_pArray,N); + _inplaceTanhOnCuda<<>>(this->m_pArray,N); break; case 2: - _inplaceSqrtOnCuda<<>>(m_pArray,N); + _inplaceSqrtOnCuda<<>>(this->m_pArray,N); break; case 3: - _inplaceExpOnCuda<<>>(m_pArray,N); + _inplaceExpOnCuda<<>>(this->m_pArray,N); break; case 4: - _inplaceLogOnCuda<<>>(m_pArray,N); + _inplaceLogOnCuda<<>>(this->m_pArray,N); break; case 5: - _inplaceAbsOnCuda<<>>(m_pArray,N); + _inplaceAbsOnCuda<<>>(this->m_pArray,N); break; case 6: - _inplaceLinRectDerivative<<>>(m_pArray,N); + _inplaceLinRectDerivative<<>>(this->m_pArray,N); break; case 7: - _inplaceCosineOnCuda<<>>(m_pArray,N); + _inplaceCosineOnCuda<<>>(this->m_pArray,N); break; case 8: - _inplaceNegativeSineOnCuda<<>>(m_pArray,N); + _inplaceNegativeSineOnCuda<<>>(this->m_pArray,N); break; } if (do_sync) CUDA_CALL(cudaEventRecord(done)); @@ -346,14 +365,14 @@ namespace Microsoft { namespace MSR { namespace CNTK { template void GPUMatrix::ZeroInit(int deviceId) { - m_computeDevice = deviceId; - m_pArray = nullptr; - m_numRows = 0; - m_numCols = 0; - m_elemSizeAllocated = 0; - m_matrixName=NULL; - m_format = matrixFormatDense; - m_externalBuffer = false; + this->m_computeDevice = deviceId; + this->m_pArray = NULL; + this->m_numRows = 0; + this->m_numCols = 0; + this->m_elemSizeAllocated = 0; + this->m_matrixName=NULL; + this->m_format = matrixFormatDense; + this->m_externalBuffer = false; } template @@ -381,15 +400,15 @@ 
namespace Microsoft { namespace MSR { namespace CNTK { if (deviceId == MANAGEDEXTERN) throw std::logic_error("constructor cannot be used with Managed Extern types"); ZeroInit(deviceId); - m_numRows = numRows; - m_numCols = numCols; - m_elemSizeAllocated = GetNumElements(); + this->m_numRows = numRows; + this->m_numCols = numCols; + this->m_elemSizeAllocated = this->GetNumElements(); - if (m_elemSizeAllocated != 0) + if (this->m_elemSizeAllocated != 0) { PrepareDevice(); - CUDA_CALL(cudaMalloc((void**)&m_pArray,sizeof(ElemType)*m_elemSizeAllocated)); - CUDA_CALL(cudaMemset(m_pArray,0,sizeof(ElemType)*m_elemSizeAllocated)); + CUDA_CALL(cudaMalloc((void**)&this->m_pArray,sizeof(ElemType)*this->m_elemSizeAllocated)); + CUDA_CALL(cudaMemset(this->m_pArray,0,sizeof(ElemType)*this->m_elemSizeAllocated)); } }; @@ -408,21 +427,23 @@ namespace Microsoft { namespace MSR { namespace CNTK { SetMatrixName(deepCopyFrom.m_matrixName); } +#ifndef LINUX template GPUMatrix::GPUMatrix(GPUMatrix&& moveFrom) { - m_numRows = moveFrom.m_numRows; - m_numCols = moveFrom.m_numCols; - m_computeDevice = moveFrom.m_computeDevice; - m_pArray = moveFrom.m_pArray; //shallow copy the pointer - m_matrixName=moveFrom.m_matrixName; - m_elemSizeAllocated = moveFrom.m_elemSizeAllocated; - m_format = moveFrom.m_format; - m_externalBuffer = moveFrom.m_externalBuffer; + this->m_numRows = moveFrom.m_numRows; + this->m_numCols = moveFrom.m_numCols; + this->m_computeDevice = moveFrom.m_computeDevice; + this->m_pArray = moveFrom.m_pArray; //shallow copy the pointer + this->m_matrixName=moveFrom.m_matrixName; + this->m_elemSizeAllocated = moveFrom.m_elemSizeAllocated; + this->m_format = moveFrom.m_format; + this->m_externalBuffer = moveFrom.m_externalBuffer; //release the pointer from the source object so that the destructor won't release it twice moveFrom.ZeroInit(0); } +#endif //assignment operator, deep copy template @@ -436,30 +457,32 @@ namespace Microsoft { namespace MSR { namespace CNTK { return *this; } 
+#ifndef LINUX //move assignment operator, shallow copy template GPUMatrix& GPUMatrix::operator=(GPUMatrix&& moveFrom) { if (this != &moveFrom) { - if (OwnBuffer() && m_pArray!=NULL) + if (OwnBuffer() && this->m_pArray!=NULL) { - CUDA_CALL(cudaFree(m_pArray)); + CUDA_CALL(cudaFree(this->m_pArray)); } - m_numRows = moveFrom.m_numRows; - m_numCols = moveFrom.m_numCols; - m_elemSizeAllocated = moveFrom.m_elemSizeAllocated; - m_pArray = moveFrom.m_pArray; - m_computeDevice = moveFrom.m_computeDevice; - m_format = moveFrom.m_format; - m_externalBuffer = moveFrom.m_externalBuffer; + this->m_numRows = moveFrom.m_numRows; + this->m_numCols = moveFrom.m_numCols; + this->m_elemSizeAllocated = moveFrom.m_elemSizeAllocated; + this->m_pArray = moveFrom.m_pArray; + this->m_computeDevice = moveFrom.m_computeDevice; + this->m_format = moveFrom.m_format; + this->m_externalBuffer = moveFrom.m_externalBuffer; //release the pointer from the source object so that the destructor won't release it twice moveFrom.ZeroInit(0); } return *this; } +#endif /* LINUX */ template GPUMatrix::~GPUMatrix(void) @@ -470,19 +493,19 @@ namespace Microsoft { namespace MSR { namespace CNTK { template void GPUMatrix::Clear() { - if (OwnBuffer() && m_pArray!=NULL) + if (this->OwnBuffer() && this->m_pArray!=NULL) { - if (m_computeDevice>=0) + if (this->m_computeDevice>=0) { PrepareDevice(); - cudaFree(m_pArray); - m_pArray = nullptr; - m_elemSizeAllocated = 0; + cudaFree(this->m_pArray); + this->m_pArray = NULL; + this->m_elemSizeAllocated = 0; } } BaseMatrix::Clear(); - ZeroInit(m_computeDevice); + ZeroInit(this->m_computeDevice); } #pragma endregion Constructors and Destructor @@ -490,14 +513,14 @@ namespace Microsoft { namespace MSR { namespace CNTK { int GPUMatrix::GetComputeDeviceId() const { // for externally managed memory the CUDA context will have the current device - if (m_computeDevice == MANAGEDEXTERN) + if (this->m_computeDevice == MANAGEDEXTERN) { int devId; - assert(m_externalBuffer); + 
assert(this->m_externalBuffer); CUDA_CALL(cudaGetDevice(&devId)); return devId; } - return m_computeDevice; + return this->m_computeDevice; } #pragma region Basic Operators @@ -507,10 +530,10 @@ namespace Microsoft { namespace MSR { namespace CNTK { if (numCols == 0) throw std::logic_error("The slice cannot have 0 columns."); - if (startColumn + numCols > m_numCols) + if (startColumn + numCols > this->m_numCols) throw std::logic_error("The slice is out of range of the source matrix."); - GPUMatrix slice(m_numRows, numCols, m_pArray + startColumn * m_numRows, matrixFlagDontOwnBuffer, m_computeDevice); + GPUMatrix slice(this->m_numRows, numCols, this->m_pArray + startColumn * this->m_numRows, matrixFlagDontOwnBuffer, this->m_computeDevice); return slice; } @@ -521,19 +544,19 @@ namespace Microsoft { namespace MSR { namespace CNTK { if (numCols == 0) throw std::logic_error("The slice cannot have 0 columns."); - if (startColumn + numCols > m_numCols) + if (startColumn + numCols > this->m_numCols) throw std::logic_error("The slice is out of range of the source matrix."); Clear(); - m_computeDevice=fromMatrix.m_computeDevice; - m_externalBuffer=true; - m_numRows = fromMatrix.m_numRows; - m_pArray=fromMatrix.m_pArray + startColumn * m_numRows; + this->m_computeDevice=fromMatrix.m_computeDevice; + this->m_externalBuffer=true; + this->m_numRows = fromMatrix.m_numRows; + this->m_pArray=fromMatrix.m_pArray + startColumn * this->m_numRows; - m_elemSizeAllocated = GetNumElements(); - m_matrixName=NULL; - m_format = fromMatrix.m_format; + this->m_elemSizeAllocated = this->GetNumElements(); + this->m_matrixName=NULL; + this->m_format = fromMatrix.m_format; return *this; } @@ -551,12 +574,12 @@ namespace Microsoft { namespace MSR { namespace CNTK { Resize(numRows, a.GetNumCols()); - LONG64 N=(LONG64)GetNumElements(); + LONG64 N=(LONG64)this->GetNumElements(); int blocksPerGrid =(int)ceil(1.0*N/threadsPerBlock); PrepareDevice(); cudaEvent_t done = nullptr; if (do_sync) 
CUDA_CALL(cudaEventCreate(&done)); - _assignRowSliceValuesOf<<>>(m_pArray, a.m_pArray, N, (long)startIndex, (long)numRows, (long)a.GetNumRows()); + _assignRowSliceValuesOf<<>>(this->m_pArray, a.m_pArray, N, (long)startIndex, (long)numRows, (long)a.GetNumRows()); if (do_sync) CUDA_CALL(cudaEventRecord(done)); if (do_sync) CUDA_CALL(cudaEventSynchronize(done)); if (do_sync) CUDA_CALL(cudaEventDestroy(done)); @@ -574,10 +597,10 @@ namespace Microsoft { namespace MSR { namespace CNTK { if (a.GetNumRows() != numRows) throw std::logic_error("AddToRowSliceValuesOf: a.GetNumRows() != numRows."); - if (startIndex + numRows > GetNumRows()) + if (startIndex + numRows > this->GetNumRows()) throw std::logic_error("AddToRowSliceValuesOf: startIndex + numRows exceeds GetNumRows()."); - if (a.GetNumCols() != GetNumCols()) + if (a.GetNumCols() != this->GetNumCols()) throw std::logic_error("AddToRowSliceValuesOf: columns does not match."); LONG64 N=(LONG64)a.GetNumElements(); @@ -585,7 +608,7 @@ namespace Microsoft { namespace MSR { namespace CNTK { PrepareDevice(); cudaEvent_t done = nullptr; if (do_sync) CUDA_CALL(cudaEventCreate(&done)); - _addToRowSliceValuesOf<<>>(m_pArray, a.m_pArray, N, (long)startIndex, (long)GetNumRows(), (long)a.GetNumRows()); + _addToRowSliceValuesOf<<>>(this->m_pArray, a.m_pArray, N, (long)startIndex, (long)this->GetNumRows(), (long)a.GetNumRows()); if (do_sync) CUDA_CALL(cudaEventRecord(done)); if (do_sync) CUDA_CALL(cudaEventSynchronize(done)); if (do_sync) CUDA_CALL(cudaEventDestroy(done)); @@ -650,7 +673,7 @@ namespace Microsoft { namespace MSR { namespace CNTK { template GPUMatrix GPUMatrix::Transpose() const { - if (IsEmpty()) + if (this->IsEmpty()) throw std::logic_error("Transpose: Matrix is empty."); GPUMatrix c(this->GetComputeDeviceId()); @@ -709,9 +732,22 @@ namespace Microsoft { namespace MSR { namespace CNTK { { st = 
cublasDgeam(cuHandle,transA,transB,m,n,reinterpret_cast(&alpha),reinterpret_cast(a.m_pArray),(int)a.m_numRows,reinterpret_cast(&beta),reinterpret_cast(a.m_pArray),(int)a.m_numRows,reinterpret_cast(this->m_pArray),(int)this->m_numRows); } - else throw std::exception("Unsupported template argument in GPUMatrix"); + else + { +#ifndef LINUX + throw std::exception("Unsupported template argument in GPUMatrix"); +#else + throw std::exception(); +#endif /* LINUX */ + } if (st!=CUBLAS_STATUS_SUCCESS) + { +#ifndef LINUX throw std::exception("AssignTransposeOf failed"); +#else + throw std::exception(); +#endif /* LINUX */ + } this->m_numRows=a.m_numCols; this->m_numCols=a.m_numRows; this->SetMatrixName(a.GetMatrixName()); @@ -721,15 +757,15 @@ namespace Microsoft { namespace MSR { namespace CNTK { template void GPUMatrix::SetValue(const ElemType v) { - if (IsEmpty()) + if (this->IsEmpty()) throw std::logic_error("SetValue: Matrix is empty."); - LONG64 N=(LONG64)GetNumElements(); + LONG64 N=(LONG64)this->GetNumElements(); int blocksPerGrid =(int)ceil(1.0*N/threadsPerBlock); PrepareDevice(); cudaEvent_t done = nullptr; if (do_sync) CUDA_CALL(cudaEventCreate(&done)); - _setValue<<>>(m_pArray,v,N); + _setValue<<>>(this->m_pArray,v,N); if (do_sync) CUDA_CALL(cudaEventRecord(done)); if (do_sync) CUDA_CALL(cudaEventSynchronize(done)); if (do_sync) CUDA_CALL(cudaEventDestroy(done)); @@ -738,15 +774,15 @@ namespace Microsoft { namespace MSR { namespace CNTK { template void GPUMatrix::SetValue(const ElemType* d_v) //d_v is pointer to the the value in GPU memory { - if (IsEmpty()) + if (this->IsEmpty()) throw std::logic_error("SetValue: Matrix is empty."); - LONG64 N=(LONG64)GetNumElements(); + LONG64 N=(LONG64)this->GetNumElements(); int blocksPerGrid =(int)ceil(1.0*N/threadsPerBlock); PrepareDevice(); cudaEvent_t done = nullptr; if (do_sync) CUDA_CALL(cudaEventCreate(&done)); - _setValue<<>>(m_pArray,d_v,N); + _setValue<<>>(this->m_pArray,d_v,N); if (do_sync) 
CUDA_CALL(cudaEventRecord(done)); if (do_sync) CUDA_CALL(cudaEventSynchronize(done)); if (do_sync) CUDA_CALL(cudaEventDestroy(done)); @@ -755,11 +791,11 @@ namespace Microsoft { namespace MSR { namespace CNTK { template void GPUMatrix::SetColumn(const ElemType* colPointer, size_t colInd) { - if (IsEmpty()) + if (this->IsEmpty()) throw std::logic_error("SetValue: Matrix is empty."); if (colPointer==NULL) return; - CUDA_CALL(cudaMemcpy(m_pArray+LocateColumn(colInd),colPointer,sizeof(ElemType)*m_numRows,cudaMemcpyHostToDevice)); + CUDA_CALL(cudaMemcpy(this->m_pArray+LocateColumn(colInd),colPointer,sizeof(ElemType)*this->m_numRows,cudaMemcpyHostToDevice)); } template @@ -769,10 +805,10 @@ namespace Microsoft { namespace MSR { namespace CNTK { return; Resize(deepCopyFrom.GetNumRows(), deepCopyFrom.GetNumCols()); - m_format = deepCopyFrom.m_format; // copy the format over just to be sure + this->m_format = deepCopyFrom.m_format; // copy the format over just to be sure size_t cpSize = deepCopyFrom.GetNumRows() * deepCopyFrom.GetNumCols(); if (cpSize != 0) - CUDA_CALL(cudaMemcpy(m_pArray,deepCopyFrom.m_pArray,cpSize*sizeof(ElemType),cudaMemcpyDeviceToDevice)); + CUDA_CALL(cudaMemcpy(this->m_pArray,deepCopyFrom.m_pArray,cpSize*sizeof(ElemType),cudaMemcpyDeviceToDevice)); } template @@ -782,30 +818,30 @@ namespace Microsoft { namespace MSR { namespace CNTK { if (matrixFlags&matrixFlagDontOwnBuffer) { // free the existing array if it used to be an owned array - if (OwnBuffer() && m_pArray!=NULL) + if (this->OwnBuffer() && this->m_pArray!=NULL) { PrepareDevice(); - CUDA_CALL(cudaFree(m_pArray)); + CUDA_CALL(cudaFree(this->m_pArray)); } - m_numRows = numRows; - m_numCols = numCols; - m_pArray = pArray; - m_elemSizeAllocated = GetNumElements(); - m_matrixName = NULL; - m_format = matrixFormatDense; - m_externalBuffer = true; - m_computeDevice = deviceId; + this->m_numRows = numRows; + this->m_numCols = numCols; + this->m_pArray = pArray; + this->m_elemSizeAllocated = 
this->GetNumElements(); + this->m_matrixName = NULL; + this->m_format = matrixFormatDense; + this->m_externalBuffer = true; + this->m_computeDevice = deviceId; } else { // if didn't previously own the buffer, wipe it clean - if (!OwnBuffer()) + if (!this->OwnBuffer()) { ZeroInit(deviceId); } // if the devices are different move it now - if (m_computeDevice != deviceId && deviceId >= 0) + if (this->m_computeDevice != deviceId && deviceId >= 0) { Clear(); ZeroInit(deviceId); @@ -813,36 +849,40 @@ namespace Microsoft { namespace MSR { namespace CNTK { // now resize/allocate as necessary Resize(numRows, numCols); - m_externalBuffer = false; + this->m_externalBuffer = false; // copy over the content to the buffer PrepareDevice(); - if (pArray!=nullptr) + if (pArray!=NULL) { if (!(matrixFlags&matrixFormatRowMajor)) { - CUDA_CALL(cudaMemcpy(m_pArray, pArray, sizeof(ElemType)*GetNumElements(), + CUDA_CALL(cudaMemcpy(this->m_pArray, pArray, sizeof(ElemType)*this->GetNumElements(), (matrixFlags&matrixFlagSetValueOnDevice)?cudaMemcpyDeviceToDevice:cudaMemcpyHostToDevice)); } else { +#ifndef LINUX throw std::exception("Row major isn't implemented"); +#else + throw std::exception(); +#endif /* LINUX */ } } } - m_format = matrixFormatDense; + this->m_format = matrixFormatDense; } template void GPUMatrix::SetDiagonalValue(const ElemType v) { - unsigned long N=(unsigned long)GetNumRows(); + unsigned long N=(unsigned long)this->GetNumRows(); int blocksPerGrid =(int)ceil(1.0*N/threadsPerBlock); PrepareDevice(); cudaEvent_t done = nullptr; if (do_sync) CUDA_CALL(cudaEventCreate(&done)); - _setDiagonalValue<<>>(m_pArray,v,N,(unsigned long)GetNumRows()); + _setDiagonalValue<<>>(this->m_pArray,v,N,(unsigned long)this->GetNumRows()); if (do_sync) CUDA_CALL(cudaEventRecord(done)); if (do_sync) CUDA_CALL(cudaEventSynchronize(done)); if (do_sync) CUDA_CALL(cudaEventDestroy(done)); @@ -851,10 +891,10 @@ namespace Microsoft { namespace MSR { namespace CNTK { template void 
GPUMatrix::SetDiagonalValue(GPUMatrix& vector) { - if (IsEmpty() || vector.IsEmpty()) + if (this->IsEmpty() || vector.IsEmpty()) throw std::logic_error("SetDiagonalValue: Matrix is empty."); - if (GetNumRows() != GetNumCols()) + if (this->GetNumRows() != this->GetNumCols()) throw std::logic_error("SetDiagonalValue: NumRows and NumCols do not agree."); if (vector.GetNumRows() != 1 && vector.GetNumCols() != 1) @@ -863,16 +903,16 @@ namespace Microsoft { namespace MSR { namespace CNTK { if (vector.GetNumElements() == 1) //reduce to simple form SetDiagonalValue(vector.m_pArray[0]); - else if (vector.GetNumRows() != GetNumRows()) + else if (vector.GetNumRows() != this->GetNumRows()) throw std::logic_error("SetDiagonalValue: input vector's dimension does not agree with [this]."); else { - long N=(long)GetNumRows(); + long N=(long)this->GetNumRows(); int blocksPerGrid =(int)ceil(1.0*N/threadsPerBlock); PrepareDevice(); cudaEvent_t done = nullptr; if (do_sync) CUDA_CALL(cudaEventCreate(&done)); - _setDiagonalValueFromVector<<>>(m_pArray,vector.m_pArray,N); + _setDiagonalValueFromVector<<>>(this->m_pArray,vector.m_pArray,N); if (do_sync) CUDA_CALL(cudaEventRecord(done)); if (do_sync) CUDA_CALL(cudaEventSynchronize(done)); if (do_sync) CUDA_CALL(cudaEventDestroy(done)); @@ -896,11 +936,11 @@ namespace Microsoft { namespace MSR { namespace CNTK { CUDA_CALL(cudaEventCreate(&done)); if (sizeof(ElemType)==sizeof(float)) { - CURAND_CALL(curandGenerateUniform(((curandGenerator_t*)s_curandGenerator)[0], reinterpret_cast(m_pArray), GetNumElements())); + CURAND_CALL(curandGenerateUniform(((curandGenerator_t*)s_curandGenerator)[0], reinterpret_cast(this->m_pArray), this->GetNumElements())); } else { - CURAND_CALL(curandGenerateUniformDouble(((curandGenerator_t*)s_curandGenerator)[0], reinterpret_cast(m_pArray), GetNumElements())); + CURAND_CALL(curandGenerateUniformDouble(((curandGenerator_t*)s_curandGenerator)[0], reinterpret_cast(this->m_pArray), this->GetNumElements())); } 
CUDA_CALL(cudaEventRecord(done)); CUDA_CALL(cudaEventSynchronize(done)); @@ -911,7 +951,7 @@ namespace Microsoft { namespace MSR { namespace CNTK { size_t blocksPerGrid = (size_t)ceil(N/(double)threadsPerBlock); if (do_sync) CUDA_CALL(cudaEventCreate(&done)); - _rescaleToRange<<>>(m_pArray,N,low,high); + _rescaleToRange<<>>(this->m_pArray,N,low,high); if (do_sync) CUDA_CALL(cudaEventRecord(done)); if (do_sync) CUDA_CALL(cudaEventSynchronize(done)); if (do_sync) CUDA_CALL(cudaEventDestroy(done)); @@ -960,11 +1000,11 @@ namespace Microsoft { namespace MSR { namespace CNTK { CUDA_CALL(cudaEventCreate(&done)); if (sizeof(ElemType)==sizeof(float)) { - CURAND_CALL(curandGenerateUniform((((curandGenerator_t*)s_curandGenerator)[0]), reinterpret_cast(m_pArray), GetNumElements())); + CURAND_CALL(curandGenerateUniform((((curandGenerator_t*)s_curandGenerator)[0]), reinterpret_cast(this->m_pArray), this->GetNumElements())); } else { - CURAND_CALL(curandGenerateUniformDouble((((curandGenerator_t*)s_curandGenerator)[0]), reinterpret_cast(m_pArray), GetNumElements())); + CURAND_CALL(curandGenerateUniformDouble((((curandGenerator_t*)s_curandGenerator)[0]), reinterpret_cast(this->m_pArray), this->GetNumElements())); } CUDA_CALL(cudaEventRecord(done)); CUDA_CALL(cudaEventSynchronize(done)); @@ -974,7 +1014,7 @@ namespace Microsoft { namespace MSR { namespace CNTK { size_t N=GetNumElements(); size_t blocksPerGrid = (size_t)ceil(N/(double)threadsPerBlock); if (do_sync) CUDA_CALL(cudaEventCreate(&done)); - _setMaskAndScale<<>>(m_pArray,N,maskRate,scaleValue); + _setMaskAndScale<<>>(this->m_pArray,N,maskRate,scaleValue); if (do_sync) CUDA_CALL(cudaEventRecord(done)); if (do_sync) CUDA_CALL(cudaEventSynchronize(done)); if (do_sync) CUDA_CALL(cudaEventDestroy(done)); @@ -991,8 +1031,8 @@ namespace Microsoft { namespace MSR { namespace CNTK { assert(this->GetNumRows() == gradients.GetNumRows() && this->GetNumCols() == gradients.GetNumCols()); - int blocksPerGrid = (GetNumElements() + 
threadsPerBlock -1 )/threadsPerBlock; - _adagrad<<>>(m_pArray, gradients.m_pArray, GetNumElements()); + int blocksPerGrid = (this->GetNumElements() + threadsPerBlock -1 )/threadsPerBlock; + _adagrad<<>>(this->m_pArray, gradients.m_pArray, this->GetNumElements()); } template @@ -1055,41 +1095,41 @@ namespace Microsoft { namespace MSR { namespace CNTK { template void GPUMatrix::Reshape(const size_t numRows, const size_t numCols) { - assert (numRows*numCols == GetNumElements()); - if (numRows*numCols != GetNumElements()) + assert (numRows*numCols == this->GetNumElements()); + if (numRows*numCols != this->GetNumElements()) throw std::invalid_argument("Reshape: total number of elements does not match."); - m_numRows = numRows; - m_numCols = numCols; + this->m_numRows = numRows; + this->m_numCols = numCols; } template void GPUMatrix::Resize(const size_t numRows, const size_t numCols, bool growOnly) { - if (m_numRows==numRows && m_numCols==numCols) + if (this->m_numRows==numRows && this->m_numCols==numCols) return; - m_numRows = numRows; - m_numCols = numCols; + this->m_numRows = numRows; + this->m_numCols = numCols; - size_t numElements = GetNumElements(); - if (numElements > m_elemSizeAllocated || (!growOnly && numElements != m_elemSizeAllocated)) + size_t numElements = this->GetNumElements(); + if (numElements > this->m_elemSizeAllocated || (!growOnly && numElements != this->m_elemSizeAllocated)) { - if (IsEmpty()) + if (this->IsEmpty()) { - m_elemSizeAllocated = 0; - m_pArray = NULL; + this->m_elemSizeAllocated = 0; + this->m_pArray = NULL; } else { - if (!OwnBuffer()) + if (!this->OwnBuffer()) throw std::invalid_argument("Can't resize a externally managed matrix"); PrepareDevice(); - if (m_pArray!=NULL) - CUDA_CALL(cudaFree(m_pArray)); //delete and reallocate - m_elemSizeAllocated = numElements; - CUDA_CALL(cudaMalloc((void**)&m_pArray,sizeof(ElemType)*m_elemSizeAllocated)); - CUDA_CALL(cudaMemset(m_pArray,0,sizeof(ElemType)*m_elemSizeAllocated)); + if 
(this->m_pArray!=NULL) + CUDA_CALL(cudaFree(this->m_pArray)); //delete and reallocate + this->m_elemSizeAllocated = numElements; + CUDA_CALL(cudaMalloc((void**)&this->m_pArray,sizeof(ElemType)*this->m_elemSizeAllocated)); + CUDA_CALL(cudaMemset(this->m_pArray,0,sizeof(ElemType)*this->m_elemSizeAllocated)); } } } @@ -1097,22 +1137,22 @@ namespace Microsoft { namespace MSR { namespace CNTK { template size_t GPUMatrix::LocateElement (const size_t row, const size_t col) const { - assert (row < m_numRows && col < m_numCols); - return col * m_numRows + row; // matrix in column-wise storage + assert (row < this->m_numRows && col < this->m_numCols); + return col * this->m_numRows + row; // matrix in column-wise storage } template size_t GPUMatrix::LocateColumn (const size_t col) const { - assert (col < m_numCols); - return col * m_numRows; // matrix in column-wise storage + assert (col < this->m_numCols); + return col * this->m_numRows; // matrix in column-wise storage } template ElemType GPUMatrix::Get00Element() const { ElemType res=0; - CUDA_CALL(cudaMemcpy(&res,m_pArray,sizeof(ElemType),cudaMemcpyDeviceToHost)); + CUDA_CALL(cudaMemcpy(&res,this->m_pArray,sizeof(ElemType),cudaMemcpyDeviceToHost)); return res; } #pragma endregion Basic Operators @@ -1121,13 +1161,13 @@ namespace Microsoft { namespace MSR { namespace CNTK { template GPUMatrix& GPUMatrix::operator+= (ElemType alpha) { - if (IsEmpty()) + if (this->IsEmpty()) throw std::logic_error("operator+=: Matrix is empty."); - LONG64 N=(LONG64)GetNumElements(); + LONG64 N=(LONG64)this->GetNumElements(); int blocksPerGrid =(int)ceil(1.0*N/threadsPerBlock); cudaEvent_t done = nullptr; if (do_sync) CUDA_CALL(cudaEventCreate(&done)); - _addValue<<>>(m_pArray,alpha,N); + _addValue<<>>(this->m_pArray,alpha,N); if (do_sync) CUDA_CALL(cudaEventRecord(done)); if (do_sync) CUDA_CALL(cudaEventSynchronize(done)); if (do_sync) CUDA_CALL(cudaEventDestroy(done)); @@ -1137,10 +1177,10 @@ namespace Microsoft { namespace MSR { namespace 
CNTK { template GPUMatrix GPUMatrix::operator+ (ElemType alpha) const { - if (IsEmpty()) + if (this->IsEmpty()) throw std::logic_error("operator+: Matrix is empty."); - auto& us=*this; + const GPUMatrix& us=*this; GPUMatrix c(us); c+=alpha; return c; @@ -1180,7 +1220,7 @@ namespace Microsoft { namespace MSR { namespace CNTK { template GPUMatrix GPUMatrix::operator+ (const GPUMatrix& a) const { - if (GetNumElements()==1) + if (this->GetNumElements()==1) { GPUMatrix c(a); c+=this->Get00Element(); @@ -1211,7 +1251,7 @@ namespace Microsoft { namespace MSR { namespace CNTK { template GPUMatrix& GPUMatrix::operator-= (ElemType alpha) { - if (IsEmpty()) + if (this->IsEmpty()) throw std::logic_error("operato-=: Matrix is empty."); return this->operator+=(-1*alpha); } @@ -1219,7 +1259,7 @@ namespace Microsoft { namespace MSR { namespace CNTK { template GPUMatrix GPUMatrix::operator- (ElemType alpha) const { - if (IsEmpty()) + if (this->IsEmpty()) throw std::logic_error("operator-: Matrix is empty."); return this->operator+(-1*alpha); } @@ -1228,12 +1268,12 @@ namespace Microsoft { namespace MSR { namespace CNTK { GPUMatrix& GPUMatrix::AssignDifferenceOf(const ElemType alpha, const GPUMatrix& a) { this->Resize(a.m_numRows,a.m_numCols); - LONG64 N=(LONG64)GetNumElements(); + LONG64 N=(LONG64)this->GetNumElements(); int blocksPerGrid =(int)ceil(1.0*N/threadsPerBlock); a.PrepareDevice(); cudaEvent_t done = nullptr; if (do_sync) CUDA_CALL(cudaEventCreate(&done)); - _assignDifferenceOf1<<>>(m_pArray,alpha,a.m_pArray,N); + _assignDifferenceOf1<<>>(this->m_pArray,alpha,a.m_pArray,N); if (do_sync) CUDA_CALL(cudaEventRecord(done)); if (do_sync) CUDA_CALL(cudaEventSynchronize(done)); if (do_sync) CUDA_CALL(cudaEventDestroy(done)); @@ -1248,12 +1288,12 @@ namespace Microsoft { namespace MSR { namespace CNTK { GPUMatrix& GPUMatrix::AssignDifferenceOf(const GPUMatrix& a, const ElemType alpha) { this->Resize(a.m_numRows,a.m_numCols); - LONG64 N=(LONG64)GetNumElements(); + LONG64 
N=(LONG64)this->GetNumElements(); int blocksPerGrid =(int)ceil(1.0*N/threadsPerBlock); a.PrepareDevice(); cudaEvent_t done = nullptr; if (do_sync) CUDA_CALL(cudaEventCreate(&done)); - _assignDifferenceOf2<<>>(m_pArray,alpha,a.m_pArray,N); + _assignDifferenceOf2<<>>(this->m_pArray,alpha,a.m_pArray,N); if (do_sync) CUDA_CALL(cudaEventRecord(done)); if (do_sync) CUDA_CALL(cudaEventSynchronize(done)); if (do_sync) CUDA_CALL(cudaEventDestroy(done)); @@ -1306,7 +1346,7 @@ namespace Microsoft { namespace MSR { namespace CNTK { template GPUMatrix GPUMatrix::operator* (ElemType alpha) const { - GPUMatrix c(GetNumRows(), GetNumCols()); + GPUMatrix c(this->GetNumRows(), this->GetNumCols()); Scale(alpha, *this, c); return c; } @@ -1341,8 +1381,8 @@ namespace Microsoft { namespace MSR { namespace CNTK { template GPUMatrix GPUMatrix::operator* (const GPUMatrix& a) const { - auto& us = *this; - if (GetNumElements() == 1) + const GPUMatrix& us = *this; + if (this->GetNumElements() == 1) { GPUMatrix c(this->GetComputeDeviceId()); c.AssignProductOf(this->Get00Element(), a); @@ -1379,7 +1419,7 @@ namespace Microsoft { namespace MSR { namespace CNTK { template GPUMatrix& GPUMatrix::operator^= (ElemType alpha) { - auto& us = *this; + GPUMatrix& us = *this; ElementWisePower(alpha, us, us); return us; } @@ -1387,7 +1427,7 @@ namespace Microsoft { namespace MSR { namespace CNTK { template GPUMatrix GPUMatrix::operator^ (ElemType alpha) const { - GPUMatrix c(GetNumRows(), GetNumCols()); + GPUMatrix c(this->GetNumRows(), this->GetNumCols()); ElementWisePower(alpha, *this, c); return c; } @@ -1410,15 +1450,15 @@ namespace Microsoft { namespace MSR { namespace CNTK { if (!(a.GetNumRows() == b.GetNumRows() && a.GetNumCols() == b.GetNumCols())) throw std::invalid_argument("The input matrix dimensions do not match."); - if (!(a.GetNumRows() == GetNumRows() && a.GetNumCols() == GetNumCols())) + if (!(a.GetNumRows() == this->GetNumRows() && a.GetNumCols() == this->GetNumCols())) throw 
std::invalid_argument("The input matrix dimensions do not match [this]."); - LONG64 N=(LONG64)GetNumElements(); + LONG64 N=(LONG64)this->GetNumElements(); int blocksPerGrid =(int)ceil(1.0*N/threadsPerBlock); a.PrepareDevice(); cudaEvent_t done = nullptr; if (do_sync) CUDA_CALL(cudaEventCreate(&done)); - _addElementProductOf<<>>(m_pArray,a.m_pArray,b.m_pArray,N); + _addElementProductOf<<>>(this->m_pArray,a.m_pArray,b.m_pArray,N); if (do_sync) CUDA_CALL(cudaEventRecord(done)); if (do_sync) CUDA_CALL(cudaEventSynchronize(done)); if (do_sync) CUDA_CALL(cudaEventDestroy(done)); @@ -1428,10 +1468,10 @@ namespace Microsoft { namespace MSR { namespace CNTK { template GPUMatrix& GPUMatrix::ColumnElementMultiplyWith(const GPUMatrix& a) { - if (a.IsEmpty() || IsEmpty()) + if (a.IsEmpty() || this->IsEmpty()) throw std::logic_error("ColumnElementMultiplyWith: Matrix is empty."); - if (!(a.GetNumRows() == GetNumRows() && a.GetNumCols() == 1)) + if (!(a.GetNumRows() == this->GetNumRows() && a.GetNumCols() == 1)) throw std::invalid_argument("ColumnElementMultiplyWith: The input matrix should be a col vector and match [this]'s rows."); long N=(long)a.GetNumRows(); @@ -1440,7 +1480,7 @@ namespace Microsoft { namespace MSR { namespace CNTK { a.PrepareDevice(); cudaEvent_t done = nullptr; if (do_sync) CUDA_CALL(cudaEventCreate(&done)); - _columnElementMultiplyWith<<>>(m_pArray,a.m_pArray,N,M); + _columnElementMultiplyWith<<>>(this->m_pArray,a.m_pArray,N,M); if (do_sync) CUDA_CALL(cudaEventRecord(done)); if (do_sync) CUDA_CALL(cudaEventSynchronize(done)); if (do_sync) CUDA_CALL(cudaEventDestroy(done)); @@ -1451,10 +1491,10 @@ namespace Microsoft { namespace MSR { namespace CNTK { template GPUMatrix& GPUMatrix::RowElementMultiplyWith(const GPUMatrix& a) { - if (a.IsEmpty() || IsEmpty()) + if (a.IsEmpty() || this->IsEmpty()) throw std::logic_error("RowElementMultiplyWith: Matrix is empty."); - if (!(a.GetNumRows() == 1 && a.GetNumCols() == GetNumCols())) + if (!(a.GetNumRows() == 1 && 
a.GetNumCols() == this->GetNumCols())) throw std::invalid_argument("RowElementMultiplyWith: The input matrix should be a row vector and match [this]'s columns."); long N = (long)this->GetNumRows(); @@ -1463,7 +1503,7 @@ namespace Microsoft { namespace MSR { namespace CNTK { a.PrepareDevice(); cudaEvent_t done = nullptr; if (do_sync) CUDA_CALL(cudaEventCreate(&done)); - _rowElementMultiplyWith<<>>(m_pArray,a.m_pArray,N,M); + _rowElementMultiplyWith<<>>(this->m_pArray,a.m_pArray,N,M); if (do_sync) CUDA_CALL(cudaEventRecord(done)); if (do_sync) CUDA_CALL(cudaEventSynchronize(done)); if (do_sync) CUDA_CALL(cudaEventDestroy(done)); @@ -1520,15 +1560,15 @@ namespace Microsoft { namespace MSR { namespace CNTK { template GPUMatrix& GPUMatrix::ElementInverse () { - if (IsEmpty()) + if (this->IsEmpty()) throw std::logic_error("ElementInverse: Matrix is empty."); - LONG64 N=(LONG64)GetNumElements(); + LONG64 N=(LONG64)this->GetNumElements(); int blocksPerGrid =(int)ceil(1.0*N/threadsPerBlock); PrepareDevice(); cudaEvent_t done = nullptr; if (do_sync) CUDA_CALL(cudaEventCreate(&done)); - _elemInverse<<>>(m_pArray,N); + _elemInverse<<>>(this->m_pArray,N); if (do_sync) CUDA_CALL(cudaEventRecord(done)); if (do_sync) CUDA_CALL(cudaEventSynchronize(done)); if (do_sync) CUDA_CALL(cudaEventDestroy(done)); @@ -1553,12 +1593,12 @@ namespace Microsoft { namespace MSR { namespace CNTK { GPUMatrix& GPUMatrix::AssignSigmoidOf (const GPUMatrix& a) { this->Resize(a.GetNumRows(),a.GetNumCols()); - LONG64 N=(LONG64)GetNumElements(); + LONG64 N=(LONG64)this->GetNumElements(); int blocksPerGrid =(int)ceil(1.0*N/threadsPerBlock); PrepareDevice(); cudaEvent_t done = nullptr; if (do_sync) CUDA_CALL(cudaEventCreate(&done)); - _assignSigmoidOf<<>>(a.m_pArray,m_pArray,N); + _assignSigmoidOf<<>>(a.m_pArray,this->m_pArray,N); if (do_sync) CUDA_CALL(cudaEventRecord(done)); if (do_sync) CUDA_CALL(cudaEventSynchronize(done)); if (do_sync) CUDA_CALL(cudaEventDestroy(done)); @@ -1585,12 +1625,12 @@ namespace 
Microsoft { namespace MSR { namespace CNTK { Resize(a.GetNumRows(), a.GetNumCols()); PrepareDevice(); - LONG64 N=(LONG64)GetNumElements(); + LONG64 N=(LONG64)this->GetNumElements(); int blocksPerGrid =(int)ceil(1.0*N/threadsPerBlock); cudaEvent_t done = nullptr; if (do_sync) CUDA_CALL(cudaEventCreate(&done)); - _assignSigmoidDerivative<<>>(a.m_pArray, m_pArray, N); + _assignSigmoidDerivative<<>>(a.m_pArray, this->m_pArray, N); if (do_sync) CUDA_CALL(cudaEventRecord(done)); if (do_sync) CUDA_CALL(cudaEventSynchronize(done)); if (do_sync) CUDA_CALL(cudaEventDestroy(done)); @@ -1622,7 +1662,7 @@ namespace Microsoft { namespace MSR { namespace CNTK { PrepareDevice(); if (isColWise) { - long N=(long)GetNumCols(); //one kernel per column + long N=(long)this->GetNumCols(); //one kernel per column int blocksPerGrid =(int)ceil(N*1.0/threadsPerBlock); cudaEvent_t done = nullptr; if (do_sync) CUDA_CALL(cudaEventCreate(&done)); @@ -1633,7 +1673,7 @@ namespace Microsoft { namespace MSR { namespace CNTK { } else { - long N=(long)GetNumRows(); //one kernel per column + long N=(long)this->GetNumRows(); //one kernel per column int blocksPerGrid =(int)ceil(N*1.0/threadsPerBlock); cudaEvent_t done = nullptr; if (do_sync) CUDA_CALL(cudaEventCreate(&done)); @@ -1777,15 +1817,15 @@ namespace Microsoft { namespace MSR { namespace CNTK { template GPUMatrix& GPUMatrix::InplaceTruncateBottom (const ElemType threshold) { - if (IsEmpty()) + if (this->IsEmpty()) throw std::logic_error("InplaceTruncateBottom: Matrix is empty."); - LONG64 N=(LONG64)GetNumElements(); + LONG64 N=(LONG64)this->GetNumElements(); int blocksPerGrid =(int)ceil(N*1.0/threadsPerBlock); PrepareDevice(); cudaEvent_t done = nullptr; if (do_sync) CUDA_CALL(cudaEventCreate(&done)); - _inplaceTruncateBottom<<>>(m_pArray,threshold,N); + _inplaceTruncateBottom<<>>(this->m_pArray,threshold,N); if (do_sync) CUDA_CALL(cudaEventRecord(done)); if (do_sync) CUDA_CALL(cudaEventSynchronize(done)); if (do_sync) 
CUDA_CALL(cudaEventDestroy(done)); @@ -1803,12 +1843,12 @@ namespace Microsoft { namespace MSR { namespace CNTK { Resize(a.GetNumRows(), a.GetNumCols()); } - LONG64 N=(LONG64)GetNumElements(); + LONG64 N=(LONG64)this->GetNumElements(); int blocksPerGrid =(int)ceil(N*1.0/threadsPerBlock); PrepareDevice(); cudaEvent_t done = nullptr; if (do_sync) CUDA_CALL(cudaEventCreate(&done)); - _assignTruncateBottom<<>>(m_pArray,a.m_pArray,threshold,N); + _assignTruncateBottom<<>>(this->m_pArray,a.m_pArray,threshold,N); if (do_sync) CUDA_CALL(cudaEventRecord(done)); if (do_sync) CUDA_CALL(cudaEventSynchronize(done)); if (do_sync) CUDA_CALL(cudaEventDestroy(done)); @@ -1819,14 +1859,14 @@ namespace Microsoft { namespace MSR { namespace CNTK { template GPUMatrix& GPUMatrix::InplaceTruncateTop (const ElemType threshold) { - if (IsEmpty()) + if (this->IsEmpty()) throw std::logic_error("InplaceTruncateTop: Matrix is empty."); - LONG64 N=(LONG64)GetNumElements(); + LONG64 N=(LONG64)this->GetNumElements(); int blocksPerGrid =(int)ceil(N*1.0/threadsPerBlock); PrepareDevice(); cudaEvent_t done = nullptr; if (do_sync) CUDA_CALL(cudaEventCreate(&done)); - _inplaceTruncateTop<<>>(m_pArray,threshold,N); + _inplaceTruncateTop<<>>(this->m_pArray,threshold,N); if (do_sync) CUDA_CALL(cudaEventRecord(done)); if (do_sync) CUDA_CALL(cudaEventSynchronize(done)); if (do_sync) CUDA_CALL(cudaEventDestroy(done)); @@ -1844,12 +1884,12 @@ namespace Microsoft { namespace MSR { namespace CNTK { Resize(a.GetNumRows(), a.GetNumCols()); } - LONG64 N=(LONG64)GetNumElements(); + LONG64 N=(LONG64)this->GetNumElements(); int blocksPerGrid =(int)ceil(N*1.0/threadsPerBlock); a.PrepareDevice(); cudaEvent_t done = nullptr; if (do_sync) CUDA_CALL(cudaEventCreate(&done)); - _assignTruncateTop<<>>(m_pArray,a.m_pArray,threshold,N); + _assignTruncateTop<<>>(this->m_pArray,a.m_pArray,threshold,N); if (do_sync) CUDA_CALL(cudaEventRecord(done)); if (do_sync) CUDA_CALL(cudaEventSynchronize(done)); if (do_sync) 
CUDA_CALL(cudaEventDestroy(done)); @@ -1859,14 +1899,14 @@ namespace Microsoft { namespace MSR { namespace CNTK { template GPUMatrix& GPUMatrix::SetToZeroIfAbsLessThan (const ElemType threshold) { - if (IsEmpty()) + if (this->IsEmpty()) throw std::logic_error("SetToZeroIfAbsLessThan: Matrix is empty."); - LONG64 N=(LONG64)GetNumElements(); + LONG64 N=(LONG64)this->GetNumElements(); int blocksPerGrid =(int)ceil(N*1.0/threadsPerBlock); PrepareDevice(); cudaEvent_t done = nullptr; if (do_sync) CUDA_CALL(cudaEventCreate(&done)); - _setToZeroIfAbsLessThan<<>>(m_pArray,threshold,N); + _setToZeroIfAbsLessThan<<>>(this->m_pArray,threshold,N); if (do_sync) CUDA_CALL(cudaEventRecord(done)); if (do_sync) CUDA_CALL(cudaEventSynchronize(done)); if (do_sync) CUDA_CALL(cudaEventDestroy(done)); @@ -1883,13 +1923,13 @@ namespace Microsoft { namespace MSR { namespace CNTK { if (sizeof(ElemType)==sizeof(float)) { float res=0; - cublasSasum(cuHandle,(LONG64)GetNumElements(),reinterpret_cast(m_pArray),1,&res); + cublasSasum(cuHandle,(LONG64)this->GetNumElements(),reinterpret_cast(this->m_pArray),1,&res); return res; } else { double res=0; - cublasDasum(cuHandle,(LONG64)GetNumElements(),reinterpret_cast(m_pArray),1,&res); + cublasDasum(cuHandle,(LONG64)this->GetNumElements(),reinterpret_cast(this->m_pArray),1,&res); return ElemType(res); } } @@ -1905,7 +1945,7 @@ namespace Microsoft { namespace MSR { namespace CNTK { ElemType h_sum; CUDA_CALL(cudaMalloc((void**)&d_sum,sizeof(ElemType))); //WARNING: THIS kernel is not the most efficient way! 
- _reductionSum<<<1,1024,0,t_stream>>>(m_pArray,d_sum,(LONG64)this->GetNumElements()); + _reductionSum<<<1,1024,0,t_stream>>>(this->m_pArray,d_sum,(LONG64)this->GetNumElements()); CUDA_CALL(cudaMemcpy(&h_sum,d_sum,sizeof(ElemType),cudaMemcpyDeviceToHost)); CUDA_CALL(cudaFree(d_sum)); return h_sum; @@ -1940,7 +1980,7 @@ namespace Microsoft { namespace MSR { namespace CNTK { ElemType* d_sum = NULL; CUDA_CALL(cudaMalloc((void**)&d_sum,sizeof(ElemType))); //WARNING: THIS kernel is not the most efficient way! - _reductionSum<<<1,1024,0,t_stream>>>(m_pArray,d_sum,(LONG64)this->GetNumElements()); + _reductionSum<<<1,1024,0,t_stream>>>(this->m_pArray,d_sum,(LONG64)this->GetNumElements()); DeviceBoundNumber result; result.ShallowCopyFrom(d_sum,GetComputeDeviceId()); return result; @@ -1954,17 +1994,17 @@ namespace Microsoft { namespace MSR { namespace CNTK { if (sizeof(ElemType)==sizeof(float)) { int resInd=0; - cublasIsamax(cuHandle,(LONG64)GetNumElements(),reinterpret_cast(m_pArray),1,&resInd); + cublasIsamax(cuHandle,(LONG64)this->GetNumElements(),reinterpret_cast(this->m_pArray),1,&resInd); resInd--; - CUDA_CALL(cudaMemcpy(reinterpret_cast(&res),reinterpret_cast(m_pArray+resInd),sizeof(float),cudaMemcpyDeviceToHost)); + CUDA_CALL(cudaMemcpy(reinterpret_cast(&res),reinterpret_cast(this->m_pArray+resInd),sizeof(float),cudaMemcpyDeviceToHost)); return res; } else { int resInd=0; - cublasIdamax(cuHandle,(LONG64)GetNumElements(),reinterpret_cast(m_pArray),1,&resInd); + cublasIdamax(cuHandle,(LONG64)this->GetNumElements(),reinterpret_cast(this->m_pArray),1,&resInd); resInd--; - CUDA_CALL(cudaMemcpy(reinterpret_cast(&res),m_pArray+resInd,sizeof(float),cudaMemcpyDeviceToHost)); + CUDA_CALL(cudaMemcpy(reinterpret_cast(&res),this->m_pArray+resInd,sizeof(float),cudaMemcpyDeviceToHost)); return res; } } @@ -1973,20 +2013,20 @@ namespace Microsoft { namespace MSR { namespace CNTK { template GPUMatrix& GPUMatrix::ElementMultiplyWith (const GPUMatrix& a) { - if (IsEmpty() || 
a.IsEmpty()) + if (this->IsEmpty() || a.IsEmpty()) throw std::logic_error("ElementMultiplyWith: Matrix is empty."); - auto& us=*this; + GPUMatrix& us=*this; assert (us.GetNumRows() == a.GetNumRows() && us.GetNumCols() == a.GetNumCols()); if (us.GetNumRows() != a.GetNumRows() || us.GetNumCols() != a.GetNumCols()) throw std::invalid_argument("The matrix dimensions do not match."); - LONG64 N=(LONG64)GetNumElements(); + LONG64 N=(LONG64)this->GetNumElements(); int blocksPerGrid =(int)ceil(((double)N)/threadsPerBlock); a.PrepareDevice(); cudaEvent_t done = nullptr; if (do_sync) CUDA_CALL(cudaEventCreate(&done)); - _elemMul<<>>(m_pArray,a.m_pArray,N); + _elemMul<<>>(this->m_pArray,a.m_pArray,N); if (do_sync) CUDA_CALL(cudaEventRecord(done)); if (do_sync) CUDA_CALL(cudaEventSynchronize(done)); if (do_sync) CUDA_CALL(cudaEventDestroy(done)); @@ -2004,12 +2044,12 @@ namespace Microsoft { namespace MSR { namespace CNTK { throw std::invalid_argument("The input matrix dimensions do not match."); Resize(a.GetNumRows(), a.GetNumCols()); - LONG64 N=(LONG64)GetNumElements(); + LONG64 N=(LONG64)this->GetNumElements(); int blocksPerGrid =(int)ceil(((double)N)/threadsPerBlock); a.PrepareDevice(); cudaEvent_t done = nullptr; if (do_sync) CUDA_CALL(cudaEventCreate(&done)); - _assignElementProductOf<<>>(m_pArray,a.m_pArray,b.m_pArray,N); + _assignElementProductOf<<>>(this->m_pArray,a.m_pArray,b.m_pArray,N); if (do_sync) CUDA_CALL(cudaEventRecord(done)); if (do_sync) CUDA_CALL(cudaEventSynchronize(done)); if (do_sync) CUDA_CALL(cudaEventDestroy(done)); @@ -2033,12 +2073,12 @@ namespace Microsoft { namespace MSR { namespace CNTK { throw std::invalid_argument("The input matrix dimensions do not match."); Resize(a.GetNumRows(), a.GetNumCols()); - LONG64 N=(LONG64)GetNumElements(); + LONG64 N=(LONG64)this->GetNumElements(); int blocksPerGrid =(int)ceil(((double)N)/threadsPerBlock); a.PrepareDevice(); cudaEvent_t done = nullptr; if (do_sync) CUDA_CALL(cudaEventCreate(&done)); - 
_assignElementDivisionOf<<>>(m_pArray,a.m_pArray,b.m_pArray,N); + _assignElementDivisionOf<<>>(this->m_pArray,a.m_pArray,b.m_pArray,N); if (do_sync) CUDA_CALL(cudaEventRecord(done)); if (do_sync) CUDA_CALL(cudaEventSynchronize(done)); if (do_sync) CUDA_CALL(cudaEventDestroy(done)); @@ -2054,7 +2094,7 @@ namespace Microsoft { namespace MSR { namespace CNTK { template void GPUMatrix::VectorNorm1(GPUMatrix& c, const bool isColWise) const { - if (IsEmpty()) + if (this->IsEmpty()) throw std::logic_error("VectorNorm1: Matrix is empty."); const long n = (long)this->GetNumRows(); @@ -2079,7 +2119,7 @@ namespace Microsoft { namespace MSR { namespace CNTK { } if (do_sync) CUDA_CALL(cudaEventCreate(&done)); - _vectorNorm1<<>>(c.m_pArray, m_pArray,n,m,isColWise); + _vectorNorm1<<>>(c.m_pArray, this->m_pArray,n,m,isColWise); if (do_sync) CUDA_CALL(cudaEventRecord(done)); if (do_sync) CUDA_CALL(cudaEventSynchronize(done)); if (do_sync) CUDA_CALL(cudaEventDestroy(done)); @@ -2095,7 +2135,7 @@ namespace Microsoft { namespace MSR { namespace CNTK { template void GPUMatrix::VectorNorm2(GPUMatrix& c, const bool isColWise) const { - if (IsEmpty()) + if (this->IsEmpty()) throw std::logic_error("VectorNorm2: Matrix is empty."); const long n = (long)this->GetNumRows(); @@ -2120,7 +2160,7 @@ namespace Microsoft { namespace MSR { namespace CNTK { } if (do_sync) CUDA_CALL(cudaEventCreate(&done)); - _vectorNorm2<<>>(c.m_pArray, m_pArray,n,m,isColWise); + _vectorNorm2<<>>(c.m_pArray, this->m_pArray,n,m,isColWise); if (do_sync) CUDA_CALL(cudaEventRecord(done)); if (do_sync) CUDA_CALL(cudaEventSynchronize(done)); if (do_sync) CUDA_CALL(cudaEventDestroy(done)); @@ -2136,7 +2176,7 @@ namespace Microsoft { namespace MSR { namespace CNTK { template void GPUMatrix::VectorNormInf(GPUMatrix& c, const bool isColWise) const { - if (IsEmpty()) + if (this->IsEmpty()) throw std::logic_error("VectorMax: Matrix is empty."); //this implementation is not efficient @@ -2174,12 +2214,12 @@ namespace Microsoft { 
namespace MSR { namespace CNTK { long rowsA = (long)a.GetNumRows(); long rowsB = (long)b.GetNumRows(); Resize(rowsA * rowsB, cols); - float N=(float)GetNumElements(); + float N=(float)this->GetNumElements(); int blocksPerGrid =(int)ceil(N/threadsPerBlock); a.PrepareDevice(); cudaEvent_t done = nullptr; if (do_sync) CUDA_CALL(cudaEventCreate(&done)); - _assignKhatriRaoProductOf<<>>(m_pArray,a.m_pArray,b.m_pArray,rowsA, rowsB, cols); + _assignKhatriRaoProductOf<<>>(this->m_pArray,a.m_pArray,b.m_pArray,rowsA, rowsB, cols); if (do_sync) CUDA_CALL(cudaEventRecord(done)); if (do_sync) CUDA_CALL(cudaEventSynchronize(done)); if (do_sync) CUDA_CALL(cudaEventDestroy(done)); @@ -2206,18 +2246,18 @@ namespace Microsoft { namespace MSR { namespace CNTK { long rowsA = (long)a.GetNumRows(); long rowsB = (long)b.GetNumRows(); if (rowsA % rowsB != 0) - throw invalid_argument("AddColumnReshapeProductOf: number of rows in a should be multiples of that in b."); + throw std::invalid_argument("AddColumnReshapeProductOf: number of rows in a should be multiples of that in b."); long rowsC = rowsA / rowsB; - if (rowsC != GetNumRows() || cols != GetNumCols()) - throw invalid_argument("AddColumnReshapeProductOf: This matrix does not have the right size."); + if (rowsC != this->GetNumRows() || cols != this->GetNumCols()) + throw std::invalid_argument("AddColumnReshapeProductOf: This matrix does not have the right size."); - float N=(float)GetNumElements(); + float N=(float)this->GetNumElements(); int blocksPerGrid =(int)ceil(N/threadsPerBlock); a.PrepareDevice(); cudaEvent_t done = nullptr; if (do_sync) CUDA_CALL(cudaEventCreate(&done)); - _addColumnReshapeProductOf<<>>(m_pArray,a.m_pArray,b.m_pArray, rowsB, rowsC, cols, transposeAColumn); + _addColumnReshapeProductOf<<>>(this->m_pArray,a.m_pArray,b.m_pArray, rowsB, rowsC, cols, transposeAColumn); if (do_sync) CUDA_CALL(cudaEventRecord(done)); if (do_sync) CUDA_CALL(cudaEventSynchronize(done)); if (do_sync) CUDA_CALL(cudaEventDestroy(done)); 
@@ -2235,7 +2275,7 @@ namespace Microsoft { namespace MSR { namespace CNTK { template ElemType GPUMatrix::FrobeniusNorm() const { - if (IsEmpty()) + if (this->IsEmpty()) throw std::logic_error("FrobeniusNorm: Matrix is empty."); PrepareDevice(); @@ -2243,7 +2283,7 @@ namespace Microsoft { namespace MSR { namespace CNTK { ElemType h_sum=0; CUDA_CALL(cudaMalloc((void**)&d_sum,sizeof(ElemType))); //WARNING: THIS kernel is not the most efficient way! - _reductionSum2<<<1,1024,0,t_stream>>>(m_pArray,d_sum,(LONG64)this->GetNumElements(), true); + _reductionSum2<<<1,1024,0,t_stream>>>(this->m_pArray,d_sum,(LONG64)this->GetNumElements(), true); CUDA_CALL(cudaMemcpy(&h_sum,d_sum,sizeof(ElemType),cudaMemcpyDeviceToHost)); CUDA_CALL(cudaFree(d_sum)); @@ -2260,7 +2300,7 @@ namespace Microsoft { namespace MSR { namespace CNTK { PrepareDevice(); //WARNING: THIS kernel is not the most efficient way! - _reductionSum2<<<1,1024,0,t_stream>>>(a.m_pArray,m_pArray,(LONG64)a.GetNumElements(), true); + _reductionSum2<<<1,1024,0,t_stream>>>(a.m_pArray,this->m_pArray,(LONG64)a.GetNumElements(), true); return *this; } @@ -2268,7 +2308,7 @@ namespace Microsoft { namespace MSR { namespace CNTK { template ElemType GPUMatrix::MatrixNormInf() const { - if (IsEmpty()) + if (this->IsEmpty()) throw std::logic_error("MatrixNorm1: Matrix is empty."); PrepareDevice(); @@ -2276,7 +2316,7 @@ namespace Microsoft { namespace MSR { namespace CNTK { ElemType h_maxAbs=0; CUDA_CALL(cudaMalloc((void**)&d_maxAbs,sizeof(ElemType))); //WARNING: THIS kernel is not the most efficient way! 
- _reductionMatrixNormInf<<<1,1024,0,t_stream>>>(m_pArray,d_maxAbs,(LONG64)this->GetNumElements()); + _reductionMatrixNormInf<<<1,1024,0,t_stream>>>(this->m_pArray,d_maxAbs,(LONG64)this->GetNumElements()); CUDA_CALL(cudaMemcpy(&h_maxAbs,d_maxAbs,sizeof(ElemType),cudaMemcpyDeviceToHost)); CUDA_CALL(cudaFree(d_maxAbs)); return h_maxAbs; @@ -2285,7 +2325,7 @@ namespace Microsoft { namespace MSR { namespace CNTK { template ElemType GPUMatrix::MatrixNorm1() const { - if (IsEmpty()) + if (this->IsEmpty()) throw std::logic_error("MatrixNorm1: Matrix is empty."); return this->SumOfAbsElements(); } @@ -2293,7 +2333,7 @@ namespace Microsoft { namespace MSR { namespace CNTK { template ElemType GPUMatrix::MatrixNorm0() const { - if (IsEmpty()) + if (this->IsEmpty()) throw std::logic_error("MatrixNorm0: Matrix is empty."); PrepareDevice(); @@ -2301,7 +2341,7 @@ namespace Microsoft { namespace MSR { namespace CNTK { ElemType h_nz=0; CUDA_CALL(cudaMalloc((void**)&d_nz,sizeof(ElemType))); //WARNING: THIS kernel is not the most efficient way! 
- _reductionMatrixNorm0<<<1,1024,0,t_stream>>>(m_pArray,d_nz,(LONG64)this->GetNumElements()); + _reductionMatrixNorm0<<<1,1024,0,t_stream>>>(this->m_pArray,d_nz,(LONG64)this->GetNumElements()); CUDA_CALL(cudaMemcpy(&h_nz,d_nz,sizeof(ElemType),cudaMemcpyDeviceToHost)); CUDA_CALL(cudaFree(d_nz)); return h_nz; @@ -2350,12 +2390,12 @@ namespace Microsoft { namespace MSR { namespace CNTK { template void GPUMatrix::VectorMax(GPUMatrix& maxIndexes, GPUMatrix& maxValues, const bool isColWise) const { - if (IsEmpty()) + if (this->IsEmpty()) throw std::logic_error("VectorMax: Matrix is empty."); - auto& us=*this; - const long m = (long)GetNumRows(); - const long n = (long)GetNumCols(); + const GPUMatrix& us=*this; + const long m = (long)this->GetNumRows(); + const long n = (long)this->GetNumCols(); assert (m>0 && n>0); //converting from size_t to int may cause overflow PrepareDevice(); cudaEvent_t done = nullptr; @@ -2386,12 +2426,12 @@ namespace Microsoft { namespace MSR { namespace CNTK { template void GPUMatrix::VectorMin(GPUMatrix& minIndexes, GPUMatrix& minValues, const bool isColWise) const { - if (IsEmpty()) + if (this->IsEmpty()) throw std::logic_error("VectorMax: Matrix is empty."); - auto& us=*this; - const int m = (int)GetNumRows(); - const int n = (int)GetNumCols(); + const GPUMatrix& us=*this; + const int m = (int)this->GetNumRows(); + const int n = (int)this->GetNumCols(); assert (m>0 && n>0); //converting from size_t to int may cause overflow PrepareDevice(); @@ -2453,7 +2493,7 @@ namespace Microsoft { namespace MSR { namespace CNTK { template void GPUMatrix::Print(const char* matrixName /*=nullptr*/) const { - Print(matrixName, 0, GetNumRows()-1, 0, GetNumCols()-1); + Print(matrixName, 0, this->GetNumRows()-1, 0, this->GetNumCols()-1); } // file I/O @@ -2552,7 +2592,7 @@ namespace Microsoft { namespace MSR { namespace CNTK { { assert (verticalSubsample <= windowHeight && horizontalSubsample <= windowWidth); - UINT batchSize = inputBatch.GetNumCols(); + 
unsigned int batchSize = inputBatch.GetNumCols(); Resize(outputSizePerSample, batchSize); int numThreadPerBlock = threadsPerBlock; @@ -2581,7 +2621,7 @@ namespace Microsoft { namespace MSR { namespace CNTK { { assert (verticalSubsample <= windowHeight && horizontalSubsample <= windowWidth); - UINT batchSize = outputGradientBatch.GetNumCols(); + unsigned int batchSize = outputGradientBatch.GetNumCols(); int numThreadPerBlock = threadsPerBlock; PrepareDevice(); @@ -2608,7 +2648,7 @@ namespace Microsoft { namespace MSR { namespace CNTK { { assert (verticalSubsample <= windowHeight && horizontalSubsample <= windowWidth); - UINT batchSize = inputBatch.GetNumCols(); + unsigned int batchSize = inputBatch.GetNumCols(); Resize(outputSizePerSample, batchSize); int numThreadPerBlock = threadsPerBlock; @@ -2679,10 +2719,22 @@ namespace Microsoft { namespace MSR { namespace CNTK { int l = int(transposeB ? b.m_numCols : b.m_numRows); c.Resize(m,n); - if (!(m>0 && k>0 && l>0 && n>0)) throw std::exception("!(m>0 && k>0 && l>0 && n>0)"); //converting from size_t to int may cause overflow + if (!(m>0 && k>0 && l>0 && n>0)) + { +#ifndef LINUX + throw std::exception("!(m>0 && k>0 && l>0 && n>0)"); //converting from size_t to int may cause overflow +#else + throw std::exception(); //converting from size_t to int may cause overflow +#endif /* LINUX */ + } if (k!=l) + { +#ifndef LINUX throw std::exception("matrix dim mismatch in MultiplyAndWeightedAdd"); - +#else + throw std::exception(); +#endif /* LINUX */ + } if (sizeof(ElemType)==sizeof(float)) { CUBLAS_CALL(cublasSgemm(cuHandle,transA,transB,m,n,k,reinterpret_cast(&alpha),reinterpret_cast(a.m_pArray),(int)a.m_numRows,reinterpret_cast(b.m_pArray),(int)b.m_numRows,reinterpret_cast(&beta),reinterpret_cast(c.m_pArray),(int)c.m_numRows)); @@ -2691,7 +2743,14 @@ namespace Microsoft { namespace MSR { namespace CNTK { { 
CUBLAS_CALL(cublasDgemm(cuHandle,transA,transB,m,n,k,reinterpret_cast(&alpha),reinterpret_cast(a.m_pArray),(int)a.m_numRows,reinterpret_cast(b.m_pArray),(int)b.m_numRows,reinterpret_cast(&beta),reinterpret_cast(c.m_pArray),(int)c.m_numRows)); } - else throw std::exception("Unsupported template argument in GPUMatrix"); + else + { +#ifndef LINUX + throw std::exception("Unsupported template argument in GPUMatrix"); +#else + throw std::exception(); +#endif /* LINUX */ + } c.m_numRows=m; c.m_numCols=n; } @@ -2757,7 +2816,14 @@ namespace Microsoft { namespace MSR { namespace CNTK { { CUBLAS_CALL(cublasDaxpy(cuHandle,len,reinterpret_cast (&alpha),reinterpret_cast (a.m_pArray),incx,reinterpret_cast (c.m_pArray) ,incy)); } - else throw std::exception("Unsupported template argument in GPUMatrix"); + else + { +#ifndef LINUX + throw std::exception("Unsupported template argument in GPUMatrix"); +#else + throw std::exception(); +#endif /* LINUX */ + } } else if (a.GetNumElements() == 1) { @@ -3028,7 +3094,14 @@ namespace Microsoft { namespace MSR { namespace CNTK { double alph = alpha; CUBLAS_CALL(cublasDscal(cuHandle,int(a.m_numRows*a.m_numCols),&alph,(double*)a.m_pArray,1)); } - else throw std::exception("Unsupported template argument in GPUMatrix"); + else + { +#ifndef LINUX + throw std::exception("Unsupported template argument in GPUMatrix"); +#else + throw std::exception(); +#endif /* LINUX */ + } } @@ -3036,7 +3109,13 @@ namespace Microsoft { namespace MSR { namespace CNTK { void GPUMatrix::Scale(GPUMatrix& alpha, GPUMatrix& a) { if (alpha.GetNumElements()!=1) + { +#ifndef LINUX throw std::exception("Matrix alpha must be 1x1"); +#else + throw std::exception(); +#endif /* LINUX */ + } cublasHandle_t cuHandle = GetCublasHandle(a.GetComputeDeviceId()); cublasSetPointerMode(cuHandle, CUBLAS_POINTER_MODE_DEVICE); if (sizeof(ElemType)==sizeof(float)) @@ -3050,7 +3129,11 @@ namespace Microsoft { namespace MSR { namespace CNTK { else { cublasSetPointerMode(cuHandle, 
CUBLAS_POINTER_MODE_HOST); +#ifndef LINUX throw std::exception("Unsupported template argument in GPUMatrix"); +#else + throw std::exception(); +#endif /* LINUX */ } cublasSetPointerMode(cuHandle, CUBLAS_POINTER_MODE_HOST); } @@ -3372,7 +3455,7 @@ namespace Microsoft { namespace MSR { namespace CNTK { // !!!!This is from helper_cuda.h which comes with CUDA samples!!!! Consider if it is benefitial to just include all helper_cuda.h // Beginning of GPU Architecture definitions -inline int _ConvertSMVer2Cores(int major, int minor) +int _ConvertSMVer2Cores(int major, int minor) { // Defines for GPU Architecture types (using the SM version to determine the # of cores per SM typedef struct diff --git a/Math/Math/GPUMatrix.cuh b/Math/Math/GPUMatrix.cuh index 3bb79d045..ce7fdace9 100644 --- a/Math/Math/GPUMatrix.cuh +++ b/Math/Math/GPUMatrix.cuh @@ -17,11 +17,17 @@ typedef struct cublasContext *cublasHandle_t; struct CUstream_st; typedef struct CUstream_st *cudaStream_t; +#ifndef LINUX +#ifndef MATH_API #ifdef MATH_EXPORTS #define MATH_API __declspec(dllexport) #else #define MATH_API __declspec(dllimport) #endif +#endif /* MATH_API */ +#else /* LINUX */ +#define MATH_API +#endif #ifndef USE_TIME_BASED_SEED #define USE_TIME_BASED_SEED ULONG_MAX @@ -45,10 +51,12 @@ namespace Microsoft { namespace MSR { namespace CNTK { public: DeviceBoundNumber() {m_data=NULL;}; DeviceBoundNumber(const DeviceBoundNumber &deepCopy); +#ifndef LINUX DeviceBoundNumber(DeviceBoundNumber &&shallowCopy); +#endif ~DeviceBoundNumber(); int GetDeviceId() const {return m_computeDevice;} - ElemType* ExposePointer2Value() const {return m_data;} + ElemType* ExposePointer2Value() const {return this->m_data;} //performs shallow copy only void ShallowCopyFrom(ElemType* newVal,int newValsDevceId); }; @@ -76,8 +84,10 @@ namespace Microsoft { namespace MSR { namespace CNTK { GPUMatrix(const size_t numRows, const size_t numCols, ElemType *pArray, const size_t matrixFlags=matrixFlagNormal,int deviceId=0); 
GPUMatrix(const GPUMatrix& deepCopyFrom); GPUMatrix& operator=(const GPUMatrix& deepCopyFrom); //assignment operator, deep copy +#ifndef LINUX GPUMatrix(GPUMatrix&& moveFrom); GPUMatrix& operator=(GPUMatrix&& moveFrom); //move assignment operator, shallow copy +#endif /* LINUX */ ~GPUMatrix(void); static int GetBestGPUDeviceId(); @@ -95,8 +105,8 @@ namespace Microsoft { namespace MSR { namespace CNTK { GPUMatrix ColumnSlice(size_t startColumn, size_t numCols) const; GPUMatrix& AssignColumnSlice(const GPUMatrix& fromMatrix, size_t startColumn, size_t numCols); - size_t BufferSize() const {return m_numRows*m_numCols*sizeof(ElemType);} - ElemType* BufferPointer() const {return m_pArray;} + size_t BufferSize() const {return this->m_numRows*this->m_numCols*sizeof(ElemType);} + ElemType* BufferPointer() const {return this->m_pArray;} void Adagrad(GPUMatrix& gradients); void RmsProp(GPUMatrix& gradients, @@ -109,8 +119,8 @@ namespace Microsoft { namespace MSR { namespace CNTK { void Reshape(const size_t numRows, const size_t numCols); void Resize(const size_t numRows, const size_t numCols, bool growOnly = true); //by default we only reallocate if need to grow - ElemType& operator() (const size_t /*row*/, const size_t /*col*/) { throw std::exception("GPUMatrix doesn't support this"); } - const ElemType& operator() (const size_t /*row*/, const size_t /*col*/) const { throw std::exception("GPUMatrix doesn't support this"); } + ElemType& operator() (const size_t /*row*/, const size_t /*col*/) { throw std::logic_error("GPUMatrix doesn't support this"); } + const ElemType& operator() (const size_t /*row*/, const size_t /*col*/) const { throw std::logic_error("GPUMatrix doesn't support this"); } ElemType Get00Element() const; void SetValue(const ElemType v); @@ -262,7 +272,7 @@ namespace Microsoft { namespace MSR { namespace CNTK { GPUMatrix& AssignInnerProductOfMatrices(const GPUMatrix& a, const GPUMatrix& b); void Print(const char* matrixName, size_t rowStart, size_t rowEnd, 
size_t colStart, size_t colEnd) const; - void Print(const char* matrixName = nullptr) const; //print whole matrix. can be expensive + void Print(const char* matrixName = NULL) const; //print whole matrix. can be expensive void ReadFromFile(FILE* f, const char * matrixName); //matrixName is used to verify that correct matrix is read. void WriteToFile(FILE* f, const char * matrixName); //matrixName is used to verify that correct matrix is read. @@ -334,7 +344,11 @@ namespace Microsoft { namespace MSR { namespace CNTK { size_t elsize; stream>>elsize; if (sizeof(ElemType)!=elsize) +#ifndef LINUX throw std::exception("Template argument size doesn't match those in file"); +#else + throw std::exception(); +#endif std::wstring matrixName; size_t numRows, numCols; int format; diff --git a/Math/Math/GPUMatrixCUDAKernels.cu b/Math/Math/GPUMatrixCUDAKernels.cu index 066e391ee..6c035c2db 100644 --- a/Math/Math/GPUMatrixCUDAKernels.cu +++ b/Math/Math/GPUMatrixCUDAKernels.cu @@ -2626,7 +2626,11 @@ __global__ void _normalGrad( } } +#ifndef LINUX static __inline__ __device__ double atomicAdd(double* address, double val) +#else +static __device__ double atomicAdd(double* address, double val) +#endif { unsigned long long int* address_as_ull = (unsigned long long int*)address; unsigned long long int old = *address_as_ull, assumed; @@ -3223,4 +3227,4 @@ else d_tmp[0] = max((ElemType)0, d_tmp[0]/max((ElemType)1.0e-10,sqrt(d_tmp[1]))/max((ElemType)1.0e-10,sqrt(d_tmp[2]))); } } -*/ \ No newline at end of file +*/ diff --git a/Math/Math/GPUSparseMatrix.cu b/Math/Math/GPUSparseMatrix.cu index 96ccb5680..31bf6bb8f 100644 --- a/Math/Math/GPUSparseMatrix.cu +++ b/Math/Math/GPUSparseMatrix.cu @@ -18,7 +18,10 @@ #pragma warning (disable: 4127) // conditional expression is constant; "if (sizeof(ElemType)==sizeof(float))" triggers this // thread local storage to access the current stream, initalize to default stream -extern __declspec( thread ) cudaStream_t t_stream; +#ifndef LINUX +extern 
__declspec( thread ) +#endif + cudaStream_t t_stream; void CUDACALL(cudaError_t x) { @@ -27,7 +30,7 @@ void CUDACALL(cudaError_t x) const char* errmsg = cudaGetErrorString(x); std::cout<<"!!!!!!!!CUDA EXCEPTION: "< void GPUSparseMatrix::ZeroInit() { - m_legacy = true; - m_computeDevice=0; //current GPU device Id - m_numRows=0; - m_numCols=0; - m_elemSizeAllocated = m_nz = 0; //Number of non-zero elements - m_format = matrixFormatSparseCSR; - m_externalBuffer = false; - m_pArray=NULL; - m_matrixName=NULL; + this->m_legacy = true; + this->m_computeDevice=0; //current GPU device Id + this->m_numRows=0; + this->m_numCols=0; + this->m_elemSizeAllocated = this->m_nz = 0; //Number of non-zero elements + this->m_format = matrixFormatSparseCSR; + this->m_externalBuffer = false; + this->m_pArray=NULL; + this->m_matrixName=NULL; } template @@ -77,7 +80,7 @@ namespace Microsoft { namespace MSR { namespace CNTK { template GPUSparseMatrix::GPUSparseMatrix(const GPUSparseMatrix& deepCopy) { - m_legacy = true; + this->m_legacy = true; DeepCopy(deepCopy); } @@ -85,15 +88,15 @@ namespace Microsoft { namespace MSR { namespace CNTK { GPUSparseMatrix::GPUSparseMatrix(const size_t numRows, const size_t numCols, const size_t nz, ElemType* pArray, const size_t matrixFlags /*=matrixFormatSparseCSR*/, int deviceId /*=MANAGEDEXTERN*/, const size_t elemSizeAllocated /*=0*/) { - m_legacy = true; - m_computeDevice=deviceId; - m_numRows=numRows; - m_numCols=numCols; - m_nz=nz; - m_elemSizeAllocated=elemSizeAllocated?elemSizeAllocated:nz; - m_pArray = pArray; - m_format = (MatrixFormat)(matrixFormatMask&matrixFlags); - m_externalBuffer = true; + this->m_legacy = true; + this->m_computeDevice=deviceId; + this->m_numRows=numRows; + this->m_numCols=numCols; + this->m_nz=nz; + this->m_elemSizeAllocated=elemSizeAllocated?elemSizeAllocated:nz; + this->m_pArray = pArray; + this->m_format = (MatrixFormat)(matrixFormatMask&matrixFlags); + this->m_externalBuffer = true; } // legacy code @@ -104,11 +107,11 
@@ namespace Microsoft { namespace MSR { namespace CNTK { { Clear(); } - m_numRows=nR; - m_numCols=nC; - m_nz=0; - m_elemSizeAllocated=m_nz; - m_pArray = nullptr; + this->m_numRows=nR; + this->m_numCols=nC; + this->m_nz=0; + this->m_elemSizeAllocated=m_nz; + this->m_pArray = NULL; }*/ // PrepareDevice - Setup the correct cuda context for an operation @@ -126,41 +129,41 @@ namespace Microsoft { namespace MSR { namespace CNTK { template void GPUSparseMatrix::DeepCopy(const GPUSparseMatrix& deepCopy) { - m_computeDevice=deepCopy.m_computeDevice; - m_numRows=deepCopy.m_numRows; - m_numCols=deepCopy.m_numCols; - m_nz=deepCopy.m_nz; - m_elemSizeAllocated=deepCopy.m_elemSizeAllocated; - m_format = deepCopy.m_format; + this->m_computeDevice=deepCopy.m_computeDevice; + this->m_numRows=deepCopy.m_numRows; + this->m_numCols=deepCopy.m_numCols; + this->m_nz=deepCopy.m_nz; + this->m_elemSizeAllocated=deepCopy.m_elemSizeAllocated; + this->m_format = deepCopy.m_format; deepCopy.PrepareDevice(); // about to overwrite this buffer, so free it if we own it - if (OwnBuffer() && m_pArray!=NULL) + if (this->OwnBuffer() && this->m_pArray!=NULL) { - CUDACALL(cudaFree(m_pArray)); + CUDACALL(cudaFree(this->m_pArray)); } else if (!deepCopy.OwnBuffer()) { // just copy over the pointer, this assumses duplicate non-owned buffers are valid - m_pArray = deepCopy.m_pArray; + this->m_pArray = deepCopy.m_pArray; } else if (deepCopy.m_pArray!=NULL) { - CUDACALL(cudaMalloc((void **)&m_pArray,BufferSize())); - CUDACALL(cudaMemcpy(m_pArray,deepCopy.m_pArray,BufferSize(),cudaMemcpyDeviceToDevice)); + CUDACALL(cudaMalloc((void **)&this->m_pArray,BufferSize())); + CUDACALL(cudaMemcpy(this->m_pArray,deepCopy.m_pArray,BufferSize(),cudaMemcpyDeviceToDevice)); } else - m_pArray = NULL; - m_externalBuffer = deepCopy.m_externalBuffer; + this->m_pArray = NULL; + this->m_externalBuffer = deepCopy.m_externalBuffer; if (deepCopy.m_matrixName!=NULL) { - m_matrixName = new wchar_t[wcslen(deepCopy.m_matrixName)+1]; - 
wmemcpy(m_matrixName,deepCopy.m_matrixName,wcslen(deepCopy.m_matrixName)+1); + this->m_matrixName = new wchar_t[wcslen(deepCopy.m_matrixName)+1]; + wmemcpy(this->m_matrixName,deepCopy.m_matrixName,wcslen(deepCopy.m_matrixName)+1); } else - m_matrixName=NULL; + this->m_matrixName=NULL; } template @@ -196,20 +199,20 @@ namespace Microsoft { namespace MSR { namespace CNTK { CUSPARSECALL(cusparseSetStream(cusparseHandle, t_stream)); if (sizeof(ElemType)==sizeof(float)) { - CUSPARSECALL(cusparseScsr2dense(cusparseHandle,int(m_numRows),int(m_numCols),descr,(float*)NzLocation(),RowLocation(),ColLocation(),(float*)pArrayDev,int(m_numRows))); + CUSPARSECALL(cusparseScsr2dense(cusparseHandle,int(this->m_numRows),int(this->m_numCols),descr,(float*)NzLocation(),RowLocation(),ColLocation(),(float*)pArrayDev,int(this->m_numRows))); } else { - CUSPARSECALL(cusparseDcsr2dense(cusparseHandle,int(m_numRows),int(m_numCols),descr,(double*)NzLocation(),RowLocation(),ColLocation(),(double*)pArrayDev,int(m_numRows))); + CUSPARSECALL(cusparseDcsr2dense(cusparseHandle,int(this->m_numRows),int(this->m_numCols),descr,(double*)NzLocation(),RowLocation(),ColLocation(),(double*)pArrayDev,int(this->m_numRows))); } CUDACALL(cudaEventRecord(done)); CUDACALL(cudaEventSynchronize(done)); CUDACALL(cudaEventDestroy(done)); CUSPARSECALL(cusparseDestroy(cusparseHandle)); - res.SetValue(m_numRows,m_numCols,pArrayDev,(matrixFlagNormal|matrixFlagSetValueOnDevice)); + res.SetValue(this->m_numRows,this->m_numCols,pArrayDev,(matrixFlagNormal|matrixFlagSetValueOnDevice)); if (pArrayDev!=NULL) CUDACALL(cudaFree(pArrayDev)); - res.SetMatrixName(m_matrixName); + res.SetMatrixName(this->m_matrixName); return res; } @@ -229,12 +232,12 @@ namespace Microsoft { namespace MSR { namespace CNTK { cusparseSetMatType(descr,CUSPARSE_MATRIX_TYPE_GENERAL); cusparseSetMatIndexBase(descr,CUSPARSE_INDEX_BASE_ZERO); - m_numRows = denseMatrix.GetNumRows(); //m - m_numCols = denseMatrix.GetNumCols(); //n - m_format = 
matrixFormatSparseCSR; + this->m_numRows = denseMatrix.GetNumRows(); //m + this->m_numCols = denseMatrix.GetNumCols(); //n + this->m_format = matrixFormatSparseCSR; int *nnzPerRow = NULL; - CUDACALL(cudaMalloc((void**)&nnzPerRow,sizeof(int)*m_numCols)); + CUDACALL(cudaMalloc((void**)&nnzPerRow,sizeof(int)*this->m_numCols)); int nnzTotalDevHostPtr = -1; @@ -242,39 +245,39 @@ namespace Microsoft { namespace MSR { namespace CNTK { CUDACALL(cudaEventCreate(&done)); if (sizeof(ElemType)==sizeof(float)) { - CUSPARSECALL(cusparseSnnz(cusparseHandle,(m_format&matrixFormatRowMajor)?CUSPARSE_DIRECTION_ROW:CUSPARSE_DIRECTION_COLUMN,(int)m_numRows,(int)m_numCols,descr, - reinterpret_cast(denseMatrix.BufferPointer()), (int)m_numRows,nnzPerRow,&nnzTotalDevHostPtr)); + CUSPARSECALL(cusparseSnnz(cusparseHandle,(this->m_format&matrixFormatRowMajor)?CUSPARSE_DIRECTION_ROW:CUSPARSE_DIRECTION_COLUMN,(int)this->m_numRows,(int)this->m_numCols,descr, + reinterpret_cast(denseMatrix.BufferPointer()), (int)this->m_numRows,nnzPerRow,&nnzTotalDevHostPtr)); } else { - CUSPARSECALL(cusparseDnnz(cusparseHandle,(m_format&matrixFormatRowMajor)?CUSPARSE_DIRECTION_ROW:CUSPARSE_DIRECTION_COLUMN,(int)m_numRows,(int)m_numCols,descr, - reinterpret_cast(denseMatrix.BufferPointer()), (int)m_numRows,nnzPerRow,&nnzTotalDevHostPtr)); + CUSPARSECALL(cusparseDnnz(cusparseHandle,(this->m_format&matrixFormatRowMajor)?CUSPARSE_DIRECTION_ROW:CUSPARSE_DIRECTION_COLUMN,(int)this->m_numRows,(int)this->m_numCols,descr, + reinterpret_cast(denseMatrix.BufferPointer()), (int)this->m_numRows,nnzPerRow,&nnzTotalDevHostPtr)); } CUDACALL(cudaEventRecord(done)); CUDACALL(cudaEventSynchronize(done)); CUDACALL(cudaEventDestroy(done)); // about to overwrite this buffer, so free it if we own it - if (OwnBuffer() && m_pArray!=NULL) + if (this->OwnBuffer() && this->m_pArray!=NULL) { - CUDACALL(cudaFree(m_pArray)); + CUDACALL(cudaFree(this->m_pArray)); } //allocate memory for sparse matrix - m_elemSizeAllocated = m_nz = 
nnzTotalDevHostPtr; - CUDACALL(cudaMalloc((void**)&m_pArray,BufferSize())); - m_externalBuffer = false; + this->m_elemSizeAllocated = this->m_nz = nnzTotalDevHostPtr; + CUDACALL(cudaMalloc((void**)&this->m_pArray,BufferSize())); + this->m_externalBuffer = false; CUDACALL(cudaEventCreate(&done)); if (sizeof(ElemType)==sizeof(float)) { - CUSPARSECALL(cusparseSdense2csr(cusparseHandle,(int)m_numRows,(int)m_numCols,descr,reinterpret_cast(denseMatrix.BufferPointer()), - (int)m_numRows,nnzPerRow,reinterpret_cast(NzLocation()),RowLocation(),ColLocation())); + CUSPARSECALL(cusparseSdense2csr(cusparseHandle,(int)this->m_numRows,(int)this->m_numCols,descr,reinterpret_cast(denseMatrix.BufferPointer()), + (int)this->m_numRows,nnzPerRow,reinterpret_cast(NzLocation()),RowLocation(),ColLocation())); } else { - CUSPARSECALL(cusparseDdense2csr(cusparseHandle,(int)m_numRows,(int)m_numCols,descr,reinterpret_cast(denseMatrix.BufferPointer()), - (int)m_numRows,nnzPerRow,reinterpret_cast(NzLocation()),RowLocation(),ColLocation())); + CUSPARSECALL(cusparseDdense2csr(cusparseHandle,(int)this->m_numRows,(int)this->m_numCols,descr,reinterpret_cast(denseMatrix.BufferPointer()), + (int)this->m_numRows,nnzPerRow,reinterpret_cast(NzLocation()),RowLocation(),ColLocation())); } CUDACALL(cudaEventRecord(done)); CUDACALL(cudaEventSynchronize(done)); @@ -291,18 +294,19 @@ namespace Microsoft { namespace MSR { namespace CNTK { return *this; } +#ifndef LINUX template GPUSparseMatrix::GPUSparseMatrix(GPUSparseMatrix&& moveFrom) { - m_computeDevice=moveFrom.m_computeDevice; - m_numRows=moveFrom.m_numRows; - m_numCols=moveFrom.m_numCols; - m_nz=moveFrom.m_nz; - m_elemSizeAllocated = moveFrom.m_elemSizeAllocated; - m_pArray = moveFrom.m_pArray; - m_format = moveFrom.m_format; - m_externalBuffer = moveFrom.m_externalBuffer; - m_matrixName=moveFrom.m_matrixName; + this->m_computeDevice=moveFrom.m_computeDevice; + this->m_numRows=moveFrom.m_numRows; + this->m_numCols=moveFrom.m_numCols; + 
this->m_nz=moveFrom.m_nz; + this->m_elemSizeAllocated = moveFrom.m_elemSizeAllocated; + this->m_pArray = moveFrom.m_pArray; + this->m_format = moveFrom.m_format; + this->m_externalBuffer = moveFrom.m_externalBuffer; + this->m_matrixName=moveFrom.m_matrixName; moveFrom.ZeroInit(); } @@ -311,26 +315,27 @@ namespace Microsoft { namespace MSR { namespace CNTK { GPUSparseMatrix& GPUSparseMatrix::operator=(GPUSparseMatrix&& moveFrom) { Clear(); - m_computeDevice=moveFrom.m_computeDevice; - m_numRows=moveFrom.m_numRows; - m_numCols=moveFrom.m_numCols; - m_nz=moveFrom.m_nz; - m_elemSizeAllocated = moveFrom.m_elemSizeAllocated; - m_pArray = moveFrom.m_pArray; - m_format = moveFrom.m_format; - m_externalBuffer = moveFrom.m_externalBuffer; + this->m_computeDevice=moveFrom.m_computeDevice; + this->m_numRows=moveFrom.m_numRows; + this->m_numCols=moveFrom.m_numCols; + this->m_nz=moveFrom.m_nz; + this->m_elemSizeAllocated = moveFrom.m_elemSizeAllocated; + this->m_pArray = moveFrom.m_pArray; + this->m_format = moveFrom.m_format; + this->m_externalBuffer = moveFrom.m_externalBuffer; - m_matrixName=moveFrom.m_matrixName; + this->m_matrixName=moveFrom.m_matrixName; moveFrom.m_pArray = NULL; moveFrom.m_matrixName=NULL; return *this; } +#endif /* LINUX */ template GPUSparseMatrix::~GPUSparseMatrix() { - if(m_legacy) + if(this->m_legacy) { Clear(); } @@ -343,26 +348,26 @@ namespace Microsoft { namespace MSR { namespace CNTK { template void GPUSparseMatrix::ClearNew() { - if (m_matrixName!=NULL) + if (this->m_matrixName!=NULL) { - delete[] m_matrixName; - m_matrixName = nullptr; + delete[] this->m_matrixName; + this->m_matrixName = NULL; } - if(m_format == MatrixFormat::matrixFormatSparseCSC || m_format == MatrixFormat::matrixFormatSparseCSR) + if(this->m_format == matrixFormatSparseCSC || this->m_format == matrixFormatSparseCSR) { - if(m_val != NULL) - CUDACALL(cudaFree(m_val)); - if(m_row != NULL) - CUDACALL(cudaFree(m_row)); - if(m_pb != NULL) - CUDACALL(cudaFree(m_pb)); + 
if(this->m_val != NULL) + CUDACALL(cudaFree(this->m_val)); + if(this->m_row != NULL) + CUDACALL(cudaFree(this->m_row)); + if(this->m_pb != NULL) + CUDACALL(cudaFree(this->m_pb)); } - else if (m_format == MatrixFormat::matrixFormatSparseBlockCol || m_format == MatrixFormat::matrixFormatSparseBlockRow) + else if (this->m_format == matrixFormatSparseBlockCol || this->m_format == matrixFormatSparseBlockRow) { - if(m_blockVal != NULL) - CUDACALL(cudaFree(m_blockVal)); - if(m_blockIds != NULL) - CUDACALL(cudaFree(m_blockIds)); + if(this->m_blockVal != NULL) + CUDACALL(cudaFree(this->m_blockVal)); + if(this->m_blockIds != NULL) + CUDACALL(cudaFree(this->m_blockIds)); } } @@ -370,10 +375,10 @@ namespace Microsoft { namespace MSR { namespace CNTK { template void GPUSparseMatrix::Clear() { - if (m_pArray!=NULL) - CUDACALL(cudaFree(m_pArray)); - if (m_matrixName!=NULL) - delete[] m_matrixName; + if (this->m_pArray!=NULL) + CUDACALL(cudaFree(this->m_pArray)); + if (this->m_matrixName!=NULL) + delete[] this->m_matrixName; ZeroInit(); } @@ -385,19 +390,19 @@ namespace Microsoft { namespace MSR { namespace CNTK { { bool reallocate = (BufferSize() != a.BufferSize()); - m_numRows=a.m_numRows; - m_numCols=a.m_numCols; - m_nz=a.m_nz; - m_elemSizeAllocated = a.m_elemSizeAllocated; - m_format = a.m_format; + this->m_numRows=a.m_numRows; + this->m_numCols=a.m_numCols; + this->m_nz=a.m_nz; + this->m_elemSizeAllocated = a.m_elemSizeAllocated; + this->m_format = a.m_format; if (reallocate) { - if (!OwnBuffer()) - throw runtime_error("cannot reallocate a buffer not owned by the matrix"); - if (m_pArray!=NULL) - CUDACALL(cudaFree(m_pArray)); - CUDACALL(cudaMalloc((void **)&m_pArray,BufferSize())); + if (!this->OwnBuffer()) + throw std::runtime_error("cannot reallocate a buffer not owned by the matrix"); + if (this->m_pArray!=NULL) + CUDACALL(cudaFree(this->m_pArray)); + CUDACALL(cudaMalloc((void **)&this->m_pArray,BufferSize())); } // copy over the non-zero locations from the source matrix 
@@ -412,108 +417,108 @@ namespace Microsoft { namespace MSR { namespace CNTK { template void GPUSparseMatrix::Init() { - m_legacy = false; - m_numRows = 0; - m_numCols = 0; - m_elemSizeAllocated = 0; - m_externalBuffer = false; - m_pArray = NULL; + this->m_legacy = false; + this->m_numRows = 0; + this->m_numCols = 0; + this->m_elemSizeAllocated = 0; + this->m_externalBuffer = false; + this->m_pArray = NULL; PrepareDevice(); - m_nz = 0; - m_matrixName = NULL; + this->m_nz = 0; + this->m_matrixName = NULL; - if(m_format == MatrixFormat::matrixFormatSparseCSC || m_format == MatrixFormat::matrixFormatSparseCSR) + if(this->m_format == matrixFormatSparseCSC || this->m_format == matrixFormatSparseCSR) { - m_colIdx = -1; - m_val = NULL; - m_row = NULL; - m_pb = NULL; - m_rowIdx = NULL; - m_col = NULL; + this->m_colIdx = -1; + this->m_val = NULL; + this->m_row = NULL; + this->m_pb = NULL; + this->m_rowIdx = NULL; + this->m_col = NULL; - m_block2Id = NULL; - m_block2UniqId = NULL; + this->m_block2Id = NULL; + this->m_block2UniqId = NULL; } - else if (m_format == MatrixFormat::matrixFormatSparseBlockCol || m_format == MatrixFormat::matrixFormatSparseBlockRow) + else if (this->m_format == matrixFormatSparseBlockCol || this->m_format == matrixFormatSparseBlockRow) { - m_blockSize = 0; - m_blockVal = NULL; - m_blockIds = NULL; + this->m_blockSize = 0; + this->m_blockVal = NULL; + this->m_blockIds = NULL; } } template GPUSparseMatrix::GPUSparseMatrix(const MatrixFormat format, const int deviceId) { - if(format != MatrixFormat::matrixFormatSparseCSC && format != MatrixFormat::matrixFormatSparseCSR && format != MatrixFormat::matrixFormatSparseBlockCol && format != MatrixFormat::matrixFormatSparseBlockRow) + if(format != matrixFormatSparseCSC && format != matrixFormatSparseCSR && format != matrixFormatSparseBlockCol && format != matrixFormatSparseBlockRow) { throw std::logic_error("GPUSparseMatrix: unsupported sparse matrix format"); } - m_format = format; - m_computeDevice = 
deviceId; + this->m_format = format; + this->m_computeDevice = deviceId; Init(); } template ElemType* GPUSparseMatrix::BufferPointer() const { - if(m_format == MatrixFormat::matrixFormatSparseCSC || m_format == MatrixFormat::matrixFormatSparseCSR) + if(this->m_format == matrixFormatSparseCSC || this->m_format == matrixFormatSparseCSR) { - return m_val; + return this->m_val; } else { - return m_blockVal; + return this->m_blockVal; } } template void GPUSparseMatrix::Resize(const size_t numRows, const size_t numCols, size_t size) { - m_nz = 0; - m_colIdx = -1; - m_numRows = numRows; - m_numCols = numCols; - if(m_elemSizeAllocated < size) + this->m_nz = 0; + this->m_colIdx = -1; + this->m_numRows = numRows; + this->m_numCols = numCols; + if(this->m_elemSizeAllocated < size) { - m_elemSizeAllocated = size; - if(m_format == MatrixFormat::matrixFormatSparseCSC || m_format == MatrixFormat::matrixFormatSparseCSR) + this->m_elemSizeAllocated = size; + if(this->m_format == matrixFormatSparseCSC || this->m_format == matrixFormatSparseCSR) { - if(m_val != NULL) - CUDACALL(cudaFree(m_val)); - if(m_row != NULL) - CUDACALL(cudaFree(m_row)); - if(m_pb != NULL) - CUDACALL(cudaFree(m_pb)); - if(m_rowIdx != NULL) - CUDACALL(cudaFree(m_rowIdx)); - if(m_col != NULL) - CUDACALL(cudaFree(m_col)); - if(m_block2Id != NULL) - CUDACALL(cudaFree(m_block2Id)); - if(m_block2UniqId != NULL) - CUDACALL(cudaFree(m_block2UniqId)); + if(this->m_val != NULL) + CUDACALL(cudaFree(this->m_val)); + if(this->m_row != NULL) + CUDACALL(cudaFree(this->m_row)); + if(this->m_pb != NULL) + CUDACALL(cudaFree(this->m_pb)); + if(this->m_rowIdx != NULL) + CUDACALL(cudaFree(this->m_rowIdx)); + if(this->m_col != NULL) + CUDACALL(cudaFree(this->m_col)); + if(this->m_block2Id != NULL) + CUDACALL(cudaFree(this->m_block2Id)); + if(this->m_block2UniqId != NULL) + CUDACALL(cudaFree(this->m_block2UniqId)); PrepareDevice(); - CUDACALL(cudaMalloc((void **)&m_val,sizeof(ElemType)*size)); - CUDACALL(cudaMalloc((void 
**)&m_row,sizeof(size_t)*size)); - int len = m_format == MatrixFormat::matrixFormatSparseCSC ? numCols : numRows; - CUDACALL(cudaMalloc((void **)&m_pb,sizeof(size_t)*(len+1))); - CUDACALL(cudaMalloc((void **)&m_rowIdx,sizeof(size_t)*size)); - CUDACALL(cudaMalloc((void **)&m_col,sizeof(size_t)*size)); - CUDACALL(cudaMalloc((void **)&m_block2Id,sizeof(size_t)*(numCols*2))); - CUDACALL(cudaMalloc((void **)&m_block2UniqId,sizeof(size_t)*(numCols*2))); + CUDACALL(cudaMalloc((void **)&this->m_val,sizeof(ElemType)*size)); + CUDACALL(cudaMalloc((void **)&this->m_row,sizeof(size_t)*size)); + int len = this->m_format == matrixFormatSparseCSC ? numCols : numRows; + CUDACALL(cudaMalloc((void **)&this->m_pb,sizeof(size_t)*(len+1))); + CUDACALL(cudaMalloc((void **)&this->m_rowIdx,sizeof(size_t)*size)); + CUDACALL(cudaMalloc((void **)&this->m_col,sizeof(size_t)*size)); + CUDACALL(cudaMalloc((void **)&this->m_block2Id,sizeof(size_t)*(numCols*2))); + CUDACALL(cudaMalloc((void **)&this->m_block2UniqId,sizeof(size_t)*(numCols*2))); } - else if(m_format == MatrixFormat::matrixFormatSparseBlockCol || m_format == MatrixFormat::matrixFormatSparseBlockRow) + else if(this->m_format == matrixFormatSparseBlockCol || this->m_format == matrixFormatSparseBlockRow) { - if(m_blockVal != NULL) - CUDACALL(cudaFree(m_blockVal)); - if(m_blockIds != NULL) - CUDACALL(cudaFree(m_blockIds)); + if(this->m_blockVal != NULL) + CUDACALL(cudaFree(this->m_blockVal)); + if(this->m_blockIds != NULL) + CUDACALL(cudaFree(this->m_blockIds)); PrepareDevice(); - CUDACALL(cudaMalloc((void **)&m_blockVal,sizeof(ElemType)*size)); + CUDACALL(cudaMalloc((void **)&this->m_blockVal,sizeof(ElemType)*size)); int max = numCols > numRows ? 
numCols : numRows; - CUDACALL(cudaMalloc((void **)&m_blockIds,sizeof(size_t)*max)); + CUDACALL(cudaMalloc((void **)&this->m_blockIds,sizeof(size_t)*max)); } } } @@ -522,9 +527,9 @@ namespace Microsoft { namespace MSR { namespace CNTK { template void GPUSparseMatrix::Reset() { - m_nz = 0; - m_colIdx = -1; - m_blockSize = 0; + this->m_nz = 0; + this->m_colIdx = -1; + this->m_blockSize = 0; } #pragma endregion Constructors and Destructor @@ -535,46 +540,46 @@ namespace Microsoft { namespace MSR { namespace CNTK { template void GPUSparseMatrix::SetMatrixFromCSCFormat(size_t *h_row, size_t *h_rowIdx, size_t size, size_t blockSize) { - if(m_format != MatrixFormat::matrixFormatSparseCSC) + if(this->m_format != matrixFormatSparseCSC) { throw std::logic_error("CPUSparseMatrix: unsupported SetValue() call."); } - if(m_elemSizeAllocated < size) + if(this->m_elemSizeAllocated < size) { throw std::logic_error("CPUSparseMatrix: allocated size is too small."); } Reset(); - m_nz = size; - m_blockSize = blockSize; + this->m_nz = size; + this->m_blockSize = blockSize; PrepareDevice(); - CUDACALL(cudaMemcpy(m_row, h_row, sizeof(size_t)*size,cudaMemcpyHostToDevice)); - CUDACALL(cudaMemcpy(m_rowIdx, h_rowIdx, sizeof(size_t)*size,cudaMemcpyHostToDevice)); + CUDACALL(cudaMemcpy(this->m_row, h_row, sizeof(size_t)*size,cudaMemcpyHostToDevice)); + CUDACALL(cudaMemcpy(this->m_rowIdx, h_rowIdx, sizeof(size_t)*size,cudaMemcpyHostToDevice)); } template void GPUSparseMatrix::SetMatrixFromLabelAndClass(size_t *h_row, size_t *h_block2Id, size_t *h_block2UniqId, size_t labelSize, size_t expandedSize, size_t blockSize) { - if(m_format != MatrixFormat::matrixFormatSparseCSC) + if(this->m_format != matrixFormatSparseCSC) { throw std::logic_error("CPUSparseMatrix: unsupported SetValue() call."); } - if(m_elemSizeAllocated < labelSize) + if(this->m_elemSizeAllocated < labelSize) { throw std::logic_error("CPUSparseMatrix: allocated size is too small."); } Reset(); - m_nz = labelSize; - m_expandedSize = 
expandedSize; - m_blockSize = blockSize; + this->m_nz = labelSize; + this->m_expandedSize = expandedSize; + this->m_blockSize = blockSize; PrepareDevice(); - CUDACALL(cudaMemcpy(m_row, h_row, sizeof(size_t)*labelSize,cudaMemcpyHostToDevice)); - CUDACALL(cudaMemcpy(m_block2Id, h_block2Id, sizeof(size_t)*labelSize,cudaMemcpyHostToDevice)); - CUDACALL(cudaMemcpy(m_block2UniqId, h_block2UniqId, sizeof(size_t)*labelSize,cudaMemcpyHostToDevice)); + CUDACALL(cudaMemcpy(this->m_row, h_row, sizeof(size_t)*labelSize,cudaMemcpyHostToDevice)); + CUDACALL(cudaMemcpy(this->m_block2Id, h_block2Id, sizeof(size_t)*labelSize,cudaMemcpyHostToDevice)); + CUDACALL(cudaMemcpy(this->m_block2UniqId, h_block2UniqId, sizeof(size_t)*labelSize,cudaMemcpyHostToDevice)); } // forward pass from feature to hidden layer @@ -584,7 +589,7 @@ namespace Microsoft { namespace MSR { namespace CNTK { { if (lhs.GetComputeDeviceId()!=rhs.GetComputeDeviceId()||(lhs.GetComputeDeviceId()!=c.GetComputeDeviceId())) - throw std::exception("MultiplyAndWeightedAddStD: All matrices must be on the same GPU"); + throw stdException("MultiplyAndWeightedAddStD: All matrices must be on the same GPU"); if (lhs.IsEmpty() || rhs.IsEmpty()) throw std::logic_error("LeftMultiplyAndAdd: one of the input matrix is empty."); @@ -653,7 +658,7 @@ namespace Microsoft { namespace MSR { namespace CNTK { const GPUSparseMatrix& rhs, const bool transposeB, GPUSparseMatrix& c) { if (lhs.GetComputeDeviceId()!=rhs.GetComputeDeviceId()) - throw std::exception("GPUSparseMatrix::MultiplyAndAdd: All matrices must be on the same GPU"); + throw stdException("GPUSparseMatrix::MultiplyAndAdd: All matrices must be on the same GPU"); int m = transposeA? (int)lhs.GetNumCols(): (int)lhs.GetNumRows(); int k = transposeA? 
(int)lhs.GetNumRows(): (int)lhs.GetNumCols(); @@ -714,12 +719,12 @@ namespace Microsoft { namespace MSR { namespace CNTK { void GPUSparseMatrix::ScaleAndAdd(const ElemType alpha, const GPUSparseMatrix& lhs, GPUMatrix& rhs) { if (lhs.GetComputeDeviceId()!=rhs.GetComputeDeviceId()) - throw std::exception("GPUSparseMatrix::ScaleAndAdd: All matrices must be on the same GPU"); + throw stdException("GPUSparseMatrix::ScaleAndAdd: All matrices must be on the same GPU"); - if (lhs.m_format == MatrixFormat::matrixFormatSparseBlockCol || lhs.m_format == MatrixFormat::matrixFormatSparseBlockRow) + if (lhs.m_format == matrixFormatSparseBlockCol || lhs.m_format == matrixFormatSparseBlockRow) { - size_t len = (lhs.m_format == MatrixFormat::matrixFormatSparseBlockCol) ? lhs.GetNumRows(): lhs.GetNumCols(); - bool blockCol = (lhs.m_format == MatrixFormat::matrixFormatSparseBlockCol); + size_t len = (lhs.m_format == matrixFormatSparseBlockCol) ? lhs.GetNumRows(): lhs.GetNumCols(); + bool blockCol = (lhs.m_format == matrixFormatSparseBlockCol); cudaEvent_t done = nullptr; CUDACALL(cudaEventCreate(&done)); @@ -738,7 +743,7 @@ namespace Microsoft { namespace MSR { namespace CNTK { } else { - throw std::exception("GPUSparseMatrix:: ScaleAndAdd() Not implemented"); + throw stdException("GPUSparseMatrix:: ScaleAndAdd() Not implemented"); } } @@ -756,7 +761,7 @@ namespace Microsoft { namespace MSR { namespace CNTK { int deviceId = a.GetComputeDeviceId(); if (weight.GetComputeDeviceId()!=deviceId || label.GetComputeDeviceId()!=deviceId || cls.GetComputeDeviceId()!=deviceId || idx2cls.GetComputeDeviceId()!=deviceId || etp.GetComputeDeviceId()!=deviceId ) - throw std::exception("GPUSparseMatrix:: ClassEntropy() All matrices must be on the same GPU"); + throw stdException("GPUSparseMatrix:: ClassEntropy() All matrices must be on the same GPU"); size_t nC = cls.GetNumCols(); size_t nV = label.GetNumRows() - nC; @@ -831,7 +836,7 @@ namespace Microsoft { namespace MSR { namespace CNTK { { int 
deviceId = error.GetComputeDeviceId(); if (weight.GetComputeDeviceId()!=deviceId || grd.GetComputeDeviceId()!=deviceId ) - throw std::exception("GPUSparseMatrix::ClassEntropyGradientOfInput() All matrices must be on the same GPU"); + throw stdException("GPUSparseMatrix::ClassEntropyGradientOfInput() All matrices must be on the same GPU"); grd.SetValue((ElemType)0); cudaEvent_t done = nullptr; @@ -858,7 +863,7 @@ namespace Microsoft { namespace MSR { namespace CNTK { { int deviceId = error.GetComputeDeviceId(); if (input.GetComputeDeviceId()!=deviceId || label.GetComputeDeviceId()!=deviceId || cls.GetComputeDeviceId()!=deviceId || idx2cls.GetComputeDeviceId()!=deviceId || grd.GetComputeDeviceId()!=deviceId ) - throw std::exception("GPUSparseMatrix::ClassEntropyGradientOfWeight() All matrices must be on the same GPU"); + throw stdException("GPUSparseMatrix::ClassEntropyGradientOfWeight() All matrices must be on the same GPU"); grd.SetFormat(matrixFormatSparseBlockRow); size_t nz = label.m_blockSize * grd.GetNumCols(); @@ -898,20 +903,20 @@ namespace Microsoft { namespace MSR { namespace CNTK { template GPUSparseMatrix& GPUSparseMatrix::InplaceTruncate (const ElemType threshold) { - if(m_format == MatrixFormat::matrixFormatSparseBlockCol || m_format == MatrixFormat::matrixFormatSparseBlockRow) + if(this->m_format == matrixFormatSparseBlockCol || this->m_format == matrixFormatSparseBlockRow) { long N=(long)GetNZElements(); int blocksPerGrid =(int)ceil(N*1.0/threadsPerBlock); cudaEvent_t done = nullptr; CUDACALL(cudaEventCreate(&done)); - _inplaceTruncate<<>>(m_blockVal,threshold,N); + _inplaceTruncate<<>>(this->m_blockVal,threshold,N); CUDACALL(cudaEventRecord(done)); CUDACALL(cudaEventSynchronize(done)); CUDACALL(cudaEventDestroy(done)); } else { - throw std::exception("GPUSparseMatrix:: InplaceTruncate() only support block based sparse matrix"); + throw stdException("GPUSparseMatrix:: InplaceTruncate() only support block based sparse matrix"); } return *this; } @@ 
-926,7 +931,7 @@ namespace Microsoft { namespace MSR { namespace CNTK { c.SetValue(0.0); } - if(m_format == MatrixFormat::matrixFormatSparseBlockCol || m_format == MatrixFormat::matrixFormatSparseBlockRow) + if(this->m_format == matrixFormatSparseBlockCol || this->m_format == matrixFormatSparseBlockRow) { size_t blocksPerGrid = m_blockSize; bool isBlockCol = (m_format == MatrixFormat::matrixFormatSparseBlockCol); @@ -937,8 +942,8 @@ namespace Microsoft { namespace MSR { namespace CNTK { isBlockCol, len, momentum, - m_blockIds, - m_blockVal, + this->m_blockIds, + this->m_blockVal, c.BufferPointer(), c.GetNumRows()); CUDACALL(cudaEventRecord(done)); @@ -947,7 +952,7 @@ namespace Microsoft { namespace MSR { namespace CNTK { } else { - throw std::exception("GPUSparseMatrix:: NormalGrad() only support block sparse format"); + throw stdException("GPUSparseMatrix:: NormalGrad() only support block sparse format"); } } @@ -960,7 +965,7 @@ namespace Microsoft { namespace MSR { namespace CNTK { const GPUMatrix& b, ElemType beta, GPUMatrix& c) { if (a.GetComputeDeviceId()!=b.GetComputeDeviceId()||(b.GetComputeDeviceId()!=a.GetComputeDeviceId())) - throw std::exception("MultiplyAndWeightedAddStD: All matrices must be on the same GPU"); + throw stdException("MultiplyAndWeightedAddStD: All matrices must be on the same GPU"); a.PrepareDevice(); cusparseHandle_t cusparseHandle = 0; CUSPARSECALL(cusparseCreate(&cusparseHandle)); @@ -1022,7 +1027,7 @@ namespace Microsoft { namespace MSR { namespace CNTK { size_t GPUSparseMatrix::ElemCountFromBufferSize(size_t totalBufferSize) { size_t elemSizeAllocated; - if (m_format & matrixFormatCompressed) + if (this->m_format & matrixFormatCompressed) { elemSizeAllocated = (totalBufferSize-CompressedIndexSize())/(sizeof(int)+sizeof(ElemType)); } @@ -1105,7 +1110,7 @@ namespace Microsoft { namespace MSR { namespace CNTK { void GPUSparseMatrix::Multiply(const GPUSparseMatrix& S1, bool transposeS1, const GPUSparseMatrix& S2, bool transposeS2, 
GPUSparseMatrix &c) { if (S1.GetComputeDeviceId()!=S2.GetComputeDeviceId()) - throw std::exception("Sparse matrix multiply: both matrices must be on the same device"); + throw stdException("Sparse matrix multiply: both matrices must be on the same device"); S1.PrepareDevice(); cusparseHandle_t cusparseHandle = 0; @@ -1122,7 +1127,7 @@ namespace Microsoft { namespace MSR { namespace CNTK { int k = int(transposeS1 ? S1.GetNumRows() : S1.GetNumCols()); int l = int(transposeS2 ? S2.GetNumCols() : S2.GetNumRows()); if (k!=l) - throw std::exception("Sparse matrix multiply: dimensionality mismatch"); + throw stdException("Sparse matrix multiply: dimensionality mismatch"); int nnzA = (int)S1.GetNZElements(); int nnzB = (int)S2.GetNZElements(); @@ -1170,9 +1175,9 @@ namespace Microsoft { namespace MSR { namespace CNTK { void GPUSparseMatrix::ScaleAndAdd(ElemType alpha,const GPUSparseMatrix& a, ElemType beta, const GPUSparseMatrix& b, GPUSparseMatrix& c) { if (a.GetNumCols()!=b.GetNumCols() || a.GetNumRows()!=b.GetNumRows()) - throw new std::exception("Dimensions mismatch in ScaleAndAdd"); + throw new stdException("Dimensions mismatch in ScaleAndAdd"); if (a.GetComputeDeviceId()!=b.GetComputeDeviceId()) - throw new std::exception("ScaleAndAdd: matrices must be on the same device"); + throw new stdException("ScaleAndAdd: matrices must be on the same device"); int m = (int)a.GetNumRows(); int n = (int)a.GetNumCols(); @@ -1221,7 +1226,7 @@ namespace Microsoft { namespace MSR { namespace CNTK { if (a.GetNumRows()!=b.GetNumRows()||a.GetNumRows()!=c.GetNumRows()||a.GetNumCols()!=b.GetNumCols()||a.GetNumCols()!=c.GetNumCols()) throw std::logic_error("ScaleAndAdd: dimension mismatch"); if (a.GetComputeDeviceId()!=b.GetComputeDeviceId()||a.GetComputeDeviceId()!=c.GetComputeDeviceId()) - throw std::exception("ScaleAndAdd: matrices must be on the same device"); + throw stdException("ScaleAndAdd: matrices must be on the same device"); b.PrepareDevice(); //copy b to c 
CUDACALL(cudaMemcpy(c.BufferPointer(),b.BufferPointer(),sizeof(ElemType)*b.GetNumElements(),cudaMemcpyDeviceToDevice)); @@ -1290,7 +1295,7 @@ namespace Microsoft { namespace MSR { namespace CNTK { ElemType GPUSparseMatrix::InnerProductOfMatrices(const GPUSparseMatrix& a, const GPUMatrix& b) { if (a.GetComputeDeviceId()!=b.GetComputeDeviceId()) - throw std::exception("a and b must be on the same device"); + throw stdException("a and b must be on the same device"); //This implementation requires additional memory //need to put a in ColumnMajor format @@ -1434,14 +1439,14 @@ namespace Microsoft { namespace MSR { namespace CNTK { int GPUSparseMatrix::GetComputeDeviceId() const { // for externally managed memory the CUDA context will have the current device - if (m_computeDevice == MANAGEDEXTERN) + if (this->m_computeDevice == MANAGEDEXTERN) { int devId; - assert(m_externalBuffer); + assert(this->m_externalBuffer); CUDACALL(cudaGetDevice(&devId)); return devId; } - return m_computeDevice; + return this->m_computeDevice; } template @@ -1489,7 +1494,7 @@ namespace Microsoft { namespace MSR { namespace CNTK { template GPUSparseMatrix& GPUSparseMatrix::operator^=(ElemType alpha) { - auto& us = *this; + GPUSparseMatrix& us = *this; ElementWisePower(alpha, us, us); return us; } @@ -1506,7 +1511,7 @@ namespace Microsoft { namespace MSR { namespace CNTK { template GPUSparseMatrix& GPUSparseMatrix::operator*=(ElemType alpha) { - auto& us = *this; + GPUSparseMatrix& us = *this; if (alpha!=1) Scale(alpha,us); return us; @@ -1537,9 +1542,9 @@ namespace Microsoft { namespace MSR { namespace CNTK { cusparseAction_t cpVals = CUSPARSE_ACTION_NUMERIC; cusparseIndexBase_t idxBase = CUSPARSE_INDEX_BASE_ZERO; - assert(GetFormat()&matrixFormatCompressed); // for now this only supports compressed formats + assert(this->GetFormat()&matrixFormatCompressed); // for now this only supports compressed formats PrepareDevice(); - GPUSparseMatrix c(n, m, nnz, NULL, GetFormat(), GetComputeDeviceId(), 
m_elemSizeAllocated); + GPUSparseMatrix c(n, m, nnz, NULL, this->GetFormat(), GetComputeDeviceId(), this->m_elemSizeAllocated); CUDACALL(cudaMalloc((void **)&c.m_pArray,c.BufferSize())); cusparseHandle_t cusparseHandle = 0; @@ -1596,13 +1601,13 @@ namespace Microsoft { namespace MSR { namespace CNTK { if (sizeof(ElemType)==sizeof(float)) { float res=0; - cublasSasum(cuHandle,(int)GetNZElements(),reinterpret_cast(m_pArray),1,&res); + cublasSasum(cuHandle,(int)GetNZElements(),reinterpret_cast(this->m_pArray),1,&res); return res; } else { double res=0; - cublasDasum(cuHandle,(int)GetNZElements(),reinterpret_cast(m_pArray),1,&res); + cublasDasum(cuHandle,(int)GetNZElements(),reinterpret_cast(this->m_pArray),1,&res); return ElemType(res); } } @@ -1618,7 +1623,7 @@ namespace Microsoft { namespace MSR { namespace CNTK { ElemType h_sum; CUDACALL(cudaMalloc((void**)&d_sum,sizeof(ElemType))); //WARNING: THIS kernel is not the most efficient way! - _reductionSum<<<1,1024>>>(m_pArray,d_sum,(LONG64)this->GetNZElements()); + _reductionSum<<<1,1024>>>(this->m_pArray,d_sum,(LONG64)this->GetNZElements()); CUDACALL(cudaMemcpy(&h_sum,d_sum,sizeof(ElemType),cudaMemcpyDeviceToHost)); CUDACALL(cudaFree(d_sum)); return h_sum; @@ -1628,14 +1633,14 @@ namespace Microsoft { namespace MSR { namespace CNTK { template ElemType GPUSparseMatrix::FrobeniusNorm() const { - if (IsEmpty()) + if (this->IsEmpty()) throw std::logic_error("FrobeniusNorm: Matrix is empty."); ElemType* d_sum = NULL; ElemType h_sum=0; CUDACALL(cudaMalloc((void**)&d_sum,sizeof(ElemType))); //WARNING: THIS kernel is not the most efficient way! 
- _reductionSum2<<<1,1024>>>(m_pArray,d_sum,(int)this->GetNZElements()); + _reductionSum2<<<1,1024>>>(this->m_pArray,d_sum,(int)this->GetNZElements()); CUDACALL(cudaMemcpy(&h_sum,d_sum,sizeof(ElemType),cudaMemcpyDeviceToHost)); CUDACALL(cudaFree(d_sum)); if (sizeof(ElemType)==sizeof(float)) @@ -1647,14 +1652,14 @@ namespace Microsoft { namespace MSR { namespace CNTK { template ElemType GPUSparseMatrix::MatrixNormInf() const { - if (IsEmpty()) + if (this->IsEmpty()) throw std::logic_error("MatrixNorm1: Matrix is empty."); ElemType* d_maxAbs = NULL; ElemType h_maxAbs=0; CUDACALL(cudaMalloc((void**)&d_maxAbs,sizeof(ElemType))); //WARNING: THIS kernel is not the most efficient way! - _reductionMatrixNormInf<<<1,1024>>>(m_pArray,d_maxAbs,(int)this->GetNZElements()); + _reductionMatrixNormInf<<<1,1024>>>(this->m_pArray,d_maxAbs,(int)this->GetNZElements()); CUDACALL(cudaMemcpy(&h_maxAbs,d_maxAbs,sizeof(ElemType),cudaMemcpyDeviceToHost)); CUDACALL(cudaFree(d_maxAbs)); if (sizeof(ElemType)==sizeof(float)) @@ -1666,7 +1671,7 @@ namespace Microsoft { namespace MSR { namespace CNTK { template ElemType GPUSparseMatrix::MatrixNorm1() const { - if (IsEmpty()) + if (this->IsEmpty()) throw std::logic_error("MatrixNorm1: Matrix is empty."); return this->SumOfAbsElements(); } @@ -1678,14 +1683,14 @@ namespace Microsoft { namespace MSR { namespace CNTK { template GPUSparseMatrix& GPUSparseMatrix::ElementInverse () { - if (IsEmpty()) + if (this->IsEmpty()) throw std::logic_error("ElementInverse: Matrix is empty."); long N=(long)GetNZElements(); int blocksPerGrid =(int)ceil(1.0*N/threadsPerBlock); cudaEvent_t done = nullptr; CUDACALL(cudaEventCreate(&done)); - _elemInverse<<>>(m_pArray,N); + _elemInverse<<>>(this->m_pArray,N); CUDACALL(cudaEventRecord(done)); CUDACALL(cudaEventSynchronize(done)); return *this; @@ -1806,13 +1811,13 @@ namespace Microsoft { namespace MSR { namespace CNTK { template GPUSparseMatrix& GPUSparseMatrix::InplaceTruncateBottom (const ElemType threshold) { - if 
(IsEmpty()) + if (this->IsEmpty()) throw std::logic_error("InplaceTruncateBottom: Matrix is empty."); long N=(long)GetNZElements(); int blocksPerGrid =(int)ceil(N*1.0/threadsPerBlock); cudaEvent_t done = nullptr; CUDACALL(cudaEventCreate(&done)); - _inplaceTruncateBottom<<>>(m_pArray,threshold,N); + _inplaceTruncateBottom<<>>(this->m_pArray,threshold,N); CUDACALL(cudaEventRecord(done)); CUDACALL(cudaEventSynchronize(done)); return *this; @@ -1833,7 +1838,7 @@ namespace Microsoft { namespace MSR { namespace CNTK { int blocksPerGrid =(int)ceil(N*1.0/threadsPerBlock); cudaEvent_t done = nullptr; CUDACALL(cudaEventCreate(&done)); - _assignTruncateBottom<<>>(m_pArray,a.NzLocation(),threshold,N); + _assignTruncateBottom<<>>(this->m_pArray,a.NzLocation(),threshold,N); CUDACALL(cudaEventRecord(done)); CUDACALL(cudaEventSynchronize(done)); return *this; @@ -1842,13 +1847,13 @@ namespace Microsoft { namespace MSR { namespace CNTK { template GPUSparseMatrix& GPUSparseMatrix::InplaceTruncateTop (const ElemType threshold) { - if (IsEmpty()) + if (this->IsEmpty()) throw std::logic_error("InplaceTruncateTop: Matrix is empty."); long N=(long)GetNZElements(); int blocksPerGrid =(int)ceil(N*1.0/threadsPerBlock); cudaEvent_t done = nullptr; CUDACALL(cudaEventCreate(&done)); - _inplaceTruncateTop<<>>(m_pArray,threshold,N); + _inplaceTruncateTop<<>>(this->m_pArray,threshold,N); CUDACALL(cudaEventRecord(done)); CUDACALL(cudaEventSynchronize(done)); return *this; @@ -1869,7 +1874,7 @@ namespace Microsoft { namespace MSR { namespace CNTK { int blocksPerGrid =(int)ceil(N*1.0/threadsPerBlock); cudaEvent_t done = nullptr; CUDACALL(cudaEventCreate(&done)); - _assignTruncateTop<<>>(m_pArray,a.NzLocation(),threshold,N); + _assignTruncateTop<<>>(this->m_pArray,a.NzLocation(),threshold,N); CUDACALL(cudaEventRecord(done)); CUDACALL(cudaEventSynchronize(done)); return *this; @@ -1878,13 +1883,13 @@ namespace Microsoft { namespace MSR { namespace CNTK { template GPUSparseMatrix& 
GPUSparseMatrix::SetToZeroIfAbsLessThan (const ElemType threshold) { - if (IsEmpty()) + if (this->IsEmpty()) throw std::logic_error("SetToZeroIfAbsLessThan: Matrix is empty."); long N=(long)GetNZElements(); int blocksPerGrid =(int)ceil(N*1.0/threadsPerBlock); cudaEvent_t done = nullptr; CUDACALL(cudaEventCreate(&done)); - _setToZeroIfAbsLessThan<<>>(m_pArray,threshold,N); + _setToZeroIfAbsLessThan<<>>(this->m_pArray,threshold,N); CUDACALL(cudaEventRecord(done)); CUDACALL(cudaEventSynchronize(done)); return *this; @@ -1978,25 +1983,25 @@ namespace Microsoft { namespace MSR { namespace CNTK { switch (kind) { case 0: - _inplaceSigmoidOnCuda<<>>(m_pArray,N); + _inplaceSigmoidOnCuda<<>>(this->m_pArray,N); break; case 1: - _inplaceTanhOnCuda<<>>(m_pArray,N); + _inplaceTanhOnCuda<<>>(this->m_pArray,N); break; case 2: - _inplaceSqrtOnCuda<<>>(m_pArray,N); + _inplaceSqrtOnCuda<<>>(this->m_pArray,N); break; case 3: - _inplaceExpOnCuda<<>>(m_pArray,N); + _inplaceExpOnCuda<<>>(this->m_pArray,N); break; case 4: - _inplaceLogOnCuda<<>>(m_pArray,N); + _inplaceLogOnCuda<<>>(this->m_pArray,N); break; case 5: - _inplaceAbsOnCuda<<>>(m_pArray,N); + _inplaceAbsOnCuda<<>>(this->m_pArray,N); break; case 6: - _inplaceLinRectDerivative<<>>(m_pArray,N); + _inplaceLinRectDerivative<<>>(this->m_pArray,N); } CUDACALL(cudaEventRecord(done)); CUDACALL(cudaEventSynchronize(done)); @@ -2005,20 +2010,20 @@ namespace Microsoft { namespace MSR { namespace CNTK { template void GPUSparseMatrix::SetMatrixFromCSRFormat(int *h_CSRRow, int *h_Col, ElemType *h_Val, size_t nz, size_t numRows, size_t numCols, bool IsOnDevice, int devId) { - m_computeDevice = devId; - m_elemSizeAllocated = m_nz = nz; - m_numCols=numCols; - m_numRows=numRows; - m_format=matrixFormatSparseCSR; - m_externalBuffer = false; + this->m_computeDevice = devId; + this->m_elemSizeAllocated = this->m_nz = nz; + this->m_numCols=numCols; + this->m_numRows=numRows; + this->m_format=matrixFormatSparseCSR; + this->m_externalBuffer = false; - 
if (OwnBuffer() && m_pArray != nullptr) + if (this->OwnBuffer() && this->m_pArray != NULL) { - CUDACALL(cudaFree(m_pArray)); + CUDACALL(cudaFree(this->m_pArray)); } PrepareDevice(); - CUDACALL(cudaMalloc((void **)&m_pArray,BufferSize())); + CUDACALL(cudaMalloc((void **)&this->m_pArray,BufferSize())); cudaMemcpyKind kind = IsOnDevice?cudaMemcpyDeviceToDevice:cudaMemcpyHostToDevice; CUDACALL(cudaMemcpy(RowLocation(),h_CSRRow,RowSize(),kind)); @@ -2031,7 +2036,7 @@ namespace Microsoft { namespace MSR { namespace CNTK { void GPUSparseMatrix::GetMatrixFromCSRFormat(int*& h_CSRRow, int*& h_Col, ElemType*& h_Val, size_t &nz, size_t &numRows, size_t &numCols) const { if (h_CSRRow!=NULL || h_Col!=NULL || h_Val!=NULL) - throw std::exception("Passed pointers must be NULL"); + throw stdException("Passed pointers must be NULL"); nz = this->GetNZElements(); numRows = this->GetNumRows(); numCols = this->GetNumCols(); @@ -2042,7 +2047,7 @@ namespace Microsoft { namespace MSR { namespace CNTK { { PrepareDevice(); h_Val = new ElemType[nz]; - h_CSRRow = new int[m_numRows + 1]; + h_CSRRow = new int[this->m_numRows + 1]; h_Col = new int[nz]; CUDACALL(cudaMemcpy(h_CSRRow,RowLocation(),RowSize(),cudaMemcpyDeviceToHost)); @@ -2063,7 +2068,7 @@ namespace Microsoft { namespace MSR { namespace CNTK { size_t elsize; stream>>elsize; if (sizeof(ElemType)!=elsize) - throw std::exception("Template argument size doesn't match those in file"); + throw stdException("Template argument size doesn't match those in file"); std::wstring matrixName; // save off the buffer size being passed in diff --git a/Math/Math/GPUSparseMatrix.cuh b/Math/Math/GPUSparseMatrix.cuh index 5656fbe32..6691071ca 100644 --- a/Math/Math/GPUSparseMatrix.cuh +++ b/Math/Math/GPUSparseMatrix.cuh @@ -27,7 +27,9 @@ namespace Microsoft { namespace MSR { namespace CNTK { void performInplaceFunction(int kind); void DeepCopy(const GPUSparseMatrix& deepCopyFrom); void Clear(); +#ifndef LINUX void PrepareBuffer(size_t m, size_t n, bool 
canReuseBuffer, std::function func); +#endif size_t ElemCountFromBufferSize(size_t totalBufferSize); void PrepareDevice(short deviceId=-1) const; @@ -39,7 +41,9 @@ namespace Microsoft { namespace MSR { namespace CNTK { GPUSparseMatrix(const GPUSparseMatrix&); GPUSparseMatrix(const GPUMatrix&); +#ifndef LINUX GPUSparseMatrix(GPUSparseMatrix&&); +#endif /* LINUX */ ~GPUSparseMatrix(); public: void Resize(const size_t numRows, const size_t numCols, size_t size = 0); @@ -50,22 +54,22 @@ namespace Microsoft { namespace MSR { namespace CNTK { // in memory format is always in the following order: // Non-zero data elements, Full index locations, compressed index locations // In CSR row data is compressed, in CSC col data is compressed - const ElemType* NzLocation() const {return m_pArray;} - ElemType* NzLocation() {return m_pArray;} - size_t NzCount() const {return m_nz;} - size_t NzSize() const {return sizeof(ElemType)*m_nz;} // actual number of element bytes in use - int* IndexLocation() const {return (int*)(m_pArray+m_elemSizeAllocated);} - size_t IndexSize() const {return sizeof(int)*m_nz;} // actual number of index bytes in use - int* CompressedIndexLocation() const {return IndexLocation() + m_elemSizeAllocated;} + const ElemType* NzLocation() const {return this->m_pArray;} + ElemType* NzLocation() {return this->m_pArray;} + size_t NzCount() const {return this->m_nz;} + size_t NzSize() const {return sizeof(ElemType)*this->m_nz;} // actual number of element bytes in use + int* IndexLocation() const {return (int*)(this->m_pArray+this->m_elemSizeAllocated);} + size_t IndexSize() const {return sizeof(int)*this->m_nz;} // actual number of index bytes in use + int* CompressedIndexLocation() const {return IndexLocation() + this->m_elemSizeAllocated;} size_t CompressedIndexCount() const { - if (m_format&matrixFormatCompressed) + if (this->m_format&matrixFormatCompressed) { - size_t cnt = (m_format&matrixFormatRowMajor)?m_numRows:m_numCols; + size_t cnt = 
(this->m_format&matrixFormatRowMajor)?this->m_numRows:this->m_numCols; if (cnt) cnt++; // add an extra element on the end for the "max" value return cnt; } - return m_nz; // COO format + return this->m_nz; // COO format } // get size for compressed index size_t CompressedIndexSize() const {return (CompressedIndexCount())*sizeof(int);} @@ -73,10 +77,10 @@ namespace Microsoft { namespace MSR { namespace CNTK { ElemType* BufferPointer() const; // the column and row locations will swap based on what format we are in. Full index always follows the data array - int* RowLocation() const {return (m_format&matrixFormatRowMajor)?CompressedIndexLocation():IndexLocation();} - size_t RowSize() const {return (m_format&matrixFormatRowMajor)?CompressedIndexSize():IndexSize();} - int* ColLocation() const {return (m_format&matrixFormatRowMajor)?IndexLocation():CompressedIndexLocation();} - size_t ColSize() const {return (m_format&matrixFormatRowMajor)?IndexSize():CompressedIndexSize();} // actual number of row bytes in use + int* RowLocation() const {return (this->m_format&matrixFormatRowMajor)?CompressedIndexLocation():IndexLocation();} + size_t RowSize() const {return (this->m_format&matrixFormatRowMajor)?CompressedIndexSize():IndexSize();} + int* ColLocation() const {return (this->m_format&matrixFormatRowMajor)?IndexLocation():CompressedIndexLocation();} + size_t ColSize() const {return (this->m_format&matrixFormatRowMajor)?IndexSize():CompressedIndexSize();} // actual number of row bytes in use void SetValue(const GPUSparseMatrix& deepCopyFrom); void SetValue(const GPUMatrix& denseMatrix); @@ -89,7 +93,9 @@ namespace Microsoft { namespace MSR { namespace CNTK { GPUMatrix CopyToDenseMatrix(); GPUSparseMatrix& operator=(const GPUSparseMatrix& deepCopy); +#ifndef LINUX GPUSparseMatrix& operator=(GPUSparseMatrix&& moveFrom); +#endif /* LINUX */ GPUSparseMatrix operator+ (const GPUSparseMatrix& a) const; GPUSparseMatrix operator- (const GPUSparseMatrix& a) const; GPUSparseMatrix& 
operator^= (ElemType alpha); //element-wise power @@ -102,7 +108,7 @@ namespace Microsoft { namespace MSR { namespace CNTK { bool IsEqualTo(const GPUMatrix& a, const ElemType threshold = 1e-8) const; public: int GetComputeDeviceId(void) const; - size_t GetNZElements() const {return m_nz;} + size_t GetNZElements() const {return this->m_nz;} //Sets sparse matrix in CSR format. this acts as deep copy void SetMatrixFromCSRFormat(int *h_CSRRow, int *h_Col, ElemType *h_Val, size_t nz, size_t numRows, size_t numCols, bool IsOnDevice=false, int devId=0); void SetMatrixFromCSCFormat(size_t *h_row, size_t *h_rowIdx, size_t size, size_t blockSize); @@ -199,10 +205,11 @@ namespace Microsoft { namespace MSR { namespace CNTK { public: - template - friend MATH_API File& operator>>(File& stream, GPUSparseMatrix& us); - template - friend MATH_API File& operator<<(File& stream, const GPUSparseMatrix& us); + // See: http://stackoverflow.com/questions/4660123/overloading-friend-operator-for-template-class/4661372#4661372 + template + friend MATH_API File& operator>>(File& stream, GPUSparseMatrix& us); + template + friend MATH_API File& operator<<(File& stream, const GPUSparseMatrix& us); bool m_legacy; int m_colIdx; //used to SetValue() diff --git a/Math/Math/InstantiateTemplates.cpp b/Math/Math/InstantiateTemplates.cpp index b734fedbe..f457725df 100644 --- a/Math/Math/InstantiateTemplates.cpp +++ b/Math/Math/InstantiateTemplates.cpp @@ -8,8 +8,8 @@ #include "CPUMatrix.cpp" #include "Matrix.cpp" -#include "..\..\common\include\fileutil.cpp" -#include "..\..\common\include\File.cpp" +#include "..\..\Common\Include\fileutil.cpp" +#include "..\..\Common\Include\File.cpp" //don't treat it as sample code. 
some code does not make sense //only used to force compiler to build the code @@ -343,4 +343,4 @@ namespace Microsoft { namespace MSR { namespace CNTK { CallEverythingInMatrix(); } #pragma endregion instantiate all classes -}}} \ No newline at end of file +}}} diff --git a/Math/Math/Matrix.cpp b/Math/Math/Matrix.cpp index 96a0f7b10..6b2fc5e74 100644 --- a/Math/Math/Matrix.cpp +++ b/Math/Math/Matrix.cpp @@ -8,11 +8,17 @@ #include "fileutil.h" #include "Matrix.h" #include +#include #pragma warning (disable: 4127) // conditional expression is constant; "if (sizeof(ElemType)==sizeof(float))" triggers this #pragma warning (disable: 4239) // nonstandard extension; triggered by this pattern: "auto& second = transposeB ? b.m_GPUMatrix->Transpose() : *b.m_GPUMatrix;" #pragma warning (disable: 4702) // unreachable code; triggered for unknown reasons +#ifndef min +#define min(a,b) (((a) < (b)) ? (a) : (b)) +#endif + + //before calling the following macro the current matrix location and matrix type on MatrixPointerToCheck must have been set correctly #define DISPATCH_MATRIX_ON_FLAG(MatrixPointerToCheck, MatrixPointerToSetFlag, CPUDense, GPUDense, CPUSparse, GPUSparse) \ { \ @@ -218,7 +224,7 @@ namespace Microsoft { namespace MSR { namespace CNTK { //matrixName is used to verify that correct matrix is read. 
template - Matrix::Matrix(FILE* f, const char * matrixName, short deviceId=AUTOPLACEMATRIX, const MatrixType matrixType = DENSE) + Matrix::Matrix(FILE* f, const char * matrixName, short deviceId, const MatrixType matrixType) { if (deviceId == MANAGEDEXTERN) throw runtime_error("Externally Managed Matrix must use the basic constructor, then SetValue()\n"); @@ -256,7 +262,7 @@ namespace Microsoft { namespace MSR { namespace CNTK { } template - Matrix::Matrix(const size_t numRows, const size_t numCols, short deviceId=AUTOPLACEMATRIX, const MatrixType matrixType = DENSE) + Matrix::Matrix(const size_t numRows, const size_t numCols, short deviceId, const MatrixType matrixType) { if (deviceId == MANAGEDEXTERN) throw runtime_error("Externally Managed Matrix must use the basic constructor, then SetValue(), or the full constructor\n"); @@ -294,7 +300,7 @@ namespace Microsoft { namespace MSR { namespace CNTK { } template - Matrix::Matrix(const size_t numRows, const size_t numCols, ElemType *pArray, const size_t matrixFlags, short deviceId=AUTOPLACEMATRIX, const size_t nnz=0) + Matrix::Matrix(const size_t numRows, const size_t numCols, ElemType *pArray, const size_t matrixFlags, short deviceId, const size_t nnz) { Init(deviceId); @@ -332,7 +338,7 @@ namespace Microsoft { namespace MSR { namespace CNTK { //copy constructor, deep copy template - Matrix::Matrix(const Matrix& deepCopyFrom, short deviceId=AUTOPLACEMATRIX) + Matrix::Matrix(const Matrix& deepCopyFrom, short deviceId) { if (deviceId == MANAGEDEXTERN) throw runtime_error("Externally Managed Matrix must use the basic constructor, then SetValue(), or the full constructor\n"); @@ -519,7 +525,13 @@ namespace Microsoft { namespace MSR { namespace CNTK { NOT_IMPLEMENTED; } else + { +#ifndef LINUX throw std::exception("Unknown matrix type"); +#else + throw std::exception(); +#endif /* LINUX */ + } } template @@ -608,7 +620,13 @@ namespace Microsoft { namespace MSR { namespace CNTK { NOT_IMPLEMENTED; } else + { +#ifndef LINUX 
throw std::exception("Unknown matrix type"); +#else + throw std::exception(); +#endif /* LINUX */ + } return slice; } @@ -820,7 +838,14 @@ namespace Microsoft { namespace MSR { namespace CNTK { DISPATCH_MATRIX_ON_FLAG(this, this, m_CPUMatrix->SetValue(*db_number.ExposePointer2Value()), - if (GetDeviceId()!=db_number.GetDeviceId()) throw std::exception("Matrix and device bound number must be on the same device"); + if (GetDeviceId()!=db_number.GetDeviceId()) + { +#ifndef LINUX + throw std::exception("Matrix and device bound number must be on the same device"); +#else + throw std::exception(); +#endif /* LINUX */ + } m_GPUMatrix->SetValue(db_number.ExposePointer2Value()), NOT_IMPLEMENTED, NOT_IMPLEMENTED @@ -889,7 +914,7 @@ namespace Microsoft { namespace MSR { namespace CNTK { //WARNING: what's the exact meaning of MANAGEDEXTERN here? This is not handled currently template - void Matrix::SetValue(const size_t numRows, const size_t numCols, ElemType *pArray, const size_t matrixFlags, int deviceId=MANAGEDEXTERN) + void Matrix::SetValue(const size_t numRows, const size_t numCols, ElemType *pArray, const size_t matrixFlags, int deviceId) { if (pArray == nullptr) throw std::invalid_argument("Invalid pArray."); @@ -1054,7 +1079,7 @@ namespace Microsoft { namespace MSR { namespace CNTK { //maskRate: percentage of values masked out (similar to dropout rate) //scaleValue: which scale value to set to the left ones (unmasked items). 
template - void Matrix::SetUniformRandomMask(const ElemType maskRate, const ElemType scaleValue, unsigned long seed=USE_TIME_BASED_SEED) + void Matrix::SetUniformRandomMask(const ElemType maskRate, const ElemType scaleValue, unsigned long seed) { if (IsEmpty()) throw std::logic_error("SetUniformRandomMask: Matrix is empty."); @@ -2308,24 +2333,24 @@ namespace Microsoft { namespace MSR { namespace CNTK { if (sizeof(ElemType)==sizeof(float)) { - if (!_finitef((float)threshold)) + if (!isfinite((float)threshold)) return *this; } else { - if (!_finite(threshold)) + if (!isfinite(threshold)) return *this; } DISPATCH_MATRIX_ON_FLAG(this, this, this->m_CPUMatrix->InplaceTruncate(threshold), - this->m_GPUMatrix->InplaceTruncateTop(abs(threshold)); this->m_GPUMatrix->InplaceTruncateBottom(-abs(threshold)), + this->m_GPUMatrix->InplaceTruncateTop(fabs(threshold)); this->m_GPUMatrix->InplaceTruncateBottom(-fabs(threshold)), this->m_CPUSparseMatrix->InplaceTruncate(threshold), if(this->m_GPUSparseMatrix->m_legacy) { - this->m_GPUSparseMatrix->InplaceTruncateTop(abs(threshold)); - this->m_GPUSparseMatrix->InplaceTruncateBottom(-abs(threshold)); + this->m_GPUSparseMatrix->InplaceTruncateTop(fabs(threshold)); + this->m_GPUSparseMatrix->InplaceTruncateBottom(-fabs(threshold)); } else //new GPU Sparse matrix { @@ -2345,12 +2370,12 @@ namespace Microsoft { namespace MSR { namespace CNTK { if (sizeof(ElemType)==sizeof(float)) { - if (!_finitef((float)threshold)) + if (!isfinite((float)threshold)) return *this; } else { - if (!_finite(threshold)) + if (!isfinite(threshold)) return *this; } @@ -2374,7 +2399,7 @@ namespace Microsoft { namespace MSR { namespace CNTK { if (sizeof(ElemType)==sizeof(float)) { - if (!_finitef((float)threshold)) + if (!isfinite((float)threshold)) { (*this) = a; return *this; @@ -2382,7 +2407,7 @@ namespace Microsoft { namespace MSR { namespace CNTK { } else { - if (!_finite(threshold)) + if (!isfinite(threshold)) { (*this) = a; return *this; @@ -2412,12 
+2437,12 @@ namespace Microsoft { namespace MSR { namespace CNTK { if (sizeof(ElemType)==sizeof(float)) { - if (!_finitef((float)threshold)) + if (!isfinite((float)threshold)) return *this; } else { - if (!_finite(threshold)) + if (!isfinite(threshold)) return *this; } @@ -2440,7 +2465,7 @@ namespace Microsoft { namespace MSR { namespace CNTK { if (sizeof(ElemType)==sizeof(float)) { - if (!_finitef((float)threshold)) + if (!isfinite((float)threshold)) { (*this) = a; return *this; @@ -2448,7 +2473,7 @@ namespace Microsoft { namespace MSR { namespace CNTK { } else { - if (!_finite(threshold)) + if (!isfinite(threshold)) { (*this) = a; return *this; @@ -3396,7 +3421,7 @@ namespace Microsoft { namespace MSR { namespace CNTK { } else if (a.m_matrixType==MatrixType::SPARSE && b.m_matrixType==c.m_matrixType && b.m_matrixType==MatrixType::DENSE) //Sparse*Dense+Dense { - auto& second = transposeB ? b.m_GPUMatrix->Transpose() : *b.m_GPUMatrix; + GPUMatrix second = transposeB ? b.m_GPUMatrix->Transpose() : *b.m_GPUMatrix; GPUSparseMatrix::MultiplyAndWeightedAdd(alpha,*a.m_GPUSparseMatrix,transposeA,second,beta,*c.m_GPUMatrix); c.SetDataLocation(GPU, DENSE); } @@ -3409,8 +3434,10 @@ namespace Microsoft { namespace MSR { namespace CNTK { } else { - auto& first = transposeA ? a.m_GPUMatrix->Transpose()*alpha : (*a.m_GPUMatrix)*alpha; - auto& second = transposeB ? b.m_GPUSparseMatrix->Transpose() : *b.m_GPUSparseMatrix; + GPUMatrix firstDummy = transposeA ? a.m_GPUMatrix->Transpose()*alpha : (*a.m_GPUMatrix)*alpha; + GPUMatrix & first= firstDummy; // By Malcolm.. gcc doesn't support auto + GPUSparseMatrix secondDummy = transposeB ? b.m_GPUSparseMatrix->Transpose() : *b.m_GPUSparseMatrix; + GPUSparseMatrix & second = secondDummy; // By Malcolm.. 
gcc doesn't support auto if (beta==0) { GPUSparseMatrix::Multiply(first,second,*c.m_GPUMatrix); @@ -3432,7 +3459,8 @@ namespace Microsoft { namespace MSR { namespace CNTK { } else if (a.m_matrixType==b.m_matrixType && b.m_matrixType==c.m_matrixType && a.m_matrixType==MatrixType::SPARSE) { - auto& first = alpha==1 ? *a.m_GPUSparseMatrix : (*a.m_GPUSparseMatrix)*alpha; + GPUSparseMatrix firstDummy = alpha==1 ? *a.m_GPUSparseMatrix : (*a.m_GPUSparseMatrix)*alpha; + GPUSparseMatrix & first = firstDummy; // By Malcolm.. gcc doesn't support auto if (beta==0) { GPUSparseMatrix::Multiply(first,transposeA,*b.m_GPUSparseMatrix,transposeB,*c.m_GPUSparseMatrix); @@ -3897,18 +3925,16 @@ namespace Microsoft { namespace MSR { namespace CNTK { template bool Matrix::HasNan (const char * name) const { -#if 0 - name; - return false; -#else - const auto & us = *this; + // const auto & us = *this; + const Matrix & us = *this; + foreach_coord (i, j, us) - if (_isnan (us(i,j))) + // if (isnan (us(i,j))) + if (isnan (us(i,j))) { - fprintf (stderr, "hasnan: NaN detected at %s (%d,%d)\n", name, i, j); + fprintf (stderr, "hasnan: NaN detected at %s (%ld,%ld)\n", name, i, j); return true; } -#endif return false; } #define CheckNan(m) m.HasNan (#m) @@ -3924,7 +3950,7 @@ namespace Microsoft { namespace MSR { namespace CNTK { foreach_coord (i, j, us) { auto val = us(i,j); - if (_isnan (val) || !_finite (val)) + if (isnan (val) || !isfinite (val)) n++; } return n; diff --git a/Math/Math/stdafx.h b/Math/Math/stdafx.h index 2c6564cd9..b52430973 100644 --- a/Math/Math/stdafx.h +++ b/Math/Math/stdafx.h @@ -14,10 +14,10 @@ #define _CRT_SECURE_NO_WARNINGS // "secure" CRT not available on all platforms --add this at the top of all CPP files that give "function or variable may be unsafe" warnings -#define WIN32_LEAN_AND_MEAN // Exclude rarely-used stuff from Windows headers // Windows Header Files: +#ifdef _WIN32 +#define WIN32_LEAN_AND_MEAN // Exclude rarely-used stuff from Windows headers #include - - 
+#endif // TODO: reference additional headers your program requires here diff --git a/Math/Math/targetver.h b/Math/Math/targetver.h index e0f1e69ca..934954c12 100644 --- a/Math/Math/targetver.h +++ b/Math/Math/targetver.h @@ -5,9 +5,13 @@ // #pragma once +#ifndef LINUX + // Including SDKDDKVer.h defines the highest available Windows platform. // If you wish to build your application for a previous Windows platform, include WinSDKVer.h and // set the _WIN32_WINNT macro to the platform you wish to support before including SDKDDKVer.h. #include + +#endif /* LINUX */ From a863c7746fc0364eab82212902a8ef47bd5b6895 Mon Sep 17 00:00:00 2001 From: Malcolm Slaney Date: Thu, 9 Oct 2014 10:44:27 -0700 Subject: [PATCH 13/31] More changes to get it compiling.. but not finished. --- Common/Include/basetypes.h | 11 ++++++++-- Math/Math/CommonMatrix.h | 1 + Math/Math/GPUMatrix.cu | 6 +++--- Math/Math/GPUMatrixCUDAKernels.cu | 7 +++++++ Math/Math/GPUSparseMatrix.cu | 35 ++++++++++++++++++++++++++++--- Math/Math/GPUSparseMatrix.cuh | 4 +++- Math/Math/Makefile | 9 ++++---- Math/Math/Matrix.cpp | 4 ++-- Math/Math/Matrix.h | 4 ++++ 9 files changed, 66 insertions(+), 15 deletions(-) diff --git a/Common/Include/basetypes.h b/Common/Include/basetypes.h index 7b424502c..d3145570a 100644 --- a/Common/Include/basetypes.h +++ b/Common/Include/basetypes.h @@ -11,6 +11,8 @@ typedef char16_t TCHAR; #include #define vsprintf_s vsprintf /* Not sure this is right... 
Malcolm */ +#include +#include #endif /* LINUX */ #ifndef UNDER_CE // fixed-buffer overloads not available for wince @@ -108,6 +110,7 @@ using namespace std; #define __inout_cap(x) #define __inout_cap_c(x) #endif +#endif // LINUX #ifndef __out_z_cap // non-VS2005 annotations #define __out_cap(x) #define __out_z_cap(x) @@ -321,7 +324,6 @@ public: #endif }; -#ifndef LINUX // locks a critical section, and unlocks it automatically // when the lock goes out of scope @@ -447,7 +449,11 @@ public: #include // uses strlen() #endif #define strlen strlen_ +#ifndef LINUX template inline __declspec(deprecated("Dummy general template, cannot be used directly")) +#else +template inline +#endif // LINUX size_t strlen_(_T &s) { return strnlen_s(static_cast(s), SIZE_MAX); } // never be called but needed to keep compiler happy template inline size_t strlen_(const _T &s) { return strnlen_s(static_cast(s), SIZE_MAX); } template<> inline size_t strlen_(char * &s) { return strnlen_s(s, SIZE_MAX); } @@ -980,7 +986,8 @@ template static void attempt (int retries, const FUNCTION & b #ifndef LINUX ::Sleep (1000); // wait a little, then try again #else - sleep(1); + std::chrono::milliseconds dura(1000); + std::this_thread::sleep_for(dura); #endif /* LINUX */ } } diff --git a/Math/Math/CommonMatrix.h b/Math/Math/CommonMatrix.h index f0dd57d46..da4a501a4 100644 --- a/Math/Math/CommonMatrix.h +++ b/Math/Math/CommonMatrix.h @@ -10,6 +10,7 @@ #ifdef LINUX #define wcsnlen_s wcsnlen /* Not sure if this is best replacement... 
Malcolm */ +// typedef char wchar_t; #endif #define AUTOPLACEMATRIX 1000 // used in parameters only diff --git a/Math/Math/GPUMatrix.cu b/Math/Math/GPUMatrix.cu index 7374aee54..9d690f0e4 100644 --- a/Math/Math/GPUMatrix.cu +++ b/Math/Math/GPUMatrix.cu @@ -424,7 +424,7 @@ namespace Microsoft { namespace MSR { namespace CNTK { { ZeroInit(deepCopyFrom.m_computeDevice); SetValue(deepCopyFrom); - SetMatrixName(deepCopyFrom.m_matrixName); + this->SetMatrixName(deepCopyFrom.m_matrixName); } #ifndef LINUX @@ -452,7 +452,7 @@ namespace Microsoft { namespace MSR { namespace CNTK { if (this != &deepCopyFrom) { SetValue(deepCopyFrom); - SetMatrixName(deepCopyFrom.m_matrixName); + this->SetMatrixName(deepCopyFrom.m_matrixName); } return *this; } @@ -464,7 +464,7 @@ namespace Microsoft { namespace MSR { namespace CNTK { { if (this != &moveFrom) { - if (OwnBuffer() && this->m_pArray!=NULL) + if (this->OwnBuffer() && this->m_pArray!=NULL) { CUDA_CALL(cudaFree(this->m_pArray)); } diff --git a/Math/Math/GPUMatrixCUDAKernels.cu b/Math/Math/GPUMatrixCUDAKernels.cu index 6c035c2db..55af99884 100644 --- a/Math/Math/GPUMatrixCUDAKernels.cu +++ b/Math/Math/GPUMatrixCUDAKernels.cu @@ -18,6 +18,13 @@ #define MINLOGEXP -9.2103 #define LSMALL -0.5E10 +// Predefine this for later. 
+#ifndef LINUX +static __inline__ __device__ double atomicAdd(double* address, double val); +#else +static __device__ double atomicAdd(double* address, double val); +#endif + //CUDA Kernels code template __global__ void _elementWisePowerOnCuda( diff --git a/Math/Math/GPUSparseMatrix.cu b/Math/Math/GPUSparseMatrix.cu index 31bf6bb8f..e0e908292 100644 --- a/Math/Math/GPUSparseMatrix.cu +++ b/Math/Math/GPUSparseMatrix.cu @@ -1044,7 +1044,11 @@ namespace Microsoft { namespace MSR { namespace CNTK { // canReuseBuffer - target matrix can be reused for temporary space // func - function to call to count elements in the result (returns count, and fills csrRowPtr array) template +#ifndef LINUX void GPUSparseMatrix::PrepareBuffer(size_t m, size_t n, bool canReuseBuffer, std::function func) +#else + void GPUSparseMatrix::PrepareBuffer(size_t m, size_t n, bool canReuseBuffer, size_t (*func)(int *csRowPtrC)) +#endif /* LINUX */ { int* csrRowPtrC=NULL; GPUSparseMatrix& c = *this; @@ -1099,6 +1103,16 @@ namespace Microsoft { namespace MSR { namespace CNTK { CUDACALL(cudaFree(csrRowPtrC)); } +#ifdef LINUXxx + size_t PrepareBufferMultiply(int* csrRowPtrC) + { + int nnzTotal = -1; + CUSPARSECALL(cusparseXcsrgemmNnz(cusparseHandle,operA,operB,m,n,k,descrA,nnzA,S1.RowLocation(),S1.ColLocation(),descrB,nnzB, + S2.RowLocation(),S2.ColLocation(),descrC,csrRowPtrC,&nnzTotal)); + return nnzTotal; + } +#endif + // Multiply - multiply one spares matrix by another sparse matrix // S1 - first sparse matrix // transposeS1 - transpose first matrix? 
@@ -1136,13 +1150,18 @@ namespace Microsoft { namespace MSR { namespace CNTK { CUDACALL(cudaEventCreate(&done)); //Step 1 c.PrepareBuffer(m, n, true, // true means we can reuse the "c" buffer if it exists for temporaries +#ifndef LINUX [&](int* csrRowPtrC) -> size_t { int nnzTotal = -1; CUSPARSECALL(cusparseXcsrgemmNnz(cusparseHandle,operA,operB,m,n,k,descrA,nnzA,S1.RowLocation(),S1.ColLocation(),descrB,nnzB, S2.RowLocation(),S2.ColLocation(),descrC,csrRowPtrC,&nnzTotal)); return nnzTotal; - }); + } +#else + NULL // PrepareBufferMultiply +#endif + ); //Step 2 @@ -1196,12 +1215,18 @@ namespace Microsoft { namespace MSR { namespace CNTK { CUDACALL(cudaEventCreate(&done)); //Step 1 bool inOutParameter = (&b == &c); - c.PrepareBuffer(m, n, !inOutParameter, [&] (int* csrRowPtrC) -> size_t + c.PrepareBuffer(m, n, !inOutParameter, +#ifndef LINUX + [&] (int* csrRowPtrC) -> size_t { int nnzTotal = -1; CUSPARSECALL(cusparseXcsrgeamNnz(cusparseHandle,m,n,descrA,nnzA,a.RowLocation(),a.ColLocation(),descrB,nnzB,b.RowLocation(),b.ColLocation(),descrC,csrRowPtrC,&nnzTotal)); return nnzTotal; - }); + } +#else + NULL +#endif // Linux + ); //Step 2 if (sizeof(ElemType)==sizeof(float)) @@ -1588,7 +1613,11 @@ namespace Microsoft { namespace MSR { namespace CNTK { if (this->IsEmpty()) return; // transfer converted block over to this pointer +#ifndef LINUX *this = std::move(this->Transpose()); +#else + std::cerr << "Not sure how to do the InplaceTranspose()"; +#endif } template diff --git a/Math/Math/GPUSparseMatrix.cuh b/Math/Math/GPUSparseMatrix.cuh index 6691071ca..8b1f959a2 100644 --- a/Math/Math/GPUSparseMatrix.cuh +++ b/Math/Math/GPUSparseMatrix.cuh @@ -29,7 +29,9 @@ namespace Microsoft { namespace MSR { namespace CNTK { void Clear(); #ifndef LINUX void PrepareBuffer(size_t m, size_t n, bool canReuseBuffer, std::function func); -#endif +#else + void PrepareBuffer(size_t m, size_t n, bool canReuseBuffer, size_t (*func)(int *csRowPtrC)); +#endif size_t ElemCountFromBufferSize(size_t 
totalBufferSize); void PrepareDevice(short deviceId=-1) const; diff --git a/Math/Math/Makefile b/Math/Math/Makefile index 154f2ad83..87c9e1f35 100644 --- a/Math/Math/Makefile +++ b/Math/Math/Makefile @@ -1,8 +1,8 @@ CSOURCES = CPUMatrix.cpp CPUSparseMatrix.cpp Matrix.cpp OBJECTS = CPUMatrix.o CPUSparseMatrix.o Matrix.o \ - GPUSparseMatrix.o GPUWatcher.o \ - GPUMatrixCUDAKernels.o GPUMatrix.o + GPUMatrixCUDAKernels.o GPUMatrix.o \ + GPUWatcher.o GPUSparseMatrix.o INCLUDES = -I../../Common/Include -I/opt/acml5.3.1/gfortran64_mp_int64/include @@ -10,9 +10,10 @@ DEPS = CFLAGS = $(INCLUDES) \ -D BASETYPES_NO_UNSAFECRTOVERLOAD -DBASETYPES_NO_STRPRINTF \ - -DLINUX -D_FILEUTIL_ -Wnon-template-friend -std=c++11 + -DLINUX -Wnon-template-friend -std=c++11 # -D_FILEUTIL_ -NVCFLAGS = -DLINUX -I../../Common/Include -D_FILEUTIL_ -arch sm_11 +NVCFLAGS = -DLINUX -D BASETYPES_NO_UNSAFECRTOVERLOAD -DBASETYPES_NO_STRPRINTF \ + -I../../Common/Include -arch=compute_20 -std=c++11 # -D_FILEUTIL_ CXX = gcc NVCC = nvcc diff --git a/Math/Math/Matrix.cpp b/Math/Math/Matrix.cpp index 6b2fc5e74..cdbd2526d 100644 --- a/Math/Math/Matrix.cpp +++ b/Math/Math/Matrix.cpp @@ -3435,9 +3435,9 @@ namespace Microsoft { namespace MSR { namespace CNTK { else { GPUMatrix firstDummy = transposeA ? a.m_GPUMatrix->Transpose()*alpha : (*a.m_GPUMatrix)*alpha; - GPUMatrix & first= firstDummy; // By Malcolm.. gcc doesn't support auto + GPUMatrix & first= firstDummy; // By Malcolm.. gcc doesn't support auto like original GPUSparseMatrix secondDummy = transposeB ? b.m_GPUSparseMatrix->Transpose() : *b.m_GPUSparseMatrix; - GPUSparseMatrix & second = secondDummy; // By Malcolm.. gcc doesn't support auto + GPUSparseMatrix & second = secondDummy; // By Malcolm.. 
gcc doesn't support auto like original if (beta==0) { GPUSparseMatrix::Multiply(first,second,*c.m_GPUMatrix); diff --git a/Math/Math/Matrix.h b/Math/Math/Matrix.h index 2fb72901c..3ae9e9b54 100644 --- a/Math/Math/Matrix.h +++ b/Math/Math/Matrix.h @@ -9,6 +9,10 @@ #include "GPUMatrix.cuh" #include "GPUSparseMatrix.cuh" +#ifdef LINUX +// typedef char wchar_t; +#endif + // This class is exported from the Math.dll namespace Microsoft { namespace MSR { namespace CNTK { enum CurrentDataLocation From ff72d5696f07c4365c0272df63d212841a441caf Mon Sep 17 00:00:00 2001 From: adame Date: Mon, 20 Oct 2014 14:50:30 -0700 Subject: [PATCH 14/31] Modify port code to support CPUONLY build To use this define CPUONLY in the CN project And remove *.cu from the math project and add GPUDummy.cpp instead This allows use of Cygwin to compile both Windows and linux on same machine --- Common/BestGpu.cpp | 21 +- Common/Include/BestGpu.h | 5 +- Common/Include/basetypes.h | 43 +- Common/Include/fileutil.h | 252 +---- Common/fileutil.cpp | 22 +- MachineLearning/cn/cn.vcxproj | 4 +- Math/Math/CPUMatrix.cpp | 4 +- Math/Math/GPUDummy.cpp | 1667 +++++++++++++++++++++++++++++ Math/Math/GPUMatrix.cu | 70 +- Math/Math/GPUMatrixCUDAKernels.cu | 3 + Math/Math/GPUSparseMatrix.cu | 32 +- Math/Math/Math.vcxproj.filters | 77 ++ Math/Math/Matrix.cpp | 22 +- 13 files changed, 1911 insertions(+), 311 deletions(-) create mode 100644 Math/Math/GPUDummy.cpp create mode 100644 Math/Math/Math.vcxproj.filters diff --git a/Common/BestGpu.cpp b/Common/BestGpu.cpp index 3204e4e8c..426342c8b 100644 --- a/Common/BestGpu.cpp +++ b/Common/BestGpu.cpp @@ -6,14 +6,28 @@ #define _CRT_SECURE_NO_WARNINGS // "secure" CRT not available on all platforms --add this at the top of all CPP files that give "function or variable may be unsafe" warnings +#include "BestGpu.h" +#include "CommonMatrix.h" // for CPUDEVICE and AUTOPLACEMATRIX + +#ifdef CPUONLY +namespace Microsoft { + namespace MSR { + namespace CNTK { + short 
DeviceFromConfig(const ConfigParameters& config) + { + return CPUDEVICE; + } + } + } +} +#else + // CUDA-C includes #include #include #include #include #include -#include "BestGpu.h" -#include "CommonMatrix.h" // for CPUDEVICE and AUTOPLACEMATRIX // The "notify hook" gets called for every call to the // delay load helper. This allows a user to hook every call and @@ -507,4 +521,5 @@ void BestGpu::QueryNvmlData() m_nvmlData = true; return; } -}}} \ No newline at end of file +}}} +#endif diff --git a/Common/Include/BestGpu.h b/Common/Include/BestGpu.h index 3c04c4ab2..849b43ac6 100644 --- a/Common/Include/BestGpu.h +++ b/Common/Include/BestGpu.h @@ -4,15 +4,18 @@ // // #pragma once +#ifndef CPUONLY #include #include #include +#endif #include "commandArgUtil.h" namespace Microsoft { namespace MSR { namespace CNTK { short DeviceFromConfig(const ConfigParameters& config); +#ifndef CPUONLY struct ProcessorData { int cores; @@ -68,5 +71,5 @@ public: std::vector GetDevices(int number=AllDevices, BestGpuFlags flags=bestGpuNormal); // get multiple devices }; extern BestGpu* g_bestGpu; - +#endif }}} \ No newline at end of file diff --git a/Common/Include/basetypes.h b/Common/Include/basetypes.h index d3145570a..95819f825 100644 --- a/Common/Include/basetypes.h +++ b/Common/Include/basetypes.h @@ -13,7 +13,13 @@ typedef char16_t TCHAR; #define vsprintf_s vsprintf /* Not sure this is right... Malcolm */ #include #include -#endif /* LINUX */ +#include +#include +#define Linux(a) a +#else +#include +#endif /* LINUX */ +#include // for HUGE_VAL // Remove for a test by Malcolm because of double isnan definition... #ifndef UNDER_CE // fixed-buffer overloads not available for wince #ifdef _CRT_SECURE_CPP_OVERLOAD_STANDARD_NAMES // fixed-buffer overloads for strcpy() etc. 
@@ -80,11 +86,13 @@ OACR_WARNING_DISABLE(POTENTIAL_ARGUMENT_TYPE_MISMATCH, "Not level1 or level2_sec #include #include #include -#include // for HUGE_VAL // potential double isnan definition +#include // for HUGE_VAL // potential double isnan definition #include #include #include #include +#include // std::wstring_convert +#include // std::codecvt_utf8 #ifdef _MSC_VER #include // for CRITICAL_SECTION and Unicode conversion functions --TODO: is there a portable alternative? #endif @@ -578,6 +586,9 @@ typedef strfun::_strprintf wstrprintf; // wchar_t version #ifdef _WIN32 struct utf8 : std::string { utf8 (const std::wstring & p) // utf-16 to -8 { + //TODO: confirm it builds on VS2013 + std::wstring_convert> cv; + (*(std::string*)this) = cv.to_bytes(p); #ifdef MALCOLM size_t len = p.length(); if (len == 0) { return;} // empty string @@ -592,16 +603,19 @@ struct utf8 : std::string { utf8 (const std::wstring & p) // utf-16 to -8 }}; struct utf16 : std::wstring { utf16 (const std::string & p) // utf-8 to -16 { -#ifdef MALCOLM + std::wstring_convert> cv; + (*(std::wstring*)this) = cv.from_bytes(p); + +#ifdef OLD size_t len = p.length(); if (len == 0) { return;} // empty string msra::basetypes::fixed_vector buf (len + 1); // ... 
TODO: this fill() should be unnecessary (a 0 is appended)--but verify - std::fill (buf.begin (), buf.end (), (wchar_t) 0); - int rc = MultiByteToWideChar (CP_UTF8, 0, p.c_str(), (int) len, - &buf[0], (int) buf.size()); - if (rc == 0) throw std::runtime_error ("MultiByteToWideChar"); - ASSERT (rc < buf.size ()); + std::fill(buf.begin(), buf.end(), (wchar_t)0); + int rc = MultiByteToWideChar(CP_UTF8, 0, p.c_str(), (int)len, + &buf[0], (int)buf.size()); + if (rc == 0) throw std::runtime_error("MultiByteToWideChar"); + ASSERT(rc < buf.size()); (*(std::wstring*)this) = &buf[0]; #endif /* Malcolm */ }}; @@ -641,12 +655,8 @@ static inline std::string wcstombs (const std::wstring & p) // output: MBCS } static inline std::wstring mbstowcs (const std::string & p) // input: MBCS { - size_t len = p.length(); - msra::basetypes::fixed_vector buf (len + 1); // max: >1 mb chars => 1 wchar - std::fill (buf.begin (), buf.end (), (wchar_t) 0); - OACR_WARNING_SUPPRESS(UNSAFE_STRING_FUNCTION, "Reviewed OK. size checked. [rogeryu 2006/03/21]"); - ::mbstowcs (&buf[0], p.c_str(), len + 1); - return std::wstring (&buf[0]); + std::wstring ret = utf16(p); + return ret; } #pragma warning(pop) @@ -769,8 +779,6 @@ static inline FILE* _wfopen(const wchar_t * path, const wchar_t * mode) { return namespace msra { namespace basetypes { -#ifdef MALCOLM - // FILE* with auto-close; use auto_file_ptr instead of FILE*. // Warning: do not pass an auto_file_ptr to a function that calls fclose(), // except for fclose() itself. 
@@ -789,7 +797,7 @@ public: auto_file_ptr() : f (NULL) { } ~auto_file_ptr() { close(); } auto_file_ptr (const char * path, const char * mode) { f = fopen (path, mode); if (f == NULL) openfailed (path); } - auto_file_ptr (const wchar_t * path, const char * mode) { f = _wfopen (path, msra::strfun::utf16 (mode).c_str()); if (f == NULL) openfailed (msra::strfun::utf8 (path)); } + auto_file_ptr (const wchar_t * wpath, const char * mode) {string path = msra::strfun::utf8(wpath); f = fopen (path.c_str(), mode); if (f == NULL) openfailed (path); } FILE * operator= (FILE * other) { close(); f = other; return f; } auto_file_ptr (FILE * other) : f (other) { } operator FILE * () const { return f; } @@ -797,7 +805,6 @@ public: void swap (auto_file_ptr & other) throw() { std::swap (f, other.f); } }; inline int fclose (auto_file_ptr & af) { return af.fclose(); } -#endif /* MALCOLM */ #ifdef _MSC_VER // auto-closing container for Win32 handles. diff --git a/Common/Include/fileutil.h b/Common/Include/fileutil.h index 51ef8f82a..8371de1b9 100644 --- a/Common/Include/fileutil.h +++ b/Common/Include/fileutil.h @@ -3,232 +3,19 @@ // // Copyright (c) Microsoft Corporation. All rights reserved. // -// $Log: /Speech_To_Speech_Translation/dbn/dbn/fileutil.h $ -// -// 71 1/03/13 8:53p Kaisheny -// Asynchronous SGD using data pipe. 
-// -// 70 9/30/12 10:46a Fseide -// new optional parameter to fuptodate()--caller can now choose whether a -// missing input file, with target file present, will cause a failure or -// considers the target up-to-date -// -// 69 11/09/11 10:01 Fseide -// added a new overload for fgetfilelines() that returns an array of char* -// instead of strings, to avoid mem alloc -// -// 68 6/10/11 9:50 Fseide -// (fixed a missing 'inline') -// -// 67 6/10/11 9:49 Fseide -// new function fgetfilelines() for reading text files -// -// 66 6/09/11 15:18 Fseide -// added overloads to fexists() that accept STL strings -// -// 65 3/07/11 12:13 Fseide -// actually implemented unlinkOrDie() (was a dummy) -// -// 64 11/17/10 15:00 Fseide -// new function fuptodate(); -// make_intermediate_dirs() moved to namespace msra::files (all new -// functions should be put in there) -// -// 63 11/15/10 7:04p Fseide -// added an overload for freadOrDie (vector) that takes size as a size_t -// instead of an int, to pleasr the x64 compiler -// -// 62 11/08/10 17:07 Fseide -// new function make_intermediate_dirs() -// -// 61 11/08/10 11:43 Fseide -// (minor cleanup) -// -// 60 2/05/09 19:06 Fseide -// fgetline() now returns a non-const pointer, because user may want to -// post-process the line, and the returned value is a user-specified -// buffer anyway -// -// 59 1/16/09 17:34 Fseide -// relpath() and splitpath() moved to fileutil.h -// -// 58 1/16/09 8:59 Fseide -// exported fskipspace() -// -// 57 1/15/09 7:38 Fseide -// some magic to unify fgetstring() for char and wchar_t to a single -// template function -// -// 56 1/15/09 7:26 Fseide -// corrected the #include order of basetypes.h -// -// 55 1/14/09 19:26 Fseide -// new functions fsetpos() and fgetpos(); -// new fixed-buffer size overload for fgetstring() and fgettoken() -// -// 54 1/08/09 16:14 Fseide -// fopenOrDie() now supports "-" as the pathname, referring to stdin or -// stdout -// -// 53 1/08/09 15:32 Fseide -// new funtion 
expand_wildcards() -// -// 52 1/05/09 8:44 Fseide -// (added comments) -// -// 51 11/11/08 6:04p Qiluo -// recover the old fputstring functions -// -// 50 10/31/08 5:09p Qiluo -// remove banned APIs -// -// 49 7/17/08 7:22p V-spwang -// undid changes - back to version 47 -// -// 47 6/24/08 19:03 Fseide -// added fgetwstring() and fputstring() for wstrings; -// added templates for freadOrDie() and fwriteOrDie() for STL vectors -// -// 46 6/18/08 11:41 Fseide -// added #pragma once -// -// 45 08-05-29 18:18 Llu -// fix the interface of fputwav -// -// 44 08-05-29 13:54 Llu -// add fputwav revise fgetwav using stl instead of short * -// -// 43 11/27/06 11:40 Fseide -// new methods fgetwfx() and fputwfx() for direct access to simple PCM WAV -// files -// -// 42 10/14/06 18:31 Fseide -// added char* version of fexists() -// -// 41 5/22/06 9:34 Fseide -// (experimental auto_file class checked in) -// -// 40 5/14/06 19:59 Fseide -// new function fsetmode() -// -// 39 3/29/06 15:36 Fseide -// changed to reading entire file instead of line-by-line, not changing -// newlines anymore -// -// 38 2/21/06 12:39p Kit -// Added filesize64 function -// -// 37 1/09/06 7:12p Rogeryu -// wide version of fgetline -// -// 36 12/19/05 21:52 Fseide -// fputfile() added in 8-bit string version -// -// 35 12/15/05 20:25 Fseide -// added getfiletime(), setfiletime(), and fputfile() for strings -// -// 34 9/27/05 12:22 Fseide -// added wstring version of renameOrDie() -// -// 33 9/22/05 12:26 Fseide -// new method fexists() -// -// 32 9/15/05 11:33 Fseide -// new version of fgetline() that avoids buffer allocations, since this -// seems very expensive esp. 
when reading a file line by line with -// fgetline() -// -// 31 9/05/05 4:57p F-xyzhao -// added #include for #include -- ugh -// -// 30 9/05/05 11:00 Fseide -// new method renameOrDie() -// -// 29 8/24/05 5:45p Kjchen -// merge changes in OneNote -// -// 28 8/19/05 17:56 Fseide -// extended WAVEHEADER with write() and update() -// -// 27 8/13/05 15:37 Fseide -// added new version of fgetline that takes a buffer -// -// 26 7/26/05 18:54 Fseide -// new functions fgetint24() and fputint24() -// -// 25 2/12/05 15:21 Fseide -// fgetdouble() and fputdouble() added -// -// 24 2/05/05 12:38 Fseide -// new methods fputfile(), fgetfile(); -// new overload for filesize() -// -// 23 2/03/05 22:34 Fseide -// added new version of fgetline() that returns an STL string -// -// 22 5/31/04 10:06 Fseide -// new methods fseekOrDie(), ftellOrDie(), unlinkOrDie(), renameOrDie() -// -// 21 3/19/04 4:01p Fseide -// fwriteOrDie(): first argument changed to const -// -// 20 2/27/04 10:04a V-xlshi -// -// 19 2/19/04 3:45p V-xlshi -// fgetraw function is added. 
-// -// 18 2/19/04 1:49p V-xlshi -// -// 17 2/03/04 8:17p V-xlshi -// -// 16 2/03/04 6:20p V-xlshi -// WAVEHEADER.prepare() added -// -// 15 2/03/04 5:58p V-xlshi -// WAVEHEADER structure added -// -// 14 8/15/03 15:40 Fseide -// new method filesize() -// -// 13 8/13/03 21:06 Fseide -// new function fputbyte() -// -// 12 8/13/03 15:37 Fseide -// prototype of fOpenOrDie() Unicode version changed -// -// 11 8/07/03 22:04 Fseide -// fprintfOrDie() now really dies in case of error -// -// 10 03-07-30 12:06 I-rogery -// enable both unicode and non-unicode version -// -// 9 7/25/03 6:07p Fseide -// new functions fgetbyte() and fgetwav() -// -// 8 7/03/02 9:25p Fseide -// fcompareTag() now uses string type for both of its arguments (before, -// it used const char * for one of them) -// -// 7 6/10/02 3:14p Fseide -// new functions fgettoken(), fgetfloat_ascii(), fskipNewline() -// -// 6 6/07/02 7:26p Fseide -// new functions fcheckTag_ascii() and fgetint_ascii() -// -// 5 4/15/02 1:12p Fseide -// void fputstring (FILE * f, const TSTRING & str) and fpad() added -// -// 4 4/03/02 3:58p Fseide -// VSS keyword and copyright added -// -// F. Seide 5 Mar 2002 -// - #pragma once #ifndef _FILEUTIL_ #define _FILEUTIL_ #include +#ifdef _WIN32 +#define isfinite(x) _finite(x) +#define isnan(x) _isnan(x) +#endif +#ifdef __unix__ +#include +#include +#endif #include // for std::find #include #include @@ -695,6 +482,29 @@ namespace msra { namespace files { // simple support for WAV file I/O // ---------------------------------------------------------------------------- +// define the header if we haven't seen it yet +#ifndef _WAVEFORMATEX_ +#define _WAVEFORMATEX_ + +/* + * extended waveform format structure used for all non-PCM formats. this + * structure is common to all non-PCM formats. + */ +typedef unsigned short WORD; // in case not defined yet (i.e. linux) +typedef struct tWAVEFORMATEX +{ + WORD wFormatTag; /* format type */ + WORD nChannels; /* number of channels (i.e. 
mono, stereo...) */ + DWORD nSamplesPerSec; /* sample rate */ + DWORD nAvgBytesPerSec; /* for buffer estimation */ + WORD nBlockAlign; /* block size of data */ + WORD wBitsPerSample; /* number of bits per sample of mono data */ + WORD cbSize; /* the count in bytes of the size of */ + /* extra information (after cbSize) */ +} WAVEFORMATEX, *PWAVEFORMATEX; + +#endif /* _WAVEFORMATEX_ */ + typedef struct wavehder{ char riffchar[4]; unsigned int RiffLength; diff --git a/Common/fileutil.cpp b/Common/fileutil.cpp index 675c3d3c7..eca7ce6af 100644 --- a/Common/fileutil.cpp +++ b/Common/fileutil.cpp @@ -298,7 +298,7 @@ size_t filesize (const wchar_t * pathname) // filesize64(): determine size of the file in bytes (with pathname) int64_t filesize64 (const wchar_t * pathname) { - __stat64 fileinfo; + struct _stat64 fileinfo; if (_wstat64 (pathname,&fileinfo) == -1) return 0; else @@ -1375,6 +1375,21 @@ vector msra::files::fgetfilelines (const wstring & path, vector & b bool getfiletime (const wstring & path, FILETIME & time) { // return file modification time, false if cannot be determined + struct _stat buf; + int result; + + // Get data associated with "crt_stat.c": + result = _wstat(path.c_str(), &buf); + // Check if statistics are valid: + if( result != 0 ) + { + return false; + } + + (*(time_t*)(&time))= buf.st_mtime; + return true; + +#ifdef OLD WIN32_FIND_DATAW findFileData; auto_handle hFind (FindFirstFileW (path.c_str(), &findFileData), ::FindClose); if (hFind != INVALID_HANDLE_VALUE) @@ -1386,10 +1401,14 @@ bool getfiletime (const wstring & path, FILETIME & time) { return false; } +#endif } void setfiletime (const wstring & path, const FILETIME & time) { // update the file modification time of an existing file +#ifdef LINUX + throw new logic_error("setfiletime has not been converted to linux yet..."); +#else auto_handle h (CreateFileW (path.c_str(), FILE_WRITE_ATTRIBUTES, FILE_SHARE_READ|FILE_SHARE_WRITE, NULL, OPEN_EXISTING, FILE_ATTRIBUTE_NORMAL, NULL)); @@ 
-1402,6 +1421,7 @@ void setfiletime (const wstring & path, const FILETIME & time) { RuntimeError ("setfiletime: error setting file time information: %d", GetLastError()); } +#endif } #if 0 diff --git a/MachineLearning/cn/cn.vcxproj b/MachineLearning/cn/cn.vcxproj index ebd270b41..d84aa6994 100644 --- a/MachineLearning/cn/cn.vcxproj +++ b/MachineLearning/cn/cn.vcxproj @@ -124,7 +124,7 @@ true Delayimp.lib;nvml.lib;cudart.lib;cntkMath.lib;kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;%(AdditionalDependencies) $(SolutionDir)$(Platform)\$(Configuration)\ - CNTKMath.dll;nvml.dll + CNTKMath.dll;nvml.dll;nvcuda.dll @@ -186,7 +186,7 @@ true Delayimp.lib;nvml.lib;cudart.lib;cntkMath.lib;kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;%(AdditionalDependencies) true - CNTKMath.dll;nvml.dll + CNTKMath.dll;nvml.dll;nvcuda.dll copy $(SolutionDir)..\Common\PTask\bin\*.dll $(TargetDir) diff --git a/Math/Math/CPUMatrix.cpp b/Math/Math/CPUMatrix.cpp index d76320f66..9a3df0ee8 100644 --- a/Math/Math/CPUMatrix.cpp +++ b/Math/Math/CPUMatrix.cpp @@ -21,13 +21,15 @@ #ifndef LINUX #include +#define Linux(x) #else +#define Linux(x) x #ifndef max #define max(a,b) (((a) > (b)) ? (a) : (b)) #endif -#include +#include #endif /* LINUX */ #ifdef LEAKDETECT diff --git a/Math/Math/GPUDummy.cpp b/Math/Math/GPUDummy.cpp new file mode 100644 index 000000000..5528816e5 --- /dev/null +++ b/Math/Math/GPUDummy.cpp @@ -0,0 +1,1667 @@ +// +// +// Copyright (c) Microsoft Corporation. All rights reserved. 
+// +// +#include "GPUMatrix.cuh" +#include "GPUSparseMatrix.cuh" + +namespace Microsoft { namespace MSR { namespace CNTK { + void PrepareDevice(short deviceId); + +#pragma region Constructors and Destructor + + template + GPUSparseMatrix::GPUSparseMatrix() + {} + + template + void GPUSparseMatrix::ZeroInit() + {} + + template + GPUSparseMatrix::GPUSparseMatrix(const GPUMatrix& deepCopy) + {} + + + template + GPUSparseMatrix::GPUSparseMatrix(const GPUSparseMatrix& deepCopy) + {} + + template + GPUSparseMatrix::GPUSparseMatrix(const size_t numRows, const size_t numCols, const size_t nz, ElemType* pArray, + const size_t matrixFlags /*=matrixFormatSparseCSR*/, int deviceId /*=MANAGEDEXTERN*/, const size_t elemSizeAllocated /*=0*/) + {} + + // PrepareDevice - Setup the correct cuda context for an operation + // deviceId - the device on which the operation will take place + // defaults to -1, which means use matrices current device + template + void GPUSparseMatrix::PrepareDevice(short deviceId /*=-1*/) const + {} + + template + void GPUSparseMatrix::DeepCopy(const GPUSparseMatrix& deepCopy) + {} + + template + void GPUSparseMatrix::SetValue(const GPUSparseMatrix& deepCopy) + {} + + template + GPUMatrix GPUSparseMatrix::CopyToDenseMatrix() + { + GPUMatrix res; + return res; + } + + template + void GPUSparseMatrix::SetValue(const GPUMatrix& denseMatrix) + {} + + template + GPUSparseMatrix& GPUSparseMatrix::operator=(const GPUSparseMatrix& deepCopy) + { + return *this; + } + +#ifndef LINUX + template + GPUSparseMatrix::GPUSparseMatrix(GPUSparseMatrix&& moveFrom) + {} + + template + GPUSparseMatrix& GPUSparseMatrix::operator=(GPUSparseMatrix&& moveFrom) + { + return *this; + } +#endif /* LINUX */ + + template + GPUSparseMatrix::~GPUSparseMatrix() + {} + + template + void GPUSparseMatrix::ClearNew() + {} + + + template + void GPUSparseMatrix::Clear() + {} + + //ResizeAs - Resize this sparse matrix to have the same element structure as the passed matrix + // a - sparse 
matrix whose structure we want to clone + // remark: this was done for element wise operations where the structure will be identical after an operation + template + void GPUSparseMatrix::ResizeAs(const GPUSparseMatrix& a) + {} + + //------------------------------------------------------------------------- + // Start of new GPU Sparse Matrix code + //------------------------------------------------------------------------- + + template + void GPUSparseMatrix::Init() + {} + + template + GPUSparseMatrix::GPUSparseMatrix(const MatrixFormat format, const int deviceId) + {} + + template + ElemType* GPUSparseMatrix::BufferPointer() const + { + return this->m_blockVal; + } + + template + void GPUSparseMatrix::Resize(const size_t numRows, const size_t numCols, int size) + {} + + //Reset matrix so it can be reused + template + void GPUSparseMatrix::Reset() + {} + +#pragma endregion Constructors and Destructor + +#pragma region Static BLAS Functions + + // copy features to GPU matrix + template + void GPUSparseMatrix::SetMatrixFromCSCFormat(size_t *h_row, size_t *h_rowIdx, size_t size, size_t blockSize) + {} + + template + void GPUSparseMatrix::SetMatrixFromLabelAndClass(size_t *h_row, size_t *h_block2Id, size_t *h_block2UniqId, size_t labelSize, size_t expandedSize, size_t blockSize) + {} + + // forward pass from feature to hidden layer + template + void GPUSparseMatrix::MultiplyAndWeightedAdd(ElemType alpha, const GPUMatrix& lhs, const bool transposeA, + const GPUSparseMatrix& rhs, const bool transposeB, ElemType beta, GPUMatrix& c) + + {} + + // backward pass from hidden layer to feature weight + template + void GPUSparseMatrix::MultiplyAndAdd(ElemType alpha, const GPUMatrix& lhs, const bool transposeA, + const GPUSparseMatrix& rhs, const bool transposeB, GPUSparseMatrix& c) + {} + + // used for gradients update + template + void GPUSparseMatrix::ScaleAndAdd(const ElemType alpha, const GPUSparseMatrix& lhs, GPUMatrix& rhs) + {} + + // a: H x No: H is hidden layer size and 
No is mini-batch size + // weight: V x H, V is vocab size + // label: V x No + // cls: 2 x Nc, Nc is number of classes, each col is start and end word ids of a class + // idx2cls: V x 1, mapping from word to class id + // etp: V x No, stores predicted values + template + void GPUSparseMatrix::ClassEntropy(const GPUMatrix& a, const GPUMatrix& weight, + const GPUSparseMatrix & label, const GPUMatrix& cls, + const GPUMatrix& idx2cls, GPUSparseMatrix& etp, GPUMatrix& entropyScore) + {} + + template + void GPUSparseMatrix::ClassEntropyError(GPUSparseMatrix& a) + {} + + template + void GPUSparseMatrix::ClassEntropyGradientOfInput(const GPUSparseMatrix& error, const GPUMatrix& weight, GPUMatrix& grd) + {} + + template + void GPUSparseMatrix::ClassEntropyGradientOfWeight(const GPUSparseMatrix& error, const GPUMatrix& input, const GPUSparseMatrix & label, const GPUMatrix& cls, + const GPUMatrix& idx2cls, GPUSparseMatrix& grd) + {} + + template + GPUSparseMatrix& GPUSparseMatrix::InplaceTruncate (const ElemType threshold) + { + return *this; + } + + // normal update for smoothed gradients c and current gradients (this) + template + void GPUSparseMatrix::NormalGrad(GPUMatrix& c, const ElemType momentum) + {} + + //------------------------------------------------------------------------- + // End of new GPU Sparse Matrix code + //------------------------------------------------------------------------- + + template + void GPUSparseMatrix::MultiplyAndWeightedAdd(ElemType alpha, const GPUSparseMatrix& a, const bool transposeA, + const GPUMatrix& b, ElemType beta, GPUMatrix& c) + {} + + + template + void GPUSparseMatrix::Multiply(const GPUSparseMatrix& S, const GPUMatrix& D, GPUMatrix& C) + {} + + template + void GPUSparseMatrix::Multiply(const GPUMatrix& D, const GPUSparseMatrix& S, GPUMatrix& C) + {} + + // ElemCountFromBufferSize - Return the elemCountAllocated for a particular buffersize + // totalBufferSize - total buffer we have to use + // return: size of allocated 
elements/index slots available + template + size_t GPUSparseMatrix::ElemCountFromBufferSize(size_t totalBufferSize) + { + return 0; + } + + // PrepareBuffer - Get the dimensions start buffer, computes the starting row/column of each value + // m - rows in the source + // n - cols in the source + // canReuseBuffer - target matrix can be reused for temporary space + // func - function to call to count elements in the result (returns count, and fills csrRowPtr array) + template +#ifndef LINUX + void GPUSparseMatrix::PrepareBuffer(size_t m, size_t n, bool canReuseBuffer, std::function func) +#else + void GPUSparseMatrix::PrepareBuffer(size_t m, size_t n, bool canReuseBuffer, size_t (*func)(int *csRowPtrC)) +#endif /* LINUX */ + {} + +#ifdef LINUX + size_t PrepareBufferMultiply(int* csrRowPtrC) + { + int nnzTotal = -1; + return nnzTotal; + } +#endif + + // Multiply - multiply one sparse matrix by another sparse matrix + // S1 - first sparse matrix + // transposeS1 - transpose first matrix? + // S2 - second sparse matrix + // transposeS2 - transpose second matrix? 
+ // c - result matrix + // NOTE: if c has enough space allocated, it will be reused, otherwise it will be freed and a new memory block used + template + void GPUSparseMatrix::Multiply(const GPUSparseMatrix& S1, bool transposeS1, const GPUSparseMatrix& S2, bool transposeS2, GPUSparseMatrix &c) + {} + + template + GPUSparseMatrix& GPUSparseMatrix::AssignProductOf(const GPUSparseMatrix& a, const bool transposeA, const GPUSparseMatrix& b, const bool transposeB) + { + return *this; + } + + template + void GPUSparseMatrix::ScaleAndAdd(ElemType alpha,const GPUSparseMatrix& a, ElemType beta, const GPUSparseMatrix& b, GPUSparseMatrix& c) + {} + + template + void GPUSparseMatrix::ScaleAndAdd(ElemType alpha,const GPUSparseMatrix& a, ElemType beta, const GPUMatrix& b, GPUMatrix& c) + {} + + template + void GPUSparseMatrix::ScaleAndAdd(ElemType alpha,const GPUMatrix& a, ElemType beta, const GPUSparseMatrix& b, GPUMatrix& c) + {} + + template + void GPUSparseMatrix::Scale(ElemType alpha, GPUSparseMatrix& a) + {} + + template + void GPUSparseMatrix::ElementWisePower (ElemType alpha, const GPUSparseMatrix& a, GPUSparseMatrix& c) + {} + + template + ElemType GPUSparseMatrix::InnerProductOfMatrices(const GPUSparseMatrix& a, const GPUMatrix& b) + { + return ElemType(0); + } + + template + ElemType GPUSparseMatrix::InnerProductOfMatrices(const GPUMatrix& a, const GPUSparseMatrix& b) + { + return ElemType(0); + } + + template + bool GPUSparseMatrix::AreEqual(const GPUSparseMatrix& a, const GPUSparseMatrix& b, + const ElemType threshold) + { + return false; + } + + template + bool GPUSparseMatrix::AreEqual(const GPUMatrix& a, const GPUSparseMatrix& b, + const ElemType threshold) + { + return false; + } + + template + bool GPUSparseMatrix::AreEqual(const GPUSparseMatrix& a, const GPUMatrix& b, + const ElemType threshold) + { + return false; + } + + template + bool GPUSparseMatrix::IsEqualTo(const GPUSparseMatrix& a, const ElemType threshold) const + { + return false; + } + + template + 
bool GPUSparseMatrix::IsEqualTo(const GPUMatrix& a, const ElemType threshold) const + { + return false; + } +#pragma endregion Static BLAS Functions + +#pragma region Member BLAS Functions + + template + int GPUSparseMatrix::GetComputeDeviceId() const + { + return -1; + } + + template + GPUMatrix GPUSparseMatrix::ElementProductOf (const GPUSparseMatrix& a, const GPUMatrix& b) + { + GPUMatrix c; + return c; + } + + template + GPUMatrix GPUSparseMatrix::ElementProductOf (const GPUMatrix& a, const GPUSparseMatrix& b) + { + return GPUSparseMatrix::ElementProductOf(b,a); + } + + template + GPUSparseMatrix GPUSparseMatrix::operator+ (const GPUSparseMatrix& a) const + { + return *this; + } + + template + GPUSparseMatrix GPUSparseMatrix::operator- (const GPUSparseMatrix& a) const + { + return *this; + } + + template + GPUSparseMatrix& GPUSparseMatrix::operator^=(ElemType alpha) + { + return *this; + } + + template + GPUSparseMatrix GPUSparseMatrix::operator^ (ElemType alpha) const + { + return *this; + } + + template + GPUSparseMatrix& GPUSparseMatrix::operator*=(ElemType alpha) + { + return *this; + } + + template + GPUSparseMatrix GPUSparseMatrix::operator* (ElemType alpha) const + { + return *this; + } + + template + GPUSparseMatrix& GPUSparseMatrix::AssignElementPowerOf(const GPUSparseMatrix& a, const ElemType power) + { + return *this; + } + + template + GPUSparseMatrix GPUSparseMatrix::Transpose() const + { + return *this; + } + + template + GPUSparseMatrix& GPUSparseMatrix::AssignTransposeOf(const GPUSparseMatrix& a) + { + return *this; + } + + template + void GPUSparseMatrix::InplaceTranspose() + {} + + template + ElemType GPUSparseMatrix::SumOfAbsElements() const + { + return ElemType(0); + } + + template + ElemType GPUSparseMatrix::SumOfElements() const + { + return ElemType(0); + } + + + template + ElemType GPUSparseMatrix::FrobeniusNorm() const + { + return ElemType(0); + } + + template + ElemType GPUSparseMatrix::MatrixNormInf() const + { + return ElemType(0); 
+ } + + template + ElemType GPUSparseMatrix::MatrixNorm1() const + { + return ElemType(0); + } + +#pragma endregion Member BLAS Functions + +#pragma region Other Functions + + template + GPUSparseMatrix& GPUSparseMatrix::ElementInverse () + { + return *this; + } + + template + GPUSparseMatrix& GPUSparseMatrix::AssignElementInverseOf (const GPUSparseMatrix& a) + { + return *this; + } + + template + GPUSparseMatrix& GPUSparseMatrix::InplaceSigmoid() + { + return *this; + } + + template + GPUSparseMatrix& GPUSparseMatrix::AssignSigmoidOf (const GPUSparseMatrix& a) + { + return *this; + } + + template + GPUSparseMatrix& GPUSparseMatrix::InplaceLinearRectifierDerivative() + { + return *this; + } + + template + GPUSparseMatrix& GPUSparseMatrix::AssignLinearRectifierDerivativeOf (const GPUSparseMatrix& a) + { + return *this; + } + + template + GPUSparseMatrix& GPUSparseMatrix::InplaceTanh() + { + return *this; + } + + template + GPUSparseMatrix& GPUSparseMatrix::AssignTanhOf (const GPUSparseMatrix& a) + { + return *this; + } + + template + GPUSparseMatrix& GPUSparseMatrix::InplaceSqrt() + { + return *this; + } + + template + GPUSparseMatrix& GPUSparseMatrix::AssignSqrtOf (const GPUSparseMatrix& a) + { + return *this; + } + + template + GPUSparseMatrix& GPUSparseMatrix::InplaceExp() + { + return *this; + } + + template + GPUSparseMatrix& GPUSparseMatrix::AssignExpOf (const GPUSparseMatrix& a) + { + return *this; + } + + template + GPUSparseMatrix& GPUSparseMatrix::InplaceLog() + { + return *this; + } + + template + GPUSparseMatrix& GPUSparseMatrix::AssignLogOf (const GPUSparseMatrix& a) + { + return *this; + } + + template + GPUSparseMatrix& GPUSparseMatrix::InplaceAbs() + { + return *this; + } + + template + GPUSparseMatrix& GPUSparseMatrix::AssignAbsOf (const GPUSparseMatrix& a) + { + return *this; + } + + template + GPUSparseMatrix& GPUSparseMatrix::InplaceTruncateBottom (const ElemType threshold) + { + return *this; + } + + template + GPUSparseMatrix& 
GPUSparseMatrix::AssignTruncateBottomOf (const GPUSparseMatrix& a, const ElemType threshold) + { + return *this; + } + + template + GPUSparseMatrix& GPUSparseMatrix::InplaceTruncateTop (const ElemType threshold) + { + return *this; + } + + template + GPUSparseMatrix& GPUSparseMatrix::AssignTruncateTopOf (const GPUSparseMatrix& a, const ElemType threshold) + { + return *this; + } + + template + GPUSparseMatrix& GPUSparseMatrix::SetToZeroIfAbsLessThan (const ElemType threshold) + { + return *this; + } + template + void GPUSparseMatrix::Unrolling (//GPUSparseMatrix& debugMatrix, + GPUMatrix& UnrolledMatrix, const GPUMatrix& InMatrix, GPUSparseMatrix& UnrollMapping, + const int inputWidth, const int inputHeight, const int inputChannelNum, + const int FltWidth,const int FltHeight, const int FltChannel, + const int FltStepW, const int FltStepH) + { + } + +#pragma endregion + +#pragma region Helper Functions + + template + void GPUSparseMatrix::performInplaceFunction(int kind) + {} + + template + void GPUSparseMatrix::SetMatrixFromCSRFormat(int *h_CSRRow, int *h_Col, ElemType *h_Val, size_t nz, size_t numRows, size_t numCols, bool IsOnDevice, int devId) + {} + + // NOTE: we should change this to just use a single buffer, and return pointers into it + template + void GPUSparseMatrix::GetMatrixFromCSRFormat(int*& h_CSRRow, int*& h_Col, ElemType*& h_Val, size_t &nz, size_t &numRows, size_t &numCols) const + {} + +#pragma endregion Helper Functions + + template class GPUSparseMatrix; + template class GPUSparseMatrix; + + template + MATH_API File& operator>>(File& stream, GPUSparseMatrix& us) + { + return stream; + } + + template MATH_API File& operator>>(File& stream, GPUSparseMatrix& us); + template MATH_API File& operator>>(File& stream, GPUSparseMatrix& us); + + template + MATH_API File& operator<<(File& stream, const GPUSparseMatrix& us) + { + return stream; + } + template MATH_API File& operator<<(File& stream, const GPUSparseMatrix& us); + template MATH_API File& 
operator<<(File& stream, const GPUSparseMatrix& us); + + +#pragma region DeviceBoundNumber class + + template + DeviceBoundNumber::DeviceBoundNumber(const DeviceBoundNumber &deepCopy) + { + NOT_IMPLEMENTED; + } + +#ifndef LINUX + template + DeviceBoundNumber::DeviceBoundNumber(DeviceBoundNumber &&shallowCopy) + { + this->ShallowCopyFrom(shallowCopy.m_data,shallowCopy.m_computeDevice); + shallowCopy.m_data=NULL; + } +#endif + + template + void DeviceBoundNumber::ShallowCopyFrom(ElemType* newVal,int newValsDevceId) + {} + + template + DeviceBoundNumber::~DeviceBoundNumber() + {} + +#pragma endregion DeviceBoundNumber class + +#pragma region Helper functions + + // GetBestGPUDeviceId - Get the best GPU DeviceId, based on cuda information + // TODO: should be replaced by BestGpu class instead, it's much better + template + int GPUMatrix::GetBestGPUDeviceId() //returns -1 if no GPUs can be used + { + return -1; // CPU + } + + // PrepareDevice - Setup the correct cuda context for an operation + // deviceId - the device on which the operation will take place + // defaults to -1, which means use matrices current device + template + void GPUMatrix::PrepareDevice(short deviceId /*=-1*/) const + {} + + template + ElemType* GPUMatrix::CopyToArray() const + { + return NULL; + } + + //memory will be allocated by the callee if not enough but need to be deleted by the caller after it's done + //return number of elements copied + template + size_t GPUMatrix::CopyToArray(ElemType*& arrayCopyTo, size_t& currentArraySize) const + { + return 0; + } + + template + void GPUMatrix::ChangeDeviceTo(int to_id) + {} + + template + void GPUMatrix::performInplaceFunction(int kind) + {} + + +#pragma endregion Helper functions + +#pragma region Constructors and Destructor + + //should only be used by constructors. + template + void GPUMatrix::ZeroInit(int deviceId) + {} + + template + GPUMatrix::GPUMatrix(int deviceId) + {}; + + //matrixName is used to verify that correct matrix is read. 
+ template + GPUMatrix::GPUMatrix(FILE* f, const char * matrixName, int deviceId) + {} + + template + GPUMatrix::GPUMatrix(const size_t numRows, const size_t numCols,int deviceId) + {}; + + template + GPUMatrix::GPUMatrix(const size_t numRows, const size_t numCols, ElemType *pArray, const size_t matrixFlags, int deviceId) + {}; + + template + GPUMatrix::GPUMatrix(const GPUMatrix& deepCopyFrom) + {} + +#ifndef LINUX + template + GPUMatrix::GPUMatrix(GPUMatrix&& moveFrom) + {} +#endif + + //assignment operator, deep copy + template + GPUMatrix& GPUMatrix::operator=(const GPUMatrix& deepCopyFrom) + { + return *this; + } + +#ifndef LINUX + //move assignment operator, shallow copy + template + GPUMatrix& GPUMatrix::operator=(GPUMatrix&& moveFrom) + { + return *this; + } +#endif /* LINUX */ + + template + GPUMatrix::~GPUMatrix(void) + { + } + + template + void GPUMatrix::Clear() + {} +#pragma endregion Constructors and Destructor + + template + int GPUMatrix::GetComputeDeviceId() const + { + return -1; + } + +#pragma region Basic Operators + template + GPUMatrix GPUMatrix::ColumnSlice(size_t startColumn, size_t numCols) const + { + GPUMatrix slice; + + return slice; + } + + template + GPUMatrix& GPUMatrix::AssignColumnSlice(const GPUMatrix& fromMatrix, size_t startColumn, size_t numCols) + { + return *this; + } + + + //for each column of a, we assign numRows starting from startIndex to this + template + GPUMatrix& GPUMatrix::AssignRowSliceValuesOf(const GPUMatrix& a, const size_t startIndex, const size_t numRows) + { + return *this; + } + + //for each column of a, we add all rows of a to this starting from startIndex + template + GPUMatrix& GPUMatrix::AddToRowSliceValuesOf(const GPUMatrix& a, const size_t startIndex, const size_t numRows) + { + return *this; + } + + template + GPUMatrix GPUMatrix::Transpose() const + { + return *this; + } + + // GetCublasHandle - get a cublas handle for the given GPU, should only need one per GPU + // computeDevice - The compute device 
for which the cublas handle is desired + // returns: cublas handle + // NOTE: we currently don't bother to ever free the CUBLAS handle, it will be freed automatically by CUDA when the process ends + template + cublasHandle_t GPUMatrix::GetCublasHandle(int computeDevice/*=-1*/) + { + cublasHandle_t cuHandle = 0; + return cuHandle; + } + + template + GPUMatrix& GPUMatrix::AssignTransposeOf (const GPUMatrix& a) + { + return *this; + } + + template + void GPUMatrix::SetValue(const ElemType v) + {} + + template + void GPUMatrix::SetValue(const ElemType* d_v) //d_v is pointer to the value in GPU memory + {} + + template + void GPUMatrix::SetColumn(const ElemType* colPointer, size_t colInd) + {} + + template + void GPUMatrix::SetValue(const GPUMatrix& deepCopyFrom) + {} + + template + void GPUMatrix::SetValue(const size_t numRows, const size_t numCols, ElemType *pArray, size_t matrixFlags, int deviceId) + {} + + + template + void GPUMatrix::SetDiagonalValue(const ElemType v) + {} + + template + void GPUMatrix::SetDiagonalValue(GPUMatrix& vector) + {} + + template + void GPUMatrix::SetUniformRandomValue(const ElemType low, const ElemType high, unsigned long seed) + {} + + template + void GPUMatrix::SetGaussianRandomValue(const ElemType mean, const ElemType sigma, unsigned long seed) + {} + + //maskRate: percentage of values masked out (similar to dropout rate) + //scaleValue: which scale value to set to the left ones (unmasked items). 
+ template + void GPUMatrix::SetUniformRandomMask(const ElemType maskRate, const ElemType scaleValue, unsigned long seed) + {} + + template + void GPUMatrix::Adagrad(GPUMatrix& gradients) + {} + + template + void GPUMatrix::Reshape(const size_t numRows, const size_t numCols) + {} + + template + void GPUMatrix::Resize(const size_t numRows, const size_t numCols, bool growOnly) + {} + + template + size_t GPUMatrix::LocateElement (const size_t row, const size_t col) const + { + return 0; + } + + template + size_t GPUMatrix::LocateColumn (const size_t col) const + { + return 0; + } + + template + ElemType GPUMatrix::Get00Element() const + { + ElemType res=0; + return res; + } +#pragma endregion Basic Operators + +#pragma region Member BLAS Functions + template + GPUMatrix& GPUMatrix::operator+= (ElemType alpha) + { + return *this; + } + + template + GPUMatrix GPUMatrix::operator+ (ElemType alpha) const + { + return *this; + } + + template + GPUMatrix& GPUMatrix::AssignSumOf(const ElemType alpha, const GPUMatrix& a) + { + return (*this); + } + + + template + GPUMatrix& GPUMatrix::operator+= (const GPUMatrix& a) + { + return *this; + } + + template + GPUMatrix GPUMatrix::operator+ (const GPUMatrix& a) const + { + return *this; + } + + template + GPUMatrix& GPUMatrix::AssignSumOf(const GPUMatrix& a, const GPUMatrix& b) + { + return (*this); + } + + template + GPUMatrix& GPUMatrix::operator-= (ElemType alpha) + { + return *this; + } + + template + GPUMatrix GPUMatrix::operator- (ElemType alpha) const + { + return *this; + } + + template + GPUMatrix& GPUMatrix::AssignDifferenceOf(const ElemType alpha, const GPUMatrix& a) + { + return *this; + } + + template + GPUMatrix& GPUMatrix::AssignDifferenceOf(const GPUMatrix& a, const ElemType alpha) + { + return *this; + } + + template + GPUMatrix& GPUMatrix::operator-= (const GPUMatrix& a) + { + return *this; + } + + template + GPUMatrix GPUMatrix::operator- (const GPUMatrix& a) const + { + return *this; + } + + template + 
GPUMatrix& GPUMatrix::AssignDifferenceOf(const GPUMatrix& a, const GPUMatrix& b) + { + return *this; + } + + template + GPUMatrix& GPUMatrix::operator*= (ElemType alpha) + { + return *this; + } + + template + GPUMatrix GPUMatrix::operator* (ElemType alpha) const + { + return *this; + } + + template + GPUMatrix& GPUMatrix::AssignProductOf(const ElemType alpha, const GPUMatrix& a) + { + return *this; + } + + template + GPUMatrix& GPUMatrix::AssignProductOf (const GPUMatrix& a, const bool transposeA, const GPUMatrix& b, const bool transposeB) + { + return *this; + } + + template + GPUMatrix GPUMatrix::operator* (const GPUMatrix& a) const + { + return *this; + } + + template + GPUMatrix& GPUMatrix::operator/= (ElemType alpha) + { + return (*this); + } + + template + GPUMatrix GPUMatrix::operator/ (ElemType alpha) const + { + return *this; + } + + //element-wise power + template + GPUMatrix& GPUMatrix::operator^= (ElemType alpha) + { + return *this; + } + + template + GPUMatrix GPUMatrix::operator^ (ElemType alpha) const + { + return *this; + } + + template + GPUMatrix& GPUMatrix::AssignElementPowerOf(const GPUMatrix& a, const ElemType power) + { + return *this; + } + + + template + GPUMatrix& GPUMatrix::AddElementProductOf (const GPUMatrix& a, const GPUMatrix& b) + { + return *this; + } + + template + GPUMatrix& GPUMatrix::ColumnElementMultiplyWith(const GPUMatrix& a) + { + return *this; + } + + template + GPUMatrix& GPUMatrix::RowElementMultiplyWith(const GPUMatrix& a) + { + return *this; + } + + template + GPUMatrix& GPUMatrix::ElementInverse () + { + return *this; + } + + template + GPUMatrix& GPUMatrix::AssignElementInverseOf (const GPUMatrix& a) + { + return *this; + } + + template + GPUMatrix& GPUMatrix::InplaceSigmoid() + { + return *this; + } + + template + GPUMatrix& GPUMatrix::AssignSigmoidOf (const GPUMatrix& a) + { + return *this; + } + + template + GPUMatrix& GPUMatrix::InplaceSigmoidDerivative() + { + return *this; + } + + template + GPUMatrix& 
GPUMatrix::AssignSigmoidDerivativeOf (const GPUMatrix& a) + { + return *this; + } + + + template + GPUMatrix& GPUMatrix::InplaceTanh() + { + return *this; + } + + template + GPUMatrix& GPUMatrix::AssignTanhOf (const GPUMatrix& a) + { + return *this; + } + + template + GPUMatrix& GPUMatrix::InplaceSoftmax (const bool isColWise) + { + return *this; + } + + template + GPUMatrix& GPUMatrix::AssignSoftmaxOf (const GPUMatrix& a, const bool isColWise) + { + return *this; + } + + template + GPUMatrix& GPUMatrix::InplaceSqrt() + { + return *this; + } + + template + GPUMatrix& GPUMatrix::AssignSqrtOf (const GPUMatrix& a) + { + return *this; + } + + template + GPUMatrix& GPUMatrix::InplaceExp() + { + return *this; + } + + template + GPUMatrix& GPUMatrix::AssignExpOf (const GPUMatrix& a) + { + return *this; + } + + template + GPUMatrix& GPUMatrix::InplaceLog() + { + return *this; + } + + template + GPUMatrix& GPUMatrix::AssignLogOf (const GPUMatrix& a) + { + return *this; + } + + template + GPUMatrix& GPUMatrix::InplaceAbs() + { + return *this; + } + + template + GPUMatrix& GPUMatrix::AssignAbsOf (const GPUMatrix& a) + { + return *this; + } + + template + GPUMatrix& GPUMatrix::InplaceLinearRectifierDerivative() + { + return *this; + } + + template + GPUMatrix& GPUMatrix::AssignLinearRectifierDerivativeOf (const GPUMatrix& a) + { + return *this; + } + + template + GPUMatrix& GPUMatrix::InplaceCosine() + { + return *this; + } + + template + GPUMatrix& GPUMatrix::AssignCosineOf (const GPUMatrix& a) + { + return *this; + } + + template + GPUMatrix& GPUMatrix::InplaceNegativeSine() + { + return *this; + } + + template + GPUMatrix& GPUMatrix::AssignNegativeSineOf (const GPUMatrix& a) + { + return *this; + } + + template + GPUMatrix& GPUMatrix::InplaceTruncateBottom (const ElemType threshold) + { + return *this; + } + + template + GPUMatrix& GPUMatrix::AssignTruncateBottomOf (const GPUMatrix& a, const ElemType threshold) + { + return *this; + } + + template + GPUMatrix& 
GPUMatrix::InplaceTruncateTop (const ElemType threshold) + { + return *this; + } + + template + GPUMatrix& GPUMatrix::AssignTruncateTopOf (const GPUMatrix& a, const ElemType threshold) + { + return *this; + } + template + GPUMatrix& GPUMatrix::SetToZeroIfAbsLessThan (const ElemType threshold) + { + return *this; + } + + template + ElemType GPUMatrix::SumOfAbsElements() const + { + return ElemType(0); + } + + template + ElemType GPUMatrix::SumOfElements() const + { + return ElemType(0); + } + + + template + GPUMatrix& GPUMatrix::AssignSumOfElements(const GPUMatrix& a) + { + return (*this); + } + + template + DeviceBoundNumber GPUMatrix::Sum_AsDeviceBoundNum() const + { + DeviceBoundNumber result; + return result; + } + + template + ElemType GPUMatrix::Max() const + { + return ElemType(0); + } + + + template + GPUMatrix& GPUMatrix::ElementMultiplyWith (const GPUMatrix& a) + { + return *this; + } + + template + GPUMatrix& GPUMatrix::AssignElementProductOf (const GPUMatrix& a, const GPUMatrix& b) + { + return *this; + } + + template + GPUMatrix& GPUMatrix::AssignElementDivisionOf (const GPUMatrix& a, const GPUMatrix& b) + { + return *this; + } + + template + bool GPUMatrix::IsEqualTo(const GPUMatrix& a, const ElemType threshold /*= 1e-8*/) const + { + return AreEqual(*this, a, threshold); + } + + template + void GPUMatrix::VectorNorm1(GPUMatrix& c, const bool isColWise) const + { + } + + template + GPUMatrix& GPUMatrix::AssignVectorNorm1Of(GPUMatrix& a, const bool isColWise) + { + return *this; + } + + template + void GPUMatrix::VectorNorm2(GPUMatrix& c, const bool isColWise) const + {} + + template + GPUMatrix& GPUMatrix::AssignVectorNorm2Of(GPUMatrix& a, const bool isColWise) + { + return *this; + } + + template + void GPUMatrix::VectorNormInf(GPUMatrix& c, const bool isColWise) const + {} + + template + GPUMatrix& GPUMatrix::AssignVectorNormInfOf(GPUMatrix& a, const bool isColWise) + { + return *this; + } + + template + GPUMatrix& 
GPUMatrix::AssignInnerProductOf(const GPUMatrix& a, const GPUMatrix& b, const bool isColWise) + { + return *this; + } + + template + GPUMatrix& GPUMatrix::AssignKhatriRaoProductOf(const GPUMatrix& a, const GPUMatrix& b) + { + return *this; + } + + //column-wise reshaped product. Used to compute KhatriRaoProduct Gradient + // this = reshape each column of a from (K1xK2,1) to (K1, K2) + // if each column of a is not transposed, each (K1, K2) times each column of b (K2, frames). + // the output is a (K1, frames) matrix + // if each column of a is tranposed, each (K1, K2)^T times each column of b(K1, frames) and output is (K2, frames) + template + GPUMatrix& GPUMatrix::AddColumnReshapeProductOf(const GPUMatrix& a, const GPUMatrix& b, const bool transposeAColumn) + { + return *this; + } + + template + GPUMatrix& GPUMatrix::AddWithScaleOf(ElemType alpha, const GPUMatrix& a) + { + return *this; + } + + template + ElemType GPUMatrix::FrobeniusNorm() const + { + ElemType h_sum=0; + return (h_sum); + } + + template + GPUMatrix& GPUMatrix::AssignFrobeniusNormOf (const GPUMatrix& a) + { + return *this; + } + + template + ElemType GPUMatrix::MatrixNormInf() const + { + ElemType h_maxAbs=0; + return h_maxAbs; + } + + template + ElemType GPUMatrix::MatrixNorm1() const + { + return ElemType(0); + } + + template + ElemType GPUMatrix::MatrixNorm0() const + { + return ElemType(0); + } + + template + GPUMatrix& GPUMatrix::AssignSignOf(const GPUMatrix& a) + { + return *this; + } + + template + GPUMatrix& GPUMatrix::AddSignOf(const GPUMatrix& a) + { + return *this; + } + + template + void GPUMatrix::VectorMax(GPUMatrix& maxIndexes, GPUMatrix& maxValues, const bool isColWise) const + {} + + template + void GPUMatrix::VectorMin(GPUMatrix& minIndexes, GPUMatrix& minValues, const bool isColWise) const + {} + + template + GPUMatrix& GPUMatrix::AssignNumOfDiff(const GPUMatrix& a, const GPUMatrix& b) + { + return *this; + } + +#pragma endregion Member BLAS Functions + +#pragma region Other 
helper functions + template + void GPUMatrix::Print(const char* matrixName, size_t rowStart, size_t rowEnd, size_t colStart, size_t colEnd) const + {} + + template + void GPUMatrix::Print(const char* matrixName /*=nullptr*/) const + {} + + // file I/O + //matrixName is used to verify that correct matrix is read. + template + void GPUMatrix::ReadFromFile(FILE* f, const char * matrixName) + {} + + //matrixName is used to verify that correct matrix is read. + template + void GPUMatrix::WriteToFile(FILE* f, const char * matrixName) + {} + + //helpfer function used for convolution neural network + template + GPUMatrix& GPUMatrix::AssignPackedConvolutionInput(const GPUMatrix& inputSubBatch, + const size_t inputWidth, const size_t inputHeight, const size_t inputChannels, + const size_t outputWidth, const size_t outputHeight, const size_t outputChannels, + const size_t kernelWidth, const size_t kernelHeight, const size_t horizontalSubsample, const size_t verticalSubsample, + const bool zeroPadding) + { + return *this; + } + + //helpfer function used for convolution neural network + template + GPUMatrix& GPUMatrix::UnpackConvolutionInput(GPUMatrix& inputSubBatch, + const size_t inputWidth, const size_t inputHeight, const size_t inputChannels, + const size_t outputWidth, const size_t outputHeight, const size_t outputChannels, + const size_t kernelWidth, const size_t kernelHeight, const size_t horizontalSubsample, const size_t verticalSubsample, + const bool zeroPadding) const + { + GPUMatrix mat; + return mat; + } + + template + GPUMatrix& GPUMatrix::AssignMaxPoolingResult(const GPUMatrix& inputBatch, const size_t channels, + const size_t inputWidth, const size_t inputHeight, const size_t inputSizePerSample, + const size_t outputWidth, const size_t outputHeight, const size_t outputSizePerSample, + const size_t windowWidth, const size_t windowHeight, const size_t horizontalSubsample, const size_t verticalSubsample) + { + return *this; + } + + template + GPUMatrix& 
GPUMatrix::AddMaxPoolingGradient(const GPUMatrix& outputGradientBatch, const GPUMatrix& inputBatch, const GPUMatrix& outputBatch, + const size_t channels, + const size_t inputWidth, const size_t inputHeight, const size_t inputSizePerSample, + const size_t outputWidth, const size_t outputHeight, const size_t outputSizePerSample, + const size_t windowWidth, const size_t windowHeight, const size_t horizontalSubsample, const size_t verticalSubsample) + { + return *this; + } + + template + GPUMatrix& GPUMatrix::AssignAveragePoolingResult(const GPUMatrix& inputBatch, const size_t channels, + const size_t inputWidth, const size_t inputHeight, const size_t inputSizePerSample, + const size_t outputWidth, const size_t outputHeight, const size_t outputSizePerSample, + const size_t windowWidth, const size_t windowHeight, const size_t horizontalSubsample, const size_t verticalSubsample) + { + return *this; + } + + template + GPUMatrix& GPUMatrix::AddAveragePoolingGradient(const GPUMatrix& outputGradientBatch, + const size_t channels, + const size_t inputWidth, const size_t inputHeight, const size_t inputSizePerSample, + const size_t outputWidth, const size_t outputHeight, const size_t outputSizePerSample, + const size_t windowWidth, const size_t windowHeight, const size_t horizontalSubsample, const size_t verticalSubsample) + { + return *this; + } + +#pragma endregion Other helper functions + +#pragma region Static BLAS Functions + template + void GPUMatrix::MultiplyAndWeightedAdd(ElemType alpha, const GPUMatrix& a, const bool transposeA, const GPUMatrix& b, const bool transposeB, + ElemType beta, GPUMatrix& c) + { + } + + template + void GPUMatrix::MultiplyAndAdd(const GPUMatrix& a, const bool transposeA, const GPUMatrix& b, const bool transposeB, GPUMatrix& c) + {} + + template + void GPUMatrix::Multiply(const GPUMatrix& a, const bool transposeA, const GPUMatrix& b, const bool transposeB, GPUMatrix& c) + {} + + template + void GPUMatrix::Multiply(const GPUMatrix& a, const 
GPUMatrix& b, GPUMatrix& c) + {} + + /// Matrix-scalar multiply with col-major matrices: c = alpha * a + c + /// if a is a column vector, add to all columns of c + /// if a is a row vector, add to all rows of c + /// if a is a scalar, add to all elements of c + /// Scalar + /// Input matrix + /// Resulting matrix, user is responsible for allocating this + template + void GPUMatrix::ScaleAndAdd(ElemType alpha,const GPUMatrix& a, GPUMatrix& c) + {} + + /// c += alpha * (a-b) + /// if a, b, c must have same dim + /// Scalar + /// Input matrix + /// Input matrix + /// Resulting matrix, user is responsible for allocating this + template + void GPUMatrix::AddScaledDifference(const ElemType alpha, const GPUMatrix& a, const GPUMatrix& b, GPUMatrix& c) + {} + + /// c = alpha * (a-b) + /// if a, b, c must have same dim + /// Scalar + /// Input matrix + /// Input matrix + /// Resulting matrix, user is responsible for allocating this + template + void GPUMatrix::AssignScaledDifference(const ElemType alpha, const GPUMatrix& a, const GPUMatrix& b, GPUMatrix& c) + {} + + /// c += alpha * (a-b) + /// if a, b, c must have same dim + /// 1X1 matrix + /// Input matrix + /// Input matrix + /// Resulting matrix, user is responsible for allocating this + template + void GPUMatrix::AddScaledDifference(const GPUMatrix& alpha, const GPUMatrix& a, const GPUMatrix& b, GPUMatrix& c) + {} + + /// c = alpha * (a-b) + /// if a, b, c must have same dim + /// Scalar + /// Input matrix + /// Input matrix + /// Resulting matrix, user is responsible for allocating this + template + void GPUMatrix::AssignScaledDifference(const GPUMatrix& alpha, const GPUMatrix& a, const GPUMatrix& b, GPUMatrix& c) + {} + + //c[ci,cj] += a[ai,aj] + template + void GPUMatrix::AddElementToElement(const GPUMatrix& a, const size_t ai, const size_t aj, GPUMatrix& c, const size_t ci, const size_t cj) + {} + + template + void GPUMatrix::Scale(ElemType alpha, GPUMatrix& a) + {} + + + template + void GPUMatrix::Scale(GPUMatrix& 
alpha, GPUMatrix& a) + {} + + template //c = alpha * a + void GPUMatrix::Scale(ElemType alpha, const GPUMatrix& a, GPUMatrix& c) + {} + + + template + void GPUMatrix::InnerProduct (const GPUMatrix& a, const GPUMatrix& b, GPUMatrix& c, const bool isColWise) + {} + + template + ElemType GPUMatrix::InnerProductOfMatrices(const GPUMatrix& a, const GPUMatrix& b) + { + return ElemType(0); + } + + + template + GPUMatrix& GPUMatrix::AssignInnerProductOfMatrices(const GPUMatrix& a, const GPUMatrix& b) + { + return *this; + } + + + template + void GPUMatrix::ElementWisePower(ElemType alpha, const GPUMatrix& a, GPUMatrix& c) + {} + + template + bool GPUMatrix::AreEqual(const GPUMatrix& a, const GPUMatrix& b, const ElemType threshold /*= 1e-8*/) + { + return false; + } + + template + GPUMatrix GPUMatrix::Ones(const size_t rows, const size_t cols) + { + GPUMatrix mat; + return mat; + } + + template + GPUMatrix GPUMatrix::Zeros(const size_t rows, const size_t cols) + { + GPUMatrix mat; + return mat; + } + + template + GPUMatrix GPUMatrix::Eye(const size_t rows) + { + GPUMatrix mat; + return mat; + } + + template + GPUMatrix GPUMatrix::RandomUniform(const size_t rows, const size_t cols, const ElemType low, const ElemType high, unsigned long seed) + { + GPUMatrix mat; + return mat; + } + + template + GPUMatrix GPUMatrix::RandomGaussian(const size_t rows, const size_t cols, const ElemType mean, const ElemType sigma, unsigned long seed) + { + GPUMatrix mat; + return mat; + } + + template + ElemType GPUMatrix::GetLearnRateForBlock_Helper(const GPUMatrix &Gradients, const GPUMatrix &SmoothedGradients) + { + return ElemType(0); + } + +#pragma endregion Static BLAS Functions + + template class GPUMatrix; + template class GPUMatrix; + template class DeviceBoundNumber; + template class DeviceBoundNumber; + + template + cublasHandle_t GPUMatrix::s_cuHandle[GPUMatrix::MaxGpus]={0}; + + template + void* GPUMatrix::s_curandGenerator=NULL; +}}} + +// define a dummy GPUWatcher class too 
+#include "GPUWatcher.cuh" + +int GPUWatcher::GetGPUIdWithTheMostFreeMemory() +{ + return 0; +} + + +size_t GPUWatcher::GetFreeMemoryOnCUDADevice(int devId) +{ + return 0; +} + +GPUWatcher::GPUWatcher(void) +{ +} + +GPUWatcher::~GPUWatcher(void) +{ +} + + + + + diff --git a/Math/Math/GPUMatrix.cu b/Math/Math/GPUMatrix.cu index 9d690f0e4..692b059a3 100644 --- a/Math/Math/GPUMatrix.cu +++ b/Math/Math/GPUMatrix.cu @@ -324,22 +324,22 @@ namespace Microsoft { namespace MSR { namespace CNTK { switch (kind) { case 0: - _inplaceSigmoidOnCuda<<>>(this->m_pArray,N); + _inplaceSigmoidOnCuda<<>>(this->m_pArray, N); break; case 1: - _inplaceTanhOnCuda<<>>(this->m_pArray,N); + _inplaceTanhOnCuda<<>>(this->m_pArray, N); break; case 2: - _inplaceSqrtOnCuda<<>>(this->m_pArray,N); + _inplaceSqrtOnCuda<<>>(this->m_pArray, N); break; case 3: - _inplaceExpOnCuda<<>>(this->m_pArray,N); + _inplaceExpOnCuda<<>>(this->m_pArray,N); break; case 4: - _inplaceLogOnCuda<<>>(this->m_pArray,N); + _inplaceLogOnCuda<<>>(this->m_pArray,N); break; case 5: - _inplaceAbsOnCuda<<>>(this->m_pArray,N); + _inplaceAbsOnCuda<<>>(this->m_pArray,N); break; case 6: _inplaceLinRectDerivative<<>>(this->m_pArray,N); @@ -1205,7 +1205,7 @@ namespace Microsoft { namespace MSR { namespace CNTK { // int blocksPerGrid =(int)ceil(1.0*N/threadsPerBlock); // cudaEvent_t done = nullptr; // if (do_sync) CUDA_CALL(cudaEventCreate(&done)); - // _addValue<<>>(m_pArray,a.m_pArray,N); + // _addValue<<>>(m_pArray,a.m_pArray,N); // if (do_sync) CUDA_CALL(cudaEventRecord(done)); // if (do_sync) CUDA_CALL(cudaEventSynchronize(done)); // if (do_sync) CUDA_CALL(cudaEventDestroy(done)); @@ -1458,7 +1458,7 @@ namespace Microsoft { namespace MSR { namespace CNTK { a.PrepareDevice(); cudaEvent_t done = nullptr; if (do_sync) CUDA_CALL(cudaEventCreate(&done)); - _addElementProductOf<<>>(this->m_pArray,a.m_pArray,b.m_pArray,N); + _addElementProductOf<<>>(this->m_pArray,a.m_pArray,b.m_pArray,N); if (do_sync) CUDA_CALL(cudaEventRecord(done)); 
if (do_sync) CUDA_CALL(cudaEventSynchronize(done)); if (do_sync) CUDA_CALL(cudaEventDestroy(done)); @@ -1480,7 +1480,7 @@ namespace Microsoft { namespace MSR { namespace CNTK { a.PrepareDevice(); cudaEvent_t done = nullptr; if (do_sync) CUDA_CALL(cudaEventCreate(&done)); - _columnElementMultiplyWith<<>>(this->m_pArray,a.m_pArray,N,M); + _columnElementMultiplyWith<<>>(this->m_pArray,a.m_pArray,N,M); if (do_sync) CUDA_CALL(cudaEventRecord(done)); if (do_sync) CUDA_CALL(cudaEventSynchronize(done)); if (do_sync) CUDA_CALL(cudaEventDestroy(done)); @@ -1503,7 +1503,7 @@ namespace Microsoft { namespace MSR { namespace CNTK { a.PrepareDevice(); cudaEvent_t done = nullptr; if (do_sync) CUDA_CALL(cudaEventCreate(&done)); - _rowElementMultiplyWith<<>>(this->m_pArray,a.m_pArray,N,M); + _rowElementMultiplyWith<<>>(this->m_pArray,a.m_pArray,N,M); if (do_sync) CUDA_CALL(cudaEventRecord(done)); if (do_sync) CUDA_CALL(cudaEventSynchronize(done)); if (do_sync) CUDA_CALL(cudaEventDestroy(done)); @@ -1568,7 +1568,7 @@ namespace Microsoft { namespace MSR { namespace CNTK { PrepareDevice(); cudaEvent_t done = nullptr; if (do_sync) CUDA_CALL(cudaEventCreate(&done)); - _elemInverse<<>>(this->m_pArray,N); + _elemInverse<<>>(this->m_pArray,N); if (do_sync) CUDA_CALL(cudaEventRecord(done)); if (do_sync) CUDA_CALL(cudaEventSynchronize(done)); if (do_sync) CUDA_CALL(cudaEventDestroy(done)); @@ -1825,7 +1825,7 @@ namespace Microsoft { namespace MSR { namespace CNTK { PrepareDevice(); cudaEvent_t done = nullptr; if (do_sync) CUDA_CALL(cudaEventCreate(&done)); - _inplaceTruncateBottom<<>>(this->m_pArray,threshold,N); + _inplaceTruncateBottom<<>>(this->m_pArray,threshold,N); if (do_sync) CUDA_CALL(cudaEventRecord(done)); if (do_sync) CUDA_CALL(cudaEventSynchronize(done)); if (do_sync) CUDA_CALL(cudaEventDestroy(done)); @@ -1848,7 +1848,7 @@ namespace Microsoft { namespace MSR { namespace CNTK { PrepareDevice(); cudaEvent_t done = nullptr; if (do_sync) CUDA_CALL(cudaEventCreate(&done)); - 
_assignTruncateBottom<<>>(this->m_pArray,a.m_pArray,threshold,N); + _assignTruncateBottom<<>>(this->m_pArray,a.m_pArray,threshold,N); if (do_sync) CUDA_CALL(cudaEventRecord(done)); if (do_sync) CUDA_CALL(cudaEventSynchronize(done)); if (do_sync) CUDA_CALL(cudaEventDestroy(done)); @@ -1866,7 +1866,7 @@ namespace Microsoft { namespace MSR { namespace CNTK { PrepareDevice(); cudaEvent_t done = nullptr; if (do_sync) CUDA_CALL(cudaEventCreate(&done)); - _inplaceTruncateTop<<>>(this->m_pArray,threshold,N); + _inplaceTruncateTop<<>>(this->m_pArray,threshold,N); if (do_sync) CUDA_CALL(cudaEventRecord(done)); if (do_sync) CUDA_CALL(cudaEventSynchronize(done)); if (do_sync) CUDA_CALL(cudaEventDestroy(done)); @@ -1889,7 +1889,7 @@ namespace Microsoft { namespace MSR { namespace CNTK { a.PrepareDevice(); cudaEvent_t done = nullptr; if (do_sync) CUDA_CALL(cudaEventCreate(&done)); - _assignTruncateTop<<>>(this->m_pArray,a.m_pArray,threshold,N); + _assignTruncateTop<<>>(this->m_pArray,a.m_pArray,threshold,N); if (do_sync) CUDA_CALL(cudaEventRecord(done)); if (do_sync) CUDA_CALL(cudaEventSynchronize(done)); if (do_sync) CUDA_CALL(cudaEventDestroy(done)); @@ -1906,7 +1906,7 @@ namespace Microsoft { namespace MSR { namespace CNTK { PrepareDevice(); cudaEvent_t done = nullptr; if (do_sync) CUDA_CALL(cudaEventCreate(&done)); - _setToZeroIfAbsLessThan<<>>(this->m_pArray,threshold,N); + _setToZeroIfAbsLessThan<<>>(this->m_pArray,threshold,N); if (do_sync) CUDA_CALL(cudaEventRecord(done)); if (do_sync) CUDA_CALL(cudaEventSynchronize(done)); if (do_sync) CUDA_CALL(cudaEventDestroy(done)); @@ -1964,7 +1964,7 @@ namespace Microsoft { namespace MSR { namespace CNTK { cudaEvent_t done = nullptr; if (do_sync) CUDA_CALL(cudaEventCreate(&done)); //WARNING: THIS kernel is not the most efficient way! 
- _reductionSumAndAssign<<<1,1024>>>(this->m_pArray,a.m_pArray,(LONG64)a.GetNumElements(),(LONG64)this->GetNumElements()); + _reductionSumAndAssign<<<1,1024>>>(this->m_pArray,a.m_pArray,(LONG64)a.GetNumElements(),(LONG64)this->GetNumElements()); if (do_sync) CUDA_CALL(cudaEventRecord(done)); if (do_sync) CUDA_CALL(cudaEventSynchronize(done)); if (do_sync) CUDA_CALL(cudaEventDestroy(done)); @@ -2026,7 +2026,7 @@ namespace Microsoft { namespace MSR { namespace CNTK { a.PrepareDevice(); cudaEvent_t done = nullptr; if (do_sync) CUDA_CALL(cudaEventCreate(&done)); - _elemMul<<>>(this->m_pArray,a.m_pArray,N); + _elemMul<<>>(this->m_pArray,a.m_pArray,N); if (do_sync) CUDA_CALL(cudaEventRecord(done)); if (do_sync) CUDA_CALL(cudaEventSynchronize(done)); if (do_sync) CUDA_CALL(cudaEventDestroy(done)); @@ -2049,7 +2049,7 @@ namespace Microsoft { namespace MSR { namespace CNTK { a.PrepareDevice(); cudaEvent_t done = nullptr; if (do_sync) CUDA_CALL(cudaEventCreate(&done)); - _assignElementProductOf<<>>(this->m_pArray,a.m_pArray,b.m_pArray,N); + _assignElementProductOf<<>>(this->m_pArray,a.m_pArray,b.m_pArray,N); if (do_sync) CUDA_CALL(cudaEventRecord(done)); if (do_sync) CUDA_CALL(cudaEventSynchronize(done)); if (do_sync) CUDA_CALL(cudaEventDestroy(done)); @@ -2078,7 +2078,7 @@ namespace Microsoft { namespace MSR { namespace CNTK { a.PrepareDevice(); cudaEvent_t done = nullptr; if (do_sync) CUDA_CALL(cudaEventCreate(&done)); - _assignElementDivisionOf<<>>(this->m_pArray,a.m_pArray,b.m_pArray,N); + _assignElementDivisionOf<<>>(this->m_pArray,a.m_pArray,b.m_pArray,N); if (do_sync) CUDA_CALL(cudaEventRecord(done)); if (do_sync) CUDA_CALL(cudaEventSynchronize(done)); if (do_sync) CUDA_CALL(cudaEventDestroy(done)); @@ -2119,7 +2119,7 @@ namespace Microsoft { namespace MSR { namespace CNTK { } if (do_sync) CUDA_CALL(cudaEventCreate(&done)); - _vectorNorm1<<>>(c.m_pArray, this->m_pArray,n,m,isColWise); + _vectorNorm1<<>>(c.m_pArray, this->m_pArray,n,m,isColWise); if (do_sync) 
CUDA_CALL(cudaEventRecord(done)); if (do_sync) CUDA_CALL(cudaEventSynchronize(done)); if (do_sync) CUDA_CALL(cudaEventDestroy(done)); @@ -2160,7 +2160,7 @@ namespace Microsoft { namespace MSR { namespace CNTK { } if (do_sync) CUDA_CALL(cudaEventCreate(&done)); - _vectorNorm2<<>>(c.m_pArray, this->m_pArray,n,m,isColWise); + _vectorNorm2<<>>(c.m_pArray, this->m_pArray,n,m,isColWise); if (do_sync) CUDA_CALL(cudaEventRecord(done)); if (do_sync) CUDA_CALL(cudaEventSynchronize(done)); if (do_sync) CUDA_CALL(cudaEventDestroy(done)); @@ -2219,7 +2219,7 @@ namespace Microsoft { namespace MSR { namespace CNTK { a.PrepareDevice(); cudaEvent_t done = nullptr; if (do_sync) CUDA_CALL(cudaEventCreate(&done)); - _assignKhatriRaoProductOf<<>>(this->m_pArray,a.m_pArray,b.m_pArray,rowsA, rowsB, cols); + _assignKhatriRaoProductOf<<>>(this->m_pArray,a.m_pArray,b.m_pArray,rowsA, rowsB, cols); if (do_sync) CUDA_CALL(cudaEventRecord(done)); if (do_sync) CUDA_CALL(cudaEventSynchronize(done)); if (do_sync) CUDA_CALL(cudaEventDestroy(done)); @@ -2257,7 +2257,7 @@ namespace Microsoft { namespace MSR { namespace CNTK { a.PrepareDevice(); cudaEvent_t done = nullptr; if (do_sync) CUDA_CALL(cudaEventCreate(&done)); - _addColumnReshapeProductOf<<>>(this->m_pArray,a.m_pArray,b.m_pArray, rowsB, rowsC, cols, transposeAColumn); + _addColumnReshapeProductOf<<>>(this->m_pArray,a.m_pArray,b.m_pArray, rowsB, rowsC, cols, transposeAColumn); if (do_sync) CUDA_CALL(cudaEventRecord(done)); if (do_sync) CUDA_CALL(cudaEventSynchronize(done)); if (do_sync) CUDA_CALL(cudaEventDestroy(done)); @@ -2360,7 +2360,7 @@ namespace Microsoft { namespace MSR { namespace CNTK { cudaEvent_t done = nullptr; int blocksPerGrid=(int)ceil(1.0*this->GetNumElements()/threadsPerBlock); if (do_sync) CUDA_CALL(cudaEventCreate(&done)); - _assignSignOf<<>>(this->m_pArray, a.m_pArray, (long)this->GetNumElements()); + _assignSignOf<<>>(this->m_pArray, a.m_pArray, (long)this->GetNumElements()); if (do_sync) 
CUDA_CALL(cudaEventRecord(done)); if (do_sync) CUDA_CALL(cudaEventSynchronize(done)); if (do_sync) CUDA_CALL(cudaEventDestroy(done)); @@ -2380,7 +2380,7 @@ namespace Microsoft { namespace MSR { namespace CNTK { cudaEvent_t done = nullptr; int blocksPerGrid=(int)ceil(1.0*this->GetNumElements()/threadsPerBlock); if (do_sync) CUDA_CALL(cudaEventCreate(&done)); - _addSignOf<<>>(this->m_pArray, a.m_pArray, (LONG64)this->GetNumElements()); + _addSignOf<<>>(this->m_pArray, a.m_pArray, (LONG64)this->GetNumElements()); if (do_sync) CUDA_CALL(cudaEventRecord(done)); if (do_sync) CUDA_CALL(cudaEventSynchronize(done)); if (do_sync) CUDA_CALL(cudaEventDestroy(done)); @@ -2473,8 +2473,8 @@ namespace Microsoft { namespace MSR { namespace CNTK { cudaEvent_t done = nullptr; //int blocksPerGrid=(int)ceil(1.0*a.GetNumElements()/threadsPerBlock); if (do_sync) CUDA_CALL(cudaEventCreate(&done)); - //_assignNumOfDiff<<>>(a.m_pArray, b.m_pArray, this->m_pArray, a.GetNumElements()); - _assignNumOfDiff<<<1,1024,0,t_stream>>>(a.m_pArray, b.m_pArray, this->m_pArray, (LONG64)a.GetNumElements()); + //_assignNumOfDiff<<>>(a.m_pArray, b.m_pArray, this->m_pArray, a.GetNumElements()); + _assignNumOfDiff<<<1,1024,0,t_stream>>>(a.m_pArray, b.m_pArray, this->m_pArray, (LONG64)a.GetNumElements()); if (do_sync) CUDA_CALL(cudaEventRecord(done)); if (do_sync) CUDA_CALL(cudaEventSynchronize(done)); if (do_sync) CUDA_CALL(cudaEventDestroy(done)); @@ -2926,7 +2926,7 @@ namespace Microsoft { namespace MSR { namespace CNTK { LONG64 n=(LONG64)a.GetNumElements(); int blocksPerGrid=(int)ceil(1.0*n/threadsPerBlock); if (do_sync) CUDA_CALL(cudaEventCreate(&done)); - _addScaledDifference<<>>(alpha, a.m_pArray, b.m_pArray, c.m_pArray, n); + _addScaledDifference<<>>(alpha, a.m_pArray, b.m_pArray, c.m_pArray, n); if (do_sync) CUDA_CALL(cudaEventRecord(done)); if (do_sync) CUDA_CALL(cudaEventSynchronize(done)); if (do_sync) CUDA_CALL(cudaEventDestroy(done)); @@ -2967,7 +2967,7 @@ namespace Microsoft { namespace MSR { 
namespace CNTK { LONG64 n=(LONG64)a.GetNumElements(); int blocksPerGrid=(int)ceil(1.0*n/threadsPerBlock); if (do_sync) CUDA_CALL(cudaEventCreate(&done)); - _assignScaledDifference<<>>(alpha, a.m_pArray, b.m_pArray, c.m_pArray, n); + _assignScaledDifference<<>>(alpha, a.m_pArray, b.m_pArray, c.m_pArray, n); if (do_sync) CUDA_CALL(cudaEventRecord(done)); if (do_sync) CUDA_CALL(cudaEventSynchronize(done)); if (do_sync) CUDA_CALL(cudaEventDestroy(done)); @@ -3011,7 +3011,7 @@ namespace Microsoft { namespace MSR { namespace CNTK { LONG64 n=(LONG64)a.GetNumElements(); int blocksPerGrid=(int)ceil(1.0*n/threadsPerBlock); if (do_sync) CUDA_CALL(cudaEventCreate(&done)); - _addScaledDifference<<>>(alpha.m_pArray, a.m_pArray, b.m_pArray, c.m_pArray, n); + _addScaledDifference<<>>(alpha.m_pArray, a.m_pArray, b.m_pArray, c.m_pArray, n); if (do_sync) CUDA_CALL(cudaEventRecord(done)); if (do_sync) CUDA_CALL(cudaEventSynchronize(done)); if (do_sync) CUDA_CALL(cudaEventDestroy(done)); @@ -3055,7 +3055,7 @@ namespace Microsoft { namespace MSR { namespace CNTK { LONG64 n=(LONG64)a.GetNumElements(); int blocksPerGrid=(int)ceil(1.0*n/threadsPerBlock); if (do_sync) CUDA_CALL(cudaEventCreate(&done)); - _assignScaledDifference<<>>(alpha.m_pArray, a.m_pArray, b.m_pArray, c.m_pArray, n); + _assignScaledDifference<<>>(alpha.m_pArray, a.m_pArray, b.m_pArray, c.m_pArray, n); if (do_sync) CUDA_CALL(cudaEventRecord(done)); if (do_sync) CUDA_CALL(cudaEventSynchronize(done)); if (do_sync) CUDA_CALL(cudaEventDestroy(done)); @@ -3074,7 +3074,7 @@ namespace Microsoft { namespace MSR { namespace CNTK { cudaEvent_t done = nullptr; int blocksPerGrid=1; //only one element if (do_sync) CUDA_CALL(cudaEventCreate(&done)); - _addElementToElement<<>>(a.m_pArray, (LONG64)a.LocateElement(ai, aj), c.m_pArray, (LONG64)c.LocateElement(ci, cj)); + _addElementToElement<<>>(a.m_pArray, (LONG64)a.LocateElement(ai, aj), c.m_pArray, (LONG64)c.LocateElement(ci, cj)); if (do_sync) CUDA_CALL(cudaEventRecord(done)); if 
(do_sync) CUDA_CALL(cudaEventSynchronize(done)); if (do_sync) CUDA_CALL(cudaEventDestroy(done)); @@ -3195,7 +3195,7 @@ namespace Microsoft { namespace MSR { namespace CNTK { } if (do_sync) CUDA_CALL(cudaEventCreate(&done)); - _innerProduct<<>>(c.m_pArray, a.m_pArray,b.m_pArray,m,n,isColWise); + _innerProduct<<>>(c.m_pArray, a.m_pArray,b.m_pArray,m,n,isColWise); if (do_sync) CUDA_CALL(cudaEventRecord(done)); if (do_sync) CUDA_CALL(cudaEventSynchronize(done)); if (do_sync) CUDA_CALL(cudaEventDestroy(done)); @@ -3288,7 +3288,7 @@ namespace Microsoft { namespace MSR { namespace CNTK { if (do_sync) CUDA_CALL(cudaEventCreate(&done)); LONG64 N=(LONG64)a.GetNumElements(); int blocksPerGrid =(int)ceil(1.0*N/threadsPerBlock); - _elementWisePowerOnCuda<<>>(alpha,a.m_pArray,c.m_pArray,N); + _elementWisePowerOnCuda<<>>(alpha,a.m_pArray,c.m_pArray,N); if (do_sync) CUDA_CALL(cudaEventRecord(done)); if (do_sync) CUDA_CALL(cudaEventSynchronize(done)); if (do_sync) CUDA_CALL(cudaEventDestroy(done)); diff --git a/Math/Math/GPUMatrixCUDAKernels.cu b/Math/Math/GPUMatrixCUDAKernels.cu index 55af99884..7a116df3a 100644 --- a/Math/Math/GPUMatrixCUDAKernels.cu +++ b/Math/Math/GPUMatrixCUDAKernels.cu @@ -3,6 +3,7 @@ // Copyright (c) Microsoft Corporation. All rights reserved. 
// // +#ifndef CPU_ONLY #include #include #include "CommonMatrix.h" @@ -3235,3 +3236,5 @@ d_tmp[0] = max((ElemType)0, d_tmp[0]/max((ElemType)1.0e-10,sqrt(d_tmp[1]))/max(( } } */ + +#endif /*!CPU_ONLY*/ diff --git a/Math/Math/GPUSparseMatrix.cu b/Math/Math/GPUSparseMatrix.cu index e0e908292..1882798dc 100644 --- a/Math/Math/GPUSparseMatrix.cu +++ b/Math/Math/GPUSparseMatrix.cu @@ -909,7 +909,7 @@ namespace Microsoft { namespace MSR { namespace CNTK { int blocksPerGrid =(int)ceil(N*1.0/threadsPerBlock); cudaEvent_t done = nullptr; CUDACALL(cudaEventCreate(&done)); - _inplaceTruncate<<>>(this->m_blockVal,threshold,N); + _inplaceTruncate<<>>(this->m_blockVal,threshold,N); CUDACALL(cudaEventRecord(done)); CUDACALL(cudaEventSynchronize(done)); CUDACALL(cudaEventDestroy(done)); @@ -1310,7 +1310,7 @@ namespace Microsoft { namespace MSR { namespace CNTK { a.PrepareDevice(); long N=(long)a.GetNZElements(); int blocksPerGrid =(int)ceil(1.0*N/threadsPerBlock); - _elementWisePowerOnCuda<<>>(alpha,a.NzLocation(),c.NzLocation(),N); + _elementWisePowerOnCuda<<>>(alpha,a.NzLocation(),c.NzLocation(),N); CUDACALL(cudaEventRecord(done)); CUDACALL(cudaEventSynchronize(done)); } @@ -1360,7 +1360,7 @@ namespace Microsoft { namespace MSR { namespace CNTK { //int* h_vectArray= new int[a.m_nz]; int blocksPerGrid =(int)ceil(1.0*M/threadsPerBlock); CUDACALL(cudaEventCreate(&done)); - _getSparseVectorRepresntationForMatrix<<>>(cscColPtrA,cscRowIndA,vectArray,M,N); + _getSparseVectorRepresntationForMatrix<<>>(cscColPtrA,cscRowIndA,vectArray,M,N); CUDACALL(cudaEventRecord(done)); CUDACALL(cudaEventSynchronize(done)); CUDACALL(cudaEventDestroy(done)); @@ -1411,7 +1411,7 @@ namespace Microsoft { namespace MSR { namespace CNTK { CUDACALL(cudaMemcpy(d_res,res,sizeof(long)*3,cudaMemcpyHostToDevice)); int blocksPerGrid =(int)ceil(1.0*a.GetNZElements()/threadsPerBlock); - _areEqual<<>>(a.NzLocation(),b.NzLocation(),(long)a.GetNZElements(),threshold,d_res); + 
_areEqual<<>>(a.NzLocation(),b.NzLocation(),(long)a.GetNZElements(),threshold,d_res); _areEqual<<>>(a.ColLocation(),b.ColLocation(),(long)a.GetNZElements(),(int)threshold,d_res+1); blocksPerGrid =(int)ceil((1.0*a.GetNumRows()+1.0)/threadsPerBlock); _areEqual<<>>(a.RowLocation(),b.RowLocation(),(long)a.GetNumRows()+1,(int)threshold,d_res+2); @@ -1719,7 +1719,7 @@ namespace Microsoft { namespace MSR { namespace CNTK { int blocksPerGrid =(int)ceil(1.0*N/threadsPerBlock); cudaEvent_t done = nullptr; CUDACALL(cudaEventCreate(&done)); - _elemInverse<<>>(this->m_pArray,N); + _elemInverse<<>>(this->m_pArray,N); CUDACALL(cudaEventRecord(done)); CUDACALL(cudaEventSynchronize(done)); return *this; @@ -1846,7 +1846,7 @@ namespace Microsoft { namespace MSR { namespace CNTK { int blocksPerGrid =(int)ceil(N*1.0/threadsPerBlock); cudaEvent_t done = nullptr; CUDACALL(cudaEventCreate(&done)); - _inplaceTruncateBottom<<>>(this->m_pArray,threshold,N); + _inplaceTruncateBottom<<>>(this->m_pArray,threshold,N); CUDACALL(cudaEventRecord(done)); CUDACALL(cudaEventSynchronize(done)); return *this; @@ -1867,7 +1867,7 @@ namespace Microsoft { namespace MSR { namespace CNTK { int blocksPerGrid =(int)ceil(N*1.0/threadsPerBlock); cudaEvent_t done = nullptr; CUDACALL(cudaEventCreate(&done)); - _assignTruncateBottom<<>>(this->m_pArray,a.NzLocation(),threshold,N); + _assignTruncateBottom<<>>(this->m_pArray,a.NzLocation(),threshold,N); CUDACALL(cudaEventRecord(done)); CUDACALL(cudaEventSynchronize(done)); return *this; @@ -1882,7 +1882,7 @@ namespace Microsoft { namespace MSR { namespace CNTK { int blocksPerGrid =(int)ceil(N*1.0/threadsPerBlock); cudaEvent_t done = nullptr; CUDACALL(cudaEventCreate(&done)); - _inplaceTruncateTop<<>>(this->m_pArray,threshold,N); + _inplaceTruncateTop<<>>(this->m_pArray,threshold,N); CUDACALL(cudaEventRecord(done)); CUDACALL(cudaEventSynchronize(done)); return *this; @@ -1903,7 +1903,7 @@ namespace Microsoft { namespace MSR { namespace CNTK { int blocksPerGrid 
=(int)ceil(N*1.0/threadsPerBlock); cudaEvent_t done = nullptr; CUDACALL(cudaEventCreate(&done)); - _assignTruncateTop<<>>(this->m_pArray,a.NzLocation(),threshold,N); + _assignTruncateTop<<>>(this->m_pArray,a.NzLocation(),threshold,N); CUDACALL(cudaEventRecord(done)); CUDACALL(cudaEventSynchronize(done)); return *this; @@ -1918,7 +1918,7 @@ namespace Microsoft { namespace MSR { namespace CNTK { int blocksPerGrid =(int)ceil(N*1.0/threadsPerBlock); cudaEvent_t done = nullptr; CUDACALL(cudaEventCreate(&done)); - _setToZeroIfAbsLessThan<<>>(this->m_pArray,threshold,N); + _setToZeroIfAbsLessThan<<>>(this->m_pArray,threshold,N); CUDACALL(cudaEventRecord(done)); CUDACALL(cudaEventSynchronize(done)); return *this; @@ -2012,22 +2012,22 @@ namespace Microsoft { namespace MSR { namespace CNTK { switch (kind) { case 0: - _inplaceSigmoidOnCuda<<>>(this->m_pArray,N); + _inplaceSigmoidOnCuda<<>>(this->m_pArray,N); break; case 1: - _inplaceTanhOnCuda<<>>(this->m_pArray,N); + _inplaceTanhOnCuda<<>>(this->m_pArray,N); break; case 2: - _inplaceSqrtOnCuda<<>>(this->m_pArray,N); + _inplaceSqrtOnCuda<<>>(this->m_pArray,N); break; case 3: - _inplaceExpOnCuda<<>>(this->m_pArray,N); + _inplaceExpOnCuda<<>>(this->m_pArray,N); break; case 4: - _inplaceLogOnCuda<<>>(this->m_pArray,N); + _inplaceLogOnCuda<<>>(this->m_pArray,N); break; case 5: - _inplaceAbsOnCuda<<>>(this->m_pArray,N); + _inplaceAbsOnCuda<<>>(this->m_pArray,N); break; case 6: _inplaceLinRectDerivative<<>>(this->m_pArray,N); diff --git a/Math/Math/Math.vcxproj.filters b/Math/Math/Math.vcxproj.filters new file mode 100644 index 000000000..4846433c6 --- /dev/null +++ b/Math/Math/Math.vcxproj.filters @@ -0,0 +1,77 @@ + + + + + {4FC737F1-C7A5-4376-A066-2A32D752A2FF} + cpp;c;cc;cxx;def;odl;idl;hpj;bat;asm;asmx + + + {93995380-89BD-4b04-88EB-625FBE52EBFB} + h;hpp;hxx;hm;inl;inc;xsd + + + {67DA6AB6-F800-4c08-8B7A-83BB121AAD01} + rc;ico;cur;bmp;dlg;rc2;rct;bin;rgs;gif;jpg;jpeg;jpe;resx;tiff;tif;png;wav;mfcribbon-ms + + + + + Header Files 
+ + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + + + Header Files + + + Header Files + + + Header Files + + + \ No newline at end of file diff --git a/Math/Math/Matrix.cpp b/Math/Math/Matrix.cpp index cdbd2526d..eecc95d96 100644 --- a/Math/Math/Matrix.cpp +++ b/Math/Math/Matrix.cpp @@ -288,15 +288,15 @@ namespace Microsoft { namespace MSR { namespace CNTK { { if (m_preferredDeviceId == CPUDEVICE) { - m_CPUMatrix = new CPUMatrix(numRows,numCols); + m_CPUMatrix = new CPUMatrix(numRows,numCols); SetDataLocation(CPU, DENSE); - } - else - { - m_GPUMatrix = new GPUMatrix(numRows,numCols,m_preferredDeviceId); - SetDataLocation(GPU, DENSE); - } - } + } + else + { + m_GPUMatrix = new GPUMatrix(numRows,numCols,m_preferredDeviceId); + SetDataLocation(GPU, DENSE); + } + } } template @@ -840,11 +840,7 @@ namespace Microsoft { namespace MSR { namespace CNTK { m_CPUMatrix->SetValue(*db_number.ExposePointer2Value()), if (GetDeviceId()!=db_number.GetDeviceId()) { -#ifndef LINUX - throw std::exception("Matrix and device bound number must be on the same device"); -#else - throw std::exception(); -#endif /* LINUX */ + throw std::runtime_error("Matrix and device bound number must be on the same device"); } m_GPUMatrix->SetValue(db_number.ExposePointer2Value()), NOT_IMPLEMENTED, From 5396f06d652493d4a39a4c5c7d61d44dde628387 Mon Sep 17 00:00:00 2001 From: Frank Seide Date: Thu, 30 Oct 2014 17:57:01 -0700 Subject: [PATCH 15/31] Adding untracked *.filters files. Oops. 
--- .../BinaryReader/BinaryReader.vcxproj.filters | 54 +++++++++++ .../HTKMLFReader/HTKMLFReader.vcxproj.filters | 66 +++++++++++++ .../LUSequenceReader.vcxproj.filters | 65 +++++++++++++ .../SequenceReader.vcxproj.filters | 64 ++++++++++++ .../UCIFastReader.vcxproj.filters | 60 ++++++++++++ .../CNTKEval/CNTKEval.vcxproj.filters | 50 ++++++++++ MachineLearning/cn/cn.vcxproj.filters | 97 +++++++++++++++++++ Math/Math/Math.vcxproj.filters | 53 ++++++++++ 8 files changed, 509 insertions(+) create mode 100644 DataReader/BinaryReader/BinaryReader.vcxproj.filters create mode 100644 DataReader/HTKMLFReader/HTKMLFReader.vcxproj.filters create mode 100644 DataReader/LUSequenceReader/LUSequenceReader.vcxproj.filters create mode 100644 DataReader/SequenceReader/SequenceReader.vcxproj.filters create mode 100644 DataReader/UCIFastReader/UCIFastReader.vcxproj.filters create mode 100644 MachineLearning/CNTKEval/CNTKEval.vcxproj.filters create mode 100644 MachineLearning/cn/cn.vcxproj.filters create mode 100644 Math/Math/Math.vcxproj.filters diff --git a/DataReader/BinaryReader/BinaryReader.vcxproj.filters b/DataReader/BinaryReader/BinaryReader.vcxproj.filters new file mode 100644 index 000000000..f061750a4 --- /dev/null +++ b/DataReader/BinaryReader/BinaryReader.vcxproj.filters @@ -0,0 +1,54 @@ + + + + + + + + + + + Common + + + Common + + + Common + + + Common + + + Common + + + + + + + + Common\Include + + + Common\Include + + + Common\Include + + + Common\Include + + + Common\Include + + + + + {7a294e4e-7a00-4bd3-8e0a-86f08db10a98} + + + {ecc1d590-3964-477c-9487-9a78e9a497ba} + + + \ No newline at end of file diff --git a/DataReader/HTKMLFReader/HTKMLFReader.vcxproj.filters b/DataReader/HTKMLFReader/HTKMLFReader.vcxproj.filters new file mode 100644 index 000000000..3a12468c6 --- /dev/null +++ b/DataReader/HTKMLFReader/HTKMLFReader.vcxproj.filters @@ -0,0 +1,66 @@ + + + + + + + + + + + + Duplicates to remove + + + + + + + + + + + + + + + + + + + + + + + + + + + + Duplicates to 
remove + + + Duplicates to remove + + + Common\Include + + + Common\Include + + + + + + + + {9a77edee-47ee-45ec-9b35-d22cc73ea64e} + + + {5e80057c-a54a-4d57-a856-dc1d23a63aea} + + + {812d1247-04eb-4693-a0d2-5399a94e6235} + + + \ No newline at end of file diff --git a/DataReader/LUSequenceReader/LUSequenceReader.vcxproj.filters b/DataReader/LUSequenceReader/LUSequenceReader.vcxproj.filters new file mode 100644 index 000000000..53ddd62ae --- /dev/null +++ b/DataReader/LUSequenceReader/LUSequenceReader.vcxproj.filters @@ -0,0 +1,65 @@ + + + + + + + + + + + Common + + + Common + + + Common + + + Common + + + Common + + + + + + + + + + Common\Include + + + Common\Include + + + Common\Include + + + Common\Include + + + Common\Include + + + Duplicates to remove + + + + + + + + {75463cd7-3094-4900-8dae-5b6835c23fc4} + + + {85d2fa50-2b95-4ec7-8f2c-c5c0b1cb493e} + + + {10f819fb-8861-4607-9389-60ca80f968c2} + + + \ No newline at end of file diff --git a/DataReader/SequenceReader/SequenceReader.vcxproj.filters b/DataReader/SequenceReader/SequenceReader.vcxproj.filters new file mode 100644 index 000000000..af33693bf --- /dev/null +++ b/DataReader/SequenceReader/SequenceReader.vcxproj.filters @@ -0,0 +1,64 @@ + + + + + + + + + + Common + + + Common + + + Common + + + Common + + + Common + + + + + + + + + Duplicates to remove + + + Common\Include + + + Common\Include + + + Common\Include + + + Common\Include + + + Common\Include + + + + + + + + + {82bb574a-405b-43a0-a59b-52a60aef9d61} + + + {761d8c5d-2d9b-47a0-8409-9fc9a502801b} + + + {616e954b-4068-45dc-a1d4-aaa9873983ec} + + + \ No newline at end of file diff --git a/DataReader/UCIFastReader/UCIFastReader.vcxproj.filters b/DataReader/UCIFastReader/UCIFastReader.vcxproj.filters new file mode 100644 index 000000000..985aba8c1 --- /dev/null +++ b/DataReader/UCIFastReader/UCIFastReader.vcxproj.filters @@ -0,0 +1,60 @@ + + + + + + + + + + Common + + + Common + + + Common + + + Common + + + Common + + + + + + + + + Duplicates to 
remove + + + Common\Include + + + Common\Include + + + Common\Include + + + Common\Include + + + Common\Include + + + + + {8e18fb2e-ab57-4862-ad16-5e322d6f2fbc} + + + {0dbad9d8-ce99-4f36-b871-3b98a27f58c2} + + + {1032308c-b577-4b1e-9f49-9570b93800ec} + + + \ No newline at end of file diff --git a/MachineLearning/CNTKEval/CNTKEval.vcxproj.filters b/MachineLearning/CNTKEval/CNTKEval.vcxproj.filters new file mode 100644 index 000000000..f19953971 --- /dev/null +++ b/MachineLearning/CNTKEval/CNTKEval.vcxproj.filters @@ -0,0 +1,50 @@ + + + + + + + + + + + Common + + + Common + + + Common + + + Common + + + + + + + + + + Common\Include + + + Common\Include + + + Common\Include + + + Common\Include + + + + + {bed53b47-70b1-494c-824d-0748362003b2} + + + {f3bf0104-8a08-40c9-a4d9-af8411c49669} + + + \ No newline at end of file diff --git a/MachineLearning/cn/cn.vcxproj.filters b/MachineLearning/cn/cn.vcxproj.filters new file mode 100644 index 000000000..09cd03857 --- /dev/null +++ b/MachineLearning/cn/cn.vcxproj.filters @@ -0,0 +1,97 @@ + + + + + Common + + + Common + + + Common + + + Common + + + Common + + + Common + + + Common + + + + + + + + + + + + Common\Include + + + Common\Include + + + Common + + + Common\Include + + + Common\Include + + + Common\Include + + + Common\Include + + + Common\Include + + + Common\Include + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + {b3d05c7b-7bcf-4b12-bcb5-dced86717202} + + + {85226dda-87ba-4da6-af04-563d0ce23b94} + + + \ No newline at end of file diff --git a/Math/Math/Math.vcxproj.filters b/Math/Math/Math.vcxproj.filters new file mode 100644 index 000000000..4f8867cf0 --- /dev/null +++ b/Math/Math/Math.vcxproj.filters @@ -0,0 +1,53 @@ + + + + + + + + + + + + + + + + Common + + + Common + + + + + + + + + + + + Common\Include + + + Common\Include + + + Common\Include + + + + + + + + + + {4d07e945-74fb-48fa-aa63-23f3a7763789} + + + {51b468dd-7e8a-4be8-ae6f-5e3f3d752b88} + + + \ No newline at end of file From 
c23dd1dab340c68988186b3da63965b2fc26866b Mon Sep 17 00:00:00 2001 From: Frank Seide Date: Thu, 30 Oct 2014 19:29:11 -0700 Subject: [PATCH 16/31] reviewed changes of linuxport and made it compile again on Windows. I had to undo the commenting out of C++11 features (lambdas, rvalue refs) which seem to have a problem with the Linux version of NVCC. We shall fix this later, it is likely a compiler-option issue. --- Common/Include/BestGpu.h | 4 +- Common/Include/File.h | 8 +- Common/Include/basetypes.h | 107 +---- Common/fileutil.cpp | 32 +- Math/Math/CPUMatrix.cpp | 432 +++++++++-------- Math/Math/CPUMatrix.h | 22 +- Math/Math/CPUSparseMatrix.cpp | 207 ++++----- Math/Math/CPUSparseMatrix.h | 4 +- Math/Math/CommonMatrix.h | 13 +- Math/Math/GPUDummy.cpp | 8 +- Math/Math/GPUMatrix.cu | 746 ++++++++++++++---------------- Math/Math/GPUMatrix.cuh | 22 +- Math/Math/GPUMatrixCUDAKernels.cu | 12 +- Math/Math/GPUSparseMatrix.cu | 623 ++++++++++++------------- Math/Math/GPUSparseMatrix.cuh | 34 +- Math/Math/Math.vcxproj | 1 + Math/Math/Math.vcxproj.filters | 129 +++--- Math/Math/Matrix.cpp | 42 +- Math/Math/Matrix.h | 6 +- Math/Math/targetver.h | 6 +- 20 files changed, 1114 insertions(+), 1344 deletions(-) diff --git a/Common/Include/BestGpu.h b/Common/Include/BestGpu.h index 849b43ac6..3aa3d3a05 100644 --- a/Common/Include/BestGpu.h +++ b/Common/Include/BestGpu.h @@ -71,5 +71,7 @@ public: std::vector GetDevices(int number=AllDevices, BestGpuFlags flags=bestGpuNormal); // get multiple devices }; extern BestGpu* g_bestGpu; + #endif -}}} \ No newline at end of file + +}}} diff --git a/Common/Include/File.h b/Common/Include/File.h index 306508c77..c1f0726f1 100644 --- a/Common/Include/File.h +++ b/Common/Include/File.h @@ -123,7 +123,7 @@ public: template File& operator<<(T val) { -#ifndef LINUX +#ifndef __CUDACC__ // TODO: CUDA compiler blows up, fix this attempt([=]() #endif { @@ -132,7 +132,7 @@ public: else fput(m_file, val); } -#ifndef LINUX +#ifndef __CUDACC__ ); #endif return 
*this; @@ -161,7 +161,7 @@ public: template File& operator>>(T& val) { -#ifndef LINUX +#ifndef __CUDACC__ // TODO: CUDA compiler blows up, fix this attempt([&]() #endif { @@ -170,7 +170,7 @@ public: else fget(m_file, val); } -#ifndef LINUX +#ifndef __CUDACC__ ); #endif return *this; diff --git a/Common/Include/basetypes.h b/Common/Include/basetypes.h index 95819f825..da888420f 100644 --- a/Common/Include/basetypes.h +++ b/Common/Include/basetypes.h @@ -7,20 +7,6 @@ #ifndef _BASETYPES_ #define _BASETYPES_ -#ifdef LINUX -typedef char16_t TCHAR; -#include -#define vsprintf_s vsprintf /* Not sure this is right... Malcolm */ -#include -#include -#include -#include -#define Linux(a) a -#else -#include -#endif /* LINUX */ -#include // for HUGE_VAL // Remove for a test by Malcolm because of double isnan definition... - #ifndef UNDER_CE // fixed-buffer overloads not available for wince #ifdef _CRT_SECURE_CPP_OVERLOAD_STANDARD_NAMES // fixed-buffer overloads for strcpy() etc. #undef _CRT_SECURE_CPP_OVERLOAD_STANDARD_NAMES @@ -91,11 +77,17 @@ OACR_WARNING_DISABLE(POTENTIAL_ARGUMENT_TYPE_MISMATCH, "Not level1 or level2_sec #include #include #include -#include // std::wstring_convert -#include // std::codecvt_utf8 +#include // std::wstring_convert #ifdef _MSC_VER +#include // std::codecvt_utf8 +#endif +#ifdef _WIN32 #include // for CRITICAL_SECTION and Unicode conversion functions --TODO: is there a portable alternative? 
#endif +#if __unix__ +#include +#include +#endif using namespace std; @@ -118,7 +110,6 @@ using namespace std; #define __inout_cap(x) #define __inout_cap_c(x) #endif -#endif // LINUX #ifndef __out_z_cap // non-VS2005 annotations #define __out_cap(x) #define __out_z_cap(x) @@ -300,36 +291,23 @@ public: noncopyable(){} }; - -#ifdef LINUX +// class CCritSec and CAutoLock -- simple critical section handling +#ifndef _WIN32 // TODO: Currently only working under Windows; BROKEN otherwise, to be fixed #define CRITICAL_SECTION int void InitializeCriticalSection(int *) {} void DeleteCriticalSection(int *) {} void EnterCriticalSection(int *) {} void LeaveCriticalSection(int *) {} - #endif - -// class CCritSec and CAutoLock -- simple critical section handling -// TODO: Currently only working under Windows; BROKEN otherwise, to be fixed class CCritSec { CCritSec (const CCritSec &); CCritSec & operator= (const CCritSec &); -#ifdef _MSC_VER CRITICAL_SECTION m_CritSec; -#endif public: -#ifdef _MSC_VER CCritSec() { InitializeCriticalSection(&m_CritSec); }; ~CCritSec() { DeleteCriticalSection(&m_CritSec); }; void Lock() { EnterCriticalSection(&m_CritSec); }; void Unlock() { LeaveCriticalSection(&m_CritSec); }; -#else // POSIX --TODO: need to figure this out - CCritSec() { }; - ~CCritSec() { };; - void Lock() { }; - void Unlock() { }; -#endif }; @@ -427,8 +405,6 @@ public: }; #endif -#endif /* LINUX */ - };}; // namespace #if 0 //ndef BASETYPES_NO_UNSAFECRTOVERLOAD // if on, no unsafe CRT overload functions @@ -583,13 +559,12 @@ typedef strfun::_strprintf wstrprintf; // wchar_t version #endif // string-encoding conversion functions -#ifdef _WIN32 struct utf8 : std::string { utf8 (const std::wstring & p) // utf-16 to -8 { - //TODO: confirm it builds on VS2013 - std::wstring_convert> cv; - (*(std::string*)this) = cv.to_bytes(p); -#ifdef MALCOLM +#if 1 + std::wstring_convert> cv; + (*(std::string*)this) = cv.to_bytes(p); +#else // old version, delete once we know it works size_t len = 
p.length(); if (len == 0) { return;} // empty string msra::basetypes::fixed_vector buf (3 * len + 1); // max: 1 wchar => up to 3 mb chars @@ -599,14 +574,14 @@ struct utf8 : std::string { utf8 (const std::wstring & p) // utf-16 to -8 &buf[0], (int) buf.size(), NULL, NULL); if (rc == 0) throw std::runtime_error ("WideCharToMultiByte"); (*(std::string*)this) = &buf[0]; -#endif /* Malcolm */ +#endif }}; struct utf16 : std::wstring { utf16 (const std::string & p) // utf-8 to -16 { +#if 1 std::wstring_convert> cv; (*(std::wstring*)this) = cv.from_bytes(p); - -#ifdef OLD +#else // old version, delete once we know it works size_t len = p.length(); if (len == 0) { return;} // empty string msra::basetypes::fixed_vector buf (len + 1); @@ -617,29 +592,8 @@ struct utf16 : std::wstring { utf16 (const std::string & p) // utf-8 to -16 if (rc == 0) throw std::runtime_error("MultiByteToWideChar"); ASSERT(rc < buf.size()); (*(std::wstring*)this) = &buf[0]; -#endif /* Malcolm */ -}}; -#else // TODO: complete this once we are building on actual Linux, currently using default locale instead of UTF-8 locale -static inline std::string utf8(const std::wstring & p) // output: UTF-8 -{ - size_t len = p.length(); - msra::basetypes::fixed_vector buf(2 * len + 1); // max: 1 wchar => 2 mb chars - std::fill(buf.begin(), buf.end(), 0); - // BUGBUG: We need to set the locale, so for now this only works for plain ASCII - ::wcstombs(&buf[0], p.c_str(), 2 * len + 1); - return std::string(&buf[0]); -} -static inline std::wstring utf16(const std::string & p) // input: UTF-8 -{ - size_t len = p.length(); - msra::basetypes::fixed_vector buf(len + 1); // max: >1 mb chars => 1 wchar - std::fill(buf.begin(), buf.end(), (wchar_t)0); - OACR_WARNING_SUPPRESS(UNSAFE_STRING_FUNCTION, "Reviewed OK. size checked. 
[rogeryu 2006/03/21]"); - // BUGBUG: We need to set the locale, so for now this only works for plain ASCII - ::mbstowcs(&buf[0], p.c_str(), len + 1); - return std::wstring(&buf[0]); -} #endif +}}; #pragma warning(push) #pragma warning(disable : 4996) // Reviewed by Yusheng Li, March 14, 2006. depr. fn (wcstombs, mbstowcs) @@ -647,16 +601,18 @@ static inline std::string wcstombs (const std::wstring & p) // output: MBCS { size_t len = p.length(); msra::basetypes::fixed_vector buf (2 * len + 1); // max: 1 wchar => 2 mb chars -#ifdef MALCOLM std::fill (buf.begin (), buf.end (), 0); ::wcstombs (&buf[0], p.c_str(), 2 * len + 1); -#endif /* Malcolm */ return std::string (&buf[0]); } static inline std::wstring mbstowcs (const std::string & p) // input: MBCS { - std::wstring ret = utf16(p); - return ret; + size_t len = p.length(); + msra::basetypes::fixed_vector buf(len + 1); // max: >1 mb chars => 1 wchar + std::fill(buf.begin(), buf.end(), (wchar_t)0); + OACR_WARNING_SUPPRESS(UNSAFE_STRING_FUNCTION, "Reviewed OK. size checked. 
[rogeryu 2006/03/21]"); + ::mbstowcs(&buf[0], p.c_str(), len + 1); + return std::wstring(&buf[0]); } #pragma warning(pop) @@ -797,7 +753,7 @@ public: auto_file_ptr() : f (NULL) { } ~auto_file_ptr() { close(); } auto_file_ptr (const char * path, const char * mode) { f = fopen (path, mode); if (f == NULL) openfailed (path); } - auto_file_ptr (const wchar_t * wpath, const char * mode) {string path = msra::strfun::utf8(wpath); f = fopen (path.c_str(), mode); if (f == NULL) openfailed (path); } + auto_file_ptr (const wchar_t * wpath, const char * mode) { f = _wfopen (wpath, msra::strfun::utf16 (mode).c_str()); if (f == NULL) openfailed (msra::strfun::utf8 (wpath)); } FILE * operator= (FILE * other) { close(); f = other; return f; } auto_file_ptr (FILE * other) : f (other) { } operator FILE * () const { return f; } @@ -825,7 +781,6 @@ public: typedef auto_handle_t auto_handle; #endif -#ifdef MALCOLM // like auto_ptr but calls freeFunc_p (type free_func_t) instead of delete to clean up // minor difference - wrapped object is T, not T *, so to wrap a // T *, use auto_clean @@ -845,7 +800,6 @@ public: operator const T () const { return it; } T detach () { T tmp = it; it = 0; return tmp; } // release ownership of object }; -#endif /* MALCOLM */ #if 0 // simple timer @@ -888,23 +842,12 @@ namespace msra { namespace files { class textreader { -#ifndef LINUX msra::basetypes::auto_file_ptr f; -#else - FILE *f; -#endif /* LINUX */ std::vector buf; // read buffer (will only grow, never shrink) int ch; // next character (we need to read ahead by one...) char getch() { char prevch = (char) ch; ch = fgetc (f); return prevch; } public: -#ifndef LINUX textreader (const std::wstring & path) : f (path.c_str(), "rb") { buf.reserve (10000); ch = fgetc (f); } -#else - textreader (const std::wstring & path) { - f = fopen((char *)path.c_str(), "rb"); - ch = fgetc(f); /* I Think this is right ... 
Malcolm */ - } -#endif /* LINUX */ operator bool() const { return ch != EOF; } // true if still a line to read std::string getline() // get and consume the next line { diff --git a/Common/fileutil.cpp b/Common/fileutil.cpp index eca7ce6af..c07eac1c5 100644 --- a/Common/fileutil.cpp +++ b/Common/fileutil.cpp @@ -1370,26 +1370,24 @@ vector msra::files::fgetfilelines (const wstring & path, vector & b } // ---------------------------------------------------------------------------- -// getfiletime(), setfiletime(): access modification time +// getfiletime(): access modification time // ---------------------------------------------------------------------------- bool getfiletime (const wstring & path, FILETIME & time) { // return file modification time, false if cannot be determined - struct _stat buf; - int result; +#if 1 + struct _stat buf; + int result; - // Get data associated with "crt_stat.c": - result = _wstat(path.c_str(), &buf); - // Check if statistics are valid: - if( result != 0 ) - { - return false; - } + // Get data associated with "crt_stat.c": + result = _wstat(path.c_str(), &buf); + // Check if statistics are valid: + if (result != 0) + return false; - (*(time_t*)(&time))= buf.st_mtime; - return true; - -#ifdef OLD + (*(time_t*)(&time)) = buf.st_mtime; + return true; +#else // old version, delete once above is tested WIN32_FIND_DATAW findFileData; auto_handle hFind (FindFirstFileW (path.c_str(), &findFileData), ::FindClose); if (hFind != INVALID_HANDLE_VALUE) @@ -1404,11 +1402,9 @@ bool getfiletime (const wstring & path, FILETIME & time) #endif } +#if 0 void setfiletime (const wstring & path, const FILETIME & time) { // update the file modification time of an existing file -#ifdef LINUX - throw new logic_error("setfiletime has not been converted to linux yet..."); -#else auto_handle h (CreateFileW (path.c_str(), FILE_WRITE_ATTRIBUTES, FILE_SHARE_READ|FILE_SHARE_WRITE, NULL, OPEN_EXISTING, FILE_ATTRIBUTE_NORMAL, NULL)); @@ -1421,8 +1417,8 @@ void 
setfiletime (const wstring & path, const FILETIME & time) { RuntimeError ("setfiletime: error setting file time information: %d", GetLastError()); } -#endif } +#endif #if 0 // ---------------------------------------------------------------------------- diff --git a/Math/Math/CPUMatrix.cpp b/Math/Math/CPUMatrix.cpp index 9a3df0ee8..7b4175070 100644 --- a/Math/Math/CPUMatrix.cpp +++ b/Math/Math/CPUMatrix.cpp @@ -19,18 +19,14 @@ #include #include -#ifndef LINUX +#ifdef _WIN32 #include -#define Linux(x) #else -#define Linux(x) x - #ifndef max #define max(a,b) (((a) > (b)) ? (a) : (b)) #endif - #include -#endif /* LINUX */ +#endif #ifdef LEAKDETECT #include @@ -97,14 +93,14 @@ namespace Microsoft { namespace MSR { namespace CNTK { template void CPUMatrix::ZeroInit() { - this->m_computeDevice = CPUDEVICE; - this->m_pArray = nullptr; - this->m_numRows = 0; - this->m_numCols = 0; - this->m_elemSizeAllocated = 0; - this->m_matrixName=NULL; - this->m_format = matrixFormatDense; - this->m_externalBuffer = false; + m_computeDevice = CPUDEVICE; + m_pArray = nullptr; + m_numRows = 0; + m_numCols = 0; + m_elemSizeAllocated = 0; + m_matrixName=NULL; + m_format = matrixFormatDense; + m_externalBuffer = false; } template @@ -126,13 +122,13 @@ namespace Microsoft { namespace MSR { namespace CNTK { { ZeroInit(); - this->m_numRows = numRows; - this->m_numCols = numCols; - this->m_elemSizeAllocated = this->GetNumElements(); + m_numRows = numRows; + m_numCols = numCols; + m_elemSizeAllocated = GetNumElements(); - if (this->m_elemSizeAllocated != 0) + if (m_elemSizeAllocated != 0) { - this->m_pArray = new ElemType[this->m_elemSizeAllocated]; + m_pArray = new ElemType[m_elemSizeAllocated]; SetValue(0); } } @@ -151,7 +147,7 @@ namespace Microsoft { namespace MSR { namespace CNTK { ZeroInit(); if (!deepCopyFrom.IsEmpty()) SetValue(deepCopyFrom); - this->SetMatrixName(deepCopyFrom.m_matrixName); + SetMatrixName(deepCopyFrom.m_matrixName); } //assignment operator, deep copy @@ -161,7 +157,7 @@ 
namespace Microsoft { namespace MSR { namespace CNTK { Clear(); if (!deepCopyFrom.IsEmpty()) SetValue(deepCopyFrom); - this->SetMatrixName(deepCopyFrom.m_matrixName); + SetMatrixName(deepCopyFrom.m_matrixName); return *this; } @@ -170,14 +166,14 @@ namespace Microsoft { namespace MSR { namespace CNTK { template CPUMatrix::CPUMatrix(CPUMatrix&& moveFrom) { - this->m_computeDevice = moveFrom.m_computeDevice; - this->m_numRows = moveFrom.m_numRows; - this->m_numCols = moveFrom.m_numCols; - this->m_elemSizeAllocated = moveFrom.m_elemSizeAllocated; - this->m_pArray = moveFrom.m_pArray; //shallow copy the pointer - this->m_matrixName = moveFrom.m_matrixName; - this->m_format = moveFrom.m_format; - this->m_externalBuffer = moveFrom.m_externalBuffer; + m_computeDevice = moveFrom.m_computeDevice; + m_numRows = moveFrom.m_numRows; + m_numCols = moveFrom.m_numCols; + m_elemSizeAllocated = moveFrom.m_elemSizeAllocated; + m_pArray = moveFrom.m_pArray; //shallow copy the pointer + m_matrixName = moveFrom.m_matrixName; + m_format = moveFrom.m_format; + m_externalBuffer = moveFrom.m_externalBuffer; //release the pointer from the source object so that the destructor won't release it twice moveFrom.ZeroInit(); } @@ -188,16 +184,16 @@ namespace Microsoft { namespace MSR { namespace CNTK { { if (this != &moveFrom) { - if (this->OwnBuffer() && this->m_pArray != nullptr) - delete[] this->m_pArray; //always delete the data pointer since we will use the pointer from moveFrom + if (OwnBuffer() && m_pArray != nullptr) + delete[] m_pArray; //always delete the data pointer since we will use the pointer from moveFrom - this->m_computeDevice = moveFrom.m_computeDevice; - this->m_numRows = moveFrom.m_numRows; - this->m_numCols = moveFrom.m_numCols; - this->m_elemSizeAllocated = moveFrom.m_elemSizeAllocated; - this->m_pArray = moveFrom.m_pArray; - this->m_format = moveFrom.m_format; - this->m_externalBuffer = moveFrom.m_externalBuffer; + m_computeDevice = moveFrom.m_computeDevice; + m_numRows = 
moveFrom.m_numRows; + m_numCols = moveFrom.m_numCols; + m_elemSizeAllocated = moveFrom.m_elemSizeAllocated; + m_pArray = moveFrom.m_pArray; + m_format = moveFrom.m_format; + m_externalBuffer = moveFrom.m_externalBuffer; //release the pointer from the source object so that the destructor won't release it twice moveFrom.ZeroInit(); @@ -214,11 +210,11 @@ namespace Microsoft { namespace MSR { namespace CNTK { template void CPUMatrix::Clear() { - if (this->m_pArray!=nullptr && this->OwnBuffer()) + if (m_pArray!=nullptr && OwnBuffer()) { - delete [] this->m_pArray; - this->m_pArray = nullptr; - this->m_elemSizeAllocated = 0; + delete [] m_pArray; + m_pArray = nullptr; + m_elemSizeAllocated = 0; } BaseMatrix::Clear(); @@ -235,17 +231,17 @@ namespace Microsoft { namespace MSR { namespace CNTK { if (numCols == 0) throw std::logic_error("The slice cannot have 0 columns."); - if (startColumn + numCols > this->m_numCols) + if (startColumn + numCols > m_numCols) throw std::logic_error("The slice is out of range of the source matrix."); CPUMatrix slice; slice.m_externalBuffer = true; //memory of a slice is managed externally. - slice.m_numRows = this->m_numRows; + slice.m_numRows = m_numRows; slice.m_numCols = numCols; slice.m_elemSizeAllocated = slice.GetNumElements(); - slice.m_pArray = this->m_pArray + startColumn * this->m_numRows; - slice.m_format = this->m_format; + slice.m_pArray = m_pArray + startColumn * m_numRows; + slice.m_format = m_format; return slice; } @@ -256,16 +252,16 @@ namespace Microsoft { namespace MSR { namespace CNTK { if (numCols == 0) throw std::logic_error("The slice cannot have 0 columns."); - if (startColumn + numCols > this->m_numCols) + if (startColumn + numCols > m_numCols) throw std::logic_error("The slice is out of range of the source matrix."); Clear(); - this->SetOwnBuffer(false); //memory of a slice is managed externally. 
- this->m_numRows = fromMatrix.m_numRows; - this->m_numCols = numCols; - this->m_elemSizeAllocated = this->GetNumElements(); - this->m_pArray = this->m_pArray + startColumn *this->m_numRows; + SetOwnBuffer(false); //memory of a slice is managed externally. + m_numRows = fromMatrix.m_numRows; + m_numCols = numCols; + m_elemSizeAllocated = GetNumElements(); + m_pArray = m_pArray + startColumn *m_numRows; return *this; } @@ -290,7 +286,7 @@ namespace Microsoft { namespace MSR { namespace CNTK { for (long j=0; jm_pArray + j*numRows, a.m_pArray + j*k + startIndex, sizeof(ElemType) * numRows); + memcpy(m_pArray + j*numRows, a.m_pArray + j*k + startIndex, sizeof(ElemType) * numRows); ////four-way unrolling //for (long i=0, startRow = startIndex; i<(m & ~3); i+=4, startRow+=4) @@ -320,10 +316,10 @@ namespace Microsoft { namespace MSR { namespace CNTK { if (a.GetNumRows() != numRows) throw std::logic_error("AddToRowSliceValuesOf: a.GetNumRows() != numRows."); - if (startIndex + numRows > this->GetNumRows()) + if (startIndex + numRows > GetNumRows()) throw std::logic_error("AddToRowSliceValuesOf: startIndex + numRows exceeds GetNumRows()."); - if (a.GetNumCols() != this->GetNumCols()) + if (a.GetNumCols() != GetNumCols()) throw std::logic_error("AddToRowSliceValuesOf: columns does not match."); long n=(long)a.GetNumCols(), m=(long)numRows; @@ -359,7 +355,7 @@ namespace Microsoft { namespace MSR { namespace CNTK { throw std::logic_error("AddWithRowSliceValuesOf: input matrix a is empty."); if (GetNumRows() != numRows) - throw std::logic_error("AddWithRowSliceValuesOf: this->GetNumRows() != numRows."); + throw std::logic_error("AddWithRowSliceValuesOf: GetNumRows() != numRows."); if (startIndex + numRows > a.GetNumRows()) throw std::logic_error("AddWithRowSliceValuesOf: startIndex + numRows exceeds a.GetNumRows()."); @@ -439,7 +435,7 @@ namespace Microsoft { namespace MSR { namespace CNTK { template CPUMatrix CPUMatrix::Transpose() { - if (this->IsEmpty()) + if (IsEmpty()) 
throw std::logic_error("Transpose: Matrix is empty."); CPUMatrix c; @@ -485,29 +481,29 @@ namespace Microsoft { namespace MSR { namespace CNTK { template void CPUMatrix::SetValue(const ElemType v) { - if (this->IsEmpty()) + if (IsEmpty()) throw std::logic_error("SetValue: Matrix is empty."); if (v == 0) { - memset(this->m_pArray, 0, sizeof(ElemType) * this->GetNumElements()); + memset(m_pArray, 0, sizeof(ElemType) * GetNumElements()); } else { - long m=(long)this->GetNumElements(); + long m=(long)GetNumElements(); #pragma omp parallel for //four-way unrolling for (long i=0; i<(m & ~3); i+=4) { - this->m_pArray[i] = v; - this->m_pArray[i+1] = v; - this->m_pArray[i+2] = v; - this->m_pArray[i+3] = v; + m_pArray[i] = v; + m_pArray[i+1] = v; + m_pArray[i+2] = v; + m_pArray[i+3] = v; } //handle remaining stuffs for (long i=m & ~3; im_pArray[i] = v; + m_pArray[i] = v; } } } @@ -515,13 +511,13 @@ namespace Microsoft { namespace MSR { namespace CNTK { template void CPUMatrix::SetColumn(const ElemType* colPointer, size_t j) { - if (this->IsEmpty()) + if (IsEmpty()) throw std::logic_error("SetColumn: Matrix is empty."); if (colPointer==NULL) return; auto& us = *this; - long m=(long)this->GetNumRows(); + long m=(long)GetNumRows(); #pragma omp parallel for //four-way unrolling for (long i=0; i<(m & ~3); i+=4) @@ -542,11 +538,11 @@ namespace Microsoft { namespace MSR { namespace CNTK { template void CPUMatrix::SetColumn(const ElemType val, size_t j) { - if (this->IsEmpty()) + if (IsEmpty()) throw std::logic_error("SetColumn: Matrix is empty."); auto& us = *this; - long m=(long)this->GetNumRows(); + long m=(long)GetNumRows(); #pragma omp parallel for //four-way unrolling for (long i=0; i<(m & ~3); i+=4) @@ -566,12 +562,12 @@ namespace Microsoft { namespace MSR { namespace CNTK { template void CPUMatrix::SetColumn(const CPUMatrix& valMat, size_t j) { - if (this->IsEmpty()) + if (IsEmpty()) throw std::logic_error("SetColumn: Matrix is empty."); - assert(valMat.GetNumRows() == 
this->GetNumRows() && valMat.GetNumCols() == 1) ; + assert(valMat.GetNumRows() == GetNumRows() && valMat.GetNumCols() == 1) ; auto& us = *this; - long m=(long)this->GetNumRows(); + long m=(long)GetNumRows(); #pragma omp parallel for //four-way unrolling for (long i=0; i<(m & ~3); i+=4) @@ -597,7 +593,7 @@ namespace Microsoft { namespace MSR { namespace CNTK { Resize(deepCopyFrom.GetNumRows(), deepCopyFrom.GetNumCols()); size_t cpSize = deepCopyFrom.GetNumElements(); if (cpSize != 0) - memcpy(this->m_pArray, deepCopyFrom.m_pArray, cpSize*sizeof(ElemType)); + memcpy(m_pArray, deepCopyFrom.m_pArray, cpSize*sizeof(ElemType)); } template @@ -606,30 +602,30 @@ namespace Microsoft { namespace MSR { namespace CNTK { if (pArray == nullptr) throw std::invalid_argument("Invalid pArray."); - this->m_format = matrixFormatDense; - this->m_computeDevice = CPUDEVICE; + m_format = matrixFormatDense; + m_computeDevice = CPUDEVICE; // if it's externally managed, then populate the structure if (matrixFlags&matrixFlagDontOwnBuffer) { - if (this->m_pArray != nullptr) - delete [] this->m_pArray; + if (m_pArray != nullptr) + delete [] m_pArray; - this->m_pArray = pArray; - this->m_numRows = numRows; - this->m_numCols = numCols; + m_pArray = pArray; + m_numRows = numRows; + m_numCols = numCols; // free previous array allocation if any before overwriting - if (this->m_pArray != nullptr) - delete[] this->m_pArray; - this->m_pArray = pArray; - this->m_elemSizeAllocated = this->GetNumElements(); - this->m_externalBuffer = true; + if (m_pArray != nullptr) + delete[] m_pArray; + m_pArray = pArray; + m_elemSizeAllocated = GetNumElements(); + m_externalBuffer = true; } else { Resize(numRows, numCols); - if (this->IsEmpty()) + if (IsEmpty()) { throw std::invalid_argument("NumRows or NumCols is 0. 
Nothing to copy"); } @@ -637,7 +633,7 @@ namespace Microsoft { namespace MSR { namespace CNTK { { if (!(matrixFlags&matrixFormatRowMajor)) //compatible to internal structure { - memcpy(this->m_pArray, pArray, this->GetNumElements()*sizeof(ElemType)); + memcpy(m_pArray, pArray, GetNumElements()*sizeof(ElemType)); } else //need to transpose { @@ -648,9 +644,9 @@ namespace Microsoft { namespace MSR { namespace CNTK { foreach_column(j, us) { #ifndef USE_MKL - dcopy((int)numRows, reinterpret_cast (pArray+j), (int)numCols, reinterpret_cast (this->m_pArray + LocateColumn(j)), 1); + dcopy((int)numRows, reinterpret_cast (pArray+j), (int)numCols, reinterpret_cast (m_pArray + LocateColumn(j)), 1); #else - cblas_dcopy ((int)numRows, reinterpret_cast (pArray+j), (int)numCols, reinterpret_cast (this->m_pArray + LocateColumn(j)), 1); + cblas_dcopy ((int)numRows, reinterpret_cast (pArray+j), (int)numCols, reinterpret_cast (m_pArray + LocateColumn(j)), 1); #endif } } @@ -662,9 +658,9 @@ namespace Microsoft { namespace MSR { namespace CNTK { { #pragma warning (suppress: 4244) #ifndef USE_MKL - scopy((int)numRows, reinterpret_cast (pArray+j), (int)numCols, reinterpret_cast (this->m_pArray + LocateColumn(j)), 1); + scopy((int)numRows, reinterpret_cast (pArray+j), (int)numCols, reinterpret_cast (m_pArray + LocateColumn(j)), 1); #else - cblas_scopy ((int)numRows, reinterpret_cast (pArray+j), (int)numCols, reinterpret_cast (this->m_pArray + LocateColumn(j)), 1); + cblas_scopy ((int)numRows, reinterpret_cast (pArray+j), (int)numCols, reinterpret_cast (m_pArray + LocateColumn(j)), 1); #endif } } @@ -677,14 +673,14 @@ namespace Microsoft { namespace MSR { namespace CNTK { template void CPUMatrix::SetDiagonalValue(const ElemType v) { - if (this->IsEmpty()) + if (IsEmpty()) throw std::logic_error("SetDiagonalValue: Matrix is empty."); - if (this->GetNumRows() != this->GetNumCols()) + if (GetNumRows() != GetNumCols()) throw std::logic_error("SetDiagonalValue: NumRows and NumCols do not 
agree."); auto& us = *this; - long m=(long)this->GetNumRows(); + long m=(long)GetNumRows(); #pragma omp parallel for //four-way unrolling for (long i=0; i<(m & ~3); i+=4) @@ -704,10 +700,10 @@ namespace Microsoft { namespace MSR { namespace CNTK { template void CPUMatrix::SetDiagonalValue(CPUMatrix& vector) { - if (this->IsEmpty() || vector.IsEmpty()) + if (IsEmpty() || vector.IsEmpty()) throw std::logic_error("SetDiagonalValue: Matrix is empty."); - if (this->GetNumRows() != this->GetNumCols()) + if (GetNumRows() != GetNumCols()) throw std::logic_error("SetDiagonalValue: NumRows and NumCols do not agree."); if (vector.GetNumRows() != 1 && vector.GetNumCols() != 1) @@ -715,13 +711,13 @@ namespace Microsoft { namespace MSR { namespace CNTK { if (vector.GetNumElements() == 1) //reduce to simple form SetDiagonalValue(vector(0,0)); - else if (vector.GetNumRows() != this->GetNumRows()) + else if (vector.GetNumRows() != GetNumRows()) throw std::logic_error("SetDiagonalValue: input vector's dimension does not agree with [this]."); else { auto& us = *this; - long m=(long)this->GetNumRows(); + long m=(long)GetNumRows(); if (vector.GetNumRows() == 1) //row vector { #pragma omp parallel for @@ -762,7 +758,7 @@ namespace Microsoft { namespace MSR { namespace CNTK { template void CPUMatrix::SetUniformRandomValue(const ElemType low, const ElemType high, unsigned long seed) { - if (this->IsEmpty()) + if (IsEmpty()) throw std::logic_error("SetUniformRandomValue: Matrix is empty."); #ifdef _MSC_VER // TODO: check if available under GCC/Linux @@ -773,19 +769,19 @@ namespace Microsoft { namespace MSR { namespace CNTK { #endif std::uniform_real_distribution r(low, high); - long m=(long)this->GetNumElements(); + long m=(long)GetNumElements(); //four-way unrolling for (long i=0; i<(m & ~3); i+=4) { - this->m_pArray[i] = r(generator); - this->m_pArray[i+1] = r(generator); - this->m_pArray[i+2] = r(generator); - this->m_pArray[i+3] = r(generator); + m_pArray[i] = r(generator); + 
m_pArray[i+1] = r(generator); + m_pArray[i+2] = r(generator); + m_pArray[i+3] = r(generator); } //handle remaining stuffs for (long i=m & ~3; im_pArray[i] = r(generator); + m_pArray[i] = r(generator); } } @@ -796,16 +792,16 @@ namespace Microsoft { namespace MSR { namespace CNTK { if (sigma <= 0) throw std::invalid_argument("SetUniformRandomValue: sigma must be a positive value."); - if (this->IsEmpty()) + if (IsEmpty()) throw std::logic_error("SetUniformRandomValue: Matrix is empty."); auto& us = *this; -#ifndef LINUX - std::ranlux64_base_01 generator; +#ifdef _MSC_VER // TODO: check if available under GCC/Linux + std::ranlux64_base_01 generator; generator.seed(seed==USE_TIME_BASED_SEED ? (unsigned long) time(NULL) : seed); #else std::default_random_engine generator (seed); -#endif /* LINUX */ +#endif std::normal_distribution r(mean, sigma); //#pragma omp parallel for //is it thread safe? foreach_coord(i,j,us) @@ -820,19 +816,19 @@ namespace Microsoft { namespace MSR { namespace CNTK { if (sigma <= 0) throw std::invalid_argument("SetUniformRandomValue: sigma must be a positive value."); - if (this->IsEmpty()) + if (IsEmpty()) throw std::logic_error("SetUniformRandomValue: Matrix is empty."); auto& us = *this; -#ifndef LINUX - std::ranlux64_base_01 generator; +#ifdef _MSC_VER // TODO: check if available under GCC/Linux + std::ranlux64_base_01 generator; generator.seed(seed==USE_TIME_BASED_SEED ? 
(unsigned long) time(NULL) : seed); #else std::default_random_engine generator (seed); -#endif /* LINUX */ +#endif std::normal_distribution r(mean, sigma); - long m=(long)this->GetNumRows(), n=(long)this->GetNumCols(); + long m=(long)GetNumRows(), n=(long)GetNumCols(); for (long j=0; j void CPUMatrix::SetUniformRandomMask(const ElemType maskRate, const ElemType scaleValue, unsigned long seed) { - if (this->IsEmpty()) + if (IsEmpty()) throw std::logic_error("SetUniformRandomValue: Matrix is empty."); auto& us = *this; -#ifndef LINUX - std::ranlux64_base_01 generator; +#ifdef _MSC_VER // TODO: check if available under GCC/Linux + std::ranlux64_base_01 generator; generator.seed(seed==USE_TIME_BASED_SEED ? (unsigned long) time(NULL) : seed); #else std::default_random_engine generator (seed==USE_TIME_BASED_SEED ? (unsigned long) time(NULL) : seed); -#endif /* LINUX */ +#endif std::uniform_real_distribution r(0, 1); - long m=(long)this->GetNumRows(), n=(long)this->GetNumCols(); + long m=(long)GetNumRows(), n=(long)GetNumCols(); ElemType v; for (long j=0; j void CPUMatrix::Adagrad(CPUMatrix& gradients) { - if (this->IsEmpty()) + if (IsEmpty()) { - this->Resize(gradients.GetNumRows(), gradients.GetNumCols()); - this->SetValue(0.0); + Resize(gradients.GetNumRows(), gradients.GetNumCols()); + SetValue(0.0); } - assert(this->GetNumRows() == gradients.GetNumRows() && this->GetNumCols() == gradients.GetNumCols()); + assert(GetNumRows() == gradients.GetNumRows() && GetNumCols() == gradients.GetNumCols()); - ElemType *a=this->m_pArray, *d_v=gradients.m_pArray; - size_t n = this->GetNumElements(); + ElemType *a=m_pArray, *d_v=gradients.m_pArray; + size_t n = GetNumElements(); long nLoop = (long)n - n%4; const ElemType floor = 1e-16f; @@ -950,10 +946,10 @@ namespace Microsoft { namespace MSR { namespace CNTK { size_t n = gradients.GetNumElements(); ElemType *curr_grad=gradients.m_pArray; - if (this->IsEmpty() || this->GetNumCols() < gradients.GetNumCols() * 3) + if (IsEmpty() || 
GetNumCols() < gradients.GetNumCols() * 3) { - this->Resize(gradients.GetNumRows(), gradients.GetNumCols() * 3); - this->SetValue(0.0); + Resize(gradients.GetNumRows(), gradients.GetNumCols() * 3); + SetValue(0.0); ElemType *avars=m_pArray; // accumulated variances for RMS scaling ElemType *steps=m_pArray+2*n; // current step size @@ -971,7 +967,7 @@ namespace Microsoft { namespace MSR { namespace CNTK { ElemType *signs=m_pArray+n; // sign of previous gradient ElemType *steps=m_pArray+2*n; // current step size - assert(this->GetNumRows() == gradients.GetNumRows() && this->GetNumCols() == gradients.GetNumCols() * 3); + assert(GetNumRows() == gradients.GetNumRows() && GetNumCols() == gradients.GetNumCols() * 3); ElemType ONE_MINUS_GAMMA = ElemType(1.0) - RMS_GAMMA; //int upd[] = { @@ -1025,40 +1021,40 @@ namespace Microsoft { namespace MSR { namespace CNTK { template void CPUMatrix::Reshape(const size_t numRows, const size_t numCols) { - assert (numRows*numCols == this->GetNumElements()); - if (numRows*numCols != this->GetNumElements()) + assert (numRows*numCols == GetNumElements()); + if (numRows*numCols != GetNumElements()) throw std::invalid_argument("Reshape: total number of elements does not match."); - this->m_numRows = numRows; - this->m_numCols = numCols; + m_numRows = numRows; + m_numCols = numCols; } //if growONly is true, resize will not reallocate memory if the current memory is large enough (i.e., will not shrink) template void CPUMatrix::Resize(const size_t numRows, const size_t numCols, bool growOnly /*=true*/) { - this->m_numRows = numRows; - this->m_numCols = numCols; + m_numRows = numRows; + m_numCols = numCols; - size_t numElements = this->GetNumElements(); - if (numElements > this->m_elemSizeAllocated || (!growOnly && (numElements != this->m_elemSizeAllocated))) + size_t numElements = GetNumElements(); + if (numElements > m_elemSizeAllocated || (!growOnly && (numElements != m_elemSizeAllocated))) { - if (this->OwnBuffer() && this->m_pArray) + if 
(OwnBuffer() && m_pArray) { - delete[] this->m_pArray; //delete and reallocate - this->m_pArray = nullptr; + delete[] m_pArray; //delete and reallocate + m_pArray = nullptr; } - if (this->IsEmpty()) + if (IsEmpty()) { - this->m_elemSizeAllocated = 0; - this->m_pArray = nullptr; + m_elemSizeAllocated = 0; + m_pArray = nullptr; } else { - if (!this->OwnBuffer()) + if (!OwnBuffer()) throw runtime_error("Resizing an matrix you don't own is not supported."); - this->m_elemSizeAllocated = numElements; - this->m_pArray = new ElemType[this->m_elemSizeAllocated]; + m_elemSizeAllocated = numElements; + m_pArray = new ElemType[m_elemSizeAllocated]; SetValue(0); } } @@ -1068,11 +1064,11 @@ namespace Microsoft { namespace MSR { namespace CNTK { template ElemType* CPUMatrix::CopyToArray() const { - size_t numElements = this->GetNumElements(); + size_t numElements = GetNumElements(); if (numElements != 0) { ElemType* arrayCopyTo = new ElemType[numElements]; - memcpy(arrayCopyTo, this->m_pArray, sizeof(ElemType)*numElements); + memcpy(arrayCopyTo, m_pArray, sizeof(ElemType)*numElements); return arrayCopyTo; } else @@ -1086,7 +1082,7 @@ namespace Microsoft { namespace MSR { namespace CNTK { template size_t CPUMatrix::CopyToArray(ElemType*& arrayCopyTo, size_t& currentArraySize) const { - size_t numElements = this->GetNumElements(); + size_t numElements = GetNumElements(); if (numElements > currentArraySize) { @@ -1097,7 +1093,7 @@ namespace Microsoft { namespace MSR { namespace CNTK { if (numElements != 0) { - memcpy(arrayCopyTo, this->m_pArray, sizeof(ElemType)*numElements); + memcpy(arrayCopyTo, m_pArray, sizeof(ElemType)*numElements); } return numElements; @@ -1106,15 +1102,15 @@ namespace Microsoft { namespace MSR { namespace CNTK { template inline size_t CPUMatrix::LocateElement (const size_t row, const size_t col) const { - assert (row < this->m_numRows && col < this->m_numCols); - return col * this->m_numRows + row; // matrix in column-wise storage + assert (row < m_numRows 
&& col < m_numCols); + return col * m_numRows + row; // matrix in column-wise storage } template size_t CPUMatrix::LocateColumn (const size_t col) const { - assert (col < this->m_numCols); - return col * this->m_numRows; // matrix in column-wise storage + assert (col < m_numCols); + return col * m_numRows; // matrix in column-wise storage } #pragma endregion Basic Operators @@ -1130,7 +1126,7 @@ namespace Microsoft { namespace MSR { namespace CNTK { template CPUMatrix CPUMatrix::operator+ (ElemType alpha) const { - CPUMatrix c(this->GetNumRows(), this->GetNumCols()); + CPUMatrix c(GetNumRows(), GetNumCols()); c.AssignSumOf(alpha, *this); return c; } @@ -1145,7 +1141,7 @@ namespace Microsoft { namespace MSR { namespace CNTK { if (this != &a) Resize(a.GetNumRows(), a.GetNumCols()); - long m=(long)this->GetNumRows(), n=(long)this->GetNumCols(); + long m=(long)GetNumRows(), n=(long)GetNumCols(); #pragma omp parallel for for (long j=0; j CPUMatrix CPUMatrix::operator+ (const CPUMatrix& a) const { - if (this->GetNumElements() == 1) + if (GetNumElements() == 1) { CPUMatrix c(a); c += (*this)(0,0); @@ -1234,7 +1230,7 @@ namespace Microsoft { namespace MSR { namespace CNTK { template CPUMatrix CPUMatrix::operator- (ElemType alpha) const { - CPUMatrix c(this->GetNumRows(), this->GetNumCols()); + CPUMatrix c(GetNumRows(), GetNumCols()); c.AssignDifferenceOf(*this, alpha); return c; } @@ -1249,7 +1245,7 @@ namespace Microsoft { namespace MSR { namespace CNTK { if (this != &a) Resize(a.GetNumRows(), a.GetNumCols()); - long m=(long)this->GetNumRows(), n=(long)this->GetNumCols(); + long m=(long)GetNumRows(), n=(long)GetNumCols(); #pragma omp parallel for for (long j=0; jGetNumRows(), n=(long)this->GetNumCols(); + long m=(long)GetNumRows(), n=(long)GetNumCols(); #pragma omp parallel for for (long j=0; j CPUMatrix CPUMatrix::operator* (ElemType alpha) const { - CPUMatrix c(this->GetNumRows(), this->GetNumCols()); + CPUMatrix c(GetNumRows(), GetNumCols()); Scale(alpha, *this, c); 
return c; } @@ -1384,7 +1380,7 @@ namespace Microsoft { namespace MSR { namespace CNTK { CPUMatrix CPUMatrix::operator* (const CPUMatrix& a) const { auto& us = *this; - if (this->GetNumElements() == 1) + if (GetNumElements() == 1) { CPUMatrix c; c.AssignProductOf(us(0,0), a); @@ -1430,7 +1426,7 @@ namespace Microsoft { namespace MSR { namespace CNTK { template CPUMatrix CPUMatrix::operator^ (ElemType alpha) const { - CPUMatrix c(this->GetNumRows(), this->GetNumCols()); + CPUMatrix c(GetNumRows(), GetNumCols()); ElementWisePower(alpha, *this, c); return c; } @@ -1472,7 +1468,7 @@ namespace Microsoft { namespace MSR { namespace CNTK { if (this != &a) Resize(a.GetNumRows(), a.GetNumCols()); - long m=(long)this->GetNumRows(), n=(long)this->GetNumCols(); + long m=(long)GetNumRows(), n=(long)GetNumCols(); #pragma omp parallel for for (long j=0; jGetNumRows() && a.GetNumCols() == this->GetNumCols())) + if (!(a.GetNumRows() == GetNumRows() && a.GetNumCols() == GetNumCols())) throw std::invalid_argument("AddElementProductOf : The input matrix dimensions do not match [this]."); auto& us=*this; - long m=(long)this->GetNumRows(), n=(long)this->GetNumCols(); + long m=(long)GetNumRows(), n=(long)GetNumCols(); #pragma omp parallel for for (long j=0; j CPUMatrix& CPUMatrix::ColumnElementMultiplyWith(const CPUMatrix& a) { - if (a.IsEmpty() || this->IsEmpty()) + if (a.IsEmpty() || IsEmpty()) throw std::logic_error("ColumnElementMultiplyWith: Matrix is empty."); - assert (a.GetNumRows() == this->GetNumRows() && a.GetNumCols() == 1); - if (!(a.GetNumRows() == this->GetNumRows() && a.GetNumCols() == 1)) + assert (a.GetNumRows() == GetNumRows() && a.GetNumCols() == 1); + if (!(a.GetNumRows() == GetNumRows() && a.GetNumCols() == 1)) throw std::invalid_argument("ColumnElementMultiplyWith: The input matrix should be a col vector and match [this]'s rows."); auto& us=*this; - long m=(long)this->GetNumRows(), n=(long)this->GetNumCols(); + long m=(long)GetNumRows(), n=(long)GetNumCols(); 
#pragma omp parallel for for (long j=0; j CPUMatrix& CPUMatrix::RowElementMultiplyWith(const CPUMatrix& a) { - if (a.IsEmpty() || this->IsEmpty()) + if (a.IsEmpty() || IsEmpty()) throw std::logic_error("RowElementMultiplyWith: Matrix is empty."); - assert (a.GetNumRows() == 1 && a.GetNumCols() == this->GetNumCols()); - if (!(a.GetNumRows() == 1 && a.GetNumCols() == this->GetNumCols())) + assert (a.GetNumRows() == 1 && a.GetNumCols() == GetNumCols()); + if (!(a.GetNumRows() == 1 && a.GetNumCols() == GetNumCols())) throw std::invalid_argument("RowElementMultiplyWith: The input matrix should be a row vector and match [this]'s columns."); auto& us=*this; - long m=(long)this->GetNumRows(), n=(long)this->GetNumCols(); + long m=(long)GetNumRows(), n=(long)GetNumCols(); #pragma omp parallel for for (long j=0; jGetNumRows(), n=(long)this->GetNumCols(); + long m=(long)GetNumRows(), n=(long)GetNumCols(); #pragma omp parallel for for (long j=0; jGetNumRows(), n=(long)this->GetNumCols(); + long m=(long)GetNumRows(), n=(long)GetNumCols(); #pragma omp parallel for for (long j=0; jGetNumRows(), n=(long)this->GetNumCols(); + long m=(long)GetNumRows(), n=(long)GetNumCols(); #pragma omp parallel for for (long j=0; jGetNumRows(), n=(long)this->GetNumCols(); + long m=(long)GetNumRows(), n=(long)GetNumCols(); #pragma omp parallel for for (long j=0; jGetNumRows(), n=(long)this->GetNumCols(); + long m=(long)GetNumRows(), n=(long)GetNumCols(); #pragma omp parallel for for (long j=0; jGetNumRows(), n=(long)this->GetNumCols(); + long m=(long)GetNumRows(), n=(long)GetNumCols(); #pragma omp parallel for for (long j=0; j CPUMatrix& CPUMatrix::InplaceTruncateBottom (const ElemType threshold) { - if (this->IsEmpty()) + if (IsEmpty()) throw std::logic_error("InplaceTruncateBottom: Matrix is empty."); auto& us=*this; - long m=(long)this->GetNumRows(), n=(long)this->GetNumCols(); + long m=(long)GetNumRows(), n=(long)GetNumCols(); #pragma omp parallel for for (long j=0; j CPUMatrix& 
CPUMatrix::InplaceTruncate (const ElemType threshold) { - if (this->IsEmpty()) + if (IsEmpty()) throw std::logic_error("InplaceTruncateBottom: Matrix is empty."); auto& us=*this; ElemType locThresholdPos = abs(threshold); ElemType locTHresholdNeg = -locThresholdPos; - long m=(long)this->GetNumRows(), n=(long)this->GetNumCols(); + long m=(long)GetNumRows(), n=(long)GetNumCols(); #pragma omp parallel for for (long j=0; j CPUMatrix& CPUMatrix::InplaceTruncateTop (const ElemType threshold) { - if (this->IsEmpty()) + if (IsEmpty()) throw std::logic_error("InplaceTruncateTop: Matrix is empty."); auto& us=*this; @@ -2360,7 +2356,7 @@ namespace Microsoft { namespace MSR { namespace CNTK { template CPUMatrix& CPUMatrix::SetToZeroIfAbsLessThan (const ElemType threshold) { - if (this->IsEmpty()) + if (IsEmpty()) throw std::logic_error("SetToZeroIfAbsLessThan: Matrix is empty."); auto& us=*this; @@ -2379,24 +2375,24 @@ namespace Microsoft { namespace MSR { namespace CNTK { template ElemType CPUMatrix::SumOfAbsElements () const { - if (this->IsEmpty()) + if (IsEmpty()) throw std::logic_error("SumOfAbsElements: Matrix is empty."); if (sizeof(ElemType) == sizeof(double)) { #ifndef USE_MKL - return (ElemType)dasum((int)this->GetNumElements(), reinterpret_cast (this->m_pArray), 1); + return (ElemType)dasum((int)GetNumElements(), reinterpret_cast (m_pArray), 1); #else - return (ElemType)cblas_dasum((int)this->GetNumElements(), reinterpret_cast (this->m_pArray), 1); + return (ElemType)cblas_dasum((int)GetNumElements(), reinterpret_cast (m_pArray), 1); #endif } else { #pragma warning (suppress: 4244) #ifndef USE_MKL - return sasum((int)this->GetNumElements(), reinterpret_cast (this->m_pArray), 1); + return sasum((int)GetNumElements(), reinterpret_cast (m_pArray), 1); #else - return cblas_sasum ((int)this->GetNumElements(), reinterpret_cast (m_pArray), 1); + return cblas_sasum ((int)GetNumElements(), reinterpret_cast (m_pArray), 1); #endif } } @@ -2405,7 +2401,7 @@ namespace Microsoft 
{ namespace MSR { namespace CNTK { template ElemType CPUMatrix::SumOfElements () const { - if (this->IsEmpty()) + if (IsEmpty()) throw std::logic_error("SumOfElements: Matrix is empty."); ElemType sum=0; @@ -2415,12 +2411,12 @@ namespace Microsoft { namespace MSR { namespace CNTK { #pragma omp parallel for reduction(+:sum) for (long i=0; i<(m & ~3); i+=4) { - sum += this->m_pArray[i] + this->m_pArray[i+1] + this->m_pArray[i+2] + this->m_pArray[i+3] ; + sum += m_pArray[i] + m_pArray[i+1] + m_pArray[i+2] + m_pArray[i+3] ; } //handle remaining stuffs for (long i=m & ~3; im_pArray[i]; + sum += m_pArray[i]; } return sum; @@ -2449,7 +2445,7 @@ namespace Microsoft { namespace MSR { namespace CNTK { template void CPUMatrix::VectorNorm1(CPUMatrix& c, const bool isColWise) const { - if (this->IsEmpty()) + if (IsEmpty()) throw std::logic_error("VectorNormInf: Matrix is empty."); auto& us=*this; @@ -2507,7 +2503,7 @@ namespace Microsoft { namespace MSR { namespace CNTK { template void CPUMatrix::VectorNorm2(CPUMatrix& c, const bool isColWise) const { - if (this->IsEmpty()) + if (IsEmpty()) throw std::logic_error("VectorNorm2: Matrix is empty."); auto& us=*this; @@ -2590,7 +2586,7 @@ namespace Microsoft { namespace MSR { namespace CNTK { template void CPUMatrix::VectorNormInf(CPUMatrix& c, const bool isColWise) const { - if (this->IsEmpty()) + if (IsEmpty()) throw std::logic_error("VectorNormInf: Matrix is empty."); auto& us=*this; @@ -2707,7 +2703,7 @@ namespace Microsoft { namespace MSR { namespace CNTK { throw invalid_argument("AddColumnReshapeProductOf: number of rows in a should be multiples of that in b."); long rowsC = rowsA / rowsB; - if (rowsC != this->GetNumRows() || cols != this->GetNumCols()) + if (rowsC != GetNumRows() || cols != GetNumCols()) throw invalid_argument("AddColumnReshapeProductOf: This matrix does not have the right size."); auto & us = *this; @@ -2773,23 +2769,23 @@ namespace Microsoft { namespace MSR { namespace CNTK { template ElemType 
CPUMatrix::FrobeniusNorm() const { - if (this->IsEmpty()) + if (IsEmpty()) throw std::logic_error("FrobeniusNorm: Matrix is empty."); ElemType v = 0; - long m=(long)this->GetNumElements(); + long m=(long)GetNumElements(); //four-way unrolling #pragma omp parallel for reduction(+:v) for (long i=0; i<(m & ~3); i+=4) { - v += this->m_pArray[i] * this->m_pArray[i] + this->m_pArray[i+1] * this->m_pArray[i+1] + this->m_pArray[i+2] * this->m_pArray[i+2] + this->m_pArray[i+3] * this->m_pArray[i+3]; + v += m_pArray[i] * m_pArray[i] + m_pArray[i+1] * m_pArray[i+1] + m_pArray[i+2] * m_pArray[i+2] + m_pArray[i+3] * m_pArray[i+3]; } //handle remaining stuffs for (long i=m & ~3; im_pArray[i] * this->m_pArray[i]; + v += m_pArray[i] * m_pArray[i]; } return sqrt(v); @@ -2811,7 +2807,7 @@ namespace Microsoft { namespace MSR { namespace CNTK { template ElemType CPUMatrix::MatrixNormInf() const { - if (this->IsEmpty()) + if (IsEmpty()) throw std::logic_error("MatrixNormInf: Matrix is empty."); auto& us=*this; @@ -2831,7 +2827,7 @@ namespace Microsoft { namespace MSR { namespace CNTK { template ElemType CPUMatrix::MatrixNorm0() const { - if (this->IsEmpty()) + if (IsEmpty()) throw std::logic_error("MatrixNorm0: Matrix is empty."); auto& us=*this; @@ -2854,7 +2850,7 @@ namespace Microsoft { namespace MSR { namespace CNTK { template ElemType CPUMatrix::MatrixNorm1() const { - if (this->IsEmpty()) + if (IsEmpty()) throw std::logic_error("MatrixNorm1: Matrix is empty."); auto& us=*this; @@ -2917,12 +2913,12 @@ namespace Microsoft { namespace MSR { namespace CNTK { template void CPUMatrix::VectorMax(CPUMatrix& maxIndexes, CPUMatrix& maxValues, const bool isColWise) const { - if (this->IsEmpty()) + if (IsEmpty()) throw std::logic_error("VectorMax: Matrix is empty."); auto& us=*this; - const int m = (int)this->GetNumRows(); - const int n = (int)this->GetNumCols(); + const int m = (int)GetNumRows(); + const int n = (int)GetNumCols(); assert (m>0 && n>0); //converting from size_t to int may 
cause overflow @@ -2976,12 +2972,12 @@ namespace Microsoft { namespace MSR { namespace CNTK { template void CPUMatrix::VectorMin(CPUMatrix& minIndexes, CPUMatrix& minValues, const bool isColWise) const { - if (this->IsEmpty()) + if (IsEmpty()) throw std::logic_error("VectorMin: Matrix is empty."); auto& us=*this; - const int m = (int)this->GetNumRows(); - const int n = (int)this->GetNumCols(); + const int m = (int)GetNumRows(); + const int n = (int)GetNumCols(); assert (m>0 && n>0); //converting from size_t to int may cause overflow @@ -3059,16 +3055,16 @@ namespace Microsoft { namespace MSR { namespace CNTK { template void CPUMatrix::Print(const char* matrixName, size_t rowStart, size_t rowEnd, size_t colStart, size_t colEnd) const { - if (this->IsEmpty()) + if (IsEmpty()) throw std::logic_error("Print: Matrix is empty."); - if (rowEnd >= this->GetNumRows() || colEnd >= this->GetNumCols()) + if (rowEnd >= GetNumRows() || colEnd >= GetNumCols()) throw std::invalid_argument("Index out of range."); if (matrixName != nullptr) - fprintf (stderr, "\n###### %s (%lu, %lu) ######\n", matrixName, this->GetNumRows(), this->GetNumCols()); + fprintf (stderr, "\n###### %s (%lu, %lu) ######\n", matrixName, GetNumRows(), GetNumCols()); else - fprintf (stderr, "\n###### Unnamed Matrix (%lu, %lu) ######\n", this->GetNumRows(), this->GetNumCols()); + fprintf (stderr, "\n###### Unnamed Matrix (%lu, %lu) ######\n", GetNumRows(), GetNumCols()); fprintf (stderr, "\n------ Print Range (%lu:%lu, %lu:%lu) ------\n", rowStart, rowEnd, colStart, colEnd); @@ -3084,7 +3080,7 @@ namespace Microsoft { namespace MSR { namespace CNTK { template void CPUMatrix::Print(const char* matrixName /*=nullptr*/) const { - Print(matrixName, 0, this->GetNumRows()-1, 0, this->GetNumCols()-1); + Print(matrixName, 0, GetNumRows()-1, 0, GetNumCols()-1); } // file I/O @@ -3964,11 +3960,11 @@ namespace Microsoft { namespace MSR { namespace CNTK { if (a.IsEmpty()) throw std::logic_error("Scale: Input matrix a is 
empty."); if (alpha.GetNumElements()!=1) -#ifndef LINUX +#ifdef _MSC_VER // TODO: check if available under GCC/Linux throw std::exception("Matrix alpha must be 1x1"); #else throw std::exception(); -#endif /* LINUX */ +#endif CPUMatrix::Scale(alpha(0,0),a); } diff --git a/Math/Math/CPUMatrix.h b/Math/Math/CPUMatrix.h index cb026e4a9..480d0648a 100644 --- a/Math/Math/CPUMatrix.h +++ b/Math/Math/CPUMatrix.h @@ -7,21 +7,20 @@ #include #include #include -#include /* LINUX */ +#include #include "File.h" #include "Helpers.h" #include "CommonMatrix.h" -#ifndef LINUX +#ifdef _WIN32 #ifdef MATH_EXPORTS #define MATH_API __declspec(dllexport) #else #define MATH_API __declspec(dllimport) #endif - -#else /* LINUX */ +#else // no DLLs on Linux #define MATH_API -#endif /* LINUX */ +#endif #ifndef USE_TIME_BASED_SEED #define USE_TIME_BASED_SEED ULONG_MAX @@ -57,8 +56,8 @@ namespace Microsoft { namespace MSR { namespace CNTK { ~CPUMatrix(); public: - size_t BufferSize() const {return this->m_numRows*this->m_numCols*sizeof(ElemType);} - ElemType* BufferPointer() const {return this->m_pArray;} + size_t BufferSize() const {return m_numRows*m_numCols*sizeof(ElemType);} + ElemType* BufferPointer() const {return m_pArray;} CPUMatrix ColumnSlice(size_t startColumn, size_t numCols) const; CPUMatrix& AssignColumnSlice(const CPUMatrix& fromMatrix, size_t startColumn, size_t numCols); @@ -79,15 +78,15 @@ namespace Microsoft { namespace MSR { namespace CNTK { inline ElemType& operator() (const size_t row, const size_t col) { - return this->m_pArray[LocateElement(row, col)]; + return m_pArray[LocateElement(row, col)]; } inline const ElemType& operator() (const size_t row, const size_t col) const { - return this->m_pArray[LocateElement(row, col)]; + return m_pArray[LocateElement(row, col)]; } inline ElemType Get00Element() const { - return this->m_pArray[0]; + return m_pArray[0]; } void SetValue(const ElemType v); @@ -280,7 +279,7 @@ namespace Microsoft { namespace MSR { namespace CNTK { static 
void SVD(const CPUMatrix& A, CPUMatrix& SIGMA, CPUMatrix& U, CPUMatrix& VT); static void MultiplyAndWeightedAdd(ElemType alpha, const CPUMatrix& a, const bool transposeA, const CPUMatrix& b, const bool transposeB, - ElemType beta, CPUMatrix& c); + ElemType beta, CPUMatrix& c); static void MultiplyAndAdd(const CPUMatrix& a, const bool transposeA, const CPUMatrix& b, const bool transposeB, CPUMatrix& c); static void Multiply(const CPUMatrix& a, const bool transposeA, const CPUMatrix& b, const bool transposeB, CPUMatrix& c); static void Multiply(const CPUMatrix& a, const CPUMatrix& b, CPUMatrix& c); @@ -353,7 +352,6 @@ namespace Microsoft { namespace MSR { namespace CNTK { protected: - // Was inline.. but without definition, it doesn't make sense. size_t LocateElement (const size_t i, const size_t j) const; size_t LocateColumn (const size_t j) const; diff --git a/Math/Math/CPUSparseMatrix.cpp b/Math/Math/CPUSparseMatrix.cpp index a7dd44c95..790f23a2b 100644 --- a/Math/Math/CPUSparseMatrix.cpp +++ b/Math/Math/CPUSparseMatrix.cpp @@ -15,9 +15,9 @@ #include "CPUSparseMatrix.h" #include #include -#ifndef LINUX +#ifdef _WIN32 #include -#endif /* LINUX */ +#endif #ifdef LEAKDETECT #include #endif @@ -92,77 +92,78 @@ namespace Microsoft { namespace MSR { namespace CNTK { template void CPUSparseMatrix::ZeroInit() { - this->m_numRows = 0; - this->m_numCols = 0; - this->m_elemSizeAllocated = 0; - this->m_externalBuffer = false; - this->m_pArray = NULL; - this->m_computeDevice = CPUDEVICE; - this->m_nz = 0; - this->m_matrixName = NULL; + m_numRows = 0; + m_numCols = 0; + m_elemSizeAllocated = 0; + m_externalBuffer = false; + m_pArray = NULL; + m_computeDevice = CPUDEVICE; + m_nz = 0; + m_matrixName = NULL; - if(this->m_format == MatrixFormat::matrixFormatSparseCSC || this->m_format == MatrixFormat::matrixFormatSparseCSR) + if(m_format == MatrixFormat::matrixFormatSparseCSC || m_format == MatrixFormat::matrixFormatSparseCSR) { - this->m_colIdx = -1; - this->m_val = NULL; - 
this->m_row = NULL; - this->m_pb = NULL; + m_colIdx = -1; + m_val = NULL; + m_row = NULL; + m_pb = NULL; } - else if (this->m_format == MatrixFormat::matrixFormatSparseBlockCol || this->m_format == MatrixFormat::matrixFormatSparseBlockRow) + else if (m_format == MatrixFormat::matrixFormatSparseBlockCol || m_format == MatrixFormat::matrixFormatSparseBlockRow) { - this->m_blockSize = 0; - this->m_blockVal = NULL; - this->m_blockIds = NULL; + m_blockSize = 0; + m_blockVal = NULL; + m_blockIds = NULL; } } - template - CPUSparseMatrix::CPUSparseMatrix(const MatrixFormat format) - { - this->CheckInit(format); - } - //should only be used by constructors. template void CPUSparseMatrix::CheckInit(const MatrixFormat format) - { - if(format != MatrixFormat::matrixFormatSparseCSC && format != MatrixFormat::matrixFormatSparseCSR && format != MatrixFormat::matrixFormatSparseBlockCol && format != MatrixFormat::matrixFormatSparseBlockRow) + { + if (format != MatrixFormat::matrixFormatSparseCSC && format != MatrixFormat::matrixFormatSparseCSR && format != MatrixFormat::matrixFormatSparseBlockCol && format != MatrixFormat::matrixFormatSparseBlockRow) { throw std::logic_error("CPUSparseMatrix: unsupported sparse matrix format"); } - this->m_format = format; + m_format = format; ZeroInit(); } + template + CPUSparseMatrix::CPUSparseMatrix(const MatrixFormat format) + { + CheckInit(format); + } + template CPUSparseMatrix::CPUSparseMatrix(const MatrixFormat format, const size_t numRows, const size_t numCols, const size_t size) - { this->CheckInit(format); + { + CheckInit(format); Resize(numRows, numCols, size); } template CPUSparseMatrix::~CPUSparseMatrix() { - if (this->m_matrixName!=NULL) + if (m_matrixName!=NULL) { - delete[] this->m_matrixName; - this->m_matrixName = nullptr; + delete[] m_matrixName; + m_matrixName = nullptr; } - if(this->m_format == MatrixFormat::matrixFormatSparseCSC || this->m_format == MatrixFormat::matrixFormatSparseCSR) + if(m_format == 
MatrixFormat::matrixFormatSparseCSC || m_format == MatrixFormat::matrixFormatSparseCSR) { - if(this->m_val != NULL) - delete[] this->m_val; - if(this->m_row != NULL) - delete[] this->m_row; - if(this->m_pb != NULL) - delete[] this->m_pb; + if(m_val != NULL) + delete[] m_val; + if(m_row != NULL) + delete[] m_row; + if(m_pb != NULL) + delete[] m_pb; } - else if (this->m_format == MatrixFormat::matrixFormatSparseBlockCol || this->m_format == MatrixFormat::matrixFormatSparseBlockRow) + else if (m_format == MatrixFormat::matrixFormatSparseBlockCol || m_format == MatrixFormat::matrixFormatSparseBlockRow) { - if(this->m_blockVal != NULL) - delete[] this->m_blockVal; - if(this->m_blockIds != NULL) - delete[] this->m_blockIds; + if(m_blockVal != NULL) + delete[] m_blockVal; + if(m_blockIds != NULL) + delete[] m_blockIds; } } @@ -176,76 +177,76 @@ namespace Microsoft { namespace MSR { namespace CNTK { template void CPUSparseMatrix::SetValue(const size_t rIdx, const size_t cIdx, const ElemType v) { - if(this->m_format != MatrixFormat::matrixFormatSparseCSC && this->m_format != MatrixFormat::matrixFormatSparseCSR) + if(m_format != MatrixFormat::matrixFormatSparseCSC && m_format != MatrixFormat::matrixFormatSparseCSR) { throw std::logic_error("CPUSparseMatrix: unsupported SetValue() call."); } - if(this->m_elemSizeAllocated < this->m_nz +1) { + if(m_elemSizeAllocated < m_nz +1) { throw std::logic_error("CPUSparseMatrix: allocated size is too small."); } - if(rIdx < 0 || rIdx >= this->m_numRows) { + if(rIdx < 0 || rIdx >= m_numRows) { throw std::logic_error("CPUSparseMatrix: SetValue() invalid row id"); } - if(cIdx < 0 || cIdx >= this->m_numCols) { + if(cIdx < 0 || cIdx >= m_numCols) { throw std::logic_error("CPUSparseMatrix: SetValue() invalid column id"); } - size_t r = (this->m_format == matrixFormatSparseCSC) ? rIdx: cIdx; - size_t c = (this->m_format == matrixFormatSparseCSC) ? cIdx: rIdx; + size_t r = (m_format == matrixFormatSparseCSC) ? 
rIdx: cIdx; + size_t c = (m_format == matrixFormatSparseCSC) ? cIdx: rIdx; - this->m_val[this->m_nz] = v; - this->m_row[this->m_nz] = r; + m_val[m_nz] = v; + m_row[m_nz] = r; //consistency check - if(c == this->m_colIdx && r <= this->m_row[this->m_nz-1]) + if(c == m_colIdx && r <= m_row[m_nz-1]) { throw std::logic_error("CPUSparseMatrix: SetValue is not called properly"); } - if (c != this->m_colIdx) + if (c != m_colIdx) { m_pb[c] = m_nz; m_colIdx = (int) c; } - this->m_pb[c+1] = this->m_nz+1; - this->m_nz++; + m_pb[c+1] = m_nz+1; + m_nz++; } template ElemType* CPUSparseMatrix::BufferPointer() const { - if(this->m_format == MatrixFormat::matrixFormatSparseCSC || this->m_format == MatrixFormat::matrixFormatSparseCSR) + if(m_format == MatrixFormat::matrixFormatSparseCSC || m_format == MatrixFormat::matrixFormatSparseCSR) { - return this->m_val; + return m_val; } else { - return this->m_blockVal; + return m_blockVal; } } template void CPUSparseMatrix::Resize(const size_t numRows, const size_t numCols, size_t size) { - this->m_nz = 0; - this->m_colIdx = -1; - this->m_numRows = numRows; - this->m_numCols = numCols; + m_nz = 0; + m_colIdx = -1; + m_numRows = numRows; + m_numCols = numCols; - if(this->m_elemSizeAllocated < size) + if(m_elemSizeAllocated < size) { - this->m_elemSizeAllocated = size; - if(this->m_format == MatrixFormat::matrixFormatSparseCSC || this->m_format == MatrixFormat::matrixFormatSparseCSR) + m_elemSizeAllocated = size; + if(m_format == MatrixFormat::matrixFormatSparseCSC || m_format == MatrixFormat::matrixFormatSparseCSR) { - if(this->m_val != NULL) - delete[] this->m_val; - if(this->m_row != NULL) - delete[] this->m_row; - if(this->m_pb != NULL) - delete[] this->m_pb; + if(m_val != NULL) + delete[] m_val; + if(m_row != NULL) + delete[] m_row; + if(m_pb != NULL) + delete[] m_pb; //int len = m_format == MatrixFormat::matrixFormatSparseCSC ? numCols : numRows; size_t len = numCols > numRows ? 
numCols : numRows; @@ -254,12 +255,12 @@ namespace Microsoft { namespace MSR { namespace CNTK { m_pb = new size_t[len+1]; } - else if(this->m_format == MatrixFormat::matrixFormatSparseBlockCol || this->m_format == MatrixFormat::matrixFormatSparseBlockRow) + else if(m_format == MatrixFormat::matrixFormatSparseBlockCol || m_format == MatrixFormat::matrixFormatSparseBlockRow) { - if(this->m_blockVal != NULL) - delete[] this->m_blockVal; - if(this->m_blockIds != NULL) - delete[] this->m_blockIds; + if(m_blockVal != NULL) + delete[] m_blockVal; + if(m_blockIds != NULL) + delete[] m_blockIds; size_t max = numCols > numRows ? numCols : numRows; m_blockVal = new ElemType[size]; @@ -272,9 +273,9 @@ namespace Microsoft { namespace MSR { namespace CNTK { template void CPUSparseMatrix::Reset() { - this->m_nz = 0; - this->m_colIdx = -1; - this->m_blockSize = 0; + m_nz = 0; + m_colIdx = -1; + m_blockSize = 0; } //c = op(a) * op(this) or c += op(a) * op(this) @@ -498,11 +499,7 @@ namespace Microsoft { namespace MSR { namespace CNTK { } else { -#ifndef LINUX - throw std::exception("CPUSparseMatrix:: ScaleAndAdd() Not implemented"); -#else - throw std::exception(); -#endif /* LINUX */ + throw std::runtime_error("CPUSparseMatrix:: ScaleAndAdd() Not implemented"); } } @@ -522,11 +519,7 @@ namespace Microsoft { namespace MSR { namespace CNTK { throw std::logic_error("AssignSoftmaxOf: Matrix a, class, idx2cls or label is empty."); if(etp.GetFormat() != MatrixFormat::matrixFormatSparseCSC) -#ifndef LINUX - throw std::exception("CPUSparseMatrix:: ClassEntropy() only support CSC"); -#else - throw std::exception(); -#endif /* LINUX */ + throw std::runtime_error("CPUSparseMatrix:: ClassEntropy() only support CSC"); size_t nC = cls.GetNumCols(); size_t nV = label.GetNumRows() - nC; @@ -695,11 +688,11 @@ namespace Microsoft { namespace MSR { namespace CNTK { { if (c.IsEmpty()) { - c.Resize(this->GetNumRows(), this->GetNumCols()); + c.Resize(GetNumRows(), GetNumCols()); c.SetValue(0.0); } - 
if(this->m_format == MatrixFormat::matrixFormatSparseBlockCol || this->m_format == MatrixFormat::matrixFormatSparseBlockRow) + if(m_format == MatrixFormat::matrixFormatSparseBlockCol || m_format == MatrixFormat::matrixFormatSparseBlockRow) { for(size_t j = 0; j < m_blockSize; j++) { @@ -718,11 +711,7 @@ namespace Microsoft { namespace MSR { namespace CNTK { } else { -#ifndef LINUX - throw std::exception("CPUSparseMatrix:: NormalGrad() only support block sparse format"); -#else - throw std::exception(); -#endif /* LINUX */ + throw std::runtime_error("CPUSparseMatrix:: NormalGrad() only support block sparse format"); } } @@ -732,12 +721,12 @@ namespace Microsoft { namespace MSR { namespace CNTK { { if (c.IsEmpty()) { - c.Resize(this->GetNumRows(), this->GetNumCols()); + c.Resize(GetNumRows(), GetNumCols()); c.SetValue(0.0); } const ElemType floor = 1e-16f; - if(this->m_format == MatrixFormat::matrixFormatSparseCSC || this->m_format == MatrixFormat::matrixFormatSparseCSR) + if(m_format == MatrixFormat::matrixFormatSparseCSC || m_format == MatrixFormat::matrixFormatSparseCSR) { size_t col_num = (m_format == MatrixFormat::matrixFormatSparseCSC) ? GetNumCols() : GetNumRows(); for(size_t j = 0; j < col_num; j++) @@ -758,7 +747,7 @@ namespace Microsoft { namespace MSR { namespace CNTK { c(row, col) = adenorm; } } - } else if(this->m_format == MatrixFormat::matrixFormatSparseBlockCol || this->m_format == MatrixFormat::matrixFormatSparseBlockRow) + } else if(m_format == MatrixFormat::matrixFormatSparseBlockCol || m_format == MatrixFormat::matrixFormatSparseBlockRow) { for(size_t j = 0; j < m_blockSize; j++) { @@ -767,7 +756,7 @@ namespace Microsoft { namespace MSR { namespace CNTK { size_t start = j* len; for(size_t p = start; p < start+len; p++) { - ElemType val = this->m_blockVal[p]; + ElemType val = m_blockVal[p]; size_t row = (m_format == MatrixFormat::matrixFormatSparseBlockCol) ? (p - start) : i; size_t col = (m_format == MatrixFormat::matrixFormatSparseBlockCol) ? 
i : (p - start); @@ -784,7 +773,7 @@ namespace Microsoft { namespace MSR { namespace CNTK { template CPUSparseMatrix& CPUSparseMatrix::InplaceTruncate (const ElemType threshold) { - if(this->m_format == MatrixFormat::matrixFormatSparseBlockCol || this->m_format == MatrixFormat::matrixFormatSparseBlockRow) + if(m_format == MatrixFormat::matrixFormatSparseBlockCol || m_format == MatrixFormat::matrixFormatSparseBlockRow) { ElemType locThresholdPos = abs(threshold); ElemType locTHresholdNeg = -locThresholdPos; @@ -795,24 +784,20 @@ namespace Microsoft { namespace MSR { namespace CNTK { size_t start = j* len; for (size_t p = start; p < start+len; p++) { - if (this->m_blockVal[p] > locThresholdPos) + if (m_blockVal[p] > locThresholdPos) { - this->m_blockVal[p] = locThresholdPos; + m_blockVal[p] = locThresholdPos; } - else if (this->m_blockVal[p] < locTHresholdNeg) + else if (m_blockVal[p] < locTHresholdNeg) { - this->m_blockVal[p] = locTHresholdNeg; + m_blockVal[p] = locTHresholdNeg; } } } } else { -#ifndef LINUX - throw std::exception("CPUSparseMatrix:: InplaceTruncate() only support block based sparse matrix"); -#else - throw std::exception(); -#endif /* LINUX */ + throw std::runtime_error("CPUSparseMatrix:: InplaceTruncate() only support block based sparse matrix"); } return *this; } diff --git a/Math/Math/CPUSparseMatrix.h b/Math/Math/CPUSparseMatrix.h index 0de38c087..b9f31eb68 100644 --- a/Math/Math/CPUSparseMatrix.h +++ b/Math/Math/CPUSparseMatrix.h @@ -10,7 +10,7 @@ #include #include -#ifndef LINUX +#ifdef _WIN32 #ifdef MATH_EXPORTS #define MATH_API __declspec(dllexport) #else @@ -40,7 +40,7 @@ namespace Microsoft { namespace MSR { namespace CNTK { void ShiftBy(int /*numShift*/) { NOT_IMPLEMENTED; } - size_t BufferSize() const {return this->m_elemSizeAllocated*sizeof(ElemType);} + size_t BufferSize() const {return m_elemSizeAllocated*sizeof(ElemType);} ElemType* BufferPointer() const; void SetGaussianRandomValue(const ElemType /*mean*/, const ElemType /*sigma*/, 
unsigned long /*seed*/) { NOT_IMPLEMENTED; } diff --git a/Math/Math/CommonMatrix.h b/Math/Math/CommonMatrix.h index da4a501a4..985b4019f 100644 --- a/Math/Math/CommonMatrix.h +++ b/Math/Math/CommonMatrix.h @@ -8,11 +8,6 @@ #include #include -#ifdef LINUX -#define wcsnlen_s wcsnlen /* Not sure if this is best replacement... Malcolm */ -// typedef char wchar_t; -#endif - #define AUTOPLACEMATRIX 1000 // used in parameters only #define MANAGEDEXTERN -2 // managed externally (i.e. PTask) #define CPUDEVICE -1 // device is the CPU @@ -83,9 +78,9 @@ namespace Microsoft { namespace MSR { namespace CNTK { void SetMatrixName(const wchar_t* s) { Clear(); - if (s!=NULL) + if (s!=nullptr) { - size_t n = wcsnlen_s(s, SIZE_MAX); + size_t n = wcslen(s); m_matrixName = new wchar_t[n+1]; wmemcpy(m_matrixName,s,n+1); } @@ -108,10 +103,10 @@ namespace Microsoft { namespace MSR { namespace CNTK { protected: void Clear() { - if (m_matrixName!=NULL) + if (m_matrixName!=nullptr) { delete[] m_matrixName; - m_matrixName = NULL; + m_matrixName = nullptr; } } diff --git a/Math/Math/GPUDummy.cpp b/Math/Math/GPUDummy.cpp index 5528816e5..f0648237b 100644 --- a/Math/Math/GPUDummy.cpp +++ b/Math/Math/GPUDummy.cpp @@ -3,6 +3,9 @@ // Copyright (c) Microsoft Corporation. All rights reserved. 
// // + +#ifdef CPUONLY + #include "GPUMatrix.cuh" #include "GPUSparseMatrix.cuh" @@ -1661,7 +1664,4 @@ GPUWatcher::~GPUWatcher(void) { } - - - - +#endif // CPUONLY diff --git a/Math/Math/GPUMatrix.cu b/Math/Math/GPUMatrix.cu index 692b059a3..116b1555f 100644 --- a/Math/Math/GPUMatrix.cu +++ b/Math/Math/GPUMatrix.cu @@ -27,11 +27,11 @@ bool do_sync = false; bool do_sync = true; #endif -#ifndef LINUX +#ifdef _WIN32 // thread local storage to access the current stream, initalize to default stream -__declspec( thread ) -#endif /* LINUX */ - cudaStream_t t_stream = cudaStreamDefault; +__declspec (thread) +#endif +cudaStream_t t_stream = cudaStreamDefault; extern int _ConvertSMVer2Cores(int major, int minor); @@ -52,11 +52,7 @@ void CURAND_CALL(curandStatus x) { if(x!=CURAND_STATUS_SUCCESS) { -#ifndef LINUX - throw std::exception("CURAND fail"); -#else /* LINUX */ - throw std::exception(); -#endif /* LINUX */ + throw std::runtime_error("CURAND fail"); } } @@ -64,11 +60,7 @@ void CUBLAS_CALL(cublasStatus_t x) { if(x!=CUBLAS_STATUS_SUCCESS) { -#ifndef LINUX - throw std::exception("CUBLAS fail"); -#else /* LINUX */ - throw std::exception(); -#endif /* LINUX */ + throw std::runtime_error("CUBLAS fail"); } } @@ -79,11 +71,7 @@ void CUDA_CALL(cudaError_t x) const char* errmsg = cudaGetErrorString(x); std::cout<<"!!!!!!!!CUDA EXCEPTION: "< DeviceBoundNumber::DeviceBoundNumber(DeviceBoundNumber &&shallowCopy) { - this->ShallowCopyFrom(shallowCopy.m_data,shallowCopy.m_computeDevice); + ShallowCopyFrom(shallowCopy.m_data,shallowCopy.m_computeDevice); shallowCopy.m_data=NULL; } -#endif template void DeviceBoundNumber::ShallowCopyFrom(ElemType* newVal,int newValsDevceId) { - this->m_computeDevice = newValsDevceId; - this->m_data = newVal; + m_computeDevice = newValsDevceId; + m_data = newVal; } template DeviceBoundNumber::~DeviceBoundNumber() { - if (this->m_data!=NULL) + if (m_data!=NULL) { - if (this->m_computeDevice<0) + if (m_computeDevice<0) { - delete this->m_data; - 
this->m_data = NULL; + delete m_data; + m_data = NULL; } - else if (this->m_computeDevice != MANAGEDEXTERN) - CUDA_CALL(cudaFree(this->m_data)); + else if (m_computeDevice != MANAGEDEXTERN) + CUDA_CALL(cudaFree(m_data)); } } @@ -166,7 +152,7 @@ namespace Microsoft { namespace MSR { namespace CNTK { if (chosenDeviceId != AUTOPLACEMATRIX) return chosenDeviceId; - try + __try { // stash previous device state // if there was one on entry: @@ -207,7 +193,7 @@ namespace Microsoft { namespace MSR { namespace CNTK { chosenDeviceId = curDev; return curDev; } - catch (int e) + __except (1) { return -1; // CPU } @@ -228,12 +214,12 @@ namespace Microsoft { namespace MSR { namespace CNTK { template ElemType* GPUMatrix::CopyToArray() const { - size_t numElements = this->GetNumElements(); + size_t numElements = GetNumElements(); if (numElements != 0) { PrepareDevice(); ElemType* pArray = new ElemType[numElements]; - CUDA_CALL(cudaMemcpy(pArray,this->m_pArray,sizeof(ElemType)*this->m_numRows*this->m_numCols,cudaMemcpyDeviceToHost)); + CUDA_CALL(cudaMemcpy(pArray,m_pArray,sizeof(ElemType)*m_numRows*m_numCols,cudaMemcpyDeviceToHost)); return pArray; } else @@ -247,7 +233,7 @@ namespace Microsoft { namespace MSR { namespace CNTK { template size_t GPUMatrix::CopyToArray(ElemType*& arrayCopyTo, size_t& currentArraySize) const { - size_t numElements = this->GetNumElements(); + size_t numElements = GetNumElements(); if (numElements > currentArraySize) { @@ -259,7 +245,7 @@ namespace Microsoft { namespace MSR { namespace CNTK { if (numElements != 0) { PrepareDevice(); - CUDA_CALL(cudaMemcpy(arrayCopyTo, this->m_pArray, sizeof(ElemType)*numElements, cudaMemcpyDeviceToHost)); + CUDA_CALL(cudaMemcpy(arrayCopyTo, m_pArray, sizeof(ElemType)*numElements, cudaMemcpyDeviceToHost)); } return numElements; @@ -268,29 +254,29 @@ namespace Microsoft { namespace MSR { namespace CNTK { template void GPUMatrix::ChangeDeviceTo(int to_id) { - if (!this->OwnBuffer()) + if (!OwnBuffer()) throw 
std::logic_error("Cannot change device on Managed external matrix"); if (to_id == CPUDEVICE) throw std::logic_error("to_id must be valid GPU"); - if (this->m_computeDevice==to_id) + if (m_computeDevice==to_id) return; PrepareDevice((short)to_id); ElemType* d_dst=NULL; - CUDA_CALL(cudaMalloc((void**)&d_dst,sizeof(ElemType)*this->m_numRows*this->m_numCols)); + CUDA_CALL(cudaMalloc((void**)&d_dst,sizeof(ElemType)*m_numRows*m_numCols)); - this->m_elemSizeAllocated = this->m_numRows*this->m_numCols; + m_elemSizeAllocated = m_numRows*m_numCols; // check to make sure we have something to copy (on init we often have zero sized allocations) - if (this->m_elemSizeAllocated > 0) + if (m_elemSizeAllocated > 0) { // first try peer access int canAccessPeer = false; - CUDA_CALL(cudaDeviceCanAccessPeer(&canAccessPeer, to_id, this->m_computeDevice)); + CUDA_CALL(cudaDeviceCanAccessPeer(&canAccessPeer, to_id, m_computeDevice)); if (canAccessPeer) { - CUDA_CALL(cudaDeviceEnablePeerAccess(this->m_computeDevice, 0)); - CUDA_CALL(cudaMemcpyPeer(d_dst,to_id,this->m_pArray,this->m_computeDevice,sizeof(ElemType)*this->m_numRows*this->m_numCols)); + CUDA_CALL(cudaDeviceEnablePeerAccess(m_computeDevice, 0)); + CUDA_CALL(cudaMemcpyPeer(d_dst,to_id,m_pArray,m_computeDevice,sizeof(ElemType)*m_numRows*m_numCols)); } else { @@ -306,8 +292,8 @@ namespace Microsoft { namespace MSR { namespace CNTK { } } PrepareDevice(); - CUDA_CALL(cudaFree(this->m_pArray)); - this->m_pArray=d_dst; + CUDA_CALL(cudaFree(m_pArray)); + m_pArray=d_dst; PrepareDevice((short)to_id); m_computeDevice=to_id; @@ -317,38 +303,38 @@ namespace Microsoft { namespace MSR { namespace CNTK { void GPUMatrix::performInplaceFunction(int kind) { PrepareDevice(); - LONG64 N= (LONG64) this->GetNumElements(); + LONG64 N= (LONG64) GetNumElements(); int blocksPerGrid =(int)ceil(1.0*N/threadsPerBlock); cudaEvent_t done = nullptr; if (do_sync) CUDA_CALL(cudaEventCreate(&done)); switch (kind) { case 0: - 
_inplaceSigmoidOnCuda<<>>(this->m_pArray, N); + _inplaceSigmoidOnCuda<<>>(m_pArray, N); break; case 1: - _inplaceTanhOnCuda<<>>(this->m_pArray, N); + _inplaceTanhOnCuda<<>>(m_pArray, N); break; case 2: - _inplaceSqrtOnCuda<<>>(this->m_pArray, N); + _inplaceSqrtOnCuda<<>>(m_pArray, N); break; case 3: - _inplaceExpOnCuda<<>>(this->m_pArray,N); + _inplaceExpOnCuda<<>>(m_pArray,N); break; case 4: - _inplaceLogOnCuda<<>>(this->m_pArray,N); + _inplaceLogOnCuda<<>>(m_pArray,N); break; case 5: - _inplaceAbsOnCuda<<>>(this->m_pArray,N); + _inplaceAbsOnCuda<<>>(m_pArray,N); break; case 6: - _inplaceLinRectDerivative<<>>(this->m_pArray,N); + _inplaceLinRectDerivative<<>>(m_pArray,N); break; case 7: - _inplaceCosineOnCuda<<>>(this->m_pArray,N); + _inplaceCosineOnCuda<<>>(m_pArray,N); break; case 8: - _inplaceNegativeSineOnCuda<<>>(this->m_pArray,N); + _inplaceNegativeSineOnCuda<<>>(m_pArray,N); break; } if (do_sync) CUDA_CALL(cudaEventRecord(done)); @@ -365,14 +351,14 @@ namespace Microsoft { namespace MSR { namespace CNTK { template void GPUMatrix::ZeroInit(int deviceId) { - this->m_computeDevice = deviceId; - this->m_pArray = NULL; - this->m_numRows = 0; - this->m_numCols = 0; - this->m_elemSizeAllocated = 0; - this->m_matrixName=NULL; - this->m_format = matrixFormatDense; - this->m_externalBuffer = false; + m_computeDevice = deviceId; + m_pArray = nullptr; + m_numRows = 0; + m_numCols = 0; + m_elemSizeAllocated = 0; + m_matrixName=NULL; + m_format = matrixFormatDense; + m_externalBuffer = false; } template @@ -400,15 +386,15 @@ namespace Microsoft { namespace MSR { namespace CNTK { if (deviceId == MANAGEDEXTERN) throw std::logic_error("constructor cannot be used with Managed Extern types"); ZeroInit(deviceId); - this->m_numRows = numRows; - this->m_numCols = numCols; - this->m_elemSizeAllocated = this->GetNumElements(); + m_numRows = numRows; + m_numCols = numCols; + m_elemSizeAllocated = GetNumElements(); - if (this->m_elemSizeAllocated != 0) + if (m_elemSizeAllocated != 
0) { PrepareDevice(); - CUDA_CALL(cudaMalloc((void**)&this->m_pArray,sizeof(ElemType)*this->m_elemSizeAllocated)); - CUDA_CALL(cudaMemset(this->m_pArray,0,sizeof(ElemType)*this->m_elemSizeAllocated)); + CUDA_CALL(cudaMalloc((void**)&m_pArray,sizeof(ElemType)*m_elemSizeAllocated)); + CUDA_CALL(cudaMemset(m_pArray,0,sizeof(ElemType)*m_elemSizeAllocated)); } }; @@ -424,26 +410,24 @@ namespace Microsoft { namespace MSR { namespace CNTK { { ZeroInit(deepCopyFrom.m_computeDevice); SetValue(deepCopyFrom); - this->SetMatrixName(deepCopyFrom.m_matrixName); + SetMatrixName(deepCopyFrom.m_matrixName); } -#ifndef LINUX template GPUMatrix::GPUMatrix(GPUMatrix&& moveFrom) { - this->m_numRows = moveFrom.m_numRows; - this->m_numCols = moveFrom.m_numCols; - this->m_computeDevice = moveFrom.m_computeDevice; - this->m_pArray = moveFrom.m_pArray; //shallow copy the pointer - this->m_matrixName=moveFrom.m_matrixName; - this->m_elemSizeAllocated = moveFrom.m_elemSizeAllocated; - this->m_format = moveFrom.m_format; - this->m_externalBuffer = moveFrom.m_externalBuffer; + m_numRows = moveFrom.m_numRows; + m_numCols = moveFrom.m_numCols; + m_computeDevice = moveFrom.m_computeDevice; + m_pArray = moveFrom.m_pArray; //shallow copy the pointer + m_matrixName=moveFrom.m_matrixName; + m_elemSizeAllocated = moveFrom.m_elemSizeAllocated; + m_format = moveFrom.m_format; + m_externalBuffer = moveFrom.m_externalBuffer; //release the pointer from the source object so that the destructor won't release it twice moveFrom.ZeroInit(0); } -#endif //assignment operator, deep copy template @@ -452,37 +436,35 @@ namespace Microsoft { namespace MSR { namespace CNTK { if (this != &deepCopyFrom) { SetValue(deepCopyFrom); - this->SetMatrixName(deepCopyFrom.m_matrixName); + SetMatrixName(deepCopyFrom.m_matrixName); } return *this; } -#ifndef LINUX //move assignment operator, shallow copy template GPUMatrix& GPUMatrix::operator=(GPUMatrix&& moveFrom) { if (this != &moveFrom) { - if (this->OwnBuffer() && 
this->m_pArray!=NULL) + if (OwnBuffer() && m_pArray!=NULL) { - CUDA_CALL(cudaFree(this->m_pArray)); + CUDA_CALL(cudaFree(m_pArray)); } - this->m_numRows = moveFrom.m_numRows; - this->m_numCols = moveFrom.m_numCols; - this->m_elemSizeAllocated = moveFrom.m_elemSizeAllocated; - this->m_pArray = moveFrom.m_pArray; - this->m_computeDevice = moveFrom.m_computeDevice; - this->m_format = moveFrom.m_format; - this->m_externalBuffer = moveFrom.m_externalBuffer; + m_numRows = moveFrom.m_numRows; + m_numCols = moveFrom.m_numCols; + m_elemSizeAllocated = moveFrom.m_elemSizeAllocated; + m_pArray = moveFrom.m_pArray; + m_computeDevice = moveFrom.m_computeDevice; + m_format = moveFrom.m_format; + m_externalBuffer = moveFrom.m_externalBuffer; //release the pointer from the source object so that the destructor won't release it twice moveFrom.ZeroInit(0); } return *this; } -#endif /* LINUX */ template GPUMatrix::~GPUMatrix(void) @@ -493,19 +475,19 @@ namespace Microsoft { namespace MSR { namespace CNTK { template void GPUMatrix::Clear() { - if (this->OwnBuffer() && this->m_pArray!=NULL) + if (OwnBuffer() && m_pArray!=NULL) { - if (this->m_computeDevice>=0) + if (m_computeDevice>=0) { PrepareDevice(); - cudaFree(this->m_pArray); - this->m_pArray = NULL; - this->m_elemSizeAllocated = 0; + cudaFree(m_pArray); + m_pArray = NULL; + m_elemSizeAllocated = 0; } } BaseMatrix::Clear(); - ZeroInit(this->m_computeDevice); + ZeroInit(m_computeDevice); } #pragma endregion Constructors and Destructor @@ -513,14 +495,14 @@ namespace Microsoft { namespace MSR { namespace CNTK { int GPUMatrix::GetComputeDeviceId() const { // for externally managed memory the CUDA context will have the current device - if (this->m_computeDevice == MANAGEDEXTERN) + if (m_computeDevice == MANAGEDEXTERN) { int devId; - assert(this->m_externalBuffer); + assert(m_externalBuffer); CUDA_CALL(cudaGetDevice(&devId)); return devId; } - return this->m_computeDevice; + return m_computeDevice; } #pragma region Basic Operators @@ 
-530,10 +512,10 @@ namespace Microsoft { namespace MSR { namespace CNTK { if (numCols == 0) throw std::logic_error("The slice cannot have 0 columns."); - if (startColumn + numCols > this->m_numCols) + if (startColumn + numCols > m_numCols) throw std::logic_error("The slice is out of range of the source matrix."); - GPUMatrix slice(this->m_numRows, numCols, this->m_pArray + startColumn * this->m_numRows, matrixFlagDontOwnBuffer, this->m_computeDevice); + GPUMatrix slice(m_numRows, numCols, m_pArray + startColumn * m_numRows, matrixFlagDontOwnBuffer, m_computeDevice); return slice; } @@ -544,19 +526,19 @@ namespace Microsoft { namespace MSR { namespace CNTK { if (numCols == 0) throw std::logic_error("The slice cannot have 0 columns."); - if (startColumn + numCols > this->m_numCols) + if (startColumn + numCols > m_numCols) throw std::logic_error("The slice is out of range of the source matrix."); Clear(); - this->m_computeDevice=fromMatrix.m_computeDevice; - this->m_externalBuffer=true; - this->m_numRows = fromMatrix.m_numRows; - this->m_pArray=fromMatrix.m_pArray + startColumn * this->m_numRows; + m_computeDevice=fromMatrix.m_computeDevice; + m_externalBuffer=true; + m_numRows = fromMatrix.m_numRows; + m_pArray=fromMatrix.m_pArray + startColumn * m_numRows; - this->m_elemSizeAllocated = this->GetNumElements(); - this->m_matrixName=NULL; - this->m_format = fromMatrix.m_format; + m_elemSizeAllocated = GetNumElements(); + m_matrixName=NULL; + m_format = fromMatrix.m_format; return *this; } @@ -574,12 +556,12 @@ namespace Microsoft { namespace MSR { namespace CNTK { Resize(numRows, a.GetNumCols()); - LONG64 N=(LONG64)this->GetNumElements(); + LONG64 N=(LONG64)GetNumElements(); int blocksPerGrid =(int)ceil(1.0*N/threadsPerBlock); PrepareDevice(); cudaEvent_t done = nullptr; if (do_sync) CUDA_CALL(cudaEventCreate(&done)); - _assignRowSliceValuesOf<<>>(this->m_pArray, a.m_pArray, N, (long)startIndex, (long)numRows, (long)a.GetNumRows()); + 
_assignRowSliceValuesOf<<>>(m_pArray, a.m_pArray, N, (long)startIndex, (long)numRows, (long)a.GetNumRows()); if (do_sync) CUDA_CALL(cudaEventRecord(done)); if (do_sync) CUDA_CALL(cudaEventSynchronize(done)); if (do_sync) CUDA_CALL(cudaEventDestroy(done)); @@ -597,10 +579,10 @@ namespace Microsoft { namespace MSR { namespace CNTK { if (a.GetNumRows() != numRows) throw std::logic_error("AddToRowSliceValuesOf: a.GetNumRows() != numRows."); - if (startIndex + numRows > this->GetNumRows()) + if (startIndex + numRows > GetNumRows()) throw std::logic_error("AddToRowSliceValuesOf: startIndex + numRows exceeds GetNumRows()."); - if (a.GetNumCols() != this->GetNumCols()) + if (a.GetNumCols() != GetNumCols()) throw std::logic_error("AddToRowSliceValuesOf: columns does not match."); LONG64 N=(LONG64)a.GetNumElements(); @@ -608,7 +590,7 @@ namespace Microsoft { namespace MSR { namespace CNTK { PrepareDevice(); cudaEvent_t done = nullptr; if (do_sync) CUDA_CALL(cudaEventCreate(&done)); - _addToRowSliceValuesOf<<>>(this->m_pArray, a.m_pArray, N, (long)startIndex, (long)this->GetNumRows(), (long)a.GetNumRows()); + _addToRowSliceValuesOf<<>>(m_pArray, a.m_pArray, N, (long)startIndex, (long)GetNumRows(), (long)a.GetNumRows()); if (do_sync) CUDA_CALL(cudaEventRecord(done)); if (do_sync) CUDA_CALL(cudaEventSynchronize(done)); if (do_sync) CUDA_CALL(cudaEventDestroy(done)); @@ -624,7 +606,7 @@ namespace Microsoft { namespace MSR { namespace CNTK { throw std::logic_error("AddWithRowSliceValuesOf: input matrix a is empty."); if (GetNumRows() != numRows) - throw std::logic_error("AddWithRowSliceValuesOf: this->GetNumRows() != numRows."); + throw std::logic_error("AddWithRowSliceValuesOf: GetNumRows() != numRows."); if (startIndex + numRows > a.GetNumRows()) throw std::logic_error("AddWithRowSliceValuesOf: startIndex + numRows exceeds a.GetNumRows()."); @@ -673,10 +655,10 @@ namespace Microsoft { namespace MSR { namespace CNTK { template GPUMatrix GPUMatrix::Transpose() const { - if 
(this->IsEmpty()) + if (IsEmpty()) throw std::logic_error("Transpose: Matrix is empty."); - GPUMatrix c(this->GetComputeDeviceId()); + GPUMatrix c(GetComputeDeviceId()); c.AssignTransposeOf(*this); return c; } @@ -713,7 +695,7 @@ namespace Microsoft { namespace MSR { namespace CNTK { if (a.IsEmpty()) throw std::logic_error("AssignTransposeOf: Matrix a is empty."); - if (this->GetNumRows()!=a.GetNumCols() || this->GetNumCols()!=a.GetNumRows()) + if (GetNumRows()!=a.GetNumCols() || GetNumCols()!=a.GetNumRows()) Resize(a.GetNumCols(), a.GetNumRows()); cublasHandle_t cuHandle = GetCublasHandle(a.GetComputeDeviceId()); @@ -726,46 +708,38 @@ namespace Microsoft { namespace MSR { namespace CNTK { cublasStatus_t st; if (sizeof(ElemType)==sizeof(float)) { - st = cublasSgeam(cuHandle,transA,transB,m,n,reinterpret_cast(&alpha),reinterpret_cast(a.m_pArray),(int)a.m_numRows,reinterpret_cast(&beta),reinterpret_cast(a.m_pArray),(int)a.m_numRows,reinterpret_cast(this->m_pArray),(int)this->m_numRows); + st = cublasSgeam(cuHandle,transA,transB,m,n,reinterpret_cast(&alpha),reinterpret_cast(a.m_pArray),(int)a.m_numRows,reinterpret_cast(&beta),reinterpret_cast(a.m_pArray),(int)a.m_numRows,reinterpret_cast(m_pArray),(int)m_numRows); } else if (sizeof(ElemType)==sizeof(double)) { - st = cublasDgeam(cuHandle,transA,transB,m,n,reinterpret_cast(&alpha),reinterpret_cast(a.m_pArray),(int)a.m_numRows,reinterpret_cast(&beta),reinterpret_cast(a.m_pArray),(int)a.m_numRows,reinterpret_cast(this->m_pArray),(int)this->m_numRows); + st = cublasDgeam(cuHandle,transA,transB,m,n,reinterpret_cast(&alpha),reinterpret_cast(a.m_pArray),(int)a.m_numRows,reinterpret_cast(&beta),reinterpret_cast(a.m_pArray),(int)a.m_numRows,reinterpret_cast(m_pArray),(int)m_numRows); } else { -#ifndef LINUX - throw std::exception("Unsupported template argument in GPUMatrix"); -#else - throw std::exception(); -#endif /* LINUX */ + throw std::runtime_error("Unsupported template argument in GPUMatrix"); } if 
(st!=CUBLAS_STATUS_SUCCESS) { -#ifndef LINUX - throw std::exception("AssignTransposeOf failed"); -#else - throw std::exception(); -#endif /* LINUX */ + throw std::runtime_error("AssignTransposeOf failed"); } - this->m_numRows=a.m_numCols; - this->m_numCols=a.m_numRows; - this->SetMatrixName(a.GetMatrixName()); + m_numRows=a.m_numCols; + m_numCols=a.m_numRows; + SetMatrixName(a.GetMatrixName()); return *this; } template void GPUMatrix::SetValue(const ElemType v) { - if (this->IsEmpty()) + if (IsEmpty()) throw std::logic_error("SetValue: Matrix is empty."); - LONG64 N=(LONG64)this->GetNumElements(); + LONG64 N=(LONG64)GetNumElements(); int blocksPerGrid =(int)ceil(1.0*N/threadsPerBlock); PrepareDevice(); cudaEvent_t done = nullptr; if (do_sync) CUDA_CALL(cudaEventCreate(&done)); - _setValue<<>>(this->m_pArray,v,N); + _setValue<<>>(m_pArray,v,N); if (do_sync) CUDA_CALL(cudaEventRecord(done)); if (do_sync) CUDA_CALL(cudaEventSynchronize(done)); if (do_sync) CUDA_CALL(cudaEventDestroy(done)); @@ -774,15 +748,15 @@ namespace Microsoft { namespace MSR { namespace CNTK { template void GPUMatrix::SetValue(const ElemType* d_v) //d_v is pointer to the the value in GPU memory { - if (this->IsEmpty()) + if (IsEmpty()) throw std::logic_error("SetValue: Matrix is empty."); - LONG64 N=(LONG64)this->GetNumElements(); + LONG64 N=(LONG64)GetNumElements(); int blocksPerGrid =(int)ceil(1.0*N/threadsPerBlock); PrepareDevice(); cudaEvent_t done = nullptr; if (do_sync) CUDA_CALL(cudaEventCreate(&done)); - _setValue<<>>(this->m_pArray,d_v,N); + _setValue<<>>(m_pArray,d_v,N); if (do_sync) CUDA_CALL(cudaEventRecord(done)); if (do_sync) CUDA_CALL(cudaEventSynchronize(done)); if (do_sync) CUDA_CALL(cudaEventDestroy(done)); @@ -791,11 +765,11 @@ namespace Microsoft { namespace MSR { namespace CNTK { template void GPUMatrix::SetColumn(const ElemType* colPointer, size_t colInd) { - if (this->IsEmpty()) + if (IsEmpty()) throw std::logic_error("SetValue: Matrix is empty."); if (colPointer==NULL) 
return; - CUDA_CALL(cudaMemcpy(this->m_pArray+LocateColumn(colInd),colPointer,sizeof(ElemType)*this->m_numRows,cudaMemcpyHostToDevice)); + CUDA_CALL(cudaMemcpy(m_pArray+LocateColumn(colInd),colPointer,sizeof(ElemType)*m_numRows,cudaMemcpyHostToDevice)); } template @@ -805,10 +779,10 @@ namespace Microsoft { namespace MSR { namespace CNTK { return; Resize(deepCopyFrom.GetNumRows(), deepCopyFrom.GetNumCols()); - this->m_format = deepCopyFrom.m_format; // copy the format over just to be sure + m_format = deepCopyFrom.m_format; // copy the format over just to be sure size_t cpSize = deepCopyFrom.GetNumRows() * deepCopyFrom.GetNumCols(); if (cpSize != 0) - CUDA_CALL(cudaMemcpy(this->m_pArray,deepCopyFrom.m_pArray,cpSize*sizeof(ElemType),cudaMemcpyDeviceToDevice)); + CUDA_CALL(cudaMemcpy(m_pArray,deepCopyFrom.m_pArray,cpSize*sizeof(ElemType),cudaMemcpyDeviceToDevice)); } template @@ -818,30 +792,30 @@ namespace Microsoft { namespace MSR { namespace CNTK { if (matrixFlags&matrixFlagDontOwnBuffer) { // free the existing array if it used to be an owned array - if (this->OwnBuffer() && this->m_pArray!=NULL) + if (OwnBuffer() && m_pArray!=NULL) { PrepareDevice(); - CUDA_CALL(cudaFree(this->m_pArray)); + CUDA_CALL(cudaFree(m_pArray)); } - this->m_numRows = numRows; - this->m_numCols = numCols; - this->m_pArray = pArray; - this->m_elemSizeAllocated = this->GetNumElements(); - this->m_matrixName = NULL; - this->m_format = matrixFormatDense; - this->m_externalBuffer = true; - this->m_computeDevice = deviceId; + m_numRows = numRows; + m_numCols = numCols; + m_pArray = pArray; + m_elemSizeAllocated = GetNumElements(); + m_matrixName = NULL; + m_format = matrixFormatDense; + m_externalBuffer = true; + m_computeDevice = deviceId; } else { // if didn't previously own the buffer, wipe it clean - if (!this->OwnBuffer()) + if (!OwnBuffer()) { ZeroInit(deviceId); } // if the devices are different move it now - if (this->m_computeDevice != deviceId && deviceId >= 0) + if (m_computeDevice 
!= deviceId && deviceId >= 0) { Clear(); ZeroInit(deviceId); @@ -849,7 +823,7 @@ namespace Microsoft { namespace MSR { namespace CNTK { // now resize/allocate as necessary Resize(numRows, numCols); - this->m_externalBuffer = false; + m_externalBuffer = false; // copy over the content to the buffer PrepareDevice(); @@ -857,32 +831,28 @@ namespace Microsoft { namespace MSR { namespace CNTK { { if (!(matrixFlags&matrixFormatRowMajor)) { - CUDA_CALL(cudaMemcpy(this->m_pArray, pArray, sizeof(ElemType)*this->GetNumElements(), + CUDA_CALL(cudaMemcpy(m_pArray, pArray, sizeof(ElemType)*GetNumElements(), (matrixFlags&matrixFlagSetValueOnDevice)?cudaMemcpyDeviceToDevice:cudaMemcpyHostToDevice)); } else { -#ifndef LINUX - throw std::exception("Row major isn't implemented"); -#else - throw std::exception(); -#endif /* LINUX */ + throw std::runtime_error("Row major isn't implemented"); } } } - this->m_format = matrixFormatDense; + m_format = matrixFormatDense; } template void GPUMatrix::SetDiagonalValue(const ElemType v) { - unsigned long N=(unsigned long)this->GetNumRows(); + unsigned long N=(unsigned long)GetNumRows(); int blocksPerGrid =(int)ceil(1.0*N/threadsPerBlock); PrepareDevice(); cudaEvent_t done = nullptr; if (do_sync) CUDA_CALL(cudaEventCreate(&done)); - _setDiagonalValue<<>>(this->m_pArray,v,N,(unsigned long)this->GetNumRows()); + _setDiagonalValue<<>>(m_pArray,v,N,(unsigned long)GetNumRows()); if (do_sync) CUDA_CALL(cudaEventRecord(done)); if (do_sync) CUDA_CALL(cudaEventSynchronize(done)); if (do_sync) CUDA_CALL(cudaEventDestroy(done)); @@ -891,10 +861,10 @@ namespace Microsoft { namespace MSR { namespace CNTK { template void GPUMatrix::SetDiagonalValue(GPUMatrix& vector) { - if (this->IsEmpty() || vector.IsEmpty()) + if (IsEmpty() || vector.IsEmpty()) throw std::logic_error("SetDiagonalValue: Matrix is empty."); - if (this->GetNumRows() != this->GetNumCols()) + if (GetNumRows() != GetNumCols()) throw std::logic_error("SetDiagonalValue: NumRows and NumCols do not 
agree."); if (vector.GetNumRows() != 1 && vector.GetNumCols() != 1) @@ -903,16 +873,16 @@ namespace Microsoft { namespace MSR { namespace CNTK { if (vector.GetNumElements() == 1) //reduce to simple form SetDiagonalValue(vector.m_pArray[0]); - else if (vector.GetNumRows() != this->GetNumRows()) + else if (vector.GetNumRows() != GetNumRows()) throw std::logic_error("SetDiagonalValue: input vector's dimension does not agree with [this]."); else { - long N=(long)this->GetNumRows(); + long N=(long)GetNumRows(); int blocksPerGrid =(int)ceil(1.0*N/threadsPerBlock); PrepareDevice(); cudaEvent_t done = nullptr; if (do_sync) CUDA_CALL(cudaEventCreate(&done)); - _setDiagonalValueFromVector<<>>(this->m_pArray,vector.m_pArray,N); + _setDiagonalValueFromVector<<>>(m_pArray,vector.m_pArray,N); if (do_sync) CUDA_CALL(cudaEventRecord(done)); if (do_sync) CUDA_CALL(cudaEventSynchronize(done)); if (do_sync) CUDA_CALL(cudaEventDestroy(done)); @@ -936,11 +906,11 @@ namespace Microsoft { namespace MSR { namespace CNTK { CUDA_CALL(cudaEventCreate(&done)); if (sizeof(ElemType)==sizeof(float)) { - CURAND_CALL(curandGenerateUniform(((curandGenerator_t*)s_curandGenerator)[0], reinterpret_cast(this->m_pArray), this->GetNumElements())); + CURAND_CALL(curandGenerateUniform(((curandGenerator_t*)s_curandGenerator)[0], reinterpret_cast(m_pArray), GetNumElements())); } else { - CURAND_CALL(curandGenerateUniformDouble(((curandGenerator_t*)s_curandGenerator)[0], reinterpret_cast(this->m_pArray), this->GetNumElements())); + CURAND_CALL(curandGenerateUniformDouble(((curandGenerator_t*)s_curandGenerator)[0], reinterpret_cast(m_pArray), GetNumElements())); } CUDA_CALL(cudaEventRecord(done)); CUDA_CALL(cudaEventSynchronize(done)); @@ -951,7 +921,7 @@ namespace Microsoft { namespace MSR { namespace CNTK { size_t blocksPerGrid = (size_t)ceil(N/(double)threadsPerBlock); if (do_sync) CUDA_CALL(cudaEventCreate(&done)); - _rescaleToRange<<>>(this->m_pArray,N,low,high); + 
_rescaleToRange<<>>(m_pArray,N,low,high); if (do_sync) CUDA_CALL(cudaEventRecord(done)); if (do_sync) CUDA_CALL(cudaEventSynchronize(done)); if (do_sync) CUDA_CALL(cudaEventDestroy(done)); @@ -1000,11 +970,11 @@ namespace Microsoft { namespace MSR { namespace CNTK { CUDA_CALL(cudaEventCreate(&done)); if (sizeof(ElemType)==sizeof(float)) { - CURAND_CALL(curandGenerateUniform((((curandGenerator_t*)s_curandGenerator)[0]), reinterpret_cast(this->m_pArray), this->GetNumElements())); + CURAND_CALL(curandGenerateUniform((((curandGenerator_t*)s_curandGenerator)[0]), reinterpret_cast(m_pArray), GetNumElements())); } else { - CURAND_CALL(curandGenerateUniformDouble((((curandGenerator_t*)s_curandGenerator)[0]), reinterpret_cast(this->m_pArray), this->GetNumElements())); + CURAND_CALL(curandGenerateUniformDouble((((curandGenerator_t*)s_curandGenerator)[0]), reinterpret_cast(m_pArray), GetNumElements())); } CUDA_CALL(cudaEventRecord(done)); CUDA_CALL(cudaEventSynchronize(done)); @@ -1014,7 +984,7 @@ namespace Microsoft { namespace MSR { namespace CNTK { size_t N=GetNumElements(); size_t blocksPerGrid = (size_t)ceil(N/(double)threadsPerBlock); if (do_sync) CUDA_CALL(cudaEventCreate(&done)); - _setMaskAndScale<<>>(this->m_pArray,N,maskRate,scaleValue); + _setMaskAndScale<<>>(m_pArray,N,maskRate,scaleValue); if (do_sync) CUDA_CALL(cudaEventRecord(done)); if (do_sync) CUDA_CALL(cudaEventSynchronize(done)); if (do_sync) CUDA_CALL(cudaEventDestroy(done)); @@ -1023,16 +993,16 @@ namespace Microsoft { namespace MSR { namespace CNTK { template void GPUMatrix::Adagrad(GPUMatrix& gradients) { - if (this->IsEmpty()) + if (IsEmpty()) { - this->Resize(gradients.GetNumRows(), gradients.GetNumCols()); - this->SetValue(0.0); + Resize(gradients.GetNumRows(), gradients.GetNumCols()); + SetValue(0.0); } - assert(this->GetNumRows() == gradients.GetNumRows() && this->GetNumCols() == gradients.GetNumCols()); + assert(GetNumRows() == gradients.GetNumRows() && GetNumCols() == gradients.GetNumCols()); - 
int blocksPerGrid = (this->GetNumElements() + threadsPerBlock -1 )/threadsPerBlock; - _adagrad<<>>(this->m_pArray, gradients.m_pArray, this->GetNumElements()); + int blocksPerGrid = (GetNumElements() + threadsPerBlock -1 )/threadsPerBlock; + _adagrad<<>>(m_pArray, gradients.m_pArray, GetNumElements()); } template @@ -1050,10 +1020,10 @@ namespace Microsoft { namespace MSR { namespace CNTK { size_t n = gradients.GetNumElements(); int blocksPerGrid = (GetNumElements() + threadsPerBlock -1 )/threadsPerBlock; - if (this->IsEmpty() || this->GetNumCols() < gradients.GetNumCols() * 3) + if (IsEmpty() || GetNumCols() < gradients.GetNumCols() * 3) { - this->Resize(gradients.GetNumRows(), gradients.GetNumCols() * 3); - this->SetValue(0.0); + Resize(gradients.GetNumRows(), gradients.GetNumCols() * 3); + SetValue(0.0); ElemType *avars=m_pArray; // accumulated variances for RMS scaling ElemType *signs=m_pArray+n; // sign of previous gradient @@ -1067,7 +1037,7 @@ namespace Microsoft { namespace MSR { namespace CNTK { ElemType *signs=m_pArray+n; // sign of previous gradient ElemType *steps=m_pArray+2*n; // current step size - assert(this->GetNumRows() == gradients.GetNumRows() && this->GetNumCols() == gradients.GetNumCols() * 3); + assert(GetNumRows() == gradients.GetNumRows() && GetNumCols() == gradients.GetNumCols() * 3); if( !upd_gpu ) { @@ -1095,41 +1065,41 @@ namespace Microsoft { namespace MSR { namespace CNTK { template void GPUMatrix::Reshape(const size_t numRows, const size_t numCols) { - assert (numRows*numCols == this->GetNumElements()); - if (numRows*numCols != this->GetNumElements()) + assert (numRows*numCols == GetNumElements()); + if (numRows*numCols != GetNumElements()) throw std::invalid_argument("Reshape: total number of elements does not match."); - this->m_numRows = numRows; - this->m_numCols = numCols; + m_numRows = numRows; + m_numCols = numCols; } template void GPUMatrix::Resize(const size_t numRows, const size_t numCols, bool growOnly) { - if 
(this->m_numRows==numRows && this->m_numCols==numCols) + if (m_numRows==numRows && m_numCols==numCols) return; - this->m_numRows = numRows; - this->m_numCols = numCols; + m_numRows = numRows; + m_numCols = numCols; - size_t numElements = this->GetNumElements(); - if (numElements > this->m_elemSizeAllocated || (!growOnly && numElements != this->m_elemSizeAllocated)) + size_t numElements = GetNumElements(); + if (numElements > m_elemSizeAllocated || (!growOnly && numElements != m_elemSizeAllocated)) { - if (this->IsEmpty()) + if (IsEmpty()) { - this->m_elemSizeAllocated = 0; - this->m_pArray = NULL; + m_elemSizeAllocated = 0; + m_pArray = NULL; } else { - if (!this->OwnBuffer()) + if (!OwnBuffer()) throw std::invalid_argument("Can't resize a externally managed matrix"); PrepareDevice(); - if (this->m_pArray!=NULL) - CUDA_CALL(cudaFree(this->m_pArray)); //delete and reallocate - this->m_elemSizeAllocated = numElements; - CUDA_CALL(cudaMalloc((void**)&this->m_pArray,sizeof(ElemType)*this->m_elemSizeAllocated)); - CUDA_CALL(cudaMemset(this->m_pArray,0,sizeof(ElemType)*this->m_elemSizeAllocated)); + if (m_pArray!=NULL) + CUDA_CALL(cudaFree(m_pArray)); //delete and reallocate + m_elemSizeAllocated = numElements; + CUDA_CALL(cudaMalloc((void**)&m_pArray,sizeof(ElemType)*m_elemSizeAllocated)); + CUDA_CALL(cudaMemset(m_pArray,0,sizeof(ElemType)*m_elemSizeAllocated)); } } } @@ -1137,22 +1107,22 @@ namespace Microsoft { namespace MSR { namespace CNTK { template size_t GPUMatrix::LocateElement (const size_t row, const size_t col) const { - assert (row < this->m_numRows && col < this->m_numCols); - return col * this->m_numRows + row; // matrix in column-wise storage + assert (row < m_numRows && col < m_numCols); + return col * m_numRows + row; // matrix in column-wise storage } template size_t GPUMatrix::LocateColumn (const size_t col) const { - assert (col < this->m_numCols); - return col * this->m_numRows; // matrix in column-wise storage + assert (col < m_numCols); + return 
col * m_numRows; // matrix in column-wise storage } template ElemType GPUMatrix::Get00Element() const { ElemType res=0; - CUDA_CALL(cudaMemcpy(&res,this->m_pArray,sizeof(ElemType),cudaMemcpyDeviceToHost)); + CUDA_CALL(cudaMemcpy(&res,m_pArray,sizeof(ElemType),cudaMemcpyDeviceToHost)); return res; } #pragma endregion Basic Operators @@ -1161,13 +1131,13 @@ namespace Microsoft { namespace MSR { namespace CNTK { template GPUMatrix& GPUMatrix::operator+= (ElemType alpha) { - if (this->IsEmpty()) + if (IsEmpty()) throw std::logic_error("operator+=: Matrix is empty."); - LONG64 N=(LONG64)this->GetNumElements(); + LONG64 N=(LONG64)GetNumElements(); int blocksPerGrid =(int)ceil(1.0*N/threadsPerBlock); cudaEvent_t done = nullptr; if (do_sync) CUDA_CALL(cudaEventCreate(&done)); - _addValue<<>>(this->m_pArray,alpha,N); + _addValue<<>>(m_pArray,alpha,N); if (do_sync) CUDA_CALL(cudaEventRecord(done)); if (do_sync) CUDA_CALL(cudaEventSynchronize(done)); if (do_sync) CUDA_CALL(cudaEventDestroy(done)); @@ -1177,7 +1147,7 @@ namespace Microsoft { namespace MSR { namespace CNTK { template GPUMatrix GPUMatrix::operator+ (ElemType alpha) const { - if (this->IsEmpty()) + if (IsEmpty()) throw std::logic_error("operator+: Matrix is empty."); const GPUMatrix& us=*this; @@ -1189,7 +1159,7 @@ namespace Microsoft { namespace MSR { namespace CNTK { template GPUMatrix& GPUMatrix::AssignSumOf(const ElemType alpha, const GPUMatrix& a) { - this->SetValue(a); + SetValue(a); (*this)+=alpha; return (*this); } @@ -1220,10 +1190,10 @@ namespace Microsoft { namespace MSR { namespace CNTK { template GPUMatrix GPUMatrix::operator+ (const GPUMatrix& a) const { - if (this->GetNumElements()==1) + if (GetNumElements()==1) { GPUMatrix c(a); - c+=this->Get00Element(); + c+=Get00Element(); return c; } else if (a.GetNumElements()==1) @@ -1243,7 +1213,7 @@ namespace Microsoft { namespace MSR { namespace CNTK { template GPUMatrix& GPUMatrix::AssignSumOf(const GPUMatrix& a, const GPUMatrix& b) { - 
this->SetValue(a); + SetValue(a); (*this)+=b; return (*this); } @@ -1251,35 +1221,35 @@ namespace Microsoft { namespace MSR { namespace CNTK { template GPUMatrix& GPUMatrix::operator-= (ElemType alpha) { - if (this->IsEmpty()) + if (IsEmpty()) throw std::logic_error("operato-=: Matrix is empty."); - return this->operator+=(-1*alpha); + return operator+=(-1*alpha); } template GPUMatrix GPUMatrix::operator- (ElemType alpha) const { - if (this->IsEmpty()) + if (IsEmpty()) throw std::logic_error("operator-: Matrix is empty."); - return this->operator+(-1*alpha); + return operator+(-1*alpha); } template GPUMatrix& GPUMatrix::AssignDifferenceOf(const ElemType alpha, const GPUMatrix& a) { - this->Resize(a.m_numRows,a.m_numCols); - LONG64 N=(LONG64)this->GetNumElements(); + Resize(a.m_numRows,a.m_numCols); + LONG64 N=(LONG64)GetNumElements(); int blocksPerGrid =(int)ceil(1.0*N/threadsPerBlock); a.PrepareDevice(); cudaEvent_t done = nullptr; if (do_sync) CUDA_CALL(cudaEventCreate(&done)); - _assignDifferenceOf1<<>>(this->m_pArray,alpha,a.m_pArray,N); + _assignDifferenceOf1<<>>(m_pArray,alpha,a.m_pArray,N); if (do_sync) CUDA_CALL(cudaEventRecord(done)); if (do_sync) CUDA_CALL(cudaEventSynchronize(done)); if (do_sync) CUDA_CALL(cudaEventDestroy(done)); return *this; - /*this->Resize(a.m_numRows,a.m_numCols); - this->SetValue(alpha); + /*Resize(a.m_numRows,a.m_numCols); + SetValue(alpha); (*this)-=a; return *this;*/ } @@ -1287,18 +1257,18 @@ namespace Microsoft { namespace MSR { namespace CNTK { template GPUMatrix& GPUMatrix::AssignDifferenceOf(const GPUMatrix& a, const ElemType alpha) { - this->Resize(a.m_numRows,a.m_numCols); - LONG64 N=(LONG64)this->GetNumElements(); + Resize(a.m_numRows,a.m_numCols); + LONG64 N=(LONG64)GetNumElements(); int blocksPerGrid =(int)ceil(1.0*N/threadsPerBlock); a.PrepareDevice(); cudaEvent_t done = nullptr; if (do_sync) CUDA_CALL(cudaEventCreate(&done)); - _assignDifferenceOf2<<>>(this->m_pArray,alpha,a.m_pArray,N); + 
_assignDifferenceOf2<<>>(m_pArray,alpha,a.m_pArray,N); if (do_sync) CUDA_CALL(cudaEventRecord(done)); if (do_sync) CUDA_CALL(cudaEventSynchronize(done)); if (do_sync) CUDA_CALL(cudaEventDestroy(done)); return *this; - /*this->SetValue(a); + /*SetValue(a); (*this)-=alpha; return *this;*/ } @@ -1309,7 +1279,7 @@ namespace Microsoft { namespace MSR { namespace CNTK { //if (a.GetNumElements() == 1) // AssignDifferenceOf(*this, a.Get00Element()); //else if (GetNumElements() == 1) - // AssignDifferenceOf(this->Get00Element(), a); + // AssignDifferenceOf(Get00Element(), a); //else ScaleAndAdd(-1, a, *this); @@ -1346,7 +1316,7 @@ namespace Microsoft { namespace MSR { namespace CNTK { template GPUMatrix GPUMatrix::operator* (ElemType alpha) const { - GPUMatrix c(this->GetNumRows(), this->GetNumCols()); + GPUMatrix c(GetNumRows(), GetNumCols()); Scale(alpha, *this, c); return c; } @@ -1382,21 +1352,21 @@ namespace Microsoft { namespace MSR { namespace CNTK { GPUMatrix GPUMatrix::operator* (const GPUMatrix& a) const { const GPUMatrix& us = *this; - if (this->GetNumElements() == 1) + if (GetNumElements() == 1) { - GPUMatrix c(this->GetComputeDeviceId()); - c.AssignProductOf(this->Get00Element(), a); + GPUMatrix c(GetComputeDeviceId()); + c.AssignProductOf(Get00Element(), a); return c; } else if (a.GetNumElements() == 1) { - GPUMatrix c(this->GetComputeDeviceId()); + GPUMatrix c(GetComputeDeviceId()); c.AssignProductOf(a.Get00Element(), us); return c; } else { - GPUMatrix c(this->GetNumRows(),a.GetNumCols(),this->GetComputeDeviceId()); + GPUMatrix c(GetNumRows(),a.GetNumCols(),GetComputeDeviceId()); Multiply(*this, a, c); return c; } @@ -1427,7 +1397,7 @@ namespace Microsoft { namespace MSR { namespace CNTK { template GPUMatrix GPUMatrix::operator^ (ElemType alpha) const { - GPUMatrix c(this->GetNumRows(), this->GetNumCols()); + GPUMatrix c(GetNumRows(), GetNumCols()); ElementWisePower(alpha, *this, c); return c; } @@ -1450,15 +1420,15 @@ namespace Microsoft { namespace MSR { 
namespace CNTK { if (!(a.GetNumRows() == b.GetNumRows() && a.GetNumCols() == b.GetNumCols())) throw std::invalid_argument("The input matrix dimensions do not match."); - if (!(a.GetNumRows() == this->GetNumRows() && a.GetNumCols() == this->GetNumCols())) + if (!(a.GetNumRows() == GetNumRows() && a.GetNumCols() == GetNumCols())) throw std::invalid_argument("The input matrix dimensions do not match [this]."); - LONG64 N=(LONG64)this->GetNumElements(); + LONG64 N=(LONG64)GetNumElements(); int blocksPerGrid =(int)ceil(1.0*N/threadsPerBlock); a.PrepareDevice(); cudaEvent_t done = nullptr; if (do_sync) CUDA_CALL(cudaEventCreate(&done)); - _addElementProductOf<<>>(this->m_pArray,a.m_pArray,b.m_pArray,N); + _addElementProductOf<<>>(m_pArray,a.m_pArray,b.m_pArray,N); if (do_sync) CUDA_CALL(cudaEventRecord(done)); if (do_sync) CUDA_CALL(cudaEventSynchronize(done)); if (do_sync) CUDA_CALL(cudaEventDestroy(done)); @@ -1468,19 +1438,19 @@ namespace Microsoft { namespace MSR { namespace CNTK { template GPUMatrix& GPUMatrix::ColumnElementMultiplyWith(const GPUMatrix& a) { - if (a.IsEmpty() || this->IsEmpty()) + if (a.IsEmpty() || IsEmpty()) throw std::logic_error("ColumnElementMultiplyWith: Matrix is empty."); - if (!(a.GetNumRows() == this->GetNumRows() && a.GetNumCols() == 1)) + if (!(a.GetNumRows() == GetNumRows() && a.GetNumCols() == 1)) throw std::invalid_argument("ColumnElementMultiplyWith: The input matrix should be a col vector and match [this]'s rows."); long N=(long)a.GetNumRows(); - long M=(long)this->GetNumCols(); + long M=(long)GetNumCols(); int blocksPerGrid =(int)ceil(1.0*N/threadsPerBlock); a.PrepareDevice(); cudaEvent_t done = nullptr; if (do_sync) CUDA_CALL(cudaEventCreate(&done)); - _columnElementMultiplyWith<<>>(this->m_pArray,a.m_pArray,N,M); + _columnElementMultiplyWith<<>>(m_pArray,a.m_pArray,N,M); if (do_sync) CUDA_CALL(cudaEventRecord(done)); if (do_sync) CUDA_CALL(cudaEventSynchronize(done)); if (do_sync) CUDA_CALL(cudaEventDestroy(done)); @@ -1491,19 
+1461,19 @@ namespace Microsoft { namespace MSR { namespace CNTK { template GPUMatrix& GPUMatrix::RowElementMultiplyWith(const GPUMatrix& a) { - if (a.IsEmpty() || this->IsEmpty()) + if (a.IsEmpty() || IsEmpty()) throw std::logic_error("RowElementMultiplyWith: Matrix is empty."); - if (!(a.GetNumRows() == 1 && a.GetNumCols() == this->GetNumCols())) + if (!(a.GetNumRows() == 1 && a.GetNumCols() == GetNumCols())) throw std::invalid_argument("RowElementMultiplyWith: The input matrix should be a row vector and match [this]'s columns."); - long N = (long)this->GetNumRows(); + long N = (long)GetNumRows(); long M = (long)a.GetNumCols(); int blocksPerGrid = (int)ceil(1.0*M / threadsPerBlock); a.PrepareDevice(); cudaEvent_t done = nullptr; if (do_sync) CUDA_CALL(cudaEventCreate(&done)); - _rowElementMultiplyWith<<>>(this->m_pArray,a.m_pArray,N,M); + _rowElementMultiplyWith<<>>(m_pArray,a.m_pArray,N,M); if (do_sync) CUDA_CALL(cudaEventRecord(done)); if (do_sync) CUDA_CALL(cudaEventSynchronize(done)); if (do_sync) CUDA_CALL(cudaEventDestroy(done)); @@ -1520,7 +1490,7 @@ namespace Microsoft { namespace MSR { namespace CNTK { if (!(a.GetNumRows() == 1 && a.GetNumCols() == GetNumCols())) throw std::invalid_argument("RowElementDivideBy: The input matrix should be a row vector and match [this]'s columns."); - long N = (long)this->GetNumRows(); + long N = (long)GetNumRows(); long M = (long)a.GetNumCols(); int blocksPerGrid = (int)ceil(1.0*M / threadsPerBlock); a.PrepareDevice(); @@ -1544,7 +1514,7 @@ namespace Microsoft { namespace MSR { namespace CNTK { throw std::invalid_argument("ColumnElementDivideBy: The input matrix should be a col vector and match [this]'s rows."); long N = (long)a.GetNumRows(); - long M = (long)this->GetNumCols(); + long M = (long)GetNumCols(); int blocksPerGrid = (int)ceil(1.0*N / threadsPerBlock); a.PrepareDevice(); cudaEvent_t done = nullptr; @@ -1560,15 +1530,15 @@ namespace Microsoft { namespace MSR { namespace CNTK { template GPUMatrix& 
GPUMatrix::ElementInverse () { - if (this->IsEmpty()) + if (IsEmpty()) throw std::logic_error("ElementInverse: Matrix is empty."); - LONG64 N=(LONG64)this->GetNumElements(); + LONG64 N=(LONG64)GetNumElements(); int blocksPerGrid =(int)ceil(1.0*N/threadsPerBlock); PrepareDevice(); cudaEvent_t done = nullptr; if (do_sync) CUDA_CALL(cudaEventCreate(&done)); - _elemInverse<<>>(this->m_pArray,N); + _elemInverse<<>>(m_pArray,N); if (do_sync) CUDA_CALL(cudaEventRecord(done)); if (do_sync) CUDA_CALL(cudaEventSynchronize(done)); if (do_sync) CUDA_CALL(cudaEventDestroy(done)); @@ -1578,8 +1548,8 @@ namespace Microsoft { namespace MSR { namespace CNTK { template GPUMatrix& GPUMatrix::AssignElementInverseOf (const GPUMatrix& a) { - this->SetValue(a); - return this->ElementInverse(); + SetValue(a); + return ElementInverse(); } template @@ -1592,18 +1562,18 @@ namespace Microsoft { namespace MSR { namespace CNTK { template GPUMatrix& GPUMatrix::AssignSigmoidOf (const GPUMatrix& a) { - this->Resize(a.GetNumRows(),a.GetNumCols()); - LONG64 N=(LONG64)this->GetNumElements(); + Resize(a.GetNumRows(),a.GetNumCols()); + LONG64 N=(LONG64)GetNumElements(); int blocksPerGrid =(int)ceil(1.0*N/threadsPerBlock); PrepareDevice(); cudaEvent_t done = nullptr; if (do_sync) CUDA_CALL(cudaEventCreate(&done)); - _assignSigmoidOf<<>>(a.m_pArray,this->m_pArray,N); + _assignSigmoidOf<<>>(a.m_pArray,m_pArray,N); if (do_sync) CUDA_CALL(cudaEventRecord(done)); if (do_sync) CUDA_CALL(cudaEventSynchronize(done)); if (do_sync) CUDA_CALL(cudaEventDestroy(done)); - /*this->SetValue(a); - this->InplaceSigmoid();*/ + /*SetValue(a); + InplaceSigmoid();*/ return *this; } @@ -1625,12 +1595,12 @@ namespace Microsoft { namespace MSR { namespace CNTK { Resize(a.GetNumRows(), a.GetNumCols()); PrepareDevice(); - LONG64 N=(LONG64)this->GetNumElements(); + LONG64 N=(LONG64)GetNumElements(); int blocksPerGrid =(int)ceil(1.0*N/threadsPerBlock); cudaEvent_t done = nullptr; if (do_sync) CUDA_CALL(cudaEventCreate(&done)); - 
_assignSigmoidDerivative<<>>(a.m_pArray, this->m_pArray, N); + _assignSigmoidDerivative<<>>(a.m_pArray, m_pArray, N); if (do_sync) CUDA_CALL(cudaEventRecord(done)); if (do_sync) CUDA_CALL(cudaEventSynchronize(done)); if (do_sync) CUDA_CALL(cudaEventDestroy(done)); @@ -1648,8 +1618,8 @@ namespace Microsoft { namespace MSR { namespace CNTK { template GPUMatrix& GPUMatrix::AssignTanhOf (const GPUMatrix& a) { - this->SetValue(a); - this->InplaceTanh(); + SetValue(a); + InplaceTanh(); return *this; } @@ -1662,7 +1632,7 @@ namespace Microsoft { namespace MSR { namespace CNTK { PrepareDevice(); if (isColWise) { - long N=(long)this->GetNumCols(); //one kernel per column + long N=(long)GetNumCols(); //one kernel per column int blocksPerGrid =(int)ceil(N*1.0/threadsPerBlock); cudaEvent_t done = nullptr; if (do_sync) CUDA_CALL(cudaEventCreate(&done)); @@ -1673,7 +1643,7 @@ namespace Microsoft { namespace MSR { namespace CNTK { } else { - long N=(long)this->GetNumRows(); //one kernel per column + long N=(long)GetNumRows(); //one kernel per column int blocksPerGrid =(int)ceil(N*1.0/threadsPerBlock); cudaEvent_t done = nullptr; if (do_sync) CUDA_CALL(cudaEventCreate(&done)); @@ -1688,7 +1658,7 @@ namespace Microsoft { namespace MSR { namespace CNTK { template GPUMatrix& GPUMatrix::AssignLogSoftmaxOf (const GPUMatrix& a, const bool isColWise) { - this->Resize(a.GetNumRows(),a.GetNumCols()); + Resize(a.GetNumRows(),a.GetNumCols()); if (isColWise) { PrepareDevice(); @@ -1719,8 +1689,8 @@ namespace Microsoft { namespace MSR { namespace CNTK { template GPUMatrix& GPUMatrix::AssignSqrtOf (const GPUMatrix& a) { - this->SetValue(a); - this->InplaceSqrt(); + SetValue(a); + InplaceSqrt(); return *this; } @@ -1734,8 +1704,8 @@ namespace Microsoft { namespace MSR { namespace CNTK { template GPUMatrix& GPUMatrix::AssignExpOf (const GPUMatrix& a) { - this->SetValue(a); - this->InplaceExp(); + SetValue(a); + InplaceExp(); return *this; } @@ -1749,8 +1719,8 @@ namespace Microsoft { namespace 
MSR { namespace CNTK { template GPUMatrix& GPUMatrix::AssignLogOf (const GPUMatrix& a) { - this->SetValue(a); - this->InplaceLog(); + SetValue(a); + InplaceLog(); return *this; } @@ -1764,8 +1734,8 @@ namespace Microsoft { namespace MSR { namespace CNTK { template GPUMatrix& GPUMatrix::AssignAbsOf (const GPUMatrix& a) { - this->SetValue(a); - this->InplaceAbs(); + SetValue(a); + InplaceAbs(); return *this; } @@ -1779,8 +1749,8 @@ namespace Microsoft { namespace MSR { namespace CNTK { template GPUMatrix& GPUMatrix::AssignLinearRectifierDerivativeOf (const GPUMatrix& a) { - this->SetValue(a); - this->InplaceLinearRectifierDerivative(); + SetValue(a); + InplaceLinearRectifierDerivative(); return *this; } @@ -1794,8 +1764,8 @@ namespace Microsoft { namespace MSR { namespace CNTK { template GPUMatrix& GPUMatrix::AssignCosineOf (const GPUMatrix& a) { - this->SetValue(a); - this->InplaceCosine(); + SetValue(a); + InplaceCosine(); return *this; } @@ -1809,23 +1779,23 @@ namespace Microsoft { namespace MSR { namespace CNTK { template GPUMatrix& GPUMatrix::AssignNegativeSineOf (const GPUMatrix& a) { - this->SetValue(a); - this->InplaceNegativeSine(); + SetValue(a); + InplaceNegativeSine(); return *this; } template GPUMatrix& GPUMatrix::InplaceTruncateBottom (const ElemType threshold) { - if (this->IsEmpty()) + if (IsEmpty()) throw std::logic_error("InplaceTruncateBottom: Matrix is empty."); - LONG64 N=(LONG64)this->GetNumElements(); + LONG64 N=(LONG64)GetNumElements(); int blocksPerGrid =(int)ceil(N*1.0/threadsPerBlock); PrepareDevice(); cudaEvent_t done = nullptr; if (do_sync) CUDA_CALL(cudaEventCreate(&done)); - _inplaceTruncateBottom<<>>(this->m_pArray,threshold,N); + _inplaceTruncateBottom<<>>(m_pArray,threshold,N); if (do_sync) CUDA_CALL(cudaEventRecord(done)); if (do_sync) CUDA_CALL(cudaEventSynchronize(done)); if (do_sync) CUDA_CALL(cudaEventDestroy(done)); @@ -1843,12 +1813,12 @@ namespace Microsoft { namespace MSR { namespace CNTK { Resize(a.GetNumRows(), 
a.GetNumCols()); } - LONG64 N=(LONG64)this->GetNumElements(); + LONG64 N=(LONG64)GetNumElements(); int blocksPerGrid =(int)ceil(N*1.0/threadsPerBlock); PrepareDevice(); cudaEvent_t done = nullptr; if (do_sync) CUDA_CALL(cudaEventCreate(&done)); - _assignTruncateBottom<<>>(this->m_pArray,a.m_pArray,threshold,N); + _assignTruncateBottom<<>>(m_pArray,a.m_pArray,threshold,N); if (do_sync) CUDA_CALL(cudaEventRecord(done)); if (do_sync) CUDA_CALL(cudaEventSynchronize(done)); if (do_sync) CUDA_CALL(cudaEventDestroy(done)); @@ -1859,14 +1829,14 @@ namespace Microsoft { namespace MSR { namespace CNTK { template GPUMatrix& GPUMatrix::InplaceTruncateTop (const ElemType threshold) { - if (this->IsEmpty()) + if (IsEmpty()) throw std::logic_error("InplaceTruncateTop: Matrix is empty."); - LONG64 N=(LONG64)this->GetNumElements(); + LONG64 N=(LONG64)GetNumElements(); int blocksPerGrid =(int)ceil(N*1.0/threadsPerBlock); PrepareDevice(); cudaEvent_t done = nullptr; if (do_sync) CUDA_CALL(cudaEventCreate(&done)); - _inplaceTruncateTop<<>>(this->m_pArray,threshold,N); + _inplaceTruncateTop<<>>(m_pArray,threshold,N); if (do_sync) CUDA_CALL(cudaEventRecord(done)); if (do_sync) CUDA_CALL(cudaEventSynchronize(done)); if (do_sync) CUDA_CALL(cudaEventDestroy(done)); @@ -1884,12 +1854,12 @@ namespace Microsoft { namespace MSR { namespace CNTK { Resize(a.GetNumRows(), a.GetNumCols()); } - LONG64 N=(LONG64)this->GetNumElements(); + LONG64 N=(LONG64)GetNumElements(); int blocksPerGrid =(int)ceil(N*1.0/threadsPerBlock); a.PrepareDevice(); cudaEvent_t done = nullptr; if (do_sync) CUDA_CALL(cudaEventCreate(&done)); - _assignTruncateTop<<>>(this->m_pArray,a.m_pArray,threshold,N); + _assignTruncateTop<<>>(m_pArray,a.m_pArray,threshold,N); if (do_sync) CUDA_CALL(cudaEventRecord(done)); if (do_sync) CUDA_CALL(cudaEventSynchronize(done)); if (do_sync) CUDA_CALL(cudaEventDestroy(done)); @@ -1899,14 +1869,14 @@ namespace Microsoft { namespace MSR { namespace CNTK { template GPUMatrix& 
GPUMatrix::SetToZeroIfAbsLessThan (const ElemType threshold) { - if (this->IsEmpty()) + if (IsEmpty()) throw std::logic_error("SetToZeroIfAbsLessThan: Matrix is empty."); - LONG64 N=(LONG64)this->GetNumElements(); + LONG64 N=(LONG64)GetNumElements(); int blocksPerGrid =(int)ceil(N*1.0/threadsPerBlock); PrepareDevice(); cudaEvent_t done = nullptr; if (do_sync) CUDA_CALL(cudaEventCreate(&done)); - _setToZeroIfAbsLessThan<<>>(this->m_pArray,threshold,N); + _setToZeroIfAbsLessThan<<>>(m_pArray,threshold,N); if (do_sync) CUDA_CALL(cudaEventRecord(done)); if (do_sync) CUDA_CALL(cudaEventSynchronize(done)); if (do_sync) CUDA_CALL(cudaEventDestroy(done)); @@ -1916,20 +1886,20 @@ namespace Microsoft { namespace MSR { namespace CNTK { template ElemType GPUMatrix::SumOfAbsElements() const { - if (this->IsEmpty()) + if (IsEmpty()) throw std::logic_error("SumOfAbsElements: Matrix is empty"); cublasHandle_t cuHandle = GetCublasHandle(GetComputeDeviceId()); if (sizeof(ElemType)==sizeof(float)) { float res=0; - cublasSasum(cuHandle,(LONG64)this->GetNumElements(),reinterpret_cast(this->m_pArray),1,&res); + cublasSasum(cuHandle,(LONG64)GetNumElements(),reinterpret_cast(m_pArray),1,&res); return res; } else { double res=0; - cublasDasum(cuHandle,(LONG64)this->GetNumElements(),reinterpret_cast(this->m_pArray),1,&res); + cublasDasum(cuHandle,(LONG64)GetNumElements(),reinterpret_cast(m_pArray),1,&res); return ElemType(res); } } @@ -1937,7 +1907,7 @@ namespace Microsoft { namespace MSR { namespace CNTK { template ElemType GPUMatrix::SumOfElements() const { - if (this->IsEmpty()) + if (IsEmpty()) throw std::logic_error("SumOfElements: Matrix is empty"); PrepareDevice(); @@ -1945,7 +1915,7 @@ namespace Microsoft { namespace MSR { namespace CNTK { ElemType h_sum; CUDA_CALL(cudaMalloc((void**)&d_sum,sizeof(ElemType))); //WARNING: THIS kernel is not the most efficient way! 
- _reductionSum<<<1,1024,0,t_stream>>>(this->m_pArray,d_sum,(LONG64)this->GetNumElements()); + _reductionSum<<<1,1024,0,t_stream>>>(m_pArray,d_sum,(LONG64)GetNumElements()); CUDA_CALL(cudaMemcpy(&h_sum,d_sum,sizeof(ElemType),cudaMemcpyDeviceToHost)); CUDA_CALL(cudaFree(d_sum)); return h_sum; @@ -1958,13 +1928,13 @@ namespace Microsoft { namespace MSR { namespace CNTK { if (a.IsEmpty()) throw std::logic_error("AssignSumOfElements: Matrix a is empty"); - this->Resize(1,1); + Resize(1,1); PrepareDevice(); cudaEvent_t done = nullptr; if (do_sync) CUDA_CALL(cudaEventCreate(&done)); //WARNING: THIS kernel is not the most efficient way! - _reductionSumAndAssign<<<1,1024>>>(this->m_pArray,a.m_pArray,(LONG64)a.GetNumElements(),(LONG64)this->GetNumElements()); + _reductionSumAndAssign<<<1,1024>>>(m_pArray,a.m_pArray,(LONG64)a.GetNumElements(),(LONG64)GetNumElements()); if (do_sync) CUDA_CALL(cudaEventRecord(done)); if (do_sync) CUDA_CALL(cudaEventSynchronize(done)); if (do_sync) CUDA_CALL(cudaEventDestroy(done)); @@ -1974,13 +1944,13 @@ namespace Microsoft { namespace MSR { namespace CNTK { template DeviceBoundNumber GPUMatrix::Sum_AsDeviceBoundNum() const { - if (this->IsEmpty()) + if (IsEmpty()) throw std::logic_error("Matrix is empty"); PrepareDevice(); ElemType* d_sum = NULL; CUDA_CALL(cudaMalloc((void**)&d_sum,sizeof(ElemType))); //WARNING: THIS kernel is not the most efficient way! 
- _reductionSum<<<1,1024,0,t_stream>>>(this->m_pArray,d_sum,(LONG64)this->GetNumElements()); + _reductionSum<<<1,1024,0,t_stream>>>(m_pArray,d_sum,(LONG64)GetNumElements()); DeviceBoundNumber result; result.ShallowCopyFrom(d_sum,GetComputeDeviceId()); return result; @@ -1994,17 +1964,17 @@ namespace Microsoft { namespace MSR { namespace CNTK { if (sizeof(ElemType)==sizeof(float)) { int resInd=0; - cublasIsamax(cuHandle,(LONG64)this->GetNumElements(),reinterpret_cast(this->m_pArray),1,&resInd); + cublasIsamax(cuHandle,(LONG64)GetNumElements(),reinterpret_cast(m_pArray),1,&resInd); resInd--; - CUDA_CALL(cudaMemcpy(reinterpret_cast(&res),reinterpret_cast(this->m_pArray+resInd),sizeof(float),cudaMemcpyDeviceToHost)); + CUDA_CALL(cudaMemcpy(reinterpret_cast(&res),reinterpret_cast(m_pArray+resInd),sizeof(float),cudaMemcpyDeviceToHost)); return res; } else { int resInd=0; - cublasIdamax(cuHandle,(LONG64)this->GetNumElements(),reinterpret_cast(this->m_pArray),1,&resInd); + cublasIdamax(cuHandle,(LONG64)GetNumElements(),reinterpret_cast(m_pArray),1,&resInd); resInd--; - CUDA_CALL(cudaMemcpy(reinterpret_cast(&res),this->m_pArray+resInd,sizeof(float),cudaMemcpyDeviceToHost)); + CUDA_CALL(cudaMemcpy(reinterpret_cast(&res),m_pArray+resInd,sizeof(float),cudaMemcpyDeviceToHost)); return res; } } @@ -2013,7 +1983,7 @@ namespace Microsoft { namespace MSR { namespace CNTK { template GPUMatrix& GPUMatrix::ElementMultiplyWith (const GPUMatrix& a) { - if (this->IsEmpty() || a.IsEmpty()) + if (IsEmpty() || a.IsEmpty()) throw std::logic_error("ElementMultiplyWith: Matrix is empty."); GPUMatrix& us=*this; @@ -2021,12 +1991,12 @@ namespace Microsoft { namespace MSR { namespace CNTK { if (us.GetNumRows() != a.GetNumRows() || us.GetNumCols() != a.GetNumCols()) throw std::invalid_argument("The matrix dimensions do not match."); - LONG64 N=(LONG64)this->GetNumElements(); + LONG64 N=(LONG64)GetNumElements(); int blocksPerGrid =(int)ceil(((double)N)/threadsPerBlock); a.PrepareDevice(); 
cudaEvent_t done = nullptr; if (do_sync) CUDA_CALL(cudaEventCreate(&done)); - _elemMul<<>>(this->m_pArray,a.m_pArray,N); + _elemMul<<>>(m_pArray,a.m_pArray,N); if (do_sync) CUDA_CALL(cudaEventRecord(done)); if (do_sync) CUDA_CALL(cudaEventSynchronize(done)); if (do_sync) CUDA_CALL(cudaEventDestroy(done)); @@ -2044,12 +2014,12 @@ namespace Microsoft { namespace MSR { namespace CNTK { throw std::invalid_argument("The input matrix dimensions do not match."); Resize(a.GetNumRows(), a.GetNumCols()); - LONG64 N=(LONG64)this->GetNumElements(); + LONG64 N=(LONG64)GetNumElements(); int blocksPerGrid =(int)ceil(((double)N)/threadsPerBlock); a.PrepareDevice(); cudaEvent_t done = nullptr; if (do_sync) CUDA_CALL(cudaEventCreate(&done)); - _assignElementProductOf<<>>(this->m_pArray,a.m_pArray,b.m_pArray,N); + _assignElementProductOf<<>>(m_pArray,a.m_pArray,b.m_pArray,N); if (do_sync) CUDA_CALL(cudaEventRecord(done)); if (do_sync) CUDA_CALL(cudaEventSynchronize(done)); if (do_sync) CUDA_CALL(cudaEventDestroy(done)); @@ -2073,12 +2043,12 @@ namespace Microsoft { namespace MSR { namespace CNTK { throw std::invalid_argument("The input matrix dimensions do not match."); Resize(a.GetNumRows(), a.GetNumCols()); - LONG64 N=(LONG64)this->GetNumElements(); + LONG64 N=(LONG64)GetNumElements(); int blocksPerGrid =(int)ceil(((double)N)/threadsPerBlock); a.PrepareDevice(); cudaEvent_t done = nullptr; if (do_sync) CUDA_CALL(cudaEventCreate(&done)); - _assignElementDivisionOf<<>>(this->m_pArray,a.m_pArray,b.m_pArray,N); + _assignElementDivisionOf<<>>(m_pArray,a.m_pArray,b.m_pArray,N); if (do_sync) CUDA_CALL(cudaEventRecord(done)); if (do_sync) CUDA_CALL(cudaEventSynchronize(done)); if (do_sync) CUDA_CALL(cudaEventDestroy(done)); @@ -2094,11 +2064,11 @@ namespace Microsoft { namespace MSR { namespace CNTK { template void GPUMatrix::VectorNorm1(GPUMatrix& c, const bool isColWise) const { - if (this->IsEmpty()) + if (IsEmpty()) throw std::logic_error("VectorNorm1: Matrix is empty."); - const long 
n = (long)this->GetNumRows(); - const long m = (long)this->GetNumCols(); + const long n = (long)GetNumRows(); + const long m = (long)GetNumCols(); assert (m>0 && n>0); //converting from size_t to int may cause overflow cudaEvent_t done = nullptr; @@ -2119,7 +2089,7 @@ namespace Microsoft { namespace MSR { namespace CNTK { } if (do_sync) CUDA_CALL(cudaEventCreate(&done)); - _vectorNorm1<<>>(c.m_pArray, this->m_pArray,n,m,isColWise); + _vectorNorm1<<>>(c.m_pArray, m_pArray,n,m,isColWise); if (do_sync) CUDA_CALL(cudaEventRecord(done)); if (do_sync) CUDA_CALL(cudaEventSynchronize(done)); if (do_sync) CUDA_CALL(cudaEventDestroy(done)); @@ -2135,11 +2105,11 @@ namespace Microsoft { namespace MSR { namespace CNTK { template void GPUMatrix::VectorNorm2(GPUMatrix& c, const bool isColWise) const { - if (this->IsEmpty()) + if (IsEmpty()) throw std::logic_error("VectorNorm2: Matrix is empty."); - const long n = (long)this->GetNumRows(); - const long m = (long)this->GetNumCols(); + const long n = (long)GetNumRows(); + const long m = (long)GetNumCols(); assert (m>0 && n>0); //converting from size_t to int may cause overflow cudaEvent_t done = nullptr; @@ -2160,7 +2130,7 @@ namespace Microsoft { namespace MSR { namespace CNTK { } if (do_sync) CUDA_CALL(cudaEventCreate(&done)); - _vectorNorm2<<>>(c.m_pArray, this->m_pArray,n,m,isColWise); + _vectorNorm2<<>>(c.m_pArray, m_pArray,n,m,isColWise); if (do_sync) CUDA_CALL(cudaEventRecord(done)); if (do_sync) CUDA_CALL(cudaEventSynchronize(done)); if (do_sync) CUDA_CALL(cudaEventDestroy(done)); @@ -2176,7 +2146,7 @@ namespace Microsoft { namespace MSR { namespace CNTK { template void GPUMatrix::VectorNormInf(GPUMatrix& c, const bool isColWise) const { - if (this->IsEmpty()) + if (IsEmpty()) throw std::logic_error("VectorMax: Matrix is empty."); //this implementation is not efficient @@ -2214,12 +2184,12 @@ namespace Microsoft { namespace MSR { namespace CNTK { long rowsA = (long)a.GetNumRows(); long rowsB = (long)b.GetNumRows(); 
Resize(rowsA * rowsB, cols); - float N=(float)this->GetNumElements(); + float N=(float)GetNumElements(); int blocksPerGrid =(int)ceil(N/threadsPerBlock); a.PrepareDevice(); cudaEvent_t done = nullptr; if (do_sync) CUDA_CALL(cudaEventCreate(&done)); - _assignKhatriRaoProductOf<<>>(this->m_pArray,a.m_pArray,b.m_pArray,rowsA, rowsB, cols); + _assignKhatriRaoProductOf<<>>(m_pArray,a.m_pArray,b.m_pArray,rowsA, rowsB, cols); if (do_sync) CUDA_CALL(cudaEventRecord(done)); if (do_sync) CUDA_CALL(cudaEventSynchronize(done)); if (do_sync) CUDA_CALL(cudaEventDestroy(done)); @@ -2249,15 +2219,15 @@ namespace Microsoft { namespace MSR { namespace CNTK { throw std::invalid_argument("AddColumnReshapeProductOf: number of rows in a should be multiples of that in b."); long rowsC = rowsA / rowsB; - if (rowsC != this->GetNumRows() || cols != this->GetNumCols()) + if (rowsC != GetNumRows() || cols != GetNumCols()) throw std::invalid_argument("AddColumnReshapeProductOf: This matrix does not have the right size."); - float N=(float)this->GetNumElements(); + float N=(float)GetNumElements(); int blocksPerGrid =(int)ceil(N/threadsPerBlock); a.PrepareDevice(); cudaEvent_t done = nullptr; if (do_sync) CUDA_CALL(cudaEventCreate(&done)); - _addColumnReshapeProductOf<<>>(this->m_pArray,a.m_pArray,b.m_pArray, rowsB, rowsC, cols, transposeAColumn); + _addColumnReshapeProductOf<<>>(m_pArray,a.m_pArray,b.m_pArray, rowsB, rowsC, cols, transposeAColumn); if (do_sync) CUDA_CALL(cudaEventRecord(done)); if (do_sync) CUDA_CALL(cudaEventSynchronize(done)); if (do_sync) CUDA_CALL(cudaEventDestroy(done)); @@ -2275,7 +2245,7 @@ namespace Microsoft { namespace MSR { namespace CNTK { template ElemType GPUMatrix::FrobeniusNorm() const { - if (this->IsEmpty()) + if (IsEmpty()) throw std::logic_error("FrobeniusNorm: Matrix is empty."); PrepareDevice(); @@ -2283,7 +2253,7 @@ namespace Microsoft { namespace MSR { namespace CNTK { ElemType h_sum=0; CUDA_CALL(cudaMalloc((void**)&d_sum,sizeof(ElemType))); //WARNING: 
THIS kernel is not the most efficient way! - _reductionSum2<<<1,1024,0,t_stream>>>(this->m_pArray,d_sum,(LONG64)this->GetNumElements(), true); + _reductionSum2<<<1,1024,0,t_stream>>>(m_pArray,d_sum,(LONG64)GetNumElements(), true); CUDA_CALL(cudaMemcpy(&h_sum,d_sum,sizeof(ElemType),cudaMemcpyDeviceToHost)); CUDA_CALL(cudaFree(d_sum)); @@ -2296,11 +2266,11 @@ namespace Microsoft { namespace MSR { namespace CNTK { if (a.IsEmpty()) throw std::logic_error("AssignFrobeniusNormOf: Matrix a is empty."); - this->Resize(1,1); + Resize(1,1); PrepareDevice(); //WARNING: THIS kernel is not the most efficient way! - _reductionSum2<<<1,1024,0,t_stream>>>(a.m_pArray,this->m_pArray,(LONG64)a.GetNumElements(), true); + _reductionSum2<<<1,1024,0,t_stream>>>(a.m_pArray,m_pArray,(LONG64)a.GetNumElements(), true); return *this; } @@ -2308,7 +2278,7 @@ namespace Microsoft { namespace MSR { namespace CNTK { template ElemType GPUMatrix::MatrixNormInf() const { - if (this->IsEmpty()) + if (IsEmpty()) throw std::logic_error("MatrixNorm1: Matrix is empty."); PrepareDevice(); @@ -2316,7 +2286,7 @@ namespace Microsoft { namespace MSR { namespace CNTK { ElemType h_maxAbs=0; CUDA_CALL(cudaMalloc((void**)&d_maxAbs,sizeof(ElemType))); //WARNING: THIS kernel is not the most efficient way! 
- _reductionMatrixNormInf<<<1,1024,0,t_stream>>>(this->m_pArray,d_maxAbs,(LONG64)this->GetNumElements()); + _reductionMatrixNormInf<<<1,1024,0,t_stream>>>(m_pArray,d_maxAbs,(LONG64)GetNumElements()); CUDA_CALL(cudaMemcpy(&h_maxAbs,d_maxAbs,sizeof(ElemType),cudaMemcpyDeviceToHost)); CUDA_CALL(cudaFree(d_maxAbs)); return h_maxAbs; @@ -2325,15 +2295,15 @@ namespace Microsoft { namespace MSR { namespace CNTK { template ElemType GPUMatrix::MatrixNorm1() const { - if (this->IsEmpty()) + if (IsEmpty()) throw std::logic_error("MatrixNorm1: Matrix is empty."); - return this->SumOfAbsElements(); + return SumOfAbsElements(); } template ElemType GPUMatrix::MatrixNorm0() const { - if (this->IsEmpty()) + if (IsEmpty()) throw std::logic_error("MatrixNorm0: Matrix is empty."); PrepareDevice(); @@ -2341,7 +2311,7 @@ namespace Microsoft { namespace MSR { namespace CNTK { ElemType h_nz=0; CUDA_CALL(cudaMalloc((void**)&d_nz,sizeof(ElemType))); //WARNING: THIS kernel is not the most efficient way! - _reductionMatrixNorm0<<<1,1024,0,t_stream>>>(this->m_pArray,d_nz,(LONG64)this->GetNumElements()); + _reductionMatrixNorm0<<<1,1024,0,t_stream>>>(m_pArray,d_nz,(LONG64)GetNumElements()); CUDA_CALL(cudaMemcpy(&h_nz,d_nz,sizeof(ElemType),cudaMemcpyDeviceToHost)); CUDA_CALL(cudaFree(d_nz)); return h_nz; @@ -2358,9 +2328,9 @@ namespace Microsoft { namespace MSR { namespace CNTK { PrepareDevice(); cudaEvent_t done = nullptr; - int blocksPerGrid=(int)ceil(1.0*this->GetNumElements()/threadsPerBlock); + int blocksPerGrid=(int)ceil(1.0*GetNumElements()/threadsPerBlock); if (do_sync) CUDA_CALL(cudaEventCreate(&done)); - _assignSignOf<<>>(this->m_pArray, a.m_pArray, (long)this->GetNumElements()); + _assignSignOf<<>>(m_pArray, a.m_pArray, (long)GetNumElements()); if (do_sync) CUDA_CALL(cudaEventRecord(done)); if (do_sync) CUDA_CALL(cudaEventSynchronize(done)); if (do_sync) CUDA_CALL(cudaEventDestroy(done)); @@ -2378,9 +2348,9 @@ namespace Microsoft { namespace MSR { namespace CNTK { PrepareDevice(); 
cudaEvent_t done = nullptr; - int blocksPerGrid=(int)ceil(1.0*this->GetNumElements()/threadsPerBlock); + int blocksPerGrid=(int)ceil(1.0*GetNumElements()/threadsPerBlock); if (do_sync) CUDA_CALL(cudaEventCreate(&done)); - _addSignOf<<>>(this->m_pArray, a.m_pArray, (LONG64)this->GetNumElements()); + _addSignOf<<>>(m_pArray, a.m_pArray, (LONG64)GetNumElements()); if (do_sync) CUDA_CALL(cudaEventRecord(done)); if (do_sync) CUDA_CALL(cudaEventSynchronize(done)); if (do_sync) CUDA_CALL(cudaEventDestroy(done)); @@ -2390,12 +2360,12 @@ namespace Microsoft { namespace MSR { namespace CNTK { template void GPUMatrix::VectorMax(GPUMatrix& maxIndexes, GPUMatrix& maxValues, const bool isColWise) const { - if (this->IsEmpty()) + if (IsEmpty()) throw std::logic_error("VectorMax: Matrix is empty."); const GPUMatrix& us=*this; - const long m = (long)this->GetNumRows(); - const long n = (long)this->GetNumCols(); + const long m = (long)GetNumRows(); + const long n = (long)GetNumCols(); assert (m>0 && n>0); //converting from size_t to int may cause overflow PrepareDevice(); cudaEvent_t done = nullptr; @@ -2426,12 +2396,12 @@ namespace Microsoft { namespace MSR { namespace CNTK { template void GPUMatrix::VectorMin(GPUMatrix& minIndexes, GPUMatrix& minValues, const bool isColWise) const { - if (this->IsEmpty()) + if (IsEmpty()) throw std::logic_error("VectorMax: Matrix is empty."); const GPUMatrix& us=*this; - const int m = (int)this->GetNumRows(); - const int n = (int)this->GetNumCols(); + const int m = (int)GetNumRows(); + const int n = (int)GetNumCols(); assert (m>0 && n>0); //converting from size_t to int may cause overflow PrepareDevice(); @@ -2473,8 +2443,8 @@ namespace Microsoft { namespace MSR { namespace CNTK { cudaEvent_t done = nullptr; //int blocksPerGrid=(int)ceil(1.0*a.GetNumElements()/threadsPerBlock); if (do_sync) CUDA_CALL(cudaEventCreate(&done)); - //_assignNumOfDiff<<>>(a.m_pArray, b.m_pArray, this->m_pArray, a.GetNumElements()); - 
_assignNumOfDiff<<<1,1024,0,t_stream>>>(a.m_pArray, b.m_pArray, this->m_pArray, (LONG64)a.GetNumElements()); + //_assignNumOfDiff<<>>(a.m_pArray, b.m_pArray, m_pArray, a.GetNumElements()); + _assignNumOfDiff<<<1,1024,0,t_stream>>>(a.m_pArray, b.m_pArray, m_pArray, (LONG64)a.GetNumElements()); if (do_sync) CUDA_CALL(cudaEventRecord(done)); if (do_sync) CUDA_CALL(cudaEventSynchronize(done)); if (do_sync) CUDA_CALL(cudaEventDestroy(done)); @@ -2493,7 +2463,7 @@ namespace Microsoft { namespace MSR { namespace CNTK { template void GPUMatrix::Print(const char* matrixName /*=nullptr*/) const { - Print(matrixName, 0, this->GetNumRows()-1, 0, this->GetNumCols()-1); + Print(matrixName, 0, GetNumRows()-1, 0, GetNumCols()-1); } // file I/O @@ -2526,7 +2496,7 @@ namespace Microsoft { namespace MSR { namespace CNTK { size_t smallBatchSize = inputSubBatch.GetNumCols(); Resize(packedInputRows, packedInputColsPerSample * smallBatchSize); if (zeroPadding) - this->SetValue((ElemType)0); + SetValue((ElemType)0); PrepareDevice(); int numThreadPerBlock = threadsPerBlock; @@ -2537,7 +2507,7 @@ namespace Microsoft { namespace MSR { namespace CNTK { #endif cudaEvent_t done = nullptr; if (do_sync) CUDA_CALL(cudaEventCreate(&done)); - _assignPackedConvolutionInput<<>>(this->m_pArray, + _assignPackedConvolutionInput<<>>(m_pArray, inputSubBatch.m_pArray, smallBatchSize, inputWidth, inputHeight, inputChannels, @@ -2571,7 +2541,7 @@ namespace Microsoft { namespace MSR { namespace CNTK { #endif cudaEvent_t done = nullptr; if (do_sync) CUDA_CALL(cudaEventCreate(&done)); - _unpackConvolutionInput<<>>(this->m_pArray, + _unpackConvolutionInput<<>>(m_pArray, inputSubBatch.m_pArray, smallBatchSize, inputWidth, inputHeight, inputChannels, @@ -2601,7 +2571,7 @@ namespace Microsoft { namespace MSR { namespace CNTK { PrepareDevice(); cudaEvent_t done = nullptr; if (do_sync) CUDA_CALL(cudaEventCreate(&done)); - _assignMaxPoolingResult<<>>(this->m_pArray, inputBatch.m_pArray, batchSize, channels, + 
_assignMaxPoolingResult<<>>(m_pArray, inputBatch.m_pArray, batchSize, channels, inputWidth, inputHeight,inputSizePerSample, outputWidth, outputHeight, outputSizePerSample, windowWidth, windowHeight, horizontalSubsample, verticalSubsample); @@ -2629,7 +2599,7 @@ namespace Microsoft { namespace MSR { namespace CNTK { if (do_sync) CUDA_CALL(cudaEventCreate(&done)); int blocksPerGrid = (batchSize * inputSizePerSample + numThreadPerBlock - 1)/numThreadPerBlock; - _addMaxPoolingGradient<<>>(this->m_pArray, outputGradientBatch.m_pArray, inputBatch.m_pArray, outputBatch.m_pArray, batchSize, channels, + _addMaxPoolingGradient<<>>(m_pArray, outputGradientBatch.m_pArray, inputBatch.m_pArray, outputBatch.m_pArray, batchSize, channels, inputWidth, inputHeight,inputSizePerSample, outputWidth, outputHeight, outputSizePerSample, windowWidth, windowHeight, horizontalSubsample, verticalSubsample); @@ -2657,7 +2627,7 @@ namespace Microsoft { namespace MSR { namespace CNTK { PrepareDevice(); cudaEvent_t done = nullptr; if (do_sync) CUDA_CALL(cudaEventCreate(&done)); - _assignAveragePoolingResult<<>>(this->m_pArray, inputBatch.m_pArray, batchSize, channels, + _assignAveragePoolingResult<<>>(m_pArray, inputBatch.m_pArray, batchSize, channels, inputWidth, inputHeight,inputSizePerSample, outputWidth, outputHeight, outputSizePerSample, windowWidth, windowHeight, horizontalSubsample, verticalSubsample); @@ -2685,7 +2655,7 @@ namespace Microsoft { namespace MSR { namespace CNTK { if (do_sync) CUDA_CALL(cudaEventCreate(&done)); size_t blocksPerGrid = (batchSize * inputSizePerSample + numThreadPerBlock - 1)/numThreadPerBlock; - _addAveragePoolingGradient<<>>(this->m_pArray, outputGradientBatch.m_pArray, (long)batchSize, channels, + _addAveragePoolingGradient<<>>(m_pArray, outputGradientBatch.m_pArray, (long)batchSize, channels, inputWidth, inputHeight,inputSizePerSample, outputWidth, outputHeight, outputSizePerSample, windowWidth, windowHeight, horizontalSubsample, verticalSubsample); @@ 
-2721,19 +2691,11 @@ namespace Microsoft { namespace MSR { namespace CNTK { if (!(m>0 && k>0 && l>0 && n>0)) { -#ifndef LINUX - throw std::exception("!(m>0 && k>0 && l>0 && n>0)"); //converting from size_t to int may cause overflow -#else - throw std::exception(); //converting from size_t to int may cause overflow -#endif /* LINUX */ + throw std::runtime_error("!(m>0 && k>0 && l>0 && n>0)"); //converting from size_t to int may cause overflow } if (k!=l) { -#ifndef LINUX - throw std::exception("matrix dim mismatch in MultiplyAndWeightedAdd"); -#else - throw std::exception(); -#endif /* LINUX */ + throw std::runtime_error("matrix dim mismatch in MultiplyAndWeightedAdd"); } if (sizeof(ElemType)==sizeof(float)) { @@ -2745,11 +2707,7 @@ namespace Microsoft { namespace MSR { namespace CNTK { } else { -#ifndef LINUX - throw std::exception("Unsupported template argument in GPUMatrix"); -#else - throw std::exception(); -#endif /* LINUX */ + throw std::runtime_error("Unsupported template argument in GPUMatrix"); } c.m_numRows=m; c.m_numCols=n; @@ -2818,11 +2776,7 @@ namespace Microsoft { namespace MSR { namespace CNTK { } else { -#ifndef LINUX - throw std::exception("Unsupported template argument in GPUMatrix"); -#else - throw std::exception(); -#endif /* LINUX */ + throw std::runtime_error("Unsupported template argument in GPUMatrix"); } } else if (a.GetNumElements() == 1) @@ -3096,11 +3050,7 @@ namespace Microsoft { namespace MSR { namespace CNTK { } else { -#ifndef LINUX - throw std::exception("Unsupported template argument in GPUMatrix"); -#else - throw std::exception(); -#endif /* LINUX */ + throw std::runtime_error("Unsupported template argument in GPUMatrix"); } } @@ -3110,11 +3060,7 @@ namespace Microsoft { namespace MSR { namespace CNTK { { if (alpha.GetNumElements()!=1) { -#ifndef LINUX - throw std::exception("Matrix alpha must be 1x1"); -#else - throw std::exception(); -#endif /* LINUX */ + throw std::runtime_error("Matrix alpha must be 1x1"); } cublasHandle_t 
cuHandle = GetCublasHandle(a.GetComputeDeviceId()); cublasSetPointerMode(cuHandle, CUBLAS_POINTER_MODE_DEVICE); @@ -3129,11 +3075,7 @@ namespace Microsoft { namespace MSR { namespace CNTK { else { cublasSetPointerMode(cuHandle, CUBLAS_POINTER_MODE_HOST); -#ifndef LINUX - throw std::exception("Unsupported template argument in GPUMatrix"); -#else - throw std::exception(); -#endif /* LINUX */ + throw std::runtime_error("Unsupported template argument in GPUMatrix"); } cublasSetPointerMode(cuHandle, CUBLAS_POINTER_MODE_HOST); } @@ -3242,7 +3184,7 @@ namespace Microsoft { namespace MSR { namespace CNTK { if (a.IsEmpty() || b.IsEmpty()) throw std::logic_error("InnerProductOfMatrices: one of the input matrices is empty."); - this->Resize(1,1); + Resize(1,1); const int m = (int)a.GetNumRows(); const int n = (int)a.GetNumCols(); @@ -3258,11 +3200,11 @@ namespace Microsoft { namespace MSR { namespace CNTK { cublasSetPointerMode(cuHandle, CUBLAS_POINTER_MODE_DEVICE); if (sizeof(ElemType) == sizeof(double)) { - CUBLAS_CALL(cublasDdot(cuHandle,m*n, reinterpret_cast (a.m_pArray), 1, reinterpret_cast (b.m_pArray), 1,reinterpret_cast (this->m_pArray))); + CUBLAS_CALL(cublasDdot(cuHandle,m*n, reinterpret_cast (a.m_pArray), 1, reinterpret_cast (b.m_pArray), 1,reinterpret_cast (m_pArray))); } else { - CUBLAS_CALL(cublasSdot(cuHandle,m*n, reinterpret_cast (a.m_pArray), 1, reinterpret_cast (b.m_pArray), 1,reinterpret_cast (this->m_pArray))); + CUBLAS_CALL(cublasSdot(cuHandle,m*n, reinterpret_cast (a.m_pArray), 1, reinterpret_cast (b.m_pArray), 1,reinterpret_cast (m_pArray))); } cublasSetPointerMode(cuHandle, CUBLAS_POINTER_MODE_HOST); return *this; @@ -3412,7 +3354,7 @@ namespace Microsoft { namespace MSR { namespace CNTK { // size_t elsize; // stream>>elsize; // if (sizeof(ElemType)!=elsize) - // throw std::exception("Template argument size doesn't match those in file"); + // throw std::runtime_error("Template argument size doesn't match those in file"); // std::wstring matrixName; // 
size_t numRows, numCols; // stream>>matrixName>>numRows>>numCols; @@ -3453,7 +3395,7 @@ namespace Microsoft { namespace MSR { namespace CNTK { void* GPUMatrix::s_curandGenerator=NULL; }}} -// !!!!This is from helper_cuda.h which comes with CUDA samples!!!! Consider if it is benefitial to just include all helper_cuda.h +// !!!!This is from helper_cuda.h which comes with CUDA samples!!!! Consider if it is beneficial to just include all helper_cuda.h // Beginning of GPU Architecture definitions int _ConvertSMVer2Cores(int major, int minor) { diff --git a/Math/Math/GPUMatrix.cuh b/Math/Math/GPUMatrix.cuh index ce7fdace9..c323b14fd 100644 --- a/Math/Math/GPUMatrix.cuh +++ b/Math/Math/GPUMatrix.cuh @@ -17,7 +17,7 @@ typedef struct cublasContext *cublasHandle_t; struct CUstream_st; typedef struct CUstream_st *cudaStream_t; -#ifndef LINUX +#ifdef _WIN32 #ifndef MATH_API #ifdef MATH_EXPORTS #define MATH_API __declspec(dllexport) @@ -25,7 +25,7 @@ typedef struct CUstream_st *cudaStream_t; #define MATH_API __declspec(dllimport) #endif #endif /* MATH_API */ -#else /* LINUX */ +#else // no DLLs in Linux #define MATH_API #endif @@ -51,12 +51,10 @@ namespace Microsoft { namespace MSR { namespace CNTK { public: DeviceBoundNumber() {m_data=NULL;}; DeviceBoundNumber(const DeviceBoundNumber &deepCopy); -#ifndef LINUX DeviceBoundNumber(DeviceBoundNumber &&shallowCopy); -#endif ~DeviceBoundNumber(); int GetDeviceId() const {return m_computeDevice;} - ElemType* ExposePointer2Value() const {return this->m_data;} + ElemType* ExposePointer2Value() const {return m_data;} //performs shallow copy only void ShallowCopyFrom(ElemType* newVal,int newValsDevceId); }; @@ -84,10 +82,8 @@ namespace Microsoft { namespace MSR { namespace CNTK { GPUMatrix(const size_t numRows, const size_t numCols, ElemType *pArray, const size_t matrixFlags=matrixFlagNormal,int deviceId=0); GPUMatrix(const GPUMatrix& deepCopyFrom); GPUMatrix& operator=(const GPUMatrix& deepCopyFrom); //assignment operator, deep copy 
-#ifndef LINUX GPUMatrix(GPUMatrix&& moveFrom); GPUMatrix& operator=(GPUMatrix&& moveFrom); //move assignment operator, shallow copy -#endif /* LINUX */ ~GPUMatrix(void); static int GetBestGPUDeviceId(); @@ -105,17 +101,11 @@ namespace Microsoft { namespace MSR { namespace CNTK { GPUMatrix ColumnSlice(size_t startColumn, size_t numCols) const; GPUMatrix& AssignColumnSlice(const GPUMatrix& fromMatrix, size_t startColumn, size_t numCols); - size_t BufferSize() const {return this->m_numRows*this->m_numCols*sizeof(ElemType);} - ElemType* BufferPointer() const {return this->m_pArray;} + size_t BufferSize() const {return m_numRows*m_numCols*sizeof(ElemType);} + ElemType* BufferPointer() const {return m_pArray;} void Adagrad(GPUMatrix& gradients); - void RmsProp(GPUMatrix& gradients, - ElemType RMS_GAMMA, - ElemType RMS_WGT_INC, - ElemType RMS_WGT_MAX, - ElemType RMS_WGT_DEC, - ElemType RMS_WGT_MIN - ); + void RmsProp(GPUMatrix& gradients, ElemType RMS_GAMMA, ElemType RMS_WGT_INC, ElemType RMS_WGT_MAX, ElemType RMS_WGT_DEC, ElemType RMS_WGT_MIN); void Reshape(const size_t numRows, const size_t numCols); void Resize(const size_t numRows, const size_t numCols, bool growOnly = true); //by default we only reallocate if need to grow diff --git a/Math/Math/GPUMatrixCUDAKernels.cu b/Math/Math/GPUMatrixCUDAKernels.cu index 7a116df3a..045cb3637 100644 --- a/Math/Math/GPUMatrixCUDAKernels.cu +++ b/Math/Math/GPUMatrixCUDAKernels.cu @@ -3,7 +3,9 @@ // Copyright (c) Microsoft Corporation. All rights reserved. // // + #ifndef CPU_ONLY + #include #include #include "CommonMatrix.h" @@ -20,11 +22,7 @@ #define LSMALL -0.5E10 // Predefine this for later. 
-#ifndef LINUX static __inline__ __device__ double atomicAdd(double* address, double val); -#else -static __device__ double atomicAdd(double* address, double val); -#endif //CUDA Kernels code template @@ -2634,11 +2632,7 @@ __global__ void _normalGrad( } } -#ifndef LINUX static __inline__ __device__ double atomicAdd(double* address, double val) -#else -static __device__ double atomicAdd(double* address, double val) -#endif { unsigned long long int* address_as_ull = (unsigned long long int*)address; unsigned long long int old = *address_as_ull, assumed; @@ -3237,4 +3231,4 @@ d_tmp[0] = max((ElemType)0, d_tmp[0]/max((ElemType)1.0e-10,sqrt(d_tmp[1]))/max(( } */ -#endif /*!CPU_ONLY*/ +#endif // !CPU_ONLY diff --git a/Math/Math/GPUSparseMatrix.cu b/Math/Math/GPUSparseMatrix.cu index 1882798dc..7760fc88f 100644 --- a/Math/Math/GPUSparseMatrix.cu +++ b/Math/Math/GPUSparseMatrix.cu @@ -17,11 +17,11 @@ #pragma warning (disable: 4267) // conversion from 'size_t' to 'unsigned int'; happens in CUDA <<>> syntax if a and b are size_t #pragma warning (disable: 4127) // conditional expression is constant; "if (sizeof(ElemType)==sizeof(float))" triggers this +#ifdef _WIN32 // thread local storage to access the current stream, initalize to default stream -#ifndef LINUX -extern __declspec( thread ) +extern __declspec (thread) #endif - cudaStream_t t_stream; +cudaStream_t t_stream; void CUDACALL(cudaError_t x) { @@ -30,7 +30,7 @@ void CUDACALL(cudaError_t x) const char* errmsg = cudaGetErrorString(x); std::cout<<"!!!!!!!!CUDA EXCEPTION: "< void GPUSparseMatrix::ZeroInit() { - this->m_legacy = true; - this->m_computeDevice=0; //current GPU device Id - this->m_numRows=0; - this->m_numCols=0; - this->m_elemSizeAllocated = this->m_nz = 0; //Number of non-zero elements - this->m_format = matrixFormatSparseCSR; - this->m_externalBuffer = false; - this->m_pArray=NULL; - this->m_matrixName=NULL; + m_legacy = true; + m_computeDevice=0; //current GPU device Id + m_numRows=0; + m_numCols=0; + 
m_elemSizeAllocated = m_nz = 0; //Number of non-zero elements + m_format = matrixFormatSparseCSR; + m_externalBuffer = false; + m_pArray=NULL; + m_matrixName=NULL; } template @@ -80,7 +80,7 @@ namespace Microsoft { namespace MSR { namespace CNTK { template GPUSparseMatrix::GPUSparseMatrix(const GPUSparseMatrix& deepCopy) { - this->m_legacy = true; + m_legacy = true; DeepCopy(deepCopy); } @@ -88,30 +88,30 @@ namespace Microsoft { namespace MSR { namespace CNTK { GPUSparseMatrix::GPUSparseMatrix(const size_t numRows, const size_t numCols, const size_t nz, ElemType* pArray, const size_t matrixFlags /*=matrixFormatSparseCSR*/, int deviceId /*=MANAGEDEXTERN*/, const size_t elemSizeAllocated /*=0*/) { - this->m_legacy = true; - this->m_computeDevice=deviceId; - this->m_numRows=numRows; - this->m_numCols=numCols; - this->m_nz=nz; - this->m_elemSizeAllocated=elemSizeAllocated?elemSizeAllocated:nz; - this->m_pArray = pArray; - this->m_format = (MatrixFormat)(matrixFormatMask&matrixFlags); - this->m_externalBuffer = true; + m_legacy = true; + m_computeDevice=deviceId; + m_numRows=numRows; + m_numCols=numCols; + m_nz=nz; + m_elemSizeAllocated=elemSizeAllocated?elemSizeAllocated:nz; + m_pArray = pArray; + m_format = (MatrixFormat)(matrixFormatMask&matrixFlags); + m_externalBuffer = true; } // legacy code /*template void GPUSparseMatrix::Resize(const size_t nR, const size_t nC) { - if (!this->IsEmpty()) + if (!IsEmpty()) { Clear(); } - this->m_numRows=nR; - this->m_numCols=nC; - this->m_nz=0; - this->m_elemSizeAllocated=m_nz; - this->m_pArray = NULL; + m_numRows=nR; + m_numCols=nC; + m_nz=0; + m_elemSizeAllocated=m_nz; + m_pArray = NULL; }*/ // PrepareDevice - Setup the correct cuda context for an operation @@ -129,47 +129,47 @@ namespace Microsoft { namespace MSR { namespace CNTK { template void GPUSparseMatrix::DeepCopy(const GPUSparseMatrix& deepCopy) { - this->m_computeDevice=deepCopy.m_computeDevice; - this->m_numRows=deepCopy.m_numRows; - 
this->m_numCols=deepCopy.m_numCols; - this->m_nz=deepCopy.m_nz; - this->m_elemSizeAllocated=deepCopy.m_elemSizeAllocated; - this->m_format = deepCopy.m_format; + m_computeDevice=deepCopy.m_computeDevice; + m_numRows=deepCopy.m_numRows; + m_numCols=deepCopy.m_numCols; + m_nz=deepCopy.m_nz; + m_elemSizeAllocated=deepCopy.m_elemSizeAllocated; + m_format = deepCopy.m_format; deepCopy.PrepareDevice(); // about to overwrite this buffer, so free it if we own it - if (this->OwnBuffer() && this->m_pArray!=NULL) + if (OwnBuffer() && m_pArray!=NULL) { - CUDACALL(cudaFree(this->m_pArray)); + CUDACALL(cudaFree(m_pArray)); } else if (!deepCopy.OwnBuffer()) { // just copy over the pointer, this assumses duplicate non-owned buffers are valid - this->m_pArray = deepCopy.m_pArray; + m_pArray = deepCopy.m_pArray; } else if (deepCopy.m_pArray!=NULL) { - CUDACALL(cudaMalloc((void **)&this->m_pArray,BufferSize())); - CUDACALL(cudaMemcpy(this->m_pArray,deepCopy.m_pArray,BufferSize(),cudaMemcpyDeviceToDevice)); + CUDACALL(cudaMalloc((void **)&m_pArray,BufferSize())); + CUDACALL(cudaMemcpy(m_pArray,deepCopy.m_pArray,BufferSize(),cudaMemcpyDeviceToDevice)); } else - this->m_pArray = NULL; - this->m_externalBuffer = deepCopy.m_externalBuffer; + m_pArray = NULL; + m_externalBuffer = deepCopy.m_externalBuffer; if (deepCopy.m_matrixName!=NULL) { - this->m_matrixName = new wchar_t[wcslen(deepCopy.m_matrixName)+1]; - wmemcpy(this->m_matrixName,deepCopy.m_matrixName,wcslen(deepCopy.m_matrixName)+1); + m_matrixName = new wchar_t[wcslen(deepCopy.m_matrixName)+1]; + wmemcpy(m_matrixName,deepCopy.m_matrixName,wcslen(deepCopy.m_matrixName)+1); } else - this->m_matrixName=NULL; + m_matrixName=NULL; } template void GPUSparseMatrix::SetValue(const GPUSparseMatrix& deepCopy) { - if (!this->IsEmpty()) + if (!IsEmpty()) { Clear(); } @@ -181,7 +181,7 @@ namespace Microsoft { namespace MSR { namespace CNTK { GPUMatrix GPUSparseMatrix::CopyToDenseMatrix() { GPUMatrix res; - if (this->IsEmpty()) + if (IsEmpty()) 
return res; PrepareDevice(); @@ -199,27 +199,27 @@ namespace Microsoft { namespace MSR { namespace CNTK { CUSPARSECALL(cusparseSetStream(cusparseHandle, t_stream)); if (sizeof(ElemType)==sizeof(float)) { - CUSPARSECALL(cusparseScsr2dense(cusparseHandle,int(this->m_numRows),int(this->m_numCols),descr,(float*)NzLocation(),RowLocation(),ColLocation(),(float*)pArrayDev,int(this->m_numRows))); + CUSPARSECALL(cusparseScsr2dense(cusparseHandle,int(m_numRows),int(m_numCols),descr,(float*)NzLocation(),RowLocation(),ColLocation(),(float*)pArrayDev,int(m_numRows))); } else { - CUSPARSECALL(cusparseDcsr2dense(cusparseHandle,int(this->m_numRows),int(this->m_numCols),descr,(double*)NzLocation(),RowLocation(),ColLocation(),(double*)pArrayDev,int(this->m_numRows))); + CUSPARSECALL(cusparseDcsr2dense(cusparseHandle,int(m_numRows),int(m_numCols),descr,(double*)NzLocation(),RowLocation(),ColLocation(),(double*)pArrayDev,int(m_numRows))); } CUDACALL(cudaEventRecord(done)); CUDACALL(cudaEventSynchronize(done)); CUDACALL(cudaEventDestroy(done)); CUSPARSECALL(cusparseDestroy(cusparseHandle)); - res.SetValue(this->m_numRows,this->m_numCols,pArrayDev,(matrixFlagNormal|matrixFlagSetValueOnDevice)); + res.SetValue(m_numRows,m_numCols,pArrayDev,(matrixFlagNormal|matrixFlagSetValueOnDevice)); if (pArrayDev!=NULL) CUDACALL(cudaFree(pArrayDev)); - res.SetMatrixName(this->m_matrixName); + res.SetMatrixName(m_matrixName); return res; } template void GPUSparseMatrix::SetValue(const GPUMatrix& denseMatrix) { - if (!this->IsEmpty()) + if (!IsEmpty()) { Clear(); } @@ -232,12 +232,12 @@ namespace Microsoft { namespace MSR { namespace CNTK { cusparseSetMatType(descr,CUSPARSE_MATRIX_TYPE_GENERAL); cusparseSetMatIndexBase(descr,CUSPARSE_INDEX_BASE_ZERO); - this->m_numRows = denseMatrix.GetNumRows(); //m - this->m_numCols = denseMatrix.GetNumCols(); //n - this->m_format = matrixFormatSparseCSR; + m_numRows = denseMatrix.GetNumRows(); //m + m_numCols = denseMatrix.GetNumCols(); //n + m_format = 
matrixFormatSparseCSR; int *nnzPerRow = NULL; - CUDACALL(cudaMalloc((void**)&nnzPerRow,sizeof(int)*this->m_numCols)); + CUDACALL(cudaMalloc((void**)&nnzPerRow,sizeof(int)*m_numCols)); int nnzTotalDevHostPtr = -1; @@ -245,43 +245,43 @@ namespace Microsoft { namespace MSR { namespace CNTK { CUDACALL(cudaEventCreate(&done)); if (sizeof(ElemType)==sizeof(float)) { - CUSPARSECALL(cusparseSnnz(cusparseHandle,(this->m_format&matrixFormatRowMajor)?CUSPARSE_DIRECTION_ROW:CUSPARSE_DIRECTION_COLUMN,(int)this->m_numRows,(int)this->m_numCols,descr, - reinterpret_cast(denseMatrix.BufferPointer()), (int)this->m_numRows,nnzPerRow,&nnzTotalDevHostPtr)); + CUSPARSECALL(cusparseSnnz(cusparseHandle,(m_format&matrixFormatRowMajor)?CUSPARSE_DIRECTION_ROW:CUSPARSE_DIRECTION_COLUMN,(int)m_numRows,(int)m_numCols,descr, + reinterpret_cast(denseMatrix.BufferPointer()), (int)m_numRows,nnzPerRow,&nnzTotalDevHostPtr)); } else { - CUSPARSECALL(cusparseDnnz(cusparseHandle,(this->m_format&matrixFormatRowMajor)?CUSPARSE_DIRECTION_ROW:CUSPARSE_DIRECTION_COLUMN,(int)this->m_numRows,(int)this->m_numCols,descr, - reinterpret_cast(denseMatrix.BufferPointer()), (int)this->m_numRows,nnzPerRow,&nnzTotalDevHostPtr)); + CUSPARSECALL(cusparseDnnz(cusparseHandle,(m_format&matrixFormatRowMajor)?CUSPARSE_DIRECTION_ROW:CUSPARSE_DIRECTION_COLUMN,(int)m_numRows,(int)m_numCols,descr, + reinterpret_cast(denseMatrix.BufferPointer()), (int)m_numRows,nnzPerRow,&nnzTotalDevHostPtr)); } CUDACALL(cudaEventRecord(done)); CUDACALL(cudaEventSynchronize(done)); CUDACALL(cudaEventDestroy(done)); // about to overwrite this buffer, so free it if we own it - if (this->OwnBuffer() && this->m_pArray!=NULL) + if (OwnBuffer() && m_pArray!=NULL) { - CUDACALL(cudaFree(this->m_pArray)); + CUDACALL(cudaFree(m_pArray)); } //allocate memory for sparse matrix - this->m_elemSizeAllocated = this->m_nz = nnzTotalDevHostPtr; - CUDACALL(cudaMalloc((void**)&this->m_pArray,BufferSize())); - this->m_externalBuffer = false; + m_elemSizeAllocated = 
m_nz = nnzTotalDevHostPtr; + CUDACALL(cudaMalloc((void**)&m_pArray,BufferSize())); + m_externalBuffer = false; CUDACALL(cudaEventCreate(&done)); if (sizeof(ElemType)==sizeof(float)) { - CUSPARSECALL(cusparseSdense2csr(cusparseHandle,(int)this->m_numRows,(int)this->m_numCols,descr,reinterpret_cast(denseMatrix.BufferPointer()), - (int)this->m_numRows,nnzPerRow,reinterpret_cast(NzLocation()),RowLocation(),ColLocation())); + CUSPARSECALL(cusparseSdense2csr(cusparseHandle,(int)m_numRows,(int)m_numCols,descr,reinterpret_cast(denseMatrix.BufferPointer()), + (int)m_numRows,nnzPerRow,reinterpret_cast(NzLocation()),RowLocation(),ColLocation())); } else { - CUSPARSECALL(cusparseDdense2csr(cusparseHandle,(int)this->m_numRows,(int)this->m_numCols,descr,reinterpret_cast(denseMatrix.BufferPointer()), - (int)this->m_numRows,nnzPerRow,reinterpret_cast(NzLocation()),RowLocation(),ColLocation())); + CUSPARSECALL(cusparseDdense2csr(cusparseHandle,(int)m_numRows,(int)m_numCols,descr,reinterpret_cast(denseMatrix.BufferPointer()), + (int)m_numRows,nnzPerRow,reinterpret_cast(NzLocation()),RowLocation(),ColLocation())); } CUDACALL(cudaEventRecord(done)); CUDACALL(cudaEventSynchronize(done)); - this->SetMatrixName(denseMatrix.GetMatrixName()); + SetMatrixName(denseMatrix.GetMatrixName()); } template @@ -294,19 +294,18 @@ namespace Microsoft { namespace MSR { namespace CNTK { return *this; } -#ifndef LINUX template GPUSparseMatrix::GPUSparseMatrix(GPUSparseMatrix&& moveFrom) { - this->m_computeDevice=moveFrom.m_computeDevice; - this->m_numRows=moveFrom.m_numRows; - this->m_numCols=moveFrom.m_numCols; - this->m_nz=moveFrom.m_nz; - this->m_elemSizeAllocated = moveFrom.m_elemSizeAllocated; - this->m_pArray = moveFrom.m_pArray; - this->m_format = moveFrom.m_format; - this->m_externalBuffer = moveFrom.m_externalBuffer; - this->m_matrixName=moveFrom.m_matrixName; + m_computeDevice=moveFrom.m_computeDevice; + m_numRows=moveFrom.m_numRows; + m_numCols=moveFrom.m_numCols; + m_nz=moveFrom.m_nz; + 
m_elemSizeAllocated = moveFrom.m_elemSizeAllocated; + m_pArray = moveFrom.m_pArray; + m_format = moveFrom.m_format; + m_externalBuffer = moveFrom.m_externalBuffer; + m_matrixName=moveFrom.m_matrixName; moveFrom.ZeroInit(); } @@ -315,27 +314,26 @@ namespace Microsoft { namespace MSR { namespace CNTK { GPUSparseMatrix& GPUSparseMatrix::operator=(GPUSparseMatrix&& moveFrom) { Clear(); - this->m_computeDevice=moveFrom.m_computeDevice; - this->m_numRows=moveFrom.m_numRows; - this->m_numCols=moveFrom.m_numCols; - this->m_nz=moveFrom.m_nz; - this->m_elemSizeAllocated = moveFrom.m_elemSizeAllocated; - this->m_pArray = moveFrom.m_pArray; - this->m_format = moveFrom.m_format; - this->m_externalBuffer = moveFrom.m_externalBuffer; + m_computeDevice=moveFrom.m_computeDevice; + m_numRows=moveFrom.m_numRows; + m_numCols=moveFrom.m_numCols; + m_nz=moveFrom.m_nz; + m_elemSizeAllocated = moveFrom.m_elemSizeAllocated; + m_pArray = moveFrom.m_pArray; + m_format = moveFrom.m_format; + m_externalBuffer = moveFrom.m_externalBuffer; - this->m_matrixName=moveFrom.m_matrixName; + m_matrixName=moveFrom.m_matrixName; moveFrom.m_pArray = NULL; moveFrom.m_matrixName=NULL; return *this; } -#endif /* LINUX */ template GPUSparseMatrix::~GPUSparseMatrix() { - if(this->m_legacy) + if(m_legacy) { Clear(); } @@ -348,26 +346,26 @@ namespace Microsoft { namespace MSR { namespace CNTK { template void GPUSparseMatrix::ClearNew() { - if (this->m_matrixName!=NULL) + if (m_matrixName!=NULL) { - delete[] this->m_matrixName; - this->m_matrixName = NULL; + delete[] m_matrixName; + m_matrixName = NULL; } - if(this->m_format == matrixFormatSparseCSC || this->m_format == matrixFormatSparseCSR) + if(m_format == matrixFormatSparseCSC || m_format == matrixFormatSparseCSR) { - if(this->m_val != NULL) - CUDACALL(cudaFree(this->m_val)); - if(this->m_row != NULL) - CUDACALL(cudaFree(this->m_row)); - if(this->m_pb != NULL) - CUDACALL(cudaFree(this->m_pb)); + if(m_val != NULL) + CUDACALL(cudaFree(m_val)); + if(m_row != 
NULL) + CUDACALL(cudaFree(m_row)); + if(m_pb != NULL) + CUDACALL(cudaFree(m_pb)); } - else if (this->m_format == matrixFormatSparseBlockCol || this->m_format == matrixFormatSparseBlockRow) + else if (m_format == matrixFormatSparseBlockCol || m_format == matrixFormatSparseBlockRow) { - if(this->m_blockVal != NULL) - CUDACALL(cudaFree(this->m_blockVal)); - if(this->m_blockIds != NULL) - CUDACALL(cudaFree(this->m_blockIds)); + if(m_blockVal != NULL) + CUDACALL(cudaFree(m_blockVal)); + if(m_blockIds != NULL) + CUDACALL(cudaFree(m_blockIds)); } } @@ -375,10 +373,10 @@ namespace Microsoft { namespace MSR { namespace CNTK { template void GPUSparseMatrix::Clear() { - if (this->m_pArray!=NULL) - CUDACALL(cudaFree(this->m_pArray)); - if (this->m_matrixName!=NULL) - delete[] this->m_matrixName; + if (m_pArray!=NULL) + CUDACALL(cudaFree(m_pArray)); + if (m_matrixName!=NULL) + delete[] m_matrixName; ZeroInit(); } @@ -390,19 +388,19 @@ namespace Microsoft { namespace MSR { namespace CNTK { { bool reallocate = (BufferSize() != a.BufferSize()); - this->m_numRows=a.m_numRows; - this->m_numCols=a.m_numCols; - this->m_nz=a.m_nz; - this->m_elemSizeAllocated = a.m_elemSizeAllocated; - this->m_format = a.m_format; + m_numRows=a.m_numRows; + m_numCols=a.m_numCols; + m_nz=a.m_nz; + m_elemSizeAllocated = a.m_elemSizeAllocated; + m_format = a.m_format; if (reallocate) { - if (!this->OwnBuffer()) + if (!OwnBuffer()) throw std::runtime_error("cannot reallocate a buffer not owned by the matrix"); - if (this->m_pArray!=NULL) - CUDACALL(cudaFree(this->m_pArray)); - CUDACALL(cudaMalloc((void **)&this->m_pArray,BufferSize())); + if (m_pArray!=NULL) + CUDACALL(cudaFree(m_pArray)); + CUDACALL(cudaMalloc((void **)&m_pArray,BufferSize())); } // copy over the non-zero locations from the source matrix @@ -417,33 +415,33 @@ namespace Microsoft { namespace MSR { namespace CNTK { template void GPUSparseMatrix::Init() { - this->m_legacy = false; - this->m_numRows = 0; - this->m_numCols = 0; - 
this->m_elemSizeAllocated = 0; - this->m_externalBuffer = false; - this->m_pArray = NULL; + m_legacy = false; + m_numRows = 0; + m_numCols = 0; + m_elemSizeAllocated = 0; + m_externalBuffer = false; + m_pArray = NULL; PrepareDevice(); - this->m_nz = 0; - this->m_matrixName = NULL; + m_nz = 0; + m_matrixName = NULL; - if(this->m_format == matrixFormatSparseCSC || this->m_format == matrixFormatSparseCSR) + if(m_format == matrixFormatSparseCSC || m_format == matrixFormatSparseCSR) { - this->m_colIdx = -1; - this->m_val = NULL; - this->m_row = NULL; - this->m_pb = NULL; - this->m_rowIdx = NULL; - this->m_col = NULL; + m_colIdx = -1; + m_val = NULL; + m_row = NULL; + m_pb = NULL; + m_rowIdx = NULL; + m_col = NULL; - this->m_block2Id = NULL; - this->m_block2UniqId = NULL; + m_block2Id = NULL; + m_block2UniqId = NULL; } - else if (this->m_format == matrixFormatSparseBlockCol || this->m_format == matrixFormatSparseBlockRow) + else if (m_format == matrixFormatSparseBlockCol || m_format == matrixFormatSparseBlockRow) { - this->m_blockSize = 0; - this->m_blockVal = NULL; - this->m_blockIds = NULL; + m_blockSize = 0; + m_blockVal = NULL; + m_blockIds = NULL; } } @@ -454,71 +452,71 @@ namespace Microsoft { namespace MSR { namespace CNTK { { throw std::logic_error("GPUSparseMatrix: unsupported sparse matrix format"); } - this->m_format = format; - this->m_computeDevice = deviceId; + m_format = format; + m_computeDevice = deviceId; Init(); } template ElemType* GPUSparseMatrix::BufferPointer() const { - if(this->m_format == matrixFormatSparseCSC || this->m_format == matrixFormatSparseCSR) + if(m_format == matrixFormatSparseCSC || m_format == matrixFormatSparseCSR) { - return this->m_val; + return m_val; } else { - return this->m_blockVal; + return m_blockVal; } } template void GPUSparseMatrix::Resize(const size_t numRows, const size_t numCols, size_t size) { - this->m_nz = 0; - this->m_colIdx = -1; - this->m_numRows = numRows; - this->m_numCols = numCols; - 
if(this->m_elemSizeAllocated < size) + m_nz = 0; + m_colIdx = -1; + m_numRows = numRows; + m_numCols = numCols; + if(m_elemSizeAllocated < size) { - this->m_elemSizeAllocated = size; - if(this->m_format == matrixFormatSparseCSC || this->m_format == matrixFormatSparseCSR) + m_elemSizeAllocated = size; + if(m_format == matrixFormatSparseCSC || m_format == matrixFormatSparseCSR) { - if(this->m_val != NULL) - CUDACALL(cudaFree(this->m_val)); - if(this->m_row != NULL) - CUDACALL(cudaFree(this->m_row)); - if(this->m_pb != NULL) - CUDACALL(cudaFree(this->m_pb)); - if(this->m_rowIdx != NULL) - CUDACALL(cudaFree(this->m_rowIdx)); - if(this->m_col != NULL) - CUDACALL(cudaFree(this->m_col)); - if(this->m_block2Id != NULL) - CUDACALL(cudaFree(this->m_block2Id)); - if(this->m_block2UniqId != NULL) - CUDACALL(cudaFree(this->m_block2UniqId)); + if(m_val != NULL) + CUDACALL(cudaFree(m_val)); + if(m_row != NULL) + CUDACALL(cudaFree(m_row)); + if(m_pb != NULL) + CUDACALL(cudaFree(m_pb)); + if(m_rowIdx != NULL) + CUDACALL(cudaFree(m_rowIdx)); + if(m_col != NULL) + CUDACALL(cudaFree(m_col)); + if(m_block2Id != NULL) + CUDACALL(cudaFree(m_block2Id)); + if(m_block2UniqId != NULL) + CUDACALL(cudaFree(m_block2UniqId)); PrepareDevice(); - CUDACALL(cudaMalloc((void **)&this->m_val,sizeof(ElemType)*size)); - CUDACALL(cudaMalloc((void **)&this->m_row,sizeof(size_t)*size)); - int len = this->m_format == matrixFormatSparseCSC ? numCols : numRows; - CUDACALL(cudaMalloc((void **)&this->m_pb,sizeof(size_t)*(len+1))); - CUDACALL(cudaMalloc((void **)&this->m_rowIdx,sizeof(size_t)*size)); - CUDACALL(cudaMalloc((void **)&this->m_col,sizeof(size_t)*size)); - CUDACALL(cudaMalloc((void **)&this->m_block2Id,sizeof(size_t)*(numCols*2))); - CUDACALL(cudaMalloc((void **)&this->m_block2UniqId,sizeof(size_t)*(numCols*2))); + CUDACALL(cudaMalloc((void **)&m_val,sizeof(ElemType)*size)); + CUDACALL(cudaMalloc((void **)&m_row,sizeof(size_t)*size)); + int len = m_format == matrixFormatSparseCSC ? 
numCols : numRows; + CUDACALL(cudaMalloc((void **)&m_pb,sizeof(size_t)*(len+1))); + CUDACALL(cudaMalloc((void **)&m_rowIdx,sizeof(size_t)*size)); + CUDACALL(cudaMalloc((void **)&m_col,sizeof(size_t)*size)); + CUDACALL(cudaMalloc((void **)&m_block2Id,sizeof(size_t)*(numCols*2))); + CUDACALL(cudaMalloc((void **)&m_block2UniqId,sizeof(size_t)*(numCols*2))); } - else if(this->m_format == matrixFormatSparseBlockCol || this->m_format == matrixFormatSparseBlockRow) + else if(m_format == matrixFormatSparseBlockCol || m_format == matrixFormatSparseBlockRow) { - if(this->m_blockVal != NULL) - CUDACALL(cudaFree(this->m_blockVal)); - if(this->m_blockIds != NULL) - CUDACALL(cudaFree(this->m_blockIds)); + if(m_blockVal != NULL) + CUDACALL(cudaFree(m_blockVal)); + if(m_blockIds != NULL) + CUDACALL(cudaFree(m_blockIds)); PrepareDevice(); - CUDACALL(cudaMalloc((void **)&this->m_blockVal,sizeof(ElemType)*size)); + CUDACALL(cudaMalloc((void **)&m_blockVal,sizeof(ElemType)*size)); int max = numCols > numRows ? 
numCols : numRows; - CUDACALL(cudaMalloc((void **)&this->m_blockIds,sizeof(size_t)*max)); + CUDACALL(cudaMalloc((void **)&m_blockIds,sizeof(size_t)*max)); } } } @@ -527,9 +525,9 @@ namespace Microsoft { namespace MSR { namespace CNTK { template void GPUSparseMatrix::Reset() { - this->m_nz = 0; - this->m_colIdx = -1; - this->m_blockSize = 0; + m_nz = 0; + m_colIdx = -1; + m_blockSize = 0; } #pragma endregion Constructors and Destructor @@ -540,46 +538,46 @@ namespace Microsoft { namespace MSR { namespace CNTK { template void GPUSparseMatrix::SetMatrixFromCSCFormat(size_t *h_row, size_t *h_rowIdx, size_t size, size_t blockSize) { - if(this->m_format != matrixFormatSparseCSC) + if(m_format != matrixFormatSparseCSC) { throw std::logic_error("CPUSparseMatrix: unsupported SetValue() call."); } - if(this->m_elemSizeAllocated < size) + if(m_elemSizeAllocated < size) { throw std::logic_error("CPUSparseMatrix: allocated size is too small."); } Reset(); - this->m_nz = size; - this->m_blockSize = blockSize; + m_nz = size; + m_blockSize = blockSize; PrepareDevice(); - CUDACALL(cudaMemcpy(this->m_row, h_row, sizeof(size_t)*size,cudaMemcpyHostToDevice)); - CUDACALL(cudaMemcpy(this->m_rowIdx, h_rowIdx, sizeof(size_t)*size,cudaMemcpyHostToDevice)); + CUDACALL(cudaMemcpy(m_row, h_row, sizeof(size_t)*size,cudaMemcpyHostToDevice)); + CUDACALL(cudaMemcpy(m_rowIdx, h_rowIdx, sizeof(size_t)*size,cudaMemcpyHostToDevice)); } template void GPUSparseMatrix::SetMatrixFromLabelAndClass(size_t *h_row, size_t *h_block2Id, size_t *h_block2UniqId, size_t labelSize, size_t expandedSize, size_t blockSize) { - if(this->m_format != matrixFormatSparseCSC) + if(m_format != matrixFormatSparseCSC) { throw std::logic_error("CPUSparseMatrix: unsupported SetValue() call."); } - if(this->m_elemSizeAllocated < labelSize) + if(m_elemSizeAllocated < labelSize) { throw std::logic_error("CPUSparseMatrix: allocated size is too small."); } Reset(); - this->m_nz = labelSize; - this->m_expandedSize = expandedSize; - 
this->m_blockSize = blockSize; + m_nz = labelSize; + m_expandedSize = expandedSize; + m_blockSize = blockSize; PrepareDevice(); - CUDACALL(cudaMemcpy(this->m_row, h_row, sizeof(size_t)*labelSize,cudaMemcpyHostToDevice)); - CUDACALL(cudaMemcpy(this->m_block2Id, h_block2Id, sizeof(size_t)*labelSize,cudaMemcpyHostToDevice)); - CUDACALL(cudaMemcpy(this->m_block2UniqId, h_block2UniqId, sizeof(size_t)*labelSize,cudaMemcpyHostToDevice)); + CUDACALL(cudaMemcpy(m_row, h_row, sizeof(size_t)*labelSize,cudaMemcpyHostToDevice)); + CUDACALL(cudaMemcpy(m_block2Id, h_block2Id, sizeof(size_t)*labelSize,cudaMemcpyHostToDevice)); + CUDACALL(cudaMemcpy(m_block2UniqId, h_block2UniqId, sizeof(size_t)*labelSize,cudaMemcpyHostToDevice)); } // forward pass from feature to hidden layer @@ -589,7 +587,7 @@ namespace Microsoft { namespace MSR { namespace CNTK { { if (lhs.GetComputeDeviceId()!=rhs.GetComputeDeviceId()||(lhs.GetComputeDeviceId()!=c.GetComputeDeviceId())) - throw stdException("MultiplyAndWeightedAddStD: All matrices must be on the same GPU"); + throw std::runtime_error("MultiplyAndWeightedAddStD: All matrices must be on the same GPU"); if (lhs.IsEmpty() || rhs.IsEmpty()) throw std::logic_error("LeftMultiplyAndAdd: one of the input matrix is empty."); @@ -658,7 +656,7 @@ namespace Microsoft { namespace MSR { namespace CNTK { const GPUSparseMatrix& rhs, const bool transposeB, GPUSparseMatrix& c) { if (lhs.GetComputeDeviceId()!=rhs.GetComputeDeviceId()) - throw stdException("GPUSparseMatrix::MultiplyAndAdd: All matrices must be on the same GPU"); + throw std::runtime_error("GPUSparseMatrix::MultiplyAndAdd: All matrices must be on the same GPU"); int m = transposeA? (int)lhs.GetNumCols(): (int)lhs.GetNumRows(); int k = transposeA? 
(int)lhs.GetNumRows(): (int)lhs.GetNumCols(); @@ -719,7 +717,7 @@ namespace Microsoft { namespace MSR { namespace CNTK { void GPUSparseMatrix::ScaleAndAdd(const ElemType alpha, const GPUSparseMatrix& lhs, GPUMatrix& rhs) { if (lhs.GetComputeDeviceId()!=rhs.GetComputeDeviceId()) - throw stdException("GPUSparseMatrix::ScaleAndAdd: All matrices must be on the same GPU"); + throw std::runtime_error("GPUSparseMatrix::ScaleAndAdd: All matrices must be on the same GPU"); if (lhs.m_format == matrixFormatSparseBlockCol || lhs.m_format == matrixFormatSparseBlockRow) { @@ -743,7 +741,7 @@ namespace Microsoft { namespace MSR { namespace CNTK { } else { - throw stdException("GPUSparseMatrix:: ScaleAndAdd() Not implemented"); + throw std::runtime_error("GPUSparseMatrix:: ScaleAndAdd() Not implemented"); } } @@ -761,7 +759,7 @@ namespace Microsoft { namespace MSR { namespace CNTK { int deviceId = a.GetComputeDeviceId(); if (weight.GetComputeDeviceId()!=deviceId || label.GetComputeDeviceId()!=deviceId || cls.GetComputeDeviceId()!=deviceId || idx2cls.GetComputeDeviceId()!=deviceId || etp.GetComputeDeviceId()!=deviceId ) - throw stdException("GPUSparseMatrix:: ClassEntropy() All matrices must be on the same GPU"); + throw std::runtime_error("GPUSparseMatrix:: ClassEntropy() All matrices must be on the same GPU"); size_t nC = cls.GetNumCols(); size_t nV = label.GetNumRows() - nC; @@ -836,7 +834,7 @@ namespace Microsoft { namespace MSR { namespace CNTK { { int deviceId = error.GetComputeDeviceId(); if (weight.GetComputeDeviceId()!=deviceId || grd.GetComputeDeviceId()!=deviceId ) - throw stdException("GPUSparseMatrix::ClassEntropyGradientOfInput() All matrices must be on the same GPU"); + throw std::runtime_error("GPUSparseMatrix::ClassEntropyGradientOfInput() All matrices must be on the same GPU"); grd.SetValue((ElemType)0); cudaEvent_t done = nullptr; @@ -863,7 +861,7 @@ namespace Microsoft { namespace MSR { namespace CNTK { { int deviceId = error.GetComputeDeviceId(); if 
(input.GetComputeDeviceId()!=deviceId || label.GetComputeDeviceId()!=deviceId || cls.GetComputeDeviceId()!=deviceId || idx2cls.GetComputeDeviceId()!=deviceId || grd.GetComputeDeviceId()!=deviceId ) - throw stdException("GPUSparseMatrix::ClassEntropyGradientOfWeight() All matrices must be on the same GPU"); + throw std::runtime_error("GPUSparseMatrix::ClassEntropyGradientOfWeight() All matrices must be on the same GPU"); grd.SetFormat(matrixFormatSparseBlockRow); size_t nz = label.m_blockSize * grd.GetNumCols(); @@ -903,20 +901,20 @@ namespace Microsoft { namespace MSR { namespace CNTK { template GPUSparseMatrix& GPUSparseMatrix::InplaceTruncate (const ElemType threshold) { - if(this->m_format == matrixFormatSparseBlockCol || this->m_format == matrixFormatSparseBlockRow) + if(m_format == matrixFormatSparseBlockCol || m_format == matrixFormatSparseBlockRow) { long N=(long)GetNZElements(); int blocksPerGrid =(int)ceil(N*1.0/threadsPerBlock); cudaEvent_t done = nullptr; CUDACALL(cudaEventCreate(&done)); - _inplaceTruncate<<>>(this->m_blockVal,threshold,N); + _inplaceTruncate<<>>(m_blockVal,threshold,N); CUDACALL(cudaEventRecord(done)); CUDACALL(cudaEventSynchronize(done)); CUDACALL(cudaEventDestroy(done)); } else { - throw stdException("GPUSparseMatrix:: InplaceTruncate() only support block based sparse matrix"); + throw std::runtime_error("GPUSparseMatrix:: InplaceTruncate() only support block based sparse matrix"); } return *this; } @@ -927,11 +925,11 @@ namespace Microsoft { namespace MSR { namespace CNTK { { if (c.IsEmpty()) { - c.Resize(this->GetNumRows(), this->GetNumCols()); + c.Resize(GetNumRows(), GetNumCols()); c.SetValue(0.0); } - if(this->m_format == matrixFormatSparseBlockCol || this->m_format == matrixFormatSparseBlockRow) + if(m_format == matrixFormatSparseBlockCol || m_format == matrixFormatSparseBlockRow) { size_t blocksPerGrid = m_blockSize; bool isBlockCol = (m_format == MatrixFormat::matrixFormatSparseBlockCol); @@ -942,8 +940,8 @@ namespace 
Microsoft { namespace MSR { namespace CNTK { isBlockCol, len, momentum, - this->m_blockIds, - this->m_blockVal, + m_blockIds, + m_blockVal, c.BufferPointer(), c.GetNumRows()); CUDACALL(cudaEventRecord(done)); @@ -952,7 +950,7 @@ namespace Microsoft { namespace MSR { namespace CNTK { } else { - throw stdException("GPUSparseMatrix:: NormalGrad() only support block sparse format"); + throw std::runtime_error("GPUSparseMatrix:: NormalGrad() only support block sparse format"); } } @@ -965,7 +963,7 @@ namespace Microsoft { namespace MSR { namespace CNTK { const GPUMatrix& b, ElemType beta, GPUMatrix& c) { if (a.GetComputeDeviceId()!=b.GetComputeDeviceId()||(b.GetComputeDeviceId()!=a.GetComputeDeviceId())) - throw stdException("MultiplyAndWeightedAddStD: All matrices must be on the same GPU"); + throw std::runtime_error("MultiplyAndWeightedAddStD: All matrices must be on the same GPU"); a.PrepareDevice(); cusparseHandle_t cusparseHandle = 0; CUSPARSECALL(cusparseCreate(&cusparseHandle)); @@ -1027,7 +1025,7 @@ namespace Microsoft { namespace MSR { namespace CNTK { size_t GPUSparseMatrix::ElemCountFromBufferSize(size_t totalBufferSize) { size_t elemSizeAllocated; - if (this->m_format & matrixFormatCompressed) + if (m_format & matrixFormatCompressed) { elemSizeAllocated = (totalBufferSize-CompressedIndexSize())/(sizeof(int)+sizeof(ElemType)); } @@ -1044,11 +1042,7 @@ namespace Microsoft { namespace MSR { namespace CNTK { // canReuseBuffer - target matrix can be reused for temporary space // func - function to call to count elements in the result (returns count, and fills csrRowPtr array) template -#ifndef LINUX void GPUSparseMatrix::PrepareBuffer(size_t m, size_t n, bool canReuseBuffer, std::function func) -#else - void GPUSparseMatrix::PrepareBuffer(size_t m, size_t n, bool canReuseBuffer, size_t (*func)(int *csRowPtrC)) -#endif /* LINUX */ { int* csrRowPtrC=NULL; GPUSparseMatrix& c = *this; @@ -1103,16 +1097,6 @@ namespace Microsoft { namespace MSR { namespace CNTK { 
CUDACALL(cudaFree(csrRowPtrC)); } -#ifdef LINUXxx - size_t PrepareBufferMultiply(int* csrRowPtrC) - { - int nnzTotal = -1; - CUSPARSECALL(cusparseXcsrgemmNnz(cusparseHandle,operA,operB,m,n,k,descrA,nnzA,S1.RowLocation(),S1.ColLocation(),descrB,nnzB, - S2.RowLocation(),S2.ColLocation(),descrC,csrRowPtrC,&nnzTotal)); - return nnzTotal; - } -#endif - // Multiply - multiply one spares matrix by another sparse matrix // S1 - first sparse matrix // transposeS1 - transpose first matrix? @@ -1124,7 +1108,7 @@ namespace Microsoft { namespace MSR { namespace CNTK { void GPUSparseMatrix::Multiply(const GPUSparseMatrix& S1, bool transposeS1, const GPUSparseMatrix& S2, bool transposeS2, GPUSparseMatrix &c) { if (S1.GetComputeDeviceId()!=S2.GetComputeDeviceId()) - throw stdException("Sparse matrix multiply: both matrices must be on the same device"); + throw std::runtime_error("Sparse matrix multiply: both matrices must be on the same device"); S1.PrepareDevice(); cusparseHandle_t cusparseHandle = 0; @@ -1141,7 +1125,7 @@ namespace Microsoft { namespace MSR { namespace CNTK { int k = int(transposeS1 ? S1.GetNumRows() : S1.GetNumCols()); int l = int(transposeS2 ? 
S2.GetNumCols() : S2.GetNumRows()); if (k!=l) - throw stdException("Sparse matrix multiply: dimensionality mismatch"); + throw std::runtime_error("Sparse matrix multiply: dimensionality mismatch"); int nnzA = (int)S1.GetNZElements(); int nnzB = (int)S2.GetNZElements(); @@ -1150,18 +1134,13 @@ namespace Microsoft { namespace MSR { namespace CNTK { CUDACALL(cudaEventCreate(&done)); //Step 1 c.PrepareBuffer(m, n, true, // true means we can reuse the "c" buffer if it exists for temporaries -#ifndef LINUX [&](int* csrRowPtrC) -> size_t { int nnzTotal = -1; CUSPARSECALL(cusparseXcsrgemmNnz(cusparseHandle,operA,operB,m,n,k,descrA,nnzA,S1.RowLocation(),S1.ColLocation(),descrB,nnzB, S2.RowLocation(),S2.ColLocation(),descrC,csrRowPtrC,&nnzTotal)); return nnzTotal; - } -#else - NULL // PrepareBufferMultiply -#endif - ); + }); //Step 2 @@ -1194,9 +1173,9 @@ namespace Microsoft { namespace MSR { namespace CNTK { void GPUSparseMatrix::ScaleAndAdd(ElemType alpha,const GPUSparseMatrix& a, ElemType beta, const GPUSparseMatrix& b, GPUSparseMatrix& c) { if (a.GetNumCols()!=b.GetNumCols() || a.GetNumRows()!=b.GetNumRows()) - throw new stdException("Dimensions mismatch in ScaleAndAdd"); + throw new std::runtime_error("Dimensions mismatch in ScaleAndAdd"); if (a.GetComputeDeviceId()!=b.GetComputeDeviceId()) - throw new stdException("ScaleAndAdd: matrices must be on the same device"); + throw new std::runtime_error("ScaleAndAdd: matrices must be on the same device"); int m = (int)a.GetNumRows(); int n = (int)a.GetNumCols(); @@ -1215,18 +1194,12 @@ namespace Microsoft { namespace MSR { namespace CNTK { CUDACALL(cudaEventCreate(&done)); //Step 1 bool inOutParameter = (&b == &c); - c.PrepareBuffer(m, n, !inOutParameter, -#ifndef LINUX - [&] (int* csrRowPtrC) -> size_t + c.PrepareBuffer(m, n, !inOutParameter, [&] (int* csrRowPtrC) -> size_t { int nnzTotal = -1; 
CUSPARSECALL(cusparseXcsrgeamNnz(cusparseHandle,m,n,descrA,nnzA,a.RowLocation(),a.ColLocation(),descrB,nnzB,b.RowLocation(),b.ColLocation(),descrC,csrRowPtrC,&nnzTotal)); return nnzTotal; - } -#else - NULL -#endif // Linux - ); + }); //Step 2 if (sizeof(ElemType)==sizeof(float)) @@ -1251,7 +1224,7 @@ namespace Microsoft { namespace MSR { namespace CNTK { if (a.GetNumRows()!=b.GetNumRows()||a.GetNumRows()!=c.GetNumRows()||a.GetNumCols()!=b.GetNumCols()||a.GetNumCols()!=c.GetNumCols()) throw std::logic_error("ScaleAndAdd: dimension mismatch"); if (a.GetComputeDeviceId()!=b.GetComputeDeviceId()||a.GetComputeDeviceId()!=c.GetComputeDeviceId()) - throw stdException("ScaleAndAdd: matrices must be on the same device"); + throw std::runtime_error("ScaleAndAdd: matrices must be on the same device"); b.PrepareDevice(); //copy b to c CUDACALL(cudaMemcpy(c.BufferPointer(),b.BufferPointer(),sizeof(ElemType)*b.GetNumElements(),cudaMemcpyDeviceToDevice)); @@ -1320,7 +1293,7 @@ namespace Microsoft { namespace MSR { namespace CNTK { ElemType GPUSparseMatrix::InnerProductOfMatrices(const GPUSparseMatrix& a, const GPUMatrix& b) { if (a.GetComputeDeviceId()!=b.GetComputeDeviceId()) - throw stdException("a and b must be on the same device"); + throw std::runtime_error("a and b must be on the same device"); //This implementation requires additional memory //need to put a in ColumnMajor format @@ -1464,14 +1437,14 @@ namespace Microsoft { namespace MSR { namespace CNTK { int GPUSparseMatrix::GetComputeDeviceId() const { // for externally managed memory the CUDA context will have the current device - if (this->m_computeDevice == MANAGEDEXTERN) + if (m_computeDevice == MANAGEDEXTERN) { int devId; - assert(this->m_externalBuffer); + assert(m_externalBuffer); CUDACALL(cudaGetDevice(&devId)); return devId; } - return this->m_computeDevice; + return m_computeDevice; } template @@ -1561,15 +1534,15 @@ namespace Microsoft { namespace MSR { namespace CNTK { template GPUSparseMatrix 
GPUSparseMatrix::Transpose() const { - int m = (int)this->GetNumRows(); - int n = (int)this->GetNumCols(); - int nnz = (int)this->GetNZElements(); + int m = (int)GetNumRows(); + int n = (int)GetNumCols(); + int nnz = (int)GetNZElements(); cusparseAction_t cpVals = CUSPARSE_ACTION_NUMERIC; cusparseIndexBase_t idxBase = CUSPARSE_INDEX_BASE_ZERO; - assert(this->GetFormat()&matrixFormatCompressed); // for now this only supports compressed formats + assert(GetFormat()&matrixFormatCompressed); // for now this only supports compressed formats PrepareDevice(); - GPUSparseMatrix c(n, m, nnz, NULL, this->GetFormat(), GetComputeDeviceId(), this->m_elemSizeAllocated); + GPUSparseMatrix c(n, m, nnz, NULL, GetFormat(), GetComputeDeviceId(), m_elemSizeAllocated); CUDACALL(cudaMalloc((void **)&c.m_pArray,c.BufferSize())); cusparseHandle_t cusparseHandle = 0; @@ -1579,12 +1552,12 @@ namespace Microsoft { namespace MSR { namespace CNTK { CUDACALL(cudaEventCreate(&done)); if (sizeof(ElemType)==sizeof(float)) { - CUSPARSECALL(cusparseScsr2csc(cusparseHandle,m,n,nnz,reinterpret_cast(this->NzLocation()),this->CompressedIndexLocation(),this->IndexLocation(), + CUSPARSECALL(cusparseScsr2csc(cusparseHandle,m,n,nnz,reinterpret_cast(NzLocation()),CompressedIndexLocation(),IndexLocation(), reinterpret_cast(c.NzLocation()),c.IndexLocation(),c.CompressedIndexLocation(),cpVals,idxBase)); } else { - CUSPARSECALL(cusparseDcsr2csc(cusparseHandle,m,n,nnz,reinterpret_cast(this->NzLocation()),this->CompressedIndexLocation(),this->IndexLocation(), + CUSPARSECALL(cusparseDcsr2csc(cusparseHandle,m,n,nnz,reinterpret_cast(NzLocation()),CompressedIndexLocation(),IndexLocation(), reinterpret_cast(c.NzLocation()),c.IndexLocation(),c.CompressedIndexLocation(),cpVals,idxBase)); } CUDACALL(cudaEventRecord(done)); @@ -1610,33 +1583,29 @@ namespace Microsoft { namespace MSR { namespace CNTK { template void GPUSparseMatrix::InplaceTranspose() { - if (this->IsEmpty()) + if (IsEmpty()) return; // transfer converted 
block over to this pointer -#ifndef LINUX - *this = std::move(this->Transpose()); -#else - std::cerr << "Not sure how to do the InplaceTranspose()"; -#endif + *this = std::move(Transpose()); } template ElemType GPUSparseMatrix::SumOfAbsElements() const { - if (this->IsEmpty()) + if (IsEmpty()) throw std::logic_error("SumOfAbsElements: Matrix is empty"); - cublasHandle_t cuHandle = GPUMatrix::GetCublasHandle(this->GetComputeDeviceId()); + cublasHandle_t cuHandle = GPUMatrix::GetCublasHandle(GetComputeDeviceId()); if (sizeof(ElemType)==sizeof(float)) { float res=0; - cublasSasum(cuHandle,(int)GetNZElements(),reinterpret_cast(this->m_pArray),1,&res); + cublasSasum(cuHandle,(int)GetNZElements(),reinterpret_cast(m_pArray),1,&res); return res; } else { double res=0; - cublasDasum(cuHandle,(int)GetNZElements(),reinterpret_cast(this->m_pArray),1,&res); + cublasDasum(cuHandle,(int)GetNZElements(),reinterpret_cast(m_pArray),1,&res); return ElemType(res); } } @@ -1644,7 +1613,7 @@ namespace Microsoft { namespace MSR { namespace CNTK { template ElemType GPUSparseMatrix::SumOfElements() const { - if (this->IsEmpty()) + if (IsEmpty()) throw std::logic_error("SumOfElements: Matrix is empty"); PrepareDevice(); @@ -1652,7 +1621,7 @@ namespace Microsoft { namespace MSR { namespace CNTK { ElemType h_sum; CUDACALL(cudaMalloc((void**)&d_sum,sizeof(ElemType))); //WARNING: THIS kernel is not the most efficient way! 
- _reductionSum<<<1,1024>>>(this->m_pArray,d_sum,(LONG64)this->GetNZElements()); + _reductionSum<<<1,1024>>>(m_pArray,d_sum,(LONG64)GetNZElements()); CUDACALL(cudaMemcpy(&h_sum,d_sum,sizeof(ElemType),cudaMemcpyDeviceToHost)); CUDACALL(cudaFree(d_sum)); return h_sum; @@ -1662,14 +1631,14 @@ namespace Microsoft { namespace MSR { namespace CNTK { template ElemType GPUSparseMatrix::FrobeniusNorm() const { - if (this->IsEmpty()) + if (IsEmpty()) throw std::logic_error("FrobeniusNorm: Matrix is empty."); ElemType* d_sum = NULL; ElemType h_sum=0; CUDACALL(cudaMalloc((void**)&d_sum,sizeof(ElemType))); //WARNING: THIS kernel is not the most efficient way! - _reductionSum2<<<1,1024>>>(this->m_pArray,d_sum,(int)this->GetNZElements()); + _reductionSum2<<<1,1024>>>(m_pArray,d_sum,(int)GetNZElements()); CUDACALL(cudaMemcpy(&h_sum,d_sum,sizeof(ElemType),cudaMemcpyDeviceToHost)); CUDACALL(cudaFree(d_sum)); if (sizeof(ElemType)==sizeof(float)) @@ -1681,14 +1650,14 @@ namespace Microsoft { namespace MSR { namespace CNTK { template ElemType GPUSparseMatrix::MatrixNormInf() const { - if (this->IsEmpty()) + if (IsEmpty()) throw std::logic_error("MatrixNorm1: Matrix is empty."); ElemType* d_maxAbs = NULL; ElemType h_maxAbs=0; CUDACALL(cudaMalloc((void**)&d_maxAbs,sizeof(ElemType))); //WARNING: THIS kernel is not the most efficient way! 
- _reductionMatrixNormInf<<<1,1024>>>(this->m_pArray,d_maxAbs,(int)this->GetNZElements()); + _reductionMatrixNormInf<<<1,1024>>>(m_pArray,d_maxAbs,(int)GetNZElements()); CUDACALL(cudaMemcpy(&h_maxAbs,d_maxAbs,sizeof(ElemType),cudaMemcpyDeviceToHost)); CUDACALL(cudaFree(d_maxAbs)); if (sizeof(ElemType)==sizeof(float)) @@ -1700,9 +1669,9 @@ namespace Microsoft { namespace MSR { namespace CNTK { template ElemType GPUSparseMatrix::MatrixNorm1() const { - if (this->IsEmpty()) + if (IsEmpty()) throw std::logic_error("MatrixNorm1: Matrix is empty."); - return this->SumOfAbsElements(); + return SumOfAbsElements(); } #pragma endregion Member BLAS Functions @@ -1712,14 +1681,14 @@ namespace Microsoft { namespace MSR { namespace CNTK { template GPUSparseMatrix& GPUSparseMatrix::ElementInverse () { - if (this->IsEmpty()) + if (IsEmpty()) throw std::logic_error("ElementInverse: Matrix is empty."); long N=(long)GetNZElements(); int blocksPerGrid =(int)ceil(1.0*N/threadsPerBlock); cudaEvent_t done = nullptr; CUDACALL(cudaEventCreate(&done)); - _elemInverse<<>>(this->m_pArray,N); + _elemInverse<<>>(m_pArray,N); CUDACALL(cudaEventRecord(done)); CUDACALL(cudaEventSynchronize(done)); return *this; @@ -1728,8 +1697,8 @@ namespace Microsoft { namespace MSR { namespace CNTK { template GPUSparseMatrix& GPUSparseMatrix::AssignElementInverseOf (const GPUSparseMatrix& a) { - this->SetValue(a); - return this->ElementInverse(); + SetValue(a); + return ElementInverse(); } template @@ -1742,8 +1711,8 @@ namespace Microsoft { namespace MSR { namespace CNTK { template GPUSparseMatrix& GPUSparseMatrix::AssignSigmoidOf (const GPUSparseMatrix& a) { - this->SetValue(a); - this->InplaceSigmoid(); + SetValue(a); + InplaceSigmoid(); return *this; } @@ -1757,8 +1726,8 @@ namespace Microsoft { namespace MSR { namespace CNTK { template GPUSparseMatrix& GPUSparseMatrix::AssignLinearRectifierDerivativeOf (const GPUSparseMatrix& a) { - this->SetValue(a); - this->InplaceLinearRectifierDerivative(); + 
SetValue(a); + InplaceLinearRectifierDerivative(); return *this; } @@ -1772,8 +1741,8 @@ namespace Microsoft { namespace MSR { namespace CNTK { template GPUSparseMatrix& GPUSparseMatrix::AssignTanhOf (const GPUSparseMatrix& a) { - this->SetValue(a); - this->InplaceTanh(); + SetValue(a); + InplaceTanh(); return *this; } @@ -1787,8 +1756,8 @@ namespace Microsoft { namespace MSR { namespace CNTK { template GPUSparseMatrix& GPUSparseMatrix::AssignSqrtOf (const GPUSparseMatrix& a) { - this->SetValue(a); - this->InplaceSqrt(); + SetValue(a); + InplaceSqrt(); return *this; } @@ -1802,8 +1771,8 @@ namespace Microsoft { namespace MSR { namespace CNTK { template GPUSparseMatrix& GPUSparseMatrix::AssignExpOf (const GPUSparseMatrix& a) { - this->SetValue(a); - this->InplaceExp(); + SetValue(a); + InplaceExp(); return *this; } @@ -1817,8 +1786,8 @@ namespace Microsoft { namespace MSR { namespace CNTK { template GPUSparseMatrix& GPUSparseMatrix::AssignLogOf (const GPUSparseMatrix& a) { - this->SetValue(a); - this->InplaceLog(); + SetValue(a); + InplaceLog(); return *this; } @@ -1832,21 +1801,21 @@ namespace Microsoft { namespace MSR { namespace CNTK { template GPUSparseMatrix& GPUSparseMatrix::AssignAbsOf (const GPUSparseMatrix& a) { - this->SetValue(a); - this->InplaceAbs(); + SetValue(a); + InplaceAbs(); return *this; } template GPUSparseMatrix& GPUSparseMatrix::InplaceTruncateBottom (const ElemType threshold) { - if (this->IsEmpty()) + if (IsEmpty()) throw std::logic_error("InplaceTruncateBottom: Matrix is empty."); long N=(long)GetNZElements(); int blocksPerGrid =(int)ceil(N*1.0/threadsPerBlock); cudaEvent_t done = nullptr; CUDACALL(cudaEventCreate(&done)); - _inplaceTruncateBottom<<>>(this->m_pArray,threshold,N); + _inplaceTruncateBottom<<>>(m_pArray,threshold,N); CUDACALL(cudaEventRecord(done)); CUDACALL(cudaEventSynchronize(done)); return *this; @@ -1867,7 +1836,7 @@ namespace Microsoft { namespace MSR { namespace CNTK { int blocksPerGrid 
=(int)ceil(N*1.0/threadsPerBlock); cudaEvent_t done = nullptr; CUDACALL(cudaEventCreate(&done)); - _assignTruncateBottom<<>>(this->m_pArray,a.NzLocation(),threshold,N); + _assignTruncateBottom<<>>(m_pArray,a.NzLocation(),threshold,N); CUDACALL(cudaEventRecord(done)); CUDACALL(cudaEventSynchronize(done)); return *this; @@ -1876,13 +1845,13 @@ namespace Microsoft { namespace MSR { namespace CNTK { template GPUSparseMatrix& GPUSparseMatrix::InplaceTruncateTop (const ElemType threshold) { - if (this->IsEmpty()) + if (IsEmpty()) throw std::logic_error("InplaceTruncateTop: Matrix is empty."); long N=(long)GetNZElements(); int blocksPerGrid =(int)ceil(N*1.0/threadsPerBlock); cudaEvent_t done = nullptr; CUDACALL(cudaEventCreate(&done)); - _inplaceTruncateTop<<>>(this->m_pArray,threshold,N); + _inplaceTruncateTop<<>>(m_pArray,threshold,N); CUDACALL(cudaEventRecord(done)); CUDACALL(cudaEventSynchronize(done)); return *this; @@ -1903,7 +1872,7 @@ namespace Microsoft { namespace MSR { namespace CNTK { int blocksPerGrid =(int)ceil(N*1.0/threadsPerBlock); cudaEvent_t done = nullptr; CUDACALL(cudaEventCreate(&done)); - _assignTruncateTop<<>>(this->m_pArray,a.NzLocation(),threshold,N); + _assignTruncateTop<<>>(m_pArray,a.NzLocation(),threshold,N); CUDACALL(cudaEventRecord(done)); CUDACALL(cudaEventSynchronize(done)); return *this; @@ -1912,13 +1881,13 @@ namespace Microsoft { namespace MSR { namespace CNTK { template GPUSparseMatrix& GPUSparseMatrix::SetToZeroIfAbsLessThan (const ElemType threshold) { - if (this->IsEmpty()) + if (IsEmpty()) throw std::logic_error("SetToZeroIfAbsLessThan: Matrix is empty."); long N=(long)GetNZElements(); int blocksPerGrid =(int)ceil(N*1.0/threadsPerBlock); cudaEvent_t done = nullptr; CUDACALL(cudaEventCreate(&done)); - _setToZeroIfAbsLessThan<<>>(this->m_pArray,threshold,N); + _setToZeroIfAbsLessThan<<>>(m_pArray,threshold,N); CUDACALL(cudaEventRecord(done)); CUDACALL(cudaEventSynchronize(done)); return *this; @@ -2012,25 +1981,25 @@ namespace 
Microsoft { namespace MSR { namespace CNTK { switch (kind) { case 0: - _inplaceSigmoidOnCuda<<>>(this->m_pArray,N); + _inplaceSigmoidOnCuda<<>>(m_pArray,N); break; case 1: - _inplaceTanhOnCuda<<>>(this->m_pArray,N); + _inplaceTanhOnCuda<<>>(m_pArray,N); break; case 2: - _inplaceSqrtOnCuda<<>>(this->m_pArray,N); + _inplaceSqrtOnCuda<<>>(m_pArray,N); break; case 3: - _inplaceExpOnCuda<<>>(this->m_pArray,N); + _inplaceExpOnCuda<<>>(m_pArray,N); break; case 4: - _inplaceLogOnCuda<<>>(this->m_pArray,N); + _inplaceLogOnCuda<<>>(m_pArray,N); break; case 5: - _inplaceAbsOnCuda<<>>(this->m_pArray,N); + _inplaceAbsOnCuda<<>>(m_pArray,N); break; case 6: - _inplaceLinRectDerivative<<>>(this->m_pArray,N); + _inplaceLinRectDerivative<<>>(m_pArray,N); } CUDACALL(cudaEventRecord(done)); CUDACALL(cudaEventSynchronize(done)); @@ -2039,20 +2008,20 @@ namespace Microsoft { namespace MSR { namespace CNTK { template void GPUSparseMatrix::SetMatrixFromCSRFormat(int *h_CSRRow, int *h_Col, ElemType *h_Val, size_t nz, size_t numRows, size_t numCols, bool IsOnDevice, int devId) { - this->m_computeDevice = devId; - this->m_elemSizeAllocated = this->m_nz = nz; - this->m_numCols=numCols; - this->m_numRows=numRows; - this->m_format=matrixFormatSparseCSR; - this->m_externalBuffer = false; + m_computeDevice = devId; + m_elemSizeAllocated = m_nz = nz; + m_numCols=numCols; + m_numRows=numRows; + m_format=matrixFormatSparseCSR; + m_externalBuffer = false; - if (this->OwnBuffer() && this->m_pArray != NULL) + if (OwnBuffer() && m_pArray != NULL) { - CUDACALL(cudaFree(this->m_pArray)); + CUDACALL(cudaFree(m_pArray)); } PrepareDevice(); - CUDACALL(cudaMalloc((void **)&this->m_pArray,BufferSize())); + CUDACALL(cudaMalloc((void **)&m_pArray,BufferSize())); cudaMemcpyKind kind = IsOnDevice?cudaMemcpyDeviceToDevice:cudaMemcpyHostToDevice; CUDACALL(cudaMemcpy(RowLocation(),h_CSRRow,RowSize(),kind)); @@ -2065,18 +2034,18 @@ namespace Microsoft { namespace MSR { namespace CNTK { void 
GPUSparseMatrix::GetMatrixFromCSRFormat(int*& h_CSRRow, int*& h_Col, ElemType*& h_Val, size_t &nz, size_t &numRows, size_t &numCols) const { if (h_CSRRow!=NULL || h_Col!=NULL || h_Val!=NULL) - throw stdException("Passed pointers must be NULL"); - nz = this->GetNZElements(); - numRows = this->GetNumRows(); - numCols = this->GetNumCols(); + throw std::runtime_error("Passed pointers must be NULL"); + nz = GetNZElements(); + numRows = GetNumRows(); + numCols = GetNumCols(); - if (this->IsEmpty()) + if (IsEmpty()) return; else { PrepareDevice(); h_Val = new ElemType[nz]; - h_CSRRow = new int[this->m_numRows + 1]; + h_CSRRow = new int[m_numRows + 1]; h_Col = new int[nz]; CUDACALL(cudaMemcpy(h_CSRRow,RowLocation(),RowSize(),cudaMemcpyDeviceToHost)); @@ -2097,7 +2066,7 @@ namespace Microsoft { namespace MSR { namespace CNTK { size_t elsize; stream>>elsize; if (sizeof(ElemType)!=elsize) - throw stdException("Template argument size doesn't match those in file"); + throw std::runtime_error("Template argument size doesn't match those in file"); std::wstring matrixName; // save off the buffer size being passed in diff --git a/Math/Math/GPUSparseMatrix.cuh b/Math/Math/GPUSparseMatrix.cuh index 8b1f959a2..30a53f05d 100644 --- a/Math/Math/GPUSparseMatrix.cuh +++ b/Math/Math/GPUSparseMatrix.cuh @@ -27,11 +27,7 @@ namespace Microsoft { namespace MSR { namespace CNTK { void performInplaceFunction(int kind); void DeepCopy(const GPUSparseMatrix& deepCopyFrom); void Clear(); -#ifndef LINUX void PrepareBuffer(size_t m, size_t n, bool canReuseBuffer, std::function func); -#else - void PrepareBuffer(size_t m, size_t n, bool canReuseBuffer, size_t (*func)(int *csRowPtrC)); -#endif size_t ElemCountFromBufferSize(size_t totalBufferSize); void PrepareDevice(short deviceId=-1) const; @@ -56,22 +52,22 @@ namespace Microsoft { namespace MSR { namespace CNTK { // in memory format is always in the following order: // Non-zero data elements, Full index locations, compressed index locations // In CSR 
row data is compressed, in CSC col data is compressed - const ElemType* NzLocation() const {return this->m_pArray;} - ElemType* NzLocation() {return this->m_pArray;} - size_t NzCount() const {return this->m_nz;} - size_t NzSize() const {return sizeof(ElemType)*this->m_nz;} // actual number of element bytes in use - int* IndexLocation() const {return (int*)(this->m_pArray+this->m_elemSizeAllocated);} - size_t IndexSize() const {return sizeof(int)*this->m_nz;} // actual number of index bytes in use - int* CompressedIndexLocation() const {return IndexLocation() + this->m_elemSizeAllocated;} + const ElemType* NzLocation() const {return m_pArray;} + ElemType* NzLocation() {return m_pArray;} + size_t NzCount() const {return m_nz;} + size_t NzSize() const {return sizeof(ElemType)*m_nz;} // actual number of element bytes in use + int* IndexLocation() const {return (int*)(m_pArray+m_elemSizeAllocated);} + size_t IndexSize() const {return sizeof(int)*m_nz;} // actual number of index bytes in use + int* CompressedIndexLocation() const {return IndexLocation() + m_elemSizeAllocated;} size_t CompressedIndexCount() const { - if (this->m_format&matrixFormatCompressed) + if (m_format&matrixFormatCompressed) { - size_t cnt = (this->m_format&matrixFormatRowMajor)?this->m_numRows:this->m_numCols; + size_t cnt = (m_format&matrixFormatRowMajor)?m_numRows:m_numCols; if (cnt) cnt++; // add an extra element on the end for the "max" value return cnt; } - return this->m_nz; // COO format + return m_nz; // COO format } // get size for compressed index size_t CompressedIndexSize() const {return (CompressedIndexCount())*sizeof(int);} @@ -79,10 +75,10 @@ namespace Microsoft { namespace MSR { namespace CNTK { ElemType* BufferPointer() const; // the column and row locations will swap based on what format we are in. 
Full index always follows the data array - int* RowLocation() const {return (this->m_format&matrixFormatRowMajor)?CompressedIndexLocation():IndexLocation();} - size_t RowSize() const {return (this->m_format&matrixFormatRowMajor)?CompressedIndexSize():IndexSize();} - int* ColLocation() const {return (this->m_format&matrixFormatRowMajor)?IndexLocation():CompressedIndexLocation();} - size_t ColSize() const {return (this->m_format&matrixFormatRowMajor)?IndexSize():CompressedIndexSize();} // actual number of row bytes in use + int* RowLocation() const {return (m_format&matrixFormatRowMajor)?CompressedIndexLocation():IndexLocation();} + size_t RowSize() const {return (m_format&matrixFormatRowMajor)?CompressedIndexSize():IndexSize();} + int* ColLocation() const {return (m_format&matrixFormatRowMajor)?IndexLocation():CompressedIndexLocation();} + size_t ColSize() const {return (m_format&matrixFormatRowMajor)?IndexSize():CompressedIndexSize();} // actual number of row bytes in use void SetValue(const GPUSparseMatrix& deepCopyFrom); void SetValue(const GPUMatrix& denseMatrix); @@ -110,7 +106,7 @@ namespace Microsoft { namespace MSR { namespace CNTK { bool IsEqualTo(const GPUMatrix& a, const ElemType threshold = 1e-8) const; public: int GetComputeDeviceId(void) const; - size_t GetNZElements() const {return this->m_nz;} + size_t GetNZElements() const {return m_nz;} //Sets sparse matrix in CSR format. 
this acts as deep copy void SetMatrixFromCSRFormat(int *h_CSRRow, int *h_Col, ElemType *h_Val, size_t nz, size_t numRows, size_t numCols, bool IsOnDevice=false, int devId=0); void SetMatrixFromCSCFormat(size_t *h_row, size_t *h_rowIdx, size_t size, size_t blockSize); diff --git a/Math/Math/Math.vcxproj b/Math/Math/Math.vcxproj index 03ad001e8..6089ef5d0 100644 --- a/Math/Math/Math.vcxproj +++ b/Math/Math/Math.vcxproj @@ -182,6 +182,7 @@ CppCode + Create diff --git a/Math/Math/Math.vcxproj.filters b/Math/Math/Math.vcxproj.filters index 4846433c6..1069be78d 100644 --- a/Math/Math/Math.vcxproj.filters +++ b/Math/Math/Math.vcxproj.filters @@ -1,77 +1,54 @@ - - - - - {4FC737F1-C7A5-4376-A066-2A32D752A2FF} - cpp;c;cc;cxx;def;odl;idl;hpj;bat;asm;asmx - - - {93995380-89BD-4b04-88EB-625FBE52EBFB} - h;hpp;hxx;hm;inl;inc;xsd - - - {67DA6AB6-F800-4c08-8B7A-83BB121AAD01} - rc;ico;cur;bmp;dlg;rc2;rct;bin;rgs;gif;jpg;jpeg;jpe;resx;tiff;tif;png;wav;mfcribbon-ms - - - - - Header Files - - - Header Files - - - Header Files - - - Header Files - - - Header Files - - - Header Files - - - Header Files - - - - - Source Files - - - Source Files - - - Source Files - - - Source Files - - - Source Files - - - Source Files - - - Source Files - - - Source Files - - - - - Header Files - - - Header Files - - - Header Files - - + + + + + + + + + + + + + + + + Common + + + Common + + + + + + + + + + + + + Common\Include + + + Common\Include + + + Common\Include + + + + + + + + + + {4d07e945-74fb-48fa-aa63-23f3a7763789} + + + {51b468dd-7e8a-4be8-ae6f-5e3f3d752b88} + + \ No newline at end of file diff --git a/Math/Math/Matrix.cpp b/Math/Math/Matrix.cpp index eecc95d96..1812484c3 100644 --- a/Math/Math/Matrix.cpp +++ b/Math/Math/Matrix.cpp @@ -203,7 +203,7 @@ namespace Microsoft { namespace MSR { namespace CNTK { { m_GPUSparseMatrix = (GPUSparseMatrix*)baseMatrix; SetDataLocation(GPU, SPARSE); - } + } } else { @@ -216,8 +216,8 @@ namespace Microsoft { namespace MSR { namespace CNTK { { m_GPUMatrix = 
(GPUMatrix*)baseMatrix; SetDataLocation(GPU, DENSE); + } } - } m_baseMatrix = baseMatrix; m_baseMatrix->SetArray(pArray); } @@ -288,15 +288,15 @@ namespace Microsoft { namespace MSR { namespace CNTK { { if (m_preferredDeviceId == CPUDEVICE) { - m_CPUMatrix = new CPUMatrix(numRows,numCols); + m_CPUMatrix = new CPUMatrix(numRows, numCols); SetDataLocation(CPU, DENSE); - } - else - { - m_GPUMatrix = new GPUMatrix(numRows,numCols,m_preferredDeviceId); - SetDataLocation(GPU, DENSE); - } - } + } + else + { + m_GPUMatrix = new GPUMatrix(numRows, numCols, m_preferredDeviceId); + SetDataLocation(GPU, DENSE); + } + } } template @@ -526,11 +526,7 @@ namespace Microsoft { namespace MSR { namespace CNTK { } else { -#ifndef LINUX - throw std::exception("Unknown matrix type"); -#else - throw std::exception(); -#endif /* LINUX */ + throw std::runtime_error("Unknown matrix type"); } } @@ -617,15 +613,11 @@ namespace Microsoft { namespace MSR { namespace CNTK { } else if (GetMatrixType() == MatrixType::SPARSE) { - NOT_IMPLEMENTED; + NOT_IMPLEMENTED; } else { -#ifndef LINUX - throw std::exception("Unknown matrix type"); -#else - throw std::exception(); -#endif /* LINUX */ + throw std::runtime_error("Unknown matrix type"); } return slice; @@ -838,10 +830,8 @@ namespace Microsoft { namespace MSR { namespace CNTK { DISPATCH_MATRIX_ON_FLAG(this, this, m_CPUMatrix->SetValue(*db_number.ExposePointer2Value()), - if (GetDeviceId()!=db_number.GetDeviceId()) - { + if (GetDeviceId()!=db_number.GetDeviceId()) throw std::runtime_error("Matrix and device bound number must be on the same device"); - } m_GPUMatrix->SetValue(db_number.ExposePointer2Value()), NOT_IMPLEMENTED, NOT_IMPLEMENTED @@ -3431,9 +3421,9 @@ namespace Microsoft { namespace MSR { namespace CNTK { else { GPUMatrix firstDummy = transposeA ? a.m_GPUMatrix->Transpose()*alpha : (*a.m_GPUMatrix)*alpha; - GPUMatrix & first= firstDummy; // By Malcolm.. 
gcc doesn't support auto like original + GPUMatrix & first= firstDummy; // GCC does not support mixing refs and non-refs GPUSparseMatrix secondDummy = transposeB ? b.m_GPUSparseMatrix->Transpose() : *b.m_GPUSparseMatrix; - GPUSparseMatrix & second = secondDummy; // By Malcolm.. gcc doesn't support auto like original + GPUSparseMatrix & second = secondDummy; if (beta==0) { GPUSparseMatrix::Multiply(first,second,*c.m_GPUMatrix); diff --git a/Math/Math/Matrix.h b/Math/Math/Matrix.h index 3ae9e9b54..73a70e6dd 100644 --- a/Math/Math/Matrix.h +++ b/Math/Math/Matrix.h @@ -3,16 +3,14 @@ // Copyright (c) Microsoft Corporation. All rights reserved. // // + #pragma once + #include "CPUMatrix.h" #include "CPUSparseMatrix.h" #include "GPUMatrix.cuh" #include "GPUSparseMatrix.cuh" -#ifdef LINUX -// typedef char wchar_t; -#endif - // This class is exported from the Math.dll namespace Microsoft { namespace MSR { namespace CNTK { enum CurrentDataLocation diff --git a/Math/Math/targetver.h b/Math/Math/targetver.h index 934954c12..27ea285fc 100644 --- a/Math/Math/targetver.h +++ b/Math/Math/targetver.h @@ -5,13 +5,11 @@ // #pragma once -#ifndef LINUX - // Including SDKDDKVer.h defines the highest available Windows platform. // If you wish to build your application for a previous Windows platform, include WinSDKVer.h and // set the _WIN32_WINNT macro to the platform you wish to support before including SDKDDKVer.h. 
+#ifdef _WIN32 #include - -#endif /* LINUX */ +#endif From 089b4191812699b73d2b5f828360b93cbd686978 Mon Sep 17 00:00:00 2001 From: Frank Seide Date: Thu, 30 Oct 2014 19:48:39 -0700 Subject: [PATCH 17/31] (fixed a CRLF inconsistency) --- Common/Include/basetypes.h | 1956 ++++++++++++++++++------------------ 1 file changed, 978 insertions(+), 978 deletions(-) diff --git a/Common/Include/basetypes.h b/Common/Include/basetypes.h index da888420f..f26434c1c 100644 --- a/Common/Include/basetypes.h +++ b/Common/Include/basetypes.h @@ -1,983 +1,983 @@ -// -// basetypes.h - basic types that C++ lacks -// -// Copyright (c) Microsoft Corporation. All rights reserved. -// -#pragma once -#ifndef _BASETYPES_ -#define _BASETYPES_ - -#ifndef UNDER_CE // fixed-buffer overloads not available for wince -#ifdef _CRT_SECURE_CPP_OVERLOAD_STANDARD_NAMES // fixed-buffer overloads for strcpy() etc. -#undef _CRT_SECURE_CPP_OVERLOAD_STANDARD_NAMES -#endif -#define _CRT_SECURE_CPP_OVERLOAD_STANDARD_NAMES 1 -#endif - -#pragma warning (push) -#pragma warning (disable: 4793) // caused by varargs - -// disable certain parts of basetypes for wince compilation -#ifdef UNDER_CE -#define BASETYPES_NO_UNSAFECRTOVERLOAD // disable unsafe CRT overloads (safe functions don't exist in wince) -#define BASETYPES_NO_STRPRINTF // dependent functions here are not defined for wince -#endif - -#ifndef OACR // dummies when we are not compiling under Office -#define OACR_WARNING_SUPPRESS(x, y) -#define OACR_WARNING_DISABLE(x, y) -#define OACR_WARNING_PUSH -#define OACR_WARNING_POP -#endif -#ifndef OACR_ASSUME // this seems to be a different one -#define OACR_ASSUME(x) -#endif - -// following oacr warnings are not level1 or level2-security -// in currect stage we want to ignore those warnings -// if necessay this can be fixed at later stage - -// not a bug -OACR_WARNING_DISABLE(EXC_NOT_CAUGHT_BY_REFERENCE, "Not indicating a bug or security threat."); -OACR_WARNING_DISABLE(LOCALDECLHIDESLOCAL, "Not indicating a bug 
or security threat."); - -// not reviewed -OACR_WARNING_DISABLE(MISSING_OVERRIDE, "Not level1 or level2_security."); -OACR_WARNING_DISABLE(EMPTY_DTOR, "Not level1 or level2_security."); -OACR_WARNING_DISABLE(DEREF_NULL_PTR, "Not level1 or level2_security."); -OACR_WARNING_DISABLE(INVALID_PARAM_VALUE_1, "Not level1 or level2_security."); -OACR_WARNING_DISABLE(VIRTUAL_CALL_IN_CTOR, "Not level1 or level2_security."); -OACR_WARNING_DISABLE(POTENTIAL_ARGUMENT_TYPE_MISMATCH, "Not level1 or level2_security."); - -// determine WIN32 api calling convention -// it seems this is normally stdcall?? but when compiling as /clr:pure or /clr:Safe -// this is not supported, so in this case, we need to use the 'default' calling convention -// TODO: can we reuse the #define of WINAPI?? -#ifdef _M_CEE_SAFE -#define WINAPI_CC __clrcall -#elif _M_CEE -#define WINAPI_CC __clrcall -#else -#define WINAPI_CC __stdcall -#endif - -// fix some warnings in STL -#if !defined(_DEBUG) || defined(_CHECKED) || defined(_MANAGED) -#pragma warning(disable : 4702) // unreachable code -#endif - -#include -#include -#include // include here because we redefine some names later -#include -#include -#include -#include // for HUGE_VAL // potential double isnan definition -#include -#include -#include -#include -#include // std::wstring_convert -#ifdef _MSC_VER -#include // std::codecvt_utf8 -#endif -#ifdef _WIN32 -#include // for CRITICAL_SECTION and Unicode conversion functions --TODO: is there a portable alternative? -#endif -#if __unix__ -#include -#include -#endif - -using namespace std; - -// CRT error handling seems to not be included in wince headers -// so we define our own imports -#ifdef UNDER_CE - -// TODO: is this true - is GetLastError == errno?? - also this adds a dependency on windows.h -#define errno GetLastError() - -// strerror(x) - x here is normally errno - TODO: make this return errno as a string -#define strerror(x) "strerror error but can't report error number sorry!" 
-#endif - -#if 0 -#ifndef __in // dummies for sal annotations if compiler does not support it -#define __in -#define __inout_z -#define __in_count(x) -#define __inout_cap(x) -#define __inout_cap_c(x) -#endif -#ifndef __out_z_cap // non-VS2005 annotations -#define __out_cap(x) -#define __out_z_cap(x) -#define __out_cap_c(x) -#endif - -#ifndef __override // and some more non-std extensions required by Office -#define __override virtual -#endif -#endif - -// disable warnings for which fixing would make code less readable -#pragma warning(disable : 4290) // throw() declaration ignored -#pragma warning(disable : 4244) // conversion from typeA to typeB, possible loss of data - - -// ---------------------------------------------------------------------------- -// basic macros -// ---------------------------------------------------------------------------- - -#define SAFE_DELETE(p) { if(p) { delete (p); (p)=NULL; } } -#define SAFE_RELEASE(p) { if(p) { (p)->Release(); (p)=NULL; } } // nasty! use CComPtr<> -#ifndef ASSERT -#define ASSERT assert -#endif - -// ---------------------------------------------------------------------------- -// basic data types -// ---------------------------------------------------------------------------- - -namespace msra { namespace basetypes { - -// class ARRAY -- std::vector with array-bounds checking -// VS 2008 and above do this, so there is no longer a need for this. - -#pragma warning(push) -#pragma warning(disable : 4555) // expression has no affect, used so retail won't be empty - -template -class ARRAY : public std::vector<_ElemType> -{ -#if defined (_DEBUG) || defined (_CHECKED) // debug version with range checking - static void throwOutOfBounds() - { // (moved to separate function hoping to keep inlined code smaller - OACR_WARNING_PUSH; - OACR_WARNING_DISABLE(IGNOREDBYCOMMA, "Reviewd OK. 
Special trick below to show a message when assertion fails" - "[rogeryu 2006/03/24]"); - OACR_WARNING_DISABLE(BOGUS_EXPRESSION_LIST, "This is intentional. [rogeryu 2006/03/24]"); - //ASSERT ("ARRAY::operator[] out of bounds", false); - OACR_WARNING_POP; - } -#endif - -public: - - ARRAY() : std::vector<_ElemType> () { } - ARRAY (int size) : std::vector<_ElemType> (size) { } - -#if defined (_DEBUG) || defined (_CHECKED) // debug version with range checking - // ------------------------------------------------------------------------ - // operator[]: with array-bounds checking - // ------------------------------------------------------------------------ - - inline _ElemType & operator[] (int index) // writing - { - if (index < 0 || index >= size()) throwOutOfBounds(); - return (*(std::vector<_ElemType>*) this)[index]; - } - - // ------------------------------------------------------------------------ - - inline const _ElemType & operator[] (int index) const // reading - { - if (index < 0 || index >= size()) throwOutOfBounds(); - return (*(std::vector<_ElemType>*) this)[index]; - } -#endif - - // ------------------------------------------------------------------------ - // size(): same as base class, but returning an 'int' instead of 'size_t' - // to allow for better readable code - // ------------------------------------------------------------------------ - - inline int size() const - { - size_t siz = ((std::vector<_ElemType>*) this)->size(); - return (int) siz; - } -}; -// overload swap(), otherwise we'd fallback to 3-way assignment & possibly throw -template inline void swap (ARRAY<_T> & L, ARRAY<_T> & R) throw() -{ swap ((std::vector<_T> &) L, (std::vector<_T> &) R); } - -// class fixed_vector - non-resizable vector - -template class fixed_vector -{ - _T * p; // pointer array - size_t n; // number of elements - void check (int index) const { index/*avoid compiler warning*/;ASSERT (index >= 0 && (size_t) index < n); } - void check (size_t index) const { ASSERT 
(index < n); } - // ... TODO: when I make this public, LinearTransform.h acts totally up but I cannot see where it comes from. - //fixed_vector (const fixed_vector & other) : n (0), p (NULL) { *this = other; } -public: - fixed_vector() : n (0), p (NULL) { } - void resize (int size) { clear(); if (size > 0) { p = new _T[size]; n = size; } } - void resize (size_t size) { clear(); if (size > 0) { p = new _T[size]; n = size; } } - fixed_vector (int size) : n (size), p (size > 0 ? new _T[size] : NULL) { } - fixed_vector (size_t size) : n ((int) size), p (size > 0 ? new _T[size] : NULL) { } - ~fixed_vector() { delete[] p; } - inline int size() const { return (int) n; } - inline int capacity() const { return (int) n; } - inline bool empty() const { return n == 0; } - void clear() { delete[] p; p = NULL; n = 0; } - _T * begin() { return p; } - const _T * begin() const { return p; } - _T * end() { return p + n; } // note: n == 0 so result is NULL - inline _T & operator[] (int index) { check (index); return p[index]; } // writing - inline const _T & operator[] (int index) const { check (index); return p[index]; } // reading - inline _T & operator[] (size_t index) { check (index); return p[index]; } // writing - inline const _T & operator[] (size_t index) const { check (index); return p[index]; } // reading - inline int indexof (const _T & elem) const { ASSERT (&elem >= p && &elem < p + n); return &elem - p; } - inline void swap (fixed_vector & other) throw() { std::swap (other.p, p); std::swap (other.n, n); } - template fixed_vector & operator= (const VECTOR & other) - { - int other_n = (int) other.size(); - fixed_vector tmp (other_n); - for (int k = 0; k < other_n; k++) tmp[k] = other[k]; - swap (tmp); - return *this; - } - fixed_vector & operator= (const fixed_vector & other) - { - int other_n = (int) other.size(); - fixed_vector tmp (other_n); - for (int k = 0; k < other_n; k++) tmp[k] = other[k]; - swap (tmp); - return *this; - } - template fixed_vector (const VECTOR & 
other) : n (0), p (NULL) { *this = other; } -}; -template inline void swap (fixed_vector<_T> & L, fixed_vector<_T> & R) throw() { L.swap (R); } - -#pragma warning(pop) // pop off waring: expression has no effect - -// class matrix - simple fixed-size 2-dimensional array, access elements as m(i,j) -// stored as concatenation of rows - -//template class matrix : fixed_vector -//{ -// size_t numcols; -// size_t locate (size_t i, size_t j) const { ASSERT (i < rows() && j < cols()); return i * cols() + j; } -//public: -// typedef T elemtype; -// matrix() : numcols (0) {} -// matrix (size_t n, size_t m) { resize (n, m); } -// void resize (size_t n, size_t m) { numcols = m; fixed_vector::resize (n * m); } -// size_t cols() const { return numcols; } -// size_t rows() const { return empty() ? 0 : size() / cols(); } -// size_t size() const { return fixed_vector::size(); } // use this for reading and writing... not nice! -// bool empty() const { return fixed_vector::empty(); } -// T & operator() (size_t i, size_t j) { return (*this)[locate(i,j)]; } -// const T & operator() (size_t i, size_t j) const { return (*this)[locate(i,j)]; } -// void swap (matrix & other) throw() { std::swap (numcols, other.numcols); fixed_vector::swap (other); } -//}; -//template inline void swap (matrix<_T> & L, matrix<_T> & R) throw() { L.swap (R); } - -// TODO: get rid of these -typedef std::string STRING; -typedef std::wstring WSTRING; - -// derive from this for noncopyable classes (will get you private unimplemented copy constructors) -// ... 
TODO: change all of basetypes classes/structs to use this -class noncopyable -{ - noncopyable & operator= (const noncopyable &); - noncopyable (const noncopyable &); -public: - noncopyable(){} -}; - -// class CCritSec and CAutoLock -- simple critical section handling -#ifndef _WIN32 // TODO: Currently only working under Windows; BROKEN otherwise, to be fixed -#define CRITICAL_SECTION int -void InitializeCriticalSection(int *) {} -void DeleteCriticalSection(int *) {} -void EnterCriticalSection(int *) {} -void LeaveCriticalSection(int *) {} -#endif -class CCritSec -{ - CCritSec (const CCritSec &); CCritSec & operator= (const CCritSec &); - CRITICAL_SECTION m_CritSec; -public: - CCritSec() { InitializeCriticalSection(&m_CritSec); }; - ~CCritSec() { DeleteCriticalSection(&m_CritSec); }; - void Lock() { EnterCriticalSection(&m_CritSec); }; - void Unlock() { LeaveCriticalSection(&m_CritSec); }; -}; - - -// locks a critical section, and unlocks it automatically -// when the lock goes out of scope -class CAutoLock -{ - CAutoLock(const CAutoLock &refAutoLock); CAutoLock &operator=(const CAutoLock &refAutoLock); - CCritSec & m_rLock; -public: - CAutoLock(CCritSec & rLock) : m_rLock (rLock) { m_rLock.Lock(); }; - ~CAutoLock() { m_rLock.Unlock(); }; -}; - -#if 0 -// an efficient way to write COM code -// usage examples: -// COM_function() || throw_hr ("message"); -// while ((s->Read (p, n, &m) || throw_hr ("Read failure")) == S_OK) { ... } -// is that cool or what? 
-struct bad_hr : public std::runtime_error -{ - HRESULT hr; - bad_hr (HRESULT p_hr, const char * msg) : hr (p_hr), std::runtime_error (msg) { } - // (only for use in || expression --deprecated:) - bad_hr() : std::runtime_error(NULL) { } - bad_hr(const char * msg) : std::runtime_error(msg) { } -}; -struct throw_hr -{ - const char * msg; - inline throw_hr (const char * msg = NULL) : msg (msg) {} -}; -inline static HRESULT operator|| (HRESULT hr, const throw_hr & e) -{ - if (SUCCEEDED (hr)) return hr; - throw bad_hr (hr, e.msg); -} -// (old deprecated version kept for compat:) -inline static bool operator|| (HRESULT hr, const bad_hr & e) { if (SUCCEEDED (hr)) return true; throw bad_hr (hr, e.what()); } - -// back-mapping of exceptions to HRESULT codes -// usage pattern: HRESULT COM_function (...) { try { exception-based function body } catch_hr_return; } -#define catch_hr_return \ - catch (const bad_alloc &) { return E_OUTOFMEMORY; } \ - catch (const bad_hr & e) { return e.hr; } \ - catch (const invalid_argument &) { return E_INVALIDARG; } \ - catch (const runtime_error &) { return E_FAIL; } \ - catch (const logic_error &) { return E_UNEXPECTED; } \ - catch (const exception &) { return E_FAIL; } \ - return S_OK; - -// CoInitializeEx() wrapper to ensure CoUnintialize() -//struct auto_co_initialize : noncopyable -//{ -// auto_co_initialize() { ::CoInitializeEx (NULL, COINIT_MULTITHREADED) || bad_hr ("auto_co_initialize: CoInitializeEx failure"); } -// ~auto_co_initialize() { ::CoUninitialize(); } -//}; - -// auto pointer for ::CoTaskMemFree -template class auto_co_ptr : noncopyable -{ - T * p; -public: - auto_co_ptr() : p (NULL) { } - auto_co_ptr (T * p) : p (p) { } -// ~auto_co_ptr() { ::CoTaskMemFree (p); } - operator T * () const { return p; } - T * operator->() const { return p; } - T** operator& () { assert (p == NULL); return &p; } // must be empty when taking address -}; - -// represents a thread-local-storage variable -// Note: __declspec(thread) is broken on 
pre-Vista for delay loaded DLLs -// [http://www.nynaeve.net/?p=187] -// so instead, we need to wrap up the Win32 TLS functions ourselves. -// Note: tls instances must be allocated as static to work correctly, e.g.: -// static tls myVal(); -// myVal = (void *) 25; -// printf ("value is %d",(void *) myVal); - -class tls -{ -private: - int tlsSlot; -public: - -#ifdef UNDER_CE - // this is from standard windows headers - seems to be missing in WINCE - #define TLS_OUT_OF_INDEXES ((DWORD)0xFFFFFFFF) -#endif - tls() { tlsSlot = TlsAlloc(); if (tlsSlot == TLS_OUT_OF_INDEXES) throw std::runtime_error("tls: TlsAlloc failed, out of tls slots"); } - operator void * () { return TlsGetValue (tlsSlot); } - void *operator = (void *val) { if (!TlsSetValue (tlsSlot,val)) throw std::runtime_error ("tls: TlsSetValue failed"); return val; } -}; -#endif - -};}; // namespace - -#if 0 //ndef BASETYPES_NO_UNSAFECRTOVERLOAD // if on, no unsafe CRT overload functions - -// ---------------------------------------------------------------------------- -// overloads for "unsafe" CRT functions used in our code base -// ---------------------------------------------------------------------------- - -// strlen/wcslen overloads for fixed-buffer size - -// Note: Careful while fixing bug related to these templates. -// In all attempted experiments, in seems all 6 definitions are required -// below to get the correct behaviour. Be very very careful -// not to delete something without testing that case 5&6 have "size" deduced. -// 1. char * -// 2. char * const -// 3. const char * -// 4. const char * const -// 5. char (&) [size] -// 6. const char (&) [size] -// the following includes all headers that use strlen() and fail because of the mapping below -// to find those, change #define strlen strlen_ to something invalid e.g. 
strlen::strlen_ -#if _MSC_VER >= 1600 // VS 2010 --TODO: fix this by correct include order instead -#include // defines strlen() as an intrinsic in VS 2010 -#include // uses strlen() -#include // uses strlen() -#endif -#define strlen strlen_ -#ifndef LINUX -template inline __declspec(deprecated("Dummy general template, cannot be used directly")) -#else -template inline -#endif // LINUX -size_t strlen_(_T &s) { return strnlen_s(static_cast(s), SIZE_MAX); } // never be called but needed to keep compiler happy -template inline size_t strlen_(const _T &s) { return strnlen_s(static_cast(s), SIZE_MAX); } -template<> inline size_t strlen_(char * &s) { return strnlen_s(s, SIZE_MAX); } -template<> inline size_t strlen_(const char * &s) { return strnlen_s(s, SIZE_MAX); } -template inline size_t strlen_(const char (&s)[n]) { return (strnlen_s(s, n)); } -template inline size_t strlen_(char (&s)[n]) { return (strnlen_s(s, n)); } -#define wcslen wcslen_ -template inline __declspec(deprecated("Dummy general template, cannot be used directly")) -size_t wcslen_(_T &s) { return wcsnlen_s(static_cast(s), SIZE_MAX); } // never be called but needed to keep compiler happy -template inline size_t __cdecl wcslen_(const _T &s) { return wcsnlen_s(static_cast(s), SIZE_MAX); } -template<> inline size_t wcslen_(wchar_t * &s) { return wcsnlen_s(s, SIZE_MAX); } -template<> inline size_t wcslen_(const wchar_t * &s) { return wcsnlen_s(s, SIZE_MAX); } -template inline size_t wcslen_(const wchar_t (&s)[n]) { return (wcsnlen_s(s, n)); } -template inline size_t wcslen_(wchar_t (&s)[n]) { return (wcsnlen_s(s, n)); } - -// xscanf wrappers -- one overload for each actual use case in our code base -static inline int sscanf (const char * buf, const char * format, int * i1) { return sscanf_s (buf, format, i1); } -static inline int sscanf (const char * buf, const char * format, int * i1, int * i2) { return sscanf_s (buf, format, i1, i2); } -static inline int sscanf (const char * buf, const char * format, int 
* i1, int * i2, int * i3) { return sscanf_s (buf, format, i1, i2, i3); } -static inline int sscanf (const char * buf, const char * format, double * f1) { return sscanf_s (buf, format, f1); } -static inline int swscanf (const wchar_t * buf, const wchar_t * format, int * i1) { return swscanf_s (buf, format, i1); } -static inline int fscanf (FILE * file, const char * format, float * f1) { return fscanf_s (file, format, f1); } - -// ...TODO: should we pass 'count' instead of SIZE_MAX? (need to review use cases) -#define _vsnprintf _vsnprintf_ -static inline int _vsnprintf_(char *buffer, size_t count, const char *format, va_list argptr) -{ return _vsnprintf_s (buffer, SIZE_MAX, count, format, argptr); } -#define _vsnwprintf _vsnwprintf_ -static inline int _vsnwprintf_(wchar_t *buffer, size_t count, const wchar_t *format, va_list argptr) -{ return _vsnwprintf_s (buffer, SIZE_MAX, count, format, argptr); } - -// wcsfcpy -- same as standard wcsncpy, use padded fixed-size buffer really needed -static inline void wcsfcpy (wchar_t * dest, const wchar_t * source, size_t count) -{ - while (count && (*dest++ = *source++) != 0) count--; // copy - if (count) while (--count) *dest++ = 0; // pad with zeroes -} - -// cacpy -- fixed-size character array (same as original strncpy (dst, src, sizeof (dst))) -// NOTE: THIS FUNCTION HAS NEVER BEEN TESTED. REMOVE THIS COMMENT ONCE IT HAS. 
-template static inline void cacpy (T (&dst)[n], const T * src) -{ for (int i = 0; i < n; i++) { dst[i] = *src; if (*src) src++; } } -// { return strncpy (dst, src, n); } // using original C std lib function - -// mappings for "unsafe" functions that are not really unsafe -#define strtok strtok_ // map to "safe" function (adds no value) -static inline /*const*/ char * strtok_(char * s, const char * delim) -{ - static msra::basetypes::tls tls_context; // see note for tls class def - char *context = (char *) (void *) tls_context; - char *ret = strtok_s (s, delim, &context); - tls_context = context; - return ret; -} - -#define wcstok wcstok_ // map to "safe" function (adds no value) -static inline /*const*/ wchar_t * wcstok_(wchar_t * s, const wchar_t * delim) -{ - static msra::basetypes::tls tls_context; // see note for tls class def - wchar_t *context = (wchar_t *) (void *) tls_context; - wchar_t *ret = wcstok_s (s, delim, &context); - tls_context = context; - return ret; -} - -#define fopen fopen_ // map to _fsopen() (adds no value) -static inline FILE * fopen_(const char * p, const char * m) { return _fsopen (p, m, _SH_DENYWR); } -#define _wfopen _wfopen_ // map to _wfsopen() (adds no value) -static inline FILE * _wfopen_(const wchar_t * p, const wchar_t * m) { return _wfsopen (p, m, _SH_DENYWR); } - -#define strerror(e) strerror_((e)) // map to "safe" function (adds no value) -static inline const char *strerror_(int e) -{ // keep a cache so we can return a pointer (to mimic the old interface) - static msra::basetypes::CCritSec cs; static std::map msgs; - msra::basetypes::CAutoLock lock (cs); - if (msgs.find(e) == msgs.end()) { char msg[1024]; strerror_s (msg, e); msgs[e] = msg; } - return msgs[e].c_str(); -} - -#endif - -// ---------------------------------------------------------------------------- -// frequently missing string functions -// ---------------------------------------------------------------------------- - -namespace msra { namespace strfun { - 
-#ifndef BASETYPES_NO_STRPRINTF - -// [w]strprintf() -- like sprintf() but resulting in a C++ string -template struct _strprintf : public std::basic_string<_T> -{ // works for both wchar_t* and char* - _strprintf (const _T * format, ...) - { - va_list args; va_start (args, format); // varargs stuff - size_t n = _cprintf (format, args); // num chars excl. '\0' - const int FIXBUF_SIZE = 128; // incl. '\0' - if (n < FIXBUF_SIZE) - { - _T fixbuf[FIXBUF_SIZE]; - this->assign (_sprintf (&fixbuf[0], sizeof (fixbuf)/sizeof (*fixbuf), format, args), n); - } - else // too long: use dynamically allocated variable-size buffer - { - std::vector<_T> varbuf (n + 1); // incl. '\0' - this->assign (_sprintf (&varbuf[0], varbuf.size(), format, args), n); - } - } -private: - // helpers - inline size_t _cprintf (const wchar_t * format, va_list args) { return vswprintf (nullptr, 0, format, args); } - inline size_t _cprintf (const char * format, va_list args) { return vsprintf (nullptr, format, args); } - inline const wchar_t * _sprintf (wchar_t * buf, size_t bufsiz, const wchar_t * format, va_list args) { vswprintf (buf, bufsiz, format, args); return buf; } - inline const char * _sprintf ( char * buf, size_t /*bufsiz*/, const char * format, va_list args) { vsprintf (buf, format, args); return buf; } -}; -typedef strfun::_strprintf strprintf; // char version -typedef strfun::_strprintf wstrprintf; // wchar_t version - -#endif - -// string-encoding conversion functions -struct utf8 : std::string { utf8 (const std::wstring & p) // utf-16 to -8 -{ -#if 1 - std::wstring_convert> cv; - (*(std::string*)this) = cv.to_bytes(p); -#else // old version, delete once we know it works - size_t len = p.length(); - if (len == 0) { return;} // empty string - msra::basetypes::fixed_vector buf (3 * len + 1); // max: 1 wchar => up to 3 mb chars - // ... 
TODO: this fill() should be unnecessary (a 0 is appended)--but verify - std::fill (buf.begin (), buf.end (), 0); - int rc = WideCharToMultiByte (CP_UTF8, 0, p.c_str(), (int) len, - &buf[0], (int) buf.size(), NULL, NULL); - if (rc == 0) throw std::runtime_error ("WideCharToMultiByte"); - (*(std::string*)this) = &buf[0]; -#endif -}}; -struct utf16 : std::wstring { utf16 (const std::string & p) // utf-8 to -16 -{ -#if 1 - std::wstring_convert> cv; - (*(std::wstring*)this) = cv.from_bytes(p); -#else // old version, delete once we know it works - size_t len = p.length(); - if (len == 0) { return;} // empty string - msra::basetypes::fixed_vector buf (len + 1); - // ... TODO: this fill() should be unnecessary (a 0 is appended)--but verify - std::fill(buf.begin(), buf.end(), (wchar_t)0); - int rc = MultiByteToWideChar(CP_UTF8, 0, p.c_str(), (int)len, - &buf[0], (int)buf.size()); - if (rc == 0) throw std::runtime_error("MultiByteToWideChar"); - ASSERT(rc < buf.size()); - (*(std::wstring*)this) = &buf[0]; -#endif -}}; - -#pragma warning(push) -#pragma warning(disable : 4996) // Reviewed by Yusheng Li, March 14, 2006. depr. fn (wcstombs, mbstowcs) -static inline std::string wcstombs (const std::wstring & p) // output: MBCS -{ - size_t len = p.length(); - msra::basetypes::fixed_vector buf (2 * len + 1); // max: 1 wchar => 2 mb chars - std::fill (buf.begin (), buf.end (), 0); - ::wcstombs (&buf[0], p.c_str(), 2 * len + 1); - return std::string (&buf[0]); -} -static inline std::wstring mbstowcs (const std::string & p) // input: MBCS -{ +// +// basetypes.h - basic types that C++ lacks +// +// Copyright (c) Microsoft Corporation. All rights reserved. +// +#pragma once +#ifndef _BASETYPES_ +#define _BASETYPES_ + +#ifndef UNDER_CE // fixed-buffer overloads not available for wince +#ifdef _CRT_SECURE_CPP_OVERLOAD_STANDARD_NAMES // fixed-buffer overloads for strcpy() etc. 
+#undef _CRT_SECURE_CPP_OVERLOAD_STANDARD_NAMES +#endif +#define _CRT_SECURE_CPP_OVERLOAD_STANDARD_NAMES 1 +#endif + +#pragma warning (push) +#pragma warning (disable: 4793) // caused by varargs + +// disable certain parts of basetypes for wince compilation +#ifdef UNDER_CE +#define BASETYPES_NO_UNSAFECRTOVERLOAD // disable unsafe CRT overloads (safe functions don't exist in wince) +#define BASETYPES_NO_STRPRINTF // dependent functions here are not defined for wince +#endif + +#ifndef OACR // dummies when we are not compiling under Office +#define OACR_WARNING_SUPPRESS(x, y) +#define OACR_WARNING_DISABLE(x, y) +#define OACR_WARNING_PUSH +#define OACR_WARNING_POP +#endif +#ifndef OACR_ASSUME // this seems to be a different one +#define OACR_ASSUME(x) +#endif + +// following oacr warnings are not level1 or level2-security +// in currect stage we want to ignore those warnings +// if necessay this can be fixed at later stage + +// not a bug +OACR_WARNING_DISABLE(EXC_NOT_CAUGHT_BY_REFERENCE, "Not indicating a bug or security threat."); +OACR_WARNING_DISABLE(LOCALDECLHIDESLOCAL, "Not indicating a bug or security threat."); + +// not reviewed +OACR_WARNING_DISABLE(MISSING_OVERRIDE, "Not level1 or level2_security."); +OACR_WARNING_DISABLE(EMPTY_DTOR, "Not level1 or level2_security."); +OACR_WARNING_DISABLE(DEREF_NULL_PTR, "Not level1 or level2_security."); +OACR_WARNING_DISABLE(INVALID_PARAM_VALUE_1, "Not level1 or level2_security."); +OACR_WARNING_DISABLE(VIRTUAL_CALL_IN_CTOR, "Not level1 or level2_security."); +OACR_WARNING_DISABLE(POTENTIAL_ARGUMENT_TYPE_MISMATCH, "Not level1 or level2_security."); + +// determine WIN32 api calling convention +// it seems this is normally stdcall?? but when compiling as /clr:pure or /clr:Safe +// this is not supported, so in this case, we need to use the 'default' calling convention +// TODO: can we reuse the #define of WINAPI?? 
+#ifdef _M_CEE_SAFE +#define WINAPI_CC __clrcall +#elif _M_CEE +#define WINAPI_CC __clrcall +#else +#define WINAPI_CC __stdcall +#endif + +// fix some warnings in STL +#if !defined(_DEBUG) || defined(_CHECKED) || defined(_MANAGED) +#pragma warning(disable : 4702) // unreachable code +#endif + +#include +#include +#include // include here because we redefine some names later +#include +#include +#include +#include // for HUGE_VAL // potential double isnan definition +#include +#include +#include +#include +#include // std::wstring_convert +#ifdef _MSC_VER +#include // std::codecvt_utf8 +#endif +#ifdef _WIN32 +#include // for CRITICAL_SECTION and Unicode conversion functions --TODO: is there a portable alternative? +#endif +#if __unix__ +#include +#include +#endif + +using namespace std; + +// CRT error handling seems to not be included in wince headers +// so we define our own imports +#ifdef UNDER_CE + +// TODO: is this true - is GetLastError == errno?? - also this adds a dependency on windows.h +#define errno GetLastError() + +// strerror(x) - x here is normally errno - TODO: make this return errno as a string +#define strerror(x) "strerror error but can't report error number sorry!" 
+#endif + +#if 0 +#ifndef __in // dummies for sal annotations if compiler does not support it +#define __in +#define __inout_z +#define __in_count(x) +#define __inout_cap(x) +#define __inout_cap_c(x) +#endif +#ifndef __out_z_cap // non-VS2005 annotations +#define __out_cap(x) +#define __out_z_cap(x) +#define __out_cap_c(x) +#endif + +#ifndef __override // and some more non-std extensions required by Office +#define __override virtual +#endif +#endif + +// disable warnings for which fixing would make code less readable +#pragma warning(disable : 4290) // throw() declaration ignored +#pragma warning(disable : 4244) // conversion from typeA to typeB, possible loss of data + + +// ---------------------------------------------------------------------------- +// basic macros +// ---------------------------------------------------------------------------- + +#define SAFE_DELETE(p) { if(p) { delete (p); (p)=NULL; } } +#define SAFE_RELEASE(p) { if(p) { (p)->Release(); (p)=NULL; } } // nasty! use CComPtr<> +#ifndef ASSERT +#define ASSERT assert +#endif + +// ---------------------------------------------------------------------------- +// basic data types +// ---------------------------------------------------------------------------- + +namespace msra { namespace basetypes { + +// class ARRAY -- std::vector with array-bounds checking +// VS 2008 and above do this, so there is no longer a need for this. + +#pragma warning(push) +#pragma warning(disable : 4555) // expression has no affect, used so retail won't be empty + +template +class ARRAY : public std::vector<_ElemType> +{ +#if defined (_DEBUG) || defined (_CHECKED) // debug version with range checking + static void throwOutOfBounds() + { // (moved to separate function hoping to keep inlined code smaller + OACR_WARNING_PUSH; + OACR_WARNING_DISABLE(IGNOREDBYCOMMA, "Reviewd OK. 
Special trick below to show a message when assertion fails" + "[rogeryu 2006/03/24]"); + OACR_WARNING_DISABLE(BOGUS_EXPRESSION_LIST, "This is intentional. [rogeryu 2006/03/24]"); + //ASSERT ("ARRAY::operator[] out of bounds", false); + OACR_WARNING_POP; + } +#endif + +public: + + ARRAY() : std::vector<_ElemType> () { } + ARRAY (int size) : std::vector<_ElemType> (size) { } + +#if defined (_DEBUG) || defined (_CHECKED) // debug version with range checking + // ------------------------------------------------------------------------ + // operator[]: with array-bounds checking + // ------------------------------------------------------------------------ + + inline _ElemType & operator[] (int index) // writing + { + if (index < 0 || index >= size()) throwOutOfBounds(); + return (*(std::vector<_ElemType>*) this)[index]; + } + + // ------------------------------------------------------------------------ + + inline const _ElemType & operator[] (int index) const // reading + { + if (index < 0 || index >= size()) throwOutOfBounds(); + return (*(std::vector<_ElemType>*) this)[index]; + } +#endif + + // ------------------------------------------------------------------------ + // size(): same as base class, but returning an 'int' instead of 'size_t' + // to allow for better readable code + // ------------------------------------------------------------------------ + + inline int size() const + { + size_t siz = ((std::vector<_ElemType>*) this)->size(); + return (int) siz; + } +}; +// overload swap(), otherwise we'd fallback to 3-way assignment & possibly throw +template inline void swap (ARRAY<_T> & L, ARRAY<_T> & R) throw() +{ swap ((std::vector<_T> &) L, (std::vector<_T> &) R); } + +// class fixed_vector - non-resizable vector + +template class fixed_vector +{ + _T * p; // pointer array + size_t n; // number of elements + void check (int index) const { index/*avoid compiler warning*/;ASSERT (index >= 0 && (size_t) index < n); } + void check (size_t index) const { ASSERT 
(index < n); } + // ... TODO: when I make this public, LinearTransform.h acts totally up but I cannot see where it comes from. + //fixed_vector (const fixed_vector & other) : n (0), p (NULL) { *this = other; } +public: + fixed_vector() : n (0), p (NULL) { } + void resize (int size) { clear(); if (size > 0) { p = new _T[size]; n = size; } } + void resize (size_t size) { clear(); if (size > 0) { p = new _T[size]; n = size; } } + fixed_vector (int size) : n (size), p (size > 0 ? new _T[size] : NULL) { } + fixed_vector (size_t size) : n ((int) size), p (size > 0 ? new _T[size] : NULL) { } + ~fixed_vector() { delete[] p; } + inline int size() const { return (int) n; } + inline int capacity() const { return (int) n; } + inline bool empty() const { return n == 0; } + void clear() { delete[] p; p = NULL; n = 0; } + _T * begin() { return p; } + const _T * begin() const { return p; } + _T * end() { return p + n; } // note: n == 0 so result is NULL + inline _T & operator[] (int index) { check (index); return p[index]; } // writing + inline const _T & operator[] (int index) const { check (index); return p[index]; } // reading + inline _T & operator[] (size_t index) { check (index); return p[index]; } // writing + inline const _T & operator[] (size_t index) const { check (index); return p[index]; } // reading + inline int indexof (const _T & elem) const { ASSERT (&elem >= p && &elem < p + n); return &elem - p; } + inline void swap (fixed_vector & other) throw() { std::swap (other.p, p); std::swap (other.n, n); } + template fixed_vector & operator= (const VECTOR & other) + { + int other_n = (int) other.size(); + fixed_vector tmp (other_n); + for (int k = 0; k < other_n; k++) tmp[k] = other[k]; + swap (tmp); + return *this; + } + fixed_vector & operator= (const fixed_vector & other) + { + int other_n = (int) other.size(); + fixed_vector tmp (other_n); + for (int k = 0; k < other_n; k++) tmp[k] = other[k]; + swap (tmp); + return *this; + } + template fixed_vector (const VECTOR & 
other) : n (0), p (NULL) { *this = other; } +}; +template inline void swap (fixed_vector<_T> & L, fixed_vector<_T> & R) throw() { L.swap (R); } + +#pragma warning(pop) // pop off waring: expression has no effect + +// class matrix - simple fixed-size 2-dimensional array, access elements as m(i,j) +// stored as concatenation of rows + +//template class matrix : fixed_vector +//{ +// size_t numcols; +// size_t locate (size_t i, size_t j) const { ASSERT (i < rows() && j < cols()); return i * cols() + j; } +//public: +// typedef T elemtype; +// matrix() : numcols (0) {} +// matrix (size_t n, size_t m) { resize (n, m); } +// void resize (size_t n, size_t m) { numcols = m; fixed_vector::resize (n * m); } +// size_t cols() const { return numcols; } +// size_t rows() const { return empty() ? 0 : size() / cols(); } +// size_t size() const { return fixed_vector::size(); } // use this for reading and writing... not nice! +// bool empty() const { return fixed_vector::empty(); } +// T & operator() (size_t i, size_t j) { return (*this)[locate(i,j)]; } +// const T & operator() (size_t i, size_t j) const { return (*this)[locate(i,j)]; } +// void swap (matrix & other) throw() { std::swap (numcols, other.numcols); fixed_vector::swap (other); } +//}; +//template inline void swap (matrix<_T> & L, matrix<_T> & R) throw() { L.swap (R); } + +// TODO: get rid of these +typedef std::string STRING; +typedef std::wstring WSTRING; + +// derive from this for noncopyable classes (will get you private unimplemented copy constructors) +// ... 
TODO: change all of basetypes classes/structs to use this +class noncopyable +{ + noncopyable & operator= (const noncopyable &); + noncopyable (const noncopyable &); +public: + noncopyable(){} +}; + +// class CCritSec and CAutoLock -- simple critical section handling +#ifndef _WIN32 // TODO: Currently only working under Windows; BROKEN otherwise, to be fixed +#define CRITICAL_SECTION int +void InitializeCriticalSection(int *) {} +void DeleteCriticalSection(int *) {} +void EnterCriticalSection(int *) {} +void LeaveCriticalSection(int *) {} +#endif +class CCritSec +{ + CCritSec (const CCritSec &); CCritSec & operator= (const CCritSec &); + CRITICAL_SECTION m_CritSec; +public: + CCritSec() { InitializeCriticalSection(&m_CritSec); }; + ~CCritSec() { DeleteCriticalSection(&m_CritSec); }; + void Lock() { EnterCriticalSection(&m_CritSec); }; + void Unlock() { LeaveCriticalSection(&m_CritSec); }; +}; + + +// locks a critical section, and unlocks it automatically +// when the lock goes out of scope +class CAutoLock +{ + CAutoLock(const CAutoLock &refAutoLock); CAutoLock &operator=(const CAutoLock &refAutoLock); + CCritSec & m_rLock; +public: + CAutoLock(CCritSec & rLock) : m_rLock (rLock) { m_rLock.Lock(); }; + ~CAutoLock() { m_rLock.Unlock(); }; +}; + +#if 0 +// an efficient way to write COM code +// usage examples: +// COM_function() || throw_hr ("message"); +// while ((s->Read (p, n, &m) || throw_hr ("Read failure")) == S_OK) { ... } +// is that cool or what? 
+struct bad_hr : public std::runtime_error +{ + HRESULT hr; + bad_hr (HRESULT p_hr, const char * msg) : hr (p_hr), std::runtime_error (msg) { } + // (only for use in || expression --deprecated:) + bad_hr() : std::runtime_error(NULL) { } + bad_hr(const char * msg) : std::runtime_error(msg) { } +}; +struct throw_hr +{ + const char * msg; + inline throw_hr (const char * msg = NULL) : msg (msg) {} +}; +inline static HRESULT operator|| (HRESULT hr, const throw_hr & e) +{ + if (SUCCEEDED (hr)) return hr; + throw bad_hr (hr, e.msg); +} +// (old deprecated version kept for compat:) +inline static bool operator|| (HRESULT hr, const bad_hr & e) { if (SUCCEEDED (hr)) return true; throw bad_hr (hr, e.what()); } + +// back-mapping of exceptions to HRESULT codes +// usage pattern: HRESULT COM_function (...) { try { exception-based function body } catch_hr_return; } +#define catch_hr_return \ + catch (const bad_alloc &) { return E_OUTOFMEMORY; } \ + catch (const bad_hr & e) { return e.hr; } \ + catch (const invalid_argument &) { return E_INVALIDARG; } \ + catch (const runtime_error &) { return E_FAIL; } \ + catch (const logic_error &) { return E_UNEXPECTED; } \ + catch (const exception &) { return E_FAIL; } \ + return S_OK; + +// CoInitializeEx() wrapper to ensure CoUnintialize() +//struct auto_co_initialize : noncopyable +//{ +// auto_co_initialize() { ::CoInitializeEx (NULL, COINIT_MULTITHREADED) || bad_hr ("auto_co_initialize: CoInitializeEx failure"); } +// ~auto_co_initialize() { ::CoUninitialize(); } +//}; + +// auto pointer for ::CoTaskMemFree +template class auto_co_ptr : noncopyable +{ + T * p; +public: + auto_co_ptr() : p (NULL) { } + auto_co_ptr (T * p) : p (p) { } +// ~auto_co_ptr() { ::CoTaskMemFree (p); } + operator T * () const { return p; } + T * operator->() const { return p; } + T** operator& () { assert (p == NULL); return &p; } // must be empty when taking address +}; + +// represents a thread-local-storage variable +// Note: __declspec(thread) is broken on 
pre-Vista for delay loaded DLLs +// [http://www.nynaeve.net/?p=187] +// so instead, we need to wrap up the Win32 TLS functions ourselves. +// Note: tls instances must be allocated as static to work correctly, e.g.: +// static tls myVal(); +// myVal = (void *) 25; +// printf ("value is %d",(void *) myVal); + +class tls +{ +private: + int tlsSlot; +public: + +#ifdef UNDER_CE + // this is from standard windows headers - seems to be missing in WINCE + #define TLS_OUT_OF_INDEXES ((DWORD)0xFFFFFFFF) +#endif + tls() { tlsSlot = TlsAlloc(); if (tlsSlot == TLS_OUT_OF_INDEXES) throw std::runtime_error("tls: TlsAlloc failed, out of tls slots"); } + operator void * () { return TlsGetValue (tlsSlot); } + void *operator = (void *val) { if (!TlsSetValue (tlsSlot,val)) throw std::runtime_error ("tls: TlsSetValue failed"); return val; } +}; +#endif + +};}; // namespace + +#if 0 //ndef BASETYPES_NO_UNSAFECRTOVERLOAD // if on, no unsafe CRT overload functions + +// ---------------------------------------------------------------------------- +// overloads for "unsafe" CRT functions used in our code base +// ---------------------------------------------------------------------------- + +// strlen/wcslen overloads for fixed-buffer size + +// Note: Careful while fixing bug related to these templates. +// In all attempted experiments, in seems all 6 definitions are required +// below to get the correct behaviour. Be very very careful +// not to delete something without testing that case 5&6 have "size" deduced. +// 1. char * +// 2. char * const +// 3. const char * +// 4. const char * const +// 5. char (&) [size] +// 6. const char (&) [size] +// the following includes all headers that use strlen() and fail because of the mapping below +// to find those, change #define strlen strlen_ to something invalid e.g. 
strlen::strlen_ +#if _MSC_VER >= 1600 // VS 2010 --TODO: fix this by correct include order instead +#include // defines strlen() as an intrinsic in VS 2010 +#include // uses strlen() +#include // uses strlen() +#endif +#define strlen strlen_ +#ifndef LINUX +template inline __declspec(deprecated("Dummy general template, cannot be used directly")) +#else +template inline +#endif // LINUX +size_t strlen_(_T &s) { return strnlen_s(static_cast(s), SIZE_MAX); } // never be called but needed to keep compiler happy +template inline size_t strlen_(const _T &s) { return strnlen_s(static_cast(s), SIZE_MAX); } +template<> inline size_t strlen_(char * &s) { return strnlen_s(s, SIZE_MAX); } +template<> inline size_t strlen_(const char * &s) { return strnlen_s(s, SIZE_MAX); } +template inline size_t strlen_(const char (&s)[n]) { return (strnlen_s(s, n)); } +template inline size_t strlen_(char (&s)[n]) { return (strnlen_s(s, n)); } +#define wcslen wcslen_ +template inline __declspec(deprecated("Dummy general template, cannot be used directly")) +size_t wcslen_(_T &s) { return wcsnlen_s(static_cast(s), SIZE_MAX); } // never be called but needed to keep compiler happy +template inline size_t __cdecl wcslen_(const _T &s) { return wcsnlen_s(static_cast(s), SIZE_MAX); } +template<> inline size_t wcslen_(wchar_t * &s) { return wcsnlen_s(s, SIZE_MAX); } +template<> inline size_t wcslen_(const wchar_t * &s) { return wcsnlen_s(s, SIZE_MAX); } +template inline size_t wcslen_(const wchar_t (&s)[n]) { return (wcsnlen_s(s, n)); } +template inline size_t wcslen_(wchar_t (&s)[n]) { return (wcsnlen_s(s, n)); } + +// xscanf wrappers -- one overload for each actual use case in our code base +static inline int sscanf (const char * buf, const char * format, int * i1) { return sscanf_s (buf, format, i1); } +static inline int sscanf (const char * buf, const char * format, int * i1, int * i2) { return sscanf_s (buf, format, i1, i2); } +static inline int sscanf (const char * buf, const char * format, int 
* i1, int * i2, int * i3) { return sscanf_s (buf, format, i1, i2, i3); } +static inline int sscanf (const char * buf, const char * format, double * f1) { return sscanf_s (buf, format, f1); } +static inline int swscanf (const wchar_t * buf, const wchar_t * format, int * i1) { return swscanf_s (buf, format, i1); } +static inline int fscanf (FILE * file, const char * format, float * f1) { return fscanf_s (file, format, f1); } + +// ...TODO: should we pass 'count' instead of SIZE_MAX? (need to review use cases) +#define _vsnprintf _vsnprintf_ +static inline int _vsnprintf_(char *buffer, size_t count, const char *format, va_list argptr) +{ return _vsnprintf_s (buffer, SIZE_MAX, count, format, argptr); } +#define _vsnwprintf _vsnwprintf_ +static inline int _vsnwprintf_(wchar_t *buffer, size_t count, const wchar_t *format, va_list argptr) +{ return _vsnwprintf_s (buffer, SIZE_MAX, count, format, argptr); } + +// wcsfcpy -- same as standard wcsncpy, use padded fixed-size buffer really needed +static inline void wcsfcpy (wchar_t * dest, const wchar_t * source, size_t count) +{ + while (count && (*dest++ = *source++) != 0) count--; // copy + if (count) while (--count) *dest++ = 0; // pad with zeroes +} + +// cacpy -- fixed-size character array (same as original strncpy (dst, src, sizeof (dst))) +// NOTE: THIS FUNCTION HAS NEVER BEEN TESTED. REMOVE THIS COMMENT ONCE IT HAS. 
+template static inline void cacpy (T (&dst)[n], const T * src) +{ for (int i = 0; i < n; i++) { dst[i] = *src; if (*src) src++; } } +// { return strncpy (dst, src, n); } // using original C std lib function + +// mappings for "unsafe" functions that are not really unsafe +#define strtok strtok_ // map to "safe" function (adds no value) +static inline /*const*/ char * strtok_(char * s, const char * delim) +{ + static msra::basetypes::tls tls_context; // see note for tls class def + char *context = (char *) (void *) tls_context; + char *ret = strtok_s (s, delim, &context); + tls_context = context; + return ret; +} + +#define wcstok wcstok_ // map to "safe" function (adds no value) +static inline /*const*/ wchar_t * wcstok_(wchar_t * s, const wchar_t * delim) +{ + static msra::basetypes::tls tls_context; // see note for tls class def + wchar_t *context = (wchar_t *) (void *) tls_context; + wchar_t *ret = wcstok_s (s, delim, &context); + tls_context = context; + return ret; +} + +#define fopen fopen_ // map to _fsopen() (adds no value) +static inline FILE * fopen_(const char * p, const char * m) { return _fsopen (p, m, _SH_DENYWR); } +#define _wfopen _wfopen_ // map to _wfsopen() (adds no value) +static inline FILE * _wfopen_(const wchar_t * p, const wchar_t * m) { return _wfsopen (p, m, _SH_DENYWR); } + +#define strerror(e) strerror_((e)) // map to "safe" function (adds no value) +static inline const char *strerror_(int e) +{ // keep a cache so we can return a pointer (to mimic the old interface) + static msra::basetypes::CCritSec cs; static std::map msgs; + msra::basetypes::CAutoLock lock (cs); + if (msgs.find(e) == msgs.end()) { char msg[1024]; strerror_s (msg, e); msgs[e] = msg; } + return msgs[e].c_str(); +} + +#endif + +// ---------------------------------------------------------------------------- +// frequently missing string functions +// ---------------------------------------------------------------------------- + +namespace msra { namespace strfun { + 
+#ifndef BASETYPES_NO_STRPRINTF + +// [w]strprintf() -- like sprintf() but resulting in a C++ string +template struct _strprintf : public std::basic_string<_T> +{ // works for both wchar_t* and char* + _strprintf (const _T * format, ...) + { + va_list args; va_start (args, format); // varargs stuff + size_t n = _cprintf (format, args); // num chars excl. '\0' + const int FIXBUF_SIZE = 128; // incl. '\0' + if (n < FIXBUF_SIZE) + { + _T fixbuf[FIXBUF_SIZE]; + this->assign (_sprintf (&fixbuf[0], sizeof (fixbuf)/sizeof (*fixbuf), format, args), n); + } + else // too long: use dynamically allocated variable-size buffer + { + std::vector<_T> varbuf (n + 1); // incl. '\0' + this->assign (_sprintf (&varbuf[0], varbuf.size(), format, args), n); + } + } +private: + // helpers + inline size_t _cprintf (const wchar_t * format, va_list args) { return vswprintf (nullptr, 0, format, args); } + inline size_t _cprintf (const char * format, va_list args) { return vsprintf (nullptr, format, args); } + inline const wchar_t * _sprintf (wchar_t * buf, size_t bufsiz, const wchar_t * format, va_list args) { vswprintf (buf, bufsiz, format, args); return buf; } + inline const char * _sprintf ( char * buf, size_t /*bufsiz*/, const char * format, va_list args) { vsprintf (buf, format, args); return buf; } +}; +typedef strfun::_strprintf strprintf; // char version +typedef strfun::_strprintf wstrprintf; // wchar_t version + +#endif + +// string-encoding conversion functions +struct utf8 : std::string { utf8 (const std::wstring & p) // utf-16 to -8 +{ +#if 1 + std::wstring_convert> cv; + (*(std::string*)this) = cv.to_bytes(p); +#else // old version, delete once we know it works + size_t len = p.length(); + if (len == 0) { return;} // empty string + msra::basetypes::fixed_vector buf (3 * len + 1); // max: 1 wchar => up to 3 mb chars + // ... 
TODO: this fill() should be unnecessary (a 0 is appended)--but verify + std::fill (buf.begin (), buf.end (), 0); + int rc = WideCharToMultiByte (CP_UTF8, 0, p.c_str(), (int) len, + &buf[0], (int) buf.size(), NULL, NULL); + if (rc == 0) throw std::runtime_error ("WideCharToMultiByte"); + (*(std::string*)this) = &buf[0]; +#endif +}}; +struct utf16 : std::wstring { utf16 (const std::string & p) // utf-8 to -16 +{ +#if 1 + std::wstring_convert> cv; + (*(std::wstring*)this) = cv.from_bytes(p); +#else // old version, delete once we know it works + size_t len = p.length(); + if (len == 0) { return;} // empty string + msra::basetypes::fixed_vector buf (len + 1); + // ... TODO: this fill() should be unnecessary (a 0 is appended)--but verify + std::fill(buf.begin(), buf.end(), (wchar_t)0); + int rc = MultiByteToWideChar(CP_UTF8, 0, p.c_str(), (int)len, + &buf[0], (int)buf.size()); + if (rc == 0) throw std::runtime_error("MultiByteToWideChar"); + ASSERT(rc < buf.size()); + (*(std::wstring*)this) = &buf[0]; +#endif +}}; + +#pragma warning(push) +#pragma warning(disable : 4996) // Reviewed by Yusheng Li, March 14, 2006. depr. fn (wcstombs, mbstowcs) +static inline std::string wcstombs (const std::wstring & p) // output: MBCS +{ + size_t len = p.length(); + msra::basetypes::fixed_vector buf (2 * len + 1); // max: 1 wchar => 2 mb chars + std::fill (buf.begin (), buf.end (), 0); + ::wcstombs (&buf[0], p.c_str(), 2 * len + 1); + return std::string (&buf[0]); +} +static inline std::wstring mbstowcs (const std::string & p) // input: MBCS +{ size_t len = p.length(); msra::basetypes::fixed_vector buf(len + 1); // max: >1 mb chars => 1 wchar std::fill(buf.begin(), buf.end(), (wchar_t)0); OACR_WARNING_SUPPRESS(UNSAFE_STRING_FUNCTION, "Reviewed OK. size checked. 
[rogeryu 2006/03/21]"); ::mbstowcs(&buf[0], p.c_str(), len + 1); - return std::wstring(&buf[0]); -} -#pragma warning(pop) - -// split and join -- tokenize a string like strtok() would, join() strings together -template static inline std::vector> split (const std::basic_string<_T> & s, const _T * delim) -{ - std::vector> res; - for (size_t st = s.find_first_not_of (delim); st != std::basic_string<_T>::npos; ) - { - size_t en = s.find_first_of (delim, st +1); - if (en == std::basic_string<_T>::npos) en = s.length(); - res.push_back (s.substr (st, en-st)); - st = s.find_first_not_of (delim, en +1); // may exceed - } - return res; -} - -template static inline std::basic_string<_T> join (const std::vector> & a, const _T * delim) -{ - std::basic_string<_T> res; - for (int i = 0; i < (int) a.size(); i++) - { - if (i > 0) res.append (delim); - res.append (a[i]); - } - return res; -} - -// parsing strings to numbers -static inline int toint (const wchar_t * s) -{ - return (int)wcstol(s, 0, 10); - //return _wtoi (s); // ... TODO: test this -} -static inline int toint (const char * s) -{ - return atoi (s); // ... TODO: check it -} -static inline int toint (const std::wstring & s) { return toint (s.c_str()); } - -static inline double todouble (const char * s) -{ - char * ep; // will be set to point to first character that failed parsing - double value = strtod (s, &ep); - if (*s == 0 || *ep != 0) - throw std::runtime_error ("todouble: invalid input string"); - return value; -} - -// TODO: merge this with todouble(const char*) above -static inline double todouble (const std::string & s) -{ - s.size(); // just used to remove the unreferenced warning - - double value = 0.0; - - // stod supposedly exists in VS2010, but some folks have compilation errors - // If this causes errors again, change the #if into the respective one for VS 2010. 
-#if _MSC_VER > 1400 // VS 2010+ - size_t * idx = 0; - value = std::stod (s, idx); - if (idx) throw std::runtime_error ("todouble: invalid input string"); -#else - char *ep = 0; // will be updated by strtod to point to first character that failed parsing - value = strtod (s.c_str(), &ep); - - // strtod documentation says ep points to first unconverted character OR - // return value will be +/- HUGE_VAL for overflow/underflow - if (ep != s.c_str() + s.length() || value == HUGE_VAL || value == -HUGE_VAL) - throw std::runtime_error ("todouble: invalid input string"); -#endif - - return value; -} - -static inline double todouble (const std::wstring & s) -{ - wchar_t * endptr; - double value = wcstod (s.c_str(), &endptr); - if (*endptr) throw std::runtime_error ("todouble: invalid input string"); - return value; -} - -// ---------------------------------------------------------------------------- -// tokenizer -- utility for white-space tokenizing strings in a character buffer -// This simple class just breaks a string, but does not own the string buffer. 
-// ---------------------------------------------------------------------------- - -class tokenizer : public std::vector -{ - const char * delim; -public: - tokenizer (const char * delim, size_t cap) : delim (delim) { reserve (cap); } - // Usage: tokenizer tokens (delim, capacity); tokens = buf; tokens.size(), tokens[i] - void operator= (char * buf) - { - resize (0); - - // strtok_s not available on all platforms - so backoff to strtok on those -#ifdef strtok_s - char * context; // for strtok_s() - for (char * p = strtok_s (buf, delim, &context); p; p = strtok_s (NULL, delim, &context)) - push_back (p); -#else - for (char * p = strtok (buf, delim); p; p = strtok (NULL, delim)) - push_back (p); -#endif - } -}; - -};}; // namespace - -// ---------------------------------------------------------------------------- -// wrappers for some basic types (files, handles, timer) -// ---------------------------------------------------------------------------- - -#ifndef _MSC_VER // add some functions that are VS-only -static inline FILE* _wfopen(const wchar_t * path, const wchar_t * mode) { return fopen(msra::strfun::wcstombs(path).c_str(), msra::strfun::utf8(mode).c_str()); } -#endif - -namespace msra { namespace basetypes { - -// FILE* with auto-close; use auto_file_ptr instead of FILE*. -// Warning: do not pass an auto_file_ptr to a function that calls fclose(), -// except for fclose() itself. -class auto_file_ptr -{ - FILE * f; - FILE * operator= (auto_file_ptr &); // can't ref-count: no assignment - auto_file_ptr (auto_file_ptr &); - // implicit close (destructor, assignment): we ignore error - void close() throw() { if (f) try { if (f != stdin && f != stdout && f != stderr) ::fclose (f); } catch (...) 
{ } f = NULL; } - void openfailed (const std::string & path) { throw std::runtime_error ("auto_file_ptr: error opening file '" + path + "': " + strerror (errno)); } -protected: - friend int fclose (auto_file_ptr&); // explicit close (note: may fail) - int fclose() { int rc = ::fclose (f); if (rc == 0) f = NULL; return rc; } -public: - auto_file_ptr() : f (NULL) { } - ~auto_file_ptr() { close(); } - auto_file_ptr (const char * path, const char * mode) { f = fopen (path, mode); if (f == NULL) openfailed (path); } - auto_file_ptr (const wchar_t * wpath, const char * mode) { f = _wfopen (wpath, msra::strfun::utf16 (mode).c_str()); if (f == NULL) openfailed (msra::strfun::utf8 (wpath)); } - FILE * operator= (FILE * other) { close(); f = other; return f; } - auto_file_ptr (FILE * other) : f (other) { } - operator FILE * () const { return f; } - FILE * operator->() const { return f; } - void swap (auto_file_ptr & other) throw() { std::swap (f, other.f); } -}; -inline int fclose (auto_file_ptr & af) { return af.fclose(); } - -#ifdef _MSC_VER -// auto-closing container for Win32 handles. -// Pass close function if not CloseHandle(), e.g. -// auto_handle h (FindFirstFile(...), FindClose); -// ... TODO: the close function should really be a template parameter -template class auto_handle_t -{ - _H h; - BOOL (WINAPI_CC * close) (HANDLE); // close function - auto_handle_t operator= (const auto_handle_t &); - auto_handle_t (const auto_handle_t &); -public: - auto_handle_t (_H p_h, BOOL (WINAPI_CC * p_close) (HANDLE) = ::CloseHandle) : h (p_h), close (p_close) {} - ~auto_handle_t() { if (h != INVALID_HANDLE_VALUE) close (h); } - operator _H () const { return h; } -}; -typedef auto_handle_t auto_handle; -#endif - -// like auto_ptr but calls freeFunc_p (type free_func_t) instead of delete to clean up -// minor difference - wrapped object is T, not T *, so to wrap a -// T *, use auto_clean -// TODO: can this be used for simplifying those other classes? 
-template class auto_clean -{ - T it; - typedef FR (*free_func_t)(T); - free_func_t freeFunc; // the function used to free the pointer - void free() { if (it) freeFunc(it); it = 0; } - auto_clean operator= (const auto_clean &); // hide to prevent copy - auto_clean (const auto_clean &); // hide to prevent copy -public: - auto_clean (T it_p, free_func_t freeFunc_p) : it (it_p), freeFunc (freeFunc_p) {} - ~auto_clean() { free(); } - operator T () { return it; } - operator const T () const { return it; } - T detach () { T tmp = it; it = 0; return tmp; } // release ownership of object -}; - -#if 0 -// simple timer -// auto_timer timer; run(); double seconds = timer; // now can abandon the object -class auto_timer -{ - LARGE_INTEGER freq, start; - auto_timer (const auto_timer &); void operator= (const auto_timer &); -public: - auto_timer() - { - if (!QueryPerformanceFrequency (&freq)) // count ticks per second - throw std::runtime_error ("auto_timer: QueryPerformanceFrequency failure"); - QueryPerformanceCounter (&start); - } - operator double() const // each read gives time elapsed since start, in seconds - { - LARGE_INTEGER end; - QueryPerformanceCounter (&end); - return (end.QuadPart - start.QuadPart) / (double) freq.QuadPart; - } - void show (const std::string & msg) const - { - double elapsed = *this; - fprintf (stderr, "%s: %.6f ms\n", msg.c_str(), elapsed * 1000.0/*to ms*/); - } -}; -#endif - -};}; - -namespace msra { namespace files { - -// ---------------------------------------------------------------------------- -// textreader -- simple reader for text files --we need this all the time! -// Currently reads 8-bit files, but can return as wstring, in which case -// they are interpreted as UTF-8 (without BOM). -// Note: Not suitable for pipes or typed input due to readahead (fixable if needed). 
-// ---------------------------------------------------------------------------- - -class textreader -{ - msra::basetypes::auto_file_ptr f; - std::vector buf; // read buffer (will only grow, never shrink) - int ch; // next character (we need to read ahead by one...) - char getch() { char prevch = (char) ch; ch = fgetc (f); return prevch; } -public: - textreader (const std::wstring & path) : f (path.c_str(), "rb") { buf.reserve (10000); ch = fgetc (f); } - operator bool() const { return ch != EOF; } // true if still a line to read - std::string getline() // get and consume the next line - { - if (ch == EOF) throw std::logic_error ("textreader: attempted to read beyond EOF"); - assert (buf.empty()); - // get all line's characters --we recognize UNIX (LF), DOS (CRLF), and Mac (CR) convention - while (ch != EOF && ch != '\n' && ch != '\r') buf.push_back (getch()); - if (ch != EOF && getch() == '\r' && ch == '\n') getch(); // consume EOLN char - std::string line (buf.begin(), buf.end()); - buf.clear(); - return line; - } - std::wstring wgetline() { return msra::strfun::utf16 (getline()); } -}; - -};}; - -// ---------------------------------------------------------------------------- -// functional-programming style helper macros (...do this with templates?) -// ---------------------------------------------------------------------------- - -#define foreach_index(_i,_dat) for (int _i = 0; _i < (int) (_dat).size(); _i++) -#define map_array(_x,_expr,_y) { _y.resize (_x.size()); foreach_index(_i,_x) _y[_i]=_expr(_x[_i]); } -#define reduce_array(_x,_expr,_y) { foreach_index(_i,_x) _y = (_i==0) ? 
_x[_i] : _expr(_y,_x[_i]); } -//template -//static void fill_array(_A & a, _F v) { ::fill (a.begin(), a.end(), v); } - -// ---------------------------------------------------------------------------- -// frequently missing utility functions -// ---------------------------------------------------------------------------- - -namespace msra { namespace util { - -// to (slightly) simplify processing of command-line arguments. -// command_line args (argc, argv); -// while (args.has (1) && args[0][0] == '-') { option = args.shift(); process (option); } -// for (const wchar_t * arg = args.shift(); arg; arg = args.shift()) { process (arg); } -class command_line -{ - int num; - const wchar_t ** args; -public: - command_line (int argc, wchar_t * argv[]) : num (argc), args ((const wchar_t **) argv) { shift(); } - inline int size() const { return num; } - inline bool has (int left) { return size() >= left; } - const wchar_t * shift() { if (size() == 0) return NULL; num--; return *args++; } - const wchar_t * operator[] (int i) const { return (i < 0 || i >= size()) ? NULL : args[i]; } -}; - -// byte-reverse a variable --reverse all bytes (intended for integral types and float) -//template static inline void bytereverse (T & v) throw() -//{ // note: this is more efficient than it looks because sizeof (v[0]) is a constant -// char * p = (char *) &v; -// const size_t elemsize = sizeof (v); -// for (int k = 0; k < elemsize / 2; k++) // swap individual bytes -// swap (p[k], p[elemsize-1 - k]); -//} - -// byte-swap an entire array -template static inline void byteswap (V & v) throw() -{ - foreach_index (i, v) - bytereverse (v[i]); -} - -#if 0 -// execute a block with retry -// Block must be restartable. -// Use this when writing small files to those unreliable Windows servers. 
-// TODO: This will fail to compile under VS 2008--we need an #ifdef around this -template static void attempt (int retries, const FUNCTION & body) -{ - for (int attempt = 1; ; attempt++) - { - try - { - body(); - if (attempt > 1) fprintf (stderr, "attempt: success after %d retries\n", attempt); - break; - } - catch (const std::exception & e) - { - if (attempt >= retries) - throw; // failed N times --give up and rethrow the error - fprintf (stderr, "attempt: %s, retrying %d-th time out of %d...\n", e.what(), attempt+1, retries); -#ifndef LINUX - ::Sleep (1000); // wait a little, then try again -#else - std::chrono::milliseconds dura(1000); - std::this_thread::sleep_for(dura); -#endif /* LINUX */ - } - } -} -#endif - -};}; // namespace - -template static inline void ZeroStruct (S & s) { memset (&s, 0, sizeof (s)); } - -// ---------------------------------------------------------------------------- -// machine dependent -// ---------------------------------------------------------------------------- - -#define MACHINE_IS_BIG_ENDIAN (false) - -using namespace msra::basetypes; // for compatibility - -#pragma warning (pop) - -// RuntimeError - throw a std::runtime_error with a formatted error string -static inline bool RuntimeError (const char * format, ...) -{ - va_list args; - char buffer[1024]; - - va_start (args, format); - vsprintf (buffer, format, args); - throw std::runtime_error(buffer); -}; - -// LogicError - throw a std::logic_error with a formatted error string -static inline bool LogicError(const char * format, ...) 
-{ - va_list args; - char buffer[1024]; - - va_start(args, format); - vsprintf(buffer, format, args); - throw std::logic_error(buffer); -}; - -#endif // _BASETYPES_ + return std::wstring(&buf[0]); +} +#pragma warning(pop) + +// split and join -- tokenize a string like strtok() would, join() strings together +template static inline std::vector> split (const std::basic_string<_T> & s, const _T * delim) +{ + std::vector> res; + for (size_t st = s.find_first_not_of (delim); st != std::basic_string<_T>::npos; ) + { + size_t en = s.find_first_of (delim, st +1); + if (en == std::basic_string<_T>::npos) en = s.length(); + res.push_back (s.substr (st, en-st)); + st = s.find_first_not_of (delim, en +1); // may exceed + } + return res; +} + +template static inline std::basic_string<_T> join (const std::vector> & a, const _T * delim) +{ + std::basic_string<_T> res; + for (int i = 0; i < (int) a.size(); i++) + { + if (i > 0) res.append (delim); + res.append (a[i]); + } + return res; +} + +// parsing strings to numbers +static inline int toint (const wchar_t * s) +{ + return (int)wcstol(s, 0, 10); + //return _wtoi (s); // ... TODO: test this +} +static inline int toint (const char * s) +{ + return atoi (s); // ... TODO: check it +} +static inline int toint (const std::wstring & s) { return toint (s.c_str()); } + +static inline double todouble (const char * s) +{ + char * ep; // will be set to point to first character that failed parsing + double value = strtod (s, &ep); + if (*s == 0 || *ep != 0) + throw std::runtime_error ("todouble: invalid input string"); + return value; +} + +// TODO: merge this with todouble(const char*) above +static inline double todouble (const std::string & s) +{ + s.size(); // just used to remove the unreferenced warning + + double value = 0.0; + + // stod supposedly exists in VS2010, but some folks have compilation errors + // If this causes errors again, change the #if into the respective one for VS 2010. 
+#if _MSC_VER > 1400 // VS 2010+ + size_t * idx = 0; + value = std::stod (s, idx); + if (idx) throw std::runtime_error ("todouble: invalid input string"); +#else + char *ep = 0; // will be updated by strtod to point to first character that failed parsing + value = strtod (s.c_str(), &ep); + + // strtod documentation says ep points to first unconverted character OR + // return value will be +/- HUGE_VAL for overflow/underflow + if (ep != s.c_str() + s.length() || value == HUGE_VAL || value == -HUGE_VAL) + throw std::runtime_error ("todouble: invalid input string"); +#endif + + return value; +} + +static inline double todouble (const std::wstring & s) +{ + wchar_t * endptr; + double value = wcstod (s.c_str(), &endptr); + if (*endptr) throw std::runtime_error ("todouble: invalid input string"); + return value; +} + +// ---------------------------------------------------------------------------- +// tokenizer -- utility for white-space tokenizing strings in a character buffer +// This simple class just breaks a string, but does not own the string buffer. 
+// ---------------------------------------------------------------------------- + +class tokenizer : public std::vector +{ + const char * delim; +public: + tokenizer (const char * delim, size_t cap) : delim (delim) { reserve (cap); } + // Usage: tokenizer tokens (delim, capacity); tokens = buf; tokens.size(), tokens[i] + void operator= (char * buf) + { + resize (0); + + // strtok_s not available on all platforms - so backoff to strtok on those +#ifdef strtok_s + char * context; // for strtok_s() + for (char * p = strtok_s (buf, delim, &context); p; p = strtok_s (NULL, delim, &context)) + push_back (p); +#else + for (char * p = strtok (buf, delim); p; p = strtok (NULL, delim)) + push_back (p); +#endif + } +}; + +};}; // namespace + +// ---------------------------------------------------------------------------- +// wrappers for some basic types (files, handles, timer) +// ---------------------------------------------------------------------------- + +#ifndef _MSC_VER // add some functions that are VS-only +static inline FILE* _wfopen(const wchar_t * path, const wchar_t * mode) { return fopen(msra::strfun::wcstombs(path).c_str(), msra::strfun::utf8(mode).c_str()); } +#endif + +namespace msra { namespace basetypes { + +// FILE* with auto-close; use auto_file_ptr instead of FILE*. +// Warning: do not pass an auto_file_ptr to a function that calls fclose(), +// except for fclose() itself. +class auto_file_ptr +{ + FILE * f; + FILE * operator= (auto_file_ptr &); // can't ref-count: no assignment + auto_file_ptr (auto_file_ptr &); + // implicit close (destructor, assignment): we ignore error + void close() throw() { if (f) try { if (f != stdin && f != stdout && f != stderr) ::fclose (f); } catch (...) 
{ } f = NULL; } + void openfailed (const std::string & path) { throw std::runtime_error ("auto_file_ptr: error opening file '" + path + "': " + strerror (errno)); } +protected: + friend int fclose (auto_file_ptr&); // explicit close (note: may fail) + int fclose() { int rc = ::fclose (f); if (rc == 0) f = NULL; return rc; } +public: + auto_file_ptr() : f (NULL) { } + ~auto_file_ptr() { close(); } + auto_file_ptr (const char * path, const char * mode) { f = fopen (path, mode); if (f == NULL) openfailed (path); } + auto_file_ptr (const wchar_t * wpath, const char * mode) { f = _wfopen (wpath, msra::strfun::utf16 (mode).c_str()); if (f == NULL) openfailed (msra::strfun::utf8 (wpath)); } + FILE * operator= (FILE * other) { close(); f = other; return f; } + auto_file_ptr (FILE * other) : f (other) { } + operator FILE * () const { return f; } + FILE * operator->() const { return f; } + void swap (auto_file_ptr & other) throw() { std::swap (f, other.f); } +}; +inline int fclose (auto_file_ptr & af) { return af.fclose(); } + +#ifdef _MSC_VER +// auto-closing container for Win32 handles. +// Pass close function if not CloseHandle(), e.g. +// auto_handle h (FindFirstFile(...), FindClose); +// ... TODO: the close function should really be a template parameter +template class auto_handle_t +{ + _H h; + BOOL (WINAPI_CC * close) (HANDLE); // close function + auto_handle_t operator= (const auto_handle_t &); + auto_handle_t (const auto_handle_t &); +public: + auto_handle_t (_H p_h, BOOL (WINAPI_CC * p_close) (HANDLE) = ::CloseHandle) : h (p_h), close (p_close) {} + ~auto_handle_t() { if (h != INVALID_HANDLE_VALUE) close (h); } + operator _H () const { return h; } +}; +typedef auto_handle_t auto_handle; +#endif + +// like auto_ptr but calls freeFunc_p (type free_func_t) instead of delete to clean up +// minor difference - wrapped object is T, not T *, so to wrap a +// T *, use auto_clean +// TODO: can this be used for simplifying those other classes? 
+template class auto_clean +{ + T it; + typedef FR (*free_func_t)(T); + free_func_t freeFunc; // the function used to free the pointer + void free() { if (it) freeFunc(it); it = 0; } + auto_clean operator= (const auto_clean &); // hide to prevent copy + auto_clean (const auto_clean &); // hide to prevent copy +public: + auto_clean (T it_p, free_func_t freeFunc_p) : it (it_p), freeFunc (freeFunc_p) {} + ~auto_clean() { free(); } + operator T () { return it; } + operator const T () const { return it; } + T detach () { T tmp = it; it = 0; return tmp; } // release ownership of object +}; + +#if 0 +// simple timer +// auto_timer timer; run(); double seconds = timer; // now can abandon the object +class auto_timer +{ + LARGE_INTEGER freq, start; + auto_timer (const auto_timer &); void operator= (const auto_timer &); +public: + auto_timer() + { + if (!QueryPerformanceFrequency (&freq)) // count ticks per second + throw std::runtime_error ("auto_timer: QueryPerformanceFrequency failure"); + QueryPerformanceCounter (&start); + } + operator double() const // each read gives time elapsed since start, in seconds + { + LARGE_INTEGER end; + QueryPerformanceCounter (&end); + return (end.QuadPart - start.QuadPart) / (double) freq.QuadPart; + } + void show (const std::string & msg) const + { + double elapsed = *this; + fprintf (stderr, "%s: %.6f ms\n", msg.c_str(), elapsed * 1000.0/*to ms*/); + } +}; +#endif + +};}; + +namespace msra { namespace files { + +// ---------------------------------------------------------------------------- +// textreader -- simple reader for text files --we need this all the time! +// Currently reads 8-bit files, but can return as wstring, in which case +// they are interpreted as UTF-8 (without BOM). +// Note: Not suitable for pipes or typed input due to readahead (fixable if needed). 
+// ---------------------------------------------------------------------------- + +class textreader +{ + msra::basetypes::auto_file_ptr f; + std::vector buf; // read buffer (will only grow, never shrink) + int ch; // next character (we need to read ahead by one...) + char getch() { char prevch = (char) ch; ch = fgetc (f); return prevch; } +public: + textreader (const std::wstring & path) : f (path.c_str(), "rb") { buf.reserve (10000); ch = fgetc (f); } + operator bool() const { return ch != EOF; } // true if still a line to read + std::string getline() // get and consume the next line + { + if (ch == EOF) throw std::logic_error ("textreader: attempted to read beyond EOF"); + assert (buf.empty()); + // get all line's characters --we recognize UNIX (LF), DOS (CRLF), and Mac (CR) convention + while (ch != EOF && ch != '\n' && ch != '\r') buf.push_back (getch()); + if (ch != EOF && getch() == '\r' && ch == '\n') getch(); // consume EOLN char + std::string line (buf.begin(), buf.end()); + buf.clear(); + return line; + } + std::wstring wgetline() { return msra::strfun::utf16 (getline()); } +}; + +};}; + +// ---------------------------------------------------------------------------- +// functional-programming style helper macros (...do this with templates?) +// ---------------------------------------------------------------------------- + +#define foreach_index(_i,_dat) for (int _i = 0; _i < (int) (_dat).size(); _i++) +#define map_array(_x,_expr,_y) { _y.resize (_x.size()); foreach_index(_i,_x) _y[_i]=_expr(_x[_i]); } +#define reduce_array(_x,_expr,_y) { foreach_index(_i,_x) _y = (_i==0) ? 
_x[_i] : _expr(_y,_x[_i]); } +//template +//static void fill_array(_A & a, _F v) { ::fill (a.begin(), a.end(), v); } + +// ---------------------------------------------------------------------------- +// frequently missing utility functions +// ---------------------------------------------------------------------------- + +namespace msra { namespace util { + +// to (slightly) simplify processing of command-line arguments. +// command_line args (argc, argv); +// while (args.has (1) && args[0][0] == '-') { option = args.shift(); process (option); } +// for (const wchar_t * arg = args.shift(); arg; arg = args.shift()) { process (arg); } +class command_line +{ + int num; + const wchar_t ** args; +public: + command_line (int argc, wchar_t * argv[]) : num (argc), args ((const wchar_t **) argv) { shift(); } + inline int size() const { return num; } + inline bool has (int left) { return size() >= left; } + const wchar_t * shift() { if (size() == 0) return NULL; num--; return *args++; } + const wchar_t * operator[] (int i) const { return (i < 0 || i >= size()) ? NULL : args[i]; } +}; + +// byte-reverse a variable --reverse all bytes (intended for integral types and float) +//template static inline void bytereverse (T & v) throw() +//{ // note: this is more efficient than it looks because sizeof (v[0]) is a constant +// char * p = (char *) &v; +// const size_t elemsize = sizeof (v); +// for (int k = 0; k < elemsize / 2; k++) // swap individual bytes +// swap (p[k], p[elemsize-1 - k]); +//} + +// byte-swap an entire array +template static inline void byteswap (V & v) throw() +{ + foreach_index (i, v) + bytereverse (v[i]); +} + +#if 0 +// execute a block with retry +// Block must be restartable. +// Use this when writing small files to those unreliable Windows servers. 
+// TODO: This will fail to compile under VS 2008--we need an #ifdef around this +template static void attempt (int retries, const FUNCTION & body) +{ + for (int attempt = 1; ; attempt++) + { + try + { + body(); + if (attempt > 1) fprintf (stderr, "attempt: success after %d retries\n", attempt); + break; + } + catch (const std::exception & e) + { + if (attempt >= retries) + throw; // failed N times --give up and rethrow the error + fprintf (stderr, "attempt: %s, retrying %d-th time out of %d...\n", e.what(), attempt+1, retries); +#ifndef LINUX + ::Sleep (1000); // wait a little, then try again +#else + std::chrono::milliseconds dura(1000); + std::this_thread::sleep_for(dura); +#endif /* LINUX */ + } + } +} +#endif + +};}; // namespace + +template static inline void ZeroStruct (S & s) { memset (&s, 0, sizeof (s)); } + +// ---------------------------------------------------------------------------- +// machine dependent +// ---------------------------------------------------------------------------- + +#define MACHINE_IS_BIG_ENDIAN (false) + +using namespace msra::basetypes; // for compatibility + +#pragma warning (pop) + +// RuntimeError - throw a std::runtime_error with a formatted error string +static inline bool RuntimeError (const char * format, ...) +{ + va_list args; + char buffer[1024]; + + va_start (args, format); + vsprintf (buffer, format, args); + throw std::runtime_error(buffer); +}; + +// LogicError - throw a std::logic_error with a formatted error string +static inline bool LogicError(const char * format, ...) 
+{ + va_list args; + char buffer[1024]; + + va_start(args, format); + vsprintf(buffer, format, args); + throw std::logic_error(buffer); +}; + +#endif // _BASETYPES_ From 23fc4305a636d5fee39ca595b454982671a0234c Mon Sep 17 00:00:00 2001 From: Frank Seide Date: Thu, 30 Oct 2014 20:49:52 -0700 Subject: [PATCH 18/31] resurrected dummy implementation of utf{8,16}() since the portable standard wstring_convert class does not exist under GCC (does it under Linux proper?). We emulate this by using MBS conversion instead, which of course only works for 7-bit ASCII; removed f{seek,tell}OrDie() since they are not used and don't compile under GCC; GCC implementations of filesize() and getfiletime(); GCC version of renameOrDie() implemented with rename(); auto_file_ptr f = ... not working for GCC, need to use auto_file_ptr f (...); grouped sources in Math project by CPU vs. GPU --- Common/Include/basetypes.h | 50 ++++++++------ Common/Include/fileutil.h | 4 +- Common/fileutil.cpp | 116 +++++++++++++++++---------------- Math/Math/Math.vcxproj.filters | 54 +++++++++++---- 4 files changed, 133 insertions(+), 91 deletions(-) diff --git a/Common/Include/basetypes.h b/Common/Include/basetypes.h index f26434c1c..9a033f97a 100644 --- a/Common/Include/basetypes.h +++ b/Common/Include/basetypes.h @@ -559,6 +559,31 @@ typedef strfun::_strprintf wstrprintf; // wchar_t version #endif // string-encoding conversion functions +// Note: generally, 8-bit strings in this codebase are UTF-8. +// One exception are functions that take 8-bit pathnames. Those will be interpreted by the OS as MBS. Best use wstring pathnames for all file accesses. + +#pragma warning(push) +#pragma warning(disable : 4996) // Reviewed by Yusheng Li, March 14, 2006. depr. 
fn (wcstombs, mbstowcs) +static inline std::string wcstombs(const std::wstring & p) // output: MBCS +{ + size_t len = p.length(); + msra::basetypes::fixed_vector buf(2 * len + 1); // max: 1 wchar => 2 mb chars + std::fill(buf.begin(), buf.end(), 0); + ::wcstombs(&buf[0], p.c_str(), 2 * len + 1); + return std::string(&buf[0]); +} +static inline std::wstring mbstowcs(const std::string & p) // input: MBCS +{ + size_t len = p.length(); + msra::basetypes::fixed_vector buf(len + 1); // max: >1 mb chars => 1 wchar + std::fill(buf.begin(), buf.end(), (wchar_t)0); + OACR_WARNING_SUPPRESS(UNSAFE_STRING_FUNCTION, "Reviewed OK. size checked. [rogeryu 2006/03/21]"); + ::mbstowcs(&buf[0], p.c_str(), len + 1); + return std::wstring(&buf[0]); +} +#pragma warning(pop) + +#ifdef _WIN32 struct utf8 : std::string { utf8 (const std::wstring & p) // utf-16 to -8 { #if 1 @@ -594,27 +619,10 @@ struct utf16 : std::wstring { utf16 (const std::string & p) // utf-8 to -16 (*(std::wstring*)this) = &buf[0]; #endif }}; - -#pragma warning(push) -#pragma warning(disable : 4996) // Reviewed by Yusheng Li, March 14, 2006. depr. fn (wcstombs, mbstowcs) -static inline std::string wcstombs (const std::wstring & p) // output: MBCS -{ - size_t len = p.length(); - msra::basetypes::fixed_vector buf (2 * len + 1); // max: 1 wchar => 2 mb chars - std::fill (buf.begin (), buf.end (), 0); - ::wcstombs (&buf[0], p.c_str(), 2 * len + 1); - return std::string (&buf[0]); -} -static inline std::wstring mbstowcs (const std::string & p) // input: MBCS -{ - size_t len = p.length(); - msra::basetypes::fixed_vector buf(len + 1); // max: >1 mb chars => 1 wchar - std::fill(buf.begin(), buf.end(), (wchar_t)0); - OACR_WARNING_SUPPRESS(UNSAFE_STRING_FUNCTION, "Reviewed OK. size checked. 
[rogeryu 2006/03/21]"); - ::mbstowcs(&buf[0], p.c_str(), len + 1); - return std::wstring(&buf[0]); -} -#pragma warning(pop) +#else // BUGBUG: we cannot compile the above on Cygwin GCC, so for now fake it using the mbs functions, which will only work for 7-bit ASCII strings +static inline std::string utf8(const std::wstring & p) { return msra::strfun::wcstombs (p); } // output: UTF-8... not really +static inline std::wstring utf16(const std::string & p) { return msra::strfun::mbstowcs (p); } // input: UTF-8... not really +#endif // split and join -- tokenize a string like strtok() would, join() strings together template static inline std::vector> split (const std::basic_string<_T> & s, const _T * delim) diff --git a/Common/Include/fileutil.h b/Common/Include/fileutil.h index 8371de1b9..a51fce9a4 100644 --- a/Common/Include/fileutil.h +++ b/Common/Include/fileutil.h @@ -98,11 +98,9 @@ size_t filesize (FILE * f); int64_t filesize64 (const wchar_t * pathname); // ---------------------------------------------------------------------------- -// fseekOrDie(),ftellOrDie(), fget/setpos(): seek functions with error handling +// fget/setpos(): seek functions with error handling // ---------------------------------------------------------------------------- -size_t fseekOrDie (FILE * f, size_t offset, int mode = SEEK_SET); -#define ftellOrDie _ftelli64 uint64_t fgetpos (FILE * f); void fsetpos (FILE * f, uint64_t pos); diff --git a/Common/fileutil.cpp b/Common/fileutil.cpp index c07eac1c5..6f51f8d84 100644 --- a/Common/fileutil.cpp +++ b/Common/fileutil.cpp @@ -7,6 +7,7 @@ #define _CRT_SECURE_NO_WARNINGS // "secure" CRT not available on all platforms --add this at the top of all CPP files that give "function or variable may be unsafe" warnings #define _CRT_NONSTDC_NO_DEPRECATE // make VS accept POSIX functions without _ #pragma warning (disable: 4996) // ^^ this does not seem to work--TODO: make it work +#define _FILE_OFFSET_BITS = 64 // for ftell64() in Linux #ifndef UNDER_CE 
// fixed-buffer overloads not available for wince #ifdef _CRT_SECURE_CPP_OVERLOAD_STANDARD_NAMES // fixed-buffer overloads for strcpy() etc. @@ -17,6 +18,8 @@ #include "basetypes.h" #include "fileutil.h" #ifdef __unix__ +#include +#include #include #endif #include @@ -48,7 +51,7 @@ template <> const wchar_t* GetScanFormatString(int) {return L" %i"; template <> const wchar_t* GetScanFormatString(long) {return L" %li";} template <> const wchar_t* GetScanFormatString(unsigned short) {return L" %hu";} template <> const wchar_t* GetScanFormatString(unsigned int) {return L" %u";} -template <> const wchar_t* GetScanFormatString(unsigned long) {return L" %lu";} +//template <> const wchar_t* GetScanFormatString(unsigned long) {return L" %lu";} template <> const wchar_t* GetScanFormatString(float) {return L" %g";} template <> const wchar_t* GetScanFormatString(double) {return L" %lg";} template <> const wchar_t* GetScanFormatString(size_t) {return L" %llu";} @@ -61,7 +64,7 @@ template <> const wchar_t* GetFormatString(int) {return L" %i";} template <> const wchar_t* GetFormatString(long) {return L" %li";} template <> const wchar_t* GetFormatString(unsigned short) {return L" %hu";} template <> const wchar_t* GetFormatString(unsigned int) {return L" %u";} -template <> const wchar_t* GetFormatString(unsigned long) {return L" %lu";} +//template <> const wchar_t* GetFormatString(unsigned long) {return L" %lu";} template <> const wchar_t* GetFormatString(float) {return L" %.9g";} template <> const wchar_t* GetFormatString(double) {return L" %.17g";} template <> const wchar_t* GetFormatString(size_t) { return L" %llu"; } @@ -253,6 +256,7 @@ void fflushOrDie (FILE * f) // ---------------------------------------------------------------------------- size_t filesize (FILE * f) { +#ifdef _WIN32 size_t curPos = _ftelli64(f); if (curPos == -1L) { @@ -260,20 +264,22 @@ size_t filesize (FILE * f) } int rc = _fseeki64 (f, 0, SEEK_END); if (rc != 0) - { RuntimeError ("error seeking to end of 
file: %s", strerror (errno)); - } size_t len = _ftelli64 (f); if (len == -1L) - { RuntimeError ("error determining file position: %s", strerror (errno)); - } rc = _fseeki64 (f, curPos, SEEK_SET); if (rc != 0) - { RuntimeError ("error resetting file position: %s", strerror (errno)); - } return len; +#else // TODO: test this + struct stat stat_buf; + int rc = fstat(fileno(f), &stat_buf); + if (rc != 0) + RuntimeError("error determining length of file: %s", strerror(errno)); + static_assert (sizeof(stat_buf.st_size)>=sizeof(uint64_t), "struct stat not compiled for 64-bit mode"); + return stat_buf.st_size; +#endif } // filesize(): determine size of the file in bytes (with pathname) @@ -298,33 +304,22 @@ size_t filesize (const wchar_t * pathname) // filesize64(): determine size of the file in bytes (with pathname) int64_t filesize64 (const wchar_t * pathname) { +#ifdef _WIN32 struct _stat64 fileinfo; if (_wstat64 (pathname,&fileinfo) == -1) return 0; else return fileinfo.st_size; +#else + return filesize (pathname); +#endif } #endif // ---------------------------------------------------------------------------- -// fseekOrDie(),ftellOrDie(), fget/setpos(): seek functions with error handling +// fget/setpos(): seek functions with error handling // ---------------------------------------------------------------------------- -size_t fseekOrDie (FILE * f, size_t offset, int mode) -{ - size_t curPos = _ftelli64 (f); - if (curPos == -1L) - { - RuntimeError ("error seeking: %s", strerror (errno)); - } - int rc = _fseeki64 (f, offset, mode); - if (rc != 0) - { - RuntimeError ("error seeking: %s", strerror (errno)); - } - return curPos; -} - uint64_t fgetpos (FILE * f) { fpos_t post; @@ -392,14 +387,23 @@ void unlinkOrDie (const std::wstring & pathname) void renameOrDie (const std::string & from, const std::string & to) { +#ifdef _WIN32 if (!MoveFileA (from.c_str(),to.c_str())) - RuntimeError ("error renaming: %s", GetLastError()); + RuntimeError("error renaming: %s", 
GetLastError()); +#else // TODO: test this + if (!rename (from.c_str(), to.c_str())) + RuntimeError("error renaming file '%s': %s", from.c_str(), strerror(errno)); +#endif } void renameOrDie (const std::wstring & from, const std::wstring & to) { - if (!MoveFileW (from.c_str(),to.c_str())) +#ifdef _WIN32 + if (!MoveFileW(from.c_str(), to.c_str())) RuntimeError ("error renaming: %s", GetLastError()); +#else + renameOrDie (msra::strfun::utf8(from), msra::strfun::utf8(to)); +#endif } // ---------------------------------------------------------------------------- @@ -421,7 +425,7 @@ bool fexists (const wchar_t * pathname) return false; } #else - auto_file_ptr f = _wfopen (pathname, L"r"); + auto_file_ptr f (_wfopen (pathname, L"r")); return f != nullptr; #endif } @@ -441,7 +445,7 @@ bool fexists (const char * pathname) return false; } #else - auto_file_ptr f = fopen (pathname, "r"); + auto_file_ptr f (fopen (pathname, "r")); return f != nullptr; #endif } @@ -1331,7 +1335,7 @@ void fgetfile (FILE * f, std::vector & buffer) // load it into RAM in one huge chunk static size_t fgetfilechars (const std::wstring & path, vector & buffer) { - auto_file_ptr f = fopenOrDie (path, L"rb"); + auto_file_ptr f (fopenOrDie (path, L"rb")); size_t len = filesize (f); buffer.reserve (len +1); freadOrDie (buffer, len, f); @@ -1373,21 +1377,20 @@ vector msra::files::fgetfilelines (const wstring & path, vector & b // getfiletime(): access modification time // ---------------------------------------------------------------------------- +#ifndef _FILETIME_ +//typedef struct _FILETIME { DWORD dwLowDateTime; DWORD dwHighDateTime; }; // from minwindef.h +typedef time_t FILETIME; +#else +bool operator>= (const FILETIME & targettime, const FILETIME & inputtime) // for use in fuptodate() +{ + return (targettime.dwHighDateTime > inputtime.dwHighDateTime) || + (targettime.dwHighDateTime == inputtime.dwHighDateTime && targettime.dwLowDateTime >= inputtime.dwLowDateTime); +} +#endif + bool getfiletime 
(const wstring & path, FILETIME & time) { // return file modification time, false if cannot be determined -#if 1 - struct _stat buf; - int result; - - // Get data associated with "crt_stat.c": - result = _wstat(path.c_str(), &buf); - // Check if statistics are valid: - if (result != 0) - return false; - - (*(time_t*)(&time)) = buf.st_mtime; - return true; -#else // old version, delete once above is tested +#ifdef _WIN32 WIN32_FIND_DATAW findFileData; auto_handle hFind (FindFirstFileW (path.c_str(), &findFileData), ::FindClose); if (hFind != INVALID_HANDLE_VALUE) @@ -1396,9 +1399,19 @@ bool getfiletime (const wstring & path, FILETIME & time) return true; } else - { return false; - } +#else // TODO: test this; e.g. does st_mtime have the desired resolution? + struct stat buf; + int result; + + // Get data associated with "crt_stat.c": + result = stat(msra::strfun::wcstombs(path).c_str(), &buf); + // Check if statistics are valid: + if (result != 0) + return false; + + time = buf.st_mtime; + return true; #endif } @@ -1515,14 +1528,15 @@ static void mkdir (const wstring & path) int rc = _wmkdir (path.c_str()); if (rc >= 0 || errno == EEXIST) return; // no error or already existing --ok +#ifdef _WIN32 // bug in _wmkdir(): returns access_denied if folder exists but read-only --check existence if (errno == EACCES) { - // bug in _wmkdir(): returns access_denied if folder exists but read-only --check existence DWORD att = ::GetFileAttributesW (path.c_str()); if (att != INVALID_FILE_ATTRIBUTES || (att & FILE_ATTRIBUTE_DIRECTORY) != 0) return; // ok } - RuntimeError ("make_intermediate_dirs: error creating intermediate directory %S", path.c_str()); +#endif + RuntimeError ("mkdir: error creating intermediate directory %S", path.c_str()); } #ifndef _MSC_VER @@ -1571,16 +1585,8 @@ bool msra::files::fuptodate (const wstring & target, const wstring & input, bool if (!getfiletime (target, targettime)) return false; // target missing: need to update FILETIME inputtime; if 
(!getfiletime (input, inputtime)) return !inputrequired; // input missing: if required, pretend to be out of date as to force caller to fail -#if 1 // formerly called IsResultFileUpdateToDate() // up to date if target has higher time stamp - return (targettime.dwHighDateTime > inputtime.dwHighDateTime) || - (targettime.dwHighDateTime == inputtime.dwHighDateTime && targettime.dwLowDateTime >= inputtime.dwLowDateTime); -#else - ULARGE_INTEGER targett, inputt; - memcpy (&targett, &targettime, sizeof (targett)); - memcpy (&inputt, &inputtime, sizeof (inputt)); - return !(targett.QuadPart < inputt.QuadPart); // up to date if target not older than input -#endif + return targettime >= inputtime; // note: uses an overload for WIN32 FILETIME (in Linux, FILETIME=time_t=size_t) } /// separate string by separator diff --git a/Math/Math/Math.vcxproj.filters b/Math/Math/Math.vcxproj.filters index 1069be78d..55dbd567c 100644 --- a/Math/Math/Math.vcxproj.filters +++ b/Math/Math/Math.vcxproj.filters @@ -1,15 +1,21 @@  - - - - + + GPU + + + GPU + + + GPU + + + GPU + - - @@ -18,12 +24,18 @@ Common - + + CPU + + + CPU + + + GPU + - - @@ -37,11 +49,23 @@ Common\Include + + CPU + + + CPU + - - - + + GPU + + + GPU + + + GPU + @@ -50,5 +74,11 @@ {51b468dd-7e8a-4be8-ae6f-5e3f3d752b88} + + {94878fe0-ecce-4868-b3be-2199f4ee7507} + + + {cc9a219d-d8ab-484a-b253-fd2a29ad7c7c} + \ No newline at end of file From d72ad05fb4635df934c6f0ae89535773237cb1f3 Mon Sep 17 00:00:00 2001 From: Frank Seide Date: Thu, 30 Oct 2014 21:24:47 -0700 Subject: [PATCH 19/31] GCC fails to lookup members in base classes of class templates (http://stackoverflow.com/questions/11405/gcc-problem-using-a-member-of-a-base-class-that-depends-on-a-template-argument), solved by explicit 'using' statements (this has the same effect as the earlier addition of this-> to every member access, but is simpler) --- Math/Math/CPUMatrix.h | 1 + Math/Math/CPUSparseMatrix.h | 1 + Math/Math/GPUMatrix.cuh | 1 + Math/Math/GPUSparseMatrix.cuh 
| 1 + 4 files changed, 4 insertions(+) diff --git a/Math/Math/CPUMatrix.h b/Math/Math/CPUMatrix.h index 480d0648a..e3861d5be 100644 --- a/Math/Math/CPUMatrix.h +++ b/Math/Math/CPUMatrix.h @@ -43,6 +43,7 @@ namespace Microsoft { namespace MSR { namespace CNTK { template class MATH_API CPUMatrix : public BaseMatrix { + typedef BaseMatrix B; using B::m_numRows; using B::m_numCols; using B::m_pArray; // easier access to base members public: CPUMatrix(); CPUMatrix(FILE* f, const char * matrixName); //matrixName is used to verify that correct matrix is read. diff --git a/Math/Math/CPUSparseMatrix.h b/Math/Math/CPUSparseMatrix.h index b9f31eb68..522d24fa8 100644 --- a/Math/Math/CPUSparseMatrix.h +++ b/Math/Math/CPUSparseMatrix.h @@ -23,6 +23,7 @@ namespace Microsoft { namespace MSR { namespace CNTK { template class MATH_API CPUSparseMatrix : public BaseMatrix { + typedef BaseMatrix B; using B::m_elemSizeAllocated; // easier access to base members private: void ZeroInit(); diff --git a/Math/Math/GPUMatrix.cuh b/Math/Math/GPUMatrix.cuh index c323b14fd..f4c2d6505 100644 --- a/Math/Math/GPUMatrix.cuh +++ b/Math/Math/GPUMatrix.cuh @@ -62,6 +62,7 @@ namespace Microsoft { namespace MSR { namespace CNTK { template class MATH_API GPUMatrix : public BaseMatrix { + typedef BaseMatrix B; using B::m_numRows; using B::m_numCols; using B::m_pArray; // easier access to base members public: static const int MaxGpus = 8; // support up to 8 GPUs private: diff --git a/Math/Math/GPUSparseMatrix.cuh b/Math/Math/GPUSparseMatrix.cuh index 30a53f05d..92232cc3f 100644 --- a/Math/Math/GPUSparseMatrix.cuh +++ b/Math/Math/GPUSparseMatrix.cuh @@ -19,6 +19,7 @@ namespace Microsoft { namespace MSR { namespace CNTK { template class MATH_API GPUSparseMatrix : public BaseMatrix { + typedef BaseMatrix B; using B::m_numRows; using B::m_numCols; using B::m_pArray; using B::m_elemSizeAllocated; using B::m_nz; using B::m_format; // easier access to base members private: void ZeroInit(); void Init(); From 
616f812d44cd37c66a3181ef686e6327dee661ea Mon Sep 17 00:00:00 2001 From: Frank Seide Date: Thu, 30 Oct 2014 21:53:21 -0700 Subject: [PATCH 20/31] basetypes.h was missing the #include for stderror(); in GCC, members of base classes that have a template parameter require this-> unless one declares each with a 'using' directive, which was done here. --- Common/Include/fileutil.h | 1 + Math/Math/CPUMatrix.h | 7 +++++-- Math/Math/CPUSparseMatrix.h | 5 ++++- Math/Math/GPUMatrix.cuh | 2 +- Math/Math/GPUSparseMatrix.cuh | 2 +- 5 files changed, 12 insertions(+), 5 deletions(-) diff --git a/Common/Include/fileutil.h b/Common/Include/fileutil.h index a51fce9a4..205b97fdd 100644 --- a/Common/Include/fileutil.h +++ b/Common/Include/fileutil.h @@ -24,6 +24,7 @@ #include #include #include +#include // for strerror() using namespace std; diff --git a/Math/Math/CPUMatrix.h b/Math/Math/CPUMatrix.h index e3861d5be..5ec956a4c 100644 --- a/Math/Math/CPUMatrix.h +++ b/Math/Math/CPUMatrix.h @@ -43,7 +43,8 @@ namespace Microsoft { namespace MSR { namespace CNTK { template class MATH_API CPUMatrix : public BaseMatrix { - typedef BaseMatrix B; using B::m_numRows; using B::m_numCols; using B::m_pArray; // easier access to base members + typedef BaseMatrix B; using B::m_numRows; using B::m_numCols; using B::m_pArray; using B::m_computeDevice; using B::m_elemSizeAllocated; + using B::m_externalBuffer; using B::m_format; using B::m_matrixName; // without this, base members would require to use thi-> in GCC public: CPUMatrix(); CPUMatrix(FILE* f, const char * matrixName); //matrixName is used to verify that correct matrix is read. 
@@ -57,7 +58,9 @@ namespace Microsoft { namespace MSR { namespace CNTK { ~CPUMatrix(); public: - size_t BufferSize() const {return m_numRows*m_numCols*sizeof(ElemType);} + using B::OwnBuffer; using B::GetNumElements; using B::IsEmpty; using B::GetNumRows; using B::GetNumCols; using B::SetOwnBuffer; using B::SetMatrixName; + + size_t BufferSize() const { return m_numRows*m_numCols*sizeof(ElemType); } ElemType* BufferPointer() const {return m_pArray;} CPUMatrix ColumnSlice(size_t startColumn, size_t numCols) const; diff --git a/Math/Math/CPUSparseMatrix.h b/Math/Math/CPUSparseMatrix.h index 522d24fa8..0499781a9 100644 --- a/Math/Math/CPUSparseMatrix.h +++ b/Math/Math/CPUSparseMatrix.h @@ -23,7 +23,8 @@ namespace Microsoft { namespace MSR { namespace CNTK { template class MATH_API CPUSparseMatrix : public BaseMatrix { - typedef BaseMatrix B; using B::m_elemSizeAllocated; // easier access to base members + typedef BaseMatrix B; using B::m_elemSizeAllocated; using B::m_computeDevice; using B::m_externalBuffer; using B::m_format; using B::m_matrixName; + using B::m_numCols; using B::m_numRows; using B::m_nz; using B::m_pArray; // without this, base members would require to use thi-> in GCC private: void ZeroInit(); @@ -36,6 +37,8 @@ namespace Microsoft { namespace MSR { namespace CNTK { ~CPUSparseMatrix(); public: + using B::GetNumCols; using B::GetNumRows; + void SetValue(const size_t rIdx, const size_t cIdx, ElemType val); void SetValue(const CPUSparseMatrix& /*val*/) { NOT_IMPLEMENTED; } diff --git a/Math/Math/GPUMatrix.cuh b/Math/Math/GPUMatrix.cuh index f4c2d6505..91394d5a1 100644 --- a/Math/Math/GPUMatrix.cuh +++ b/Math/Math/GPUMatrix.cuh @@ -62,7 +62,7 @@ namespace Microsoft { namespace MSR { namespace CNTK { template class MATH_API GPUMatrix : public BaseMatrix { - typedef BaseMatrix B; using B::m_numRows; using B::m_numCols; using B::m_pArray; // easier access to base members + typedef BaseMatrix B; using B::m_numRows; using B::m_numCols; using B::m_pArray; // 
without this, base members would require to use thi-> in GCC public: static const int MaxGpus = 8; // support up to 8 GPUs private: diff --git a/Math/Math/GPUSparseMatrix.cuh b/Math/Math/GPUSparseMatrix.cuh index 92232cc3f..aabfb9be4 100644 --- a/Math/Math/GPUSparseMatrix.cuh +++ b/Math/Math/GPUSparseMatrix.cuh @@ -19,7 +19,7 @@ namespace Microsoft { namespace MSR { namespace CNTK { template class MATH_API GPUSparseMatrix : public BaseMatrix { - typedef BaseMatrix B; using B::m_numRows; using B::m_numCols; using B::m_pArray; using B::m_elemSizeAllocated; using B::m_nz; using B::m_format; // easier access to base members + typedef BaseMatrix B; using B::m_numRows; using B::m_numCols; using B::m_pArray; using B::m_elemSizeAllocated; using B::m_nz; using B::m_format; // without this, base members would require to use thi-> in GCC private: void ZeroInit(); void Init(); From 9da359de1a79c83aa04db5c32369d358b11ab3bc Mon Sep 17 00:00:00 2001 From: Frank Seide Date: Thu, 30 Oct 2014 21:55:49 -0700 Subject: [PATCH 21/31] GPUDummy.cpp should not compile if CPUONLY --- Math/Math/GPUDummy.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Math/Math/GPUDummy.cpp b/Math/Math/GPUDummy.cpp index f0648237b..42662c69b 100644 --- a/Math/Math/GPUDummy.cpp +++ b/Math/Math/GPUDummy.cpp @@ -4,7 +4,7 @@ // // -#ifdef CPUONLY +#ifndef CPUONLY #include "GPUMatrix.cuh" #include "GPUSparseMatrix.cuh" From 2738da7496d3a06ca901f1e314aa39e0e243f6f0 Mon Sep 17 00:00:00 2001 From: Frank Seide Date: Thu, 30 Oct 2014 22:03:09 -0700 Subject: [PATCH 22/31] corrected an inconsistent include-path setting; undid previous commit which was wrong --- Math/Math/GPUDummy.cpp | 2 +- Math/Math/Math.vcxproj | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/Math/Math/GPUDummy.cpp b/Math/Math/GPUDummy.cpp index 42662c69b..f0648237b 100644 --- a/Math/Math/GPUDummy.cpp +++ b/Math/Math/GPUDummy.cpp @@ -4,7 +4,7 @@ // // -#ifndef CPUONLY +#ifdef CPUONLY #include "GPUMatrix.cuh" 
#include "GPUSparseMatrix.cuh" diff --git a/Math/Math/Math.vcxproj b/Math/Math/Math.vcxproj index 6089ef5d0..73632c370 100644 --- a/Math/Math/Math.vcxproj +++ b/Math/Math/Math.vcxproj @@ -109,7 +109,7 @@ true NO_SYNC;WIN32;NDEBUG;_WINDOWS;_USRDLL;MATH_EXPORTS;%(PreprocessorDefinitions) true - %(AdditionalIncludeDirectories) + ..\..\common\include\;%(AdditionalIncludeDirectories) true Fast true From ca0f8d2a407c4cc3e6ec47d8db2e94e6d97507fa Mon Sep 17 00:00:00 2001 From: Frank Seide Date: Thu, 30 Oct 2014 22:13:43 -0700 Subject: [PATCH 23/31] GPUDummy.cpp now compiles with GCC --- Math/Math/GPUDummy.cpp | 9 +++++---- Math/Math/GPUMatrix.cuh | 1 + 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/Math/Math/GPUDummy.cpp b/Math/Math/GPUDummy.cpp index f0648237b..6cbcfec0a 100644 --- a/Math/Math/GPUDummy.cpp +++ b/Math/Math/GPUDummy.cpp @@ -119,7 +119,7 @@ namespace Microsoft { namespace MSR { namespace CNTK { } template - void GPUSparseMatrix::Resize(const size_t numRows, const size_t numCols, int size) + void GPUSparseMatrix::Resize(const size_t numRows, const size_t numCols, size_t size) {} //Reset matrix so it can be reused @@ -1085,6 +1085,7 @@ namespace Microsoft { namespace MSR { namespace CNTK { return *this; } +#if 0 template GPUMatrix& GPUMatrix::InplaceSoftmax (const bool isColWise) { @@ -1096,6 +1097,7 @@ namespace Microsoft { namespace MSR { namespace CNTK { { return *this; } +#endif template GPUMatrix& GPUMatrix::InplaceSqrt() @@ -1426,8 +1428,7 @@ namespace Microsoft { namespace MSR { namespace CNTK { const size_t kernelWidth, const size_t kernelHeight, const size_t horizontalSubsample, const size_t verticalSubsample, const bool zeroPadding) const { - GPUMatrix mat; - return mat; + return inputSubBatch; } template @@ -1651,7 +1652,7 @@ int GPUWatcher::GetGPUIdWithTheMostFreeMemory() } -size_t GPUWatcher::GetFreeMemoryOnCUDADevice(int devId) +size_t GPUWatcher::GetFreeMemoryOnCUDADevice(int /*devId*/) { return 0; } diff --git 
a/Math/Math/GPUMatrix.cuh b/Math/Math/GPUMatrix.cuh index 91394d5a1..f5f981189 100644 --- a/Math/Math/GPUMatrix.cuh +++ b/Math/Math/GPUMatrix.cuh @@ -7,6 +7,7 @@ #include #include #include +#include // for ULONG_MAX #include "File.h" #include "Helpers.h" #include "CommonMatrix.h" From 4dd6d2f10029fb297a783dac1d143aa09d27332f Mon Sep 17 00:00:00 2001 From: Frank Seide Date: Thu, 30 Oct 2014 22:23:40 -0700 Subject: [PATCH 24/31] missing #include "stdafx.h" caused a problem when fiddling with CPUONLY #define --- Math/Math/GPUDummy.cpp | 2 ++ 1 file changed, 2 insertions(+) diff --git a/Math/Math/GPUDummy.cpp b/Math/Math/GPUDummy.cpp index 6cbcfec0a..e6feb69a4 100644 --- a/Math/Math/GPUDummy.cpp +++ b/Math/Math/GPUDummy.cpp @@ -4,6 +4,8 @@ // // +#include "stdafx.h" + #ifdef CPUONLY #include "GPUMatrix.cuh" From ff71e32586099f71ff4952a0a32d7dec50cc65d5 Mon Sep 17 00:00:00 2001 From: Frank Seide Date: Fri, 31 Oct 2014 10:33:00 -0700 Subject: [PATCH 25/31] experimental makefile for eventual Linux port (not currently functional); dummy stdafx.h added to Common/Include for GCC build --- Common/Include/stdafx.h | 5 +++++ makefile | 48 +++++++++++++++++++++++++++++++++++++++++ 2 files changed, 53 insertions(+) create mode 100644 Common/Include/stdafx.h create mode 100644 makefile diff --git a/Common/Include/stdafx.h b/Common/Include/stdafx.h new file mode 100644 index 000000000..c32f5c3b1 --- /dev/null +++ b/Common/Include/stdafx.h @@ -0,0 +1,5 @@ +// dummy stdafx.h file for Linux version + +// In the Windows build, there are several stdafx.h files which are used for project-specific precompilation of headers. +// In Linux, this is not used. By placing this dummy file ahead of all other stdafx.h in the include search path, +// we make sure it compiles while not getting confused by Windows-specific content of those files. 
diff --git a/makefile b/makefile new file mode 100644 index 000000000..dc12f6b71 --- /dev/null +++ b/makefile @@ -0,0 +1,48 @@ +# WORK IN PROGRESS, not currently complete nor usable + +# makefile for a Linux/GCC build of CNTK +# This needs ACML_PATH. E.g. in tcsh, say: setenv ACML_PATH C:/AMD/acml5.3.1/ifort64_mp + +# This is work in progress and not at all complete or usable. +# +# The Linux and Windows versions are not different branches, but rather build off the same +# source files, using different makefiles. This current makefile has the purpose of enabling +# work to make all sources compile with GCC, and also to check for GCC-compat regressions due to +# modifications which are currently done under Windows. +# +# The planned steps are: +# - runnable non-GPU GCC-built version under Cygwin +# - get all CPU-only sources to compile with GCC/x64 under Cygwin --currently ongoing work +# - port the dynamic-loading mechanism +# - runnable non-GPU version on actual Linux +# - enable CUDA on Linux (=makefile code and figuring out the right compiler options) +# +# Any help is welcome, of course! +# +# This makefile will be extended/completed as we go. 
+ +.SUFFIXES: + +#SRC := Common/File.cpp Math/Math/Matrix.cpp +#OBJ := $(SRC:%.cpp=%.obj) +#DEP := $(OBJ:%.obj=%.dep) + +INCFLAGS = -I Common/Include -I Math/Math -I $(ACML_PATH)/include + +COMMON_SRC = Common/fileutil.cpp Common/DataWriter.cpp Common/ConfigFile.cpp Common/DataReader.cpp \ + Common/Eval.cpp Common/File.cpp Common/NetworkDescriptionLanguage.cpp Common/BestGpu.cpp + +MATH_SRC = Math/Math/Matrix.obj Math/Math/CPUMatrix.obj Math/Math/CPUSparseMatrix.obj Math/Math/GPUDummy.obj + +SRC = $(MATH_SRC) $(COMMON_SRC) + +all: ${SRC:.cpp=.obj} + + +CFLAGS = -std=c++0x -std=c++11 -DCPUONLY -D_POSIX_SOURCE -D_XOPEN_SOURCE=600 -fpermissive + +%.obj: %.cpp + gcc -c -o $@ $(CPPFLAGS) $(CFLAGS) $(INCFLAGS) -MD -MP -MF ${@:.obj=.dep} $< + +# .dep files created by -MD option in the gcc call +-include $(DEP) From b0aef3dd2ffdbb100adc4bc165b249a8f4f44f80 Mon Sep 17 00:00:00 2001 From: Frank Seide Date: Fri, 31 Oct 2014 11:42:30 -0700 Subject: [PATCH 26/31] new class (w)cstring to make passing STL strings to C functions easier; consolidating some mappings for VS-only functions in basetypes.h --- Common/DataReader.cpp | 1 + Common/Include/basetypes.h | 113 +++++++++++-------------- Common/fileutil.cpp | 20 ++--- DataReader/BinaryReader/BinaryFile.cpp | 2 +- 4 files changed, 57 insertions(+), 79 deletions(-) diff --git a/Common/DataReader.cpp b/Common/DataReader.cpp index e067b989a..f3ea027d4 100644 --- a/Common/DataReader.cpp +++ b/Common/DataReader.cpp @@ -8,6 +8,7 @@ #include "stdafx.h" #define DATAREADER_LOCAL +#include "basetypes.h" #include "DataReader.h" namespace Microsoft { namespace MSR { namespace CNTK { diff --git a/Common/Include/basetypes.h b/Common/Include/basetypes.h index 9a033f97a..9506999e1 100644 --- a/Common/Include/basetypes.h +++ b/Common/Include/basetypes.h @@ -102,36 +102,57 @@ using namespace std; #define strerror(x) "strerror error but can't report error number sorry!" 
#endif -#if 0 -#ifndef __in // dummies for sal annotations if compiler does not support it -#define __in -#define __inout_z -#define __in_count(x) -#define __inout_cap(x) -#define __inout_cap_c(x) -#endif -#ifndef __out_z_cap // non-VS2005 annotations -#define __out_cap(x) -#define __out_z_cap(x) -#define __out_cap_c(x) -#endif - -#ifndef __override // and some more non-std extensions required by Office -#define __override virtual -#endif -#endif - // disable warnings for which fixing would make code less readable #pragma warning(disable : 4290) // throw() declaration ignored #pragma warning(disable : 4244) // conversion from typeA to typeB, possible loss of data - // ---------------------------------------------------------------------------- -// basic macros +// (w)cstring -- helper class like std::string but with auto-cast to char* // ---------------------------------------------------------------------------- -#define SAFE_DELETE(p) { if(p) { delete (p); (p)=NULL; } } -#define SAFE_RELEASE(p) { if(p) { (p)->Release(); (p)=NULL; } } // nasty! use CComPtr<> +namespace msra { namespace strfun { + // a class that can return a std::string with auto-convert into a const char* + template struct basic_cstring : std::basic_string + { + template basic_cstring (S p) : basic_string (p) { } + operator const char * () const { return c_str(); } + }; + typedef basic_cstring cstring; + typedef basic_cstring wcstring; +}} + + +// ---------------------------------------------------------------------------- +// some mappings for non-Windows builds +// ---------------------------------------------------------------------------- + +#ifndef _MSC_VER // add some functions that are VS-only +// --- basic file functions +// convert a wchar_t path to what gets passed to CRT functions that take narrow characters +// This is needed for the Linux CRT which does not accept wide-char strings for pathnames anywhere. +// Always use this function for mapping the paths. 
+msra::strfun::cstring charpath (const std::wstring & p) +{ +#ifdef _WIN32 + return std::wstring_convert>().to_bytes(p); +#else // old version, delete once we know it works + size_t len = p.length(); + std::vector buf(2 * len + 1, 0); // max: 1 wchar => 2 mb chars + ::wcstombs(buf.data(), p.c_str(), 2 * len + 1); + return msra::strfun::cstring (&buf[0]); +#endif +} +static inline FILE* _wfopen(const wchar_t * path, const wchar_t * mode) { return fopen(charpath(path), charpath(mode)); } +// --- basic string functions +static inline wchar_t* wcstok_s(wchar_t* s, const wchar_t* delim, wchar_t** ptr) { return ::wcstok(s, delim, ptr); } +#endif + +// ---------------------------------------------------------------------------- +// basic macros --TODO: do we need those? delete what we dont' need +// ---------------------------------------------------------------------------- + +//#define SAFE_DELETE(p) { if(p) { delete (p); (p)=NULL; } } +//#define SAFE_RELEASE(p) { if(p) { (p)->Release(); (p)=NULL; } } // nasty! use CComPtr<> #ifndef ASSERT #define ASSERT assert #endif @@ -584,45 +605,13 @@ static inline std::wstring mbstowcs(const std::string & p) // input: MBCS #pragma warning(pop) #ifdef _WIN32 -struct utf8 : std::string { utf8 (const std::wstring & p) // utf-16 to -8 -{ -#if 1 - std::wstring_convert> cv; - (*(std::string*)this) = cv.to_bytes(p); -#else // old version, delete once we know it works - size_t len = p.length(); - if (len == 0) { return;} // empty string - msra::basetypes::fixed_vector buf (3 * len + 1); // max: 1 wchar => up to 3 mb chars - // ... 
TODO: this fill() should be unnecessary (a 0 is appended)--but verify - std::fill (buf.begin (), buf.end (), 0); - int rc = WideCharToMultiByte (CP_UTF8, 0, p.c_str(), (int) len, - &buf[0], (int) buf.size(), NULL, NULL); - if (rc == 0) throw std::runtime_error ("WideCharToMultiByte"); - (*(std::string*)this) = &buf[0]; -#endif -}}; -struct utf16 : std::wstring { utf16 (const std::string & p) // utf-8 to -16 -{ -#if 1 - std::wstring_convert> cv; - (*(std::wstring*)this) = cv.from_bytes(p); -#else // old version, delete once we know it works - size_t len = p.length(); - if (len == 0) { return;} // empty string - msra::basetypes::fixed_vector buf (len + 1); - // ... TODO: this fill() should be unnecessary (a 0 is appended)--but verify - std::fill(buf.begin(), buf.end(), (wchar_t)0); - int rc = MultiByteToWideChar(CP_UTF8, 0, p.c_str(), (int)len, - &buf[0], (int)buf.size()); - if (rc == 0) throw std::runtime_error("MultiByteToWideChar"); - ASSERT(rc < buf.size()); - (*(std::wstring*)this) = &buf[0]; -#endif -}}; +static inline cstring utf8(const std::wstring & p) { return std::wstring_convert>().to_bytes(p); } // utf-16 to -8 +static inline wcstring utf16 (const std::string & p) { return std::wstring_convert>().from_bytes(p); } // utf-8 to -16 #else // BUGBUG: we cannot compile the above on Cygwin GCC, so for now fake it using the mbs functions, which will only work for 7-bit ASCII strings -static inline std::string utf8(const std::wstring & p) { return msra::strfun::wcstombs (p); } // output: UTF-8... not really -static inline std::wstring utf16(const std::string & p) { return msra::strfun::mbstowcs (p); } // input: UTF-8... not really +static inline std::string utf8(const std::wstring & p) { return msra::strfun::wcstombs (p.c_str()); } // output: UTF-8... not really +static inline std::wstring utf16(const std::string & p) { return msra::strfun::mbstowcs(p.c_str()); } // input: UTF-8... 
not really #endif +static inline cstring utf8(const std::string & p) { return p; } // no converstion (useful in templated functions) // split and join -- tokenize a string like strtok() would, join() strings together template static inline std::vector> split (const std::basic_string<_T> & s, const _T * delim) @@ -737,10 +726,6 @@ public: // wrappers for some basic types (files, handles, timer) // ---------------------------------------------------------------------------- -#ifndef _MSC_VER // add some functions that are VS-only -static inline FILE* _wfopen(const wchar_t * path, const wchar_t * mode) { return fopen(msra::strfun::wcstombs(path).c_str(), msra::strfun::utf8(mode).c_str()); } -#endif - namespace msra { namespace basetypes { // FILE* with auto-close; use auto_file_ptr instead of FILE*. diff --git a/Common/fileutil.cpp b/Common/fileutil.cpp index 6f51f8d84..d012c3838 100644 --- a/Common/fileutil.cpp +++ b/Common/fileutil.cpp @@ -402,7 +402,7 @@ void renameOrDie (const std::wstring & from, const std::wstring & to) if (!MoveFileW(from.c_str(), to.c_str())) RuntimeError ("error renaming: %s", GetLastError()); #else - renameOrDie (msra::strfun::utf8(from), msra::strfun::utf8(to)); + renameOrDie (charpath(from), charpath(to)); #endif } @@ -471,12 +471,6 @@ bool funicode (FILE * f) // Returns 'buf' (always). buf guaranteed to be 0-terminated. 
// ---------------------------------------------------------------------------- -// TODO: we should redefine this to write UTF-16 (which matters on GCC which defines wchar_t as 32 bit) -static inline wchar_t * fgets(wchar_t * buf, int n, FILE * f) { return fgetws(buf, n, f); } -static inline string _utf8 (const string & s) { return s; } -static inline string _utf8 (const wstring & s) { return msra::strfun::utf8 (s); } -static inline size_t strnlen (wchar_t * s, size_t n) { return wcsnlen (s, n); } - #ifndef _MSC_VER // strnlen is VS proprietary static inline size_t strnlen(const char * s, size_t /*n*/) { return strlen(s); } #endif @@ -485,6 +479,9 @@ static inline size_t strnlen(const char * s, size_t /*n*/) { return strlen(s); } static inline size_t strnlen (const char *s, size_t n) { return std::find (s,s+n,'\0') - s; } #endif +static inline wchar_t * fgets(wchar_t * buf, int n, FILE * f) { return fgetws(buf, n, f); } +static inline size_t strnlen(wchar_t * s, size_t n) { return wcsnlen(s, n); } + template CHAR * fgetline (FILE * f, CHAR * buf, int size) { @@ -504,9 +501,8 @@ CHAR * fgetline (FILE * f, CHAR * buf, int size) if (n >= (size_t) size -1) { basic_string example (p, n < 100 ? n : 100); - uint64_t filepos = fgetpos(f); // (for error message only) - RuntimeError ("input line too long at file offset %I64d (max. %d characters allowed) [%s ...]", - filepos, size -1, _utf8 (example).c_str()); + uint64_t filepos = fgetpos(f); // (for error message only) + RuntimeError("input line too long at file offset %I64d (max. 
%d characters allowed) [%s ...]", filepos, size - 1, msra::strfun::utf8(example).c_str()); } // remove newline at end @@ -1539,10 +1535,6 @@ static void mkdir (const wstring & path) RuntimeError ("mkdir: error creating intermediate directory %S", path.c_str()); } -#ifndef _MSC_VER -wchar_t* wcstok_s(wchar_t* s, const wchar_t* delim, wchar_t** ptr) { return wcstok (s, delim, ptr); } -#endif - // make subdir of a file including parents void msra::files::make_intermediate_dirs (const wstring & filepath) { diff --git a/DataReader/BinaryReader/BinaryFile.cpp b/DataReader/BinaryReader/BinaryFile.cpp index 5793e5fda..a74ce8f68 100644 --- a/DataReader/BinaryReader/BinaryFile.cpp +++ b/DataReader/BinaryReader/BinaryFile.cpp @@ -432,7 +432,7 @@ namespace Microsoft { namespace MSR { namespace CNTK { return m_name; // if name is not set yet, get it from the description header - msra::strfun::utf16 nameDescription(m_sectionHeader->nameDescription); + std::wstring nameDescription (msra::strfun::utf16(m_sectionHeader->nameDescription)); auto firstColon = nameDescription.find_first_of(L':'); if (firstColon != npos && nameDescription.size() >= firstColon) { From f3cc47bc20a5dae34ace35e984f40b372eb73250 Mon Sep 17 00:00:00 2001 From: Frank Seide Date: Fri, 31 Oct 2014 11:58:24 -0700 Subject: [PATCH 27/31] fixed cstring; moved some wchar_t mappers back to fileutil.cpp --- Common/Include/basetypes.h | 14 ++++++-------- Common/fileutil.cpp | 21 ++++++++++----------- 2 files changed, 16 insertions(+), 19 deletions(-) diff --git a/Common/Include/basetypes.h b/Common/Include/basetypes.h index 9506999e1..ef75fcad7 100644 --- a/Common/Include/basetypes.h +++ b/Common/Include/basetypes.h @@ -78,6 +78,7 @@ OACR_WARNING_DISABLE(POTENTIAL_ARGUMENT_TYPE_MISMATCH, "Not level1 or level2_sec #include #include #include // std::wstring_convert +#include #ifdef _MSC_VER #include // std::codecvt_utf8 #endif @@ -112,10 +113,10 @@ using namespace std; namespace msra { namespace strfun { // a class that 
can return a std::string with auto-convert into a const char* - template struct basic_cstring : std::basic_string + template struct basic_cstring : public std::basic_string { - template basic_cstring (S p) : basic_string (p) { } - operator const char * () const { return c_str(); } + template basic_cstring (S p) : std::basic_string (p) { } + operator const C * () const { return this->c_str(); } }; typedef basic_cstring cstring; typedef basic_cstring wcstring; @@ -145,6 +146,8 @@ msra::strfun::cstring charpath (const std::wstring & p) static inline FILE* _wfopen(const wchar_t * path, const wchar_t * mode) { return fopen(charpath(path), charpath(mode)); } // --- basic string functions static inline wchar_t* wcstok_s(wchar_t* s, const wchar_t* delim, wchar_t** ptr) { return ::wcstok(s, delim, ptr); } +// -- other +static inline void Sleep (size_t ms) { std::this_thread::sleep_for (std::chrono::milliseconds (ms)); } #endif // ---------------------------------------------------------------------------- @@ -926,12 +929,7 @@ template static void attempt (int retries, const FUNCTION & b if (attempt >= retries) throw; // failed N times --give up and rethrow the error fprintf (stderr, "attempt: %s, retrying %d-th time out of %d...\n", e.what(), attempt+1, retries); -#ifndef LINUX ::Sleep (1000); // wait a little, then try again -#else - std::chrono::milliseconds dura(1000); - std::this_thread::sleep_for(dura); -#endif /* LINUX */ } } } diff --git a/Common/fileutil.cpp b/Common/fileutil.cpp index d012c3838..d8a15f631 100644 --- a/Common/fileutil.cpp +++ b/Common/fileutil.cpp @@ -44,6 +44,15 @@ using namespace std; +// ---------------------------------------------------------------------------- +// some mappings for non-Windows builds +// ---------------------------------------------------------------------------- + +#ifndef _MSC_VER // add some functions that are VS-only +static int _wunlink (const wchar_t * p) { return unlink (charpath (p)); } +static int _wmkdir (const 
wchar_t * p) { return mkdir (charpath (p), 0777/*correct?*/); } +#endif + template <> const wchar_t* GetScanFormatString(char) {return L" %hc";} template <> const wchar_t* GetScanFormatString(wchar_t) {return L" %lc";} template <> const wchar_t* GetScanFormatString(short) {return L" %hi";} @@ -372,9 +381,6 @@ void unlinkOrDie (const std::string & pathname) if (unlink (pathname.c_str()) != 0 && errno != ENOENT) // if file is missing that's what we want RuntimeError ("error deleting file '%s': %s", pathname.c_str(), strerror (errno)); } -#ifndef _MSC_VER -static int _wunlink (const wchar_t * p) { return unlink (msra::strfun::wcstombs (p).c_str()); } -#endif void unlinkOrDie (const std::wstring & pathname) { if (_wunlink (pathname.c_str()) != 0 && errno != ENOENT) // if file is missing that's what we want @@ -1401,7 +1407,7 @@ bool getfiletime (const wstring & path, FILETIME & time) int result; // Get data associated with "crt_stat.c": - result = stat(msra::strfun::wcstombs(path).c_str(), &buf); + result = stat(charpath(path), &buf); // Check if statistics are valid: if (result != 0) return false; @@ -1512,13 +1518,6 @@ void expand_wildcards (const wstring & path, vector & paths) // make_intermediate_dirs() -- make all intermediate dirs on a path // ---------------------------------------------------------------------------- -#ifndef _MSC_VER // _wmkdir() is VS proprietary -static int _wmkdir (const wchar_t * p) -{ - return mkdir (msra::strfun::wcstombs (p).c_str(), 0777/*correct?*/); -} -#endif - static void mkdir (const wstring & path) { int rc = _wmkdir (path.c_str()); From 92fe5377d5d8fdb75e04ce35a5f7f81bdeeda6c9 Mon Sep 17 00:00:00 2001 From: Frank Seide Date: Fri, 31 Oct 2014 12:13:26 -0700 Subject: [PATCH 28/31] fixed ambigous type cast problems in ConfigValue; added some more VS CRT mappings to basetypes.h (string compare functions) --- Common/Include/basetypes.h | 8 +- Common/Include/commandArgUtil.h | 26 ++-- Math/Math/CPUSparseMatrix.h | 232 
++++++++++++++++---------------- 3 files changed, 137 insertions(+), 129 deletions(-) diff --git a/Common/Include/basetypes.h b/Common/Include/basetypes.h index ef75fcad7..2cf2b20b7 100644 --- a/Common/Include/basetypes.h +++ b/Common/Include/basetypes.h @@ -86,6 +86,7 @@ OACR_WARNING_DISABLE(POTENTIAL_ARGUMENT_TYPE_MISMATCH, "Not level1 or level2_sec #include // for CRITICAL_SECTION and Unicode conversion functions --TODO: is there a portable alternative? #endif #if __unix__ +#include #include #include #endif @@ -143,9 +144,12 @@ msra::strfun::cstring charpath (const std::wstring & p) return msra::strfun::cstring (&buf[0]); #endif } -static inline FILE* _wfopen(const wchar_t * path, const wchar_t * mode) { return fopen(charpath(path), charpath(mode)); } +static inline FILE* _wfopen (const wchar_t * path, const wchar_t * mode) { return fopen(charpath(path), charpath(mode)); } // --- basic string functions -static inline wchar_t* wcstok_s(wchar_t* s, const wchar_t* delim, wchar_t** ptr) { return ::wcstok(s, delim, ptr); } +static inline wchar_t* wcstok_s (wchar_t* s, const wchar_t* delim, wchar_t** ptr) { return ::wcstok(s, delim, ptr); } +static inline int _stricmp (const char * a, const char * b) { return ::strcasecmp (a, b); } +static inline int _strnicmp (const char * a, const char * b, wchar_t n) { return ::strncasecmp (a, b, n); } +static inline int _wcsicmp (const wchar_t * a, const wchar_t * b) { return ::wcscasecmp (a, b); } // -- other static inline void Sleep (size_t ms) { std::this_thread::sleep_for (std::chrono::milliseconds (ms)); } #endif diff --git a/Common/Include/commandArgUtil.h b/Common/Include/commandArgUtil.h index db09447ac..d175edf60 100644 --- a/Common/Include/commandArgUtil.h +++ b/Common/Include/commandArgUtil.h @@ -108,25 +108,29 @@ namespace Microsoft { namespace MSR { namespace CNTK { return value; } operator float () const { return (float) (double) *this; } - operator long () const + private: + long tolong() const { char * ep; // will 
be set to point to first character that failed parsing - long value = strtol (c_str(), &ep, 10); + long value = strtol(c_str(), &ep, 10); if (empty() || *ep != 0) - throw std::runtime_error ("ConfigValue (long): invalid input string"); + throw std::runtime_error("ConfigValue (long): invalid input string"); return value; } - operator unsigned long () const + unsigned long toulong() const { char * ep; // will be set to point to first character that failed parsing - unsigned long value = strtoul (c_str(), &ep, 10); + unsigned long value = strtoul(c_str(), &ep, 10); if (empty() || *ep != 0) - throw std::runtime_error ("ConfigValue (unsigned long): invalid input string"); + throw std::runtime_error("ConfigValue (unsigned long): invalid input string"); return value; } + public: + operator long() const { return tolong(); } + operator unsigned long() const { return toulong(); } operator short () const { - long val = (long) *this; + long val = tolong(); short ival = (short) val; if (val != ival) throw std::runtime_error ("ConfigValue (short): integer argument expected"); @@ -134,7 +138,7 @@ namespace Microsoft { namespace MSR { namespace CNTK { } operator unsigned short () const { - unsigned long val = (unsigned long) *this; + unsigned long val = toulong(); unsigned short ival = (unsigned short) val; if (val != ival) throw std::runtime_error ("ConfigValue (unsigned short): integer argument expected"); @@ -142,7 +146,7 @@ namespace Microsoft { namespace MSR { namespace CNTK { } operator int () const { - long val = (long) *this; + long val = tolong(); int ival = (int) val; if (val != ival) throw std::runtime_error ("ConfigValue (int): integer argument expected"); @@ -150,7 +154,7 @@ namespace Microsoft { namespace MSR { namespace CNTK { } operator unsigned int () const { - unsigned long val = (unsigned long) *this; + unsigned long val = toulong(); unsigned int ival = (unsigned int) val; if (val != ival) throw std::runtime_error ("ConfigValue (unsigned int): integer argument 
expected"); @@ -159,7 +163,7 @@ namespace Microsoft { namespace MSR { namespace CNTK { operator int64_t () const { char * ep; // will be set to point to first character that failed parsing - int64_t value = _strtoui64 (c_str(), &ep, 10); + int64_t value = _strtoi64 (c_str(), &ep, 10); if (empty() || *ep != 0) throw std::runtime_error ("ConfigValue (int64_t): invalid input string"); return value; diff --git a/Math/Math/CPUSparseMatrix.h b/Math/Math/CPUSparseMatrix.h index 0499781a9..e33010331 100644 --- a/Math/Math/CPUSparseMatrix.h +++ b/Math/Math/CPUSparseMatrix.h @@ -1,118 +1,118 @@ -// -// -// Copyright (c) Microsoft Corporation. All rights reserved. -// -// -#pragma once - -#include -#include "CPUMatrix.h" -#include -#include - -#ifdef _WIN32 -#ifdef MATH_EXPORTS -#define MATH_API __declspec(dllexport) -#else -#define MATH_API __declspec(dllimport) -#endif -#endif /* Linux - already defined in CPUMatrix.h */ - -namespace Microsoft { namespace MSR { namespace CNTK { - - template - class MATH_API CPUSparseMatrix : public BaseMatrix - { +// +// +// Copyright (c) Microsoft Corporation. All rights reserved. 
+// +// +#pragma once + +#include +#include "CPUMatrix.h" +#include +#include + +#ifdef _WIN32 +#ifdef MATH_EXPORTS +#define MATH_API __declspec(dllexport) +#else +#define MATH_API __declspec(dllimport) +#endif +#endif /* Linux - already defined in CPUMatrix.h */ + +namespace Microsoft { namespace MSR { namespace CNTK { + + template + class MATH_API CPUSparseMatrix : public BaseMatrix + { typedef BaseMatrix B; using B::m_elemSizeAllocated; using B::m_computeDevice; using B::m_externalBuffer; using B::m_format; using B::m_matrixName; using B::m_numCols; using B::m_numRows; using B::m_nz; using B::m_pArray; // without this, base members would require to use thi-> in GCC - - private: - void ZeroInit(); - void CheckInit(const MatrixFormat format); - - public: - CPUSparseMatrix(const MatrixFormat format); - CPUSparseMatrix(const MatrixFormat format, const size_t numRows, const size_t numCols, const size_t size); - - ~CPUSparseMatrix(); - - public: - using B::GetNumCols; using B::GetNumRows; - - void SetValue(const size_t rIdx, const size_t cIdx, ElemType val); - void SetValue(const CPUSparseMatrix& /*val*/) { NOT_IMPLEMENTED; } - - void ShiftBy(int /*numShift*/) { NOT_IMPLEMENTED; } - - size_t BufferSize() const {return m_elemSizeAllocated*sizeof(ElemType);} - ElemType* BufferPointer() const; - - void SetGaussianRandomValue(const ElemType /*mean*/, const ElemType /*sigma*/, unsigned long /*seed*/) { NOT_IMPLEMENTED; } - - static void ClassEntropy(const CPUMatrix& a, const CPUMatrix& weight, - const CPUSparseMatrix & label, const CPUMatrix& cls, - const CPUMatrix& idx2cls, CPUSparseMatrix& etp, CPUMatrix& entropyScore); - - static void ClassEntropyError(CPUSparseMatrix& a); - - static void ClassEntropyGradientOfInput( - const CPUSparseMatrix& error, - const CPUMatrix& weight, - CPUMatrix& grd); - - static void ClassEntropyGradientOfWeight( - const CPUSparseMatrix& error, - const CPUMatrix& input, - const CPUSparseMatrix & label, - const CPUMatrix& cls, - const CPUMatrix& 
idx2cls, - CPUSparseMatrix& grd); - - static void MultiplyAndWeightedAdd(ElemType alpha, const CPUMatrix& lhs, const bool transposeA, - const CPUSparseMatrix& rhs, const bool transposeB, ElemType beta, CPUMatrix& c); - - static void MultiplyAndAdd(ElemType alpha, const CPUMatrix& lhs, const bool transposeA, - const CPUSparseMatrix& rhs, const bool transposeB, CPUSparseMatrix& c); - - static void ScaleAndAdd(const ElemType alpha, const CPUSparseMatrix& lhs, CPUMatrix& c); - - /// sum(vec(a).*vec(b)) - static ElemType InnerProductOfMatrices(const CPUSparseMatrix& /*a*/, const CPUMatrix& /*b*/) { NOT_IMPLEMENTED; } - - static void AddScaledDifference(const ElemType /*alpha*/, const CPUSparseMatrix& /*a*/, const CPUMatrix& /*b*/, CPUMatrix& /*c*/, - bool /*bDefaultZero*/ ) { NOT_IMPLEMENTED; } - static void AddScaledDifference(const ElemType /*alpha*/, const CPUMatrix& /*a*/, const CPUSparseMatrix& /*b*/, CPUMatrix& /*c*/, - bool /*bDefaultZero*/ ) { NOT_IMPLEMENTED; } - - int GetComputeDeviceId() const {return -1;} - - void Resize(const size_t numRows, const size_t numCols, size_t size = 0); - void Reset(); - - public: - void NormalGrad(CPUMatrix& c, const ElemType momentum); - void Adagrad(CPUMatrix& c); - - public: - CPUSparseMatrix& InplaceTruncateTop (const ElemType /*threshold*/) { NOT_IMPLEMENTED; } - CPUSparseMatrix& InplaceTruncateBottom (const ElemType /*threshold*/) { NOT_IMPLEMENTED; } - CPUSparseMatrix& InplaceTruncate (const ElemType /*threshold*/); - - public: - void Print(const char* /*matrixName*/) const { NOT_IMPLEMENTED; } - - int m_colIdx; //used to SetValue() - ElemType *m_val; // values - size_t *m_row; //row/col ids in CSC/CSR format - size_t *m_pb; //begin ids of col/row in CSC/CSR format - - size_t m_blockSize; //block size - ElemType *m_blockVal; //block values - size_t *m_blockIds; //block ids - }; - - typedef CPUSparseMatrix CPUSingleSparseMatrix; - typedef CPUSparseMatrix CPUDoubleSparseMatrix; - -}}} - + + private: + void ZeroInit(); + 
void CheckInit(const MatrixFormat format); + + public: + CPUSparseMatrix(const MatrixFormat format); + CPUSparseMatrix(const MatrixFormat format, const size_t numRows, const size_t numCols, const size_t size); + + ~CPUSparseMatrix(); + + public: + using B::GetNumCols; using B::GetNumRows; + + void SetValue(const size_t rIdx, const size_t cIdx, ElemType val); + void SetValue(const CPUSparseMatrix& /*val*/) { NOT_IMPLEMENTED; } + + void ShiftBy(int /*numShift*/) { NOT_IMPLEMENTED; } + + size_t BufferSize() const {return m_elemSizeAllocated*sizeof(ElemType);} + ElemType* BufferPointer() const; + + void SetGaussianRandomValue(const ElemType /*mean*/, const ElemType /*sigma*/, unsigned long /*seed*/) { NOT_IMPLEMENTED; } + + static void ClassEntropy(const CPUMatrix& a, const CPUMatrix& weight, + const CPUSparseMatrix & label, const CPUMatrix& cls, + const CPUMatrix& idx2cls, CPUSparseMatrix& etp, CPUMatrix& entropyScore); + + static void ClassEntropyError(CPUSparseMatrix& a); + + static void ClassEntropyGradientOfInput( + const CPUSparseMatrix& error, + const CPUMatrix& weight, + CPUMatrix& grd); + + static void ClassEntropyGradientOfWeight( + const CPUSparseMatrix& error, + const CPUMatrix& input, + const CPUSparseMatrix & label, + const CPUMatrix& cls, + const CPUMatrix& idx2cls, + CPUSparseMatrix& grd); + + static void MultiplyAndWeightedAdd(ElemType alpha, const CPUMatrix& lhs, const bool transposeA, + const CPUSparseMatrix& rhs, const bool transposeB, ElemType beta, CPUMatrix& c); + + static void MultiplyAndAdd(ElemType alpha, const CPUMatrix& lhs, const bool transposeA, + const CPUSparseMatrix& rhs, const bool transposeB, CPUSparseMatrix& c); + + static void ScaleAndAdd(const ElemType alpha, const CPUSparseMatrix& lhs, CPUMatrix& c); + + /// sum(vec(a).*vec(b)) + static ElemType InnerProductOfMatrices(const CPUSparseMatrix& /*a*/, const CPUMatrix& /*b*/) { NOT_IMPLEMENTED; } + + static void AddScaledDifference(const ElemType /*alpha*/, const CPUSparseMatrix& 
/*a*/, const CPUMatrix& /*b*/, CPUMatrix& /*c*/, + bool /*bDefaultZero*/ ) { NOT_IMPLEMENTED; } + static void AddScaledDifference(const ElemType /*alpha*/, const CPUMatrix& /*a*/, const CPUSparseMatrix& /*b*/, CPUMatrix& /*c*/, + bool /*bDefaultZero*/ ) { NOT_IMPLEMENTED; } + + int GetComputeDeviceId() const {return -1;} + + void Resize(const size_t numRows, const size_t numCols, size_t size = 0); + void Reset(); + + public: + void NormalGrad(CPUMatrix& c, const ElemType momentum); + void Adagrad(CPUMatrix& c); + + public: + CPUSparseMatrix& InplaceTruncateTop (const ElemType /*threshold*/) { NOT_IMPLEMENTED; } + CPUSparseMatrix& InplaceTruncateBottom (const ElemType /*threshold*/) { NOT_IMPLEMENTED; } + CPUSparseMatrix& InplaceTruncate (const ElemType /*threshold*/); + + public: + void Print(const char* /*matrixName*/) const { NOT_IMPLEMENTED; } + + int m_colIdx; //used to SetValue() + ElemType *m_val; // values + size_t *m_row; //row/col ids in CSC/CSR format + size_t *m_pb; //begin ids of col/row in CSC/CSR format + + size_t m_blockSize; //block size + ElemType *m_blockVal; //block values + size_t *m_blockIds; //block ids + }; + + typedef CPUSparseMatrix CPUSingleSparseMatrix; + typedef CPUSparseMatrix CPUDoubleSparseMatrix; + +}}} + From 22b7e0fc2554b3330a324f24cf29ce939e04e3d6 Mon Sep 17 00:00:00 2001 From: Frank Seide Date: Fri, 31 Oct 2014 12:33:46 -0700 Subject: [PATCH 29/31] fixed some "default assignment operator deleted" issues with GCC; two more mapped CRT functions that don't exist in GCC --- Common/Include/basetypes.h | 2 ++ Common/Include/commandArgUtil.h | 27 +++++++++++++++++++++++++-- 2 files changed, 27 insertions(+), 2 deletions(-) diff --git a/Common/Include/basetypes.h b/Common/Include/basetypes.h index 2cf2b20b7..5e5179581 100644 --- a/Common/Include/basetypes.h +++ b/Common/Include/basetypes.h @@ -150,6 +150,8 @@ static inline wchar_t* wcstok_s (wchar_t* s, const wchar_t* delim, wchar_t** ptr static inline int _stricmp (const char * a, const 
char * b) { return ::strcasecmp (a, b); } static inline int _strnicmp (const char * a, const char * b, wchar_t n) { return ::strncasecmp (a, b, n); } static inline int _wcsicmp (const wchar_t * a, const wchar_t * b) { return ::wcscasecmp (a, b); } +static inline int64_t _strtoi64 (const char * s, char ** ep, int r) { return strtoll (s, ep, r); } // TODO: check if correct +static inline uint64_t _strtoui64 (const char * s, char ** ep, int r) { return strtoull (s, ep, r); } // TODO: correct for size_t? // -- other static inline void Sleep (size_t ms) { std::this_thread::sleep_for (std::chrono::milliseconds (ms)); } #endif diff --git a/Common/Include/commandArgUtil.h b/Common/Include/commandArgUtil.h index d175edf60..c70772049 100644 --- a/Common/Include/commandArgUtil.h +++ b/Common/Include/commandArgUtil.h @@ -244,6 +244,16 @@ namespace Microsoft { namespace MSR { namespace CNTK { m_separator = configParser.m_separator; m_configName = move(configParser.m_configName); } + ConfigParser& operator=(const ConfigParser& configParser) + { + m_separator = configParser.m_separator; + m_configName = configParser.m_configName; + } + ConfigParser& operator=(const ConfigParser&& configParser) + { + m_separator = configParser.m_separator; + m_configName = move(configParser.m_configName); + } public: // FindBraces - find matching braces in a string starting at the current position @@ -873,7 +883,7 @@ namespace Microsoft { namespace MSR { namespace CNTK { m_repeatAsterisk = repeatAsterisk; } - // copy and move constructors + // copy and move constructors and assignment ConfigArray(const ConfigArray& configValue) : ConfigParser(configValue) { m_repeatAsterisk = true; @@ -884,6 +894,18 @@ namespace Microsoft { namespace MSR { namespace CNTK { m_repeatAsterisk = true; *this = move(configValue); } + ConfigArray& operator=(const ConfigArray& configValue) + { + ConfigParser::operator=(configValue); + m_repeatAsterisk = true; + *this = configValue; + } + ConfigArray& operator=(const 
ConfigArray&& configValue) + { + ConfigParser::operator=(move(configValue)); + m_repeatAsterisk = true; + *this = move(configValue); + } // cast a configArray back to a string so we can return it as a ConfigValue operator ConfigValue() @@ -951,7 +973,7 @@ namespace Microsoft { namespace MSR { namespace CNTK { for (int i=0;i < count;++i) { char buf[10]; - _itoa_s((int)size(), buf, 10); + sprintf (buf, "%d", (int)size()); // TODO: left-over of Linux compat, can be done nicer std::string name = m_configName + '[' + buf + ']' ; push_back(ConfigValue(value, name)); } @@ -980,6 +1002,7 @@ namespace Microsoft { namespace MSR { namespace CNTK { template class argvector : public std::vector { + typedef std::vector B; using B::clear; using B::reserve; static void parse (const std::wstring & in, float & val) { val = (float) msra::strfun::todouble (in); } static void parse (const std::wstring & in, size_t & val) // convert wstring toks2[0] to T val and check type { From 6cd2a6f36af8f45873a4e6e951bd1cd7d9564c2c Mon Sep 17 00:00:00 2001 From: Frank Seide Date: Fri, 31 Oct 2014 13:31:17 -0700 Subject: [PATCH 30/31] fixed previous check-in for the deleted auto-generated assignment operators, now using =default instead of implementing them actually; removed a few seemingly superfluous 'typename's --- Common/Include/DataWriter.h | 6 +++--- Common/Include/commandArgUtil.h | 24 ++---------------------- 2 files changed, 5 insertions(+), 25 deletions(-) diff --git a/Common/Include/DataWriter.h b/Common/Include/DataWriter.h index efc7020ba..e8f62d70d 100644 --- a/Common/Include/DataWriter.h +++ b/Common/Include/DataWriter.h @@ -52,7 +52,7 @@ public: virtual void Destroy() = 0; virtual void GetSections(std::map& sections) = 0; virtual bool SaveData(size_t recordStart, const std::map& matrices, size_t numRecords, size_t datasetSize, size_t byteVariableSized) = 0; - virtual void SaveMapping(std::wstring saveId, const std::map& labelMapping) = 0; + virtual void SaveMapping(std::wstring 
saveId, const std::map& labelMapping) = 0; }; @@ -71,7 +71,7 @@ template class DataWriter : public IDataWriter { private: - IDataWriter *m_dataWriter; // writer + IDataWriter *m_dataWriter; // writer HMODULE m_hModule; // module handle for the writer DLL std::wstring m_dllName; // name of the writer DLL @@ -146,7 +146,7 @@ public: // SaveMapping - save a map into the file // saveId - name of the section to save into (section:subsection format) // labelMapping - map we are saving to the file - virtual void SaveMapping(std::wstring saveId, const std::map& labelMapping); + virtual void SaveMapping(std::wstring saveId, const std::map& labelMapping); }; }}} diff --git a/Common/Include/commandArgUtil.h b/Common/Include/commandArgUtil.h index c70772049..640fd37c1 100644 --- a/Common/Include/commandArgUtil.h +++ b/Common/Include/commandArgUtil.h @@ -244,16 +244,7 @@ namespace Microsoft { namespace MSR { namespace CNTK { m_separator = configParser.m_separator; m_configName = move(configParser.m_configName); } - ConfigParser& operator=(const ConfigParser& configParser) - { - m_separator = configParser.m_separator; - m_configName = configParser.m_configName; - } - ConfigParser& operator=(const ConfigParser&& configParser) - { - m_separator = configParser.m_separator; - m_configName = move(configParser.m_configName); - } + ConfigParser& operator=(const ConfigParser& configParser) = default; public: // FindBraces - find matching braces in a string starting at the current position @@ -894,18 +885,7 @@ namespace Microsoft { namespace MSR { namespace CNTK { m_repeatAsterisk = true; *this = move(configValue); } - ConfigArray& operator=(const ConfigArray& configValue) - { - ConfigParser::operator=(configValue); - m_repeatAsterisk = true; - *this = configValue; - } - ConfigArray& operator=(const ConfigArray&& configValue) - { - ConfigParser::operator=(move(configValue)); - m_repeatAsterisk = true; - *this = move(configValue); - } + ConfigArray& operator=(const ConfigArray& 
configValue) = default; // cast a configArray back to a string so we can return it as a ConfigValue operator ConfigValue() From c576fc6f2c20f62e0b5ccf1407a120efba37a77b Mon Sep 17 00:00:00 2001 From: Frank Seide Date: Fri, 31 Oct 2014 13:59:01 -0700 Subject: [PATCH 31/31] factored out dynamic loading of modules from Data{Read,Writ}er, in prep for Linux port --- Common/DataReader.cpp | 18 +------------- Common/DataWriter.cpp | 37 ++++++++-------------------- Common/Include/DataReader.h | 14 +++++------ Common/Include/DataWriter.h | 7 +++--- Common/Include/basetypes.h | 36 +++++++++++++++++++++++++++ DataReader/HTKMLFReader/basetypes.h | 38 +++++++++++++++++++++++++++++ 6 files changed, 94 insertions(+), 56 deletions(-) diff --git a/Common/DataReader.cpp b/Common/DataReader.cpp index f3ea027d4..936a31e46 100644 --- a/Common/DataReader.cpp +++ b/Common/DataReader.cpp @@ -44,23 +44,12 @@ void DataReader::GetDataReader(const ConfigParameters& config) typedef void (*GetReaderProc)(IDataReader** preader); // initialize just in case - m_hModule = NULL; m_dataReader = NULL; // get the name for the reader we want to use, default to UCIFastReader - m_dllName = msra::strfun::utf16(config("readerType", "UCIFastReader")); - m_dllName += L".dll"; - m_hModule = LoadLibrary(m_dllName.c_str()); - if (m_hModule == NULL) - { - std::string message = "Reader not found: "; - message += msra::strfun::utf8(m_dllName); - RuntimeError((char*) message.c_str()); - } - // create a variable of each type just to call the proper templated version ElemType elemType = ElemType(); - GetReaderProc getReaderProc = (GetReaderProc)GetProcAddress(m_hModule, GetReaderName(elemType).c_str()); + GetReaderProc getReaderProc = (GetReaderProc)Plugin::Load(config("readerType", "UCIFastReader"), GetReaderName(elemType).c_str()); getReaderProc(&m_dataReader); } @@ -87,11 +76,6 @@ DataReader::~DataReader() m_dataReader->Destroy(); m_dataReader = NULL; } - if (m_hModule != NULL) - { - FreeLibrary(m_hModule); - 
m_hModule = NULL; - } } //StartMinibatchLoop - Startup a minibatch loop diff --git a/Common/DataWriter.cpp b/Common/DataWriter.cpp index 846e70aab..712e1d354 100644 --- a/Common/DataWriter.cpp +++ b/Common/DataWriter.cpp @@ -43,37 +43,25 @@ void DataWriter::GetDataWriter(const ConfigParameters& config) typedef void (*GetWriterProc)(IDataWriter** pwriter); // initialize just in case - m_hModule = NULL; m_dataWriter = NULL; // get the name for the writer we want to use, default to BinaryWriter (which is in BinaryReader.dll) - string writerType = config("writerType","BinaryReader"); - if (writerType == "HTKMLFWriter" || writerType == "HTKMLFReader") - { - writerType = "HTKMLFReader"; - } - else if (writerType == "BinaryWriter" || writerType == "BinaryReader") - { - writerType = "BinaryReader"; - } + string writerType = config("writerType", "BinaryReader"); + if (writerType == "HTKMLFWriter" || writerType == "HTKMLFReader") + { + writerType = "HTKMLFReader"; + } + else if (writerType == "BinaryWriter" || writerType == "BinaryReader") + { + writerType = "BinaryReader"; + } else if (writerType == "LUSequenceWriter" || writerType == "LUSequenceReader") { writerType = "LUSequenceReader"; } - m_dllName = msra::strfun::utf16(writerType); - m_dllName += L".dll"; - m_hModule = LoadLibrary(m_dllName.c_str()); - if (m_hModule == NULL) - { - std::string message = "Writer not found: "; - message += msra::strfun::utf8(m_dllName); - RuntimeError((char*)message.c_str()); - } - - // create a variable of each type just to call the proper templated version ElemType elemType = ElemType(); - GetWriterProc getWriterProc = (GetWriterProc)GetProcAddress(m_hModule, GetWriterName(elemType).c_str()); + GetWriterProc getWriterProc = (GetWriterProc)Plugin::Load(writerType, GetWriterName(elemType).c_str()); getWriterProc(&m_dataWriter); } @@ -97,11 +85,6 @@ DataWriter::~DataWriter() m_dataWriter->Destroy(); m_dataWriter = NULL; } - if (m_hModule != NULL) - { - FreeLibrary(m_hModule); - m_hModule = 
NULL; - } } // GetSections - Get the sections of the file diff --git a/Common/Include/DataReader.h b/Common/Include/DataReader.h index 1984b73fb..1f0596ea8 100644 --- a/Common/Include/DataReader.h +++ b/Common/Include/DataReader.h @@ -18,9 +18,10 @@ #else #define DATAREADER_API __declspec(dllimport) #endif -#include "matrix.h" +#include "Matrix.h" #include #include +#include "basetypes.h" #include "commandArgUtil.h" namespace Microsoft { namespace MSR { namespace CNTK { @@ -56,8 +57,7 @@ public: virtual void SetLabelMapping(const std::wstring& sectionName, const std::map& labelMapping) = 0; virtual bool GetData(const std::wstring& sectionName, size_t numRecords, void* data, size_t& dataBufferSize, size_t recordStart) = 0; virtual bool DataEnd(EndDataType endDataType) = 0; - virtual void SetSentenceEndInBatch(vector &sentenceEnd)=0; - + virtual void SetSentenceEndInBatch(vector &sentenceEnd) = 0; }; // GetReader - get a reader type from the DLL @@ -72,12 +72,10 @@ extern "C" DATAREADER_API void GetReaderD(IDataReader** preader); // interface for clients of the Data Reader // mirrors the IDataReader interface, except the Init method is private (use the constructor) template -class DataReader : public IDataReader +class DataReader : public IDataReader, public Plugin { private: - IDataReader *m_dataReader; // reader - HMODULE m_hModule; // module handle for the reader DLL - std::wstring m_dllName; // name of the reader DLL + IDataReader *m_dataReader; // reader // Init - Reader Initialize for multiple data sets // config - [in] configuration parameters for the datareader @@ -155,4 +153,4 @@ public: void SetSentenceEndInBatch(std::vector &sentenceEnd); }; -}}} \ No newline at end of file +}}} diff --git a/Common/Include/DataWriter.h b/Common/Include/DataWriter.h index e8f62d70d..6478ef4a3 100644 --- a/Common/Include/DataWriter.h +++ b/Common/Include/DataWriter.h @@ -18,9 +18,10 @@ #else #define DATAWRITER_API __declspec(dllimport) #endif -#include "matrix.h" +#include 
"Matrix.h" #include #include +#include "basetypes.h" #include "commandArgUtil.h" @@ -68,12 +69,10 @@ extern "C" DATAWRITER_API void GetWriterD(IDataWriter** pwriter); // interface for clients of the Data Writer // mirrors the IDataWriter interface, except the Init method is private (use the constructor) template -class DataWriter : public IDataWriter +class DataWriter : public IDataWriter, public Plugin { private: IDataWriter *m_dataWriter; // writer - HMODULE m_hModule; // module handle for the writer DLL - std::wstring m_dllName; // name of the writer DLL // Init - Writer Initialize for multiple data sets // config - [in] configuration parameters for the datawriter diff --git a/Common/Include/basetypes.h b/Common/Include/basetypes.h index 5e5179581..a08a4c507 100644 --- a/Common/Include/basetypes.h +++ b/Common/Include/basetypes.h @@ -976,5 +976,41 @@ static inline bool LogicError(const char * format, ...) vsprintf(buffer, format, args); throw std::logic_error(buffer); }; + +// ---------------------------------------------------------------------------- +// dynamic loading of modules +// ---------------------------------------------------------------------------- + +#ifdef _WIN32 +class Plugin +{ + HMODULE m_hModule; // module handle for the writer DLL + std::wstring m_dllName; // name of the writer DLL +public: + Plugin() { m_hModule = NULL; } + FARPROC Load(const std::string & plugin, const std::string & proc) + { + m_dllName = msra::strfun::utf16(plugin); + m_dllName += L".dll"; + m_hModule = LoadLibrary(m_dllName.c_str()); + if (m_hModule == NULL) + RuntimeError("Plugin not found: %s", msra::strfun::utf8(m_dllName)); + + // create a variable of each type just to call the proper templated version + return GetProcAddress(m_hModule, proc.c_str()); + } + ~Plugin() { if (m_hModule) FreeLibrary(m_hModule); } +}; +#else +class Plugin +{ +public: + void * Load(const std::string & plugin, const std::string & proc) + { + RuntimeError("Plugins not implemented on Linux 
yet"); + return NULL; + } +}; +#endif #endif // _BASETYPES_ diff --git a/DataReader/HTKMLFReader/basetypes.h b/DataReader/HTKMLFReader/basetypes.h index 47330185e..9f247891a 100644 --- a/DataReader/HTKMLFReader/basetypes.h +++ b/DataReader/HTKMLFReader/basetypes.h @@ -1,3 +1,5 @@ +// TODO: This is a dup, we should get back to the shared one. But this one has some stuff the other doesn't. + // // // Copyright (c) Microsoft Corporation. All rights reserved. @@ -970,4 +972,40 @@ static inline void RuntimeError(const char * format, ...) throw std::runtime_error(buffer); }; +// ---------------------------------------------------------------------------- +// dynamic loading of modules +// ---------------------------------------------------------------------------- + +#ifdef _WIN32 +class Plugin +{ + HMODULE m_hModule; // module handle for the writer DLL + std::wstring m_dllName; // name of the writer DLL +public: + Plugin() { m_hModule = NULL; } + FARPROC Load(const std::string & plugin, const std::string & proc) + { + m_dllName = msra::strfun::utf16(plugin); + m_dllName += L".dll"; + m_hModule = LoadLibrary(m_dllName.c_str()); + if (m_hModule == NULL) + RuntimeError("Plugin not found: %s", msra::strfun::utf8(m_dllName)); + + // create a variable of each type just to call the proper templated version + return GetProcAddress(m_hModule, proc.c_str()); + } + ~Plugin() { if (m_hModule) FreeLibrary(m_hModule); } +}; +#else +class Plugin +{ +public: + void * Load(const std::string & plugin, const std::string & proc) + { + RuntimeError("Plugins not implemented on Linux yet"); + return NULL; + } +}; +#endif + #endif // _BASETYPES_