Reviewed the changes from the linuxport branch and made the code compile again on Windows. I had to re-enable the C++11 features (lambdas, rvalue references) that had been commented out because they seem to cause problems with the Linux version of NVCC. We will fix this later; it is likely a compiler-option issue.
This commit is contained in:
Родитель
ff72d5696f
Коммит
c23dd1dab3
|
@ -71,5 +71,7 @@ public:
|
|||
std::vector<int> GetDevices(int number=AllDevices, BestGpuFlags flags=bestGpuNormal); // get multiple devices
|
||||
};
|
||||
extern BestGpu* g_bestGpu;
|
||||
|
||||
#endif
|
||||
}}}
|
||||
|
||||
}}}
|
||||
|
|
|
@ -123,7 +123,7 @@ public:
|
|||
template <typename T>
|
||||
File& operator<<(T val)
|
||||
{
|
||||
#ifndef LINUX
|
||||
#ifndef __CUDACC__ // TODO: CUDA compiler blows up, fix this
|
||||
attempt([=]()
|
||||
#endif
|
||||
{
|
||||
|
@ -132,7 +132,7 @@ public:
|
|||
else
|
||||
fput(m_file, val);
|
||||
}
|
||||
#ifndef LINUX
|
||||
#ifndef __CUDACC__
|
||||
);
|
||||
#endif
|
||||
return *this;
|
||||
|
@ -161,7 +161,7 @@ public:
|
|||
template <typename T>
|
||||
File& operator>>(T& val)
|
||||
{
|
||||
#ifndef LINUX
|
||||
#ifndef __CUDACC__ // TODO: CUDA compiler blows up, fix this
|
||||
attempt([&]()
|
||||
#endif
|
||||
{
|
||||
|
@ -170,7 +170,7 @@ public:
|
|||
else
|
||||
fget(m_file, val);
|
||||
}
|
||||
#ifndef LINUX
|
||||
#ifndef __CUDACC__
|
||||
);
|
||||
#endif
|
||||
return *this;
|
||||
|
|
|
@ -7,20 +7,6 @@
|
|||
#ifndef _BASETYPES_
|
||||
#define _BASETYPES_
|
||||
|
||||
#ifdef LINUX
|
||||
typedef char16_t TCHAR;
|
||||
#include <stdarg.h>
|
||||
#define vsprintf_s vsprintf /* Not sure this is right... Malcolm */
|
||||
#include <chrono>
|
||||
#include <thread>
|
||||
#include <cstdlib>
|
||||
#include <cerrno>
|
||||
#define Linux(a) a
|
||||
#else
|
||||
#include <tchar.h>
|
||||
#endif /* LINUX */
|
||||
#include <cmath> // for HUGE_VAL // Remove for a test by Malcolm because of double isnan definition...
|
||||
|
||||
#ifndef UNDER_CE // fixed-buffer overloads not available for wince
|
||||
#ifdef _CRT_SECURE_CPP_OVERLOAD_STANDARD_NAMES // fixed-buffer overloads for strcpy() etc.
|
||||
#undef _CRT_SECURE_CPP_OVERLOAD_STANDARD_NAMES
|
||||
|
@ -91,11 +77,17 @@ OACR_WARNING_DISABLE(POTENTIAL_ARGUMENT_TYPE_MISMATCH, "Not level1 or level2_sec
|
|||
#include <stdarg.h>
|
||||
#include <map>
|
||||
#include <stdexcept>
|
||||
#include <locale> // std::wstring_convert
|
||||
#include <codecvt> // std::codecvt_utf8
|
||||
#include <locale> // std::wstring_convert
|
||||
#ifdef _MSC_VER
|
||||
#include <codecvt> // std::codecvt_utf8
|
||||
#endif
|
||||
#ifdef _WIN32
|
||||
#include <windows.h> // for CRITICAL_SECTION and Unicode conversion functions --TODO: is there a portable alternative?
|
||||
#endif
|
||||
#if __unix__
|
||||
#include <chrono>
|
||||
#include <thread>
|
||||
#endif
|
||||
|
||||
using namespace std;
|
||||
|
||||
|
@ -118,7 +110,6 @@ using namespace std;
|
|||
#define __inout_cap(x)
|
||||
#define __inout_cap_c(x)
|
||||
#endif
|
||||
#endif // LINUX
|
||||
#ifndef __out_z_cap // non-VS2005 annotations
|
||||
#define __out_cap(x)
|
||||
#define __out_z_cap(x)
|
||||
|
@ -300,36 +291,23 @@ public:
|
|||
noncopyable(){}
|
||||
};
|
||||
|
||||
|
||||
#ifdef LINUX
|
||||
// class CCritSec and CAutoLock -- simple critical section handling
|
||||
#ifndef _WIN32 // TODO: Currently only working under Windows; BROKEN otherwise, to be fixed
|
||||
#define CRITICAL_SECTION int
|
||||
void InitializeCriticalSection(int *) {}
|
||||
void DeleteCriticalSection(int *) {}
|
||||
void EnterCriticalSection(int *) {}
|
||||
void LeaveCriticalSection(int *) {}
|
||||
|
||||
#endif
|
||||
|
||||
// class CCritSec and CAutoLock -- simple critical section handling
|
||||
// TODO: Currently only working under Windows; BROKEN otherwise, to be fixed
|
||||
class CCritSec
|
||||
{
|
||||
CCritSec (const CCritSec &); CCritSec & operator= (const CCritSec &);
|
||||
#ifdef _MSC_VER
|
||||
CRITICAL_SECTION m_CritSec;
|
||||
#endif
|
||||
public:
|
||||
#ifdef _MSC_VER
|
||||
CCritSec() { InitializeCriticalSection(&m_CritSec); };
|
||||
~CCritSec() { DeleteCriticalSection(&m_CritSec); };
|
||||
void Lock() { EnterCriticalSection(&m_CritSec); };
|
||||
void Unlock() { LeaveCriticalSection(&m_CritSec); };
|
||||
#else // POSIX --TODO: need to figure this out
|
||||
CCritSec() { };
|
||||
~CCritSec() { };;
|
||||
void Lock() { };
|
||||
void Unlock() { };
|
||||
#endif
|
||||
};
|
||||
|
||||
|
||||
|
@ -427,8 +405,6 @@ public:
|
|||
};
|
||||
#endif
|
||||
|
||||
#endif /* LINUX */
|
||||
|
||||
};}; // namespace
|
||||
|
||||
#if 0 //ndef BASETYPES_NO_UNSAFECRTOVERLOAD // if on, no unsafe CRT overload functions
|
||||
|
@ -583,13 +559,12 @@ typedef strfun::_strprintf<wchar_t> wstrprintf; // wchar_t version
|
|||
#endif
|
||||
|
||||
// string-encoding conversion functions
|
||||
#ifdef _WIN32
|
||||
struct utf8 : std::string { utf8 (const std::wstring & p) // utf-16 to -8
|
||||
{
|
||||
//TODO: confirm it builds on VS2013
|
||||
std::wstring_convert<std::codecvt_utf8_utf16<wchar_t>> cv;
|
||||
(*(std::string*)this) = cv.to_bytes(p);
|
||||
#ifdef MALCOLM
|
||||
#if 1
|
||||
std::wstring_convert<std::codecvt_utf8_utf16<wchar_t>> cv;
|
||||
(*(std::string*)this) = cv.to_bytes(p);
|
||||
#else // old version, delete once we know it works
|
||||
size_t len = p.length();
|
||||
if (len == 0) { return;} // empty string
|
||||
msra::basetypes::fixed_vector<char> buf (3 * len + 1); // max: 1 wchar => up to 3 mb chars
|
||||
|
@ -599,14 +574,14 @@ struct utf8 : std::string { utf8 (const std::wstring & p) // utf-16 to -8
|
|||
&buf[0], (int) buf.size(), NULL, NULL);
|
||||
if (rc == 0) throw std::runtime_error ("WideCharToMultiByte");
|
||||
(*(std::string*)this) = &buf[0];
|
||||
#endif /* Malcolm */
|
||||
#endif
|
||||
}};
|
||||
struct utf16 : std::wstring { utf16 (const std::string & p) // utf-8 to -16
|
||||
{
|
||||
#if 1
|
||||
std::wstring_convert<std::codecvt_utf8_utf16<wchar_t>> cv;
|
||||
(*(std::wstring*)this) = cv.from_bytes(p);
|
||||
|
||||
#ifdef OLD
|
||||
#else // old version, delete once we know it works
|
||||
size_t len = p.length();
|
||||
if (len == 0) { return;} // empty string
|
||||
msra::basetypes::fixed_vector<wchar_t> buf (len + 1);
|
||||
|
@ -617,29 +592,8 @@ struct utf16 : std::wstring { utf16 (const std::string & p) // utf-8 to -16
|
|||
if (rc == 0) throw std::runtime_error("MultiByteToWideChar");
|
||||
ASSERT(rc < buf.size());
|
||||
(*(std::wstring*)this) = &buf[0];
|
||||
#endif /* Malcolm */
|
||||
}};
|
||||
#else // TODO: complete this once we are building on actual Linux, currently using default locale instead of UTF-8 locale
|
||||
static inline std::string utf8(const std::wstring & p) // output: UTF-8
|
||||
{
|
||||
size_t len = p.length();
|
||||
msra::basetypes::fixed_vector<char> buf(2 * len + 1); // max: 1 wchar => 2 mb chars
|
||||
std::fill(buf.begin(), buf.end(), 0);
|
||||
// BUGBUG: We need to set the locale, so for now this only works for plain ASCII
|
||||
::wcstombs(&buf[0], p.c_str(), 2 * len + 1);
|
||||
return std::string(&buf[0]);
|
||||
}
|
||||
static inline std::wstring utf16(const std::string & p) // input: UTF-8
|
||||
{
|
||||
size_t len = p.length();
|
||||
msra::basetypes::fixed_vector<wchar_t> buf(len + 1); // max: >1 mb chars => 1 wchar
|
||||
std::fill(buf.begin(), buf.end(), (wchar_t)0);
|
||||
OACR_WARNING_SUPPRESS(UNSAFE_STRING_FUNCTION, "Reviewed OK. size checked. [rogeryu 2006/03/21]");
|
||||
// BUGBUG: We need to set the locale, so for now this only works for plain ASCII
|
||||
::mbstowcs(&buf[0], p.c_str(), len + 1);
|
||||
return std::wstring(&buf[0]);
|
||||
}
|
||||
#endif
|
||||
}};
|
||||
|
||||
#pragma warning(push)
|
||||
#pragma warning(disable : 4996) // Reviewed by Yusheng Li, March 14, 2006. depr. fn (wcstombs, mbstowcs)
|
||||
|
@ -647,16 +601,18 @@ static inline std::string wcstombs (const std::wstring & p) // output: MBCS
|
|||
{
|
||||
size_t len = p.length();
|
||||
msra::basetypes::fixed_vector<char> buf (2 * len + 1); // max: 1 wchar => 2 mb chars
|
||||
#ifdef MALCOLM
|
||||
std::fill (buf.begin (), buf.end (), 0);
|
||||
::wcstombs (&buf[0], p.c_str(), 2 * len + 1);
|
||||
#endif /* Malcolm */
|
||||
return std::string (&buf[0]);
|
||||
}
|
||||
static inline std::wstring mbstowcs (const std::string & p) // input: MBCS
|
||||
{
|
||||
std::wstring ret = utf16(p);
|
||||
return ret;
|
||||
size_t len = p.length();
|
||||
msra::basetypes::fixed_vector<wchar_t> buf(len + 1); // max: >1 mb chars => 1 wchar
|
||||
std::fill(buf.begin(), buf.end(), (wchar_t)0);
|
||||
OACR_WARNING_SUPPRESS(UNSAFE_STRING_FUNCTION, "Reviewed OK. size checked. [rogeryu 2006/03/21]");
|
||||
::mbstowcs(&buf[0], p.c_str(), len + 1);
|
||||
return std::wstring(&buf[0]);
|
||||
}
|
||||
#pragma warning(pop)
|
||||
|
||||
|
@ -797,7 +753,7 @@ public:
|
|||
auto_file_ptr() : f (NULL) { }
|
||||
~auto_file_ptr() { close(); }
|
||||
auto_file_ptr (const char * path, const char * mode) { f = fopen (path, mode); if (f == NULL) openfailed (path); }
|
||||
auto_file_ptr (const wchar_t * wpath, const char * mode) {string path = msra::strfun::utf8(wpath); f = fopen (path.c_str(), mode); if (f == NULL) openfailed (path); }
|
||||
auto_file_ptr (const wchar_t * wpath, const char * mode) { f = _wfopen (wpath, msra::strfun::utf16 (mode).c_str()); if (f == NULL) openfailed (msra::strfun::utf8 (wpath)); }
|
||||
FILE * operator= (FILE * other) { close(); f = other; return f; }
|
||||
auto_file_ptr (FILE * other) : f (other) { }
|
||||
operator FILE * () const { return f; }
|
||||
|
@ -825,7 +781,6 @@ public:
|
|||
typedef auto_handle_t<HANDLE> auto_handle;
|
||||
#endif
|
||||
|
||||
#ifdef MALCOLM
|
||||
// like auto_ptr but calls freeFunc_p (type free_func_t) instead of delete to clean up
|
||||
// minor difference - wrapped object is T, not T *, so to wrap a
|
||||
// T *, use auto_clean<T *>
|
||||
|
@ -845,7 +800,6 @@ public:
|
|||
operator const T () const { return it; }
|
||||
T detach () { T tmp = it; it = 0; return tmp; } // release ownership of object
|
||||
};
|
||||
#endif /* MALCOLM */
|
||||
|
||||
#if 0
|
||||
// simple timer
|
||||
|
@ -888,23 +842,12 @@ namespace msra { namespace files {
|
|||
|
||||
class textreader
|
||||
{
|
||||
#ifndef LINUX
|
||||
msra::basetypes::auto_file_ptr f;
|
||||
#else
|
||||
FILE *f;
|
||||
#endif /* LINUX */
|
||||
std::vector<char> buf; // read buffer (will only grow, never shrink)
|
||||
int ch; // next character (we need to read ahead by one...)
|
||||
char getch() { char prevch = (char) ch; ch = fgetc (f); return prevch; }
|
||||
public:
|
||||
#ifndef LINUX
|
||||
textreader (const std::wstring & path) : f (path.c_str(), "rb") { buf.reserve (10000); ch = fgetc (f); }
|
||||
#else
|
||||
textreader (const std::wstring & path) {
|
||||
f = fopen((char *)path.c_str(), "rb");
|
||||
ch = fgetc(f); /* I Think this is right ... Malcolm */
|
||||
}
|
||||
#endif /* LINUX */
|
||||
operator bool() const { return ch != EOF; } // true if still a line to read
|
||||
std::string getline() // get and consume the next line
|
||||
{
|
||||
|
|
|
@ -1370,26 +1370,24 @@ vector<char*> msra::files::fgetfilelines (const wstring & path, vector<char> & b
|
|||
}
|
||||
|
||||
// ----------------------------------------------------------------------------
|
||||
// getfiletime(), setfiletime(): access modification time
|
||||
// getfiletime(): access modification time
|
||||
// ----------------------------------------------------------------------------
|
||||
|
||||
bool getfiletime (const wstring & path, FILETIME & time)
|
||||
{ // return file modification time, false if cannot be determined
|
||||
struct _stat buf;
|
||||
int result;
|
||||
#if 1
|
||||
struct _stat buf;
|
||||
int result;
|
||||
|
||||
// Get data associated with "crt_stat.c":
|
||||
result = _wstat(path.c_str(), &buf);
|
||||
// Check if statistics are valid:
|
||||
if( result != 0 )
|
||||
{
|
||||
return false;
|
||||
}
|
||||
// Get data associated with "crt_stat.c":
|
||||
result = _wstat(path.c_str(), &buf);
|
||||
// Check if statistics are valid:
|
||||
if (result != 0)
|
||||
return false;
|
||||
|
||||
(*(time_t*)(&time))= buf.st_mtime;
|
||||
return true;
|
||||
|
||||
#ifdef OLD
|
||||
(*(time_t*)(&time)) = buf.st_mtime;
|
||||
return true;
|
||||
#else // old version, delete once above is tested
|
||||
WIN32_FIND_DATAW findFileData;
|
||||
auto_handle hFind (FindFirstFileW (path.c_str(), &findFileData), ::FindClose);
|
||||
if (hFind != INVALID_HANDLE_VALUE)
|
||||
|
@ -1404,11 +1402,9 @@ bool getfiletime (const wstring & path, FILETIME & time)
|
|||
#endif
|
||||
}
|
||||
|
||||
#if 0
|
||||
void setfiletime (const wstring & path, const FILETIME & time)
|
||||
{ // update the file modification time of an existing file
|
||||
#ifdef LINUX
|
||||
throw new logic_error("setfiletime has not been converted to linux yet...");
|
||||
#else
|
||||
auto_handle h (CreateFileW (path.c_str(), FILE_WRITE_ATTRIBUTES,
|
||||
FILE_SHARE_READ|FILE_SHARE_WRITE, NULL,
|
||||
OPEN_EXISTING, FILE_ATTRIBUTE_NORMAL, NULL));
|
||||
|
@ -1421,8 +1417,8 @@ void setfiletime (const wstring & path, const FILETIME & time)
|
|||
{
|
||||
RuntimeError ("setfiletime: error setting file time information: %d", GetLastError());
|
||||
}
|
||||
#endif
|
||||
}
|
||||
#endif
|
||||
|
||||
#if 0
|
||||
// ----------------------------------------------------------------------------
|
||||
|
|
Разница между файлами не показана из-за своего большого размера
Загрузить разницу
|
@ -7,21 +7,20 @@
|
|||
#include <vector>
|
||||
#include <stdio.h>
|
||||
#include <ctime>
|
||||
#include <limits.h> /* LINUX */
|
||||
#include <limits.h>
|
||||
#include "File.h"
|
||||
#include "Helpers.h"
|
||||
#include "CommonMatrix.h"
|
||||
|
||||
#ifndef LINUX
|
||||
#ifdef _WIN32
|
||||
#ifdef MATH_EXPORTS
|
||||
#define MATH_API __declspec(dllexport)
|
||||
#else
|
||||
#define MATH_API __declspec(dllimport)
|
||||
#endif
|
||||
|
||||
#else /* LINUX */
|
||||
#else // no DLLs on Linux
|
||||
#define MATH_API
|
||||
#endif /* LINUX */
|
||||
#endif
|
||||
|
||||
#ifndef USE_TIME_BASED_SEED
|
||||
#define USE_TIME_BASED_SEED ULONG_MAX
|
||||
|
@ -57,8 +56,8 @@ namespace Microsoft { namespace MSR { namespace CNTK {
|
|||
~CPUMatrix();
|
||||
|
||||
public:
|
||||
size_t BufferSize() const {return this->m_numRows*this->m_numCols*sizeof(ElemType);}
|
||||
ElemType* BufferPointer() const {return this->m_pArray;}
|
||||
size_t BufferSize() const {return m_numRows*m_numCols*sizeof(ElemType);}
|
||||
ElemType* BufferPointer() const {return m_pArray;}
|
||||
|
||||
CPUMatrix<ElemType> ColumnSlice(size_t startColumn, size_t numCols) const;
|
||||
CPUMatrix<ElemType>& AssignColumnSlice(const CPUMatrix<ElemType>& fromMatrix, size_t startColumn, size_t numCols);
|
||||
|
@ -79,15 +78,15 @@ namespace Microsoft { namespace MSR { namespace CNTK {
|
|||
|
||||
inline ElemType& operator() (const size_t row, const size_t col)
|
||||
{
|
||||
return this->m_pArray[LocateElement(row, col)];
|
||||
return m_pArray[LocateElement(row, col)];
|
||||
}
|
||||
inline const ElemType& operator() (const size_t row, const size_t col) const
|
||||
{
|
||||
return this->m_pArray[LocateElement(row, col)];
|
||||
return m_pArray[LocateElement(row, col)];
|
||||
}
|
||||
inline ElemType Get00Element() const
|
||||
{
|
||||
return this->m_pArray[0];
|
||||
return m_pArray[0];
|
||||
}
|
||||
|
||||
void SetValue(const ElemType v);
|
||||
|
@ -280,7 +279,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
|
|||
static void SVD(const CPUMatrix<ElemType>& A, CPUMatrix<ElemType>& SIGMA, CPUMatrix<ElemType>& U, CPUMatrix<ElemType>& VT);
|
||||
|
||||
static void MultiplyAndWeightedAdd(ElemType alpha, const CPUMatrix<ElemType>& a, const bool transposeA, const CPUMatrix<ElemType>& b, const bool transposeB,
|
||||
ElemType beta, CPUMatrix<ElemType>& c);
|
||||
ElemType beta, CPUMatrix<ElemType>& c);
|
||||
static void MultiplyAndAdd(const CPUMatrix<ElemType>& a, const bool transposeA, const CPUMatrix<ElemType>& b, const bool transposeB, CPUMatrix<ElemType>& c);
|
||||
static void Multiply(const CPUMatrix<ElemType>& a, const bool transposeA, const CPUMatrix<ElemType>& b, const bool transposeB, CPUMatrix<ElemType>& c);
|
||||
static void Multiply(const CPUMatrix<ElemType>& a, const CPUMatrix<ElemType>& b, CPUMatrix<ElemType>& c);
|
||||
|
@ -353,7 +352,6 @@ namespace Microsoft { namespace MSR { namespace CNTK {
|
|||
|
||||
|
||||
protected:
|
||||
// Was inline.. but without definition, it doesn't make sense.
|
||||
size_t LocateElement (const size_t i, const size_t j) const;
|
||||
size_t LocateColumn (const size_t j) const;
|
||||
|
||||
|
|
|
@ -15,9 +15,9 @@
|
|||
#include "CPUSparseMatrix.h"
|
||||
#include <random>
|
||||
#include <chrono>
|
||||
#ifndef LINUX
|
||||
#ifdef _WIN32
|
||||
#include <Windows.h>
|
||||
#endif /* LINUX */
|
||||
#endif
|
||||
#ifdef LEAKDETECT
|
||||
#include <vld.h>
|
||||
#endif
|
||||
|
@ -92,77 +92,78 @@ namespace Microsoft { namespace MSR { namespace CNTK {
|
|||
template<class ElemType>
|
||||
void CPUSparseMatrix<ElemType>::ZeroInit()
|
||||
{
|
||||
this->m_numRows = 0;
|
||||
this->m_numCols = 0;
|
||||
this->m_elemSizeAllocated = 0;
|
||||
this->m_externalBuffer = false;
|
||||
this->m_pArray = NULL;
|
||||
this->m_computeDevice = CPUDEVICE;
|
||||
this->m_nz = 0;
|
||||
this->m_matrixName = NULL;
|
||||
m_numRows = 0;
|
||||
m_numCols = 0;
|
||||
m_elemSizeAllocated = 0;
|
||||
m_externalBuffer = false;
|
||||
m_pArray = NULL;
|
||||
m_computeDevice = CPUDEVICE;
|
||||
m_nz = 0;
|
||||
m_matrixName = NULL;
|
||||
|
||||
if(this->m_format == MatrixFormat::matrixFormatSparseCSC || this->m_format == MatrixFormat::matrixFormatSparseCSR)
|
||||
if(m_format == MatrixFormat::matrixFormatSparseCSC || m_format == MatrixFormat::matrixFormatSparseCSR)
|
||||
{
|
||||
this->m_colIdx = -1;
|
||||
this->m_val = NULL;
|
||||
this->m_row = NULL;
|
||||
this->m_pb = NULL;
|
||||
m_colIdx = -1;
|
||||
m_val = NULL;
|
||||
m_row = NULL;
|
||||
m_pb = NULL;
|
||||
}
|
||||
else if (this->m_format == MatrixFormat::matrixFormatSparseBlockCol || this->m_format == MatrixFormat::matrixFormatSparseBlockRow)
|
||||
else if (m_format == MatrixFormat::matrixFormatSparseBlockCol || m_format == MatrixFormat::matrixFormatSparseBlockRow)
|
||||
{
|
||||
this->m_blockSize = 0;
|
||||
this->m_blockVal = NULL;
|
||||
this->m_blockIds = NULL;
|
||||
m_blockSize = 0;
|
||||
m_blockVal = NULL;
|
||||
m_blockIds = NULL;
|
||||
}
|
||||
}
|
||||
|
||||
template<class ElemType>
|
||||
CPUSparseMatrix<ElemType>::CPUSparseMatrix(const MatrixFormat format)
|
||||
{
|
||||
this->CheckInit(format);
|
||||
}
|
||||
|
||||
//should only be used by constructors.
|
||||
template<class ElemType>
|
||||
void CPUSparseMatrix<ElemType>::CheckInit(const MatrixFormat format)
|
||||
{
|
||||
if(format != MatrixFormat::matrixFormatSparseCSC && format != MatrixFormat::matrixFormatSparseCSR && format != MatrixFormat::matrixFormatSparseBlockCol && format != MatrixFormat::matrixFormatSparseBlockRow)
|
||||
{
|
||||
if (format != MatrixFormat::matrixFormatSparseCSC && format != MatrixFormat::matrixFormatSparseCSR && format != MatrixFormat::matrixFormatSparseBlockCol && format != MatrixFormat::matrixFormatSparseBlockRow)
|
||||
{
|
||||
throw std::logic_error("CPUSparseMatrix: unsupported sparse matrix format");
|
||||
}
|
||||
this->m_format = format;
|
||||
m_format = format;
|
||||
ZeroInit();
|
||||
}
|
||||
|
||||
template<class ElemType>
|
||||
CPUSparseMatrix<ElemType>::CPUSparseMatrix(const MatrixFormat format)
|
||||
{
|
||||
CheckInit(format);
|
||||
}
|
||||
|
||||
template<class ElemType>
|
||||
CPUSparseMatrix<ElemType>::CPUSparseMatrix(const MatrixFormat format, const size_t numRows, const size_t numCols, const size_t size)
|
||||
{ this->CheckInit(format);
|
||||
{
|
||||
CheckInit(format);
|
||||
Resize(numRows, numCols, size);
|
||||
}
|
||||
|
||||
template<class ElemType>
|
||||
CPUSparseMatrix<ElemType>::~CPUSparseMatrix()
|
||||
{
|
||||
if (this->m_matrixName!=NULL)
|
||||
if (m_matrixName!=NULL)
|
||||
{
|
||||
delete[] this->m_matrixName;
|
||||
this->m_matrixName = nullptr;
|
||||
delete[] m_matrixName;
|
||||
m_matrixName = nullptr;
|
||||
}
|
||||
if(this->m_format == MatrixFormat::matrixFormatSparseCSC || this->m_format == MatrixFormat::matrixFormatSparseCSR)
|
||||
if(m_format == MatrixFormat::matrixFormatSparseCSC || m_format == MatrixFormat::matrixFormatSparseCSR)
|
||||
{
|
||||
if(this->m_val != NULL)
|
||||
delete[] this->m_val;
|
||||
if(this->m_row != NULL)
|
||||
delete[] this->m_row;
|
||||
if(this->m_pb != NULL)
|
||||
delete[] this->m_pb;
|
||||
if(m_val != NULL)
|
||||
delete[] m_val;
|
||||
if(m_row != NULL)
|
||||
delete[] m_row;
|
||||
if(m_pb != NULL)
|
||||
delete[] m_pb;
|
||||
}
|
||||
else if (this->m_format == MatrixFormat::matrixFormatSparseBlockCol || this->m_format == MatrixFormat::matrixFormatSparseBlockRow)
|
||||
else if (m_format == MatrixFormat::matrixFormatSparseBlockCol || m_format == MatrixFormat::matrixFormatSparseBlockRow)
|
||||
{
|
||||
if(this->m_blockVal != NULL)
|
||||
delete[] this->m_blockVal;
|
||||
if(this->m_blockIds != NULL)
|
||||
delete[] this->m_blockIds;
|
||||
if(m_blockVal != NULL)
|
||||
delete[] m_blockVal;
|
||||
if(m_blockIds != NULL)
|
||||
delete[] m_blockIds;
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -176,76 +177,76 @@ namespace Microsoft { namespace MSR { namespace CNTK {
|
|||
template<class ElemType>
|
||||
void CPUSparseMatrix<ElemType>::SetValue(const size_t rIdx, const size_t cIdx, const ElemType v)
|
||||
{
|
||||
if(this->m_format != MatrixFormat::matrixFormatSparseCSC && this->m_format != MatrixFormat::matrixFormatSparseCSR)
|
||||
if(m_format != MatrixFormat::matrixFormatSparseCSC && m_format != MatrixFormat::matrixFormatSparseCSR)
|
||||
{
|
||||
throw std::logic_error("CPUSparseMatrix: unsupported SetValue() call.");
|
||||
}
|
||||
|
||||
if(this->m_elemSizeAllocated < this->m_nz +1) {
|
||||
if(m_elemSizeAllocated < m_nz +1) {
|
||||
throw std::logic_error("CPUSparseMatrix: allocated size is too small.");
|
||||
}
|
||||
|
||||
if(rIdx < 0 || rIdx >= this->m_numRows) {
|
||||
if(rIdx < 0 || rIdx >= m_numRows) {
|
||||
throw std::logic_error("CPUSparseMatrix: SetValue() invalid row id");
|
||||
}
|
||||
|
||||
if(cIdx < 0 || cIdx >= this->m_numCols) {
|
||||
if(cIdx < 0 || cIdx >= m_numCols) {
|
||||
throw std::logic_error("CPUSparseMatrix: SetValue() invalid column id");
|
||||
}
|
||||
|
||||
size_t r = (this->m_format == matrixFormatSparseCSC) ? rIdx: cIdx;
|
||||
size_t c = (this->m_format == matrixFormatSparseCSC) ? cIdx: rIdx;
|
||||
size_t r = (m_format == matrixFormatSparseCSC) ? rIdx: cIdx;
|
||||
size_t c = (m_format == matrixFormatSparseCSC) ? cIdx: rIdx;
|
||||
|
||||
this->m_val[this->m_nz] = v;
|
||||
this->m_row[this->m_nz] = r;
|
||||
m_val[m_nz] = v;
|
||||
m_row[m_nz] = r;
|
||||
|
||||
//consistency check
|
||||
if(c == this->m_colIdx && r <= this->m_row[this->m_nz-1])
|
||||
if(c == m_colIdx && r <= m_row[m_nz-1])
|
||||
{
|
||||
throw std::logic_error("CPUSparseMatrix: SetValue is not called properly");
|
||||
}
|
||||
|
||||
if (c != this->m_colIdx)
|
||||
if (c != m_colIdx)
|
||||
{
|
||||
m_pb[c] = m_nz;
|
||||
m_colIdx = (int) c;
|
||||
}
|
||||
this->m_pb[c+1] = this->m_nz+1;
|
||||
this->m_nz++;
|
||||
m_pb[c+1] = m_nz+1;
|
||||
m_nz++;
|
||||
}
|
||||
|
||||
template<class ElemType>
|
||||
ElemType* CPUSparseMatrix<ElemType>::BufferPointer() const
|
||||
{
|
||||
if(this->m_format == MatrixFormat::matrixFormatSparseCSC || this->m_format == MatrixFormat::matrixFormatSparseCSR)
|
||||
if(m_format == MatrixFormat::matrixFormatSparseCSC || m_format == MatrixFormat::matrixFormatSparseCSR)
|
||||
{
|
||||
return this->m_val;
|
||||
return m_val;
|
||||
}
|
||||
else
|
||||
{
|
||||
return this->m_blockVal;
|
||||
return m_blockVal;
|
||||
}
|
||||
}
|
||||
|
||||
template<class ElemType>
|
||||
void CPUSparseMatrix<ElemType>::Resize(const size_t numRows, const size_t numCols, size_t size)
|
||||
{
|
||||
this->m_nz = 0;
|
||||
this->m_colIdx = -1;
|
||||
this->m_numRows = numRows;
|
||||
this->m_numCols = numCols;
|
||||
m_nz = 0;
|
||||
m_colIdx = -1;
|
||||
m_numRows = numRows;
|
||||
m_numCols = numCols;
|
||||
|
||||
if(this->m_elemSizeAllocated < size)
|
||||
if(m_elemSizeAllocated < size)
|
||||
{
|
||||
this->m_elemSizeAllocated = size;
|
||||
if(this->m_format == MatrixFormat::matrixFormatSparseCSC || this->m_format == MatrixFormat::matrixFormatSparseCSR)
|
||||
m_elemSizeAllocated = size;
|
||||
if(m_format == MatrixFormat::matrixFormatSparseCSC || m_format == MatrixFormat::matrixFormatSparseCSR)
|
||||
{
|
||||
if(this->m_val != NULL)
|
||||
delete[] this->m_val;
|
||||
if(this->m_row != NULL)
|
||||
delete[] this->m_row;
|
||||
if(this->m_pb != NULL)
|
||||
delete[] this->m_pb;
|
||||
if(m_val != NULL)
|
||||
delete[] m_val;
|
||||
if(m_row != NULL)
|
||||
delete[] m_row;
|
||||
if(m_pb != NULL)
|
||||
delete[] m_pb;
|
||||
|
||||
//int len = m_format == MatrixFormat::matrixFormatSparseCSC ? numCols : numRows;
|
||||
size_t len = numCols > numRows ? numCols : numRows;
|
||||
|
@ -254,12 +255,12 @@ namespace Microsoft { namespace MSR { namespace CNTK {
|
|||
m_pb = new size_t[len+1];
|
||||
|
||||
}
|
||||
else if(this->m_format == MatrixFormat::matrixFormatSparseBlockCol || this->m_format == MatrixFormat::matrixFormatSparseBlockRow)
|
||||
else if(m_format == MatrixFormat::matrixFormatSparseBlockCol || m_format == MatrixFormat::matrixFormatSparseBlockRow)
|
||||
{
|
||||
if(this->m_blockVal != NULL)
|
||||
delete[] this->m_blockVal;
|
||||
if(this->m_blockIds != NULL)
|
||||
delete[] this->m_blockIds;
|
||||
if(m_blockVal != NULL)
|
||||
delete[] m_blockVal;
|
||||
if(m_blockIds != NULL)
|
||||
delete[] m_blockIds;
|
||||
|
||||
size_t max = numCols > numRows ? numCols : numRows;
|
||||
m_blockVal = new ElemType[size];
|
||||
|
@ -272,9 +273,9 @@ namespace Microsoft { namespace MSR { namespace CNTK {
|
|||
template<class ElemType>
|
||||
void CPUSparseMatrix<ElemType>::Reset()
|
||||
{
|
||||
this->m_nz = 0;
|
||||
this->m_colIdx = -1;
|
||||
this->m_blockSize = 0;
|
||||
m_nz = 0;
|
||||
m_colIdx = -1;
|
||||
m_blockSize = 0;
|
||||
}
|
||||
|
||||
//c = op(a) * op(this) or c += op(a) * op(this)
|
||||
|
@ -498,11 +499,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
|
|||
}
|
||||
else
|
||||
{
|
||||
#ifndef LINUX
|
||||
throw std::exception("CPUSparseMatrix:: ScaleAndAdd() Not implemented");
|
||||
#else
|
||||
throw std::exception();
|
||||
#endif /* LINUX */
|
||||
throw std::runtime_error("CPUSparseMatrix:: ScaleAndAdd() Not implemented");
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -522,11 +519,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
|
|||
throw std::logic_error("AssignSoftmaxOf: Matrix a, class, idx2cls or label is empty.");
|
||||
|
||||
if(etp.GetFormat() != MatrixFormat::matrixFormatSparseCSC)
|
||||
#ifndef LINUX
|
||||
throw std::exception("CPUSparseMatrix:: ClassEntropy() only support CSC");
|
||||
#else
|
||||
throw std::exception();
|
||||
#endif /* LINUX */
|
||||
throw std::runtime_error("CPUSparseMatrix:: ClassEntropy() only support CSC");
|
||||
|
||||
size_t nC = cls.GetNumCols();
|
||||
size_t nV = label.GetNumRows() - nC;
|
||||
|
@ -695,11 +688,11 @@ namespace Microsoft { namespace MSR { namespace CNTK {
|
|||
{
|
||||
if (c.IsEmpty())
|
||||
{
|
||||
c.Resize(this->GetNumRows(), this->GetNumCols());
|
||||
c.Resize(GetNumRows(), GetNumCols());
|
||||
c.SetValue(0.0);
|
||||
}
|
||||
|
||||
if(this->m_format == MatrixFormat::matrixFormatSparseBlockCol || this->m_format == MatrixFormat::matrixFormatSparseBlockRow)
|
||||
if(m_format == MatrixFormat::matrixFormatSparseBlockCol || m_format == MatrixFormat::matrixFormatSparseBlockRow)
|
||||
{
|
||||
for(size_t j = 0; j < m_blockSize; j++)
|
||||
{
|
||||
|
@ -718,11 +711,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
|
|||
}
|
||||
else
|
||||
{
|
||||
#ifndef LINUX
|
||||
throw std::exception("CPUSparseMatrix:: NormalGrad() only support block sparse format");
|
||||
#else
|
||||
throw std::exception();
|
||||
#endif /* LINUX */
|
||||
throw std::runtime_error("CPUSparseMatrix:: NormalGrad() only support block sparse format");
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -732,12 +721,12 @@ namespace Microsoft { namespace MSR { namespace CNTK {
|
|||
{
|
||||
if (c.IsEmpty())
|
||||
{
|
||||
c.Resize(this->GetNumRows(), this->GetNumCols());
|
||||
c.Resize(GetNumRows(), GetNumCols());
|
||||
c.SetValue(0.0);
|
||||
}
|
||||
|
||||
const ElemType floor = 1e-16f;
|
||||
if(this->m_format == MatrixFormat::matrixFormatSparseCSC || this->m_format == MatrixFormat::matrixFormatSparseCSR)
|
||||
if(m_format == MatrixFormat::matrixFormatSparseCSC || m_format == MatrixFormat::matrixFormatSparseCSR)
|
||||
{
|
||||
size_t col_num = (m_format == MatrixFormat::matrixFormatSparseCSC) ? GetNumCols() : GetNumRows();
|
||||
for(size_t j = 0; j < col_num; j++)
|
||||
|
@ -758,7 +747,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
|
|||
c(row, col) = adenorm;
|
||||
}
|
||||
}
|
||||
} else if(this->m_format == MatrixFormat::matrixFormatSparseBlockCol || this->m_format == MatrixFormat::matrixFormatSparseBlockRow)
|
||||
} else if(m_format == MatrixFormat::matrixFormatSparseBlockCol || m_format == MatrixFormat::matrixFormatSparseBlockRow)
|
||||
{
|
||||
for(size_t j = 0; j < m_blockSize; j++)
|
||||
{
|
||||
|
@ -767,7 +756,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
|
|||
size_t start = j* len;
|
||||
for(size_t p = start; p < start+len; p++)
|
||||
{
|
||||
ElemType val = this->m_blockVal[p];
|
||||
ElemType val = m_blockVal[p];
|
||||
|
||||
size_t row = (m_format == MatrixFormat::matrixFormatSparseBlockCol) ? (p - start) : i;
|
||||
size_t col = (m_format == MatrixFormat::matrixFormatSparseBlockCol) ? i : (p - start);
|
||||
|
@ -784,7 +773,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
|
|||
template<class ElemType>
|
||||
CPUSparseMatrix<ElemType>& CPUSparseMatrix<ElemType>::InplaceTruncate (const ElemType threshold)
|
||||
{
|
||||
if(this->m_format == MatrixFormat::matrixFormatSparseBlockCol || this->m_format == MatrixFormat::matrixFormatSparseBlockRow)
|
||||
if(m_format == MatrixFormat::matrixFormatSparseBlockCol || m_format == MatrixFormat::matrixFormatSparseBlockRow)
|
||||
{
|
||||
ElemType locThresholdPos = abs(threshold);
|
||||
ElemType locTHresholdNeg = -locThresholdPos;
|
||||
|
@ -795,24 +784,20 @@ namespace Microsoft { namespace MSR { namespace CNTK {
|
|||
size_t start = j* len;
|
||||
for (size_t p = start; p < start+len; p++)
|
||||
{
|
||||
if (this->m_blockVal[p] > locThresholdPos)
|
||||
if (m_blockVal[p] > locThresholdPos)
|
||||
{
|
||||
this->m_blockVal[p] = locThresholdPos;
|
||||
m_blockVal[p] = locThresholdPos;
|
||||
}
|
||||
else if (this->m_blockVal[p] < locTHresholdNeg)
|
||||
else if (m_blockVal[p] < locTHresholdNeg)
|
||||
{
|
||||
this->m_blockVal[p] = locTHresholdNeg;
|
||||
m_blockVal[p] = locTHresholdNeg;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
#ifndef LINUX
|
||||
throw std::exception("CPUSparseMatrix:: InplaceTruncate() only support block based sparse matrix");
|
||||
#else
|
||||
throw std::exception();
|
||||
#endif /* LINUX */
|
||||
throw std::runtime_error("CPUSparseMatrix:: InplaceTruncate() only support block based sparse matrix");
|
||||
}
|
||||
return *this;
|
||||
}
|
||||
|
|
|
@ -10,7 +10,7 @@
|
|||
#include <map>
|
||||
#include <unordered_map>
|
||||
|
||||
#ifndef LINUX
|
||||
#ifdef _WIN32
|
||||
#ifdef MATH_EXPORTS
|
||||
#define MATH_API __declspec(dllexport)
|
||||
#else
|
||||
|
@ -40,7 +40,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
|
|||
|
||||
void ShiftBy(int /*numShift*/) { NOT_IMPLEMENTED; }
|
||||
|
||||
size_t BufferSize() const {return this->m_elemSizeAllocated*sizeof(ElemType);}
|
||||
size_t BufferSize() const {return m_elemSizeAllocated*sizeof(ElemType);}
|
||||
ElemType* BufferPointer() const;
|
||||
|
||||
void SetGaussianRandomValue(const ElemType /*mean*/, const ElemType /*sigma*/, unsigned long /*seed*/) { NOT_IMPLEMENTED; }
|
||||
|
|
|
@ -8,11 +8,6 @@
|
|||
#include <string>
|
||||
#include <stdint.h>
|
||||
|
||||
#ifdef LINUX
|
||||
#define wcsnlen_s wcsnlen /* Not sure if this is best replacement... Malcolm */
|
||||
// typedef char wchar_t;
|
||||
#endif
|
||||
|
||||
#define AUTOPLACEMATRIX 1000 // used in parameters only
|
||||
#define MANAGEDEXTERN -2 // managed externally (i.e. PTask)
|
||||
#define CPUDEVICE -1 // device is the CPU
|
||||
|
@ -83,9 +78,9 @@ namespace Microsoft { namespace MSR { namespace CNTK {
|
|||
void SetMatrixName(const wchar_t* s)
|
||||
{
|
||||
Clear();
|
||||
if (s!=NULL)
|
||||
if (s!=nullptr)
|
||||
{
|
||||
size_t n = wcsnlen_s(s, SIZE_MAX);
|
||||
size_t n = wcslen(s);
|
||||
m_matrixName = new wchar_t[n+1];
|
||||
wmemcpy(m_matrixName,s,n+1);
|
||||
}
|
||||
|
@ -108,10 +103,10 @@ namespace Microsoft { namespace MSR { namespace CNTK {
|
|||
protected:
|
||||
void Clear()
|
||||
{
|
||||
if (m_matrixName!=NULL)
|
||||
if (m_matrixName!=nullptr)
|
||||
{
|
||||
delete[] m_matrixName;
|
||||
m_matrixName = NULL;
|
||||
m_matrixName = nullptr;
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -3,6 +3,9 @@
|
|||
// Copyright (c) Microsoft Corporation. All rights reserved.
|
||||
// </copyright>
|
||||
//
|
||||
|
||||
#ifdef CPUONLY
|
||||
|
||||
#include "GPUMatrix.cuh"
|
||||
#include "GPUSparseMatrix.cuh"
|
||||
|
||||
|
@ -1661,7 +1664,4 @@ GPUWatcher::~GPUWatcher(void)
|
|||
{
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
#endif // CPUONLY
|
||||
|
|
Разница между файлами не показана из-за своего большого размера
Загрузить разницу
|
@ -17,7 +17,7 @@ typedef struct cublasContext *cublasHandle_t;
|
|||
struct CUstream_st;
|
||||
typedef struct CUstream_st *cudaStream_t;
|
||||
|
||||
#ifndef LINUX
|
||||
#ifdef _WIN32
|
||||
#ifndef MATH_API
|
||||
#ifdef MATH_EXPORTS
|
||||
#define MATH_API __declspec(dllexport)
|
||||
|
@ -25,7 +25,7 @@ typedef struct CUstream_st *cudaStream_t;
|
|||
#define MATH_API __declspec(dllimport)
|
||||
#endif
|
||||
#endif /* MATH_API */
|
||||
#else /* LINUX */
|
||||
#else // no DLLs in Linux
|
||||
#define MATH_API
|
||||
#endif
|
||||
|
||||
|
@ -51,12 +51,10 @@ namespace Microsoft { namespace MSR { namespace CNTK {
|
|||
public:
|
||||
DeviceBoundNumber() {m_data=NULL;};
|
||||
DeviceBoundNumber(const DeviceBoundNumber<ElemType> &deepCopy);
|
||||
#ifndef LINUX
|
||||
DeviceBoundNumber(DeviceBoundNumber<ElemType> &&shallowCopy);
|
||||
#endif
|
||||
~DeviceBoundNumber();
|
||||
int GetDeviceId() const {return m_computeDevice;}
|
||||
ElemType* ExposePointer2Value() const {return this->m_data;}
|
||||
ElemType* ExposePointer2Value() const {return m_data;}
|
||||
//performs shallow copy only
|
||||
void ShallowCopyFrom(ElemType* newVal,int newValsDevceId);
|
||||
};
|
||||
|
@ -84,10 +82,8 @@ namespace Microsoft { namespace MSR { namespace CNTK {
|
|||
GPUMatrix(const size_t numRows, const size_t numCols, ElemType *pArray, const size_t matrixFlags=matrixFlagNormal,int deviceId=0);
|
||||
GPUMatrix(const GPUMatrix<ElemType>& deepCopyFrom);
|
||||
GPUMatrix<ElemType>& operator=(const GPUMatrix<ElemType>& deepCopyFrom); //assignment operator, deep copy
|
||||
#ifndef LINUX
|
||||
GPUMatrix(GPUMatrix<ElemType>&& moveFrom);
|
||||
GPUMatrix<ElemType>& operator=(GPUMatrix<ElemType>&& moveFrom); //move assignment operator, shallow copy
|
||||
#endif /* LINUX */
|
||||
~GPUMatrix(void);
|
||||
|
||||
static int GetBestGPUDeviceId();
|
||||
|
@ -105,17 +101,11 @@ namespace Microsoft { namespace MSR { namespace CNTK {
|
|||
GPUMatrix<ElemType> ColumnSlice(size_t startColumn, size_t numCols) const;
|
||||
GPUMatrix<ElemType>& AssignColumnSlice(const GPUMatrix<ElemType>& fromMatrix, size_t startColumn, size_t numCols);
|
||||
|
||||
size_t BufferSize() const {return this->m_numRows*this->m_numCols*sizeof(ElemType);}
|
||||
ElemType* BufferPointer() const {return this->m_pArray;}
|
||||
size_t BufferSize() const {return m_numRows*m_numCols*sizeof(ElemType);}
|
||||
ElemType* BufferPointer() const {return m_pArray;}
|
||||
|
||||
void Adagrad(GPUMatrix<ElemType>& gradients);
|
||||
void RmsProp(GPUMatrix<ElemType>& gradients,
|
||||
ElemType RMS_GAMMA,
|
||||
ElemType RMS_WGT_INC,
|
||||
ElemType RMS_WGT_MAX,
|
||||
ElemType RMS_WGT_DEC,
|
||||
ElemType RMS_WGT_MIN
|
||||
);
|
||||
void RmsProp(GPUMatrix<ElemType>& gradients, ElemType RMS_GAMMA, ElemType RMS_WGT_INC, ElemType RMS_WGT_MAX, ElemType RMS_WGT_DEC, ElemType RMS_WGT_MIN);
|
||||
void Reshape(const size_t numRows, const size_t numCols);
|
||||
void Resize(const size_t numRows, const size_t numCols, bool growOnly = true); //by default we only reallocate if need to grow
|
||||
|
||||
|
|
|
@ -3,7 +3,9 @@
|
|||
// Copyright (c) Microsoft Corporation. All rights reserved.
|
||||
// </copyright>
|
||||
//
|
||||
|
||||
#ifndef CPU_ONLY
|
||||
|
||||
#include <float.h>
|
||||
#include <cuda_runtime.h>
|
||||
#include "CommonMatrix.h"
|
||||
|
@ -20,11 +22,7 @@
|
|||
#define LSMALL -0.5E10
|
||||
|
||||
// Predefine this for later.
|
||||
#ifndef LINUX
|
||||
static __inline__ __device__ double atomicAdd(double* address, double val);
|
||||
#else
|
||||
static __device__ double atomicAdd(double* address, double val);
|
||||
#endif
|
||||
|
||||
//CUDA Kernels code
|
||||
template<class ElemType>
|
||||
|
@ -2634,11 +2632,7 @@ __global__ void _normalGrad(
|
|||
}
|
||||
}
|
||||
|
||||
#ifndef LINUX
|
||||
static __inline__ __device__ double atomicAdd(double* address, double val)
|
||||
#else
|
||||
static __device__ double atomicAdd(double* address, double val)
|
||||
#endif
|
||||
{
|
||||
unsigned long long int* address_as_ull = (unsigned long long int*)address;
|
||||
unsigned long long int old = *address_as_ull, assumed;
|
||||
|
@ -3237,4 +3231,4 @@ d_tmp[0] = max((ElemType)0, d_tmp[0]/max((ElemType)1.0e-10,sqrt(d_tmp[1]))/max((
|
|||
}
|
||||
*/
|
||||
|
||||
#endif /*!CPU_ONLY*/
|
||||
#endif // !CPU_ONLY
|
||||
|
|
Разница между файлами не показана из-за своего большого размера
Загрузить разницу
|
@ -27,11 +27,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
|
|||
void performInplaceFunction(int kind);
|
||||
void DeepCopy(const GPUSparseMatrix<ElemType>& deepCopyFrom);
|
||||
void Clear();
|
||||
#ifndef LINUX
|
||||
void PrepareBuffer(size_t m, size_t n, bool canReuseBuffer, std::function<size_t (int* csrRowPtrC)> func);
|
||||
#else
|
||||
void PrepareBuffer(size_t m, size_t n, bool canReuseBuffer, size_t (*func)(int *csRowPtrC));
|
||||
#endif
|
||||
size_t ElemCountFromBufferSize(size_t totalBufferSize);
|
||||
void PrepareDevice(short deviceId=-1) const;
|
||||
|
||||
|
@ -56,22 +52,22 @@ namespace Microsoft { namespace MSR { namespace CNTK {
|
|||
// in memory format is always in the following order:
|
||||
// Non-zero data elements, Full index locations, compressed index locations
|
||||
// In CSR row data is compressed, in CSC col data is compressed
|
||||
const ElemType* NzLocation() const {return this->m_pArray;}
|
||||
ElemType* NzLocation() {return this->m_pArray;}
|
||||
size_t NzCount() const {return this->m_nz;}
|
||||
size_t NzSize() const {return sizeof(ElemType)*this->m_nz;} // actual number of element bytes in use
|
||||
int* IndexLocation() const {return (int*)(this->m_pArray+this->m_elemSizeAllocated);}
|
||||
size_t IndexSize() const {return sizeof(int)*this->m_nz;} // actual number of index bytes in use
|
||||
int* CompressedIndexLocation() const {return IndexLocation() + this->m_elemSizeAllocated;}
|
||||
const ElemType* NzLocation() const {return m_pArray;}
|
||||
ElemType* NzLocation() {return m_pArray;}
|
||||
size_t NzCount() const {return m_nz;}
|
||||
size_t NzSize() const {return sizeof(ElemType)*m_nz;} // actual number of element bytes in use
|
||||
int* IndexLocation() const {return (int*)(m_pArray+m_elemSizeAllocated);}
|
||||
size_t IndexSize() const {return sizeof(int)*m_nz;} // actual number of index bytes in use
|
||||
int* CompressedIndexLocation() const {return IndexLocation() + m_elemSizeAllocated;}
|
||||
size_t CompressedIndexCount() const
|
||||
{
|
||||
if (this->m_format&matrixFormatCompressed)
|
||||
if (m_format&matrixFormatCompressed)
|
||||
{
|
||||
size_t cnt = (this->m_format&matrixFormatRowMajor)?this->m_numRows:this->m_numCols;
|
||||
size_t cnt = (m_format&matrixFormatRowMajor)?m_numRows:m_numCols;
|
||||
if (cnt) cnt++; // add an extra element on the end for the "max" value
|
||||
return cnt;
|
||||
}
|
||||
return this->m_nz; // COO format
|
||||
return m_nz; // COO format
|
||||
}
|
||||
// get size for compressed index
|
||||
size_t CompressedIndexSize() const {return (CompressedIndexCount())*sizeof(int);}
|
||||
|
@ -79,10 +75,10 @@ namespace Microsoft { namespace MSR { namespace CNTK {
|
|||
ElemType* BufferPointer() const;
|
||||
|
||||
// the column and row locations will swap based on what format we are in. Full index always follows the data array
|
||||
int* RowLocation() const {return (this->m_format&matrixFormatRowMajor)?CompressedIndexLocation():IndexLocation();}
|
||||
size_t RowSize() const {return (this->m_format&matrixFormatRowMajor)?CompressedIndexSize():IndexSize();}
|
||||
int* ColLocation() const {return (this->m_format&matrixFormatRowMajor)?IndexLocation():CompressedIndexLocation();}
|
||||
size_t ColSize() const {return (this->m_format&matrixFormatRowMajor)?IndexSize():CompressedIndexSize();} // actual number of row bytes in use
|
||||
int* RowLocation() const {return (m_format&matrixFormatRowMajor)?CompressedIndexLocation():IndexLocation();}
|
||||
size_t RowSize() const {return (m_format&matrixFormatRowMajor)?CompressedIndexSize():IndexSize();}
|
||||
int* ColLocation() const {return (m_format&matrixFormatRowMajor)?IndexLocation():CompressedIndexLocation();}
|
||||
size_t ColSize() const {return (m_format&matrixFormatRowMajor)?IndexSize():CompressedIndexSize();} // actual number of row bytes in use
|
||||
|
||||
void SetValue(const GPUSparseMatrix<ElemType>& deepCopyFrom);
|
||||
void SetValue(const GPUMatrix<ElemType>& denseMatrix);
|
||||
|
@ -110,7 +106,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
|
|||
bool IsEqualTo(const GPUMatrix<ElemType>& a, const ElemType threshold = 1e-8) const;
|
||||
public:
|
||||
int GetComputeDeviceId(void) const;
|
||||
size_t GetNZElements() const {return this->m_nz;}
|
||||
size_t GetNZElements() const {return m_nz;}
|
||||
//Sets sparse matrix in CSR format. this acts as deep copy
|
||||
void SetMatrixFromCSRFormat(int *h_CSRRow, int *h_Col, ElemType *h_Val, size_t nz, size_t numRows, size_t numCols, bool IsOnDevice=false, int devId=0);
|
||||
void SetMatrixFromCSCFormat(size_t *h_row, size_t *h_rowIdx, size_t size, size_t blockSize);
|
||||
|
|
|
@ -182,6 +182,7 @@
|
|||
<CudaCompile Include="GPUWatcher.cu">
|
||||
<FileType>CppCode</FileType>
|
||||
</CudaCompile>
|
||||
<ClCompile Include="GPUDummy.cpp" />
|
||||
<ClCompile Include="Matrix.cpp" />
|
||||
<ClCompile Include="stdafx.cpp">
|
||||
<PrecompiledHeader>Create</PrecompiledHeader>
|
||||
|
|
|
@ -1,77 +1,54 @@
|
|||
<?xml version="1.0" encoding="utf-8"?>
|
||||
<Project ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
|
||||
<ItemGroup>
|
||||
<Filter Include="Source Files">
|
||||
<UniqueIdentifier>{4FC737F1-C7A5-4376-A066-2A32D752A2FF}</UniqueIdentifier>
|
||||
<Extensions>cpp;c;cc;cxx;def;odl;idl;hpj;bat;asm;asmx</Extensions>
|
||||
</Filter>
|
||||
<Filter Include="Header Files">
|
||||
<UniqueIdentifier>{93995380-89BD-4b04-88EB-625FBE52EBFB}</UniqueIdentifier>
|
||||
<Extensions>h;hpp;hxx;hm;inl;inc;xsd</Extensions>
|
||||
</Filter>
|
||||
<Filter Include="Resource Files">
|
||||
<UniqueIdentifier>{67DA6AB6-F800-4c08-8B7A-83BB121AAD01}</UniqueIdentifier>
|
||||
<Extensions>rc;ico;cur;bmp;dlg;rc2;rct;bin;rgs;gif;jpg;jpeg;jpe;resx;tiff;tif;png;wav;mfcribbon-ms</Extensions>
|
||||
</Filter>
|
||||
</ItemGroup>
|
||||
<ItemGroup>
|
||||
<ClInclude Include="stdafx.h">
|
||||
<Filter>Header Files</Filter>
|
||||
</ClInclude>
|
||||
<ClInclude Include="targetver.h">
|
||||
<Filter>Header Files</Filter>
|
||||
</ClInclude>
|
||||
<ClInclude Include="CPUMatrix.h">
|
||||
<Filter>Header Files</Filter>
|
||||
</ClInclude>
|
||||
<ClInclude Include="Matrix.h">
|
||||
<Filter>Header Files</Filter>
|
||||
</ClInclude>
|
||||
<ClInclude Include="Helpers.h">
|
||||
<Filter>Header Files</Filter>
|
||||
</ClInclude>
|
||||
<ClInclude Include="CommonMatrix.h">
|
||||
<Filter>Header Files</Filter>
|
||||
</ClInclude>
|
||||
<ClInclude Include="CPUSparseMatrix.h">
|
||||
<Filter>Header Files</Filter>
|
||||
</ClInclude>
|
||||
</ItemGroup>
|
||||
<ItemGroup>
|
||||
<ClCompile Include="stdafx.cpp">
|
||||
<Filter>Source Files</Filter>
|
||||
</ClCompile>
|
||||
<ClCompile Include="dllmain.cpp">
|
||||
<Filter>Source Files</Filter>
|
||||
</ClCompile>
|
||||
<ClCompile Include="CPUMatrix.cpp">
|
||||
<Filter>Source Files</Filter>
|
||||
</ClCompile>
|
||||
<ClCompile Include="Matrix.cpp">
|
||||
<Filter>Source Files</Filter>
|
||||
</ClCompile>
|
||||
<ClCompile Include="..\..\Common\File.cpp">
|
||||
<Filter>Source Files</Filter>
|
||||
</ClCompile>
|
||||
<ClCompile Include="..\..\Common\fileutil.cpp">
|
||||
<Filter>Source Files</Filter>
|
||||
</ClCompile>
|
||||
<ClCompile Include="CPUSparseMatrix.cpp">
|
||||
<Filter>Source Files</Filter>
|
||||
</ClCompile>
|
||||
<ClCompile Include="GPUDummy.cpp">
|
||||
<Filter>Source Files</Filter>
|
||||
</ClCompile>
|
||||
</ItemGroup>
|
||||
<ItemGroup>
|
||||
<None Include="GPUMatrix.cuh">
|
||||
<Filter>Header Files</Filter>
|
||||
</None>
|
||||
<None Include="GPUSparseMatrix.cuh">
|
||||
<Filter>Header Files</Filter>
|
||||
</None>
|
||||
<None Include="GPUWatcher.cuh">
|
||||
<Filter>Header Files</Filter>
|
||||
</None>
|
||||
</ItemGroup>
|
||||
<?xml version="1.0" encoding="utf-8"?>
|
||||
<Project ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
|
||||
<ItemGroup>
|
||||
<CudaCompile Include="GPUSparseMatrix.cu" />
|
||||
<CudaCompile Include="GPUWatcher.cu" />
|
||||
<CudaCompile Include="GPUMatrix.cu" />
|
||||
<CudaCompile Include="GPUMatrixCUDAKernels.cu" />
|
||||
</ItemGroup>
|
||||
<ItemGroup>
|
||||
<ClCompile Include="CPUSparseMatrix.cpp" />
|
||||
<ClCompile Include="dllmain.cpp" />
|
||||
<ClCompile Include="CPUMatrix.cpp" />
|
||||
<ClCompile Include="Matrix.cpp" />
|
||||
<ClCompile Include="stdafx.cpp" />
|
||||
<ClCompile Include="..\..\Common\File.cpp">
|
||||
<Filter>Common</Filter>
|
||||
</ClCompile>
|
||||
<ClCompile Include="..\..\Common\fileutil.cpp">
|
||||
<Filter>Common</Filter>
|
||||
</ClCompile>
|
||||
<ClCompile Include="GPUDummy.cpp" />
|
||||
</ItemGroup>
|
||||
<ItemGroup>
|
||||
<ClInclude Include="CommonMatrix.h" />
|
||||
<ClInclude Include="CPUMatrix.h" />
|
||||
<ClInclude Include="CPUSparseMatrix.h" />
|
||||
<ClInclude Include="Helpers.h" />
|
||||
<ClInclude Include="Matrix.h" />
|
||||
<ClInclude Include="stdafx.h" />
|
||||
<ClInclude Include="targetver.h" />
|
||||
<ClInclude Include="..\..\Common\Include\File.h">
|
||||
<Filter>Common\Include</Filter>
|
||||
</ClInclude>
|
||||
<ClInclude Include="..\..\Common\Include\fileutil.h">
|
||||
<Filter>Common\Include</Filter>
|
||||
</ClInclude>
|
||||
<ClInclude Include="..\..\Common\Include\basetypes.h">
|
||||
<Filter>Common\Include</Filter>
|
||||
</ClInclude>
|
||||
</ItemGroup>
|
||||
<ItemGroup>
|
||||
<None Include="GPUWatcher.cuh" />
|
||||
<None Include="GPUSparseMatrix.cuh" />
|
||||
<None Include="GPUMatrix.cuh" />
|
||||
</ItemGroup>
|
||||
<ItemGroup>
|
||||
<Filter Include="Common">
|
||||
<UniqueIdentifier>{4d07e945-74fb-48fa-aa63-23f3a7763789}</UniqueIdentifier>
|
||||
</Filter>
|
||||
<Filter Include="Common\Include">
|
||||
<UniqueIdentifier>{51b468dd-7e8a-4be8-ae6f-5e3f3d752b88}</UniqueIdentifier>
|
||||
</Filter>
|
||||
</ItemGroup>
|
||||
</Project>
|
|
@ -203,7 +203,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
|
|||
{
|
||||
m_GPUSparseMatrix = (GPUSparseMatrix<ElemType>*)baseMatrix;
|
||||
SetDataLocation(GPU, SPARSE);
|
||||
}
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
|
@ -216,8 +216,8 @@ namespace Microsoft { namespace MSR { namespace CNTK {
|
|||
{
|
||||
m_GPUMatrix = (GPUMatrix<ElemType>*)baseMatrix;
|
||||
SetDataLocation(GPU, DENSE);
|
||||
}
|
||||
}
|
||||
}
|
||||
m_baseMatrix = baseMatrix;
|
||||
m_baseMatrix->SetArray(pArray);
|
||||
}
|
||||
|
@ -288,15 +288,15 @@ namespace Microsoft { namespace MSR { namespace CNTK {
|
|||
{
|
||||
if (m_preferredDeviceId == CPUDEVICE)
|
||||
{
|
||||
m_CPUMatrix = new CPUMatrix<ElemType>(numRows,numCols);
|
||||
m_CPUMatrix = new CPUMatrix<ElemType>(numRows, numCols);
|
||||
SetDataLocation(CPU, DENSE);
|
||||
}
|
||||
else
|
||||
{
|
||||
m_GPUMatrix = new GPUMatrix<ElemType>(numRows,numCols,m_preferredDeviceId);
|
||||
SetDataLocation(GPU, DENSE);
|
||||
}
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
m_GPUMatrix = new GPUMatrix<ElemType>(numRows, numCols, m_preferredDeviceId);
|
||||
SetDataLocation(GPU, DENSE);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
template<class ElemType>
|
||||
|
@ -526,11 +526,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
|
|||
}
|
||||
else
|
||||
{
|
||||
#ifndef LINUX
|
||||
throw std::exception("Unknown matrix type");
|
||||
#else
|
||||
throw std::exception();
|
||||
#endif /* LINUX */
|
||||
throw std::runtime_error("Unknown matrix type");
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -617,15 +613,11 @@ namespace Microsoft { namespace MSR { namespace CNTK {
|
|||
}
|
||||
else if (GetMatrixType() == MatrixType::SPARSE)
|
||||
{
|
||||
NOT_IMPLEMENTED;
|
||||
NOT_IMPLEMENTED;
|
||||
}
|
||||
else
|
||||
{
|
||||
#ifndef LINUX
|
||||
throw std::exception("Unknown matrix type");
|
||||
#else
|
||||
throw std::exception();
|
||||
#endif /* LINUX */
|
||||
throw std::runtime_error("Unknown matrix type");
|
||||
}
|
||||
|
||||
return slice;
|
||||
|
@ -838,10 +830,8 @@ namespace Microsoft { namespace MSR { namespace CNTK {
|
|||
DISPATCH_MATRIX_ON_FLAG(this,
|
||||
this,
|
||||
m_CPUMatrix->SetValue(*db_number.ExposePointer2Value()),
|
||||
if (GetDeviceId()!=db_number.GetDeviceId())
|
||||
{
|
||||
if (GetDeviceId()!=db_number.GetDeviceId())
|
||||
throw std::runtime_error("Matrix and device bound number must be on the same device");
|
||||
}
|
||||
m_GPUMatrix->SetValue(db_number.ExposePointer2Value()),
|
||||
NOT_IMPLEMENTED,
|
||||
NOT_IMPLEMENTED
|
||||
|
@ -3431,9 +3421,9 @@ namespace Microsoft { namespace MSR { namespace CNTK {
|
|||
else
|
||||
{
|
||||
GPUMatrix<ElemType> firstDummy = transposeA ? a.m_GPUMatrix->Transpose()*alpha : (*a.m_GPUMatrix)*alpha;
|
||||
GPUMatrix<ElemType> & first= firstDummy; // By Malcolm.. gcc doesn't support auto like original
|
||||
GPUMatrix<ElemType> & first= firstDummy; // GCC does not support mixing refs and non-refs
|
||||
GPUSparseMatrix<ElemType> secondDummy = transposeB ? b.m_GPUSparseMatrix->Transpose() : *b.m_GPUSparseMatrix;
|
||||
GPUSparseMatrix<ElemType> & second = secondDummy; // By Malcolm.. gcc doesn't support auto like original
|
||||
GPUSparseMatrix<ElemType> & second = secondDummy;
|
||||
if (beta==0)
|
||||
{
|
||||
GPUSparseMatrix<ElemType>::Multiply(first,second,*c.m_GPUMatrix);
|
||||
|
|
|
@ -3,16 +3,14 @@
|
|||
// Copyright (c) Microsoft Corporation. All rights reserved.
|
||||
// </copyright>
|
||||
//
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "CPUMatrix.h"
|
||||
#include "CPUSparseMatrix.h"
|
||||
#include "GPUMatrix.cuh"
|
||||
#include "GPUSparseMatrix.cuh"
|
||||
|
||||
#ifdef LINUX
|
||||
// typedef char wchar_t;
|
||||
#endif
|
||||
|
||||
// This class is exported from the Math.dll
|
||||
namespace Microsoft { namespace MSR { namespace CNTK {
|
||||
enum CurrentDataLocation
|
||||
|
|
|
@ -5,13 +5,11 @@
|
|||
//
|
||||
#pragma once
|
||||
|
||||
#ifndef LINUX
|
||||
|
||||
// Including SDKDDKVer.h defines the highest available Windows platform.
|
||||
|
||||
// If you wish to build your application for a previous Windows platform, include WinSDKVer.h and
|
||||
// set the _WIN32_WINNT macro to the platform you wish to support before including SDKDDKVer.h.
|
||||
|
||||
#ifdef _WIN32
|
||||
#include <SDKDDKVer.h>
|
||||
|
||||
#endif /* LINUX */
|
||||
#endif
|
||||
|
|
Загрузка…
Ссылка в новой задаче