More changes to get it compiling.. but not finished.
This commit is contained in:
Родитель
33e48b635f
Коммит
a863c7746f
|
@ -11,6 +11,8 @@
|
|||
typedef char16_t TCHAR;
|
||||
#include <stdarg.h>
|
||||
#define vsprintf_s vsprintf /* Not sure this is right... Malcolm */
|
||||
#include <chrono>
|
||||
#include <thread>
|
||||
#endif /* LINUX */
|
||||
|
||||
#ifndef UNDER_CE // fixed-buffer overloads not available for wince
|
||||
|
@ -108,6 +110,7 @@ using namespace std;
|
|||
#define __inout_cap(x)
|
||||
#define __inout_cap_c(x)
|
||||
#endif
|
||||
#endif // LINUX
|
||||
#ifndef __out_z_cap // non-VS2005 annotations
|
||||
#define __out_cap(x)
|
||||
#define __out_z_cap(x)
|
||||
|
@ -321,7 +324,6 @@ public:
|
|||
#endif
|
||||
};
|
||||
|
||||
#ifndef LINUX
|
||||
|
||||
// locks a critical section, and unlocks it automatically
|
||||
// when the lock goes out of scope
|
||||
|
@ -447,7 +449,11 @@ public:
|
|||
#include <xlocale> // uses strlen()
|
||||
#endif
|
||||
#define strlen strlen_
|
||||
#ifndef LINUX
|
||||
template<typename _T> inline __declspec(deprecated("Dummy general template, cannot be used directly"))
|
||||
#else
|
||||
template<typename _T> inline
|
||||
#endif // LINUX
|
||||
size_t strlen_(_T &s) { return strnlen_s(static_cast<const char *>(s), SIZE_MAX); } // never be called but needed to keep compiler happy
|
||||
// Generic const-reference overload: converts s to const char * via static_cast and returns its length from strnlen_s, using SIZE_MAX as the bound.
template<typename _T> inline size_t strlen_(const _T &s) { return strnlen_s(static_cast<const char *>(s), SIZE_MAX); }
|
||||
// Explicit specialization for a non-const char * lvalue: passes the pointer straight to strnlen_s with SIZE_MAX as the bound (no cast needed).
template<> inline size_t strlen_(char * &s) { return strnlen_s(s, SIZE_MAX); }
|
||||
|
@ -980,7 +986,8 @@ template<typename FUNCTION> static void attempt (int retries, const FUNCTION & b
|
|||
#ifndef LINUX
|
||||
::Sleep (1000); // wait a little, then try again
|
||||
#else
|
||||
sleep(1);
|
||||
std::chrono::milliseconds dura(1000);
|
||||
std::this_thread::sleep_for(dura);
|
||||
#endif /* LINUX */
|
||||
}
|
||||
}
|
||||
|
|
|
@ -10,6 +10,7 @@
|
|||
|
||||
#ifdef LINUX
|
||||
#define wcsnlen_s wcsnlen /* Not sure if this is best replacement... Malcolm */
|
||||
// typedef char wchar_t;
|
||||
#endif
|
||||
|
||||
#define AUTOPLACEMATRIX 1000 // used in parameters only
|
||||
|
|
|
@ -424,7 +424,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
|
|||
{
|
||||
ZeroInit(deepCopyFrom.m_computeDevice);
|
||||
SetValue(deepCopyFrom);
|
||||
SetMatrixName(deepCopyFrom.m_matrixName);
|
||||
this->SetMatrixName(deepCopyFrom.m_matrixName);
|
||||
}
|
||||
|
||||
#ifndef LINUX
|
||||
|
@ -452,7 +452,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
|
|||
if (this != &deepCopyFrom)
|
||||
{
|
||||
SetValue(deepCopyFrom);
|
||||
SetMatrixName(deepCopyFrom.m_matrixName);
|
||||
this->SetMatrixName(deepCopyFrom.m_matrixName);
|
||||
}
|
||||
return *this;
|
||||
}
|
||||
|
@ -464,7 +464,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
|
|||
{
|
||||
if (this != &moveFrom)
|
||||
{
|
||||
if (OwnBuffer() && this->m_pArray!=NULL)
|
||||
if (this->OwnBuffer() && this->m_pArray!=NULL)
|
||||
{
|
||||
CUDA_CALL(cudaFree(this->m_pArray));
|
||||
}
|
||||
|
|
|
@ -18,6 +18,13 @@
|
|||
#define MINLOGEXP -9.2103
|
||||
#define LSMALL -0.5E10
|
||||
|
||||
// Forward-declare atomicAdd(double*, double) here so that code below can use it.
|
||||
#ifndef LINUX
|
||||
static __inline__ __device__ double atomicAdd(double* address, double val);
|
||||
#else
|
||||
static __device__ double atomicAdd(double* address, double val);
|
||||
#endif
|
||||
|
||||
//CUDA Kernels code
|
||||
template<class ElemType>
|
||||
__global__ void _elementWisePowerOnCuda(
|
||||
|
|
|
@ -1044,7 +1044,11 @@ namespace Microsoft { namespace MSR { namespace CNTK {
|
|||
// canReuseBuffer - target matrix can be reused for temporary space
|
||||
// func - function to call to count elements in the result (returns count, and fills csrRowPtr array)
|
||||
template<class ElemType>
|
||||
#ifndef LINUX
|
||||
void GPUSparseMatrix<ElemType>::PrepareBuffer(size_t m, size_t n, bool canReuseBuffer, std::function<size_t (int* csrRowPtrC)> func)
|
||||
#else
|
||||
void GPUSparseMatrix<ElemType>::PrepareBuffer(size_t m, size_t n, bool canReuseBuffer, size_t (*func)(int *csRowPtrC))
|
||||
#endif /* LINUX */
|
||||
{
|
||||
int* csrRowPtrC=NULL;
|
||||
GPUSparseMatrix<ElemType>& c = *this;
|
||||
|
@ -1099,6 +1103,16 @@ namespace Microsoft { namespace MSR { namespace CNTK {
|
|||
CUDACALL(cudaFree(csrRowPtrC));
|
||||
}
|
||||
|
||||
#ifdef LINUXxx
|
||||
size_t PrepareBufferMultiply(int* csrRowPtrC)
|
||||
{
|
||||
int nnzTotal = -1;
|
||||
CUSPARSECALL(cusparseXcsrgemmNnz(cusparseHandle,operA,operB,m,n,k,descrA,nnzA,S1.RowLocation(),S1.ColLocation(),descrB,nnzB,
|
||||
S2.RowLocation(),S2.ColLocation(),descrC,csrRowPtrC,&nnzTotal));
|
||||
return nnzTotal;
|
||||
}
|
||||
#endif
|
||||
|
||||
// Multiply - multiply one sparse matrix by another sparse matrix
|
||||
// S1 - first sparse matrix
|
||||
// transposeS1 - transpose first matrix?
|
||||
|
@ -1136,13 +1150,18 @@ namespace Microsoft { namespace MSR { namespace CNTK {
|
|||
CUDACALL(cudaEventCreate(&done));
|
||||
//Step 1
|
||||
c.PrepareBuffer(m, n, true, // true means we can reuse the "c" buffer if it exists for temporaries
|
||||
#ifndef LINUX
|
||||
[&](int* csrRowPtrC) -> size_t
|
||||
{
|
||||
int nnzTotal = -1;
|
||||
CUSPARSECALL(cusparseXcsrgemmNnz(cusparseHandle,operA,operB,m,n,k,descrA,nnzA,S1.RowLocation(),S1.ColLocation(),descrB,nnzB,
|
||||
S2.RowLocation(),S2.ColLocation(),descrC,csrRowPtrC,&nnzTotal));
|
||||
return nnzTotal;
|
||||
});
|
||||
}
|
||||
#else
|
||||
NULL // PrepareBufferMultiply
|
||||
#endif
|
||||
);
|
||||
|
||||
|
||||
//Step 2
|
||||
|
@ -1196,12 +1215,18 @@ namespace Microsoft { namespace MSR { namespace CNTK {
|
|||
CUDACALL(cudaEventCreate(&done));
|
||||
//Step 1
|
||||
bool inOutParameter = (&b == &c);
|
||||
c.PrepareBuffer(m, n, !inOutParameter, [&] (int* csrRowPtrC) -> size_t
|
||||
c.PrepareBuffer(m, n, !inOutParameter,
|
||||
#ifndef LINUX
|
||||
[&] (int* csrRowPtrC) -> size_t
|
||||
{
|
||||
int nnzTotal = -1;
|
||||
CUSPARSECALL(cusparseXcsrgeamNnz(cusparseHandle,m,n,descrA,nnzA,a.RowLocation(),a.ColLocation(),descrB,nnzB,b.RowLocation(),b.ColLocation(),descrC,csrRowPtrC,&nnzTotal));
|
||||
return nnzTotal;
|
||||
});
|
||||
}
|
||||
#else
|
||||
NULL
|
||||
#endif // Linux
|
||||
);
|
||||
|
||||
//Step 2
|
||||
if (sizeof(ElemType)==sizeof(float))
|
||||
|
@ -1588,7 +1613,11 @@ namespace Microsoft { namespace MSR { namespace CNTK {
|
|||
if (this->IsEmpty())
|
||||
return;
|
||||
// transfer converted block over to this pointer
|
||||
#ifndef LINUX
|
||||
*this = std::move(this->Transpose());
|
||||
#else
|
||||
std::cerr << "Not sure how to do the InplaceTranspose()";
|
||||
#endif
|
||||
}
|
||||
|
||||
template<class ElemType>
|
||||
|
|
|
@ -29,7 +29,9 @@ namespace Microsoft { namespace MSR { namespace CNTK {
|
|||
void Clear();
|
||||
#ifndef LINUX
|
||||
void PrepareBuffer(size_t m, size_t n, bool canReuseBuffer, std::function<size_t (int* csrRowPtrC)> func);
|
||||
#endif
|
||||
#else
|
||||
void PrepareBuffer(size_t m, size_t n, bool canReuseBuffer, size_t (*func)(int *csRowPtrC));
|
||||
#endif
|
||||
size_t ElemCountFromBufferSize(size_t totalBufferSize);
|
||||
void PrepareDevice(short deviceId=-1) const;
|
||||
|
||||
|
|
|
@ -1,8 +1,8 @@
|
|||
CSOURCES = CPUMatrix.cpp CPUSparseMatrix.cpp Matrix.cpp
|
||||
|
||||
OBJECTS = CPUMatrix.o CPUSparseMatrix.o Matrix.o \
|
||||
GPUSparseMatrix.o GPUWatcher.o \
|
||||
GPUMatrixCUDAKernels.o GPUMatrix.o
|
||||
GPUMatrixCUDAKernels.o GPUMatrix.o \
|
||||
GPUWatcher.o GPUSparseMatrix.o
|
||||
|
||||
INCLUDES = -I../../Common/Include -I/opt/acml5.3.1/gfortran64_mp_int64/include
|
||||
|
||||
|
@ -10,9 +10,10 @@ DEPS =
|
|||
|
||||
CFLAGS = $(INCLUDES) \
|
||||
-D BASETYPES_NO_UNSAFECRTOVERLOAD -DBASETYPES_NO_STRPRINTF \
|
||||
-DLINUX -D_FILEUTIL_ -Wnon-template-friend -std=c++11
|
||||
-DLINUX -Wnon-template-friend -std=c++11 # -D_FILEUTIL_
|
||||
|
||||
NVCFLAGS = -DLINUX -I../../Common/Include -D_FILEUTIL_ -arch sm_11
|
||||
NVCFLAGS = -DLINUX -D BASETYPES_NO_UNSAFECRTOVERLOAD -DBASETYPES_NO_STRPRINTF \
|
||||
-I../../Common/Include -arch=compute_20 -std=c++11 # -D_FILEUTIL_
|
||||
|
||||
CXX = gcc
|
||||
NVCC = nvcc
|
||||
|
|
|
@ -3435,9 +3435,9 @@ namespace Microsoft { namespace MSR { namespace CNTK {
|
|||
else
|
||||
{
|
||||
GPUMatrix<ElemType> firstDummy = transposeA ? a.m_GPUMatrix->Transpose()*alpha : (*a.m_GPUMatrix)*alpha;
|
||||
GPUMatrix<ElemType> & first= firstDummy; // By Malcolm.. gcc doesn't support auto
|
||||
GPUMatrix<ElemType> & first= firstDummy; // By Malcolm.. gcc doesn't support auto like original
|
||||
GPUSparseMatrix<ElemType> secondDummy = transposeB ? b.m_GPUSparseMatrix->Transpose() : *b.m_GPUSparseMatrix;
|
||||
GPUSparseMatrix<ElemType> & second = secondDummy; // By Malcolm.. gcc doesn't support auto
|
||||
GPUSparseMatrix<ElemType> & second = secondDummy; // By Malcolm.. gcc doesn't support auto like original
|
||||
if (beta==0)
|
||||
{
|
||||
GPUSparseMatrix<ElemType>::Multiply(first,second,*c.m_GPUMatrix);
|
||||
|
|
|
@ -9,6 +9,10 @@
|
|||
#include "GPUMatrix.cuh"
|
||||
#include "GPUSparseMatrix.cuh"
|
||||
|
||||
#ifdef LINUX
|
||||
// typedef char wchar_t;
|
||||
#endif
|
||||
|
||||
// This class is exported from the Math.dll
|
||||
namespace Microsoft { namespace MSR { namespace CNTK {
|
||||
enum CurrentDataLocation
|
||||
|
|
Загрузка…
Ссылка в новой задаче