More changes to get it compiling.. but not finished.

This commit is contained in:
Malcolm Slaney 2014-10-09 10:44:27 -07:00 коммит произвёл unknown
Родитель 33e48b635f
Коммит a863c7746f
9 изменённых файлов: 66 добавлений и 15 удалений

Просмотреть файл

@ -11,6 +11,8 @@
typedef char16_t TCHAR;
#include <stdarg.h>
#define vsprintf_s vsprintf /* Not sure this is right... Malcolm */
#include <chrono>
#include <thread>
#endif /* LINUX */
#ifndef UNDER_CE // fixed-buffer overloads not available for wince
@ -108,6 +110,7 @@ using namespace std;
#define __inout_cap(x)
#define __inout_cap_c(x)
#endif
#endif // LINUX
#ifndef __out_z_cap // non-VS2005 annotations
#define __out_cap(x)
#define __out_z_cap(x)
@ -321,7 +324,6 @@ public:
#endif
};
#ifndef LINUX
// locks a critical section, and unlocks it automatically
// when the lock goes out of scope
@ -447,7 +449,11 @@ public:
#include <xlocale> // uses strlen()
#endif
#define strlen strlen_
// strlen_ overload set. The preceding `#define strlen strlen_` routes every
// strlen() call in code that follows through these templates.
//
// Generic non-const-reference overload: a placeholder that is not meant to be
// used directly. On non-LINUX builds it carries __declspec(deprecated(...)) with
// an explanatory message; LINUX builds declare it without that attribute.
#ifndef LINUX
template<typename _T> inline __declspec(deprecated("Dummy general template, cannot be used directly"))
#else
template<typename _T> inline
#endif // LINUX
size_t strlen_(_T &s) { return strnlen_s(static_cast<const char *>(s), SIZE_MAX); } // should never be called, but is needed to keep the compiler happy
// Const-reference overload: casts the argument to const char * and measures it
// with strnlen_s, using SIZE_MAX as the length bound.
template<typename _T> inline size_t strlen_(const _T &s) { return strnlen_s(static_cast<const char *>(s), SIZE_MAX); }
// Explicit specialization for a non-const char * lvalue: no cast needed.
template<> inline size_t strlen_(char * &s) { return strnlen_s(s, SIZE_MAX); }
@ -980,7 +986,8 @@ template<typename FUNCTION> static void attempt (int retries, const FUNCTION & b
#ifndef LINUX
::Sleep (1000); // wait a little, then try again
#else
sleep(1);
std::chrono::milliseconds dura(1000);
std::this_thread::sleep_for(dura);
#endif /* LINUX */
}
}

Просмотреть файл

@ -10,6 +10,7 @@
#ifdef LINUX
#define wcsnlen_s wcsnlen /* Not sure if this is best replacement... Malcolm */
// typedef char wchar_t;
#endif
#define AUTOPLACEMATRIX 1000 // used in parameters only

Просмотреть файл

@ -424,7 +424,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
{
ZeroInit(deepCopyFrom.m_computeDevice);
SetValue(deepCopyFrom);
SetMatrixName(deepCopyFrom.m_matrixName);
this->SetMatrixName(deepCopyFrom.m_matrixName);
}
#ifndef LINUX
@ -452,7 +452,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
if (this != &deepCopyFrom)
{
SetValue(deepCopyFrom);
SetMatrixName(deepCopyFrom.m_matrixName);
this->SetMatrixName(deepCopyFrom.m_matrixName);
}
return *this;
}
@ -464,7 +464,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
{
if (this != &moveFrom)
{
if (OwnBuffer() && this->m_pArray!=NULL)
if (this->OwnBuffer() && this->m_pArray!=NULL)
{
CUDA_CALL(cudaFree(this->m_pArray));
}

Просмотреть файл

@ -18,6 +18,13 @@
#define MINLOGEXP -9.2103
#define LSMALL -0.5E10
// Forward-declare atomicAdd for double so it can be used before its definition later in the file.
#ifndef LINUX
static __inline__ __device__ double atomicAdd(double* address, double val);
#else
static __device__ double atomicAdd(double* address, double val);
#endif
//CUDA Kernels code
template<class ElemType>
__global__ void _elementWisePowerOnCuda(

Просмотреть файл

@ -1044,7 +1044,11 @@ namespace Microsoft { namespace MSR { namespace CNTK {
// canReuseBuffer - target matrix can be reused for temporary space
// func - function to call to count elements in the result (returns count, and fills csrRowPtr array)
template<class ElemType>
#ifndef LINUX
void GPUSparseMatrix<ElemType>::PrepareBuffer(size_t m, size_t n, bool canReuseBuffer, std::function<size_t (int* csrRowPtrC)> func)
#else
void GPUSparseMatrix<ElemType>::PrepareBuffer(size_t m, size_t n, bool canReuseBuffer, size_t (*func)(int *csRowPtrC))
#endif /* LINUX */
{
int* csrRowPtrC=NULL;
GPUSparseMatrix<ElemType>& c = *this;
@ -1099,6 +1103,16 @@ namespace Microsoft { namespace MSR { namespace CNTK {
CUDACALL(cudaFree(csrRowPtrC));
}
// PrepareBufferMultiply - free-function counterpart of the lambda that Multiply()
// passes to PrepareBuffer() on non-LINUX builds (same cusparseXcsrgemmNnz call).
// csrRowPtrC - row-pointer array handed to cusparseXcsrgemmNnz
// Returns the nnzTotal value produced by the call, converted to size_t.
//
// NOTE(review): this is guarded by LINUXxx, not the LINUX macro used by the other
// conditionals in this file, so it is presumably compiled out on purpose.
// NOTE(review): cusparseHandle, operA, operB, m, n, k, descrA, descrB, descrC,
// nnzA, nnzB, S1 and S2 are neither parameters nor locals here; in the lambda
// version they are captured by reference from Multiply(). As written this stub
// would need those values passed in before it could be enabled.
#ifdef LINUXxx
size_t PrepareBufferMultiply(int* csrRowPtrC)
{
// Starts at -1; the value after the cuSPARSE call is what gets returned.
int nnzTotal = -1;
CUSPARSECALL(cusparseXcsrgemmNnz(cusparseHandle,operA,operB,m,n,k,descrA,nnzA,S1.RowLocation(),S1.ColLocation(),descrB,nnzB,
S2.RowLocation(),S2.ColLocation(),descrC,csrRowPtrC,&nnzTotal));
return nnzTotal;
}
#endif
// Multiply - multiply one sparse matrix by another sparse matrix
// S1 - first sparse matrix
// transposeS1 - transpose first matrix?
@ -1136,13 +1150,18 @@ namespace Microsoft { namespace MSR { namespace CNTK {
CUDACALL(cudaEventCreate(&done));
//Step 1
c.PrepareBuffer(m, n, true, // true means we can reuse the "c" buffer if it exists for temporaries
#ifndef LINUX
[&](int* csrRowPtrC) -> size_t
{
int nnzTotal = -1;
CUSPARSECALL(cusparseXcsrgemmNnz(cusparseHandle,operA,operB,m,n,k,descrA,nnzA,S1.RowLocation(),S1.ColLocation(),descrB,nnzB,
S2.RowLocation(),S2.ColLocation(),descrC,csrRowPtrC,&nnzTotal));
return nnzTotal;
});
}
#else
NULL // PrepareBufferMultiply
#endif
);
//Step 2
@ -1196,12 +1215,18 @@ namespace Microsoft { namespace MSR { namespace CNTK {
CUDACALL(cudaEventCreate(&done));
//Step 1
bool inOutParameter = (&b == &c);
c.PrepareBuffer(m, n, !inOutParameter, [&] (int* csrRowPtrC) -> size_t
c.PrepareBuffer(m, n, !inOutParameter,
#ifndef LINUX
[&] (int* csrRowPtrC) -> size_t
{
int nnzTotal = -1;
CUSPARSECALL(cusparseXcsrgeamNnz(cusparseHandle,m,n,descrA,nnzA,a.RowLocation(),a.ColLocation(),descrB,nnzB,b.RowLocation(),b.ColLocation(),descrC,csrRowPtrC,&nnzTotal));
return nnzTotal;
});
}
#else
NULL
#endif // Linux
);
//Step 2
if (sizeof(ElemType)==sizeof(float))
@ -1588,7 +1613,11 @@ namespace Microsoft { namespace MSR { namespace CNTK {
if (this->IsEmpty())
return;
// transfer converted block over to this pointer
#ifndef LINUX
*this = std::move(this->Transpose());
#else
std::cerr << "Not sure how to do the InplaceTranspose()";
#endif
}
template<class ElemType>

Просмотреть файл

@ -29,7 +29,9 @@ namespace Microsoft { namespace MSR { namespace CNTK {
void Clear();
#ifndef LINUX
void PrepareBuffer(size_t m, size_t n, bool canReuseBuffer, std::function<size_t (int* csrRowPtrC)> func);
#endif
#else
void PrepareBuffer(size_t m, size_t n, bool canReuseBuffer, size_t (*func)(int *csRowPtrC));
#endif
size_t ElemCountFromBufferSize(size_t totalBufferSize);
void PrepareDevice(short deviceId=-1) const;

Просмотреть файл

@ -1,8 +1,8 @@
CSOURCES = CPUMatrix.cpp CPUSparseMatrix.cpp Matrix.cpp
OBJECTS = CPUMatrix.o CPUSparseMatrix.o Matrix.o \
GPUSparseMatrix.o GPUWatcher.o \
GPUMatrixCUDAKernels.o GPUMatrix.o
GPUMatrixCUDAKernels.o GPUMatrix.o \
GPUWatcher.o GPUSparseMatrix.o
INCLUDES = -I../../Common/Include -I/opt/acml5.3.1/gfortran64_mp_int64/include
@ -10,9 +10,10 @@ DEPS =
CFLAGS = $(INCLUDES) \
-D BASETYPES_NO_UNSAFECRTOVERLOAD -DBASETYPES_NO_STRPRINTF \
-DLINUX -D_FILEUTIL_ -Wnon-template-friend -std=c++11
-DLINUX -Wnon-template-friend -std=c++11 # -D_FILEUTIL_
NVCFLAGS = -DLINUX -I../../Common/Include -D_FILEUTIL_ -arch sm_11
NVCFLAGS = -DLINUX -D BASETYPES_NO_UNSAFECRTOVERLOAD -DBASETYPES_NO_STRPRINTF \
-I../../Common/Include -arch=compute_20 -std=c++11 # -D_FILEUTIL_
CXX = gcc
NVCC = nvcc

Просмотреть файл

@ -3435,9 +3435,9 @@ namespace Microsoft { namespace MSR { namespace CNTK {
else
{
GPUMatrix<ElemType> firstDummy = transposeA ? a.m_GPUMatrix->Transpose()*alpha : (*a.m_GPUMatrix)*alpha;
GPUMatrix<ElemType> & first= firstDummy; // By Malcolm.. gcc doesn't support auto
GPUMatrix<ElemType> & first= firstDummy; // By Malcolm.. gcc doesn't support auto like original
GPUSparseMatrix<ElemType> secondDummy = transposeB ? b.m_GPUSparseMatrix->Transpose() : *b.m_GPUSparseMatrix;
GPUSparseMatrix<ElemType> & second = secondDummy; // By Malcolm.. gcc doesn't support auto
GPUSparseMatrix<ElemType> & second = secondDummy; // By Malcolm.. gcc doesn't support auto like original
if (beta==0)
{
GPUSparseMatrix<ElemType>::Multiply(first,second,*c.m_GPUMatrix);

Просмотреть файл

@ -9,6 +9,10 @@
#include "GPUMatrix.cuh"
#include "GPUSparseMatrix.cuh"
#ifdef LINUX
// typedef char wchar_t;
#endif
// This class is exported from the Math.dll
namespace Microsoft { namespace MSR { namespace CNTK {
enum CurrentDataLocation