More changes to get it compiling, but not finished.

2014-10-09 10:44:27 -07:00 · 2014-10-09 10:44:27 -07:00 · a863c7746f
--- a/Common/Include/basetypes.h
+++ b/Common/Include/basetypes.h
@ -11,6 +11,8 @@
 typedef char16_t TCHAR;
 #include <stdarg.h>
 #define	vsprintf_s vsprintf		/* Not sure this is right... Malcolm */
+#include <chrono>
+#include <thread>
 #endif	 /* LINUX */

 #ifndef UNDER_CE    // fixed-buffer overloads not available for wince
@ -108,6 +110,7 @@ using namespace std;
 #define __inout_cap(x)
 #define __inout_cap_c(x)
 #endif
+#endif	// LINUX 
 #ifndef __out_z_cap    // non-VS2005 annotations
 #define __out_cap(x)
 #define __out_z_cap(x)
@ -321,7 +324,6 @@ public:
 #endif
 };

-#ifndef	LINUX

 // locks a critical section, and unlocks it automatically
 // when the lock goes out of scope
@ -447,7 +449,11 @@ public:
 #include <xlocale>      // uses strlen()
 #endif
 #define strlen strlen_
+#ifndef	LINUX
 template<typename _T> inline __declspec(deprecated("Dummy general template, cannot be used directly")) 
+#else
+template<typename _T> inline 
+#endif	// LINUX
 size_t strlen_(_T &s) { return strnlen_s(static_cast<const char *>(s), SIZE_MAX); } // never be called but needed to keep compiler happy
 template<typename _T> inline size_t strlen_(const _T &s)     { return strnlen_s(static_cast<const char *>(s), SIZE_MAX); }
 template<> inline size_t strlen_(char * &s)                  { return strnlen_s(s, SIZE_MAX); }
@ -980,7 +986,8 @@ template<typename FUNCTION> static void attempt (int retries, const FUNCTION & b
 #ifndef	LINUX
            ::Sleep (1000); // wait a little, then try again
 #else
-            sleep(1);
+            std::chrono::milliseconds dura(1000);
+            std::this_thread::sleep_for(dura);
 #endif	/* LINUX */
        }
    }
--- a/Math/Math/CommonMatrix.h
+++ b/Math/Math/CommonMatrix.h
@ -10,6 +10,7 @@

 #ifdef	LINUX
 #define	wcsnlen_s	wcsnlen			/* Not sure if this is best replacement... Malcolm */
+// typedef	char wchar_t;
 #endif	

 #define AUTOPLACEMATRIX 1000 // used in parameters only
--- a/Math/Math/GPUMatrix.cu
+++ b/Math/Math/GPUMatrix.cu
@ -424,7 +424,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
    {
        ZeroInit(deepCopyFrom.m_computeDevice);
        SetValue(deepCopyFrom);
-        SetMatrixName(deepCopyFrom.m_matrixName);       
+        this->SetMatrixName(deepCopyFrom.m_matrixName);       
    }

 #ifndef	LINUX
@ -452,7 +452,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
        if (this != &deepCopyFrom)
        {
            SetValue(deepCopyFrom);
-            SetMatrixName(deepCopyFrom.m_matrixName);       
+            this->SetMatrixName(deepCopyFrom.m_matrixName);       
        }
        return *this;
    }
@ -464,7 +464,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
    {
        if (this != &moveFrom)
        {
-            if (OwnBuffer() && this->m_pArray!=NULL)
+            if (this->OwnBuffer() && this->m_pArray!=NULL)
            {
                CUDA_CALL(cudaFree(this->m_pArray));  
            }
--- a/Math/Math/GPUMatrixCUDAKernels.cu
+++ b/Math/Math/GPUMatrixCUDAKernels.cu
@ -18,6 +18,13 @@
 #define MINLOGEXP -9.2103
 #define LSMALL -0.5E10

+// Forward-declare the double-precision atomicAdd overload so code below can call it before it is defined.
+#ifndef LINUX
+static __inline__ __device__ double atomicAdd(double* address, double val);
+#else
+static  __device__ double atomicAdd(double* address, double val);
+#endif
+
 //CUDA Kernels code
 template<class ElemType>
 __global__ void _elementWisePowerOnCuda(
--- a/Math/Math/GPUSparseMatrix.cu
+++ b/Math/Math/GPUSparseMatrix.cu
@ -1044,7 +1044,11 @@ namespace Microsoft { namespace MSR { namespace CNTK {
    // canReuseBuffer - target matrix can be reused for temporary space
    // func - function to call to count elements in the result (returns count, and fills csrRowPtr array)
    template<class ElemType>
+#ifndef	LINUX
    void GPUSparseMatrix<ElemType>::PrepareBuffer(size_t m, size_t n, bool canReuseBuffer, std::function<size_t (int* csrRowPtrC)> func)
+#else
+    void GPUSparseMatrix<ElemType>::PrepareBuffer(size_t m, size_t n, bool canReuseBuffer, size_t (*func)(int *csRowPtrC))
+#endif	/* LINUX */
    {
        int* csrRowPtrC=NULL;
        GPUSparseMatrix<ElemType>& c = *this;
@ -1099,6 +1103,16 @@ namespace Microsoft { namespace MSR { namespace CNTK {
            CUDACALL(cudaFree(csrRowPtrC));
    }

+#ifdef	LINUXxx
+    size_t PrepareBufferMultiply(int* csrRowPtrC)  // Intended function-pointer stand-in for the lambda passed to PrepareBuffer in Multiply; only compiled when LINUXxx is defined.
+        {  // NOTE(review): cusparseHandle, operA, operB, m, n, k, descrA/B/C, nnzA/B, S1 and S2 are not parameters here; the lambda version takes them by reference capture -- confirm how they would be supplied.
+            int nnzTotal = -1;  // placeholder value; the call below receives &nnzTotal as its output argument
+            CUSPARSECALL(cusparseXcsrgemmNnz(cusparseHandle,operA,operB,m,n,k,descrA,nnzA,S1.RowLocation(),S1.ColLocation(),descrB,nnzB,  // receives csrRowPtrC and &nnzTotal as outputs
+                S2.RowLocation(),S2.ColLocation(),descrC,csrRowPtrC,&nnzTotal));
+            return nnzTotal;  // int is implicitly converted to the size_t return type
+        }
+#endif
+
    // Multiply - multiply one spares matrix by another sparse matrix
    // S1 - first sparse matrix
    // transposeS1 - transpose first matrix?
@ -1136,13 +1150,18 @@ namespace Microsoft { namespace MSR { namespace CNTK {
        CUDACALL(cudaEventCreate(&done));
        //Step 1 
        c.PrepareBuffer(m, n, true, // true means we can reuse the "c" buffer if it exists for temporaries
+#ifndef	LINUX
            [&](int* csrRowPtrC) -> size_t
        {
            int nnzTotal = -1; 
            CUSPARSECALL(cusparseXcsrgemmNnz(cusparseHandle,operA,operB,m,n,k,descrA,nnzA,S1.RowLocation(),S1.ColLocation(),descrB,nnzB,
                S2.RowLocation(),S2.ColLocation(),descrC,csrRowPtrC,&nnzTotal));
            return nnzTotal;
-        });
+        }
+#else
+	NULL		// PrepareBufferMultiply
+#endif
+	);


        //Step 2
@ -1196,12 +1215,18 @@ namespace Microsoft { namespace MSR { namespace CNTK {
        CUDACALL(cudaEventCreate(&done));
        //Step 1 
        bool inOutParameter = (&b == &c);
-        c.PrepareBuffer(m, n, !inOutParameter, [&] (int* csrRowPtrC) -> size_t
+        c.PrepareBuffer(m, n, !inOutParameter, 
+#ifndef	LINUX
+	[&] (int* csrRowPtrC) -> size_t
        {
            int nnzTotal = -1;
            CUSPARSECALL(cusparseXcsrgeamNnz(cusparseHandle,m,n,descrA,nnzA,a.RowLocation(),a.ColLocation(),descrB,nnzB,b.RowLocation(),b.ColLocation(),descrC,csrRowPtrC,&nnzTotal));
            return nnzTotal;
-        });
+        }
+#else
+	NULL
+#endif	// Linux
+	);

        //Step 2
        if (sizeof(ElemType)==sizeof(float))
@ -1588,7 +1613,11 @@ namespace Microsoft { namespace MSR { namespace CNTK {
        if (this->IsEmpty())
            return;
        // transfer converted block over to this pointer
+#ifndef	LINUX
        *this = std::move(this->Transpose());
+#else	
+	std::cerr << "Not sure how to do the InplaceTranspose()";
+#endif
    }

    template<class ElemType>
--- a/Math/Math/GPUSparseMatrix.cuh
+++ b/Math/Math/GPUSparseMatrix.cuh
@ -29,7 +29,9 @@ namespace Microsoft { namespace MSR { namespace CNTK {
        void Clear();
 #ifndef	LINUX
        void PrepareBuffer(size_t m, size_t n, bool canReuseBuffer, std::function<size_t (int* csrRowPtrC)> func);
-#endif
+#else
+        void PrepareBuffer(size_t m, size_t n, bool canReuseBuffer, size_t (*func)(int *csRowPtrC));
+#endif	
        size_t ElemCountFromBufferSize(size_t totalBufferSize);
        void PrepareDevice(short deviceId=-1) const;

--- a/Math/Math/Makefile
+++ b/Math/Math/Makefile
@ -1,8 +1,8 @@
 CSOURCES = CPUMatrix.cpp CPUSparseMatrix.cpp Matrix.cpp 

 OBJECTS = CPUMatrix.o CPUSparseMatrix.o Matrix.o \
-	GPUSparseMatrix.o GPUWatcher.o \
-	GPUMatrixCUDAKernels.o GPUMatrix.o 
+	GPUMatrixCUDAKernels.o GPUMatrix.o \
+	GPUWatcher.o GPUSparseMatrix.o  

 INCLUDES = -I../../Common/Include -I/opt/acml5.3.1/gfortran64_mp_int64/include 

@ -10,9 +10,10 @@ DEPS =

 CFLAGS =  $(INCLUDES)  \
 	-D BASETYPES_NO_UNSAFECRTOVERLOAD -DBASETYPES_NO_STRPRINTF \
-	-DLINUX -D_FILEUTIL_  -Wnon-template-friend  -std=c++11
+	-DLINUX -Wnon-template-friend  -std=c++11 # -D_FILEUTIL_  

-NVCFLAGS = -DLINUX -I../../Common/Include -D_FILEUTIL_ -arch sm_11
+NVCFLAGS = -DLINUX -D BASETYPES_NO_UNSAFECRTOVERLOAD -DBASETYPES_NO_STRPRINTF \
+	-I../../Common/Include -arch=compute_20  -std=c++11 # -D_FILEUTIL_ 

 CXX = gcc
 NVCC = nvcc
--- a/Math/Math/Matrix.cpp
+++ b/Math/Math/Matrix.cpp
@ -3435,9 +3435,9 @@ namespace Microsoft { namespace MSR { namespace CNTK {
                else 
                {
                    GPUMatrix<ElemType> firstDummy = transposeA ? a.m_GPUMatrix->Transpose()*alpha : (*a.m_GPUMatrix)*alpha;
-                    GPUMatrix<ElemType> & first= firstDummy;				// By Malcolm.. gcc doesn't support auto
+                    GPUMatrix<ElemType> & first= firstDummy;				// By Malcolm.. gcc doesn't support auto like original
                    GPUSparseMatrix<ElemType> secondDummy = transposeB ? b.m_GPUSparseMatrix->Transpose() : *b.m_GPUSparseMatrix;
-                    GPUSparseMatrix<ElemType> & second = secondDummy;			// By Malcolm.. gcc doesn't support auto
+                    GPUSparseMatrix<ElemType> & second = secondDummy;			// By Malcolm.. gcc doesn't support auto like original
                    if (beta==0)
                    {
                        GPUSparseMatrix<ElemType>::Multiply(first,second,*c.m_GPUMatrix);
--- a/Math/Math/Matrix.h
+++ b/Math/Math/Matrix.h
@ -9,6 +9,10 @@
 #include "GPUMatrix.cuh"
 #include "GPUSparseMatrix.cuh"

+#ifdef	LINUX
+// typedef char wchar_t;
+#endif
+
 // This class is exported from the Math.dll
 namespace Microsoft { namespace MSR { namespace CNTK {
    enum CurrentDataLocation