diff --git a/Makefile.cpu b/Makefile.cpu index 0a7867f73..a64497ad7 100644 --- a/Makefile.cpu +++ b/Makefile.cpu @@ -22,12 +22,25 @@ # This makefile will be extended/completed as we go. CC = g++ +OBJDIR = .obj.cpu +# Comment out the following line and uncomment the next one to enable the MKL library +USE_ACML=1 +#USE_MKL=1 + +ifdef USE_MKL MKL_PATH = /opt/intel/composer_xe_2013_sp1.0.080 -MKL_INCLUDE = $(MKL_PATH)/mkl/include -MKL_LIB = -L$(MKL_PATH)/compiler/lib/intel64 -L$(MKL_PATH)/mkl/lib/intel64 -L$(MKL_PATH)/compiler/lib/mic -L$(MKL_PATH)/mkl/lib/mic -lmkl_intel_lp64 -lmkl_intel_thread -lmkl_core -lm -liomp5 -lpthread -lm +MATHLIB_INCLUDE = $(MKL_PATH)/mkl/include +MATHLIB_LIB = -L$(MKL_PATH)/compiler/lib/intel64 -L$(MKL_PATH)/mkl/lib/intel64 -L$(MKL_PATH)/compiler/lib/mic -L$(MKL_PATH)/mkl/lib/mic -lmkl_intel_lp64 -lmkl_intel_thread -lmkl_core -lm -liomp5 -lpthread +MATHLIB_DEFINE = -DUSE_MKL +else +ACML_PATH = /usr/local/acml5.3.0/gfortran64 +MATHLIB_INCLUDE = $(ACML_PATH)/include +MATHLIB_LIB = -L$(ACML_PATH)/lib -lacml -lm +MATHLIB_DEFINE = -DUSE_ACML +endif -INCFLAGS = -I Common/Include -I Math/Math -I MachineLearning/cn -I $(MKL_INCLUDE) +INCFLAGS = -I Common/Include -I Math/Math -I MachineLearning/cn -I $(MATHLIB_INCLUDE) COMMON_SRC = Common/fileutil.cpp Common/DataWriter.cpp Common/ConfigFile.cpp Common/DataReader.cpp \ Common/Eval.cpp Common/File.cpp Common/BestGpu.cpp @@ -54,13 +67,14 @@ CORE_SRC = $(CN_SRC) $(MATH_SRC) $(COMMON_SRC) SRC = $(READER_SRC) $(CORE_SRC) -CFLAGS = -std=c++0x -std=c++11 -DCPUONLY -D_POSIX_SOURCE -D_XOPEN_SOURCE=600 -D__USE_XOPEN2K -DUSE_MKL -fopenmp -fpermissive -fPIC +CFLAGS = -std=c++0x -std=c++11 -DCPUONLY -D_POSIX_SOURCE -D_XOPEN_SOURCE=600 -D__USE_XOPEN2K $(MATHLIB_DEFINE) -fopenmp -fpermissive -fPIC DEBUG = -g all: cn.exe UCIFastReader SequenceReader LUSequenceReader - mkdir -p bin - mv cn.exe *.so bin/ + mkdir -p bin.cpu + mv cn.exe *.so bin.cpu/ + ln -sf bin.cpu bin UCIFastReader: ${UCIFASTREADER_SRC:.cpp=.o} ${CORE_SRC:.cpp=.o} 
$(CC) $(DEBUG) -fPIC -shared -o $(addsuffix .so, $@) $^ @@ -78,7 +92,7 @@ LUSequenceReader: ${LUSEQUENCEREADER_SRC:.cpp=.o} ${CORE_SRC:.cpp=.o} # $(CC) -o $(addsuffix .so, $@) $^ -fPIC -shared cn.exe: ${CORE_SRC:.cpp=.o} - $(CC) $(DEBUG) -o $@ $^ $(MKL_LIB) -fopenmp -ldl + $(CC) $(DEBUG) -o $@ $^ $(MATHLIB_LIB) -fopenmp -ldl -include ${SRC:.cpp=.d} diff --git a/Makefile.gpu b/Makefile.gpu index 911a60a6e..8eb72b014 100644 --- a/Makefile.gpu +++ b/Makefile.gpu @@ -24,15 +24,27 @@ CC = g++ NVCC = nvcc +# Comment out the following line and uncomment the next one to enable the MKL library +USE_ACML=1 +#USE_MKL=1 + +ifdef USE_MKL +MKL_PATH = /opt/intel/composer_xe_2013_sp1.0.080 +MATHLIB_INCLUDE = $(MKL_PATH)/include +MATHLIB_LIB = -L$(MKL_PATH)/compiler/lib/intel64 -L$(MKL_PATH)/mkl/lib/intel64 -L$(MKL_PATH)/compiler/lib/mic -L$(MKL_PATH)/mkl/lib/mic -lmkl_intel_lp64 -lmkl_intel_thread -lmkl_core -lm -liomp5 -lpthread +MATHLIB_DEFINE = -DUSE_MKL +else +ACML_PATH = /usr/local/acml5.3.0/gfortran64 +MATHLIB_INCLUDE = $(ACML_PATH)/include +MATHLIB_LIB = -L$(ACML_PATH)/lib -lacml -lm +MATHLIB_DEFINE = -DUSE_ACML +endif + CUDA_PATH = /usr/local/cuda-6.5 CUDA_INCLUDE = $(CUDA_PATH)/include CUDA_LIB = -L$(CUDA_PATH)/lib64 -lcublas -lcudart -lcuda -lcurand -lcusparse -lnvidia-ml -MKL_PATH = /opt/intel/composer_xe_2013_sp1.0.080 -MKL_INCLUDE = $(MKL_PATH)/include -MKL_LIB = -L$(MKL_PATH)/compiler/lib/intel64 -L$(MKL_PATH)/mkl/lib/intel64 -L$(MKL_PATH)/compiler/lib/mic -L$(MKL_PATH)/mkl/lib/mic -lmkl_intel_lp64 -lmkl_intel_thread -lmkl_core -lm -liomp5 -lpthread -lm - -INCFLAGS = -I Common/Include -I Math/Math -I MachineLearning/cn -I $(CUDA_INCLUDE) -I $(MKL_INCLUDE) +INCFLAGS = -I Common/Include -I Math/Math -I MachineLearning/cn -I $(CUDA_INCLUDE) -I $(MATHLIB_INCLUDE) COMMON_SRC = Common/fileutil.cpp Common/DataWriter.cpp Common/ConfigFile.cpp Common/DataReader.cpp \ Common/Eval.cpp Common/File.cpp Common/BestGpu.cpp @@ -58,7 +70,7 @@ READER_SRC = $(UCIFASTREADER_SRC) $(LUSEQUENCEREADER_SRC) 
$(HTKMLFREADER_SRC) $( CORE_SRC = $(CN_SRC) $(MATH_SRC) $(COMMON_SRC) -CFLAGS = -std=c++0x -std=c++11 -D_POSIX_SOURCE -D_XOPEN_SOURCE=600 -D__USE_XOPEN2K -DUSE_MKL -fopenmp -fpermissive +CFLAGS = -std=c++0x -std=c++11 -D_POSIX_SOURCE -D_XOPEN_SOURCE=600 -D__USE_XOPEN2K $(MATHLIB_DEFINE) -fopenmp -fpermissive NVCCFLAGS = -std=c++11 -D_POSIX_SOURCE -D_XOPEN_SOURCE=600 -D__USE_XOPEN2K -arch=compute_20 @@ -67,8 +79,9 @@ DEBUG = -g GPU_DEBUG = -G all: cn.exe UCIFastReader SequenceReader LUSequenceReader - mkdir -p bin - mv cn.exe *.so bin/ + mkdir -p bin.gpu + mv cn.exe *.so bin.gpu/ + ln -sf bin.gpu bin tmp = ${CORE_SRC:.cpp=.o} @@ -88,7 +101,7 @@ LUSequenceReader: ${LUSEQUENCEREADER_SRC:.cpp=.o} ${tmp:.cu=.o} # $(CC) -o $(addsuffix .so, $@) $^ -fPIC -shared cn.exe: ${tmp:.cu=.o} - $(CC) $(DEBUG) -o $@ $^ $(CUDA_LIB) $(MKL_LIB) -fopenmp -ldl -fPIC + $(CC) $(DEBUG) -o $@ $^ $(CUDA_LIB) $(MATHLIB_LIB) -fopenmp -ldl -fPIC -include ${SRC:.cpp=.d} diff --git a/Math/Math/CPUMatrix.cpp b/Math/Math/CPUMatrix.cpp index 9fb349fbf..4e72902ba 100644 --- a/Math/Math/CPUMatrix.cpp +++ b/Math/Math/CPUMatrix.cpp @@ -3561,16 +3561,24 @@ namespace Microsoft { namespace MSR { namespace CNTK { const char flag = 'A'; if (sizeof(ElemType) == sizeof(double)) { +#ifndef USE_MKL + dgesvd('A', 'A', (int)m, (int)n, reinterpret_cast (A.m_pArray), (int)lda, reinterpret_cast (SIGMA.m_pArray), reinterpret_cast (U.m_pArray), (int)ldu, reinterpret_cast (VT.m_pArray), (int)ldvt, &info); +#else //missing arguments fixed! 
--author Wengong Jin 2014/12/15 double *work = new double[lwork]; dgesvd(&flag, &flag, (int *) &m, (int *) &n, reinterpret_cast (A.m_pArray), (int *) &lda, reinterpret_cast (SIGMA.m_pArray), reinterpret_cast (U.m_pArray), (int *) &ldu, reinterpret_cast (VT.m_pArray), (int *) &ldvt, work, &lwork, &info); +#endif } else { #pragma warning (suppress: 4244) +#ifndef USE_MKL + sgesvd('A', 'A', (int)m, (int)n, reinterpret_cast (A.m_pArray), (int)lda, reinterpret_cast (SIGMA.m_pArray), reinterpret_cast (U.m_pArray), (int)ldu, reinterpret_cast (VT.m_pArray), (int)ldvt, &info); +#else //missing arguments fixed! --author Wengong Jin 2014/12/15 float *work = new float[lwork]; sgesvd(&flag, &flag, (int *) &m, (int *) &n, reinterpret_cast (A.m_pArray), (int *) &lda, reinterpret_cast (SIGMA.m_pArray), reinterpret_cast (U.m_pArray), (int *) &ldu, reinterpret_cast (VT.m_pArray), (int *) &ldvt, work, &lwork, &info); +#endif } }