Simplified unified CPU/GPU/Kaldi Makefile

Modularize the build specification per target: each target appends what it
needs to the shared include, library, and source paths (see the sketch below).
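A minimal sketch of that pattern, using a hypothetical FooReader plugin (the variables it appends to, such as TARGETS, SRC, and LIBPATH, are the ones the new Makefile defines for each real reader):

# Illustrative only; FooReader is not an actual reader in this commit.
FOOREADER_SRC =\
	DataReader/FooReader/Exports.cpp \
	DataReader/FooReader/FooReader.cpp

FOOREADER_OBJ := $(patsubst %.cpp, $(OBJDIR)/%.o, $(FOOREADER_SRC))
FOOREADER := $(LIBDIR)/FooReader.so
TARGETS += $(FOOREADER)
SRC += $(FOOREADER_SRC)

$(FOOREADER): $(FOOREADER_OBJ) | $(CNTKMATH_LIB)
	$(CC) $(LDFLAGS) -shared $(patsubst %,-L%, $(LIBDIR) $(LIBPATH)) $(patsubst %,$(RPATH)%, $(ORIGINDIR) $(LIBPATH)) -o $@ $^ -l$(CNTKMATH)

The Kaldi targets follow the same shape, additionally appending their own Kaldi library paths and libraries.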

Add rpath to cntkmath and plugins so they do not need LD_LIBRARY_PATH.
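The mechanism, simplified from the link rules below (the -L search paths are omitted here): '$$ORIGIN' survives make's expansion as the literal token $ORIGIN, which the dynamic loader resolves at run time to the directory containing the loading binary or library, so bin/cntk finds libcntkmath.so in ../lib and the reader plugins find it next to themselves in lib/.

RPATH=-Wl,-rpath,
ORIGINDIR:='$$ORIGIN'
ORIGINLIBDIR:='$$ORIGIN/../lib'
# reader plugins: embed $ORIGIN so libcntkmath.so beside them in lib/ is found
$(CC) $(LDFLAGS) -shared $(patsubst %,$(RPATH)%, $(ORIGINDIR) $(LIBPATH)) -o $@ $^ -l$(CNTKMATH)
# the cntk executable: embed $ORIGIN/../lib so bin/cntk finds lib/libcntkmath.so
$(CC) $(LDFLAGS) $(patsubst %,$(RPATH)%, $(ORIGINLIBDIR) $(LIBPATH)) -o $@ $^ $(LIBS) -l$(CNTKMATH)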

Remove object files from cntk that were already in cntkmath.

Organize build targets into UNIX-like bin and lib directories under a
configuration-specific directory.  Have .gitignore ignore these
directories.
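For example, a default build (CPU, release, ACML) on an x86_64 machine lands under a directory named from $(ARCH).$(DEVICE).$(BUILDTYPE).$(MATHLIB), which is what the new x86_64.* pattern in .gitignore covers (illustrative listing, not exhaustive):

x86_64.cpu.release.acml/bin/cntk
x86_64.cpu.release.acml/lib/libcntkmath.so
x86_64.cpu.release.acml/lib/HTKMLFReader.so
x86_64.cpu.release.acml/lib/LMSequenceReader.so
x86_64.cpu.release.acml/lib/LUSequenceReader.so
x86_64.cpu.release.acml/lib/UCIFastReader.so
.build/x86_64.cpu.release.acml/    (intermediate .o and .d files)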

Make it easy to keep sources in alphabetical order, simplifying comparison
with the Windows project definition.
Scott Cyphers, 2015-08-05 18:46:31 -04:00
Parent: 38f507d821
Commit: 25f8594d6e
2 changed files: 311 additions and 170 deletions

.gitignore (1 addition)

@@ -16,6 +16,7 @@ build/
[Bb]in/
[Oo]bj/
.run-*
x86_64.*
# Enable "build/" folder in the NuGet Packages folder since NuGet packages use it for MSBuild targets
!packages/*/build/

Makefile (480 lines changed)

@@ -7,92 +7,62 @@
#
# This makefile will be extended/completed as we go.
#
# You will need to modify PATH and LD_LIBRARY_PATH environment variables to run CNTK
# export LD_LIBRARY_PATH=<path_to_math_lib>/ifort64/lib:<path_to_cuda>/lib64:/usr/local/lib
# export PATH=$PATH:/usr/local/bin:<path_to_cuda>/bin
#
# In order to deviate from the default settings in this Makefile, please specify options on
# the make command line, like this, for example (to build release):
# the make command line, like this, for example, to build debug with Kaldi for cuda,
#
# make BUILDTYPE=release -j
# make DEBUG=1 USE_CUDA=1 USE_KALDI=1
# These are the options. USE_ACML is the default for CPU math.
#DEBUG
#USE_ACML
#USE_MKL
#USE_CUDA
#USE_KALDI
# Paths. This still needs some generification
ACML_PATH = /usr/local/acml5.3.1/ifort64
MKL_PATH = /usr/users/yzhang87/tools/composer_xe_2015.2.164
CUDA_PATH = /scratch/cuda-6.5
#CUDA_PATH = /usr/local/cuda-7.0
# Kaldi build should correspond to whether you are using cuda
KALDI_CPU_PATH = /usr/users/cyphers/kaldi-trunk
KALDI_CUDA_PATH = /usr/users/yzhang87/code/kaldi-trunk
# You need to install the deployment kit from https://developer.nvidia.com/gpu-deployment-kit
# This is for the default install location, /
GDK_PATH=/usr
#### Configure based on options above
CC = g++
NVCC = nvcc
ARCH = x86_64
# DEVICE can also be cpu
DEVICE = gpu
# BUILDTYPE can also be release
BUILDTYPE = debug
# MATHLIB can also be mkl
MATHLIB = acml
# This is a suggested/default location for ACML library
MATHLIB_PATH = /usr/local/acml5.3.1/ifort64
# This is a suggested/default location for CUDA
CUDA_PATH = /usr/local/cuda-7.0
# This is a suggested/default location for NVML
NVML_INCLUDE = /usr/include/nvidia/gdk
NVML_LIB = /usr/src/gdk/nvml/lib
#######
BUILDFOR = $(ARCH).$(DEVICE).$(BUILDTYPE).$(MATHLIB)
OBJDIR = .build/$(BUILDFOR)
BINDIR = bin/$(BUILDFOR)
# Set up debug vs release compiler settings, both nvcc and gcc
ifeq ($(BUILDTYPE),debug)
BUILDTYPE_OPT = -g
GPU_BUILDTYPE_OPT = -O0 -G -lineinfo
else
BUILDTYPE_OPT = -O3 -flto
GPU_BUILDTYPE_OPT = -O3 -use_fast_math -lineinfo
ifndef ARCH
ARCH = $(shell uname -m)
endif
# Set up math library defines and libraries
ifeq ($(MATHLIB),mkl)
MATHLIB_INCLUDE = $(MATHLIB_PATH)/mkl/include
MATHLIB_LIB = -L$(MATHLIB_PATH)/compiler/lib/intel64 -L$(MATHLIB_PATH)/mkl/lib/intel64 -L$(MATHLIB_PATH)/compiler/lib/mic -L$(MATHLIB_PATH)/mkl/lib/mic -lmkl_intel_lp64 -lmkl_intel_thread -lmkl_core -lm -liomp5 -lpthread
MATHLIB_DEFINE = -DUSE_MKL
else
MATHLIB_INCLUDE = $(MATHLIB_PATH)/include
MATHLIB_LIB = -L$(MATHLIB_PATH)/lib -lacml -lm -lpthread
MATHLIB_DEFINE = -DUSE_ACML
ifndef USE_MKL
ifndef USE_ACML
USE_ACML=1
endif
endif
# Set up CUDA includes and libraries
CUDA_INCLUDE = $(CUDA_PATH)/include
CUDA_LIB = -L$(CUDA_PATH)/lib64 -L$(NVML_LIB) -lcublas -lcudart -lcurand -lcusparse -lnvidia-ml
INCLUDEPATH:= Common/Include Math/Math MachineLearning/CNTK
CPPFLAGS:= -D_POSIX_SOURCE -D_XOPEN_SOURCE=600 -D__USE_XOPEN2K
CXXFLAGS:= -msse3 -std=c++0x -std=c++11 -fopenmp -fpermissive -fPIC
LIBPATH:=
LIBS:=
LDFLAGS:=
ORIGINLIBDIR:='$$ORIGIN/../lib'
ORIGINDIR:='$$ORIGIN'
# Set up final list of libs to use
ifeq ($(DEVICE),gpu)
LINK_LIBS = $(CUDA_LIB) $(MATHLIB_LIB)
else
LINK_LIBS = $(MATHLIB_LIB)
endif
SEPARATOR = "=-----------------------------------------------------------="
TARGETS:=
SRC:=
# Compile CNTK math into its own shared library to ensure that any change to its
# global variables, like CUDA streams is made in one place and has global effect.
# Otherwise, different clients of CNTK math would observe different states.
CNTKMATH_LINK_LIB = -L$(BINDIR) -lcntkmath
CNTKMATH_LIB = $(BINDIR)/libcntkmath.so
# Set up gcc includes and libraries
INCFLAGS_COMMON = -I Common/Include -I Math/Math -I MachineLearning/CNTK -I $(MATHLIB_INCLUDE)
CFLAGS_COMMON = -msse3 -std=c++0x -std=c++11 -D_POSIX_SOURCE -D_XOPEN_SOURCE=600 -D__USE_XOPEN2K $(MATHLIB_DEFINE) -fopenmp -fpermissive -fPIC
ifeq ($(DEVICE),gpu)
INCFLAGS = $(INCFLAGS_COMMON) -I $(CUDA_INCLUDE) -I $(NVML_INCLUDE)
CFLAGS = $(CFLAGS_COMMON)
else
INCFLAGS = $(INCFLAGS_COMMON)
CFLAGS = $(CFLAGS_COMMON) -DCPUONLY
endif
all : alltargets
# Set up nvcc target architectures (will generate code to support them all, i.e. fat-binary)
GENCODE_SM20 := -gencode arch=compute_20,code=\"sm_20,compute_20\"
@@ -100,140 +70,310 @@ GENCODE_SM30 := -gencode arch=compute_30,code=\"sm_30,compute_30\"
GENCODE_SM35 := -gencode arch=compute_35,code=\"sm_35,compute_35\"
GENCODE_FLAGS := $(GENCODE_SM20) $(GENCODE_SM30) $(GENCODE_SM35)
# Set up basic nvcc options and add GPU targets from above
NVCCFLAGS = -std=c++11 -D_POSIX_SOURCE -D_XOPEN_SOURCE=600 -D__USE_XOPEN2K -m 64 $(GENCODE_FLAGS)
# Set up basic nvcc options and add CUDA targets from above
CUFLAGS = -std=c++11 -D_POSIX_SOURCE -D_XOPEN_SOURCE=600 -D__USE_XOPEN2K -m 64 $(GENCODE_FLAGS)
ifdef USE_CUDA
DEVICE = gpu
NVCC = $(CUDA_PATH)/bin/nvcc
KALDI_PATH = $(KALDI_CUDA_PATH)
# This is a suggested/default location for NVML
INCLUDEPATH+=$(GDK_PATH)/include/nvidia/gdk
NVMLPATH=$(GDK_PATH)/src/gdk/nvml/lib
# Set up CUDA includes and libraries
INCLUDEPATH += $(CUDA_PATH)/include
LIBPATH += $(CUDA_PATH)/lib64
LIBS += -lcublas -lcudart -lcuda -lcurand -lcusparse -lnvidia-ml
# Set up linker option to embed ORIGIN, i.e. directory where cntk is into the search path option
# at runtime. This will try to resolve all dependent binaries in the same directory where cntk binary resides
LDFLAGS:=-Wl,-rpath,'$$ORIGIN'
ifeq ($(DEVICE),cpu)
LDFLAGS:=$(LDFLAGS) -Wl,-rpath,$(MATHLIB_PATH)/lib
else
LDFLAGS:=$(LDFLAGS) -Wl,-rpath,$(CUDA_PATH)/lib -Wl,-rpath,$(NVML_LIB)
DEVICE = cpu
KALDI_PATH = $(KALDI_CPU_PATH)
CPPFLAGS +=-DCPUONLY
endif
ifdef USE_ACML
MATHLIB = acml
INCLUDEPATH += $(ACML_PATH)/include
LIBPATH += $(ACML_PATH)/lib
LIBS += -lacml -lm -lpthread
CPPFLAGS += -DUSE_ACML
endif
ifdef USE_MKL
MATHLIB = mkl
INCLUDEPATH += $(MKL_PATH)/mkl/include
LIBPATH += $(MKL_PATH)/compiler/lib/intel64 $(MKL_PATH)/mkl/lib/intel64 $(MKL_PATH)/compiler/lib/mic $(MKL_PATH)/mkl/lib/mic
LIBS += -lmkl_intel_lp64 -lmkl_intel_thread -lmkl_core -lm -liomp5 -lpthread
CPPFLAGS += -DUSE_MKL
endif
ifdef USE_KALDI
########## Copy includes and defines from $(KALDI_PATH)/src/kaldi.mk ##########
FSTROOT = $(KALDI_PATH)/tools/openfst
ATLASINC = $(KALDI_PATH)/tools/ATLAS/include
INCLUDEPATH += $(KALDI_PATH)/src $(ATLASINC) $(FSTROOT)/include
CPPFLAGS+= -DKALDI_DOUBLEPRECISION=0 -DHAVE_POSIX_MEMALIGN -DHAVE_EXECINFO_H=1 -DHAVE_CXXABI_H -DHAVE_ATLAS -DHAVE_OPENFST_GE_10400
KALDI_LIBPATH += $(KALDI_PATH)/src/lib
KALDI_LIBS += -lkaldi-util -lkaldi-matrix -lkaldi-base -lkaldi-hmm -lkaldi-cudamatrix -lkaldi-nnet -lkaldi-lat
endif
# BUILDTYPE can also be release
ifdef DEBUG
BUILDTYPE = debug
CXXFLAGS += -g
CUFLAGS += -O0 -G -lineinfo
else
BUILDTYPE = release
CXXFLAGS += -O4
CUFLAGS += -O3 -use_fast_math -lineinfo
endif
#######
BUILDFOR:= $(ARCH).$(DEVICE).$(BUILDTYPE).$(MATHLIB)
OBJDIR:= .build/$(BUILDFOR)
BINDIR:= $(BUILDFOR)/bin
LIBDIR:= $(BUILDFOR)/lib
CNTKMATH:=cntkmath
# Define all sources that need to be built
COMMON_SRC = Common/fileutil.cpp Common/DataWriter.cpp Common/ConfigFile.cpp Common/DataReader.cpp \
Common/Eval.cpp Common/File.cpp Common/BestGpu.cpp Common/TimerUtility.cpp
COMMON_SRC =\
Common/BestGpu.cpp \
Common/ConfigFile.cpp \
Common/DataReader.cpp \
Common/DataWriter.cpp \
Common/Eval.cpp \
Common/File.cpp \
Common/TimerUtility.cpp \
Common/fileutil.cpp \
MATH_COMMON_SRC = Math/Math/Matrix.cpp Math/Math/CPUMatrix.cpp Math/Math/CPUSparseMatrix.cpp
MATH_SRC =\
Math/Math/CPUMatrix.cpp \
Math/Math/CPUSparseMatrix.cpp \
Math/Math/Matrix.cpp \
ifdef USE_CUDA
MATH_SRC +=\
Math/Math/GPUMatrix.cu \
Math/Math/GPUMatrixCUDAKernels.cu \
Math/Math/GPUSparseMatrix.cu \
Math/Math/GPUWatcher.cu \
ifeq ($(DEVICE),gpu)
MATH_SRC = $(MATH_COMMON_SRC) Math/Math/GPUMatrix.cu Math/Math/GPUMatrixCUDAKernels.cu Math/Math/GPUSparseMatrix.cu Math/Math/GPUWatcher.cu
else
MATH_SRC = $(MATH_COMMON_SRC) Math/Math/NoGPU.cpp
MATH_SRC +=\
Math/Math/NoGPU.cpp
endif
CN_SRC = MachineLearning/CNTK/NetworkDescriptionLanguage.cpp MachineLearning/CNTK/CNTK.cpp MachineLearning/CNTK/ComputationNode.cpp \
MachineLearning/CNTK/ModelEditLanguage.cpp MachineLearning/CNTK/SimpleNetworkBuilder.cpp MachineLearning/CNTK/tests.cpp \
MachineLearning/CNTK/Profiler.cpp MachineLearning/CNTKEval/CNTKEval.cpp
MATH_SRC+=$(COMMON_SRC)
BINARYREADER_SRC = DataReader/BinaryReader/BinaryWriter.cpp DataReader/BinaryReader/BinaryReader.cpp DataReader/BinaryReader/BinaryFile.cpp
HTKMLFREADER_SRC = DataReader/HTKMLFReader_linux/HTKMLFWriter.cpp DataReader/HTKMLFReader_linux/DataWriter.cpp DataReader/HTKMLFReader_linux/DataReader.cpp DataReader/HTKMLFReader_linux/HTKMLFReader.cpp
SEQUENCEREADER_SRC = DataReader/LMSequenceReader/SequenceReader.cpp DataReader/LMSequenceReader/SequenceParser.cpp DataReader/LMSequenceReader/Exports.cpp
LUSEQUENCEREADER_SRC = DataReader/LUSequenceReader/LUSequenceReader.cpp DataReader/LUSequenceReader/LUSequenceParser.cpp DataReader/LUSequenceReader/Exports.cpp
UCIFASTREADER_SRC = DataReader/UCIFastReader/UCIParser.cpp DataReader/UCIFastReader/UCIFastReader.cpp DataReader/UCIFastReader/Exports.cpp
MATH_OBJ := $(patsubst %.cu, $(OBJDIR)/%.o, $(patsubst %.cpp, $(OBJDIR)/%.o, $(MATH_SRC)))
READER_SRC = $(UCIFASTREADER_SRC) $(LUSEQUENCEREADER_SRC) $(HTKMLFREADER_SRC) $(SEQUENCEREADER_SRC) $(BINARYREADER_SRC)
CORE_SRC = $(CN_SRC) $(COMMON_SRC)
SRC = $(READER_SRC) $(CORE_SRC) $(MATH_SRC)
CNTKMATH_LIB:= $(LIBDIR)/lib$(CNTKMATH).so
TARGETS += $(CNTKMATH_LIB)
SRC+=$(MATH_SRC)
RPATH=-Wl,-rpath,
$(CNTKMATH_LIB): $(MATH_OBJ)
@echo $(SEPARATOR)
@echo creating $@ for $(ARCH) with build type $(BUILDTYPE)
@mkdir -p $(dir $@)
$(CC) $(LDFLAGS) -shared $(patsubst %,-L%, $(LIBPATH) $(NVMLPATH)) $(patsubst %,$(RPATH)%, $(ORIGINDIR) $(LIBPATH)) -o $@ $^ $(LIBS) -fopenmp
LIBLIBPATH:=$(LIBDIR) $(LIBPATH)
BINARYREADER_SRC =\
DataReader/BinaryReader/BinaryFile.cpp \
DataReader/BinaryReader/BinaryReader.cpp \
DataReader/BinaryReader/BinaryWriter.cpp \
BINARYREADER_OBJ := $(patsubst %.cpp, $(OBJDIR)/%.o, $(BINARYREADER_SRC))
BINARY_READER:= $(LIBDIR)/BinaryReader.so
#TARGETS += $(BINARY_READER)
#SRC+=$(BINARYREADER_SRC)
$(BINARY_READER): $(BINARYREADER_OBJ) | $(CNTKMATH_LIB)
@echo $(SEPARATOR)
$(CC) $(LDFLAGS) -shared $(patsubst %,-L%, $(LIBDIR) $(LIBPATH)) $(patsubst %,$(RPATH)%, $(ORIGINDIR) $(LIBPATH)) -o $@ $^ -l$(CNTKMATH)
HTKMLFREADER_SRC =\
DataReader/HTKMLFReader_linux/DataReader.cpp \
DataReader/HTKMLFReader_linux/DataWriter.cpp \
DataReader/HTKMLFReader_linux/HTKMLFReader.cpp \
DataReader/HTKMLFReader_linux/HTKMLFWriter.cpp \
HTKMLREADER_OBJ := $(patsubst %.cpp, $(OBJDIR)/%.o, $(HTKMLFREADER_SRC))
HTKMLREADER:=$(LIBDIR)/HTKMLFReader.so
TARGETS+=$(HTKMLREADER)
SRC+=$(HTKMLREADER_SRC)
$(LIBDIR)/HTKMLFReader.so: $(HTKMLREADER_OBJ) | $(CNTKMATH_LIB)
@echo $(SEPARATOR)
$(CC) $(LDFLAGS) -shared $(patsubst %,-L%, $(LIBDIR) $(LIBPATH)) $(patsubst %,$(RPATH)%, $(ORIGINDIR) $(LIBPATH)) -o $@ $^ -l$(CNTKMATH)
LMSEQUENCEREADER_SRC =\
DataReader/LMSequenceReader/Exports.cpp \
DataReader/LMSequenceReader/SequenceParser.cpp \
DataReader/LMSequenceReader/SequenceReader.cpp \
LMSEQUENCEREADER_OBJ := $(patsubst %.cpp, $(OBJDIR)/%.o, $(LMSEQUENCEREADER_SRC))
LMSEQUENCEREADER:= $(LIBDIR)/LMSequenceReader.so
TARGETS+=$(LMSEQUENCEREADER)
SRC+=$(LMSEQUENCEREADER_SRC)
$(LMSEQUENCEREADER): $(LMSEQUENCEREADER_OBJ) | $(CNTKMATH_LIB)
@echo $(SEPARATOR)
$(CC) $(LDFLAGS) -shared $(patsubst %,-L%, $(LIBDIR) $(LIBPATH)) $(patsubst %,$(RPATH)%, $(ORIGINDIR) $(LIBPATH)) -o $@ $^ -l$(CNTKMATH)
LUSEQUENCEREADER_SRC =\
DataReader/LUSequenceReader/Exports.cpp \
DataReader/LUSequenceReader/LUSequenceParser.cpp \
DataReader/LUSequenceReader/LUSequenceReader.cpp \
LUSEQUENCEREADER_OBJ := $(patsubst %.cpp, $(OBJDIR)/%.o, $(LUSEQUENCEREADER_SRC))
LUSEQUENCEREADER:=$(LIBDIR)/LUSequenceReader.so
TARGETS+=$(LUSEQUENCEREADER)
SRC+=$(LUSEQUENCEREADER_SRC)
$(LUSEQUENCEREADER): $(LUSEQUENCEREADER_OBJ) | $(CNTKMATH_LIB)
@echo $(SEPARATOR)
$(CC) $(LDFLAGS) -shared $(patsubst %,-L%, $(LIBDIR) $(LIBPATH)) $(patsubst %,$(RPATH)%, $(ORIGINDIR) $(LIBPATH)) -o $@ $^ -l$(CNTKMATH)
UCIFASTREADER_SRC =\
DataReader/UCIFastReader/Exports.cpp \
DataReader/UCIFastReader/UCIFastReader.cpp \
DataReader/UCIFastReader/UCIParser.cpp \
UCIFASTREADER_OBJ := $(patsubst %.cpp, $(OBJDIR)/%.o, $(UCIFASTREADER_SRC))
UCIFASTREADER:=$(LIBDIR)/UCIFastReader.so
TARGETS += $(UCIFASTREADER)
SRC+=$(UCIFASTREADER_SRC)
$(UCIFASTREADER): $(UCIFASTREADER_OBJ) | $(CNTKMATH_LIB)
@echo $(SEPARATOR)
$(CC) $(LDFLAGS) -shared $(patsubst %,-L%, $(LIBDIR) $(LIBPATH)) $(patsubst %,$(RPATH)%, $(ORIGINDIR) $(LIBPATH)) -o $@ $^ -l$(CNTKMATH)
ifdef USE_KALDI
KALDIREADER_SRC = \
DataReader/KaldiReader/DataReader.cpp \
DataReader/KaldiReader/DataWriter.cpp \
DataReader/KaldiReader/HTKMLFReader.cpp \
DataReader/KaldiReader/HTKMLFWriter.cpp \
KALDIREADER_OBJ := $(patsubst %.cpp, $(OBJDIR)/%.o, $(KALDIREADER_SRC))
KALDIREADER:=$(LIBDIR)/KaldiReader.so
TARGETS+=$(KALDIREADER)
SRC+=$(KALDIREADER_SRC)
$(KALDIREADER): $(KALDIREADER_OBJ)
@echo $(SEPARATOR)
$(CC) $(LDFLAGS) -shared $(patsubst %,-L%, $(LIBDIR) $(KALDI_LIBPATH) $(LIBPATH)) $(patsubst %,$(RPATH)%, $(ORIGINDIR) $(KALDI_LIBPATH) $(LIBPATH)) -o $@ $^ -l$(CNTKMATH) $(KALDI_LIBS)
KALDIWRITER:=$(LIBDIR)/KaldiWriter.so
TARGETS+=$(KALDIWRITER)
$(KALDIWRITER): $(KALDIREADER_OBJ)
@echo $(SEPARATOR)
$(CC) $(LDFLAGS) -shared $(patsubst %,-L%, $(LIBDIR) $(LIBPATH)) $(patsubst %,$(RPATH)%, $(ORIGINDIR) $(LIBPATH)) -o $@ $^ -l$(CNTKMATH)
KALDI2READER_SRC = \
DataReader/Kaldi2Reader/DataReader.cpp \
DataReader/Kaldi2Reader/DataWriter.cpp \
DataReader/Kaldi2Reader/HTKMLFReader.cpp \
DataReader/Kaldi2Reader/HTKMLFWriter.cpp \
DataReader/Kaldi2Reader/KaldiSequenceTrainingDerivative.cpp \
DataReader/Kaldi2Reader/UtteranceDerivativeBuffer.cpp \
KALDI2READER_OBJ := $(patsubst %.cpp, $(OBJDIR)/%.o, $(KALDI2READER_SRC))
KALDI2READER:=$(LIBDIR)/Kaldi2Reader.so
TARGETS+=$(KALDI2READER)
SRC+=$(KALDI2READER_SRC)
$(KALDI2READER): $(KALDI2READER_OBJ)
@echo $(SEPARATOR)
$(CC) $(LDFLAGS) -shared $(patsubst %,-L%, $(LIBDIR) $(KALDI_LIBPATH) $(LIBPATH)) $(patsubst %,$(RPATH)%, $(ORIGINDIR) $(KALDI_LIBPATH) $(LIBPATH)) -o $@ $^ -l$(CNTKMATH) $(KALDI_LIBS)
endif
CN_SRC =\
MachineLearning/CNTK/CNTK.cpp \
MachineLearning/CNTK/ComputationNode.cpp \
MachineLearning/CNTK/ModelEditLanguage.cpp \
MachineLearning/CNTK/NetworkDescriptionLanguage.cpp \
MachineLearning/CNTK/Profiler.cpp \
MachineLearning/CNTK/SimpleNetworkBuilder.cpp \
MachineLearning/CNTK/tests.cpp \
MachineLearning/CNTKEval/CNTKEval.cpp \
CN_OBJ := $(patsubst %.cpp, $(OBJDIR)/%.o, $(CN_SRC))
CNTK:=$(BINDIR)/cntk
TARGETS+=$(CNTK)
$(CNTK): $(CN_OBJ) | $(CNTKMATH_LIB)
@echo $(SEPARATOR)
@mkdir -p $(dir $@)
@echo building output for $(ARCH) with build type $(BUILDTYPE)
$(CC) $(LDFLAGS) $(patsubst %,-L%, $(LIBDIR) $(LIBPATH)) $(patsubst %,$(RPATH)%, $(ORIGINLIBDIR) $(LIBPATH)) -o $@ $^ $(LIBS) -l$(CNTKMATH) -fopenmp
VPATH := $(sort $(dir $(SRC)))
# Define object files
OBJ_TMP := $(patsubst %.cpp, $(OBJDIR)/%.o, $(SRC))
ifeq ($(DEVICE),gpu)
OBJ := $(patsubst %.cu, $(OBJDIR)/%.o, $(OBJ_TMP))
else
OBJ := $(OBJ_TMP)
endif
CORE_OBJ_TMP := $(patsubst %.cpp, $(OBJDIR)/%.o, $(CORE_SRC))
ifeq ($(DEVICE),gpu)
CORE_OBJ := $(patsubst %.cu, $(OBJDIR)/%.o, $(CORE_OBJ_TMP))
else
CORE_OBJ := $(CORE_OBJ_TMP)
endif
COMMON_OBJ := $(patsubst %.cpp, $(OBJDIR)/%.o, $(COMMON_SRC))
MATH_OBJ_TMP := $(patsubst %.cpp, $(OBJDIR)/%.o, $(MATH_SRC))
ifeq ($(DEVICE),gpu)
MATH_OBJ := $(patsubst %.cu, $(OBJDIR)/%.o, $(MATH_OBJ_TMP))
else
MATH_OBJ := $(MATH_OBJ_TMP)
endif
UCIFASTREADER_OBJ := $(patsubst %.cpp, $(OBJDIR)/%.o, $(UCIFASTREADER_SRC))
LUSEQUENCEREADER_OBJ := $(patsubst %.cpp, $(OBJDIR)/%.o, $(LUSEQUENCEREADER_SRC))
SEQUENCEREADER_OBJ := $(patsubst %.cpp, $(OBJDIR)/%.o, $(SEQUENCEREADER_SRC))
HTKMLFREADER_OBJ := $(patsubst %.cpp, $(OBJDIR)/%.o, $(HTKMLFREADER_SRC))
BINARYREADER_OBJ := $(patsubst %.cpp, $(OBJDIR)/%.o, $(BINARYREADER_SRC))
OBJ := $(patsubst %.cu, $(OBJDIR)/%.o, $(patsubst %.cpp, $(OBJDIR)/%.o, $(SRC)))
# C++ include dependencies generated by -MF compiler option
DEP := $(patsubst %.o, %.d, $(OBJ))
SEPARATOR = "=-----------------------------------------------------------="
# Define build targets
all: $(BINDIR)/cntk $(BINDIR)/UCIFastReader.so $(BINDIR)/LMSequenceReader.so $(BINDIR)/LUSequenceReader.so $(BINDIR)/HTKMLFReader.so
@echo $(SEPARATOR)
@echo finished building for $(ARCH) with build type $(BUILDTYPE)
$(BINDIR)/UCIFastReader.so: $(UCIFASTREADER_OBJ) | $(CNTKMATH_LIB)
@echo $(SEPARATOR)
$(CC) $(BUILDTYPE_OPT) -fPIC -shared -o $@ $^ $(CNTKMATH_LINK_LIB)
$(BINDIR)/LMSequenceReader.so: $(SEQUENCEREADER_OBJ) | $(CNTKMATH_LIB)
@echo $(SEPARATOR)
$(CC) $(BUILDTYPE_OPT) -fPIC -shared -o $@ $^ $(CNTKMATH_LINK_LIB)
$(BINDIR)/LUSequenceReader.so: $(LUSEQUENCEREADER_OBJ) | $(CNTKMATH_LIB)
@echo $(SEPARATOR)
$(CC) $(BUILDTYPE_OPT) -fPIC -shared -o $@ $^ $(CNTKMATH_LINK_LIB)
$(BINDIR)/HTKMLFReader.so: $(HTKMLFREADER_OBJ) | $(CNTKMATH_LIB)
@echo $(SEPARATOR)
$(CC) $(BUILDTYPE_OPT) -fPIC -shared -o $@ $^ $(CNTKMATH_LINK_LIB)
$(BINDIR)/BinaryReader.so: $(BINARYREADER_OBJ) | $(CNTKMATH_LIB)
@echo $(SEPARATOR)
$(CC) $(BUILDTYPE_OPT) -fPIC -shared -o $@ $^ $(CNTKMATH_LINK_LIB)
$(BINDIR)/cntk: $(CORE_OBJ) | $(CNTKMATH_LIB)
@echo $(SEPARATOR)
@mkdir -p $(dir $@)
@echo building output for $(ARCH) with build type $(BUILDTYPE)
$(CC) $(BUILDTYPE_OPT) $(LDFLAGS) -o $@ $^ $(LINK_LIBS) $(CNTKMATH_LINK_LIB) -fopenmp -ldl -fPIC
$(CNTKMATH_LIB): $(MATH_OBJ) $(COMMON_OBJ)
@echo $(SEPARATOR)
@echo creating $@ for $(ARCH) with build type $(BUILDTYPE)
@mkdir -p $(dir $@)
$(CC) $(BUILDTYPE_OPT) -fPIC -shared -o $@ $^ $(LINK_LIBS) -fopenmp
# Include all C++ dependencies, like header files, to ensure that a change in those
# will result in the rebuild.
-include ${DEP}
ifeq ($(DEVICE),gpu)
$(OBJDIR)/%.o : %.cu Makefile
@echo $(SEPARATOR)
@echo creating $@ for $(ARCH) with build type $(BUILDTYPE)
@mkdir -p $(dir $@)
$(NVCC) -c $< -o $@ $(GPU_BUILDTYPE_OPT) $(NVCCFLAGS) $(INCFLAGS) -Xcompiler -fPIC
endif
$(NVCC) -c $< -o $@ $(CUFLAGS) $(INCLUDEPATH:%=-I%) -Xcompiler -fPIC
$(OBJDIR)/%.o : %.cpp Makefile
@echo $(SEPARATOR)
@echo creating $@ for $(ARCH) with build type $(BUILDTYPE)
@mkdir -p $(dir $@)
$(CC) -c $< -o $@ $(BUILDTYPE_OPT) $(CPPFLAGS) $(CFLAGS) $(INCFLAGS) -MD -MP -MF ${@:.o=.d}
$(CC) -c $< -o $@ $(CPPFLAGS) $(CXXFLAGS) $(INCLUDEPATH:%=-I%) -MD -MP -MF ${@:.o=.d}
.PHONY: clean
.PHONY: clean alltargets
clean:
@echo $(SEPARATOR)
@rm -rf $(OBJDIR)
@rm -rf $(BINDIR)
@rm -rf $(LIBDIR)
@echo finished cleaning up the project
alltargets : $(TARGETS)
@echo $(SEPARATOR)
@echo finished building for $(ARCH) with build type $(BUILDTYPE)