diff --git a/src/configure b/src/configure index 7e5ccbf3d..4206839ec 100755 --- a/src/configure +++ b/src/configure @@ -451,7 +451,7 @@ function linux_check_dynamic { echo "Atlas found in $dir"; return 0; else - echo "No libatlas.so in $dir"; + echo "... no libatlas.so in $dir"; return 1; fi } diff --git a/src/cudamatrix/cu-kernels.cu b/src/cudamatrix/cu-kernels.cu index d1c2f28dc..4f5248a38 100644 --- a/src/cudamatrix/cu-kernels.cu +++ b/src/cudamatrix/cu-kernels.cu @@ -276,8 +276,8 @@ static void _copy_col_from_vec(Real* mat, const Real* v, int col, MatrixDim d) { template __global__ static void _apply_exp(Real* mat, MatrixDim d) { - int32_cuda i = blockIdx.y * blockDim.y + threadIdx.y; - int32_cuda j = blockIdx.x * blockDim.x + threadIdx.x; + int32_cuda i = blockIdx.x * blockDim.x + threadIdx.x; + int32_cuda j = blockIdx.y * blockDim.y + threadIdx.y; int32_cuda index = i + j * d.stride; if ( i < d.cols && j < d.rows ) { mat[index] = exp(mat[index]); diff --git a/src/cudamatrix/cu-matrix-test.cc b/src/cudamatrix/cu-matrix-test.cc index c760837f2..11c3744a1 100644 --- a/src/cudamatrix/cu-matrix-test.cc +++ b/src/cudamatrix/cu-matrix-test.cc @@ -143,6 +143,28 @@ static void UnitTestCuMatrixApplyLog() { } +/* + * CuMatrix + */ +template +static void UnitTestCuMatrixApplyExp() { + int32 M = 100 + rand() % 200, N = 100 + rand() % 200; + Matrix H(M, N); + H.SetRandn(); + H.MulElements(H); // make numbers positive + + CuMatrix D(H); + + D.ApplyExp(); + H.ApplyExp(); + + Matrix H2(D); + + AssertEqual(H,H2); +} + + + template static void UnitTestCuMatrixSigmoid() { for (int32 i = 0; i < 3; i++) { @@ -1852,6 +1874,7 @@ template void CudaMatrixUnitTest() { UnitTestCuMatrixCopyCross(); UnitTestCuMatrixCopyCross2(); UnitTestCuMatrixApplyLog(); + UnitTestCuMatrixApplyExp(); UnitTestCuMatrixSetRandn(); UnitTestCuMatrixSetRandUniform(); UnitTestCuMatrixScale(); diff --git a/tools/Makefile b/tools/Makefile index fd0c256e9..4794162bc 100644 --- a/tools/Makefile +++ b/tools/Makefile @@ -1,8 +1,6 @@ # SHELL += -x -CXX ?= g++ -CXXFLAGS ?= -LDFLAGS ?= +CXX = g++ # On Mac OS 10.9, g++ is actually clang in disguise which by default uses the # new c++ standard library libc++. Since openfst uses stuff from the tr1 @@ -12,7 +10,7 @@ ifeq ($(findstring clang,$(COMPILER)),clang) CXXFLAGS += -stdlib=libstdc++ LDFLAGS += -stdlib=libstdc++ endif - + all: check_required_programs sph2pipe atlas irstlm_tgt sclite_tgt openfst_tgt @@ -109,7 +107,7 @@ irstlm_tgt: irstlm_compiled .PHONY: irstlm_compiled irstlm_compiled: irstlm/Makefile cd irstlm/; \ - $(MAKE); $(MAKE) install + make; $(MAKE) install irstlm/Makefile: irstlm/.patched cd irstlm; \ @@ -123,7 +121,7 @@ irstlm/.patched: | irstlm -cd irstlm;\ patch --verbose -N -p0 < ../interpolatedwrite-5.60.02.patch; \ patch --verbose -N -p0 < ../irstlm.patch; \ - touch $@ + touch .patched irstlm: svn -r 398 co --non-interactive --trust-server-cert https://svn.code.sf.net/p/irstlm/code/trunk irstlm