Merge branch 'master' into fseide/bs

Conflicts:
	Math/Math/NoGPU.cpp
This commit is contained in:
Frank Seide 2015-09-03 13:50:03 -07:00
Parents: 5f086ce8d9 ad619733fd
Commit 785429e854
13 changed files with 28 additions and 20 deletions

View file

@@ -60,7 +60,7 @@ class lattice
size_t impliedspunitid : 31; // id of implied last unit (intended as /sp/); only used in V2
size_t hasacscores : 1; // if 1 then ac scores are embedded
header_v1_v2() : numnodes (0), numedges (0), lmf (1.0f), wp (0.0f), frameduration (0.01/*assumption*/), numframes (0), impliedspunitid (SIZE_MAX), hasacscores (1) { }
header_v1_v2() : numnodes (0), numedges (0), lmf (1.0f), wp (0.0f), frameduration (0.01/*assumption*/), numframes (0), impliedspunitid (INT_MAX), hasacscores (1) { }
};
header_v1_v2 info; // information about the lattice
static const unsigned int NOEDGE = 0xffffff; // 24 bits
@@ -507,7 +507,7 @@ public:
}
};
typedef aligninfo aligninfo; // now we can access it as htkmlfwordsequence::aligninfo although it comes from some totally other corner of the system
typedef msra::lattices::aligninfo aligninfo; // now we can access it as htkmlfwordsequence::aligninfo although it comes from some totally other corner of the system
std::vector<word> words;
std::vector<aligninfo> align;

View file

@@ -1983,10 +1983,11 @@ public:
//// set prune value to 0 3 3
//setMinObs (iMinObs);
for (size_t i = 0; i < minObs.size(); i++)
{
MESSAGE("minObs %d: %d.", i, minObs[i]);
}
// TODO: Re-enable when MESSAGE definition is provided (printf?)
// for (size_t i = 0; i < minObs.size(); i++)
// {
// MESSAGE("minObs %d: %d.", i, minObs[i]);
// }
estimate (startId, minObs, dropWord);

View file

@@ -169,7 +169,7 @@ namespace msra { namespace dbn {
// finish off last block
flushlastblock();
fflushOrDie (f);
fprintf (stderr, "biggrowablevectorarray: disk backup store created, %d frames, %ull bytes\n", (int) n, fgetpos (f));
fprintf (stderr, "biggrowablevectorarray: disk backup store created, %d frames, %lu bytes\n", (int) n, fgetpos (f));
fclose (f);
foreach_index (i, blocks) assert (!blocks[i]); // ensure we flushed
assert (inmembegin == inmemend); // nothing in cache

View file

@@ -278,7 +278,7 @@ public:
bool addtoresult, const float thisscale, const float weight)
{
assert (a.size() == b.size());
assert ((15 & (int) &a[0]) == 0); assert ((15 & (int) &b[0]) == 0); // enforce SSE alignment
assert ((15 & reinterpret_cast<uintptr_t>(&a[0])) == 0); assert ((15 & reinterpret_cast<uintptr_t>(&b[0])) == 0); // enforce SSE alignment
size_t nlong = (a.size() + 3) / 4; // number of SSE elements
const msra::math::float4 * pa = (const msra::math::float4 *) &a[0];
@@ -313,9 +313,9 @@ public:
// for (size_t k = 0; k < 4; k++)
// dotprod (row, const_array_ref<float> (&cols4[k * cols4stride], cols4stride), usij[k * usijstride]);
assert ((15 & (int) &row[0]) == 0);
assert ((15 & (int) &cols4[0]) == 0);
assert ((15 & (int) &cols4[cols4stride]) == 0);
assert ((15 & reinterpret_cast<uintptr_t>(&row[0])) == 0);
assert ((15 & reinterpret_cast<uintptr_t>(&cols4[0])) == 0);
assert ((15 & reinterpret_cast<uintptr_t>(&cols4[cols4stride])) == 0);
//assert (cols4stride * 4 == cols4.size()); // (passed in one vector with 4 columns stacked on top of each other)
//assert (row.size() * 4 == cols4.size()); // this assert is no longer appropriate because of further breaking into blocks

View file

@@ -76,7 +76,7 @@ public:
double logprob(int i) const { if (uniform_sampling) return uniform_log_prob; else return m_log_prob[i]; }
template <typename Engine>
int sample(Engine &eng) const
int sample(Engine &eng)
{
int m = unif_int(eng);
if (uniform_sampling)

View file

@@ -52,7 +52,7 @@ CXX = mpic++
INCLUDEPATH:= Common/Include Math/Math MachineLearning/CNTK BrainScript
CPPFLAGS:= -D_POSIX_SOURCE -D_XOPEN_SOURCE=600 -D__USE_XOPEN2K
CXXFLAGS:= -msse3 -std=c++0x -std=c++11 -fopenmp -fpermissive -fPIC
CXXFLAGS:= -msse3 -std=c++0x -std=c++11 -fopenmp -fpermissive -fPIC -Werror
LIBPATH:=
LIBS:=
LDFLAGS:=

View file

@@ -1,6 +1,7 @@
#ifndef __COLUMN_QUANTIZER_H__
#define __COLUMN_QUANTIZER_H__
#include "ValueQuantizer.h"
#include <math.h>
#pragma warning (disable: 4127) // conditional expression is constant

View file

@@ -23,8 +23,14 @@
#define IDX2C(i,j,ld) (((j)*(ld))+(i)) // 0 based indexing
#define threadsPerBlock 512
#ifdef __GNUC__
#define UNUSED_FUNCTION_ATTRIBUTE __attribute__ ((unused))
#else
#define UNUSED_FUNCTION_ATTRIBUTE
#endif
// Predefine this for later.
static __inline__ __device__ double atomicAdd(double* address, double val);
static __inline__ __device__ double atomicAdd(double* address, double val) UNUSED_FUNCTION_ATTRIBUTE;
//CUDA Kernels code
template<class ElemType>
__global__ void _elementWisePowerOnCuda(

View file

@@ -81,7 +81,7 @@
<GenerateDebugInformation>true</GenerateDebugInformation>
<AdditionalDependencies>libacml_mp_dll.lib;%(AdditionalDependencies)</AdditionalDependencies>
<AdditionalLibraryDirectories>$(SolutionDir)$(Platform)\$(Configuration)\</AdditionalLibraryDirectories>
<DelayLoadDLLs>cublas64_70.dll; cusparse64_70.dll; curand64_70.dll; cudart64_70.dll; libacml_dll.dll; libacml_mp_dll.dll; %(DelayLoadDLLs)</DelayLoadDLLs>
<DelayLoadDLLs>cublas64_70.dll; cusparse64_70.dll; curand64_70.dll; cudart64_70.dll; libacml_mp_dll.dll; %(DelayLoadDLLs)</DelayLoadDLLs>
<Profile>true</Profile>
</Link>
<PostBuildEvent>

View file

@@ -24,7 +24,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
blockdim = (unsigned int) warpsize; // -> blockIdx.x
}
// get the array index for the current thread
__device__ static size_t ParallelizeOverRangeIndex()
__device__ __inline__ static size_t ParallelizeOverRangeIndex()
{
return threadIdx.x + (blockIdx.x * blockDim.x);
}

View file

@@ -69,7 +69,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
// Start of new GPU Sparse Matrix code
//-------------------------------------------------------------------------
template<class ElemType> void GPUSparseMatrix<ElemType>::Resize(const size_t numRows, const size_t numCols, const size_t numNZElemToReserve, const MatrixFormat matrixFormat, const bool growOnly = true, bool keepExistingValues = true) {}//matrix format will affect the size to allocate
template<class ElemType> void GPUSparseMatrix<ElemType>::Resize(const size_t numRows, const size_t numCols, const size_t numNZElemToReserve, const MatrixFormat matrixFormat, const bool growOnly, bool keepExistingValues) {}//matrix format will affect the size to allocate
template<class ElemType> void GPUSparseMatrix<ElemType>::Resize(const size_t numRows, const size_t numCols, const size_t numNZElemToReserve, const bool growOnly, bool keepExistingValues) {}
template<class ElemType> GPUMatrix<ElemType> GPUSparseMatrix<ElemType>::CopyToDenseMatrix() const
@@ -351,7 +351,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
template<class ElemType> void GPUSparseMatrix<ElemType>::ConvertToSparseFormat(MatrixFormat newFormat, GPUSparseMatrix<ElemType>& outMatrix) const {}
template<class ElemType> template <class OutType, class InType>
static void GPUSparseMatrix<ElemType>::CopyBuffer(OutType * outBuffer, const InType * inBuffer, const size_t size){}
void GPUSparseMatrix<ElemType>::CopyBuffer(OutType * outBuffer, const InType * inBuffer, const size_t size){}
#pragma endregion Helper Functions

View file

@@ -5,7 +5,7 @@
namespace Microsoft { namespace MSR { namespace CNTK {
template<class ElemType>
QuantizedMatrix<ElemType>::QuantizedMatrix(const size_t numRows, const size_t numCols, const size_t nbits, short deviceId, MemAllocator* allocator /* = nullptr */)
QuantizedMatrix<ElemType>::QuantizedMatrix(const size_t numRows, const size_t numCols, const size_t nbits, DEVICEID_TYPE deviceId, MemAllocator* allocator /* = nullptr */)
: m_numRows(numRows), m_numCols(numCols), m_numBits(nbits), m_allocator(allocator)
{
m_qColSize = QuantizedColumn<ElemType>::QuantizedColumnSize(m_numBits, m_numRows);

View file

@@ -56,7 +56,7 @@ class MATH_API QuantizedMatrix
static const size_t QWordNumBits = ValueQuantizer<ElemType>::QWordNumBits;
public:
QuantizedMatrix(const size_t numRows, const size_t numCols, const size_t nbits, short deviceId, MemAllocator* allocator = nullptr);
QuantizedMatrix(const size_t numRows, const size_t numCols, const size_t nbits, DEVICEID_TYPE deviceId, MemAllocator* allocator = nullptr);
// Move constructor and assignment
QuantizedMatrix(QuantizedMatrix<ElemType>&& moveFrom);