Commit 785429e854

Merge branch 'master' into fseide/bs

Conflicts:
	Math/Math/NoGPU.cpp
@@ -60,7 +60,7 @@ class lattice
 size_t impliedspunitid : 31; // id of implied last unit (intended as /sp/); only used in V2
 size_t hasacscores : 1; // if 1 then ac scores are embedded

-header_v1_v2() : numnodes (0), numedges (0), lmf (1.0f), wp (0.0f), frameduration (0.01/*assumption*/), numframes (0), impliedspunitid (SIZE_MAX), hasacscores (1) { }
+header_v1_v2() : numnodes (0), numedges (0), lmf (1.0f), wp (0.0f), frameduration (0.01/*assumption*/), numframes (0), impliedspunitid (INT_MAX), hasacscores (1) { }
 };
 header_v1_v2 info; // information about the lattice
 static const unsigned int NOEDGE = 0xffffff; // 24 bits
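The SIZE_MAX to INT_MAX change matters because impliedspunitid is a 31-bit bitfield: on a 64-bit build SIZE_MAX cannot be represented in 31 bits, so the old initializer truncated and could draw a constant-conversion warning, fatal once -Werror is added to the Makefile below. INT_MAX (0x7FFFFFFF) is exactly the all-ones value of a 31-bit field, so the stored sentinel is unchanged. A minimal sketch of the truncation, assuming a 64-bit size_t:

    #include <cstddef>
    #include <cstdint>
    #include <climits>
    #include <cstdio>

    struct header_like
    {
        size_t impliedspunitid : 31;    // same 31-bit field as in the lattice header
    };

    int main()
    {
        header_like h;
        h.impliedspunitid = SIZE_MAX;   // truncates: only the low 31 bits survive
        printf ("%x\n", (unsigned) h.impliedspunitid);  // prints 7fffffff
        h.impliedspunitid = INT_MAX;    // fits exactly: same stored value, no warning
        printf ("%x\n", (unsigned) h.impliedspunitid);  // prints 7fffffff
        return 0;
    }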
@@ -507,7 +507,7 @@ public:
 }
 };

-typedef aligninfo aligninfo; // now we can access it as htkmlfwordsequence::aligninfo although it comes from some totally other corner of the system
+typedef msra::lattices::aligninfo aligninfo; // now we can access it as htkmlfwordsequence::aligninfo although it comes from some totally other corner of the system

 std::vector<word> words;
 std::vector<aligninfo> align;
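The self-referential typedef is the kind of construct MSVC tolerates but GCC rejects: inside the class, the aligninfo being declared hides the outer type it was meant to refer to, so GCC reports that the declaration "changes meaning of aligninfo". Qualifying the source type fixes the lookup. A minimal sketch:

    namespace msra { namespace lattices {
        struct aligninfo { int unit; };     // reduced stand-in for the real struct
    } }
    using namespace msra::lattices;

    struct htkmlfwordsequence
    {
        // typedef aligninfo aligninfo;                  // GCC: error, the name now refers to itself
        typedef msra::lattices::aligninfo aligninfo;     // OK: source type fully qualified
    };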
@@ -1983,10 +1983,11 @@ public:
 //// set prune value to 0 3 3
 //setMinObs (iMinObs);

-for (size_t i = 0; i < minObs.size(); i++)
-{
-    MESSAGE("minObs %d: %d.", i, minObs[i]);
-}
+// TODO: Re-enable when MESSAGE definition is provided (printf?)
+// for (size_t i = 0; i < minObs.size(); i++)
+// {
+//     MESSAGE("minObs %d: %d.", i, minObs[i]);
+// }

 estimate (startId, minObs, dropWord);

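Note the commented-out loop was also printing a size_t index with %d, which is suspect on 64-bit. If MESSAGE is ever re-enabled as a printf-style logger, something along these lines would be portable; the macro shown is a hypothetical stand-in, not the project's definition:

    #include <cstdio>
    // Hypothetical MESSAGE definition, assuming a printf-style logger:
    #define MESSAGE(...) do { fprintf (stderr, __VA_ARGS__); fputc ('\n', stderr); } while (0)

    // %zu for the size_t index; minObs elements assumed to be int
    for (size_t i = 0; i < minObs.size(); i++)
        MESSAGE("minObs %zu: %d.", i, minObs[i]);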
@@ -169,7 +169,7 @@ namespace msra { namespace dbn {
 // finish off last block
 flushlastblock();
 fflushOrDie (f);
-fprintf (stderr, "biggrowablevectorarray: disk backup store created, %d frames, %ull bytes\n", (int) n, fgetpos (f));
+fprintf (stderr, "biggrowablevectorarray: disk backup store created, %d frames, %lu bytes\n", (int) n, fgetpos (f));
 fclose (f);
 foreach_index (i, blocks) assert (!blocks[i]); // ensure we flushed
 assert (inmembegin == inmemend); // nothing in cache
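%ull is not a printf conversion: it parses as %u followed by the literal characters "ll", so the argument is consumed as an unsigned int and a stray "ll" lands in the output (undefined behavior when the value passed is wider than int). %lu is the correct specifier for an unsigned long. A small demonstration:

    #include <cstdio>

    int main()
    {
        unsigned long bytes = 123456789UL;
        // printf ("%ull bytes\n", bytes);   // parsed as %u + "ll": wrong output, UB on LP64
        printf ("%lu bytes\n", bytes);       // correct: "123456789 bytes"
        return 0;
    }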
@@ -278,7 +278,7 @@ public:
 bool addtoresult, const float thisscale, const float weight)
 {
 assert (a.size() == b.size());
-assert ((15 & (int) &a[0]) == 0); assert ((15 & (int) &b[0]) == 0); // enforce SSE alignment
+assert ((15 & reinterpret_cast<uintptr_t>(&a[0])) == 0); assert ((15 & reinterpret_cast<uintptr_t>(&b[0])) == 0); // enforce SSE alignment

 size_t nlong = (a.size() + 3) / 4; // number of SSE elements
 const msra::math::float4 * pa = (const msra::math::float4 *) &a[0];
@@ -313,9 +313,9 @@ public:
 // for (size_t k = 0; k < 4; k++)
 //     dotprod (row, const_array_ref<float> (&cols4[k * cols4stride], cols4stride), usij[k * usijstride]);

-assert ((15 & (int) &row[0]) == 0);
-assert ((15 & (int) &cols4[0]) == 0);
-assert ((15 & (int) &cols4[cols4stride]) == 0);
+assert ((15 & reinterpret_cast<uintptr_t>(&row[0])) == 0);
+assert ((15 & reinterpret_cast<uintptr_t>(&cols4[0])) == 0);
+assert ((15 & reinterpret_cast<uintptr_t>(&cols4[cols4stride])) == 0);
 //assert (cols4stride * 4 == cols4.size()); // (passed in one vector with 4 columns stacked on top of each other)
 //assert (row.size() * 4 == cols4.size()); // this assert is no longer appropriate because of further breaking into blocks

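Both hunks fix the same pattern: casting a pointer to int keeps only the low 32 bits on an LP64 platform, and GCC flags the cast, fatal under -Werror. uintptr_t is the integer type guaranteed wide enough to hold a pointer value, so the alignment test becomes exact and warning-free. The check in isolation:

    #include <cassert>
    #include <cstdint>

    inline void check_sse_alignment (const float * p)
    {
        // 16-byte alignment required for aligned SSE loads: low 4 address bits must be 0
        assert ((15 & reinterpret_cast<uintptr_t>(p)) == 0);
    }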
@@ -76,7 +76,7 @@ public:
 double logprob(int i) const { if (uniform_sampling) return uniform_log_prob; else return m_log_prob[i]; }

 template <typename Engine>
-int sample(Engine &eng) const
+int sample(Engine &eng)
 {
 int m = unif_int(eng);
 if (uniform_sampling)
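Dropping the const is forced by the first statement of the body: assuming unif_int is a standard distribution object such as std::uniform_int_distribution, its operator() is non-const because drawing a sample may update internal state, so it cannot be called through a const member function. A reduced sketch; the class and member names here are stand-ins:

    #include <random>

    class sampler_like
    {
        std::uniform_int_distribution<int> unif_int { 0, 99 };
    public:
        template <typename Engine>
        int sample (Engine &eng)        // non-const: unif_int(eng) mutates the distribution
        {
            return unif_int (eng);
        }
    };

    // usage:
    //   std::mt19937 eng;
    //   sampler_like s;
    //   int m = s.sample (eng);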
Makefile (2 changed lines)
@@ -52,7 +52,7 @@ CXX = mpic++

 INCLUDEPATH:= Common/Include Math/Math MachineLearning/CNTK BrainScript
 CPPFLAGS:= -D_POSIX_SOURCE -D_XOPEN_SOURCE=600 -D__USE_XOPEN2K
-CXXFLAGS:= -msse3 -std=c++0x -std=c++11 -fopenmp -fpermissive -fPIC
+CXXFLAGS:= -msse3 -std=c++0x -std=c++11 -fopenmp -fpermissive -fPIC -Werror
 LIBPATH:=
 LIBS:=
 LDFLAGS:=
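Adding -Werror makes GCC treat every warning as an error; that single flag is presumably what forces the pointer-cast, format-string, self-referential-typedef, and unused-function fixes seen elsewhere in this commit.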
@@ -1,6 +1,7 @@
 #ifndef __COLUMN_QUANTIZER_H__
 #define __COLUMN_QUANTIZER_H__
 #include "ValueQuantizer.h"
+#include <math.h>

 #pragma warning (disable: 4127) // conditional expression is constant

@@ -23,8 +23,14 @@
 #define IDX2C(i,j,ld) (((j)*(ld))+(i)) // 0 based indexing
 #define threadsPerBlock 512

+#ifdef __GNUC__
+#define UNUSED_FUNCTION_ATTRIBUTE __attribute__ ((unused))
+#else
+#define UNUSED_FUNCTION_ATTRIBUTE
+#endif
+
 // Predefine this for later.
-static __inline__ __device__ double atomicAdd(double* address, double val);
+static __inline__ __device__ double atomicAdd(double* address, double val) UNUSED_FUNCTION_ATTRIBUTE;
 //CUDA Kernels code
 template<class ElemType>
 __global__ void _elementWisePowerOnCuda(
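The macro addresses GCC's -Wunused-function, which fires for a function with internal linkage (static) that a translation unit declares but never calls; under -Werror that breaks the build. __attribute__ ((unused)) on the declaration tells GCC the function may legitimately go unused, and the macro expands to nothing for compilers without the attribute. The pattern in isolation:

    #ifdef __GNUC__
    #define UNUSED_FUNCTION_ATTRIBUTE __attribute__ ((unused))
    #else
    #define UNUSED_FUNCTION_ATTRIBUTE      // MSVC: no equivalent warning to silence
    #endif

    // Without the attribute, GCC warns if nothing in this translation unit
    // ever calls helper(); with it, the unused definition is accepted silently.
    static double helper (double x) UNUSED_FUNCTION_ATTRIBUTE;
    static double helper (double x) { return x * x; }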
@@ -81,7 +81,7 @@
 <GenerateDebugInformation>true</GenerateDebugInformation>
 <AdditionalDependencies>libacml_mp_dll.lib;%(AdditionalDependencies)</AdditionalDependencies>
 <AdditionalLibraryDirectories>$(SolutionDir)$(Platform)\$(Configuration)\</AdditionalLibraryDirectories>
-<DelayLoadDLLs>cublas64_70.dll; cusparse64_70.dll; curand64_70.dll; cudart64_70.dll; libacml_dll.dll; libacml_mp_dll.dll; %(DelayLoadDLLs)</DelayLoadDLLs>
+<DelayLoadDLLs>cublas64_70.dll; cusparse64_70.dll; curand64_70.dll; cudart64_70.dll; libacml_mp_dll.dll; %(DelayLoadDLLs)</DelayLoadDLLs>
 <Profile>true</Profile>
 </Link>
 <PostBuildEvent>
@@ -24,7 +24,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
 blockdim = (unsigned int) warpsize; // -> blockIdx.x
 }
 // get the array index for the current thread
-__device__ static size_t ParallelizeOverRangeIndex()
+__device__ __inline__ static size_t ParallelizeOverRangeIndex()
 {
 return threadIdx.x + (blockIdx.x * blockDim.x);
 }
@@ -69,7 +69,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
 // Start of new GPU Sparse Matrix code
 //-------------------------------------------------------------------------

-template<class ElemType> void GPUSparseMatrix<ElemType>::Resize(const size_t numRows, const size_t numCols, const size_t numNZElemToReserve, const MatrixFormat matrixFormat, const bool growOnly = true, bool keepExistingValues = true) {}//matrix format will affect the size to allocate
+template<class ElemType> void GPUSparseMatrix<ElemType>::Resize(const size_t numRows, const size_t numCols, const size_t numNZElemToReserve, const MatrixFormat matrixFormat, const bool growOnly, bool keepExistingValues) {}//matrix format will affect the size to allocate
 template<class ElemType> void GPUSparseMatrix<ElemType>::Resize(const size_t numRows, const size_t numCols, const size_t numNZElemToReserve, const bool growOnly, bool keepExistingValues) {}

 template<class ElemType> GPUMatrix<ElemType> GPUSparseMatrix<ElemType>::CopyToDenseMatrix() const
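The Resize fix is a language rule: a default argument may be specified only once, on the declaration; repeating it on the out-of-class definition is ill-formed, and GCC rejects it even though MSVC has historically let it slide. A minimal sketch of the rule:

    struct S
    {
        void Resize (bool growOnly = true, bool keepExistingValues = true);   // defaults live here
    };

    // void S::Resize (bool growOnly = true, bool keepExistingValues = true) {}   // error:
    //                                                          // default arguments repeated
    void S::Resize (bool growOnly, bool keepExistingValues) {}  // OK: definition omits them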
@@ -351,7 +351,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
 template<class ElemType> void GPUSparseMatrix<ElemType>::ConvertToSparseFormat(MatrixFormat newFormat, GPUSparseMatrix<ElemType>& outMatrix) const {}

 template<class ElemType> template <class OutType, class InType>
-static void GPUSparseMatrix<ElemType>::CopyBuffer(OutType * outBuffer, const InType * inBuffer, const size_t size){}
+void GPUSparseMatrix<ElemType>::CopyBuffer(OutType * outBuffer, const InType * inBuffer, const size_t size){}

 #pragma endregion Helper Functions

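The CopyBuffer fix is similar: static may appear only on the in-class declaration; on an out-of-class member definition it is ill-formed, because there static would mean internal linkage rather than "static member". A minimal sketch:

    #include <cstddef>

    struct S
    {
        static void CopyBuffer (int * dst, const int * src, std::size_t n);   // 'static' only here
    };

    // static void S::CopyBuffer (int * dst, const int * src, std::size_t n) {}   // error:
    //                                                  // 'static' not allowed on the definition
    void S::CopyBuffer (int * dst, const int * src, std::size_t n)
    {
        for (std::size_t i = 0; i < n; i++)
            dst[i] = src[i];
    }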
@@ -5,7 +5,7 @@
 namespace Microsoft { namespace MSR { namespace CNTK {

 template<class ElemType>
-QuantizedMatrix<ElemType>::QuantizedMatrix(const size_t numRows, const size_t numCols, const size_t nbits, short deviceId, MemAllocator* allocator /* = nullptr */)
+QuantizedMatrix<ElemType>::QuantizedMatrix(const size_t numRows, const size_t numCols, const size_t nbits, DEVICEID_TYPE deviceId, MemAllocator* allocator /* = nullptr */)
 : m_numRows(numRows), m_numCols(numCols), m_numBits(nbits), m_allocator(allocator)
 {
 m_qColSize = QuantizedColumn<ElemType>::QuantizedColumnSize(m_numBits, m_numRows);
@@ -56,7 +56,7 @@ class MATH_API QuantizedMatrix
 static const size_t QWordNumBits = ValueQuantizer<ElemType>::QWordNumBits;

 public:
-QuantizedMatrix(const size_t numRows, const size_t numCols, const size_t nbits, short deviceId, MemAllocator* allocator = nullptr);
+QuantizedMatrix(const size_t numRows, const size_t numCols, const size_t nbits, DEVICEID_TYPE deviceId, MemAllocator* allocator = nullptr);

 // Move constructor and assignment
 QuantizedMatrix(QuantizedMatrix<ElemType>&& moveFrom);
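These last two hunks are one fix split across declaration and definition: both must spell the same parameter type, and replacing the raw short with the project's DEVICEID_TYPE alias keeps every device-id parameter on a single, centrally defined type, so a later change of the underlying width touches one alias instead of every signature.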