#if-0'ed out stuff in basetypes.h, which will soon be removed completely;
fixed the include paths of the MathPerformanceTests project (which were incomplete and even inconsistent between Release/Debug); more attempts at ImageReader.cpp
This commit is contained in:
Родитель
e2f07b32e3
Коммит
cea0043fa1
|
@ -12,6 +12,36 @@
|
|||
#include <vector>
|
||||
#include <memory> // for shared_ptr
|
||||
|
||||
// Per-frame bit flags describing how a frame is packed into a minibatch.
// The underlying type is plain char rather than unsigned char because the flags
// are stored in a Matrix<char>, which is used as a data holder.
enum class MinibatchPackingFlags : char
{
    None          = 0x00,
    SequenceStart = 0x01, // bit 0 (binary 0001): frame is first of an utterance
    SequenceEnd   = 0x02, // bit 1 (binary 0010): frame is last of an utterance
    NoFeature     = 0x04, // bit 2 (binary 0100): frame has no feature (e.g. a gap due to BPTT)
    NoLabel       = 0x08, // bit 3 (binary 1000): frame has no label

    // combinations of the single-bit flags above
    NoInput                       = NoFeature | NoLabel, // once the reader is refactored, NoInput will no longer be needed
    SequenceStartOrNoFeature      = SequenceStart | NoFeature,
    SequenceEndOrNoFeature        = SequenceEnd | NoFeature,
    SequenceStartOrEndOrNoFeature = SequenceStart | SequenceEnd | NoFeature,
};
|
||||
|
||||
// Bitwise OR of two flag values: the result has every bit that is set in either operand.
inline MinibatchPackingFlags operator| (MinibatchPackingFlags a, MinibatchPackingFlags b)
{
    const unsigned char lhsBits = static_cast<unsigned char>(a);
    const unsigned char rhsBits = static_cast<unsigned char>(b);
    return static_cast<MinibatchPackingFlags>(lhsBits | rhsBits);
}
|
||||
|
||||
// In-place OR: merges the bits of b into a and returns a reference to a.
inline MinibatchPackingFlags& operator|= (MinibatchPackingFlags& a, MinibatchPackingFlags b)
{
    return a = a | b;
}
|
||||
|
||||
// Overlap test: returns true if a and b have at least one bit in common.
// Note that this yields a bool, not the masked flag value.
inline bool operator& (MinibatchPackingFlags a, MinibatchPackingFlags b)
{
    const int commonBits = static_cast<unsigned char>(a) & static_cast<unsigned char>(b);
    if (commonBits == 0)
        return false;
    return true;
}
|
||||
|
||||
namespace Microsoft { namespace MSR { namespace CNTK {
|
||||
|
||||
// Forward declarations
|
||||
|
|
|
@ -9,7 +9,7 @@
|
|||
#ifndef _BASETYPES_
|
||||
#define _BASETYPES_
|
||||
|
||||
|
||||
#if 0
|
||||
#ifndef UNDER_CE // fixed-buffer overloads not available for wince
|
||||
#ifdef _CRT_SECURE_CPP_OVERLOAD_STANDARD_NAMES // fixed-buffer overloads for strcpy() etc.
|
||||
#undef _CRT_SECURE_CPP_OVERLOAD_STANDARD_NAMES
|
||||
|
@ -68,6 +68,7 @@ OACR_WARNING_DISABLE(POTENTIAL_ARGUMENT_TYPE_MISMATCH, "Not level1 or level2_sec
|
|||
#if !defined(_DEBUG) || defined(_CHECKED) || defined(_MANAGED)
|
||||
#pragma warning(disable : 4702) // unreachable code
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#include "Platform.h"
|
||||
#include <stdio.h>
|
||||
|
@ -105,15 +106,18 @@ OACR_WARNING_DISABLE(POTENTIAL_ARGUMENT_TYPE_MISMATCH, "Not level1 or level2_sec
|
|||
typedef unsigned char byte;
|
||||
#endif
|
||||
|
||||
#if 0
|
||||
#ifdef _WIN32
|
||||
#pragma push_macro("STRSAFE_NO_DEPRECATE")
|
||||
#define STRSAFE_NO_DEPRECATE // deprecation managed elsewhere, not by strsafe
|
||||
#include <strsafe.h> // for strbcpy() etc templates
|
||||
#pragma pop_macro("STRSAFE_NO_DEPRECATE")
|
||||
#endif
|
||||
#endif
|
||||
|
||||
using namespace std;
|
||||
|
||||
#if 0
|
||||
// CRT error handling seems to not be included in wince headers
|
||||
// so we define our own imports
|
||||
#ifdef UNDER_CE
|
||||
|
@ -147,6 +151,7 @@ using namespace std;
|
|||
// disable warnings for which fixing would make code less readable
|
||||
#pragma warning(disable : 4290) // throw() declaration ignored
|
||||
#pragma warning(disable : 4244) // conversion from typeA to typeB, possible loss of data
|
||||
#endif
|
||||
|
||||
// ----------------------------------------------------------------------------
|
||||
// (w)cstring -- helper class like std::string but with auto-cast to char*
|
||||
|
@ -176,6 +181,7 @@ static inline wchar_t*GetWC(const char *c)
|
|||
|
||||
return wc;
|
||||
}
|
||||
#if 0
|
||||
struct MatchPathSeparator
|
||||
{
|
||||
bool operator()( char ch ) const
|
||||
|
@ -207,6 +213,7 @@ static inline std::wstring removeExtension (std::wstring const& filename)
|
|||
size_t lastindex = filename.find_last_of(L".");
|
||||
return filename.substr(0, lastindex);
|
||||
}
|
||||
#endif
|
||||
|
||||
// ----------------------------------------------------------------------------
|
||||
// some mappings for non-Windows builds
|
||||
|
@ -249,12 +256,14 @@ static inline void Sleep (size_t ms) { std::this_thread::sleep_for (std::chrono:
|
|||
// basic macros --TODO: do we need those? delete what we don't need
|
||||
// ----------------------------------------------------------------------------
|
||||
|
||||
#ifndef ASSERT
|
||||
#if 0
|
||||
#ifndef assert
|
||||
#ifdef _CHECKED // basetypes.h expects this function to be defined (it is in message.h)
|
||||
extern void _CHECKED_ASSERT_error(const char * file, int line, const char * exp);
|
||||
#define ASSERT(exp) ((exp)||(_CHECKED_ASSERT_error(__FILE__,__LINE__,#exp),0))
|
||||
#define assert(exp) ((exp)||(_CHECKED_ASSERT_error(__FILE__,__LINE__,#exp),0))
|
||||
#else
|
||||
#define ASSERT assert
|
||||
#define assert assert
|
||||
#endif
|
||||
#endif
|
||||
#endif
|
||||
// UNUSED(x) -- evaluates x and casts the result to void, e.g. to mark a variable as deliberately unused
#define UNUSED(x) (void)(x)
|
||||
|
@ -303,14 +312,15 @@ namespace msra { namespace basetypes {
|
|||
}
|
||||
};
|
||||
|
||||
// class ARRAY -- std::vector with array-bounds checking
|
||||
// class std::vector -- std::vector with array-bounds checking
|
||||
// VS 2008 and above do this, so there is no longer a need for this.
|
||||
|
||||
#pragma warning(push)
|
||||
#pragma warning(disable : 4555) // expression has no affect, used so retail won't be empty
|
||||
|
||||
#if 0
|
||||
template<class _ElemType>
|
||||
class ARRAY : public std::vector<_ElemType>
|
||||
class std::vector : public std::vector<_ElemType>
|
||||
{
|
||||
#if defined (_DEBUG) || defined (_CHECKED) // debug version with range checking
|
||||
static void throwOutOfBounds()
|
||||
|
@ -319,15 +329,15 @@ class ARRAY : public std::vector<_ElemType>
|
|||
OACR_WARNING_DISABLE(IGNOREDBYCOMMA, "Reviewd OK. Special trick below to show a message when assertion fails"
|
||||
"[rogeryu 2006/03/24]");
|
||||
OACR_WARNING_DISABLE(BOGUS_EXPRESSION_LIST, "This is intentional. [rogeryu 2006/03/24]");
|
||||
//ASSERT ("ARRAY::operator[] out of bounds", false);
|
||||
//assert ("std::vector::operator[] out of bounds", false);
|
||||
OACR_WARNING_POP;
|
||||
}
|
||||
#endif
|
||||
|
||||
public:
|
||||
|
||||
ARRAY() : std::vector<_ElemType> () { }
|
||||
ARRAY (int size) : std::vector<_ElemType> (size) { }
|
||||
std::vector() : std::vector<_ElemType> () { }
|
||||
std::vector (int size) : std::vector<_ElemType> (size) { }
|
||||
|
||||
#if defined (_DEBUG) || defined (_CHECKED) // debug version with range checking
|
||||
// ------------------------------------------------------------------------
|
||||
|
@ -361,8 +371,9 @@ public:
|
|||
}
|
||||
};
|
||||
// overload swap(), otherwise we'd fallback to 3-way assignment & possibly throw
|
||||
template<class _T> inline void swap (ARRAY<_T> & L, ARRAY<_T> & R) throw()
|
||||
template<class _T> inline void swap (std::vector<_T> & L, std::vector<_T> & R) throw()
|
||||
{ swap ((std::vector<_T> &) L, (std::vector<_T> &) R); }
|
||||
#endif
|
||||
|
||||
// class fixed_vector - non-resizable vector
|
||||
|
||||
|
@ -372,14 +383,14 @@ template<class _T> class fixed_vector
|
|||
size_t n; // number of elements
|
||||
void check (int index) const
|
||||
{
|
||||
ASSERT (index >= 0 && (size_t) index < n);
|
||||
assert (index >= 0 && (size_t) index < n);
|
||||
#ifdef NDEBUG
|
||||
UNUSED(index);
|
||||
#endif
|
||||
}
|
||||
void check (size_t index) const
|
||||
{
|
||||
ASSERT (index < n);
|
||||
assert (index < n);
|
||||
#ifdef NDEBUG
|
||||
UNUSED(index);
|
||||
#endif
|
||||
|
@ -404,7 +415,7 @@ public:
|
|||
inline const _T & operator[] (int index) const { check (index); return p[index]; } // reading
|
||||
inline _T & operator[] (size_t index) { check (index); return p[index]; } // writing
|
||||
inline const _T & operator[] (size_t index) const { check (index); return p[index]; } // reading
|
||||
inline int indexof (const _T & elem) const { ASSERT (&elem >= p && &elem < p + n); return &elem - p; }
|
||||
inline int indexof (const _T & elem) const { assert (&elem >= p && &elem < p + n); return &elem - p; }
|
||||
void swap (fixed_vector & other) throw() { std::swap (other.p, p); std::swap (other.n, n); }
|
||||
template<class VECTOR> fixed_vector & operator= (const VECTOR & other)
|
||||
{
|
||||
|
@ -431,10 +442,11 @@ template<class _T> inline void swap (fixed_vector<_T> & L, fixed_vector<_T> & R)
|
|||
// class matrix - simple fixed-size 2-dimensional array, access elements as m(i,j)
|
||||
// stored as concatenation of rows
|
||||
|
||||
#if 1
|
||||
template<class T> class matrix : fixed_vector<T>
|
||||
{
|
||||
size_t numcols;
|
||||
size_t locate(size_t i, size_t j) const { ASSERT(i < rows() && j < cols()); return i * cols() + j; }
|
||||
size_t locate(size_t i, size_t j) const { assert(i < rows() && j < cols()); return i * cols() + j; }
|
||||
public:
|
||||
typedef T elemtype;
|
||||
matrix() : numcols(0) {}
|
||||
|
@ -454,6 +466,7 @@ template<class _T> inline void swap(matrix<_T> & L, matrix<_T> & R) throw() { L.
|
|||
typedef std::string STRING;
|
||||
typedef std::wstring WSTRING;
|
||||
typedef std::basic_string<TCHAR> TSTRING; // wide/narrow character string
|
||||
#endif
|
||||
|
||||
// derive from this for noncopyable classes (will get you private unimplemented copy constructors)
|
||||
// ... TODO: change all of basetypes classes/structs to use this
|
||||
|
@ -596,7 +609,7 @@ struct utf16 : std::wstring { utf16 (const std::string & p) // utf-8 to -16
|
|||
int rc = MultiByteToWideChar (CP_UTF8, 0, p.c_str(), (int) len,
|
||||
&buf[0], (int) buf.size());
|
||||
if (rc == 0) RuntimeError("MultiByteToWideChar");
|
||||
ASSERT (rc < buf.size ());
|
||||
assert (rc < buf.size ());
|
||||
(*(std::wstring*)this) = &buf[0];
|
||||
}};
|
||||
#endif
|
||||
|
@ -618,7 +631,7 @@ static inline std::wstring mbstowcs(const std::string & p) // input: MBCS
|
|||
size_t len = p.length();
|
||||
msra::basetypes::fixed_vector<wchar_t> buf(len + 1); // max: >1 mb chars => 1 wchar
|
||||
std::fill(buf.begin(), buf.end(), (wchar_t)0);
|
||||
OACR_WARNING_SUPPRESS(UNSAFE_STRING_FUNCTION, "Reviewed OK. size checked. [rogeryu 2006/03/21]");
|
||||
//OACR_WARNING_SUPPRESS(UNSAFE_STRING_FUNCTION, "Reviewed OK. size checked. [rogeryu 2006/03/21]");
|
||||
::mbstowcs(&buf[0], p.c_str(), len + 1);
|
||||
return std::wstring(&buf[0]);
|
||||
}
|
||||
|
@ -663,6 +676,7 @@ template<class _T> static inline std::basic_string<_T> join (const std::vector<s
|
|||
return res;
|
||||
}
|
||||
|
||||
#if 1
|
||||
// parsing strings to numbers
|
||||
static inline int toint (const wchar_t * s)
|
||||
{
|
||||
|
@ -720,12 +734,14 @@ static inline double todouble (const std::wstring & s)
|
|||
if (*endptr) RuntimeError("todouble: invalid input string");
|
||||
return value;
|
||||
}
|
||||
#endif
|
||||
|
||||
// ----------------------------------------------------------------------------
|
||||
// tokenizer -- utility for white-space tokenizing strings in a character buffer
|
||||
// This simple class just breaks a string, but does not own the string buffer.
|
||||
// ----------------------------------------------------------------------------
|
||||
|
||||
#if 1
|
||||
class tokenizer : public std::vector<char*>
|
||||
{
|
||||
const char * delim;
|
||||
|
@ -747,6 +763,7 @@ public:
|
|||
#endif
|
||||
}
|
||||
};
|
||||
#endif
|
||||
|
||||
};}; // namespace
|
||||
|
||||
|
@ -789,6 +806,7 @@ public:
|
|||
};
|
||||
// fclose() overload for auto_file_ptr: forwards to the object's own fclose() member and returns its result.
inline int fclose (auto_file_ptr & af)
{
    const int rc = af.fclose();
    return rc;
}
|
||||
|
||||
#if 0
|
||||
#ifdef _MSC_VER
|
||||
// auto-closing container for Win32 handles.
|
||||
// Pass close function if not CloseHandle(), e.g.
|
||||
|
@ -827,7 +845,7 @@ public:
|
|||
operator const T () const { return it; }
|
||||
T detach () { T tmp = it; it = 0; return tmp; } // release ownership of object
|
||||
};
|
||||
|
||||
#endif
|
||||
|
||||
};};
|
||||
|
||||
|
@ -879,6 +897,7 @@ public:
|
|||
|
||||
namespace msra { namespace util {
|
||||
|
||||
#if 0
|
||||
// to (slightly) simplify processing of command-line arguments.
|
||||
// command_line args (argc, argv);
|
||||
// while (args.has (1) && args[0][0] == '-') { option = args.shift(); process (option); }
|
||||
|
@ -894,7 +913,8 @@ public:
|
|||
const wchar_t * shift() { if (size() == 0) return NULL; num--; return *args++; }
|
||||
const wchar_t * operator[] (int i) const { return (i < 0 || i >= size()) ? NULL : args[i]; }
|
||||
};
|
||||
|
||||
#endif
|
||||
|
||||
// byte-reverse a variable --reverse all bytes (intended for integral types and float)
|
||||
template<typename T> static inline void bytereverse (T & v) throw()
|
||||
{ // note: this is more efficient than it looks because sizeof (v[0]) is a constant
|
||||
|
@ -943,46 +963,18 @@ template<class S> static inline void ZeroStruct (S & s) { memset (&s, 0, sizeof
|
|||
// machine dependent
|
||||
// ----------------------------------------------------------------------------
|
||||
|
||||
#if 0
|
||||
#define MACHINE_IS_BIG_ENDIAN (false)
|
||||
#endif
|
||||
|
||||
using namespace msra::basetypes; // for compatibility
|
||||
|
||||
#pragma warning (pop)
|
||||
//#pragma warning (pop)
|
||||
|
||||
#define EPSILON 1e-5
|
||||
// ISCLOSE(a, b, threshold) -- true if a and b differ by less than threshold in absolute value.
// Every argument and the expansion as a whole are parenthesized, so the macro behaves as expected
// with compound arguments (e.g. ISCLOSE(x, y + z, t)) and inside larger expressions (e.g. ISCLOSE(...) && c).
#define ISCLOSE(a, b, threshold) (abs((a) - (b)) < (threshold))
|
||||
|
||||
// why is this in basetypes.h?
|
||||
// Per-frame bit flags describing how a frame is packed into a minibatch.
// The underlying type is plain char rather than unsigned char because the flags
// are stored in a Matrix<char>, which is used as a data holder.
enum class MinibatchPackingFlags : char
{
    None          = 0x00,
    SequenceStart = 0x01, // bit 0 (binary 0001): frame is first of an utterance
    SequenceEnd   = 0x02, // bit 1 (binary 0010): frame is last of an utterance
    NoFeature     = 0x04, // bit 2 (binary 0100): frame has no feature (e.g. a gap due to BPTT)
    NoLabel       = 0x08, // bit 3 (binary 1000): frame has no label

    // combinations of the single-bit flags above
    NoInput                       = NoFeature | NoLabel, // once the reader is refactored, NoInput will no longer be needed
    SequenceStartOrNoFeature      = SequenceStart | NoFeature,
    SequenceEndOrNoFeature        = SequenceEnd | NoFeature,
    SequenceStartOrEndOrNoFeature = SequenceStart | SequenceEnd | NoFeature,
};
|
||||
|
||||
// Bitwise OR of two flag values: the result has every bit that is set in either operand.
inline MinibatchPackingFlags operator| (MinibatchPackingFlags a, MinibatchPackingFlags b)
{
    const unsigned char lhsBits = static_cast<unsigned char>(a);
    const unsigned char rhsBits = static_cast<unsigned char>(b);
    return static_cast<MinibatchPackingFlags>(lhsBits | rhsBits);
}
|
||||
|
||||
// In-place OR: merges the bits of b into a and returns a reference to a.
inline MinibatchPackingFlags& operator|= (MinibatchPackingFlags& a, MinibatchPackingFlags b)
{
    return a = a | b;
}
|
||||
|
||||
// Overlap test: returns true if a and b have at least one bit in common.
// Note that this yields a bool, not the masked flag value.
inline bool operator& (MinibatchPackingFlags a, MinibatchPackingFlags b)
{
    const int commonBits = static_cast<unsigned char>(a) & static_cast<unsigned char>(b);
    if (commonBits == 0)
        return false;
    return true;
}
|
||||
|
||||
template<class F>
|
||||
static inline bool comparator(const pair<int, F>& l, const pair<int, F>& r)
|
||||
{
|
||||
|
|
|
@ -326,8 +326,8 @@ double fgetdouble (FILE * f);
|
|||
// fgetwav(): read an entire .wav file
|
||||
// ----------------------------------------------------------------------------
|
||||
|
||||
void fgetwav (FILE * f, ARRAY<short> & wav, int & sampleRate);
|
||||
void fgetwav (const wstring & fn, ARRAY<short> & wav, int & sampleRate);
|
||||
void fgetwav (FILE * f, std::vector<short> & wav, int & sampleRate);
|
||||
void fgetwav (const wstring & fn, std::vector<short> & wav, int & sampleRate);
|
||||
|
||||
// ----------------------------------------------------------------------------
|
||||
// fputwav(): save data into a .wav file
|
||||
|
|
|
@ -1361,8 +1361,8 @@ void WAVEHEADER::write (FILE * f)
|
|||
fputint (f, nAvgBytesPerSec);
|
||||
fputshort (f, nBlockAlign);
|
||||
fputshort (f, wBitsPerSample);
|
||||
ASSERT (FmtLength == 16);
|
||||
ASSERT (wFormatTag == 1);
|
||||
assert (FmtLength == 16);
|
||||
assert (wFormatTag == 1);
|
||||
fputTag (f, "data");
|
||||
fputint (f, DataLength);
|
||||
fflushOrDie (f);
|
||||
|
@ -1455,14 +1455,14 @@ static short toolULawToLinear(unsigned char p_ucULawByte)
|
|||
|
||||
// fgetwavraw(): only read data of .wav file. For multi-channel data, samples
|
||||
// are kept interleaved.
|
||||
static void fgetwavraw(FILE * f, ARRAY<short> & wav, const WAVEHEADER & wavhd)
|
||||
static void fgetwavraw(FILE * f, std::vector<short> & wav, const WAVEHEADER & wavhd)
|
||||
{
|
||||
int bytesPerSample = wavhd.wBitsPerSample / 8; // (sample size on one channel)
|
||||
wav.resize (wavhd.DataLength / bytesPerSample);
|
||||
if (wavhd.wFormatTag == 7) // mulaw
|
||||
{
|
||||
(wavhd.nChannels == 1) || RuntimeError ("fgetwav: wChannels=%d not supported for mulaw", wavhd.nChannels);
|
||||
ARRAY<unsigned char> data;
|
||||
std::vector<unsigned char> data;
|
||||
int numSamples = wavhd.DataLength/wavhd.nBlockAlign;
|
||||
data.resize (numSamples);
|
||||
freadOrDie (&data[0], sizeof (data[0]), numSamples, f);
|
||||
|
@ -1486,7 +1486,7 @@ static void fgetwavraw(FILE * f, ARRAY<short> & wav, const WAVEHEADER & wavhd)
|
|||
// fgetwav(): read an entire .wav file. Stereo is mapped to mono.
|
||||
// ----------------------------------------------------------------------------
|
||||
|
||||
void fgetwav (FILE * f, ARRAY<short> & wav, int & sampleRate)
|
||||
void fgetwav (FILE * f, std::vector<short> & wav, int & sampleRate)
|
||||
{
|
||||
WAVEHEADER wavhd; // will be filled in for 16-bit PCM!!
|
||||
signed short wFormatTag; // real format tag as found in data
|
||||
|
@ -1502,7 +1502,7 @@ void fgetwav (FILE * f, ARRAY<short> & wav, int & sampleRate)
|
|||
else if (wavhd.nChannels == 2)
|
||||
{
|
||||
//read raw data
|
||||
ARRAY<short> buf;
|
||||
std::vector<short> buf;
|
||||
buf.resize(numSamples * 2);
|
||||
fgetwavraw(f, buf, wavhd);
|
||||
|
||||
|
@ -1523,7 +1523,7 @@ void fgetwav (FILE * f, ARRAY<short> & wav, int & sampleRate)
|
|||
}
|
||||
}
|
||||
|
||||
void fgetwav (const wstring & fn, ARRAY<short> & wav, int & sampleRate)
|
||||
void fgetwav (const wstring & fn, std::vector<short> & wav, int & sampleRate)
|
||||
{
|
||||
auto_file_ptr f = fopenOrDie (fn, L"rbS");
|
||||
fgetwav (f, wav, sampleRate);
|
||||
|
@ -1538,13 +1538,13 @@ void fgetwav (const wstring & fn, ARRAY<short> & wav, int & sampleRate)
|
|||
// channel. j is sample index.
|
||||
// ----------------------------------------------------------------------------
|
||||
|
||||
void fgetraw (FILE *f, ARRAY< ARRAY<short> > & data, const WAVEHEADER & wavhd)
|
||||
void fgetraw (FILE *f, std::vector< std::vector<short> > & data, const WAVEHEADER & wavhd)
|
||||
{
|
||||
ARRAY<short> wavraw;
|
||||
std::vector<short> wavraw;
|
||||
fgetwavraw (f, wavraw, wavhd);
|
||||
data.resize (wavhd.nChannels);
|
||||
int numSamples = wavhd.DataLength/wavhd.nBlockAlign;
|
||||
ASSERT (numSamples == (int) wavraw.size() / wavhd.nChannels);
|
||||
assert (numSamples == (int) wavraw.size() / wavhd.nChannels);
|
||||
|
||||
for (int i = 0; i < wavhd.nChannels; i++)
|
||||
{
|
||||
|
@ -1599,7 +1599,7 @@ void fputwfx (FILE *f, const WAVEFORMATEX & wfx, unsigned int numSamples)
|
|||
unsigned int RiffLength = 36 + DataLength;
|
||||
unsigned int FmtLength = 16;
|
||||
// file header
|
||||
ASSERT (wfx.cbSize == 0 || wfx.cbSize == FmtLength + 2);
|
||||
assert (wfx.cbSize == 0 || wfx.cbSize == FmtLength + 2);
|
||||
fputTag (f, "RIFF");
|
||||
fputint (f, RiffLength);
|
||||
fputTag (f, "WAVE");
|
||||
|
@ -1861,11 +1861,24 @@ bool operator>= (const FILETIME & targettime, const FILETIME & inputtime) // f
|
|||
}
|
||||
#endif
|
||||
|
||||
bool getfiletime (const wstring & path, FILETIME & time)
|
||||
#ifdef _WIN32
|
||||
class auto_find_handle
|
||||
{
|
||||
HANDLE h;
|
||||
auto_find_handle operator= (const auto_find_handle &);
|
||||
auto_find_handle(const auto_find_handle &);
|
||||
public:
|
||||
auto_find_handle(HANDLE p_h) : h(p_h) {}
|
||||
~auto_find_handle() { if (h != INVALID_HANDLE_VALUE) ::FindClose(h); }
|
||||
operator HANDLE () const { return h; }
|
||||
};
|
||||
#endif
|
||||
|
||||
bool getfiletime(const wstring & path, FILETIME & time)
|
||||
{ // return file modification time, false if cannot be determined
|
||||
#ifdef _WIN32
|
||||
WIN32_FIND_DATAW findFileData;
|
||||
auto_handle hFind (FindFirstFileW (path.c_str(), &findFileData), ::FindClose);
|
||||
auto_find_handle hFind (FindFirstFileW (path.c_str(), &findFileData));
|
||||
if (hFind != INVALID_HANDLE_VALUE)
|
||||
{
|
||||
time = findFileData.ftLastWriteTime;
|
||||
|
@ -1891,7 +1904,7 @@ bool getfiletime (const wstring & path, FILETIME & time)
|
|||
#if 0
|
||||
void setfiletime (const wstring & path, const FILETIME & time)
|
||||
{ // update the file modification time of an existing file
|
||||
auto_handle h (CreateFileW (path.c_str(), FILE_WRITE_ATTRIBUTES,
|
||||
auto_find_handle h (CreateFileW (path.c_str(), FILE_WRITE_ATTRIBUTES,
|
||||
FILE_SHARE_READ|FILE_SHARE_WRITE, NULL,
|
||||
OPEN_EXISTING, FILE_ATTRIBUTE_NORMAL, NULL));
|
||||
if (h == INVALID_HANDLE_VALUE)
|
||||
|
@ -1947,7 +1960,7 @@ static BOOL ExpandWildcards (wstring path, vector<wstring> & paths)
|
|||
|
||||
// crawl folder
|
||||
WIN32_FIND_DATAW ffdata;
|
||||
auto_handle hFind (::FindFirstFileW (path.c_str(), &ffdata), ::FindClose);
|
||||
auto_find_handle hFind (::FindFirstFileW (path.c_str(), &ffdata));
|
||||
if (hFind == INVALID_HANDLE_VALUE)
|
||||
{
|
||||
DWORD err = ::GetLastError();
|
||||
|
@ -2136,7 +2149,7 @@ static inline std::wstring mbstowcs(const std::string & p) // input: MBCS
|
|||
size_t len = p.length();
|
||||
msra::basetypes::fixed_vector<wchar_t> buf(len + 1); // max: >1 mb chars => 1 wchar
|
||||
std::fill(buf.begin(), buf.end(), (wchar_t)0);
|
||||
OACR_WARNING_SUPPRESS(UNSAFE_STRING_FUNCTION, "Reviewed OK. size checked. [rogeryu 2006/03/21]");
|
||||
//OACR_WARNING_SUPPRESS(UNSAFE_STRING_FUNCTION, "Reviewed OK. size checked. [rogeryu 2006/03/21]");
|
||||
::mbstowcs(&buf[0], p.c_str(), len + 1);
|
||||
return std::wstring(&buf[0]);
|
||||
}
|
||||
|
|
|
@ -555,7 +555,7 @@ void lattice::fromhtklattice (const wstring & path, const std::unordered_map<std
|
|||
else
|
||||
RuntimeError("lattice: mal-formed before parse N=/L= line in lattice.");
|
||||
|
||||
ASSERT(info.numnodes > 0);
|
||||
assert(info.numnodes > 0);
|
||||
nodes.reserve (info.numnodes);
|
||||
// parse the nodes
|
||||
for (size_t i = 0; i < info.numnodes; i++, iter++)
|
||||
|
@ -572,7 +572,7 @@ void lattice::fromhtklattice (const wstring & path, const std::unordered_map<std
|
|||
info.numframes = max (info.numframes, (size_t) nodes.back().t);
|
||||
}
|
||||
// parse the edges
|
||||
ASSERT(info.numedges > 0);
|
||||
assert(info.numedges > 0);
|
||||
edges.reserve (info.numedges);
|
||||
align.reserve (info.numedges * 10); // 10 phones per word on av. should be enough
|
||||
std::string label;
|
||||
|
|
|
@ -249,7 +249,7 @@ public:
|
|||
p[0] = (unsigned char) value;
|
||||
p[1] = (unsigned char) (value >> 8);
|
||||
p[2] = (unsigned char) (value >> 16);
|
||||
ASSERT (value == (int) *this);
|
||||
assert (value == (int) *this);
|
||||
return value;
|
||||
}
|
||||
};
|
||||
|
@ -268,7 +268,7 @@ public:
|
|||
base.resize (newsize);
|
||||
uint24_ref r = uint24_ref (&base[cursize]);
|
||||
r = value;
|
||||
ASSERT (value == back());
|
||||
assert (value == back());
|
||||
}
|
||||
};
|
||||
|
||||
|
@ -313,7 +313,7 @@ class mgram_map
|
|||
if ((size_t) id >= level1lookup.size()) return nindex;
|
||||
i = level1lookup[id];
|
||||
}
|
||||
ASSERT (i == nindex || ids[1][i] == id);
|
||||
assert (i == nindex || ids[1][i] == id);
|
||||
return i;
|
||||
}
|
||||
index_t beg = firsts[m][i];
|
||||
|
@ -736,11 +736,11 @@ public:
|
|||
|
||||
coord c (k.m, (index_t) ids[k.m].size());
|
||||
|
||||
ASSERT (firsts[k.m-1].back() == (index_t) ids[k.m].size());
|
||||
assert (firsts[k.m-1].back() == (index_t) ids[k.m].size());
|
||||
ids[k.m].push_back (thisid); // create value
|
||||
firsts[k.m-1].back() = (index_t) ids[k.m].size();
|
||||
if (firsts[k.m-1].back() != (index_t) ids[k.m].size()) fail ("create() numeric overflow--index_t too small");
|
||||
ASSERT (k.m == M || firsts[k.m].back() == (index_t) ids[k.m+1].size());
|
||||
assert (k.m == M || firsts[k.m].back() == (index_t) ids[k.m+1].size());
|
||||
|
||||
// optimization: level1nonsparse flag
|
||||
// If unigram level is entirely non-sparse, we can save the search
|
||||
|
@ -772,10 +772,10 @@ public:
|
|||
firsts[m].resize (ids[m].size() +1, (int) ids[m+1].size());
|
||||
foreach_index (m, firsts)
|
||||
{
|
||||
ASSERT (firsts[m][0] == 0);
|
||||
assert (firsts[m][0] == 0);
|
||||
foreach_index (i, ids[m])
|
||||
ASSERT (firsts[m][i] <= firsts[m][i+1]);
|
||||
ASSERT ((size_t) firsts[m].back() == ids[m+1].size());
|
||||
assert (firsts[m][i] <= firsts[m][i+1]);
|
||||
assert ((size_t) firsts[m].back() == ids[m+1].size());
|
||||
}
|
||||
// id mapping
|
||||
// user-provided w->id map
|
||||
|
@ -1042,7 +1042,7 @@ public:
|
|||
continue;
|
||||
|
||||
const mgram_map::key key = *iter;
|
||||
ASSERT (m == key.order());
|
||||
assert (m == key.order());
|
||||
|
||||
// --- output m-gram to ARPA file
|
||||
fprintfOrDie (outf, "%.4f", logP[iter] / log10);
|
||||
|
@ -1068,7 +1068,7 @@ public:
|
|||
numMGramsWritten++;
|
||||
}
|
||||
fflushOrDie (outf);
|
||||
ASSERT (numMGramsWritten == map.size (m));
|
||||
assert (numMGramsWritten == map.size (m));
|
||||
fprintf (stderr, "\n");
|
||||
}
|
||||
|
||||
|
@ -1355,7 +1355,7 @@ protected:
|
|||
int newid = w2id[w]; // map to new id space
|
||||
mgram[m-1] = newid;
|
||||
}
|
||||
for (int k = 0; k < m; k++) ASSERT (mgram[k] == w2id[key[k]]);
|
||||
for (int k = 0; k < m; k++) assert (mgram[k] == w2id[key[k]]);
|
||||
// insert new key into sortedMap
|
||||
mgram_map::coord c = sortedMap.create (mgram_map::unmapped_key (&mgram[0], m), createCache);
|
||||
// copy over logP and logB
|
||||
|
@ -1481,7 +1481,7 @@ protected:
|
|||
if (m == 0) continue;
|
||||
|
||||
const mgram_map::key key = *iter;
|
||||
ASSERT (m == key.order());
|
||||
assert (m == key.order());
|
||||
|
||||
float thisP = P[iter];
|
||||
if (islog)
|
||||
|
@ -1606,6 +1606,7 @@ public:
|
|||
}
|
||||
};
|
||||
|
||||
#if 0
|
||||
// ===========================================================================
|
||||
// CMGramLMEstimator -- estimator for CMGramLM
|
||||
// Implements Kneser-Ney discounting with Goodman/Chen modification, as well
|
||||
|
@ -1970,7 +1971,7 @@ public:
|
|||
// estimate
|
||||
vector<bool> dropWord (userSymMap.size(), false);
|
||||
dropWord.push_back (true); // filtering but no <UNK>:
|
||||
ASSERT (!filterVocabulary || unkId != -1 || dropWord[dropId]);
|
||||
assert (!filterVocabulary || unkId != -1 || dropWord[dropId]);
|
||||
|
||||
//std::vector<unsigned int> minObs (2, 0);
|
||||
//std::vector<unsigned int> iMinObs (3, 0);
|
||||
|
@ -2105,7 +2106,7 @@ public:
|
|||
if (m < M && m < 3) // for comments see where we estimate the discounted probabilities
|
||||
{ // ^^ seems not to work for 4-grams...
|
||||
const mgram_map::key key = *iter; // needed to check for startId
|
||||
ASSERT (key.order() == m);
|
||||
assert (key.order() == m);
|
||||
|
||||
if (m < 2 || key.pop_w().back() != startId)
|
||||
{
|
||||
|
@ -2249,7 +2250,7 @@ public:
|
|||
}
|
||||
|
||||
const mgram_map::key key = *iter;
|
||||
ASSERT (key.order() == iter.order()); // (remove this check once verified)
|
||||
assert (key.order() == iter.order()); // (remove this check once verified)
|
||||
|
||||
// get history's count
|
||||
const mgram_map::coord j = histCoord[m-1]; // index of parent entry
|
||||
|
@ -2282,7 +2283,7 @@ public:
|
|||
histCount = KNTotalCounts[c_h]; // (u,v,w) -> count (*,v,*)
|
||||
if (histCount == 0) // must exist
|
||||
RuntimeError ("estimate: malformed data: back-off value not found (denominator)");
|
||||
ASSERT (histCount >= count);
|
||||
assert (histCount >= count);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -2423,7 +2424,9 @@ skippruned:; // m-gram was pruned
|
|||
fprintf (stderr, "\n");
|
||||
}
|
||||
};
|
||||
#endif
|
||||
|
||||
#if 0
|
||||
// ===========================================================================
|
||||
// CMGramLMClone -- create CMGramLM from sub-LMs through ILM and ILM::IIter
|
||||
// - create in memory into a CMGramLM
|
||||
|
@ -2538,539 +2541,9 @@ skipMGram:
|
|||
lmpath = msra::strfun::utf16 (lmpath8);
|
||||
}
|
||||
};
|
||||
|
||||
#if 0 // old version --remove once we are fully tested and comfortable
|
||||
class OldCMGramLM : public ILM
|
||||
{
|
||||
protected:
|
||||
// representation of LM in memory
|
||||
// For each order, there is a flattened array of LMSCORE tokens.
|
||||
// For each history order, there is a flattened array of LMHISTs.
|
||||
// E.g. a trigram's history's LMHIST entry (somewhere in refs[2]) denotes
|
||||
// the start index of the first LMSCORE entry (in entries[3]). The end
|
||||
// index is denoted by the start index of the next LMHIST entry (for this
|
||||
// purpose, the LMHIST arrays have one extra entry at the end).
|
||||
struct LMSCORE // an LM score, plus its word id for sparse storage
|
||||
{
|
||||
int id; // token id (in LM space)
|
||||
float logP; // and its score
|
||||
LMSCORE (int p_id, double p_logP) : id (p_id), logP ((float) p_logP) { }
|
||||
};
|
||||
struct LMHIST // an LM history -- index corresponds to LMSCORE index
|
||||
{
|
||||
int firstEntry; // index of first entry (end entry known from next LMHIST)
|
||||
float logB; // back-off weight
|
||||
LMHIST (int p_firstEntry, double p_logB) : firstEntry (p_firstEntry), logB ((float) p_logB) { }
|
||||
};
|
||||
int M;
|
||||
std::vector<std::vector<LMHIST>> refs; // [M] e.g. [2] for trigram history
|
||||
std::vector<std::vector<LMSCORE>> entries; // [M+1] e.g. [3] for trigrams. [0]=dummy
|
||||
|
||||
// mapping of numeric word ids from external (user-defined) space to the internal LM's
|
||||
std::vector<int> userToLMSymMap; // map to ids used in LM
|
||||
|
||||
// map user id to LM id, return -1 for anything unknown
|
||||
inline int mapId (int userId) const
|
||||
{
|
||||
if (userId < 0 || userId >= (int) userToLMSymMap.size()) return -1;
|
||||
else return userToLMSymMap[userId];
|
||||
}
|
||||
|
||||
bool entries1Unmapped; // if true then findEntry(id) == i for entries[1]
|
||||
|
||||
// search in an LMSCORE array
|
||||
// This is a relatively generic binary search.
|
||||
inline int findEntry (const std::vector<LMSCORE> & entries, int beg, int end, int id) const
|
||||
{
|
||||
while (beg < end)
|
||||
{
|
||||
int i = (beg + end) / 2;
|
||||
int v = entries[i].id;
|
||||
if (id == v) return i; // found it
|
||||
else if (id < v) end = i; // id is left of i
|
||||
else beg = i + 1; // id is right of i
|
||||
}
|
||||
return -1; // not found
|
||||
}
|
||||
|
||||
// diagnostics of previous score() call
|
||||
mutable int longestMGramFound; // longest m-gram (incl. predicted token) found
|
||||
mutable int longestHistoryFound; // longest history (excl. predicted token) found
|
||||
|
||||
public:
|
||||
virtual int getLastLongestHistoryFound() const { return longestHistoryFound; }
|
||||
virtual int getLastLongestMGramFound() const { return longestMGramFound; }
|
||||
virtual int order() const { return M; }
|
||||
|
||||
// mgram[m-1] = word to predict, tokens before that are history
|
||||
// m=3 means trigram
|
||||
virtual double score (const int * mgram, int m) const
|
||||
{
|
||||
longestHistoryFound = 0; // (diagnostics)
|
||||
|
||||
if (m > M) // too long a history for this model
|
||||
{
|
||||
mgram += (m - M);
|
||||
m = M;
|
||||
}
|
||||
double totalLogB = 0.0; // accumulated back-off
|
||||
|
||||
for (;;)
|
||||
{
|
||||
longestMGramFound = m; // (diagnostics)
|
||||
|
||||
if (m == 0) // not really defined in ARPA format
|
||||
return totalLogB + entries[0][0].logP;
|
||||
|
||||
if (m == 1)
|
||||
{
|
||||
// find the actual score
|
||||
// [beg, end) is the sub-range in entries array.
|
||||
int id = mapId (mgram[0]);
|
||||
const char * sym = idToSymbol (id); sym;// (debugging)
|
||||
|
||||
const std::vector<LMSCORE> & entries_1 = entries[1];
|
||||
int i = entries1Unmapped ? id : findEntry (entries_1, refs[0][0].firstEntry, refs[0][1].firstEntry, id);
|
||||
if (i == -1)
|
||||
goto backoff0;
|
||||
|
||||
ASSERT (entries_1[i].id == id); // verify unmapped unigram case
|
||||
double logP = entries_1[i].logP;
|
||||
return totalLogB + logP;
|
||||
}
|
||||
|
||||
// locate LMHIST and LMSCORE
|
||||
// We traverse history one by one.
|
||||
|
||||
int id = mapId (mgram[0]); // start with unigram history
|
||||
const char * sym = idToSymbol (id); // (debugging)
|
||||
int i = (entries1Unmapped) ? id : findEntry (entries[1], refs[0][0].firstEntry, refs[0][1].firstEntry, id);
|
||||
if (i == -1) // unknown history: fall back
|
||||
goto fallback;
|
||||
ASSERT (entries[1][i].id == id); // verify unmapped unigram case
|
||||
|
||||
// found it: advance search by one history token
|
||||
const std::vector<LMHIST> & refs_1 = refs[1];
|
||||
float logB = refs_1[i].logB;
|
||||
int beg = refs_1[i].firstEntry; // sub-array range for next level
|
||||
int end = refs_1[i+1].firstEntry;
|
||||
for (int n = 2; n < m; n++)
|
||||
{
|
||||
if (beg == end)
|
||||
goto fallback; // unseen history: fall back
|
||||
int id = mapId (mgram[n -1]);
|
||||
const char * sym = idToSymbol (id); sym; // (debugging)
|
||||
int i = findEntry (entries[n], beg, end, id);
|
||||
if (i == -1) // unseen history: fall back
|
||||
goto fallback;
|
||||
ASSERT (entries[n][i].id == id); // verify unmapped unigram case
|
||||
|
||||
// found it: advance search by one history token
|
||||
const std::vector<LMHIST> & refs_n = refs[n];
|
||||
logB = refs_n[i].logB;
|
||||
beg = refs_n[i].firstEntry; // sub-array range for next level
|
||||
end = refs_n[i+1].firstEntry;
|
||||
}
|
||||
|
||||
// we found the entire history: now find the actual score
|
||||
// [beg, end) is the sub-range in entries array.
|
||||
if (m -1 > longestHistoryFound)
|
||||
longestHistoryFound = m -1;
|
||||
|
||||
if (beg == end) // history has no successors (but a back-off weight)
|
||||
goto backoff;
|
||||
|
||||
id = mapId (mgram[m -1]);
|
||||
sym = idToSymbol (id); // (debugging)
|
||||
const std::vector<LMSCORE> & entries_m = entries[m];
|
||||
i = findEntry (entries_m, beg, end, id);
|
||||
if (i == -1)
|
||||
goto backoff;
|
||||
ASSERT (entries_m[i].id == id); // verify unmapped unigram case
|
||||
|
||||
longestMGramFound = m;
|
||||
|
||||
double logP = entries_m[i].logP;
|
||||
return totalLogB + logP;
|
||||
|
||||
backoff: // found history but not predicted token: back-off
|
||||
totalLogB += logB;
|
||||
|
||||
backoff0: // back-off knowing that logB == 0
|
||||
|
||||
fallback: // we get here in case of fallback (no back-off weight) or back-off
|
||||
mgram++;
|
||||
m--;
|
||||
} // and go again with the shortened history
|
||||
}
|
||||
|
||||
// same as score() but without optimizations (for reference)
|
||||
double score_unoptimized (const int * mgram, int m) const
|
||||
{
|
||||
if (m == 0) // not really defined in ARPA format
|
||||
return entries[0][0].logP;
|
||||
else if (m > M) // too long a history for this model
|
||||
{
|
||||
mgram += (m - M);
|
||||
m = M;
|
||||
}
|
||||
|
||||
// locate LMHIST and LMSCORE
|
||||
// We traverse history one by one.
|
||||
int beg = refs[0][0].firstEntry; // start with the unigram array
|
||||
int end = refs[0][1].firstEntry;
|
||||
float logB = 0.0f; // remember in the loop in case we need it
|
||||
for (int n = 1; n < m; n++)
|
||||
{
|
||||
int userId = mgram[n -1]; // may be -1 for unknown word
|
||||
int id = mapId (userId);
|
||||
const char * sym = idToSymbol (id); sym; // (debugging)
|
||||
const std::vector<LMSCORE> & entries_n = entries[n];
|
||||
int i = findEntry (entries_n, beg, end, id);
|
||||
if (i == -1) // unknown history: fall back
|
||||
return score_unoptimized (mgram +1, m -1); // tail recursion
|
||||
ASSERT (entries_n[i].id == id); // verify unmapped unigram case
|
||||
// found it: advance search by one history token
|
||||
const std::vector<LMHIST> & refs_n = refs[n];
|
||||
logB = refs_n[i].logB;
|
||||
beg = refs_n[i].firstEntry; // sub-array range for next level
|
||||
end = refs_n[i+1].firstEntry;
|
||||
}
|
||||
|
||||
// we found the entire history: now find the actual score
|
||||
// [beg, end) is the sub-range in entries array.
|
||||
int userId = mgram[m -1]; // word to predict
|
||||
int id = mapId (userId);
|
||||
const char * sym = idToSymbol (id); sym; // (debugging)
|
||||
const std::vector<LMSCORE> & entries_m1 = entries[m];
|
||||
int i = findEntry (entries_m1, beg, end, id);
|
||||
if (i != -1)
|
||||
{
|
||||
ASSERT (entries_m1[i].id == id); // verify unmapped unigram case
|
||||
double logP = entries_m1[i].logP;
|
||||
return logP;
|
||||
}
|
||||
|
||||
// found history but not predicted token: back-off
|
||||
return logB + score_unoptimized (mgram + 1, m -1);
|
||||
}
|
||||
|
||||
// test for OOV word (OOV w.r.t. LM)
|
||||
virtual bool oov (int id) const { return mapId (id) < 0; }
|
||||
|
||||
// this LM does not adapt: both arguments are ignored
virtual void adapt (const int *, size_t)
{
}
|
||||
private:
|
||||
|
||||
// keep this for debugging
|
||||
std::wstring filename; // input filename
|
||||
// One vocabulary entry: the token text together with its numeric LM id.
// Ordering is by token text only, so a sorted array of SYMBOLs can be binary-searched by word.
struct SYMBOL
{
    std::string symbol;     // token
    int id;                 // numeric id in LM space (index of word read)

    // compares the token text only; 'id' does not take part in the ordering
    bool operator< (const SYMBOL & other) const { return symbol < other.symbol; }

    // p_symbol must be a valid NUL-terminated string.
    // Initializers are listed in member declaration order, which is the order
    // in which members are actually initialized.
    SYMBOL (int p_id, const char * p_symbol) : symbol (p_symbol), id (p_id) { }
};
|
||||
std::vector<SYMBOL> lmSymbols; // (id, word) symbols used in LM
|
||||
std::vector<int> idToSymIndex; // map LM id to index in lmSymbols[] array
|
||||
|
||||
// search for a word in the sorted word array.
|
||||
// Only use this after sorting, i.e. after full 1-gram section has been read.
|
||||
// Only really used in read().
|
||||
inline int symbolToId (const char * word) const
|
||||
{
|
||||
int beg = 0;
|
||||
int end = (int) lmSymbols.size();
|
||||
while (beg < end)
|
||||
{
|
||||
int i = (beg + end) / 2;
|
||||
const char * v = lmSymbols[i].symbol.c_str();
|
||||
int cmp = strcmp (word, v);
|
||||
if (cmp == 0) return lmSymbols[i].id; // found it
|
||||
else if (cmp < 0) end = i; // id is left of i
|
||||
else beg = i + 1; // id is right of i
|
||||
}
|
||||
return -1; // not found
|
||||
}
|
||||
|
||||
inline const char * idToSymbol (int id) const
|
||||
{
|
||||
if (id < 0) return NULL; // empty string for unknown ids
|
||||
int i = idToSymIndex[id];
|
||||
return lmSymbols[i].symbol.c_str();
|
||||
}
|
||||
|
||||
public:
|
||||
|
||||
// read an ARPA (text) file.
|
||||
// Words do not need to be sorted in the unigram section, but the m-gram
|
||||
// sections have to be in the same order as the unigrams.
|
||||
// The 'userSymMap' defines the vocabulary space used in score().
|
||||
// If 'filterVocabulary' then LM entries for words not in userSymMap are skipped.
|
||||
// Otherwise the userSymMap is updated with the words from the LM.
|
||||
// 'maxM' allows to restrict the loading to a smaller LM order.
|
||||
// SYMMAP can be e.g. CSymMap or CSymbolSet.
|
||||
template<class SYMMAP>
|
||||
void read (const std::wstring & pathname, SYMMAP & userSymMap, bool filterVocabulary, int maxM)
|
||||
{
|
||||
int lineNo = 0;
|
||||
msra::basetypes::auto_file_ptr f = fopenOrDie (pathname, L"rbS");
|
||||
fprintf (stderr, "read: reading %S", pathname.c_str());
|
||||
filename = pathname; // (keep this info for debugging)
|
||||
|
||||
// --- read header information
|
||||
|
||||
// search for header line
|
||||
char buf[1024];
|
||||
lineNo++, fgetline (f, buf);
|
||||
while (strcmp (buf, "\\data\\") != 0 && !feof (f))
|
||||
lineNo++, fgetline (f, buf);
|
||||
lineNo++, fgetline (f, buf);
|
||||
|
||||
// get the dimensions
|
||||
std::vector<int> dims; dims.reserve (4);
|
||||
|
||||
while (buf[0] == 0 && !feof (f))
|
||||
lineNo++, fgetline (f, buf);
|
||||
|
||||
int n, dim;
|
||||
dims.push_back (1); // dummy zerogram entry
|
||||
while (sscanf (buf, "ngram %d=%d", &n, &dim) == 2 && n == (int) dims.size())
|
||||
{
|
||||
dims.push_back (dim);
|
||||
lineNo++, fgetline (f, buf);
|
||||
}
|
||||
|
||||
M = (int) dims.size() -1;
|
||||
if (M == 0)
|
||||
RuntimeError ("read: mal-formed LM file, no dimension information (%d): %S", lineNo, pathname.c_str());
|
||||
int fileM = M;
|
||||
if (M > maxM)
|
||||
M = maxM;
|
||||
|
||||
// allocate main storage
|
||||
refs.resize (M);
|
||||
for (int m = 0; m < M; m++)
|
||||
refs[m].reserve (dims[m] +1);
|
||||
entries.resize (M +1);
|
||||
for (int m = 0; m <= M; m++)
|
||||
entries[m].reserve (dims[m]);
|
||||
lmSymbols.reserve (dims[0]);
|
||||
|
||||
refs[0].push_back (LMHIST (0, 0.0));
|
||||
refs[0].push_back (LMHIST (0, -99.0)); // this one gets updated
|
||||
entries[0].push_back (LMSCORE (-1, -99.0)); // zerogram score -- gets updated later
|
||||
|
||||
std::vector<bool> skipWord; // true: skip entry containing this word
|
||||
skipWord.reserve (lmSymbols.capacity());
|
||||
|
||||
// --- read main sections
|
||||
|
||||
const double ln10xLMF = log (10.0); // ARPA scores are strangely scaled
|
||||
for (int m = 1; m <= M; m++)
|
||||
{
|
||||
while (buf[0] == 0 && !feof (f))
|
||||
lineNo++, fgetline (f, buf);
|
||||
|
||||
if (sscanf (buf, "\\%d-grams:", &n) != 1 || n != m)
|
||||
RuntimeError ("read: mal-formed LM file, bad section header (%d): %S", lineNo, pathname.c_str());
|
||||
lineNo++, fgetline (f, buf);
|
||||
|
||||
std::vector<int> mgram (m +1); // current mgram being read
|
||||
std::vector<int> prevmgram (m +1, -1); // previous mgram read
|
||||
std::vector<int> histEntry (m); // sub-array ranges
|
||||
|
||||
histEntry[0] = 0;
|
||||
|
||||
// read all the m-grams
|
||||
while (buf[0] != '\\')
|
||||
{
|
||||
if (buf[0] == 0)
|
||||
{
|
||||
lineNo++, fgetline (f, buf);
|
||||
continue;
|
||||
}
|
||||
|
||||
// -- parse the line
|
||||
const char * delim = " \t\n\r";
|
||||
const char * score = strtok (&buf[0], delim);
|
||||
if (score == NULL || score[0] == 0) // not checking whether it is numeric
|
||||
RuntimeError ("read: mal-formed LM file, no score (%d): %S", lineNo, pathname.c_str());
|
||||
double scoreVal = atof (score);
|
||||
double logP = scoreVal * ln10xLMF; // convert to natural log
|
||||
|
||||
bool skipEntry = false;
|
||||
for (int n = 1; n <= m; n++)
|
||||
{
|
||||
/*const*/ char * tok = strtok (NULL, delim);
|
||||
if (tok == NULL)
|
||||
RuntimeError ("read: mal-formed LM file, not enough words in mgram (%d): %S", lineNo, pathname.c_str());
|
||||
// map to id
|
||||
int id;
|
||||
if (m == 1) // unigram: build vocab table
|
||||
{
|
||||
id = (int) lmSymbols.size(); // unique id for this symbol
|
||||
lmSymbols.push_back (SYMBOL (id, tok));
|
||||
bool toSkip = false;
|
||||
if (userSymMap.sym2existingId (lmSymbols.back().symbol) == -1)
|
||||
{
|
||||
if (filterVocabulary)
|
||||
toSkip = true; // unknown word
|
||||
else
|
||||
userSymMap.sym2id (lmSymbols.back().symbol); // create it in user's space
|
||||
}
|
||||
skipWord.push_back (toSkip);
|
||||
}
|
||||
else // mgram: look up word in vocabulary
|
||||
{
|
||||
if (prevmgram[n] >= 0 && strcmp (idToSymbol (prevmgram[n]), tok) == 0)
|
||||
id = prevmgram[n];
|
||||
else
|
||||
{
|
||||
id = symbolToId (tok);
|
||||
if (id == -1)
|
||||
RuntimeError ("read: mal-formed LM file, m-gram contains unknown word (%d): %S", lineNo, pathname.c_str());
|
||||
}
|
||||
}
|
||||
mgram[n] = id; // that's our id
|
||||
skipEntry |= skipWord[id]; // skip entry if any token is unknown
|
||||
}
|
||||
|
||||
double logB = 0.0;
|
||||
if (m < M)
|
||||
{
|
||||
const char * bo = strtok (NULL, delim);
|
||||
if (score == NULL || score[0] == 0) // not checking whether it is numeric
|
||||
RuntimeError ("read: mal-formed LM file, no score (%d): %S", lineNo, pathname.c_str());
|
||||
double boVal = atof (bo);
|
||||
logB = boVal * ln10xLMF; // convert to natural log
|
||||
}
|
||||
|
||||
lineNo++, fgetline (f, buf);
|
||||
|
||||
if (skipEntry) // word contained unknown vocabulary: skip entire entry
|
||||
goto skipMGram;
|
||||
|
||||
// -- enter the information into our data structure
|
||||
|
||||
// locate the corresponding entries
|
||||
// histEntry[n] are valid iff mgram[n'] == prevmgram[n'] for all n' <= '
|
||||
|
||||
bool prevValid = true;
|
||||
for (int n = 1; n < m; n++)
|
||||
{
|
||||
if (prevValid && mgram[n] == prevmgram[n])
|
||||
continue;
|
||||
|
||||
if (prevValid && mgram[n] < prevmgram[n])
|
||||
RuntimeError ("read: mal-formed LM file, m-gram out of order (%d): %S", lineNo, pathname.c_str());
|
||||
|
||||
// a history token differs from previous mgram. That history must exist.
|
||||
const std::vector<LMSCORE> & entries_n = entries[n];
|
||||
const std::vector<LMHIST> & refs_h = refs[n -1]; // history
|
||||
int beg = refs_h[histEntry[n -1]].firstEntry; // sub-array range for next level
|
||||
int end = refs_h[histEntry[n -1] +1].firstEntry;
|
||||
int i = findEntry (entries_n, beg, end, mgram[n]);
|
||||
if (i == -1) // unknown history: fall back
|
||||
RuntimeError ("read: mal-formed LM file, m-gram history not defined (%d): %S", lineNo, pathname.c_str());
|
||||
// found it: narrow down search range
|
||||
histEntry[n] = i;
|
||||
prevValid = false;
|
||||
}
|
||||
|
||||
if (prevValid && mgram[m] <= prevmgram[m])
|
||||
RuntimeError ("read: mal-formed LM file, m-gram out of order (%d): %S", lineNo, pathname.c_str());
|
||||
|
||||
if (m < M) // create history entry
|
||||
refs[m].push_back (LMHIST (0, logB));
|
||||
entries[m].push_back (LMSCORE (mgram[m], logP)); // score entry
|
||||
|
||||
refs[m-1][histEntry[m-1]].firstEntry++; // for now count how many histories we got
|
||||
|
||||
skipMGram:
|
||||
// remember current mgram for next iteration
|
||||
::swap (mgram, prevmgram);
|
||||
}
|
||||
|
||||
// Update previous level history from #entries to firstEntry.
|
||||
// We do this afterwards because some histories may not be used and
|
||||
// therefore not occur in higher-order m-grams, such that we cannot
|
||||
// rely on touching them in the loop above. Counting entries instead
|
||||
// leaves those at 0, which is correct.
|
||||
std::vector<LMHIST> & refs_h = refs[m -1]; // history
|
||||
int n0 = 0;
|
||||
for (int i = 0; i < (int) refs_h.size(); i++)
|
||||
{
|
||||
int num = refs_h[i].firstEntry;
|
||||
refs_h[i].firstEntry = n0;
|
||||
n0 += num;
|
||||
}
|
||||
ASSERT (refs_h.back().firstEntry == (int) entries[m].size());
|
||||
|
||||
// create closing history entry
|
||||
if (m < M)
|
||||
refs[m].push_back (LMHIST (0, -99.0));
|
||||
|
||||
// fix the symbol set -- now we can binary-search in them with symbolToId()
|
||||
if (m == 1)
|
||||
{
|
||||
std::sort (lmSymbols.begin(), lmSymbols.end());
|
||||
idToSymIndex.resize (lmSymbols.size(), -1);
|
||||
for (int i = 0; i < (int) lmSymbols.size(); i++)
|
||||
{
|
||||
idToSymIndex[lmSymbols[i].id] = i;
|
||||
}
|
||||
}
|
||||
|
||||
fprintf (stderr, ", %d %d-grams", entries[m].size(), m);
|
||||
}
|
||||
fprintf (stderr, "\n");
|
||||
|
||||
// check end tag
|
||||
if (M == fileM)
|
||||
{ // only if caller did not restrict us to a lower order
|
||||
while (buf[0] == 0 && !feof (f))
|
||||
lineNo++, fgetline (f, buf);
|
||||
if (strcmp (buf, "\\end\\") != 0)
|
||||
RuntimeError ("read: mal-formed LM file, no \\end\\ tag (%d): %S", lineNo, pathname.c_str());
|
||||
}
|
||||
|
||||
// update zerogram score
|
||||
// We use the minimum of all unigram scores.
|
||||
const std::vector<LMSCORE> & entries_1 = entries[1];
|
||||
float unknownLogP = 0.0f;
|
||||
for (int i = 0; i < (int) entries_1.size(); i++)
|
||||
{
|
||||
if (entries_1[i].logP < -98.9f) continue; // disabled token does not count
|
||||
if (entries_1[i].logP < unknownLogP)
|
||||
unknownLogP = entries_1[i].logP;
|
||||
}
|
||||
entries[0][0].logP = unknownLogP;;
|
||||
//= (float) -log ((double) lmSymbols.size()); // zerogram score
|
||||
|
||||
// establish mapping of word ids from user to LM space
|
||||
userToLMSymMap.resize (userSymMap.size());
|
||||
for (int i = 0; i < userSymMap.size(); i++)
|
||||
{
|
||||
const char * sym = userSymMap.id2sym (i);
|
||||
int id = symbolToId (sym); // may be -1 if not found
|
||||
userToLMSymMap[i] = id;
|
||||
}
|
||||
|
||||
// check whether first-level unigrams need mapping
|
||||
// We don't unless user provided a dictionary to filter.
|
||||
entries1Unmapped = true; // assume findEntry (id) == id
|
||||
for (int i = 0; i < (int) entries_1.size(); i++)
|
||||
{
|
||||
if (entries_1[i].id != i)
|
||||
{
|
||||
entries1Unmapped = false;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
};
|
||||
#endif
|
||||
|
||||
#if 0
|
||||
// ===========================================================================
|
||||
// CPerplexity -- helper to measure perplexity
|
||||
// ===========================================================================
|
||||
|
@ -3172,5 +2645,6 @@ public:
|
|||
// return number of utterances
|
||||
int getNumUtterances() const { return numUtterances; }
|
||||
};
|
||||
#endif
|
||||
|
||||
};}; // namespace
|
||||
|
|
|
@ -274,15 +274,10 @@ public:
|
|||
else
|
||||
{
|
||||
cv::FileStorage fs;
|
||||
// REVIEW alexeyk: this sort of defeats the purpose of using wstring at all...
|
||||
auto fname = meanFile;
|
||||
#ifdef _WIN32
|
||||
fs.open(fname.c_str(), cv::FileStorage::READ);
|
||||
#else
|
||||
fs.open(charpath(fname), cv::FileStorage::READ);
|
||||
#endif
|
||||
// REVIEW alexeyk: this sort of defeats the purpose of using wstring at all... [fseide] no, only OpenCV has this problem.
|
||||
fs.open(msra::strfun::utf8(meanFile).c_str(), cv::FileStorage::READ);
|
||||
if (!fs.isOpened())
|
||||
RuntimeError("Could not open file: " + fname);
|
||||
RuntimeError("Could not open file: %ls", meanFile.c_str());
|
||||
fs["MeanImg"] >> m_meanImg;
|
||||
int cchan;
|
||||
fs["Channel"] >> cchan;
|
||||
|
@ -291,7 +286,7 @@ public:
|
|||
int ccol;
|
||||
fs["Col"] >> ccol;
|
||||
if (cchan * crow * ccol != m_meanImg.channels() * m_meanImg.rows * m_meanImg.cols)
|
||||
RuntimeError("Invalid data in file: " + fname);
|
||||
RuntimeError("Invalid data in file: %ls", meanFile.c_str());
|
||||
fs.release();
|
||||
m_meanImg = m_meanImg.reshape(cchan, crow);
|
||||
}
|
||||
|
|
|
@ -217,9 +217,6 @@ static inline void Sleep (size_t ms) { std::this_thread::sleep_for (std::chrono:
|
|||
|
||||
//#define SAFE_DELETE(p) { if(p) { delete (p); (p)=NULL; } }
|
||||
//#define SAFE_RELEASE(p) { if(p) { (p)->Release(); (p)=NULL; } } // nasty! use CComPtr<>
|
||||
#ifndef ASSERT
|
||||
#define ASSERT assert
|
||||
#endif
|
||||
|
||||
// ----------------------------------------------------------------------------
|
||||
// basic data types
|
||||
|
@ -227,66 +224,6 @@ static inline void Sleep (size_t ms) { std::this_thread::sleep_for (std::chrono:
|
|||
|
||||
namespace msra { namespace basetypes {
|
||||
|
||||
// class ARRAY -- std::vector with array-bounds checking
|
||||
// VS 2008 and above do this, so there is no longer a need for this.
|
||||
|
||||
#pragma warning(push)
|
||||
#pragma warning(disable : 4555) // expression has no affect, used so retail won't be empty
|
||||
|
||||
template<class _ElemType>
|
||||
class ARRAY : public std::vector<_ElemType>
|
||||
{
|
||||
#if defined (_DEBUG) || defined (_CHECKED) // debug version with range checking
|
||||
static void throwOutOfBounds()
|
||||
{ // (moved to separate function hoping to keep inlined code smaller
|
||||
OACR_WARNING_PUSH;
|
||||
OACR_WARNING_DISABLE(IGNOREDBYCOMMA, "Reviewd OK. Special trick below to show a message when assertion fails"
|
||||
"[rogeryu 2006/03/24]");
|
||||
OACR_WARNING_DISABLE(BOGUS_EXPRESSION_LIST, "This is intentional. [rogeryu 2006/03/24]");
|
||||
//ASSERT ("ARRAY::operator[] out of bounds", false);
|
||||
OACR_WARNING_POP;
|
||||
}
|
||||
#endif
|
||||
|
||||
public:
|
||||
|
||||
ARRAY() : std::vector<_ElemType> () { }
|
||||
ARRAY (int size) : std::vector<_ElemType> (size) { }
|
||||
|
||||
#if defined (_DEBUG) || defined (_CHECKED) // debug version with range checking
|
||||
// ------------------------------------------------------------------------
|
||||
// operator[]: with array-bounds checking
|
||||
// ------------------------------------------------------------------------
|
||||
|
||||
inline _ElemType & operator[] (int index) // writing
|
||||
{
|
||||
if (index < 0 || index >= size()) throwOutOfBounds();
|
||||
return (*(std::vector<_ElemType>*) this)[index];
|
||||
}
|
||||
|
||||
// ------------------------------------------------------------------------
|
||||
|
||||
inline const _ElemType & operator[] (int index) const // reading
|
||||
{
|
||||
if (index < 0 || index >= size()) throwOutOfBounds();
|
||||
return (*(std::vector<_ElemType>*) this)[index];
|
||||
}
|
||||
#endif
|
||||
|
||||
// ------------------------------------------------------------------------
|
||||
// size(): same as base class, but returning an 'int' instead of 'size_t'
|
||||
// to allow for better readable code
|
||||
// ------------------------------------------------------------------------
|
||||
|
||||
inline int size() const
|
||||
{
|
||||
size_t siz = ((std::vector<_ElemType>*) this)->size();
|
||||
return (int) siz;
|
||||
}
|
||||
};
|
||||
// overload swap(), otherwise we'd fallback to 3-way assignment & possibly throw
|
||||
template<class _T> inline void swap (ARRAY<_T> & L, ARRAY<_T> & R) throw()
|
||||
{ swap ((std::vector<_T> &) L, (std::vector<_T> &) R); }
|
||||
|
||||
// class fixed_vector - non-resizable vector
|
||||
|
||||
|
@ -294,8 +231,8 @@ template<class _T> class fixed_vector
|
|||
{
|
||||
_T * p; // pointer array
|
||||
size_t n; // number of elements
|
||||
void check (int index) const { index/*avoid compiler warning*/;ASSERT (index >= 0 && (size_t) index < n); }
|
||||
void check (size_t index) const { ASSERT (index < n); }
|
||||
void check (int index) const { index/*avoid compiler warning*/;assert (index >= 0 && (size_t) index < n); }
|
||||
void check (size_t index) const { assert (index < n); }
|
||||
// ... TODO: when I make this public, LinearTransform.h acts totally up but I cannot see where it comes from.
|
||||
//fixed_vector (const fixed_vector & other) : n (0), p (NULL) { *this = other; }
|
||||
public:
|
||||
|
@ -316,7 +253,7 @@ public:
|
|||
inline const _T & operator[] (int index) const { check (index); return p[index]; } // reading
|
||||
inline _T & operator[] (size_t index) { check (index); return p[index]; } // writing
|
||||
inline const _T & operator[] (size_t index) const { check (index); return p[index]; } // reading
|
||||
inline int indexof (const _T & elem) const { ASSERT (&elem >= p && &elem < p + n); return &elem - p; }
|
||||
inline int indexof (const _T & elem) const { assert (&elem >= p && &elem < p + n); return &elem - p; }
|
||||
inline void swap (fixed_vector & other) throw() { std::swap (other.p, p); std::swap (other.n, n); }
|
||||
template<class VECTOR> fixed_vector & operator= (const VECTOR & other)
|
||||
{
|
||||
|
@ -346,7 +283,7 @@ template<class _T> inline void swap (fixed_vector<_T> & L, fixed_vector<_T> & R)
|
|||
template<class T> class matrix : fixed_vector<T>
|
||||
{
|
||||
size_t numcols;
|
||||
size_t locate (size_t i, size_t j) const { ASSERT (i < rows() && j < cols()); return i * cols() + j; }
|
||||
size_t locate (size_t i, size_t j) const { assert (i < rows() && j < cols()); return i * cols() + j; }
|
||||
public:
|
||||
typedef T elemtype;
|
||||
matrix() : numcols (0) {}
|
||||
|
|
|
@ -558,14 +558,14 @@ std::wstring fgetlinew (FILE * f)
|
|||
}
|
||||
|
||||
// STL string version avoiding most memory allocations
|
||||
void fgetline (FILE * f, std::string & s, ARRAY<char> & buf)
|
||||
void fgetline (FILE * f, std::string & s, std::vector<char> & buf)
|
||||
{
|
||||
buf.resize (1000000); // enough? // KIT: increased to 1M to be safe
|
||||
const char * p = fgetline (f, &buf[0], (int) buf.size());
|
||||
s.assign (p);
|
||||
}
|
||||
|
||||
void fgetline (FILE * f, std::wstring & s, ARRAY<wchar_t> & buf)
|
||||
void fgetline (FILE * f, std::wstring & s, std::vector<wchar_t> & buf)
|
||||
{
|
||||
buf.resize (1000000); // enough? // KIT: increased to 1M to be safe
|
||||
const wchar_t * p = fgetline (f, &buf[0], (int) buf.size());
|
||||
|
@ -573,7 +573,7 @@ void fgetline (FILE * f, std::wstring & s, ARRAY<wchar_t> & buf)
|
|||
}
|
||||
|
||||
// char buffer version
|
||||
void fgetline (FILE * f, ARRAY<char> & buf)
|
||||
void fgetline (FILE * f, std::vector<char> & buf)
|
||||
{
|
||||
const int BUF_SIZE = 1000000; // enough? // KIT: increased to 1M to be safe
|
||||
buf.resize (BUF_SIZE);
|
||||
|
@ -581,7 +581,7 @@ void fgetline (FILE * f, ARRAY<char> & buf)
|
|||
buf.resize (strnlen (&buf[0], BUF_SIZE) +1); // SECURITY NOTE: string use has been reviewed
|
||||
}
|
||||
|
||||
void fgetline (FILE * f, ARRAY<wchar_t> & buf)
|
||||
void fgetline (FILE * f, std::vector<wchar_t> & buf)
|
||||
{
|
||||
const int BUF_SIZE = 1000000; // enough? // KIT: increased to 1M to be safe
|
||||
buf.resize (BUF_SIZE);
|
||||
|
@ -605,7 +605,7 @@ const char * fgetstring (FILE * f, __out_z_cap(size) char * buf, int size)
|
|||
}
|
||||
buf[i] = (char) c;
|
||||
}
|
||||
ASSERT (i < size);
|
||||
assert (i < size);
|
||||
buf[i] = 0;
|
||||
return buf;
|
||||
}
|
||||
|
@ -624,7 +624,7 @@ const char * fgetstring (const HANDLE f, __out_z_cap(size) char * buf, int size)
|
|||
}
|
||||
buf[i] = (char) c;
|
||||
}
|
||||
ASSERT (i < size);
|
||||
assert (i < size);
|
||||
buf[i] = 0;
|
||||
return buf;
|
||||
}
|
||||
|
@ -711,7 +711,7 @@ const char * fgettoken (FILE * f, __out_z_cap(size) char * buf, int size)
|
|||
if (rc != c)
|
||||
RuntimeError ("error in ungetc(): %s", strerror (errno));
|
||||
}
|
||||
ASSERT (i < size);
|
||||
assert (i < size);
|
||||
buf[i] = 0;
|
||||
return buf;
|
||||
}
|
||||
|
@ -818,14 +818,14 @@ void fcompareTag (const STRING & readTag, const STRING & expectedTag)
|
|||
void fputTag (FILE * f, const char * tag)
|
||||
{
|
||||
const int TAG_LEN = 4;
|
||||
ASSERT (strnlen (tag, TAG_LEN + 1) == TAG_LEN);
|
||||
assert (strnlen (tag, TAG_LEN + 1) == TAG_LEN);
|
||||
fwriteOrDie ((void *) tag, sizeof (*tag), strnlen (tag, TAG_LEN), f);
|
||||
}
|
||||
|
||||
void fputTag(const HANDLE f, const char * tag)
|
||||
{
|
||||
const int TAG_LEN = 4;
|
||||
ASSERT (strnlen (tag, TAG_LEN + 1) == TAG_LEN);
|
||||
assert (strnlen (tag, TAG_LEN + 1) == TAG_LEN);
|
||||
fwriteOrDie ((void *) tag, sizeof (*tag), strnlen (tag, TAG_LEN), f);
|
||||
}
|
||||
|
||||
|
@ -860,7 +860,7 @@ void fpad (FILE * f, int n)
|
|||
int len = n - (pos % n);
|
||||
const char dummyString[] = "MSR-Asia: JL+FS";
|
||||
size_t offset = sizeof(dummyString)/sizeof(dummyString[0]) - len;
|
||||
ASSERT (offset >= 0);
|
||||
assert (offset >= 0);
|
||||
fputstring (f, dummyString + offset);
|
||||
}
|
||||
// ----------------------------------------------------------------------------
|
||||
|
@ -899,7 +899,7 @@ short fgetshort_bigendian (FILE * f)
|
|||
int fgetint24 (FILE * f)
|
||||
{
|
||||
int v;
|
||||
ASSERT (sizeof (v) == 4);
|
||||
assert (sizeof (v) == 4);
|
||||
freadOrDie (&v, sizeof (v) -1, 1, f); // only read 3 lower-order bytes
|
||||
v <<= 8; // shift up (upper 8 bits uninit'ed)
|
||||
v >>= 8; // shift down 8 bits with sign-extend
|
||||
|
@ -976,7 +976,7 @@ float fgetfloat_ascii (FILE * f)
|
|||
RuntimeError ("error reading float value from file (invalid format): %s");
|
||||
else if (rc == EOF)
|
||||
RuntimeError ("error reading from file: %s", strerror (errno));
|
||||
ASSERT (rc == 1);
|
||||
assert (rc == 1);
|
||||
return val;
|
||||
}
|
||||
|
||||
|
@ -1066,8 +1066,8 @@ void WAVEHEADER::write (FILE * f)
|
|||
fputint (f, nAvgBytesPerSec);
|
||||
fputshort (f, nBlockAlign);
|
||||
fputshort (f, wBitsPerSample);
|
||||
ASSERT (FmtLength == 16);
|
||||
ASSERT (wFormatTag == 1);
|
||||
assert (FmtLength == 16);
|
||||
assert (wFormatTag == 1);
|
||||
fputTag (f, "data");
|
||||
fputint (f, DataLength);
|
||||
fflushOrDie (f);
|
||||
|
@ -1160,14 +1160,14 @@ static short toolULawToLinear(unsigned char p_ucULawByte)
|
|||
|
||||
// fgetwavraw(): only read data of .wav file. For multi-channel data, samples
|
||||
// are kept interleaved.
|
||||
static void fgetwavraw(FILE * f, ARRAY<short> & wav, const WAVEHEADER & wavhd)
|
||||
static void fgetwavraw(FILE * f, std::vector<short> & wav, const WAVEHEADER & wavhd)
|
||||
{
|
||||
int bytesPerSample = wavhd.wBitsPerSample / 8; // (sample size on one channel)
|
||||
wav.resize (wavhd.DataLength / bytesPerSample);
|
||||
if (wavhd.wFormatTag == 7) // mulaw
|
||||
{
|
||||
(wavhd.nChannels == 1) || RuntimeError ("fgetwav: wChannels=%d not supported for mulaw", wavhd.nChannels);
|
||||
ARRAY<unsigned char> data;
|
||||
std::vector<unsigned char> data;
|
||||
int numSamples = wavhd.DataLength/wavhd.nBlockAlign;
|
||||
data.resize (numSamples);
|
||||
freadOrDie (&data[0], sizeof (data[0]), numSamples, f);
|
||||
|
@ -1191,7 +1191,7 @@ static void fgetwavraw(FILE * f, ARRAY<short> & wav, const WAVEHEADER & wavhd)
|
|||
// fgetwav(): read an entire .wav file. Stereo is mapped to mono.
|
||||
// ----------------------------------------------------------------------------
|
||||
|
||||
void fgetwav (FILE * f, ARRAY<short> & wav, int & sampleRate)
|
||||
void fgetwav (FILE * f, std::vector<short> & wav, int & sampleRate)
|
||||
{
|
||||
WAVEHEADER wavhd; // will be filled in for 16-bit PCM!!
|
||||
signed short wFormatTag; // real format tag as found in data
|
||||
|
@ -1207,7 +1207,7 @@ void fgetwav (FILE * f, ARRAY<short> & wav, int & sampleRate)
|
|||
else if (wavhd.nChannels == 2)
|
||||
{
|
||||
//read raw data
|
||||
ARRAY<short> buf;
|
||||
std::vector<short> buf;
|
||||
buf.resize(numSamples * 2);
|
||||
fgetwavraw(f, buf, wavhd);
|
||||
|
||||
|
@ -1228,7 +1228,7 @@ void fgetwav (FILE * f, ARRAY<short> & wav, int & sampleRate)
|
|||
}
|
||||
}
|
||||
|
||||
void fgetwav (const wstring & fn, ARRAY<short> & wav, int & sampleRate)
|
||||
void fgetwav (const wstring & fn, std::vector<short> & wav, int & sampleRate)
|
||||
{
|
||||
auto_file_ptr f = fopenOrDie (fn, L"rbS");
|
||||
fgetwav (f, wav, sampleRate);
|
||||
|
@ -1243,13 +1243,13 @@ void fgetwav (const wstring & fn, ARRAY<short> & wav, int & sampleRate)
|
|||
// channel. j is sample index.
|
||||
// ----------------------------------------------------------------------------
|
||||
|
||||
void fgetraw (FILE *f, ARRAY< ARRAY<short> > & data, const WAVEHEADER & wavhd)
|
||||
void fgetraw (FILE *f, std::vector< std::vector<short> > & data, const WAVEHEADER & wavhd)
|
||||
{
|
||||
ARRAY<short> wavraw;
|
||||
std::vector<short> wavraw;
|
||||
fgetwavraw (f, wavraw, wavhd);
|
||||
data.resize (wavhd.nChannels);
|
||||
int numSamples = wavhd.DataLength/wavhd.nBlockAlign;
|
||||
ASSERT (numSamples == (int) wavraw.size() / wavhd.nChannels);
|
||||
assert (numSamples == (int) wavraw.size() / wavhd.nChannels);
|
||||
|
||||
for (int i = 0; i < wavhd.nChannels; i++)
|
||||
{
|
||||
|
@ -1304,7 +1304,7 @@ void fputwfx (FILE *f, const WAVEFORMATEX & wfx, unsigned int numSamples)
|
|||
unsigned int RiffLength = 36 + DataLength;
|
||||
unsigned int FmtLength = 16;
|
||||
// file header
|
||||
ASSERT (wfx.cbSize == 0 || wfx.cbSize == FmtLength + 2);
|
||||
assert (wfx.cbSize == 0 || wfx.cbSize == FmtLength + 2);
|
||||
fputTag (f, "RIFF");
|
||||
fputint (f, RiffLength);
|
||||
fputTag (f, "WAVE");
|
||||
|
@ -1377,7 +1377,7 @@ void fputshort (FILE * f, short v)
|
|||
|
||||
void fputint24 (FILE * f, int v)
|
||||
{
|
||||
ASSERT (sizeof (v) == 4);
|
||||
assert (sizeof (v) == 4);
|
||||
fwriteOrDie (&v, sizeof (v) -1, 1, f); // write low-order 3 bytes
|
||||
}
|
||||
|
||||
|
@ -1417,7 +1417,7 @@ void fputdouble (FILE * f, double v)
|
|||
// fputfile(): write a binary block or a string as a file
|
||||
// ----------------------------------------------------------------------------
|
||||
|
||||
void fputfile (const WSTRING & pathname, const ARRAY<char> & buffer)
|
||||
void fputfile (const WSTRING & pathname, const std::vector<char> & buffer)
|
||||
{
|
||||
FILE * f = fopenOrDie (pathname, L"wb");
|
||||
try
|
||||
|
@ -1475,7 +1475,7 @@ void fputfile (const WSTRING & pathname, const std::string & string)
|
|||
// fgetfile(): load a file as a binary block
|
||||
// ----------------------------------------------------------------------------
|
||||
|
||||
void fgetfile (const WSTRING & pathname, ARRAY<char> & buffer)
|
||||
void fgetfile (const WSTRING & pathname, std::vector<char> & buffer)
|
||||
{
|
||||
FILE * f = fopenOrDie (pathname, L"rb");
|
||||
size_t len = filesize (f);
|
||||
|
@ -1487,11 +1487,11 @@ void fgetfile (const WSTRING & pathname, ARRAY<char> & buffer)
|
|||
fclose (f);
|
||||
}
|
||||
|
||||
void fgetfile (FILE * f, ARRAY<char> & buffer)
|
||||
void fgetfile (FILE * f, std::vector<char> & buffer)
|
||||
{ // this version reads until eof
|
||||
buffer.resize (0);
|
||||
buffer.reserve (1000000); // avoid too many reallocations
|
||||
ARRAY<char> inbuf;
|
||||
std::vector<char> inbuf;
|
||||
inbuf.resize (65536); // read in chunks of this size
|
||||
while (!feof (f)) // read until eof
|
||||
{
|
||||
|
|
|
@ -569,7 +569,7 @@ void lattice::fromhtklattice (const wstring & path, const std::unordered_map<std
|
|||
else
|
||||
throw std::runtime_error ("lattice: mal-formed before parse N=/L= line in lattice.");
|
||||
|
||||
ASSERT(info.numnodes > 0);
|
||||
assert(info.numnodes > 0);
|
||||
nodes.reserve (info.numnodes);
|
||||
// parse the nodes
|
||||
for (size_t i = 0; i < info.numnodes; i++, iter++)
|
||||
|
@ -586,7 +586,7 @@ void lattice::fromhtklattice (const wstring & path, const std::unordered_map<std
|
|||
info.numframes = max (info.numframes, (size_t) nodes.back().t);
|
||||
}
|
||||
// parse the edges
|
||||
ASSERT(info.numedges > 0);
|
||||
assert(info.numedges > 0);
|
||||
edges.reserve (info.numedges);
|
||||
align.reserve (info.numedges * 10); // 10 phones per word on av. should be enough
|
||||
std::string label;
|
||||
|
|
|
@ -246,7 +246,7 @@ public:
|
|||
p[0] = (unsigned char) value;
|
||||
p[1] = (unsigned char) (value >> 8);
|
||||
p[2] = (unsigned char) (value >> 16);
|
||||
ASSERT (value == (int) *this);
|
||||
assert (value == (int) *this);
|
||||
return value;
|
||||
}
|
||||
};
|
||||
|
@ -265,7 +265,7 @@ public:
|
|||
base.resize (newsize);
|
||||
uint24_ref r = uint24_ref (&base[cursize]);
|
||||
r = value;
|
||||
ASSERT (value == back());
|
||||
assert (value == back());
|
||||
}
|
||||
};
|
||||
|
||||
|
@ -310,7 +310,7 @@ class mgram_map
|
|||
if ((size_t) id >= level1lookup.size()) return nindex;
|
||||
i = level1lookup[id];
|
||||
}
|
||||
ASSERT (i == nindex || ids[1][i] == id);
|
||||
assert (i == nindex || ids[1][i] == id);
|
||||
return i;
|
||||
}
|
||||
index_t beg = firsts[m][i];
|
||||
|
@ -733,11 +733,11 @@ public:
|
|||
|
||||
coord c (k.m, (index_t) ids[k.m].size());
|
||||
|
||||
ASSERT (firsts[k.m-1].back() == (index_t) ids[k.m].size());
|
||||
assert (firsts[k.m-1].back() == (index_t) ids[k.m].size());
|
||||
ids[k.m].push_back (thisid); // create value
|
||||
firsts[k.m-1].back() = (index_t) ids[k.m].size();
|
||||
if (firsts[k.m-1].back() != (index_t) ids[k.m].size()) fail ("create() numeric overflow--index_t too small");
|
||||
ASSERT (k.m == M || firsts[k.m].back() == (index_t) ids[k.m+1].size());
|
||||
assert (k.m == M || firsts[k.m].back() == (index_t) ids[k.m+1].size());
|
||||
|
||||
// optimization: level1nonsparse flag
|
||||
// If unigram level is entirely non-sparse, we can save the search
|
||||
|
@ -769,10 +769,10 @@ public:
|
|||
firsts[m].resize (ids[m].size() +1, (int) ids[m+1].size());
|
||||
foreach_index (m, firsts)
|
||||
{
|
||||
ASSERT (firsts[m][0] == 0);
|
||||
assert (firsts[m][0] == 0);
|
||||
foreach_index (i, ids[m])
|
||||
ASSERT (firsts[m][i] <= firsts[m][i+1]);
|
||||
ASSERT ((size_t) firsts[m].back() == ids[m+1].size());
|
||||
assert (firsts[m][i] <= firsts[m][i+1]);
|
||||
assert ((size_t) firsts[m].back() == ids[m+1].size());
|
||||
}
|
||||
// id mapping
|
||||
// user-provided w->id map
|
||||
|
@ -1039,7 +1039,7 @@ public:
|
|||
continue;
|
||||
|
||||
const mgram_map::key key = *iter;
|
||||
ASSERT (m == key.order());
|
||||
assert (m == key.order());
|
||||
|
||||
// --- output m-gram to ARPA file
|
||||
fprintfOrDie (outf, "%.4f", logP[iter] / log10);
|
||||
|
@ -1065,7 +1065,7 @@ public:
|
|||
numMGramsWritten++;
|
||||
}
|
||||
fflushOrDie (outf);
|
||||
ASSERT (numMGramsWritten == map.size (m));
|
||||
assert (numMGramsWritten == map.size (m));
|
||||
fprintf (stderr, "\n");
|
||||
}
|
||||
|
||||
|
@ -1352,7 +1352,7 @@ protected:
|
|||
int newid = w2id[w]; // map to new id space
|
||||
mgram[m-1] = newid;
|
||||
}
|
||||
for (int k = 0; k < m; k++) ASSERT (mgram[k] == w2id[key[k]]);
|
||||
for (int k = 0; k < m; k++) assert (mgram[k] == w2id[key[k]]);
|
||||
// insert new key into sortedMap
|
||||
mgram_map::coord c = sortedMap.create (mgram_map::unmapped_key (&mgram[0], m), createCache);
|
||||
// copy over logP and logB
|
||||
|
@ -1478,7 +1478,7 @@ protected:
|
|||
if (m == 0) continue;
|
||||
|
||||
const mgram_map::key key = *iter;
|
||||
ASSERT (m == key.order());
|
||||
assert (m == key.order());
|
||||
|
||||
float thisP = P[iter];
|
||||
if (islog)
|
||||
|
@ -1967,7 +1967,7 @@ public:
|
|||
// estimate
|
||||
vector<bool> dropWord (userSymMap.size(), false);
|
||||
dropWord.push_back (true); // filtering but no <UNK>:
|
||||
ASSERT (!filterVocabulary || unkId != -1 || dropWord[dropId]);
|
||||
assert (!filterVocabulary || unkId != -1 || dropWord[dropId]);
|
||||
|
||||
//std::vector<unsigned int> minObs (2, 0);
|
||||
//std::vector<unsigned int> iMinObs (3, 0);
|
||||
|
@ -2101,7 +2101,7 @@ public:
|
|||
if (m < M && m < 3) // for comments see where we estimate the discounted probabilities
|
||||
{ // ^^ seems not to work for 4-grams...
|
||||
const mgram_map::key key = *iter; // needed to check for startId
|
||||
ASSERT (key.order() == m);
|
||||
assert (key.order() == m);
|
||||
|
||||
if (m < 2 || key.pop_w().back() != startId)
|
||||
{
|
||||
|
@ -2245,7 +2245,7 @@ public:
|
|||
}
|
||||
|
||||
const mgram_map::key key = *iter;
|
||||
ASSERT (key.order() == iter.order()); // (remove this check once verified)
|
||||
assert (key.order() == iter.order()); // (remove this check once verified)
|
||||
|
||||
// get history's count
|
||||
const mgram_map::coord j = histCoord[m-1]; // index of parent entry
|
||||
|
@ -2278,7 +2278,7 @@ public:
|
|||
histCount = KNTotalCounts[c_h]; // (u,v,w) -> count (*,v,*)
|
||||
if (histCount == 0) // must exist
|
||||
RuntimeError ("estimate: malformed data: back-off value not found (denominator)");
|
||||
ASSERT (histCount >= count);
|
||||
assert (histCount >= count);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -2627,7 +2627,7 @@ public:
|
|||
if (i == -1)
|
||||
goto backoff0;
|
||||
|
||||
ASSERT (entries_1[i].id == id); // verify unmapped unigram case
|
||||
assert (entries_1[i].id == id); // verify unmapped unigram case
|
||||
double logP = entries_1[i].logP;
|
||||
return totalLogB + logP;
|
||||
}
|
||||
|
@ -2640,7 +2640,7 @@ public:
|
|||
int i = (entries1Unmapped) ? id : findEntry (entries[1], refs[0][0].firstEntry, refs[0][1].firstEntry, id);
|
||||
if (i == -1) // unknown history: fall back
|
||||
goto fallback;
|
||||
ASSERT (entries[1][i].id == id); // verify unmapped unigram case
|
||||
assert (entries[1][i].id == id); // verify unmapped unigram case
|
||||
|
||||
// found it: advance search by one history token
|
||||
const std::vector<LMHIST> & refs_1 = refs[1];
|
||||
|
@ -2656,7 +2656,7 @@ public:
|
|||
int i = findEntry (entries[n], beg, end, id);
|
||||
if (i == -1) // unseen history: fall back
|
||||
goto fallback;
|
||||
ASSERT (entries[n][i].id == id); // verify unmapped unigram case
|
||||
assert (entries[n][i].id == id); // verify unmapped unigram case
|
||||
|
||||
// found it: advance search by one history token
|
||||
const std::vector<LMHIST> & refs_n = refs[n];
|
||||
|
@ -2679,7 +2679,7 @@ public:
|
|||
i = findEntry (entries_m, beg, end, id);
|
||||
if (i == -1)
|
||||
goto backoff;
|
||||
ASSERT (entries_m[i].id == id); // verify unmapped unigram case
|
||||
assert (entries_m[i].id == id); // verify unmapped unigram case
|
||||
|
||||
longestMGramFound = m;
|
||||
|
||||
|
@ -2722,7 +2722,7 @@ fallback: // we get here in case of fallback (no back-off weight) or back-off
|
|||
int i = findEntry (entries_n, beg, end, id);
|
||||
if (i == -1) // unknown history: fall back
|
||||
return score_unoptimized (mgram +1, m -1); // tail recursion
|
||||
ASSERT (entries_n[i].id == id); // verify unmapped unigram case
|
||||
assert (entries_n[i].id == id); // verify unmapped unigram case
|
||||
// found it: advance search by one history token
|
||||
const std::vector<LMHIST> & refs_n = refs[n];
|
||||
logB = refs_n[i].logB;
|
||||
|
@ -2739,7 +2739,7 @@ fallback: // we get here in case of fallback (no back-off weight) or back-off
|
|||
int i = findEntry (entries_m1, beg, end, id);
|
||||
if (i != -1)
|
||||
{
|
||||
ASSERT (entries_m1[i].id == id); // verify unmapped unigram case
|
||||
assert (entries_m1[i].id == id); // verify unmapped unigram case
|
||||
double logP = entries_m1[i].logP;
|
||||
return logP;
|
||||
}
|
||||
|
@ -2997,7 +2997,7 @@ skipMGram:
|
|||
refs_h[i].firstEntry = n0;
|
||||
n0 += num;
|
||||
}
|
||||
ASSERT (refs_h.back().firstEntry == (int) entries[m].size());
|
||||
assert (refs_h.back().firstEntry == (int) entries[m].size());
|
||||
|
||||
// create closing history entry
|
||||
if (m < M)
|
||||
|
|
|
@ -217,8 +217,8 @@ static inline void Sleep (size_t ms) { std::this_thread::sleep_for (std::chrono:
|
|||
|
||||
//#define SAFE_DELETE(p) { if(p) { delete (p); (p)=NULL; } }
|
||||
//#define SAFE_RELEASE(p) { if(p) { (p)->Release(); (p)=NULL; } } // nasty! use CComPtr<>
|
||||
#ifndef ASSERT
|
||||
#define ASSERT assert
|
||||
#ifndef assert
|
||||
#define assert assert
|
||||
#endif
|
||||
|
||||
// ----------------------------------------------------------------------------
|
||||
|
@ -227,14 +227,14 @@ static inline void Sleep (size_t ms) { std::this_thread::sleep_for (std::chrono:
|
|||
|
||||
namespace msra { namespace basetypes {
|
||||
|
||||
// class ARRAY -- std::vector with array-bounds checking
|
||||
// class std::vector -- std::vector with array-bounds checking
|
||||
// VS 2008 and above do this, so there is no longer a need for this.
|
||||
|
||||
#pragma warning(push)
|
||||
#pragma warning(disable : 4555) // expression has no affect, used so retail won't be empty
|
||||
|
||||
template<class _ElemType>
|
||||
class ARRAY : public std::vector<_ElemType>
|
||||
class std::vector : public std::vector<_ElemType>
|
||||
{
|
||||
#if defined (_DEBUG) || defined (_CHECKED) // debug version with range checking
|
||||
static void throwOutOfBounds()
|
||||
|
@ -243,15 +243,15 @@ class ARRAY : public std::vector<_ElemType>
|
|||
OACR_WARNING_DISABLE(IGNOREDBYCOMMA, "Reviewd OK. Special trick below to show a message when assertion fails"
|
||||
"[rogeryu 2006/03/24]");
|
||||
OACR_WARNING_DISABLE(BOGUS_EXPRESSION_LIST, "This is intentional. [rogeryu 2006/03/24]");
|
||||
//ASSERT ("ARRAY::operator[] out of bounds", false);
|
||||
//assert ("std::vector::operator[] out of bounds", false);
|
||||
OACR_WARNING_POP;
|
||||
}
|
||||
#endif
|
||||
|
||||
public:
|
||||
|
||||
ARRAY() : std::vector<_ElemType> () { }
|
||||
ARRAY (int size) : std::vector<_ElemType> (size) { }
|
||||
std::vector() : std::vector<_ElemType> () { }
|
||||
std::vector (int size) : std::vector<_ElemType> (size) { }
|
||||
|
||||
#if defined (_DEBUG) || defined (_CHECKED) // debug version with range checking
|
||||
// ------------------------------------------------------------------------
|
||||
|
@ -285,7 +285,7 @@ public:
|
|||
}
|
||||
};
|
||||
// overload swap(), otherwise we'd fallback to 3-way assignment & possibly throw
|
||||
template<class _T> inline void swap (ARRAY<_T> & L, ARRAY<_T> & R) throw()
|
||||
template<class _T> inline void swap (std::vector<_T> & L, std::vector<_T> & R) throw()
|
||||
{ swap ((std::vector<_T> &) L, (std::vector<_T> &) R); }
|
||||
|
||||
// class fixed_vector - non-resizable vector
|
||||
|
@ -294,8 +294,8 @@ template<class _T> class fixed_vector
|
|||
{
|
||||
_T * p; // pointer array
|
||||
size_t n; // number of elements
|
||||
void check (int index) const { index/*avoid compiler warning*/;ASSERT (index >= 0 && (size_t) index < n); }
|
||||
void check (size_t index) const { ASSERT (index < n); }
|
||||
void check (int index) const { index/*avoid compiler warning*/;assert (index >= 0 && (size_t) index < n); }
|
||||
void check (size_t index) const { assert (index < n); }
|
||||
// ... TODO: when I make this public, LinearTransform.h acts totally up but I cannot see where it comes from.
|
||||
//fixed_vector (const fixed_vector & other) : n (0), p (NULL) { *this = other; }
|
||||
public:
|
||||
|
@ -316,7 +316,7 @@ public:
|
|||
inline const _T & operator[] (int index) const { check (index); return p[index]; } // reading
|
||||
inline _T & operator[] (size_t index) { check (index); return p[index]; } // writing
|
||||
inline const _T & operator[] (size_t index) const { check (index); return p[index]; } // reading
|
||||
inline int indexof (const _T & elem) const { ASSERT (&elem >= p && &elem < p + n); return &elem - p; }
|
||||
inline int indexof (const _T & elem) const { assert (&elem >= p && &elem < p + n); return &elem - p; }
|
||||
inline void swap (fixed_vector & other) throw() { std::swap (other.p, p); std::swap (other.n, n); }
|
||||
template<class VECTOR> fixed_vector & operator= (const VECTOR & other)
|
||||
{
|
||||
|
@ -346,7 +346,7 @@ template<class _T> inline void swap (fixed_vector<_T> & L, fixed_vector<_T> & R)
|
|||
template<class T> class matrix : fixed_vector<T>
|
||||
{
|
||||
size_t numcols;
|
||||
size_t locate (size_t i, size_t j) const { ASSERT (i < rows() && j < cols()); return i * cols() + j; }
|
||||
size_t locate (size_t i, size_t j) const { assert (i < rows() && j < cols()); return i * cols() + j; }
|
||||
public:
|
||||
typedef T elemtype;
|
||||
matrix() : numcols (0) {}
|
||||
|
|
|
@ -148,12 +148,12 @@ typedef unsigned char byte;
|
|||
|
||||
#define SAFE_DELETE(p) { if(p) { delete (p); (p)=NULL; } }
|
||||
#define SAFE_RELEASE(p) { if(p) { (p)->Release(); (p)=NULL; } } // nasty! use CComPtr<>
|
||||
#ifndef ASSERT
|
||||
#ifndef assert
|
||||
#ifdef _CHECKED // basetypes.h expects this function to be defined (it is in message.h)
|
||||
extern void _CHECKED_ASSERT_error(const char * file, int line, const char * exp);
|
||||
#define ASSERT(exp) ((exp)||(_CHECKED_ASSERT_error(__FILE__,__LINE__,#exp),0))
|
||||
#define assert(exp) ((exp)||(_CHECKED_ASSERT_error(__FILE__,__LINE__,#exp),0))
|
||||
#else
|
||||
#define ASSERT assert
|
||||
#define assert assert
|
||||
#endif
|
||||
#endif
|
||||
|
||||
|
@ -164,11 +164,11 @@ using namespace std;
|
|||
|
||||
namespace msra { namespace basetypes {
|
||||
|
||||
// class ARRAY -- std::vector with array-bounds checking
|
||||
// class std::vector -- std::vector with array-bounds checking
|
||||
// VS 2008 and above do this, so there is no longer a need for this.
|
||||
|
||||
template<class _ElemType>
|
||||
class ARRAY : public std::vector<_ElemType>
|
||||
class std::vector : public std::vector<_ElemType>
|
||||
{
|
||||
#if defined (_DEBUG) || defined (_CHECKED) // debug version with range checking
|
||||
static void throwOutOfBounds()
|
||||
|
@ -177,15 +177,15 @@ class ARRAY : public std::vector<_ElemType>
|
|||
OACR_WARNING_DISABLE(IGNOREDBYCOMMA, "Reviewd OK. Special trick below to show a message when assertion fails"
|
||||
"[rogeryu 2006/03/24]");
|
||||
OACR_WARNING_DISABLE(BOGUS_EXPRESSION_LIST, "This is intentional. [rogeryu 2006/03/24]");
|
||||
ASSERT (("ARRAY::operator[] out of bounds", false));
|
||||
assert (("std::vector::operator[] out of bounds", false));
|
||||
OACR_WARNING_POP;
|
||||
}
|
||||
#endif
|
||||
|
||||
public:
|
||||
|
||||
ARRAY() : std::vector<_ElemType> () { }
|
||||
ARRAY (int size) : std::vector<_ElemType> (size) { }
|
||||
std::vector() : std::vector<_ElemType> () { }
|
||||
std::vector (int size) : std::vector<_ElemType> (size) { }
|
||||
|
||||
#if defined (_DEBUG) || defined (_CHECKED) // debug version with range checking
|
||||
// ------------------------------------------------------------------------
|
||||
|
@ -219,7 +219,7 @@ public:
|
|||
}
|
||||
};
|
||||
// overload swap(), otherwise we'd fallback to 3-way assignment & possibly throw
|
||||
template<class _T> inline void swap (ARRAY<_T> & L, ARRAY<_T> & R) throw()
|
||||
template<class _T> inline void swap (std::vector<_T> & L, std::vector<_T> & R) throw()
|
||||
{ swap ((std::vector<_T> &) L, (std::vector<_T> &) R); }
|
||||
|
||||
// class fixed_vector - non-resizable vector
|
||||
|
@ -228,8 +228,8 @@ template<class _T> class fixed_vector
|
|||
{
|
||||
_T * p; // pointer array
|
||||
size_t n; // number of elements
|
||||
void check (int index) const { index; ASSERT (index >= 0 && (size_t) index < n); }
|
||||
void check (size_t index) const { index; ASSERT (index < n); }
|
||||
void check (int index) const { index; assert (index >= 0 && (size_t) index < n); }
|
||||
void check (size_t index) const { index; assert (index < n); }
|
||||
// ... TODO: when I make this public, LinearTransform.h acts totally up but I cannot see where it comes from.
|
||||
//fixed_vector (const fixed_vector & other) : n (0), p (NULL) { *this = other; }
|
||||
public:
|
||||
|
@ -250,7 +250,7 @@ public:
|
|||
inline const _T & operator[] (int index) const { check (index); return p[index]; } // reading
|
||||
inline _T & operator[] (size_t index) { check (index); return p[index]; } // writing
|
||||
inline const _T & operator[] (size_t index) const { check (index); return p[index]; } // reading
|
||||
inline int indexof (const _T & elem) const { ASSERT (&elem >= p && &elem < p + n); return &elem - p; }
|
||||
inline int indexof (const _T & elem) const { assert (&elem >= p && &elem < p + n); return &elem - p; }
|
||||
inline void swap (fixed_vector & other) throw() { std::swap (other.p, p); std::swap (other.n, n); }
|
||||
template<class VECTOR> fixed_vector & operator= (const VECTOR & other)
|
||||
{
|
||||
|
@ -278,7 +278,7 @@ template<class _T> inline void swap (fixed_vector<_T> & L, fixed_vector<_T> & R)
|
|||
template<class T> class matrix : fixed_vector<T>
|
||||
{
|
||||
size_t numcols;
|
||||
size_t locate (size_t i, size_t j) const { ASSERT (i < rows() && j < cols()); return i * cols() + j; }
|
||||
size_t locate (size_t i, size_t j) const { assert (i < rows() && j < cols()); return i * cols() + j; }
|
||||
public:
|
||||
typedef T elemtype;
|
||||
matrix() : numcols (0) {}
|
||||
|
@ -464,7 +464,7 @@ struct utf16 : std::wstring { utf16 (const std::string & p) // utf-8 to -16
|
|||
int rc = MultiByteToWideChar (CP_UTF8, 0, p.c_str(), (int) len,
|
||||
&buf[0], (int) buf.size());
|
||||
if (rc == 0) throw std::runtime_error ("MultiByteToWideChar");
|
||||
ASSERT (rc < buf.size ());
|
||||
assert (rc < buf.size ());
|
||||
(*(std::wstring*)this) = &buf[0];
|
||||
}};
|
||||
#endif
|
||||
|
|
|
@ -558,14 +558,14 @@ std::wstring fgetlinew (FILE * f)
|
|||
}
|
||||
|
||||
// STL string version avoiding most memory allocations
|
||||
void fgetline (FILE * f, std::string & s, ARRAY<char> & buf)
|
||||
void fgetline (FILE * f, std::string & s, std::vector<char> & buf)
|
||||
{
|
||||
buf.resize (1000000); // enough? // KIT: increased to 1M to be safe
|
||||
const char * p = fgetline (f, &buf[0], (int) buf.size());
|
||||
s.assign (p);
|
||||
}
|
||||
|
||||
void fgetline (FILE * f, std::wstring & s, ARRAY<wchar_t> & buf)
|
||||
void fgetline (FILE * f, std::wstring & s, std::vector<wchar_t> & buf)
|
||||
{
|
||||
buf.resize (1000000); // enough? // KIT: increased to 1M to be safe
|
||||
const wchar_t * p = fgetline (f, &buf[0], (int) buf.size());
|
||||
|
@ -573,7 +573,7 @@ void fgetline (FILE * f, std::wstring & s, ARRAY<wchar_t> & buf)
|
|||
}
|
||||
|
||||
// char buffer version
|
||||
void fgetline (FILE * f, ARRAY<char> & buf)
|
||||
void fgetline (FILE * f, std::vector<char> & buf)
|
||||
{
|
||||
const int BUF_SIZE = 1000000; // enough? // KIT: increased to 1M to be safe
|
||||
buf.resize (BUF_SIZE);
|
||||
|
@ -581,7 +581,7 @@ void fgetline (FILE * f, ARRAY<char> & buf)
|
|||
buf.resize (strnlen (&buf[0], BUF_SIZE) +1); // SECURITY NOTE: string use has been reviewed
|
||||
}
|
||||
|
||||
void fgetline (FILE * f, ARRAY<wchar_t> & buf)
|
||||
void fgetline (FILE * f, std::vector<wchar_t> & buf)
|
||||
{
|
||||
const int BUF_SIZE = 1000000; // enough? // KIT: increased to 1M to be safe
|
||||
buf.resize (BUF_SIZE);
|
||||
|
@ -605,7 +605,7 @@ const char * fgetstring (FILE * f, __out_z_cap(size) char * buf, int size)
|
|||
}
|
||||
buf[i] = (char) c;
|
||||
}
|
||||
ASSERT (i < size);
|
||||
assert (i < size);
|
||||
buf[i] = 0;
|
||||
return buf;
|
||||
}
|
||||
|
@ -624,7 +624,7 @@ const char * fgetstring (const HANDLE f, __out_z_cap(size) char * buf, int size)
|
|||
}
|
||||
buf[i] = (char) c;
|
||||
}
|
||||
ASSERT (i < size);
|
||||
assert (i < size);
|
||||
buf[i] = 0;
|
||||
return buf;
|
||||
}
|
||||
|
@ -711,7 +711,7 @@ const char * fgettoken (FILE * f, __out_z_cap(size) char * buf, int size)
|
|||
if (rc != c)
|
||||
RuntimeError ("error in ungetc(): %s", strerror (errno));
|
||||
}
|
||||
ASSERT (i < size);
|
||||
assert (i < size);
|
||||
buf[i] = 0;
|
||||
return buf;
|
||||
}
|
||||
|
@ -818,14 +818,14 @@ void fcompareTag (const STRING & readTag, const STRING & expectedTag)
|
|||
void fputTag (FILE * f, const char * tag)
|
||||
{
|
||||
const int TAG_LEN = 4;
|
||||
ASSERT (strnlen (tag, TAG_LEN + 1) == TAG_LEN);
|
||||
assert (strnlen (tag, TAG_LEN + 1) == TAG_LEN);
|
||||
fwriteOrDie ((void *) tag, sizeof (*tag), strnlen (tag, TAG_LEN), f);
|
||||
}
|
||||
|
||||
void fputTag(const HANDLE f, const char * tag)
|
||||
{
|
||||
const int TAG_LEN = 4;
|
||||
ASSERT (strnlen (tag, TAG_LEN + 1) == TAG_LEN);
|
||||
assert (strnlen (tag, TAG_LEN + 1) == TAG_LEN);
|
||||
fwriteOrDie ((void *) tag, sizeof (*tag), strnlen (tag, TAG_LEN), f);
|
||||
}
|
||||
|
||||
|
@ -860,7 +860,7 @@ void fpad (FILE * f, int n)
|
|||
int len = n - (pos % n);
|
||||
const char dummyString[] = "MSR-Asia: JL+FS";
|
||||
size_t offset = sizeof(dummyString)/sizeof(dummyString[0]) - len;
|
||||
ASSERT (offset >= 0);
|
||||
assert (offset >= 0);
|
||||
fputstring (f, dummyString + offset);
|
||||
}
|
||||
// ----------------------------------------------------------------------------
|
||||
|
@ -899,7 +899,7 @@ short fgetshort_bigendian (FILE * f)
|
|||
int fgetint24 (FILE * f)
|
||||
{
|
||||
int v;
|
||||
ASSERT (sizeof (v) == 4);
|
||||
assert (sizeof (v) == 4);
|
||||
freadOrDie (&v, sizeof (v) -1, 1, f); // only read 3 lower-order bytes
|
||||
v <<= 8; // shift up (upper 8 bits uninit'ed)
|
||||
v >>= 8; // shift down 8 bits with sign-extend
|
||||
|
@ -976,7 +976,7 @@ float fgetfloat_ascii (FILE * f)
|
|||
RuntimeError ("error reading float value from file (invalid format): %s");
|
||||
else if (rc == EOF)
|
||||
RuntimeError ("error reading from file: %s", strerror (errno));
|
||||
ASSERT (rc == 1);
|
||||
assert (rc == 1);
|
||||
return val;
|
||||
}
|
||||
|
||||
|
@ -1066,8 +1066,8 @@ void WAVEHEADER::write (FILE * f)
|
|||
fputint (f, nAvgBytesPerSec);
|
||||
fputshort (f, nBlockAlign);
|
||||
fputshort (f, wBitsPerSample);
|
||||
ASSERT (FmtLength == 16);
|
||||
ASSERT (wFormatTag == 1);
|
||||
assert (FmtLength == 16);
|
||||
assert (wFormatTag == 1);
|
||||
fputTag (f, "data");
|
||||
fputint (f, DataLength);
|
||||
fflushOrDie (f);
|
||||
|
@ -1160,14 +1160,14 @@ static short toolULawToLinear(unsigned char p_ucULawByte)
|
|||
|
||||
// fgetwavraw(): only read data of .wav file. For multi-channel data, samples
|
||||
// are kept interleaved.
|
||||
static void fgetwavraw(FILE * f, ARRAY<short> & wav, const WAVEHEADER & wavhd)
|
||||
static void fgetwavraw(FILE * f, std::vector<short> & wav, const WAVEHEADER & wavhd)
|
||||
{
|
||||
int bytesPerSample = wavhd.wBitsPerSample / 8; // (sample size on one channel)
|
||||
wav.resize (wavhd.DataLength / bytesPerSample);
|
||||
if (wavhd.wFormatTag == 7) // mulaw
|
||||
{
|
||||
(wavhd.nChannels == 1) || RuntimeError ("fgetwav: wChannels=%d not supported for mulaw", wavhd.nChannels);
|
||||
ARRAY<unsigned char> data;
|
||||
std::vector<unsigned char> data;
|
||||
int numSamples = wavhd.DataLength/wavhd.nBlockAlign;
|
||||
data.resize (numSamples);
|
||||
freadOrDie (&data[0], sizeof (data[0]), numSamples, f);
|
||||
|
@ -1191,7 +1191,7 @@ static void fgetwavraw(FILE * f, ARRAY<short> & wav, const WAVEHEADER & wavhd)
|
|||
// fgetwav(): read an entire .wav file. Stereo is mapped to mono.
|
||||
// ----------------------------------------------------------------------------
|
||||
|
||||
void fgetwav (FILE * f, ARRAY<short> & wav, int & sampleRate)
|
||||
void fgetwav (FILE * f, std::vector<short> & wav, int & sampleRate)
|
||||
{
|
||||
WAVEHEADER wavhd; // will be filled in for 16-bit PCM!!
|
||||
signed short wFormatTag; // real format tag as found in data
|
||||
|
@ -1207,7 +1207,7 @@ void fgetwav (FILE * f, ARRAY<short> & wav, int & sampleRate)
|
|||
else if (wavhd.nChannels == 2)
|
||||
{
|
||||
//read raw data
|
||||
ARRAY<short> buf;
|
||||
std::vector<short> buf;
|
||||
buf.resize(numSamples * 2);
|
||||
fgetwavraw(f, buf, wavhd);
|
||||
|
||||
|
@ -1228,7 +1228,7 @@ void fgetwav (FILE * f, ARRAY<short> & wav, int & sampleRate)
|
|||
}
|
||||
}
|
||||
|
||||
void fgetwav (const wstring & fn, ARRAY<short> & wav, int & sampleRate)
|
||||
void fgetwav (const wstring & fn, std::vector<short> & wav, int & sampleRate)
|
||||
{
|
||||
auto_file_ptr f = fopenOrDie (fn, L"rbS");
|
||||
fgetwav (f, wav, sampleRate);
|
||||
|
@ -1243,13 +1243,13 @@ void fgetwav (const wstring & fn, ARRAY<short> & wav, int & sampleRate)
|
|||
// channel. j is sample index.
|
||||
// ----------------------------------------------------------------------------
|
||||
|
||||
void fgetraw (FILE *f, ARRAY< ARRAY<short> > & data, const WAVEHEADER & wavhd)
|
||||
void fgetraw (FILE *f, std::vector< std::vector<short> > & data, const WAVEHEADER & wavhd)
|
||||
{
|
||||
ARRAY<short> wavraw;
|
||||
std::vector<short> wavraw;
|
||||
fgetwavraw (f, wavraw, wavhd);
|
||||
data.resize (wavhd.nChannels);
|
||||
int numSamples = wavhd.DataLength/wavhd.nBlockAlign;
|
||||
ASSERT (numSamples == (int) wavraw.size() / wavhd.nChannels);
|
||||
assert (numSamples == (int) wavraw.size() / wavhd.nChannels);
|
||||
|
||||
for (int i = 0; i < wavhd.nChannels; i++)
|
||||
{
|
||||
|
@ -1304,7 +1304,7 @@ void fputwfx (FILE *f, const WAVEFORMATEX & wfx, unsigned int numSamples)
|
|||
unsigned int RiffLength = 36 + DataLength;
|
||||
unsigned int FmtLength = 16;
|
||||
// file header
|
||||
ASSERT (wfx.cbSize == 0 || wfx.cbSize == FmtLength + 2);
|
||||
assert (wfx.cbSize == 0 || wfx.cbSize == FmtLength + 2);
|
||||
fputTag (f, "RIFF");
|
||||
fputint (f, RiffLength);
|
||||
fputTag (f, "WAVE");
|
||||
|
@ -1377,7 +1377,7 @@ void fputshort (FILE * f, short v)
|
|||
|
||||
void fputint24 (FILE * f, int v)
|
||||
{
|
||||
ASSERT (sizeof (v) == 4);
|
||||
assert (sizeof (v) == 4);
|
||||
fwriteOrDie (&v, sizeof (v) -1, 1, f); // write low-order 3 bytes
|
||||
}
|
||||
|
||||
|
@ -1417,7 +1417,7 @@ void fputdouble (FILE * f, double v)
|
|||
// fputfile(): write a binary block or a string as a file
|
||||
// ----------------------------------------------------------------------------
|
||||
|
||||
void fputfile (const WSTRING & pathname, const ARRAY<char> & buffer)
|
||||
void fputfile (const WSTRING & pathname, const std::vector<char> & buffer)
|
||||
{
|
||||
FILE * f = fopenOrDie (pathname, L"wb");
|
||||
try
|
||||
|
@ -1475,7 +1475,7 @@ void fputfile (const WSTRING & pathname, const std::string & string)
|
|||
// fgetfile(): load a file as a binary block
|
||||
// ----------------------------------------------------------------------------
|
||||
|
||||
void fgetfile (const WSTRING & pathname, ARRAY<char> & buffer)
|
||||
void fgetfile (const WSTRING & pathname, std::vector<char> & buffer)
|
||||
{
|
||||
FILE * f = fopenOrDie (pathname, L"rb");
|
||||
size_t len = filesize (f);
|
||||
|
@ -1487,11 +1487,11 @@ void fgetfile (const WSTRING & pathname, ARRAY<char> & buffer)
|
|||
fclose (f);
|
||||
}
|
||||
|
||||
void fgetfile (FILE * f, ARRAY<char> & buffer)
|
||||
void fgetfile (FILE * f, std::vector<char> & buffer)
|
||||
{ // this version reads until eof
|
||||
buffer.resize (0);
|
||||
buffer.reserve (1000000); // avoid too many reallocations
|
||||
ARRAY<char> inbuf;
|
||||
std::vector<char> inbuf;
|
||||
inbuf.resize (65536); // read in chunks of this size
|
||||
while (!feof (f)) // read until eof
|
||||
{
|
||||
|
|
|
@ -162,10 +162,10 @@ template<class CHAR> CHAR * fgetline (FILE * f, CHAR * buf, int size);
|
|||
template<class CHAR, size_t n> CHAR * fgetline (FILE * f, CHAR (& buf)[n]) { return fgetline (f, buf, n); }
|
||||
STRING fgetline (FILE * f);
|
||||
WSTRING fgetlinew (FILE * f);
|
||||
void fgetline (FILE * f, std::string & s, ARRAY<char> & buf);
|
||||
void fgetline (FILE * f, std::wstring & s, ARRAY<char> & buf);
|
||||
void fgetline (FILE * f, ARRAY<char> & buf);
|
||||
void fgetline (FILE * f, ARRAY<wchar_t> & buf);
|
||||
void fgetline (FILE * f, std::string & s, std::vector<char> & buf);
|
||||
void fgetline (FILE * f, std::wstring & s, std::vector<char> & buf);
|
||||
void fgetline (FILE * f, std::vector<char> & buf);
|
||||
void fgetline (FILE * f, std::vector<wchar_t> & buf);
|
||||
|
||||
const char * fgetstring (FILE * f, char * buf, int size);
|
||||
template<size_t n> const char * fgetstring (FILE * f, char (& buf)[n]) { return fgetstring (f, buf, n); }
|
||||
|
@ -274,8 +274,8 @@ double fgetdouble (FILE * f);
|
|||
// fgetwav(): read an entire .wav file
|
||||
// ----------------------------------------------------------------------------
|
||||
|
||||
void fgetwav (FILE * f, ARRAY<short> & wav, int & sampleRate);
|
||||
void fgetwav (const wstring & fn, ARRAY<short> & wav, int & sampleRate);
|
||||
void fgetwav (FILE * f, std::vector<short> & wav, int & sampleRate);
|
||||
void fgetwav (const wstring & fn, std::vector<short> & wav, int & sampleRate);
|
||||
|
||||
// ----------------------------------------------------------------------------
|
||||
// fputwav(): save data into a .wav file
|
||||
|
@ -325,7 +325,7 @@ void fputdouble (FILE * f, double val);
|
|||
// fputfile(): write a binary block or a string as a file
|
||||
// ----------------------------------------------------------------------------
|
||||
|
||||
void fputfile (const WSTRING & pathname, const ARRAY<char> & buffer);
|
||||
void fputfile (const WSTRING & pathname, const std::vector<char> & buffer);
|
||||
void fputfile (const WSTRING & pathname, const std::wstring & string);
|
||||
void fputfile (const WSTRING & pathname, const std::string & string);
|
||||
|
||||
|
@ -333,8 +333,8 @@ void fputfile (const WSTRING & pathname, const std::string & string);
|
|||
// fgetfile(): load a file as a binary block
|
||||
// ----------------------------------------------------------------------------
|
||||
|
||||
void fgetfile (const WSTRING & pathname, ARRAY<char> & buffer);
|
||||
void fgetfile (FILE * f, ARRAY<char> & buffer);
|
||||
void fgetfile (const WSTRING & pathname, std::vector<char> & buffer);
|
||||
void fgetfile (FILE * f, std::vector<char> & buffer);
|
||||
namespace msra { namespace files {
|
||||
void fgetfilelines (const std::wstring & pathname, vector<char> & readbuffer, std::vector<std::string> & lines);
|
||||
static inline std::vector<std::string> fgetfilelines (const std::wstring & pathname) { vector<char> buffer; std::vector<std::string> lines; fgetfilelines (pathname, buffer, lines); return lines; }
|
||||
|
@ -408,7 +408,7 @@ void fputwfx (FILE *f, const WAVEFORMATEX & wfx, unsigned int numSamples);
|
|||
// For example, data[i][j]: i is channel index, 0 means the first
|
||||
// channel. j is sample index.
|
||||
// ----------------------------------------------------------------------------
|
||||
void fgetraw (FILE *f,ARRAY< ARRAY<short> > & data,const WAVEHEADER & wavhd);
|
||||
void fgetraw (FILE *f,std::vector< std::vector<short> > & data,const WAVEHEADER & wavhd);
|
||||
|
||||
// ----------------------------------------------------------------------------
|
||||
// temp functions -- clean these up
|
||||
|
|
|
@ -569,7 +569,7 @@ void lattice::fromhtklattice (const wstring & path, const std::unordered_map<std
|
|||
else
|
||||
throw std::runtime_error ("lattice: mal-formed before parse N=/L= line in lattice.");
|
||||
|
||||
ASSERT(info.numnodes > 0);
|
||||
assert(info.numnodes > 0);
|
||||
nodes.reserve (info.numnodes);
|
||||
// parse the nodes
|
||||
for (size_t i = 0; i < info.numnodes; i++, iter++)
|
||||
|
@ -586,7 +586,7 @@ void lattice::fromhtklattice (const wstring & path, const std::unordered_map<std
|
|||
info.numframes = max (info.numframes, (size_t) nodes.back().t);
|
||||
}
|
||||
// parse the edges
|
||||
ASSERT(info.numedges > 0);
|
||||
assert(info.numedges > 0);
|
||||
edges.reserve (info.numedges);
|
||||
align.reserve (info.numedges * 10); // 10 phones per word on av. should be enough
|
||||
std::string label;
|
||||
|
|
|
@ -246,7 +246,7 @@ public:
|
|||
p[0] = (unsigned char) value;
|
||||
p[1] = (unsigned char) (value >> 8);
|
||||
p[2] = (unsigned char) (value >> 16);
|
||||
ASSERT (value == (int) *this);
|
||||
assert (value == (int) *this);
|
||||
return value;
|
||||
}
|
||||
};
|
||||
|
@ -265,7 +265,7 @@ public:
|
|||
base.resize (newsize);
|
||||
uint24_ref r = uint24_ref (&base[cursize]);
|
||||
r = value;
|
||||
ASSERT (value == back());
|
||||
assert (value == back());
|
||||
}
|
||||
};
|
||||
|
||||
|
@ -310,7 +310,7 @@ class mgram_map
|
|||
if ((size_t) id >= level1lookup.size()) return nindex;
|
||||
i = level1lookup[id];
|
||||
}
|
||||
ASSERT (i == nindex || ids[1][i] == id);
|
||||
assert (i == nindex || ids[1][i] == id);
|
||||
return i;
|
||||
}
|
||||
index_t beg = firsts[m][i];
|
||||
|
@ -733,11 +733,11 @@ public:
|
|||
|
||||
coord c (k.m, (index_t) ids[k.m].size());
|
||||
|
||||
ASSERT (firsts[k.m-1].back() == (index_t) ids[k.m].size());
|
||||
assert (firsts[k.m-1].back() == (index_t) ids[k.m].size());
|
||||
ids[k.m].push_back (thisid); // create value
|
||||
firsts[k.m-1].back() = (index_t) ids[k.m].size();
|
||||
if (firsts[k.m-1].back() != (index_t) ids[k.m].size()) fail ("create() numeric overflow--index_t too small");
|
||||
ASSERT (k.m == M || firsts[k.m].back() == (index_t) ids[k.m+1].size());
|
||||
assert (k.m == M || firsts[k.m].back() == (index_t) ids[k.m+1].size());
|
||||
|
||||
// optimization: level1nonsparse flag
|
||||
// If unigram level is entirely non-sparse, we can save the search
|
||||
|
@ -769,10 +769,10 @@ public:
|
|||
firsts[m].resize (ids[m].size() +1, (int) ids[m+1].size());
|
||||
foreach_index (m, firsts)
|
||||
{
|
||||
ASSERT (firsts[m][0] == 0);
|
||||
assert (firsts[m][0] == 0);
|
||||
foreach_index (i, ids[m])
|
||||
ASSERT (firsts[m][i] <= firsts[m][i+1]);
|
||||
ASSERT ((size_t) firsts[m].back() == ids[m+1].size());
|
||||
assert (firsts[m][i] <= firsts[m][i+1]);
|
||||
assert ((size_t) firsts[m].back() == ids[m+1].size());
|
||||
}
|
||||
// id mapping
|
||||
// user-provided w->id map
|
||||
|
@ -1039,7 +1039,7 @@ public:
|
|||
continue;
|
||||
|
||||
const mgram_map::key key = *iter;
|
||||
ASSERT (m == key.order());
|
||||
assert (m == key.order());
|
||||
|
||||
// --- output m-gram to ARPA file
|
||||
fprintfOrDie (outf, "%.4f", logP[iter] / log10);
|
||||
|
@ -1065,7 +1065,7 @@ public:
|
|||
numMGramsWritten++;
|
||||
}
|
||||
fflushOrDie (outf);
|
||||
ASSERT (numMGramsWritten == map.size (m));
|
||||
assert (numMGramsWritten == map.size (m));
|
||||
fprintf (stderr, "\n");
|
||||
}
|
||||
|
||||
|
@ -1352,7 +1352,7 @@ protected:
|
|||
int newid = w2id[w]; // map to new id space
|
||||
mgram[m-1] = newid;
|
||||
}
|
||||
for (int k = 0; k < m; k++) ASSERT (mgram[k] == w2id[key[k]]);
|
||||
for (int k = 0; k < m; k++) assert (mgram[k] == w2id[key[k]]);
|
||||
// insert new key into sortedMap
|
||||
mgram_map::coord c = sortedMap.create (mgram_map::unmapped_key (&mgram[0], m), createCache);
|
||||
// copy over logP and logB
|
||||
|
@ -1478,7 +1478,7 @@ protected:
|
|||
if (m == 0) continue;
|
||||
|
||||
const mgram_map::key key = *iter;
|
||||
ASSERT (m == key.order());
|
||||
assert (m == key.order());
|
||||
|
||||
float thisP = P[iter];
|
||||
if (islog)
|
||||
|
@ -1967,7 +1967,7 @@ public:
|
|||
// estimate
|
||||
vector<bool> dropWord (userSymMap.size(), false);
|
||||
dropWord.push_back (true); // filtering but no <UNK>:
|
||||
ASSERT (!filterVocabulary || unkId != -1 || dropWord[dropId]);
|
||||
assert (!filterVocabulary || unkId != -1 || dropWord[dropId]);
|
||||
|
||||
//std::vector<unsigned int> minObs (2, 0);
|
||||
//std::vector<unsigned int> iMinObs (3, 0);
|
||||
|
@ -2101,7 +2101,7 @@ public:
|
|||
if (m < M && m < 3) // for comments see where we estimate the discounted probabilities
|
||||
{ // ^^ seems not to work for 4-grams...
|
||||
const mgram_map::key key = *iter; // needed to check for startId
|
||||
ASSERT (key.order() == m);
|
||||
assert (key.order() == m);
|
||||
|
||||
if (m < 2 || key.pop_w().back() != startId)
|
||||
{
|
||||
|
@ -2245,7 +2245,7 @@ public:
|
|||
}
|
||||
|
||||
const mgram_map::key key = *iter;
|
||||
ASSERT (key.order() == iter.order()); // (remove this check once verified)
|
||||
assert (key.order() == iter.order()); // (remove this check once verified)
|
||||
|
||||
// get history's count
|
||||
const mgram_map::coord j = histCoord[m-1]; // index of parent entry
|
||||
|
@ -2278,7 +2278,7 @@ public:
|
|||
histCount = KNTotalCounts[c_h]; // (u,v,w) -> count (*,v,*)
|
||||
if (histCount == 0) // must exist
|
||||
RuntimeError ("estimate: malformed data: back-off value not found (denominator)");
|
||||
ASSERT (histCount >= count);
|
||||
assert (histCount >= count);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -2627,7 +2627,7 @@ public:
|
|||
if (i == -1)
|
||||
goto backoff0;
|
||||
|
||||
ASSERT (entries_1[i].id == id); // verify unmapped unigram case
|
||||
assert (entries_1[i].id == id); // verify unmapped unigram case
|
||||
double logP = entries_1[i].logP;
|
||||
return totalLogB + logP;
|
||||
}
|
||||
|
@ -2640,7 +2640,7 @@ public:
|
|||
int i = (entries1Unmapped) ? id : findEntry (entries[1], refs[0][0].firstEntry, refs[0][1].firstEntry, id);
|
||||
if (i == -1) // unknown history: fall back
|
||||
goto fallback;
|
||||
ASSERT (entries[1][i].id == id); // verify unmapped unigram case
|
||||
assert (entries[1][i].id == id); // verify unmapped unigram case
|
||||
|
||||
// found it: advance search by one history token
|
||||
const std::vector<LMHIST> & refs_1 = refs[1];
|
||||
|
@ -2656,7 +2656,7 @@ public:
|
|||
int i = findEntry (entries[n], beg, end, id);
|
||||
if (i == -1) // unseen history: fall back
|
||||
goto fallback;
|
||||
ASSERT (entries[n][i].id == id); // verify unmapped unigram case
|
||||
assert (entries[n][i].id == id); // verify unmapped unigram case
|
||||
|
||||
// found it: advance search by one history token
|
||||
const std::vector<LMHIST> & refs_n = refs[n];
|
||||
|
@ -2679,7 +2679,7 @@ public:
|
|||
i = findEntry (entries_m, beg, end, id);
|
||||
if (i == -1)
|
||||
goto backoff;
|
||||
ASSERT (entries_m[i].id == id); // verify unmapped unigram case
|
||||
assert (entries_m[i].id == id); // verify unmapped unigram case
|
||||
|
||||
longestMGramFound = m;
|
||||
|
||||
|
@ -2722,7 +2722,7 @@ fallback: // we get here in case of fallback (no back-off weight) or back-off
|
|||
int i = findEntry (entries_n, beg, end, id);
|
||||
if (i == -1) // unknown history: fall back
|
||||
return score_unoptimized (mgram +1, m -1); // tail recursion
|
||||
ASSERT (entries_n[i].id == id); // verify unmapped unigram case
|
||||
assert (entries_n[i].id == id); // verify unmapped unigram case
|
||||
// found it: advance search by one history token
|
||||
const std::vector<LMHIST> & refs_n = refs[n];
|
||||
logB = refs_n[i].logB;
|
||||
|
@ -2739,7 +2739,7 @@ fallback: // we get here in case of fallback (no back-off weight) or back-off
|
|||
int i = findEntry (entries_m1, beg, end, id);
|
||||
if (i != -1)
|
||||
{
|
||||
ASSERT (entries_m1[i].id == id); // verify unmapped unigram case
|
||||
assert (entries_m1[i].id == id); // verify unmapped unigram case
|
||||
double logP = entries_m1[i].logP;
|
||||
return logP;
|
||||
}
|
||||
|
@ -2997,7 +2997,7 @@ skipMGram:
|
|||
refs_h[i].firstEntry = n0;
|
||||
n0 += num;
|
||||
}
|
||||
ASSERT (refs_h.back().firstEntry == (int) entries[m].size());
|
||||
assert (refs_h.back().firstEntry == (int) entries[m].size());
|
||||
|
||||
// create closing history entry
|
||||
if (m < M)
|
||||
|
|
|
@ -803,7 +803,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
|
|||
template<class ElemType>
|
||||
Matrix<ElemType>& Matrix<ElemType>::SetColumnSlice(const Matrix<ElemType>& fromMatrix, size_t startColumn, size_t numCols)
|
||||
{
|
||||
ASSERT(m_CPUMatrix != nullptr || m_GPUMatrix != nullptr);
|
||||
assert(m_CPUMatrix != nullptr || m_GPUMatrix != nullptr);
|
||||
// must already have been allocated
|
||||
|
||||
DISPATCH_MATRIX_ON_FLAG(&fromMatrix,
|
||||
|
@ -820,7 +820,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
|
|||
template<class ElemType>
|
||||
void Matrix<ElemType>::CopyColumnsStrided(const Matrix<ElemType>& fromMatrix, size_t numCols, size_t srcNumColsStride, size_t destNumColsStride)
|
||||
{
|
||||
ASSERT(m_CPUMatrix != nullptr || m_GPUMatrix != nullptr);
|
||||
assert(m_CPUMatrix != nullptr || m_GPUMatrix != nullptr);
|
||||
|
||||
DISPATCH_MATRIX_ON_FLAG(&fromMatrix,
|
||||
this,
|
||||
|
|
|
@ -10,8 +10,9 @@
|
|||
#include <chrono>
|
||||
#include <iostream>
|
||||
#include <vector>
|
||||
#include "..\Math\Matrix.h"
|
||||
#include "..\Math\CPUMatrix.h"
|
||||
#include "Matrix.h"
|
||||
#include "CPUMatrix.h"
|
||||
#include "Sequences.h"
|
||||
using namespace Microsoft::MSR::CNTK;
|
||||
using namespace std;
|
||||
|
||||
|
@ -95,7 +96,7 @@ void oldRnnEvaluateThisNodeSRP(Matrix<ElemType>& functionValues, size_t mNbr, Ma
|
|||
template<class ElemType>
|
||||
void oldRNNEvaluateThisNodeSRP(const size_t timeIdxInSeq, const int delay, const bool reset, const ElemType default_activity, Matrix<ElemType>& functionValues, const Matrix<ElemType>& pastActivity, const Matrix<ElemType>& inputFunctionValues, const size_t indexInBatch, const size_t mNbr)
|
||||
{
|
||||
ASSERT(delay > 0);
|
||||
assert(delay > 0);
|
||||
|
||||
if (functionValues.GetNumRows() != inputFunctionValues.GetNumRows() ||
|
||||
functionValues.GetNumCols() != inputFunctionValues.GetNumCols())
|
||||
|
|
|
@ -65,6 +65,7 @@
|
|||
<PreprocessorDefinitions>WIN32;_DEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
|
||||
<SDLCheck>true</SDLCheck>
|
||||
<TreatWarningAsError>true</TreatWarningAsError>
|
||||
<AdditionalIncludeDirectories>..\Math; ..\..\Common\Include; $(CudaToolkitIncludeDir); %(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
|
||||
</ClCompile>
|
||||
<Link>
|
||||
<SubSystem>Console</SubSystem>
|
||||
|
@ -86,7 +87,7 @@
|
|||
<PreprocessorDefinitions>WIN32;NDEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
|
||||
<SDLCheck>true</SDLCheck>
|
||||
<OpenMPSupport>true</OpenMPSupport>
|
||||
<AdditionalIncludeDirectories>..\..\common\include</AdditionalIncludeDirectories>
|
||||
<AdditionalIncludeDirectories>..\Math; ..\..\Common\Include; $(CudaToolkitIncludeDir); %(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
|
||||
<TreatWarningAsError>true</TreatWarningAsError>
|
||||
</ClCompile>
|
||||
<Link>
|
||||
|
|
Загрузка…
Ссылка в новой задаче