diff --git a/Common/Include/Sequences.h b/Common/Include/Sequences.h index bcec8bd43..f3051a4e2 100644 --- a/Common/Include/Sequences.h +++ b/Common/Include/Sequences.h @@ -12,6 +12,36 @@ #include #include // for shared_ptr +enum class MinibatchPackingFlags : char // (note: not using unsigned char because these go into a matrix, and we use Matrix, since we use it as a data holder) +{ + None = 0, + SequenceStart = 1 << 0, // binary 0001 frame is first of an utterance + SequenceEnd = 1 << 1, // binary 0010 frame is last of an utterance + NoFeature = 1 << 2, // binary 0100 frame has no feature (e.g. a gap due to BPTT) + NoLabel = 1 << 3, // binary 1000 frame has no label + + NoInput = NoFeature | NoLabel, // when we refactorize reader, NoInput will no longer needed + SequenceStartOrNoFeature = SequenceStart | NoFeature, + SequenceEndOrNoFeature = SequenceEnd | NoFeature, + SequenceStartOrEndOrNoFeature = SequenceStart | SequenceEnd | NoFeature, +}; + +inline MinibatchPackingFlags operator| (MinibatchPackingFlags a, MinibatchPackingFlags b) +{ + return static_cast(static_cast(a) | static_cast(b)); +} + +inline MinibatchPackingFlags& operator|= (MinibatchPackingFlags& a, MinibatchPackingFlags b) +{ + a = a | b; + return a; +} + +inline bool operator& (MinibatchPackingFlags a, MinibatchPackingFlags b) +{ + return (static_cast(a) & static_cast(b)) != 0; +} + namespace Microsoft { namespace MSR { namespace CNTK { // Forward declarations diff --git a/Common/Include/basetypes.h b/Common/Include/basetypes.h index e184cd7e7..52f77e32b 100644 --- a/Common/Include/basetypes.h +++ b/Common/Include/basetypes.h @@ -9,7 +9,7 @@ #ifndef _BASETYPES_ #define _BASETYPES_ - +#if 0 #ifndef UNDER_CE // fixed-buffer overloads not available for wince #ifdef _CRT_SECURE_CPP_OVERLOAD_STANDARD_NAMES // fixed-buffer overloads for strcpy() etc. #undef _CRT_SECURE_CPP_OVERLOAD_STANDARD_NAMES @@ -68,6 +68,7 @@ OACR_WARNING_DISABLE(POTENTIAL_ARGUMENT_TYPE_MISMATCH, "Not level1 or level2_sec #if !defined(_DEBUG) || defined(_CHECKED) || defined(_MANAGED) #pragma warning(disable : 4702) // unreachable code #endif +#endif #include "Platform.h" #include @@ -105,15 +106,18 @@ OACR_WARNING_DISABLE(POTENTIAL_ARGUMENT_TYPE_MISMATCH, "Not level1 or level2_sec typedef unsigned char byte; #endif +#if 0 #ifdef _WIN32 #pragma push_macro("STRSAFE_NO_DEPRECATE") #define STRSAFE_NO_DEPRECATE // deprecation managed elsewhere, not by strsafe #include // for strbcpy() etc templates #pragma pop_macro("STRSAFE_NO_DEPRECATE") #endif +#endif using namespace std; +#if 0 // CRT error handling seems to not be included in wince headers // so we define our own imports #ifdef UNDER_CE @@ -147,6 +151,7 @@ using namespace std; // disable warnings for which fixing would make code less readable #pragma warning(disable : 4290) // throw() declaration ignored #pragma warning(disable : 4244) // conversion from typeA to typeB, possible loss of data +#endif // ---------------------------------------------------------------------------- // (w)cstring -- helper class like std::string but with auto-cast to char* @@ -176,6 +181,7 @@ static inline wchar_t*GetWC(const char *c) return wc; } +#if 0 struct MatchPathSeparator { bool operator()( char ch ) const @@ -207,6 +213,7 @@ static inline std::wstring removeExtension (std::wstring const& filename) size_t lastindex = filename.find_last_of(L"."); return filename.substr(0, lastindex); } +#endif // ---------------------------------------------------------------------------- // some mappings for non-Windows builds @@ -249,12 +256,14 @@ static inline void Sleep (size_t ms) { std::this_thread::sleep_for (std::chrono: // basic macros --TODO: do we need those? delete what we dont' need // ---------------------------------------------------------------------------- -#ifndef ASSERT +#if 0 +#ifndef assert #ifdef _CHECKED // basetypes.h expects this function to be defined (it is in message.h) extern void _CHECKED_ASSERT_error(const char * file, int line, const char * exp); -#define ASSERT(exp) ((exp)||(_CHECKED_ASSERT_error(__FILE__,__LINE__,#exp),0)) +#define assert(exp) ((exp)||(_CHECKED_ASSERT_error(__FILE__,__LINE__,#exp),0)) #else -#define ASSERT assert +#define assert assert +#endif #endif #endif #define UNUSED(x) (void)(x) @@ -303,14 +312,15 @@ namespace msra { namespace basetypes { } }; -// class ARRAY -- std::vector with array-bounds checking +// class std::vector -- std::vector with array-bounds checking // VS 2008 and above do this, so there is no longer a need for this. #pragma warning(push) #pragma warning(disable : 4555) // expression has no affect, used so retail won't be empty +#if 0 template -class ARRAY : public std::vector<_ElemType> +class std::vector : public std::vector<_ElemType> { #if defined (_DEBUG) || defined (_CHECKED) // debug version with range checking static void throwOutOfBounds() @@ -319,15 +329,15 @@ class ARRAY : public std::vector<_ElemType> OACR_WARNING_DISABLE(IGNOREDBYCOMMA, "Reviewd OK. Special trick below to show a message when assertion fails" "[rogeryu 2006/03/24]"); OACR_WARNING_DISABLE(BOGUS_EXPRESSION_LIST, "This is intentional. [rogeryu 2006/03/24]"); - //ASSERT ("ARRAY::operator[] out of bounds", false); + //assert ("std::vector::operator[] out of bounds", false); OACR_WARNING_POP; } #endif public: - ARRAY() : std::vector<_ElemType> () { } - ARRAY (int size) : std::vector<_ElemType> (size) { } + std::vector() : std::vector<_ElemType> () { } + std::vector (int size) : std::vector<_ElemType> (size) { } #if defined (_DEBUG) || defined (_CHECKED) // debug version with range checking // ------------------------------------------------------------------------ @@ -361,8 +371,9 @@ public: } }; // overload swap(), otherwise we'd fallback to 3-way assignment & possibly throw -template inline void swap (ARRAY<_T> & L, ARRAY<_T> & R) throw() +template inline void swap (std::vector<_T> & L, std::vector<_T> & R) throw() { swap ((std::vector<_T> &) L, (std::vector<_T> &) R); } +#endif // class fixed_vector - non-resizable vector @@ -372,14 +383,14 @@ template class fixed_vector size_t n; // number of elements void check (int index) const { - ASSERT (index >= 0 && (size_t) index < n); + assert (index >= 0 && (size_t) index < n); #ifdef NDEBUG UNUSED(index); #endif } void check (size_t index) const { - ASSERT (index < n); + assert (index < n); #ifdef NDEBUG UNUSED(index); #endif @@ -404,7 +415,7 @@ public: inline const _T & operator[] (int index) const { check (index); return p[index]; } // reading inline _T & operator[] (size_t index) { check (index); return p[index]; } // writing inline const _T & operator[] (size_t index) const { check (index); return p[index]; } // reading - inline int indexof (const _T & elem) const { ASSERT (&elem >= p && &elem < p + n); return &elem - p; } + inline int indexof (const _T & elem) const { assert (&elem >= p && &elem < p + n); return &elem - p; } void swap (fixed_vector & other) throw() { std::swap (other.p, p); std::swap (other.n, n); } template fixed_vector & operator= (const VECTOR & other) { @@ -431,10 +442,11 @@ template inline void swap (fixed_vector<_T> & L, fixed_vector<_T> & R) // class matrix - simple fixed-size 2-dimensional array, access elements as m(i,j) // stored as concatenation of rows +#if 1 template class matrix : fixed_vector { size_t numcols; - size_t locate(size_t i, size_t j) const { ASSERT(i < rows() && j < cols()); return i * cols() + j; } + size_t locate(size_t i, size_t j) const { assert(i < rows() && j < cols()); return i * cols() + j; } public: typedef T elemtype; matrix() : numcols(0) {} @@ -454,6 +466,7 @@ template inline void swap(matrix<_T> & L, matrix<_T> & R) throw() { L. typedef std::string STRING; typedef std::wstring WSTRING; typedef std::basic_string TSTRING; // wide/narrow character string +#endif // derive from this for noncopyable classes (will get you private unimplemented copy constructors) // ... TODO: change all of basetypes classes/structs to use this @@ -596,7 +609,7 @@ struct utf16 : std::wstring { utf16 (const std::string & p) // utf-8 to -16 int rc = MultiByteToWideChar (CP_UTF8, 0, p.c_str(), (int) len, &buf[0], (int) buf.size()); if (rc == 0) RuntimeError("MultiByteToWideChar"); - ASSERT (rc < buf.size ()); + assert (rc < buf.size ()); (*(std::wstring*)this) = &buf[0]; }}; #endif @@ -618,7 +631,7 @@ static inline std::wstring mbstowcs(const std::string & p) // input: MBCS size_t len = p.length(); msra::basetypes::fixed_vector buf(len + 1); // max: >1 mb chars => 1 wchar std::fill(buf.begin(), buf.end(), (wchar_t)0); - OACR_WARNING_SUPPRESS(UNSAFE_STRING_FUNCTION, "Reviewed OK. size checked. [rogeryu 2006/03/21]"); + //OACR_WARNING_SUPPRESS(UNSAFE_STRING_FUNCTION, "Reviewed OK. size checked. [rogeryu 2006/03/21]"); ::mbstowcs(&buf[0], p.c_str(), len + 1); return std::wstring(&buf[0]); } @@ -663,6 +676,7 @@ template static inline std::basic_string<_T> join (const std::vector { const char * delim; @@ -747,6 +763,7 @@ public: #endif } }; +#endif };}; // namespace @@ -789,6 +806,7 @@ public: }; inline int fclose (auto_file_ptr & af) { return af.fclose(); } +#if 0 #ifdef _MSC_VER // auto-closing container for Win32 handles. // Pass close function if not CloseHandle(), e.g. @@ -827,7 +845,7 @@ public: operator const T () const { return it; } T detach () { T tmp = it; it = 0; return tmp; } // release ownership of object }; - +#endif };}; @@ -879,6 +897,7 @@ public: namespace msra { namespace util { +#if 0 // to (slightly) simplify processing of command-line arguments. // command_line args (argc, argv); // while (args.has (1) && args[0][0] == '-') { option = args.shift(); process (option); } @@ -894,7 +913,8 @@ public: const wchar_t * shift() { if (size() == 0) return NULL; num--; return *args++; } const wchar_t * operator[] (int i) const { return (i < 0 || i >= size()) ? NULL : args[i]; } }; - +#endif + // byte-reverse a variable --reverse all bytes (intended for integral types and float) template static inline void bytereverse (T & v) throw() { // note: this is more efficient than it looks because sizeof (v[0]) is a constant @@ -943,46 +963,18 @@ template static inline void ZeroStruct (S & s) { memset (&s, 0, sizeof // machine dependent // ---------------------------------------------------------------------------- +#if 0 #define MACHINE_IS_BIG_ENDIAN (false) +#endif using namespace msra::basetypes; // for compatibility -#pragma warning (pop) +//#pragma warning (pop) #define EPSILON 1e-5 #define ISCLOSE(a, b, threshold) (abs(a - b) < threshold)?true:false // why is this in basetypes.h? -enum class MinibatchPackingFlags : char // (note: not using unsigned char because these go into a matrix, and we use Matrix, since we use it as a data holder) -{ - None = 0, - SequenceStart = 1 << 0, // binary 0001 frame is first of an utterance - SequenceEnd = 1 << 1, // binary 0010 frame is last of an utterance - NoFeature = 1 << 2, // binary 0100 frame has no feature (e.g. a gap due to BPTT) - NoLabel = 1 << 3, // binary 1000 frame has no label - - NoInput = NoFeature | NoLabel, // when we refactorize reader, NoInput will no longer needed - SequenceStartOrNoFeature = SequenceStart | NoFeature, - SequenceEndOrNoFeature = SequenceEnd | NoFeature, - SequenceStartOrEndOrNoFeature = SequenceStart | SequenceEnd | NoFeature, -}; - -inline MinibatchPackingFlags operator| (MinibatchPackingFlags a, MinibatchPackingFlags b) -{ - return static_cast(static_cast(a) | static_cast(b)); -} - -inline MinibatchPackingFlags& operator|= (MinibatchPackingFlags& a, MinibatchPackingFlags b) -{ - a = a | b; - return a; -} - -inline bool operator& (MinibatchPackingFlags a, MinibatchPackingFlags b) -{ - return (static_cast(a) & static_cast(b)) != 0; -} - template static inline bool comparator(const pair& l, const pair& r) { diff --git a/Common/Include/fileutil.h b/Common/Include/fileutil.h index 4b4ff0c67..d56e80040 100644 --- a/Common/Include/fileutil.h +++ b/Common/Include/fileutil.h @@ -326,8 +326,8 @@ double fgetdouble (FILE * f); // fgetwav(): read an entire .wav file // ---------------------------------------------------------------------------- -void fgetwav (FILE * f, ARRAY & wav, int & sampleRate); -void fgetwav (const wstring & fn, ARRAY & wav, int & sampleRate); +void fgetwav (FILE * f, std::vector & wav, int & sampleRate); +void fgetwav (const wstring & fn, std::vector & wav, int & sampleRate); // ---------------------------------------------------------------------------- // fputwav(): save data into a .wav file diff --git a/Common/fileutil.cpp b/Common/fileutil.cpp index 4b3b9a45e..b180f11f9 100644 --- a/Common/fileutil.cpp +++ b/Common/fileutil.cpp @@ -1361,8 +1361,8 @@ void WAVEHEADER::write (FILE * f) fputint (f, nAvgBytesPerSec); fputshort (f, nBlockAlign); fputshort (f, wBitsPerSample); - ASSERT (FmtLength == 16); - ASSERT (wFormatTag == 1); + assert (FmtLength == 16); + assert (wFormatTag == 1); fputTag (f, "data"); fputint (f, DataLength); fflushOrDie (f); @@ -1455,14 +1455,14 @@ static short toolULawToLinear(unsigned char p_ucULawByte) // fgetwavraw(): only read data of .wav file. For multi-channel data, samples // are kept interleaved. -static void fgetwavraw(FILE * f, ARRAY & wav, const WAVEHEADER & wavhd) +static void fgetwavraw(FILE * f, std::vector & wav, const WAVEHEADER & wavhd) { int bytesPerSample = wavhd.wBitsPerSample / 8; // (sample size on one channel) wav.resize (wavhd.DataLength / bytesPerSample); if (wavhd.wFormatTag == 7) // mulaw { (wavhd.nChannels == 1) || RuntimeError ("fgetwav: wChannels=%d not supported for mulaw", wavhd.nChannels); - ARRAY data; + std::vector data; int numSamples = wavhd.DataLength/wavhd.nBlockAlign; data.resize (numSamples); freadOrDie (&data[0], sizeof (data[0]), numSamples, f); @@ -1486,7 +1486,7 @@ static void fgetwavraw(FILE * f, ARRAY & wav, const WAVEHEADER & wavhd) // fgetwav(): read an entire .wav file. Stereo is mapped to mono. // ---------------------------------------------------------------------------- -void fgetwav (FILE * f, ARRAY & wav, int & sampleRate) +void fgetwav (FILE * f, std::vector & wav, int & sampleRate) { WAVEHEADER wavhd; // will be filled in for 16-bit PCM!! signed short wFormatTag; // real format tag as found in data @@ -1502,7 +1502,7 @@ void fgetwav (FILE * f, ARRAY & wav, int & sampleRate) else if (wavhd.nChannels == 2) { //read raw data - ARRAY buf; + std::vector buf; buf.resize(numSamples * 2); fgetwavraw(f, buf, wavhd); @@ -1523,7 +1523,7 @@ void fgetwav (FILE * f, ARRAY & wav, int & sampleRate) } } -void fgetwav (const wstring & fn, ARRAY & wav, int & sampleRate) +void fgetwav (const wstring & fn, std::vector & wav, int & sampleRate) { auto_file_ptr f = fopenOrDie (fn, L"rbS"); fgetwav (f, wav, sampleRate); @@ -1538,13 +1538,13 @@ void fgetwav (const wstring & fn, ARRAY & wav, int & sampleRate) // channel. j is sample index. // ---------------------------------------------------------------------------- -void fgetraw (FILE *f, ARRAY< ARRAY > & data, const WAVEHEADER & wavhd) +void fgetraw (FILE *f, std::vector< std::vector > & data, const WAVEHEADER & wavhd) { - ARRAY wavraw; + std::vector wavraw; fgetwavraw (f, wavraw, wavhd); data.resize (wavhd.nChannels); int numSamples = wavhd.DataLength/wavhd.nBlockAlign; - ASSERT (numSamples == (int) wavraw.size() / wavhd.nChannels); + assert (numSamples == (int) wavraw.size() / wavhd.nChannels); for (int i = 0; i < wavhd.nChannels; i++) { @@ -1599,7 +1599,7 @@ void fputwfx (FILE *f, const WAVEFORMATEX & wfx, unsigned int numSamples) unsigned int RiffLength = 36 + DataLength; unsigned int FmtLength = 16; // file header - ASSERT (wfx.cbSize == 0 || wfx.cbSize == FmtLength + 2); + assert (wfx.cbSize == 0 || wfx.cbSize == FmtLength + 2); fputTag (f, "RIFF"); fputint (f, RiffLength); fputTag (f, "WAVE"); @@ -1861,11 +1861,24 @@ bool operator>= (const FILETIME & targettime, const FILETIME & inputtime) // f } #endif -bool getfiletime (const wstring & path, FILETIME & time) +#ifdef _WIN32 +class auto_find_handle +{ + HANDLE h; + auto_find_handle operator= (const auto_find_handle &); + auto_find_handle(const auto_find_handle &); +public: + auto_find_handle(HANDLE p_h) : h(p_h) {} + ~auto_find_handle() { if (h != INVALID_HANDLE_VALUE) ::FindClose(h); } + operator HANDLE () const { return h; } +}; +#endif + +bool getfiletime(const wstring & path, FILETIME & time) { // return file modification time, false if cannot be determined #ifdef _WIN32 WIN32_FIND_DATAW findFileData; - auto_handle hFind (FindFirstFileW (path.c_str(), &findFileData), ::FindClose); + auto_find_handle hFind (FindFirstFileW (path.c_str(), &findFileData)); if (hFind != INVALID_HANDLE_VALUE) { time = findFileData.ftLastWriteTime; @@ -1891,7 +1904,7 @@ bool getfiletime (const wstring & path, FILETIME & time) #if 0 void setfiletime (const wstring & path, const FILETIME & time) { // update the file modification time of an existing file - auto_handle h (CreateFileW (path.c_str(), FILE_WRITE_ATTRIBUTES, + auto_find_handle h (CreateFileW (path.c_str(), FILE_WRITE_ATTRIBUTES, FILE_SHARE_READ|FILE_SHARE_WRITE, NULL, OPEN_EXISTING, FILE_ATTRIBUTE_NORMAL, NULL)); if (h == INVALID_HANDLE_VALUE) @@ -1947,7 +1960,7 @@ static BOOL ExpandWildcards (wstring path, vector & paths) // crawl folder WIN32_FIND_DATAW ffdata; - auto_handle hFind (::FindFirstFileW (path.c_str(), &ffdata), ::FindClose); + auto_find_handle hFind (::FindFirstFileW (path.c_str(), &ffdata)); if (hFind == INVALID_HANDLE_VALUE) { DWORD err = ::GetLastError(); @@ -2136,7 +2149,7 @@ static inline std::wstring mbstowcs(const std::string & p) // input: MBCS size_t len = p.length(); msra::basetypes::fixed_vector buf(len + 1); // max: >1 mb chars => 1 wchar std::fill(buf.begin(), buf.end(), (wchar_t)0); - OACR_WARNING_SUPPRESS(UNSAFE_STRING_FUNCTION, "Reviewed OK. size checked. [rogeryu 2006/03/21]"); + //OACR_WARNING_SUPPRESS(UNSAFE_STRING_FUNCTION, "Reviewed OK. size checked. [rogeryu 2006/03/21]"); ::mbstowcs(&buf[0], p.c_str(), len + 1); return std::wstring(&buf[0]); } diff --git a/DataReader/HTKMLFReader/latticearchive.cpp b/DataReader/HTKMLFReader/latticearchive.cpp index 27919eccf..ebb069c40 100644 --- a/DataReader/HTKMLFReader/latticearchive.cpp +++ b/DataReader/HTKMLFReader/latticearchive.cpp @@ -555,7 +555,7 @@ void lattice::fromhtklattice (const wstring & path, const std::unordered_map 0); + assert(info.numnodes > 0); nodes.reserve (info.numnodes); // parse the nodes for (size_t i = 0; i < info.numnodes; i++, iter++) @@ -572,7 +572,7 @@ void lattice::fromhtklattice (const wstring & path, const std::unordered_map 0); + assert(info.numedges > 0); edges.reserve (info.numedges); align.reserve (info.numedges * 10); // 10 phones per word on av. should be enough std::string label; diff --git a/DataReader/HTKMLFReader/msra_mgram.h b/DataReader/HTKMLFReader/msra_mgram.h index 546e985a6..80fa264fa 100644 --- a/DataReader/HTKMLFReader/msra_mgram.h +++ b/DataReader/HTKMLFReader/msra_mgram.h @@ -249,7 +249,7 @@ public: p[0] = (unsigned char) value; p[1] = (unsigned char) (value >> 8); p[2] = (unsigned char) (value >> 16); - ASSERT (value == (int) *this); + assert (value == (int) *this); return value; } }; @@ -268,7 +268,7 @@ public: base.resize (newsize); uint24_ref r = uint24_ref (&base[cursize]); r = value; - ASSERT (value == back()); + assert (value == back()); } }; @@ -313,7 +313,7 @@ class mgram_map if ((size_t) id >= level1lookup.size()) return nindex; i = level1lookup[id]; } - ASSERT (i == nindex || ids[1][i] == id); + assert (i == nindex || ids[1][i] == id); return i; } index_t beg = firsts[m][i]; @@ -736,11 +736,11 @@ public: coord c (k.m, (index_t) ids[k.m].size()); - ASSERT (firsts[k.m-1].back() == (index_t) ids[k.m].size()); + assert (firsts[k.m-1].back() == (index_t) ids[k.m].size()); ids[k.m].push_back (thisid); // create value firsts[k.m-1].back() = (index_t) ids[k.m].size(); if (firsts[k.m-1].back() != (index_t) ids[k.m].size()) fail ("create() numeric overflow--index_t too small"); - ASSERT (k.m == M || firsts[k.m].back() == (index_t) ids[k.m+1].size()); + assert (k.m == M || firsts[k.m].back() == (index_t) ids[k.m+1].size()); // optimization: level1nonsparse flag // If unigram level is entirely non-sparse, we can save the search @@ -772,10 +772,10 @@ public: firsts[m].resize (ids[m].size() +1, (int) ids[m+1].size()); foreach_index (m, firsts) { - ASSERT (firsts[m][0] == 0); + assert (firsts[m][0] == 0); foreach_index (i, ids[m]) - ASSERT (firsts[m][i] <= firsts[m][i+1]); - ASSERT ((size_t) firsts[m].back() == ids[m+1].size()); + assert (firsts[m][i] <= firsts[m][i+1]); + assert ((size_t) firsts[m].back() == ids[m+1].size()); } // id mapping // user-provided w->id map @@ -1042,7 +1042,7 @@ public: continue; const mgram_map::key key = *iter; - ASSERT (m == key.order()); + assert (m == key.order()); // --- output m-gram to ARPA file fprintfOrDie (outf, "%.4f", logP[iter] / log10); @@ -1068,7 +1068,7 @@ public: numMGramsWritten++; } fflushOrDie (outf); - ASSERT (numMGramsWritten == map.size (m)); + assert (numMGramsWritten == map.size (m)); fprintf (stderr, "\n"); } @@ -1355,7 +1355,7 @@ protected: int newid = w2id[w]; // map to new id space mgram[m-1] = newid; } - for (int k = 0; k < m; k++) ASSERT (mgram[k] == w2id[key[k]]); + for (int k = 0; k < m; k++) assert (mgram[k] == w2id[key[k]]); // insert new key into sortedMap mgram_map::coord c = sortedMap.create (mgram_map::unmapped_key (&mgram[0], m), createCache); // copy over logP and logB @@ -1481,7 +1481,7 @@ protected: if (m == 0) continue; const mgram_map::key key = *iter; - ASSERT (m == key.order()); + assert (m == key.order()); float thisP = P[iter]; if (islog) @@ -1606,6 +1606,7 @@ public: } }; +#if 0 // =========================================================================== // CMGramLMEstimator -- estimator for CMGramLM // Implements Kneser-Ney discounting with Goodman/Chen modification, as well @@ -1970,7 +1971,7 @@ public: // estimate vector dropWord (userSymMap.size(), false); dropWord.push_back (true); // filtering but no : - ASSERT (!filterVocabulary || unkId != -1 || dropWord[dropId]); + assert (!filterVocabulary || unkId != -1 || dropWord[dropId]); //std::vector minObs (2, 0); //std::vector iMinObs (3, 0); @@ -2105,7 +2106,7 @@ public: if (m < M && m < 3) // for comments see where we estimate the discounted probabilities { // ^^ seems not to work for 4-grams... const mgram_map::key key = *iter; // needed to check for startId - ASSERT (key.order() == m); + assert (key.order() == m); if (m < 2 || key.pop_w().back() != startId) { @@ -2249,7 +2250,7 @@ public: } const mgram_map::key key = *iter; - ASSERT (key.order() == iter.order()); // (remove this check once verified) + assert (key.order() == iter.order()); // (remove this check once verified) // get history's count const mgram_map::coord j = histCoord[m-1]; // index of parent entry @@ -2282,7 +2283,7 @@ public: histCount = KNTotalCounts[c_h]; // (u,v,w) -> count (*,v,*) if (histCount == 0) // must exist RuntimeError ("estimate: malformed data: back-off value not found (denominator)"); - ASSERT (histCount >= count); + assert (histCount >= count); } } @@ -2423,7 +2424,9 @@ skippruned:; // m-gram was pruned fprintf (stderr, "\n"); } }; +#endif +#if 0 // =========================================================================== // CMGramLMClone -- create CMGramLM from sub-LMs through ILM and ILM::IIter // - create in memory into a CMGramLM @@ -2538,539 +2541,9 @@ skipMGram: lmpath = msra::strfun::utf16 (lmpath8); } }; - -#if 0 // old version --remove once we are fully tested and comfortable -class OldCMGramLM : public ILM -{ -protected: - // representation of LM in memory - // For each order, there is a flattened array of LMSCORE tokens. - // For each history order, there is a flattened array of LMHISTs. - // E.g. a trigram's history's LMHIST entry (somewhere in refs[2]) denotes - // the start index of the first LMSCORE entry (in entries[3]). The end - // index is denoted by the start index of the next LMHIST entry (for this - // purpose, the LMHIST arrays have one extra entry at the end). - struct LMSCORE // an LM score, plus its word id for sparse storage - { - int id; // token id (in LM space) - float logP; // and its score - LMSCORE (int p_id, double p_logP) : id (p_id), logP ((float) p_logP) { } - }; - struct LMHIST // an LM history -- index corresponds to LMSCORE index - { - int firstEntry; // index of first entry (end entry known from next LMHIST) - float logB; // back-off weight - LMHIST (int p_firstEntry, double p_logB) : firstEntry (p_firstEntry), logB ((float) p_logB) { } - }; - int M; - std::vector> refs; // [M] e.g. [2] for trigram history - std::vector> entries; // [M+1] e.g. [3] for trigrams. [0]=dummy - - // mapping of numeric word ids from external (user-defined) space to the internal LM's - std::vector userToLMSymMap; // map to ids used in LM - - // map user id to LM id, return -1 for anything unknown - inline int mapId (int userId) const - { - if (userId < 0 || userId >= (int) userToLMSymMap.size()) return -1; - else return userToLMSymMap[userId]; - } - - bool entries1Unmapped; // if true then findEntry(id) == i for entries[1] - - // search in an LMSCORE array - // This is a relatively generic binary search. - inline int findEntry (const std::vector & entries, int beg, int end, int id) const - { - while (beg < end) - { - int i = (beg + end) / 2; - int v = entries[i].id; - if (id == v) return i; // found it - else if (id < v) end = i; // id is left of i - else beg = i + 1; // id is right of i - } - return -1; // not found - } - - // diagnostics of previous score() call - mutable int longestMGramFound; // longest m-gram (incl. predicted token) found - mutable int longestHistoryFound; // longest history (excl. predicted token) found - -public: - virtual int getLastLongestHistoryFound() const { return longestHistoryFound; } - virtual int getLastLongestMGramFound() const { return longestMGramFound; } - virtual int order() const { return M; } - - // mgram[m-1] = word to predict, tokens before that are history - // m=3 means trigram - virtual double score (const int * mgram, int m) const - { - longestHistoryFound = 0; // (diagnostics) - - if (m > M) // too long a history for this model - { - mgram += (m - M); - m = M; - } - double totalLogB = 0.0; // accumulated back-off - - for (;;) - { - longestMGramFound = m; // (diagnostics) - - if (m == 0) // not really defined in ARPA format - return totalLogB + entries[0][0].logP; - - if (m == 1) - { - // find the actual score - // [beg, end) is the sub-range in entries array. - int id = mapId (mgram[0]); - const char * sym = idToSymbol (id); sym;// (debugging) - - const std::vector & entries_1 = entries[1]; - int i = entries1Unmapped ? id : findEntry (entries_1, refs[0][0].firstEntry, refs[0][1].firstEntry, id); - if (i == -1) - goto backoff0; - - ASSERT (entries_1[i].id == id); // verify unmapped unigram case - double logP = entries_1[i].logP; - return totalLogB + logP; - } - - // locate LMHIST and LMSCORE - // We traverse history one by one. - - int id = mapId (mgram[0]); // start with unigram history - const char * sym = idToSymbol (id); // (debugging) - int i = (entries1Unmapped) ? id : findEntry (entries[1], refs[0][0].firstEntry, refs[0][1].firstEntry, id); - if (i == -1) // unknown history: fall back - goto fallback; - ASSERT (entries[1][i].id == id); // verify unmapped unigram case - - // found it: advance search by one history token - const std::vector & refs_1 = refs[1]; - float logB = refs_1[i].logB; - int beg = refs_1[i].firstEntry; // sub-array range for next level - int end = refs_1[i+1].firstEntry; - for (int n = 2; n < m; n++) - { - if (beg == end) - goto fallback; // unseen history: fall back - int id = mapId (mgram[n -1]); - const char * sym = idToSymbol (id); sym; // (debugging) - int i = findEntry (entries[n], beg, end, id); - if (i == -1) // unseen history: fall back - goto fallback; - ASSERT (entries[n][i].id == id); // verify unmapped unigram case - - // found it: advance search by one history token - const std::vector & refs_n = refs[n]; - logB = refs_n[i].logB; - beg = refs_n[i].firstEntry; // sub-array range for next level - end = refs_n[i+1].firstEntry; - } - - // we found the entire history: now find the actual score - // [beg, end) is the sub-range in entries array. - if (m -1 > longestHistoryFound) - longestHistoryFound = m -1; - - if (beg == end) // history has no successors (but a back-off weight) - goto backoff; - - id = mapId (mgram[m -1]); - sym = idToSymbol (id); // (debugging) - const std::vector & entries_m = entries[m]; - i = findEntry (entries_m, beg, end, id); - if (i == -1) - goto backoff; - ASSERT (entries_m[i].id == id); // verify unmapped unigram case - - longestMGramFound = m; - - double logP = entries_m[i].logP; - return totalLogB + logP; - -backoff: // found history but not predicted token: back-off - totalLogB += logB; - -backoff0: // back-off knowing that logB == 0 - -fallback: // we get here in case of fallback (no back-off weight) or back-off - mgram++; - m--; - } // and go again with the shortened history - } - - // same as score() but without optimizations (for reference) - double score_unoptimized (const int * mgram, int m) const - { - if (m == 0) // not really defined in ARPA format - return entries[0][0].logP; - else if (m > M) // too long a history for this model - { - mgram += (m - M); - m = M; - } - - // locate LMHIST and LMSCORE - // We traverse history one by one. - int beg = refs[0][0].firstEntry; // start with the unigram array - int end = refs[0][1].firstEntry; - float logB = 0.0f; // remember in the loop in case we need it - for (int n = 1; n < m; n++) - { - int userId = mgram[n -1]; // may be -1 for unknown word - int id = mapId (userId); - const char * sym = idToSymbol (id); sym; // (debugging) - const std::vector & entries_n = entries[n]; - int i = findEntry (entries_n, beg, end, id); - if (i == -1) // unknown history: fall back - return score_unoptimized (mgram +1, m -1); // tail recursion - ASSERT (entries_n[i].id == id); // verify unmapped unigram case - // found it: advance search by one history token - const std::vector & refs_n = refs[n]; - logB = refs_n[i].logB; - beg = refs_n[i].firstEntry; // sub-array range for next level - end = refs_n[i+1].firstEntry; - } - - // we found the entire history: now find the actual score - // [beg, end) is the sub-range in entries array. - int userId = mgram[m -1]; // word to predict - int id = mapId (userId); - const char * sym = idToSymbol (id); sym; // (debugging) - const std::vector & entries_m1 = entries[m]; - int i = findEntry (entries_m1, beg, end, id); - if (i != -1) - { - ASSERT (entries_m1[i].id == id); // verify unmapped unigram case - double logP = entries_m1[i].logP; - return logP; - } - - // found history but not predicted token: back-off - return logB + score_unoptimized (mgram + 1, m -1); - } - - // test for OOV word (OOV w.r.t. LM) - virtual bool oov (int id) const { return mapId (id) < 0; } - - virtual void adapt (const int *, size_t) { } // this LM does not adapt -private: - - // keep this for debugging - std::wstring filename; // input filename - struct SYMBOL - { - string symbol; // token - int id; // numeric id in LM space (index of word read) - bool operator< (const SYMBOL & other) const { return symbol < other.symbol; } - SYMBOL (int p_id, const char * p_symbol) : id (p_id), symbol (p_symbol) { } - }; - std::vector lmSymbols; // (id, word) symbols used in LM - std::vector idToSymIndex; // map LM id to index in lmSymbols[] array - - // search for a word in the sorted word array. - // Only use this after sorting, i.e. after full 1-gram section has been read. - // Only really used in read(). - inline int symbolToId (const char * word) const - { - int beg = 0; - int end = (int) lmSymbols.size(); - while (beg < end) - { - int i = (beg + end) / 2; - const char * v = lmSymbols[i].symbol.c_str(); - int cmp = strcmp (word, v); - if (cmp == 0) return lmSymbols[i].id; // found it - else if (cmp < 0) end = i; // id is left of i - else beg = i + 1; // id is right of i - } - return -1; // not found - } - - inline const char * idToSymbol (int id) const - { - if (id < 0) return NULL; // empty string for unknown ids - int i = idToSymIndex[id]; - return lmSymbols[i].symbol.c_str(); - } - -public: - - // read an ARPA (text) file. - // Words do not need to be sorted in the unigram section, but the m-gram - // sections have to be in the same order as the unigrams. - // The 'userSymMap' defines the vocabulary space used in score(). - // If 'filterVocabulary' then LM entries for words not in userSymMap are skipped. - // Otherwise the userSymMap is updated with the words from the LM. - // 'maxM' allows to restrict the loading to a smaller LM order. - // SYMMAP can be e.g. CSymMap or CSymbolSet. - template - void read (const std::wstring & pathname, SYMMAP & userSymMap, bool filterVocabulary, int maxM) - { - int lineNo = 0; - msra::basetypes::auto_file_ptr f = fopenOrDie (pathname, L"rbS"); - fprintf (stderr, "read: reading %S", pathname.c_str()); - filename = pathname; // (keep this info for debugging) - - // --- read header information - - // search for header line - char buf[1024]; - lineNo++, fgetline (f, buf); - while (strcmp (buf, "\\data\\") != 0 && !feof (f)) - lineNo++, fgetline (f, buf); - lineNo++, fgetline (f, buf); - - // get the dimensions - std::vector dims; dims.reserve (4); - - while (buf[0] == 0 && !feof (f)) - lineNo++, fgetline (f, buf); - - int n, dim; - dims.push_back (1); // dummy zerogram entry - while (sscanf (buf, "ngram %d=%d", &n, &dim) == 2 && n == (int) dims.size()) - { - dims.push_back (dim); - lineNo++, fgetline (f, buf); - } - - M = (int) dims.size() -1; - if (M == 0) - RuntimeError ("read: mal-formed LM file, no dimension information (%d): %S", lineNo, pathname.c_str()); - int fileM = M; - if (M > maxM) - M = maxM; - - // allocate main storage - refs.resize (M); - for (int m = 0; m < M; m++) - refs[m].reserve (dims[m] +1); - entries.resize (M +1); - for (int m = 0; m <= M; m++) - entries[m].reserve (dims[m]); - lmSymbols.reserve (dims[0]); - - refs[0].push_back (LMHIST (0, 0.0)); - refs[0].push_back (LMHIST (0, -99.0)); // this one gets updated - entries[0].push_back (LMSCORE (-1, -99.0)); // zerogram score -- gets updated later - - std::vector skipWord; // true: skip entry containing this word - skipWord.reserve (lmSymbols.capacity()); - - // --- read main sections - - const double ln10xLMF = log (10.0); // ARPA scores are strangely scaled - for (int m = 1; m <= M; m++) - { - while (buf[0] == 0 && !feof (f)) - lineNo++, fgetline (f, buf); - - if (sscanf (buf, "\\%d-grams:", &n) != 1 || n != m) - RuntimeError ("read: mal-formed LM file, bad section header (%d): %S", lineNo, pathname.c_str()); - lineNo++, fgetline (f, buf); - - std::vector mgram (m +1); // current mgram being read - std::vector prevmgram (m +1, -1); // previous mgram read - std::vector histEntry (m); // sub-array ranges - - histEntry[0] = 0; - - // read all the m-grams - while (buf[0] != '\\') - { - if (buf[0] == 0) - { - lineNo++, fgetline (f, buf); - continue; - } - - // -- parse the line - const char * delim = " \t\n\r"; - const char * score = strtok (&buf[0], delim); - if (score == NULL || score[0] == 0) // not checking whether it is numeric - RuntimeError ("read: mal-formed LM file, no score (%d): %S", lineNo, pathname.c_str()); - double scoreVal = atof (score); - double logP = scoreVal * ln10xLMF; // convert to natural log - - bool skipEntry = false; - for (int n = 1; n <= m; n++) - { - /*const*/ char * tok = strtok (NULL, delim); - if (tok == NULL) - RuntimeError ("read: mal-formed LM file, not enough words in mgram (%d): %S", lineNo, pathname.c_str()); - // map to id - int id; - if (m == 1) // unigram: build vocab table - { - id = (int) lmSymbols.size(); // unique id for this symbol - lmSymbols.push_back (SYMBOL (id, tok)); - bool toSkip = false; - if (userSymMap.sym2existingId (lmSymbols.back().symbol) == -1) - { - if (filterVocabulary) - toSkip = true; // unknown word - else - userSymMap.sym2id (lmSymbols.back().symbol); // create it in user's space - } - skipWord.push_back (toSkip); - } - else // mgram: look up word in vocabulary - { - if (prevmgram[n] >= 0 && strcmp (idToSymbol (prevmgram[n]), tok) == 0) - id = prevmgram[n]; - else - { - id = symbolToId (tok); - if (id == -1) - RuntimeError ("read: mal-formed LM file, m-gram contains unknown word (%d): %S", lineNo, pathname.c_str()); - } - } - mgram[n] = id; // that's our id - skipEntry |= skipWord[id]; // skip entry if any token is unknown - } - - double logB = 0.0; - if (m < M) - { - const char * bo = strtok (NULL, delim); - if (score == NULL || score[0] == 0) // not checking whether it is numeric - RuntimeError ("read: mal-formed LM file, no score (%d): %S", lineNo, pathname.c_str()); - double boVal = atof (bo); - logB = boVal * ln10xLMF; // convert to natural log - } - - lineNo++, fgetline (f, buf); - - if (skipEntry) // word contained unknown vocabulary: skip entire entry - goto skipMGram; - - // -- enter the information into our data structure - - // locate the corresponding entries - // histEntry[n] are valid iff mgram[n'] == prevmgram[n'] for all n' <= ' - - bool prevValid = true; - for (int n = 1; n < m; n++) - { - if (prevValid && mgram[n] == prevmgram[n]) - continue; - - if (prevValid && mgram[n] < prevmgram[n]) - RuntimeError ("read: mal-formed LM file, m-gram out of order (%d): %S", lineNo, pathname.c_str()); - - // a history token differs from previous mgram. That history must exist. - const std::vector & entries_n = entries[n]; - const std::vector & refs_h = refs[n -1]; // history - int beg = refs_h[histEntry[n -1]].firstEntry; // sub-array range for next level - int end = refs_h[histEntry[n -1] +1].firstEntry; - int i = findEntry (entries_n, beg, end, mgram[n]); - if (i == -1) // unknown history: fall back - RuntimeError ("read: mal-formed LM file, m-gram history not defined (%d): %S", lineNo, pathname.c_str()); - // found it: narrow down search range - histEntry[n] = i; - prevValid = false; - } - - if (prevValid && mgram[m] <= prevmgram[m]) - RuntimeError ("read: mal-formed LM file, m-gram out of order (%d): %S", lineNo, pathname.c_str()); - - if (m < M) // create history entry - refs[m].push_back (LMHIST (0, logB)); - entries[m].push_back (LMSCORE (mgram[m], logP)); // score entry - - refs[m-1][histEntry[m-1]].firstEntry++; // for now count how many histories we got - -skipMGram: - // remember current mgram for next iteration - ::swap (mgram, prevmgram); - } - - // Update previous level history from #entries to firstEntry. - // We do this afterwards because some histories may not be used and - // therefore not occur in higher-order m-grams, such that we cannot - // rely on touching them in the loop above. Counting entries instead - // leaves those at 0, which is correct. - std::vector & refs_h = refs[m -1]; // history - int n0 = 0; - for (int i = 0; i < (int) refs_h.size(); i++) - { - int num = refs_h[i].firstEntry; - refs_h[i].firstEntry = n0; - n0 += num; - } - ASSERT (refs_h.back().firstEntry == (int) entries[m].size()); - - // create closing history entry - if (m < M) - refs[m].push_back (LMHIST (0, -99.0)); - - // fix the symbol set -- now we can binary-search in them with symbolToId() - if (m == 1) - { - std::sort (lmSymbols.begin(), lmSymbols.end()); - idToSymIndex.resize (lmSymbols.size(), -1); - for (int i = 0; i < (int) lmSymbols.size(); i++) - { - idToSymIndex[lmSymbols[i].id] = i; - } - } - - fprintf (stderr, ", %d %d-grams", entries[m].size(), m); - } - fprintf (stderr, "\n"); - - // check end tag - if (M == fileM) - { // only if caller did not restrict us to a lower order - while (buf[0] == 0 && !feof (f)) - lineNo++, fgetline (f, buf); - if (strcmp (buf, "\\end\\") != 0) - RuntimeError ("read: mal-formed LM file, no \\end\\ tag (%d): %S", lineNo, pathname.c_str()); - } - - // update zerogram score - // We use the minimum of all unigram scores. - const std::vector & entries_1 = entries[1]; - float unknownLogP = 0.0f; - for (int i = 0; i < (int) entries_1.size(); i++) - { - if (entries_1[i].logP < -98.9f) continue; // disabled token does not count - if (entries_1[i].logP < unknownLogP) - unknownLogP = entries_1[i].logP; - } - entries[0][0].logP = unknownLogP;; - //= (float) -log ((double) lmSymbols.size()); // zerogram score - - // establish mapping of word ids from user to LM space - userToLMSymMap.resize (userSymMap.size()); - for (int i = 0; i < userSymMap.size(); i++) - { - const char * sym = userSymMap.id2sym (i); - int id = symbolToId (sym); // may be -1 if not found - userToLMSymMap[i] = id; - } - - // check whether first-level unigrams need mapping - // We don't unless user provided a dictionary to filter. - entries1Unmapped = true; // assume findEntry (id) == id - for (int i = 0; i < (int) entries_1.size(); i++) - { - if (entries_1[i].id != i) - { - entries1Unmapped = false; - break; - } - } - } -}; #endif +#if 0 // =========================================================================== // CPerplexity -- helper to measure perplexity // =========================================================================== @@ -3172,5 +2645,6 @@ public: // return number of utterances int getNumUtterances() const { return numUtterances; } }; +#endif };}; // namespace diff --git a/DataReader/ImageReader/ImageReader.cpp b/DataReader/ImageReader/ImageReader.cpp index a3b830b99..12b95809c 100644 --- a/DataReader/ImageReader/ImageReader.cpp +++ b/DataReader/ImageReader/ImageReader.cpp @@ -274,15 +274,10 @@ public: else { cv::FileStorage fs; - // REVIEW alexeyk: this sort of defeats the purpose of using wstring at all... - auto fname = meanFile; -#ifdef _WIN32 - fs.open(fname.c_str(), cv::FileStorage::READ); -#else - fs.open(charpath(fname), cv::FileStorage::READ); -#endif + // REVIEW alexeyk: this sort of defeats the purpose of using wstring at all... [fseide] no, only OpenCV has this problem. + fs.open(msra::strfun::utf8(meanFile).c_str(), cv::FileStorage::READ); if (!fs.isOpened()) - RuntimeError("Could not open file: " + fname); + RuntimeError("Could not open file: %ls", meanFile.c_str()); fs["MeanImg"] >> m_meanImg; int cchan; fs["Channel"] >> cchan; @@ -291,7 +286,7 @@ public: int ccol; fs["Col"] >> ccol; if (cchan * crow * ccol != m_meanImg.channels() * m_meanImg.rows * m_meanImg.cols) - RuntimeError("Invalid data in file: " + fname); + RuntimeError("Invalid data in file: %ls", meanFile.c_str()); fs.release(); m_meanImg = m_meanImg.reshape(cchan, crow); } diff --git a/DataReader/Kaldi2Reader/basetypes.h b/DataReader/Kaldi2Reader/basetypes.h index f45172b2f..abbef5d0e 100644 --- a/DataReader/Kaldi2Reader/basetypes.h +++ b/DataReader/Kaldi2Reader/basetypes.h @@ -217,9 +217,6 @@ static inline void Sleep (size_t ms) { std::this_thread::sleep_for (std::chrono: //#define SAFE_DELETE(p) { if(p) { delete (p); (p)=NULL; } } //#define SAFE_RELEASE(p) { if(p) { (p)->Release(); (p)=NULL; } } // nasty! use CComPtr<> -#ifndef ASSERT -#define ASSERT assert -#endif // ---------------------------------------------------------------------------- // basic data types @@ -227,66 +224,6 @@ static inline void Sleep (size_t ms) { std::this_thread::sleep_for (std::chrono: namespace msra { namespace basetypes { -// class ARRAY -- std::vector with array-bounds checking -// VS 2008 and above do this, so there is no longer a need for this. - -#pragma warning(push) -#pragma warning(disable : 4555) // expression has no affect, used so retail won't be empty - -template -class ARRAY : public std::vector<_ElemType> -{ -#if defined (_DEBUG) || defined (_CHECKED) // debug version with range checking - static void throwOutOfBounds() - { // (moved to separate function hoping to keep inlined code smaller - OACR_WARNING_PUSH; - OACR_WARNING_DISABLE(IGNOREDBYCOMMA, "Reviewd OK. Special trick below to show a message when assertion fails" - "[rogeryu 2006/03/24]"); - OACR_WARNING_DISABLE(BOGUS_EXPRESSION_LIST, "This is intentional. [rogeryu 2006/03/24]"); - //ASSERT ("ARRAY::operator[] out of bounds", false); - OACR_WARNING_POP; - } -#endif - -public: - - ARRAY() : std::vector<_ElemType> () { } - ARRAY (int size) : std::vector<_ElemType> (size) { } - -#if defined (_DEBUG) || defined (_CHECKED) // debug version with range checking - // ------------------------------------------------------------------------ - // operator[]: with array-bounds checking - // ------------------------------------------------------------------------ - - inline _ElemType & operator[] (int index) // writing - { - if (index < 0 || index >= size()) throwOutOfBounds(); - return (*(std::vector<_ElemType>*) this)[index]; - } - - // ------------------------------------------------------------------------ - - inline const _ElemType & operator[] (int index) const // reading - { - if (index < 0 || index >= size()) throwOutOfBounds(); - return (*(std::vector<_ElemType>*) this)[index]; - } -#endif - - // ------------------------------------------------------------------------ - // size(): same as base class, but returning an 'int' instead of 'size_t' - // to allow for better readable code - // ------------------------------------------------------------------------ - - inline int size() const - { - size_t siz = ((std::vector<_ElemType>*) this)->size(); - return (int) siz; - } -}; -// overload swap(), otherwise we'd fallback to 3-way assignment & possibly throw -template inline void swap (ARRAY<_T> & L, ARRAY<_T> & R) throw() -{ swap ((std::vector<_T> &) L, (std::vector<_T> &) R); } // class fixed_vector - non-resizable vector @@ -294,8 +231,8 @@ template class fixed_vector { _T * p; // pointer array size_t n; // number of elements - void check (int index) const { index/*avoid compiler warning*/;ASSERT (index >= 0 && (size_t) index < n); } - void check (size_t index) const { ASSERT (index < n); } + void check (int index) const { index/*avoid compiler warning*/;assert (index >= 0 && (size_t) index < n); } + void check (size_t index) const { assert (index < n); } // ... TODO: when I make this public, LinearTransform.h acts totally up but I cannot see where it comes from. //fixed_vector (const fixed_vector & other) : n (0), p (NULL) { *this = other; } public: @@ -316,7 +253,7 @@ public: inline const _T & operator[] (int index) const { check (index); return p[index]; } // reading inline _T & operator[] (size_t index) { check (index); return p[index]; } // writing inline const _T & operator[] (size_t index) const { check (index); return p[index]; } // reading - inline int indexof (const _T & elem) const { ASSERT (&elem >= p && &elem < p + n); return &elem - p; } + inline int indexof (const _T & elem) const { assert (&elem >= p && &elem < p + n); return &elem - p; } inline void swap (fixed_vector & other) throw() { std::swap (other.p, p); std::swap (other.n, n); } template fixed_vector & operator= (const VECTOR & other) { @@ -346,7 +283,7 @@ template inline void swap (fixed_vector<_T> & L, fixed_vector<_T> & R) template class matrix : fixed_vector { size_t numcols; - size_t locate (size_t i, size_t j) const { ASSERT (i < rows() && j < cols()); return i * cols() + j; } + size_t locate (size_t i, size_t j) const { assert (i < rows() && j < cols()); return i * cols() + j; } public: typedef T elemtype; matrix() : numcols (0) {} diff --git a/DataReader/Kaldi2Reader/fileutil.cpp b/DataReader/Kaldi2Reader/fileutil.cpp index abdb96e37..2daeda543 100644 --- a/DataReader/Kaldi2Reader/fileutil.cpp +++ b/DataReader/Kaldi2Reader/fileutil.cpp @@ -558,14 +558,14 @@ std::wstring fgetlinew (FILE * f) } // STL string version avoiding most memory allocations -void fgetline (FILE * f, std::string & s, ARRAY & buf) +void fgetline (FILE * f, std::string & s, std::vector & buf) { buf.resize (1000000); // enough? // KIT: increased to 1M to be safe const char * p = fgetline (f, &buf[0], (int) buf.size()); s.assign (p); } -void fgetline (FILE * f, std::wstring & s, ARRAY & buf) +void fgetline (FILE * f, std::wstring & s, std::vector & buf) { buf.resize (1000000); // enough? // KIT: increased to 1M to be safe const wchar_t * p = fgetline (f, &buf[0], (int) buf.size()); @@ -573,7 +573,7 @@ void fgetline (FILE * f, std::wstring & s, ARRAY & buf) } // char buffer version -void fgetline (FILE * f, ARRAY & buf) +void fgetline (FILE * f, std::vector & buf) { const int BUF_SIZE = 1000000; // enough? // KIT: increased to 1M to be safe buf.resize (BUF_SIZE); @@ -581,7 +581,7 @@ void fgetline (FILE * f, ARRAY & buf) buf.resize (strnlen (&buf[0], BUF_SIZE) +1); // SECURITY NOTE: string use has been reviewed } -void fgetline (FILE * f, ARRAY & buf) +void fgetline (FILE * f, std::vector & buf) { const int BUF_SIZE = 1000000; // enough? // KIT: increased to 1M to be safe buf.resize (BUF_SIZE); @@ -605,7 +605,7 @@ const char * fgetstring (FILE * f, __out_z_cap(size) char * buf, int size) } buf[i] = (char) c; } - ASSERT (i < size); + assert (i < size); buf[i] = 0; return buf; } @@ -624,7 +624,7 @@ const char * fgetstring (const HANDLE f, __out_z_cap(size) char * buf, int size) } buf[i] = (char) c; } - ASSERT (i < size); + assert (i < size); buf[i] = 0; return buf; } @@ -711,7 +711,7 @@ const char * fgettoken (FILE * f, __out_z_cap(size) char * buf, int size) if (rc != c) RuntimeError ("error in ungetc(): %s", strerror (errno)); } - ASSERT (i < size); + assert (i < size); buf[i] = 0; return buf; } @@ -818,14 +818,14 @@ void fcompareTag (const STRING & readTag, const STRING & expectedTag) void fputTag (FILE * f, const char * tag) { const int TAG_LEN = 4; - ASSERT (strnlen (tag, TAG_LEN + 1) == TAG_LEN); + assert (strnlen (tag, TAG_LEN + 1) == TAG_LEN); fwriteOrDie ((void *) tag, sizeof (*tag), strnlen (tag, TAG_LEN), f); } void fputTag(const HANDLE f, const char * tag) { const int TAG_LEN = 4; - ASSERT (strnlen (tag, TAG_LEN + 1) == TAG_LEN); + assert (strnlen (tag, TAG_LEN + 1) == TAG_LEN); fwriteOrDie ((void *) tag, sizeof (*tag), strnlen (tag, TAG_LEN), f); } @@ -860,7 +860,7 @@ void fpad (FILE * f, int n) int len = n - (pos % n); const char dummyString[] = "MSR-Asia: JL+FS"; size_t offset = sizeof(dummyString)/sizeof(dummyString[0]) - len; - ASSERT (offset >= 0); + assert (offset >= 0); fputstring (f, dummyString + offset); } // ---------------------------------------------------------------------------- @@ -899,7 +899,7 @@ short fgetshort_bigendian (FILE * f) int fgetint24 (FILE * f) { int v; - ASSERT (sizeof (v) == 4); + assert (sizeof (v) == 4); freadOrDie (&v, sizeof (v) -1, 1, f); // only read 3 lower-order bytes v <<= 8; // shift up (upper 8 bits uninit'ed) v >>= 8; // shift down 8 bits with sign-extend @@ -976,7 +976,7 @@ float fgetfloat_ascii (FILE * f) RuntimeError ("error reading float value from file (invalid format): %s"); else if (rc == EOF) RuntimeError ("error reading from file: %s", strerror (errno)); - ASSERT (rc == 1); + assert (rc == 1); return val; } @@ -1066,8 +1066,8 @@ void WAVEHEADER::write (FILE * f) fputint (f, nAvgBytesPerSec); fputshort (f, nBlockAlign); fputshort (f, wBitsPerSample); - ASSERT (FmtLength == 16); - ASSERT (wFormatTag == 1); + assert (FmtLength == 16); + assert (wFormatTag == 1); fputTag (f, "data"); fputint (f, DataLength); fflushOrDie (f); @@ -1160,14 +1160,14 @@ static short toolULawToLinear(unsigned char p_ucULawByte) // fgetwavraw(): only read data of .wav file. For multi-channel data, samples // are kept interleaved. -static void fgetwavraw(FILE * f, ARRAY & wav, const WAVEHEADER & wavhd) +static void fgetwavraw(FILE * f, std::vector & wav, const WAVEHEADER & wavhd) { int bytesPerSample = wavhd.wBitsPerSample / 8; // (sample size on one channel) wav.resize (wavhd.DataLength / bytesPerSample); if (wavhd.wFormatTag == 7) // mulaw { (wavhd.nChannels == 1) || RuntimeError ("fgetwav: wChannels=%d not supported for mulaw", wavhd.nChannels); - ARRAY data; + std::vector data; int numSamples = wavhd.DataLength/wavhd.nBlockAlign; data.resize (numSamples); freadOrDie (&data[0], sizeof (data[0]), numSamples, f); @@ -1191,7 +1191,7 @@ static void fgetwavraw(FILE * f, ARRAY & wav, const WAVEHEADER & wavhd) // fgetwav(): read an entire .wav file. Stereo is mapped to mono. // ---------------------------------------------------------------------------- -void fgetwav (FILE * f, ARRAY & wav, int & sampleRate) +void fgetwav (FILE * f, std::vector & wav, int & sampleRate) { WAVEHEADER wavhd; // will be filled in for 16-bit PCM!! signed short wFormatTag; // real format tag as found in data @@ -1207,7 +1207,7 @@ void fgetwav (FILE * f, ARRAY & wav, int & sampleRate) else if (wavhd.nChannels == 2) { //read raw data - ARRAY buf; + std::vector buf; buf.resize(numSamples * 2); fgetwavraw(f, buf, wavhd); @@ -1228,7 +1228,7 @@ void fgetwav (FILE * f, ARRAY & wav, int & sampleRate) } } -void fgetwav (const wstring & fn, ARRAY & wav, int & sampleRate) +void fgetwav (const wstring & fn, std::vector & wav, int & sampleRate) { auto_file_ptr f = fopenOrDie (fn, L"rbS"); fgetwav (f, wav, sampleRate); @@ -1243,13 +1243,13 @@ void fgetwav (const wstring & fn, ARRAY & wav, int & sampleRate) // channel. j is sample index. // ---------------------------------------------------------------------------- -void fgetraw (FILE *f, ARRAY< ARRAY > & data, const WAVEHEADER & wavhd) +void fgetraw (FILE *f, std::vector< std::vector > & data, const WAVEHEADER & wavhd) { - ARRAY wavraw; + std::vector wavraw; fgetwavraw (f, wavraw, wavhd); data.resize (wavhd.nChannels); int numSamples = wavhd.DataLength/wavhd.nBlockAlign; - ASSERT (numSamples == (int) wavraw.size() / wavhd.nChannels); + assert (numSamples == (int) wavraw.size() / wavhd.nChannels); for (int i = 0; i < wavhd.nChannels; i++) { @@ -1304,7 +1304,7 @@ void fputwfx (FILE *f, const WAVEFORMATEX & wfx, unsigned int numSamples) unsigned int RiffLength = 36 + DataLength; unsigned int FmtLength = 16; // file header - ASSERT (wfx.cbSize == 0 || wfx.cbSize == FmtLength + 2); + assert (wfx.cbSize == 0 || wfx.cbSize == FmtLength + 2); fputTag (f, "RIFF"); fputint (f, RiffLength); fputTag (f, "WAVE"); @@ -1377,7 +1377,7 @@ void fputshort (FILE * f, short v) void fputint24 (FILE * f, int v) { - ASSERT (sizeof (v) == 4); + assert (sizeof (v) == 4); fwriteOrDie (&v, sizeof (v) -1, 1, f); // write low-order 3 bytes } @@ -1417,7 +1417,7 @@ void fputdouble (FILE * f, double v) // fputfile(): write a binary block or a string as a file // ---------------------------------------------------------------------------- -void fputfile (const WSTRING & pathname, const ARRAY & buffer) +void fputfile (const WSTRING & pathname, const std::vector & buffer) { FILE * f = fopenOrDie (pathname, L"wb"); try @@ -1475,7 +1475,7 @@ void fputfile (const WSTRING & pathname, const std::string & string) // fgetfile(): load a file as a binary block // ---------------------------------------------------------------------------- -void fgetfile (const WSTRING & pathname, ARRAY & buffer) +void fgetfile (const WSTRING & pathname, std::vector & buffer) { FILE * f = fopenOrDie (pathname, L"rb"); size_t len = filesize (f); @@ -1487,11 +1487,11 @@ void fgetfile (const WSTRING & pathname, ARRAY & buffer) fclose (f); } -void fgetfile (FILE * f, ARRAY & buffer) +void fgetfile (FILE * f, std::vector & buffer) { // this version reads until eof buffer.resize (0); buffer.reserve (1000000); // avoid too many reallocations - ARRAY inbuf; + std::vector inbuf; inbuf.resize (65536); // read in chunks of this size while (!feof (f)) // read until eof { diff --git a/DataReader/Kaldi2Reader/latticearchive.cpp b/DataReader/Kaldi2Reader/latticearchive.cpp index 0fd07440d..135b79103 100644 --- a/DataReader/Kaldi2Reader/latticearchive.cpp +++ b/DataReader/Kaldi2Reader/latticearchive.cpp @@ -569,7 +569,7 @@ void lattice::fromhtklattice (const wstring & path, const std::unordered_map 0); + assert(info.numnodes > 0); nodes.reserve (info.numnodes); // parse the nodes for (size_t i = 0; i < info.numnodes; i++, iter++) @@ -586,7 +586,7 @@ void lattice::fromhtklattice (const wstring & path, const std::unordered_map 0); + assert(info.numedges > 0); edges.reserve (info.numedges); align.reserve (info.numedges * 10); // 10 phones per word on av. should be enough std::string label; diff --git a/DataReader/Kaldi2Reader/msra_mgram.h b/DataReader/Kaldi2Reader/msra_mgram.h index b8f85ff30..b3d87d9af 100644 --- a/DataReader/Kaldi2Reader/msra_mgram.h +++ b/DataReader/Kaldi2Reader/msra_mgram.h @@ -246,7 +246,7 @@ public: p[0] = (unsigned char) value; p[1] = (unsigned char) (value >> 8); p[2] = (unsigned char) (value >> 16); - ASSERT (value == (int) *this); + assert (value == (int) *this); return value; } }; @@ -265,7 +265,7 @@ public: base.resize (newsize); uint24_ref r = uint24_ref (&base[cursize]); r = value; - ASSERT (value == back()); + assert (value == back()); } }; @@ -310,7 +310,7 @@ class mgram_map if ((size_t) id >= level1lookup.size()) return nindex; i = level1lookup[id]; } - ASSERT (i == nindex || ids[1][i] == id); + assert (i == nindex || ids[1][i] == id); return i; } index_t beg = firsts[m][i]; @@ -733,11 +733,11 @@ public: coord c (k.m, (index_t) ids[k.m].size()); - ASSERT (firsts[k.m-1].back() == (index_t) ids[k.m].size()); + assert (firsts[k.m-1].back() == (index_t) ids[k.m].size()); ids[k.m].push_back (thisid); // create value firsts[k.m-1].back() = (index_t) ids[k.m].size(); if (firsts[k.m-1].back() != (index_t) ids[k.m].size()) fail ("create() numeric overflow--index_t too small"); - ASSERT (k.m == M || firsts[k.m].back() == (index_t) ids[k.m+1].size()); + assert (k.m == M || firsts[k.m].back() == (index_t) ids[k.m+1].size()); // optimization: level1nonsparse flag // If unigram level is entirely non-sparse, we can save the search @@ -769,10 +769,10 @@ public: firsts[m].resize (ids[m].size() +1, (int) ids[m+1].size()); foreach_index (m, firsts) { - ASSERT (firsts[m][0] == 0); + assert (firsts[m][0] == 0); foreach_index (i, ids[m]) - ASSERT (firsts[m][i] <= firsts[m][i+1]); - ASSERT ((size_t) firsts[m].back() == ids[m+1].size()); + assert (firsts[m][i] <= firsts[m][i+1]); + assert ((size_t) firsts[m].back() == ids[m+1].size()); } // id mapping // user-provided w->id map @@ -1039,7 +1039,7 @@ public: continue; const mgram_map::key key = *iter; - ASSERT (m == key.order()); + assert (m == key.order()); // --- output m-gram to ARPA file fprintfOrDie (outf, "%.4f", logP[iter] / log10); @@ -1065,7 +1065,7 @@ public: numMGramsWritten++; } fflushOrDie (outf); - ASSERT (numMGramsWritten == map.size (m)); + assert (numMGramsWritten == map.size (m)); fprintf (stderr, "\n"); } @@ -1352,7 +1352,7 @@ protected: int newid = w2id[w]; // map to new id space mgram[m-1] = newid; } - for (int k = 0; k < m; k++) ASSERT (mgram[k] == w2id[key[k]]); + for (int k = 0; k < m; k++) assert (mgram[k] == w2id[key[k]]); // insert new key into sortedMap mgram_map::coord c = sortedMap.create (mgram_map::unmapped_key (&mgram[0], m), createCache); // copy over logP and logB @@ -1478,7 +1478,7 @@ protected: if (m == 0) continue; const mgram_map::key key = *iter; - ASSERT (m == key.order()); + assert (m == key.order()); float thisP = P[iter]; if (islog) @@ -1967,7 +1967,7 @@ public: // estimate vector dropWord (userSymMap.size(), false); dropWord.push_back (true); // filtering but no : - ASSERT (!filterVocabulary || unkId != -1 || dropWord[dropId]); + assert (!filterVocabulary || unkId != -1 || dropWord[dropId]); //std::vector minObs (2, 0); //std::vector iMinObs (3, 0); @@ -2101,7 +2101,7 @@ public: if (m < M && m < 3) // for comments see where we estimate the discounted probabilities { // ^^ seems not to work for 4-grams... const mgram_map::key key = *iter; // needed to check for startId - ASSERT (key.order() == m); + assert (key.order() == m); if (m < 2 || key.pop_w().back() != startId) { @@ -2245,7 +2245,7 @@ public: } const mgram_map::key key = *iter; - ASSERT (key.order() == iter.order()); // (remove this check once verified) + assert (key.order() == iter.order()); // (remove this check once verified) // get history's count const mgram_map::coord j = histCoord[m-1]; // index of parent entry @@ -2278,7 +2278,7 @@ public: histCount = KNTotalCounts[c_h]; // (u,v,w) -> count (*,v,*) if (histCount == 0) // must exist RuntimeError ("estimate: malformed data: back-off value not found (denominator)"); - ASSERT (histCount >= count); + assert (histCount >= count); } } @@ -2627,7 +2627,7 @@ public: if (i == -1) goto backoff0; - ASSERT (entries_1[i].id == id); // verify unmapped unigram case + assert (entries_1[i].id == id); // verify unmapped unigram case double logP = entries_1[i].logP; return totalLogB + logP; } @@ -2640,7 +2640,7 @@ public: int i = (entries1Unmapped) ? id : findEntry (entries[1], refs[0][0].firstEntry, refs[0][1].firstEntry, id); if (i == -1) // unknown history: fall back goto fallback; - ASSERT (entries[1][i].id == id); // verify unmapped unigram case + assert (entries[1][i].id == id); // verify unmapped unigram case // found it: advance search by one history token const std::vector & refs_1 = refs[1]; @@ -2656,7 +2656,7 @@ public: int i = findEntry (entries[n], beg, end, id); if (i == -1) // unseen history: fall back goto fallback; - ASSERT (entries[n][i].id == id); // verify unmapped unigram case + assert (entries[n][i].id == id); // verify unmapped unigram case // found it: advance search by one history token const std::vector & refs_n = refs[n]; @@ -2679,7 +2679,7 @@ public: i = findEntry (entries_m, beg, end, id); if (i == -1) goto backoff; - ASSERT (entries_m[i].id == id); // verify unmapped unigram case + assert (entries_m[i].id == id); // verify unmapped unigram case longestMGramFound = m; @@ -2722,7 +2722,7 @@ fallback: // we get here in case of fallback (no back-off weight) or back-off int i = findEntry (entries_n, beg, end, id); if (i == -1) // unknown history: fall back return score_unoptimized (mgram +1, m -1); // tail recursion - ASSERT (entries_n[i].id == id); // verify unmapped unigram case + assert (entries_n[i].id == id); // verify unmapped unigram case // found it: advance search by one history token const std::vector & refs_n = refs[n]; logB = refs_n[i].logB; @@ -2739,7 +2739,7 @@ fallback: // we get here in case of fallback (no back-off weight) or back-off int i = findEntry (entries_m1, beg, end, id); if (i != -1) { - ASSERT (entries_m1[i].id == id); // verify unmapped unigram case + assert (entries_m1[i].id == id); // verify unmapped unigram case double logP = entries_m1[i].logP; return logP; } @@ -2997,7 +2997,7 @@ skipMGram: refs_h[i].firstEntry = n0; n0 += num; } - ASSERT (refs_h.back().firstEntry == (int) entries[m].size()); + assert (refs_h.back().firstEntry == (int) entries[m].size()); // create closing history entry if (m < M) diff --git a/DataReader/KaldiReader/basetypes.h b/DataReader/KaldiReader/basetypes.h index 0331fa78d..ac26f2ae8 100644 --- a/DataReader/KaldiReader/basetypes.h +++ b/DataReader/KaldiReader/basetypes.h @@ -217,8 +217,8 @@ static inline void Sleep (size_t ms) { std::this_thread::sleep_for (std::chrono: //#define SAFE_DELETE(p) { if(p) { delete (p); (p)=NULL; } } //#define SAFE_RELEASE(p) { if(p) { (p)->Release(); (p)=NULL; } } // nasty! use CComPtr<> -#ifndef ASSERT -#define ASSERT assert +#ifndef assert +#define assert assert #endif // ---------------------------------------------------------------------------- @@ -227,14 +227,14 @@ static inline void Sleep (size_t ms) { std::this_thread::sleep_for (std::chrono: namespace msra { namespace basetypes { -// class ARRAY -- std::vector with array-bounds checking +// class std::vector -- std::vector with array-bounds checking // VS 2008 and above do this, so there is no longer a need for this. #pragma warning(push) #pragma warning(disable : 4555) // expression has no affect, used so retail won't be empty template -class ARRAY : public std::vector<_ElemType> +class std::vector : public std::vector<_ElemType> { #if defined (_DEBUG) || defined (_CHECKED) // debug version with range checking static void throwOutOfBounds() @@ -243,15 +243,15 @@ class ARRAY : public std::vector<_ElemType> OACR_WARNING_DISABLE(IGNOREDBYCOMMA, "Reviewd OK. Special trick below to show a message when assertion fails" "[rogeryu 2006/03/24]"); OACR_WARNING_DISABLE(BOGUS_EXPRESSION_LIST, "This is intentional. [rogeryu 2006/03/24]"); - //ASSERT ("ARRAY::operator[] out of bounds", false); + //assert ("std::vector::operator[] out of bounds", false); OACR_WARNING_POP; } #endif public: - ARRAY() : std::vector<_ElemType> () { } - ARRAY (int size) : std::vector<_ElemType> (size) { } + std::vector() : std::vector<_ElemType> () { } + std::vector (int size) : std::vector<_ElemType> (size) { } #if defined (_DEBUG) || defined (_CHECKED) // debug version with range checking // ------------------------------------------------------------------------ @@ -285,7 +285,7 @@ public: } }; // overload swap(), otherwise we'd fallback to 3-way assignment & possibly throw -template inline void swap (ARRAY<_T> & L, ARRAY<_T> & R) throw() +template inline void swap (std::vector<_T> & L, std::vector<_T> & R) throw() { swap ((std::vector<_T> &) L, (std::vector<_T> &) R); } // class fixed_vector - non-resizable vector @@ -294,8 +294,8 @@ template class fixed_vector { _T * p; // pointer array size_t n; // number of elements - void check (int index) const { index/*avoid compiler warning*/;ASSERT (index >= 0 && (size_t) index < n); } - void check (size_t index) const { ASSERT (index < n); } + void check (int index) const { index/*avoid compiler warning*/;assert (index >= 0 && (size_t) index < n); } + void check (size_t index) const { assert (index < n); } // ... TODO: when I make this public, LinearTransform.h acts totally up but I cannot see where it comes from. //fixed_vector (const fixed_vector & other) : n (0), p (NULL) { *this = other; } public: @@ -316,7 +316,7 @@ public: inline const _T & operator[] (int index) const { check (index); return p[index]; } // reading inline _T & operator[] (size_t index) { check (index); return p[index]; } // writing inline const _T & operator[] (size_t index) const { check (index); return p[index]; } // reading - inline int indexof (const _T & elem) const { ASSERT (&elem >= p && &elem < p + n); return &elem - p; } + inline int indexof (const _T & elem) const { assert (&elem >= p && &elem < p + n); return &elem - p; } inline void swap (fixed_vector & other) throw() { std::swap (other.p, p); std::swap (other.n, n); } template fixed_vector & operator= (const VECTOR & other) { @@ -346,7 +346,7 @@ template inline void swap (fixed_vector<_T> & L, fixed_vector<_T> & R) template class matrix : fixed_vector { size_t numcols; - size_t locate (size_t i, size_t j) const { ASSERT (i < rows() && j < cols()); return i * cols() + j; } + size_t locate (size_t i, size_t j) const { assert (i < rows() && j < cols()); return i * cols() + j; } public: typedef T elemtype; matrix() : numcols (0) {} diff --git a/DataReader/KaldiReader/basetypes.old.h b/DataReader/KaldiReader/basetypes.old.h index 98673b89d..2fed21952 100644 --- a/DataReader/KaldiReader/basetypes.old.h +++ b/DataReader/KaldiReader/basetypes.old.h @@ -148,12 +148,12 @@ typedef unsigned char byte; #define SAFE_DELETE(p) { if(p) { delete (p); (p)=NULL; } } #define SAFE_RELEASE(p) { if(p) { (p)->Release(); (p)=NULL; } } // nasty! use CComPtr<> -#ifndef ASSERT +#ifndef assert #ifdef _CHECKED // basetypes.h expects this function to be defined (it is in message.h) extern void _CHECKED_ASSERT_error(const char * file, int line, const char * exp); -#define ASSERT(exp) ((exp)||(_CHECKED_ASSERT_error(__FILE__,__LINE__,#exp),0)) +#define assert(exp) ((exp)||(_CHECKED_ASSERT_error(__FILE__,__LINE__,#exp),0)) #else -#define ASSERT assert +#define assert assert #endif #endif @@ -164,11 +164,11 @@ using namespace std; namespace msra { namespace basetypes { -// class ARRAY -- std::vector with array-bounds checking +// class std::vector -- std::vector with array-bounds checking // VS 2008 and above do this, so there is no longer a need for this. template -class ARRAY : public std::vector<_ElemType> +class std::vector : public std::vector<_ElemType> { #if defined (_DEBUG) || defined (_CHECKED) // debug version with range checking static void throwOutOfBounds() @@ -177,15 +177,15 @@ class ARRAY : public std::vector<_ElemType> OACR_WARNING_DISABLE(IGNOREDBYCOMMA, "Reviewd OK. Special trick below to show a message when assertion fails" "[rogeryu 2006/03/24]"); OACR_WARNING_DISABLE(BOGUS_EXPRESSION_LIST, "This is intentional. [rogeryu 2006/03/24]"); - ASSERT (("ARRAY::operator[] out of bounds", false)); + assert (("std::vector::operator[] out of bounds", false)); OACR_WARNING_POP; } #endif public: - ARRAY() : std::vector<_ElemType> () { } - ARRAY (int size) : std::vector<_ElemType> (size) { } + std::vector() : std::vector<_ElemType> () { } + std::vector (int size) : std::vector<_ElemType> (size) { } #if defined (_DEBUG) || defined (_CHECKED) // debug version with range checking // ------------------------------------------------------------------------ @@ -219,7 +219,7 @@ public: } }; // overload swap(), otherwise we'd fallback to 3-way assignment & possibly throw -template inline void swap (ARRAY<_T> & L, ARRAY<_T> & R) throw() +template inline void swap (std::vector<_T> & L, std::vector<_T> & R) throw() { swap ((std::vector<_T> &) L, (std::vector<_T> &) R); } // class fixed_vector - non-resizable vector @@ -228,8 +228,8 @@ template class fixed_vector { _T * p; // pointer array size_t n; // number of elements - void check (int index) const { index; ASSERT (index >= 0 && (size_t) index < n); } - void check (size_t index) const { index; ASSERT (index < n); } + void check (int index) const { index; assert (index >= 0 && (size_t) index < n); } + void check (size_t index) const { index; assert (index < n); } // ... TODO: when I make this public, LinearTransform.h acts totally up but I cannot see where it comes from. //fixed_vector (const fixed_vector & other) : n (0), p (NULL) { *this = other; } public: @@ -250,7 +250,7 @@ public: inline const _T & operator[] (int index) const { check (index); return p[index]; } // reading inline _T & operator[] (size_t index) { check (index); return p[index]; } // writing inline const _T & operator[] (size_t index) const { check (index); return p[index]; } // reading - inline int indexof (const _T & elem) const { ASSERT (&elem >= p && &elem < p + n); return &elem - p; } + inline int indexof (const _T & elem) const { assert (&elem >= p && &elem < p + n); return &elem - p; } inline void swap (fixed_vector & other) throw() { std::swap (other.p, p); std::swap (other.n, n); } template fixed_vector & operator= (const VECTOR & other) { @@ -278,7 +278,7 @@ template inline void swap (fixed_vector<_T> & L, fixed_vector<_T> & R) template class matrix : fixed_vector { size_t numcols; - size_t locate (size_t i, size_t j) const { ASSERT (i < rows() && j < cols()); return i * cols() + j; } + size_t locate (size_t i, size_t j) const { assert (i < rows() && j < cols()); return i * cols() + j; } public: typedef T elemtype; matrix() : numcols (0) {} @@ -464,7 +464,7 @@ struct utf16 : std::wstring { utf16 (const std::string & p) // utf-8 to -16 int rc = MultiByteToWideChar (CP_UTF8, 0, p.c_str(), (int) len, &buf[0], (int) buf.size()); if (rc == 0) throw std::runtime_error ("MultiByteToWideChar"); - ASSERT (rc < buf.size ()); + assert (rc < buf.size ()); (*(std::wstring*)this) = &buf[0]; }}; #endif diff --git a/DataReader/KaldiReader/fileutil.cpp b/DataReader/KaldiReader/fileutil.cpp index abdb96e37..2daeda543 100644 --- a/DataReader/KaldiReader/fileutil.cpp +++ b/DataReader/KaldiReader/fileutil.cpp @@ -558,14 +558,14 @@ std::wstring fgetlinew (FILE * f) } // STL string version avoiding most memory allocations -void fgetline (FILE * f, std::string & s, ARRAY & buf) +void fgetline (FILE * f, std::string & s, std::vector & buf) { buf.resize (1000000); // enough? // KIT: increased to 1M to be safe const char * p = fgetline (f, &buf[0], (int) buf.size()); s.assign (p); } -void fgetline (FILE * f, std::wstring & s, ARRAY & buf) +void fgetline (FILE * f, std::wstring & s, std::vector & buf) { buf.resize (1000000); // enough? // KIT: increased to 1M to be safe const wchar_t * p = fgetline (f, &buf[0], (int) buf.size()); @@ -573,7 +573,7 @@ void fgetline (FILE * f, std::wstring & s, ARRAY & buf) } // char buffer version -void fgetline (FILE * f, ARRAY & buf) +void fgetline (FILE * f, std::vector & buf) { const int BUF_SIZE = 1000000; // enough? // KIT: increased to 1M to be safe buf.resize (BUF_SIZE); @@ -581,7 +581,7 @@ void fgetline (FILE * f, ARRAY & buf) buf.resize (strnlen (&buf[0], BUF_SIZE) +1); // SECURITY NOTE: string use has been reviewed } -void fgetline (FILE * f, ARRAY & buf) +void fgetline (FILE * f, std::vector & buf) { const int BUF_SIZE = 1000000; // enough? // KIT: increased to 1M to be safe buf.resize (BUF_SIZE); @@ -605,7 +605,7 @@ const char * fgetstring (FILE * f, __out_z_cap(size) char * buf, int size) } buf[i] = (char) c; } - ASSERT (i < size); + assert (i < size); buf[i] = 0; return buf; } @@ -624,7 +624,7 @@ const char * fgetstring (const HANDLE f, __out_z_cap(size) char * buf, int size) } buf[i] = (char) c; } - ASSERT (i < size); + assert (i < size); buf[i] = 0; return buf; } @@ -711,7 +711,7 @@ const char * fgettoken (FILE * f, __out_z_cap(size) char * buf, int size) if (rc != c) RuntimeError ("error in ungetc(): %s", strerror (errno)); } - ASSERT (i < size); + assert (i < size); buf[i] = 0; return buf; } @@ -818,14 +818,14 @@ void fcompareTag (const STRING & readTag, const STRING & expectedTag) void fputTag (FILE * f, const char * tag) { const int TAG_LEN = 4; - ASSERT (strnlen (tag, TAG_LEN + 1) == TAG_LEN); + assert (strnlen (tag, TAG_LEN + 1) == TAG_LEN); fwriteOrDie ((void *) tag, sizeof (*tag), strnlen (tag, TAG_LEN), f); } void fputTag(const HANDLE f, const char * tag) { const int TAG_LEN = 4; - ASSERT (strnlen (tag, TAG_LEN + 1) == TAG_LEN); + assert (strnlen (tag, TAG_LEN + 1) == TAG_LEN); fwriteOrDie ((void *) tag, sizeof (*tag), strnlen (tag, TAG_LEN), f); } @@ -860,7 +860,7 @@ void fpad (FILE * f, int n) int len = n - (pos % n); const char dummyString[] = "MSR-Asia: JL+FS"; size_t offset = sizeof(dummyString)/sizeof(dummyString[0]) - len; - ASSERT (offset >= 0); + assert (offset >= 0); fputstring (f, dummyString + offset); } // ---------------------------------------------------------------------------- @@ -899,7 +899,7 @@ short fgetshort_bigendian (FILE * f) int fgetint24 (FILE * f) { int v; - ASSERT (sizeof (v) == 4); + assert (sizeof (v) == 4); freadOrDie (&v, sizeof (v) -1, 1, f); // only read 3 lower-order bytes v <<= 8; // shift up (upper 8 bits uninit'ed) v >>= 8; // shift down 8 bits with sign-extend @@ -976,7 +976,7 @@ float fgetfloat_ascii (FILE * f) RuntimeError ("error reading float value from file (invalid format): %s"); else if (rc == EOF) RuntimeError ("error reading from file: %s", strerror (errno)); - ASSERT (rc == 1); + assert (rc == 1); return val; } @@ -1066,8 +1066,8 @@ void WAVEHEADER::write (FILE * f) fputint (f, nAvgBytesPerSec); fputshort (f, nBlockAlign); fputshort (f, wBitsPerSample); - ASSERT (FmtLength == 16); - ASSERT (wFormatTag == 1); + assert (FmtLength == 16); + assert (wFormatTag == 1); fputTag (f, "data"); fputint (f, DataLength); fflushOrDie (f); @@ -1160,14 +1160,14 @@ static short toolULawToLinear(unsigned char p_ucULawByte) // fgetwavraw(): only read data of .wav file. For multi-channel data, samples // are kept interleaved. -static void fgetwavraw(FILE * f, ARRAY & wav, const WAVEHEADER & wavhd) +static void fgetwavraw(FILE * f, std::vector & wav, const WAVEHEADER & wavhd) { int bytesPerSample = wavhd.wBitsPerSample / 8; // (sample size on one channel) wav.resize (wavhd.DataLength / bytesPerSample); if (wavhd.wFormatTag == 7) // mulaw { (wavhd.nChannels == 1) || RuntimeError ("fgetwav: wChannels=%d not supported for mulaw", wavhd.nChannels); - ARRAY data; + std::vector data; int numSamples = wavhd.DataLength/wavhd.nBlockAlign; data.resize (numSamples); freadOrDie (&data[0], sizeof (data[0]), numSamples, f); @@ -1191,7 +1191,7 @@ static void fgetwavraw(FILE * f, ARRAY & wav, const WAVEHEADER & wavhd) // fgetwav(): read an entire .wav file. Stereo is mapped to mono. // ---------------------------------------------------------------------------- -void fgetwav (FILE * f, ARRAY & wav, int & sampleRate) +void fgetwav (FILE * f, std::vector & wav, int & sampleRate) { WAVEHEADER wavhd; // will be filled in for 16-bit PCM!! signed short wFormatTag; // real format tag as found in data @@ -1207,7 +1207,7 @@ void fgetwav (FILE * f, ARRAY & wav, int & sampleRate) else if (wavhd.nChannels == 2) { //read raw data - ARRAY buf; + std::vector buf; buf.resize(numSamples * 2); fgetwavraw(f, buf, wavhd); @@ -1228,7 +1228,7 @@ void fgetwav (FILE * f, ARRAY & wav, int & sampleRate) } } -void fgetwav (const wstring & fn, ARRAY & wav, int & sampleRate) +void fgetwav (const wstring & fn, std::vector & wav, int & sampleRate) { auto_file_ptr f = fopenOrDie (fn, L"rbS"); fgetwav (f, wav, sampleRate); @@ -1243,13 +1243,13 @@ void fgetwav (const wstring & fn, ARRAY & wav, int & sampleRate) // channel. j is sample index. // ---------------------------------------------------------------------------- -void fgetraw (FILE *f, ARRAY< ARRAY > & data, const WAVEHEADER & wavhd) +void fgetraw (FILE *f, std::vector< std::vector > & data, const WAVEHEADER & wavhd) { - ARRAY wavraw; + std::vector wavraw; fgetwavraw (f, wavraw, wavhd); data.resize (wavhd.nChannels); int numSamples = wavhd.DataLength/wavhd.nBlockAlign; - ASSERT (numSamples == (int) wavraw.size() / wavhd.nChannels); + assert (numSamples == (int) wavraw.size() / wavhd.nChannels); for (int i = 0; i < wavhd.nChannels; i++) { @@ -1304,7 +1304,7 @@ void fputwfx (FILE *f, const WAVEFORMATEX & wfx, unsigned int numSamples) unsigned int RiffLength = 36 + DataLength; unsigned int FmtLength = 16; // file header - ASSERT (wfx.cbSize == 0 || wfx.cbSize == FmtLength + 2); + assert (wfx.cbSize == 0 || wfx.cbSize == FmtLength + 2); fputTag (f, "RIFF"); fputint (f, RiffLength); fputTag (f, "WAVE"); @@ -1377,7 +1377,7 @@ void fputshort (FILE * f, short v) void fputint24 (FILE * f, int v) { - ASSERT (sizeof (v) == 4); + assert (sizeof (v) == 4); fwriteOrDie (&v, sizeof (v) -1, 1, f); // write low-order 3 bytes } @@ -1417,7 +1417,7 @@ void fputdouble (FILE * f, double v) // fputfile(): write a binary block or a string as a file // ---------------------------------------------------------------------------- -void fputfile (const WSTRING & pathname, const ARRAY & buffer) +void fputfile (const WSTRING & pathname, const std::vector & buffer) { FILE * f = fopenOrDie (pathname, L"wb"); try @@ -1475,7 +1475,7 @@ void fputfile (const WSTRING & pathname, const std::string & string) // fgetfile(): load a file as a binary block // ---------------------------------------------------------------------------- -void fgetfile (const WSTRING & pathname, ARRAY & buffer) +void fgetfile (const WSTRING & pathname, std::vector & buffer) { FILE * f = fopenOrDie (pathname, L"rb"); size_t len = filesize (f); @@ -1487,11 +1487,11 @@ void fgetfile (const WSTRING & pathname, ARRAY & buffer) fclose (f); } -void fgetfile (FILE * f, ARRAY & buffer) +void fgetfile (FILE * f, std::vector & buffer) { // this version reads until eof buffer.resize (0); buffer.reserve (1000000); // avoid too many reallocations - ARRAY inbuf; + std::vector inbuf; inbuf.resize (65536); // read in chunks of this size while (!feof (f)) // read until eof { diff --git a/DataReader/KaldiReader/fileutil.old.h b/DataReader/KaldiReader/fileutil.old.h index 7578b0a5c..8d6b4e519 100644 --- a/DataReader/KaldiReader/fileutil.old.h +++ b/DataReader/KaldiReader/fileutil.old.h @@ -162,10 +162,10 @@ template CHAR * fgetline (FILE * f, CHAR * buf, int size); template CHAR * fgetline (FILE * f, CHAR (& buf)[n]) { return fgetline (f, buf, n); } STRING fgetline (FILE * f); WSTRING fgetlinew (FILE * f); -void fgetline (FILE * f, std::string & s, ARRAY & buf); -void fgetline (FILE * f, std::wstring & s, ARRAY & buf); -void fgetline (FILE * f, ARRAY & buf); -void fgetline (FILE * f, ARRAY & buf); +void fgetline (FILE * f, std::string & s, std::vector & buf); +void fgetline (FILE * f, std::wstring & s, std::vector & buf); +void fgetline (FILE * f, std::vector & buf); +void fgetline (FILE * f, std::vector & buf); const char * fgetstring (FILE * f, char * buf, int size); template const char * fgetstring (FILE * f, char (& buf)[n]) { return fgetstring (f, buf, n); } @@ -274,8 +274,8 @@ double fgetdouble (FILE * f); // fgetwav(): read an entire .wav file // ---------------------------------------------------------------------------- -void fgetwav (FILE * f, ARRAY & wav, int & sampleRate); -void fgetwav (const wstring & fn, ARRAY & wav, int & sampleRate); +void fgetwav (FILE * f, std::vector & wav, int & sampleRate); +void fgetwav (const wstring & fn, std::vector & wav, int & sampleRate); // ---------------------------------------------------------------------------- // fputwav(): save data into a .wav file @@ -325,7 +325,7 @@ void fputdouble (FILE * f, double val); // fputfile(): write a binary block or a string as a file // ---------------------------------------------------------------------------- -void fputfile (const WSTRING & pathname, const ARRAY & buffer); +void fputfile (const WSTRING & pathname, const std::vector & buffer); void fputfile (const WSTRING & pathname, const std::wstring & string); void fputfile (const WSTRING & pathname, const std::string & string); @@ -333,8 +333,8 @@ void fputfile (const WSTRING & pathname, const std::string & string); // fgetfile(): load a file as a binary block // ---------------------------------------------------------------------------- -void fgetfile (const WSTRING & pathname, ARRAY & buffer); -void fgetfile (FILE * f, ARRAY & buffer); +void fgetfile (const WSTRING & pathname, std::vector & buffer); +void fgetfile (FILE * f, std::vector & buffer); namespace msra { namespace files { void fgetfilelines (const std::wstring & pathname, vector & readbuffer, std::vector & lines); static inline std::vector fgetfilelines (const std::wstring & pathname) { vector buffer; std::vector lines; fgetfilelines (pathname, buffer, lines); return lines; } @@ -408,7 +408,7 @@ void fputwfx (FILE *f, const WAVEFORMATEX & wfx, unsigned int numSamples); // For example, data[i][j]: i is channel index, 0 means the first // channel. j is sample index. // ---------------------------------------------------------------------------- -void fgetraw (FILE *f,ARRAY< ARRAY > & data,const WAVEHEADER & wavhd); +void fgetraw (FILE *f,std::vector< std::vector > & data,const WAVEHEADER & wavhd); // ---------------------------------------------------------------------------- // temp functions -- clean these up diff --git a/DataReader/KaldiReader/latticearchive.cpp b/DataReader/KaldiReader/latticearchive.cpp index 0fd07440d..135b79103 100644 --- a/DataReader/KaldiReader/latticearchive.cpp +++ b/DataReader/KaldiReader/latticearchive.cpp @@ -569,7 +569,7 @@ void lattice::fromhtklattice (const wstring & path, const std::unordered_map 0); + assert(info.numnodes > 0); nodes.reserve (info.numnodes); // parse the nodes for (size_t i = 0; i < info.numnodes; i++, iter++) @@ -586,7 +586,7 @@ void lattice::fromhtklattice (const wstring & path, const std::unordered_map 0); + assert(info.numedges > 0); edges.reserve (info.numedges); align.reserve (info.numedges * 10); // 10 phones per word on av. should be enough std::string label; diff --git a/DataReader/KaldiReader/msra_mgram.h b/DataReader/KaldiReader/msra_mgram.h index b8f85ff30..b3d87d9af 100644 --- a/DataReader/KaldiReader/msra_mgram.h +++ b/DataReader/KaldiReader/msra_mgram.h @@ -246,7 +246,7 @@ public: p[0] = (unsigned char) value; p[1] = (unsigned char) (value >> 8); p[2] = (unsigned char) (value >> 16); - ASSERT (value == (int) *this); + assert (value == (int) *this); return value; } }; @@ -265,7 +265,7 @@ public: base.resize (newsize); uint24_ref r = uint24_ref (&base[cursize]); r = value; - ASSERT (value == back()); + assert (value == back()); } }; @@ -310,7 +310,7 @@ class mgram_map if ((size_t) id >= level1lookup.size()) return nindex; i = level1lookup[id]; } - ASSERT (i == nindex || ids[1][i] == id); + assert (i == nindex || ids[1][i] == id); return i; } index_t beg = firsts[m][i]; @@ -733,11 +733,11 @@ public: coord c (k.m, (index_t) ids[k.m].size()); - ASSERT (firsts[k.m-1].back() == (index_t) ids[k.m].size()); + assert (firsts[k.m-1].back() == (index_t) ids[k.m].size()); ids[k.m].push_back (thisid); // create value firsts[k.m-1].back() = (index_t) ids[k.m].size(); if (firsts[k.m-1].back() != (index_t) ids[k.m].size()) fail ("create() numeric overflow--index_t too small"); - ASSERT (k.m == M || firsts[k.m].back() == (index_t) ids[k.m+1].size()); + assert (k.m == M || firsts[k.m].back() == (index_t) ids[k.m+1].size()); // optimization: level1nonsparse flag // If unigram level is entirely non-sparse, we can save the search @@ -769,10 +769,10 @@ public: firsts[m].resize (ids[m].size() +1, (int) ids[m+1].size()); foreach_index (m, firsts) { - ASSERT (firsts[m][0] == 0); + assert (firsts[m][0] == 0); foreach_index (i, ids[m]) - ASSERT (firsts[m][i] <= firsts[m][i+1]); - ASSERT ((size_t) firsts[m].back() == ids[m+1].size()); + assert (firsts[m][i] <= firsts[m][i+1]); + assert ((size_t) firsts[m].back() == ids[m+1].size()); } // id mapping // user-provided w->id map @@ -1039,7 +1039,7 @@ public: continue; const mgram_map::key key = *iter; - ASSERT (m == key.order()); + assert (m == key.order()); // --- output m-gram to ARPA file fprintfOrDie (outf, "%.4f", logP[iter] / log10); @@ -1065,7 +1065,7 @@ public: numMGramsWritten++; } fflushOrDie (outf); - ASSERT (numMGramsWritten == map.size (m)); + assert (numMGramsWritten == map.size (m)); fprintf (stderr, "\n"); } @@ -1352,7 +1352,7 @@ protected: int newid = w2id[w]; // map to new id space mgram[m-1] = newid; } - for (int k = 0; k < m; k++) ASSERT (mgram[k] == w2id[key[k]]); + for (int k = 0; k < m; k++) assert (mgram[k] == w2id[key[k]]); // insert new key into sortedMap mgram_map::coord c = sortedMap.create (mgram_map::unmapped_key (&mgram[0], m), createCache); // copy over logP and logB @@ -1478,7 +1478,7 @@ protected: if (m == 0) continue; const mgram_map::key key = *iter; - ASSERT (m == key.order()); + assert (m == key.order()); float thisP = P[iter]; if (islog) @@ -1967,7 +1967,7 @@ public: // estimate vector dropWord (userSymMap.size(), false); dropWord.push_back (true); // filtering but no : - ASSERT (!filterVocabulary || unkId != -1 || dropWord[dropId]); + assert (!filterVocabulary || unkId != -1 || dropWord[dropId]); //std::vector minObs (2, 0); //std::vector iMinObs (3, 0); @@ -2101,7 +2101,7 @@ public: if (m < M && m < 3) // for comments see where we estimate the discounted probabilities { // ^^ seems not to work for 4-grams... const mgram_map::key key = *iter; // needed to check for startId - ASSERT (key.order() == m); + assert (key.order() == m); if (m < 2 || key.pop_w().back() != startId) { @@ -2245,7 +2245,7 @@ public: } const mgram_map::key key = *iter; - ASSERT (key.order() == iter.order()); // (remove this check once verified) + assert (key.order() == iter.order()); // (remove this check once verified) // get history's count const mgram_map::coord j = histCoord[m-1]; // index of parent entry @@ -2278,7 +2278,7 @@ public: histCount = KNTotalCounts[c_h]; // (u,v,w) -> count (*,v,*) if (histCount == 0) // must exist RuntimeError ("estimate: malformed data: back-off value not found (denominator)"); - ASSERT (histCount >= count); + assert (histCount >= count); } } @@ -2627,7 +2627,7 @@ public: if (i == -1) goto backoff0; - ASSERT (entries_1[i].id == id); // verify unmapped unigram case + assert (entries_1[i].id == id); // verify unmapped unigram case double logP = entries_1[i].logP; return totalLogB + logP; } @@ -2640,7 +2640,7 @@ public: int i = (entries1Unmapped) ? id : findEntry (entries[1], refs[0][0].firstEntry, refs[0][1].firstEntry, id); if (i == -1) // unknown history: fall back goto fallback; - ASSERT (entries[1][i].id == id); // verify unmapped unigram case + assert (entries[1][i].id == id); // verify unmapped unigram case // found it: advance search by one history token const std::vector & refs_1 = refs[1]; @@ -2656,7 +2656,7 @@ public: int i = findEntry (entries[n], beg, end, id); if (i == -1) // unseen history: fall back goto fallback; - ASSERT (entries[n][i].id == id); // verify unmapped unigram case + assert (entries[n][i].id == id); // verify unmapped unigram case // found it: advance search by one history token const std::vector & refs_n = refs[n]; @@ -2679,7 +2679,7 @@ public: i = findEntry (entries_m, beg, end, id); if (i == -1) goto backoff; - ASSERT (entries_m[i].id == id); // verify unmapped unigram case + assert (entries_m[i].id == id); // verify unmapped unigram case longestMGramFound = m; @@ -2722,7 +2722,7 @@ fallback: // we get here in case of fallback (no back-off weight) or back-off int i = findEntry (entries_n, beg, end, id); if (i == -1) // unknown history: fall back return score_unoptimized (mgram +1, m -1); // tail recursion - ASSERT (entries_n[i].id == id); // verify unmapped unigram case + assert (entries_n[i].id == id); // verify unmapped unigram case // found it: advance search by one history token const std::vector & refs_n = refs[n]; logB = refs_n[i].logB; @@ -2739,7 +2739,7 @@ fallback: // we get here in case of fallback (no back-off weight) or back-off int i = findEntry (entries_m1, beg, end, id); if (i != -1) { - ASSERT (entries_m1[i].id == id); // verify unmapped unigram case + assert (entries_m1[i].id == id); // verify unmapped unigram case double logP = entries_m1[i].logP; return logP; } @@ -2997,7 +2997,7 @@ skipMGram: refs_h[i].firstEntry = n0; n0 += num; } - ASSERT (refs_h.back().firstEntry == (int) entries[m].size()); + assert (refs_h.back().firstEntry == (int) entries[m].size()); // create closing history entry if (m < M) diff --git a/Math/Math/Matrix.cpp b/Math/Math/Matrix.cpp index 2018aec50..08f38cc6d 100755 --- a/Math/Math/Matrix.cpp +++ b/Math/Math/Matrix.cpp @@ -803,7 +803,7 @@ namespace Microsoft { namespace MSR { namespace CNTK { template Matrix& Matrix::SetColumnSlice(const Matrix& fromMatrix, size_t startColumn, size_t numCols) { - ASSERT(m_CPUMatrix != nullptr || m_GPUMatrix != nullptr); + assert(m_CPUMatrix != nullptr || m_GPUMatrix != nullptr); // must already been allocated DISPATCH_MATRIX_ON_FLAG(&fromMatrix, @@ -820,7 +820,7 @@ namespace Microsoft { namespace MSR { namespace CNTK { template void Matrix::CopyColumnsStrided(const Matrix& fromMatrix, size_t numCols, size_t srcNumColsStride, size_t destNumColsStride) { - ASSERT(m_CPUMatrix != nullptr || m_GPUMatrix != nullptr); + assert(m_CPUMatrix != nullptr || m_GPUMatrix != nullptr); DISPATCH_MATRIX_ON_FLAG(&fromMatrix, this, diff --git a/Math/MathPerformanceTests/MathPerformanceTests.cpp b/Math/MathPerformanceTests/MathPerformanceTests.cpp index f391483cf..4da701548 100644 --- a/Math/MathPerformanceTests/MathPerformanceTests.cpp +++ b/Math/MathPerformanceTests/MathPerformanceTests.cpp @@ -10,8 +10,9 @@ #include #include #include -#include "..\Math\Matrix.h" -#include "..\Math\CPUMatrix.h" +#include "Matrix.h" +#include "CPUMatrix.h" +#include "Sequences.h" using namespace Microsoft::MSR::CNTK; using namespace std; @@ -95,7 +96,7 @@ void oldRnnEvaluateThisNodeSRP(Matrix& functionValues, size_t mNbr, Ma template void oldRNNEvaluateThisNodeSRP(const size_t timeIdxInSeq, const int delay, const bool reset, const ElemType default_activity, Matrix& functionValues, const Matrix& pastActivity, const Matrix& inputFunctionValues, const size_t indexInBatch, const size_t mNbr) { - ASSERT(delay > 0); + assert(delay > 0); if (functionValues.GetNumRows() != inputFunctionValues.GetNumRows() || functionValues.GetNumCols() != inputFunctionValues.GetNumCols()) diff --git a/Math/MathPerformanceTests/MathPerformanceTests.vcxproj b/Math/MathPerformanceTests/MathPerformanceTests.vcxproj index a21a60247..87e2e2e00 100644 --- a/Math/MathPerformanceTests/MathPerformanceTests.vcxproj +++ b/Math/MathPerformanceTests/MathPerformanceTests.vcxproj @@ -65,6 +65,7 @@ WIN32;_DEBUG;_CONSOLE;%(PreprocessorDefinitions) true true + ..\Math; ..\..\Common\Include; $(CudaToolkitIncludeDir); %(AdditionalIncludeDirectories) Console @@ -86,7 +87,7 @@ WIN32;NDEBUG;_CONSOLE;%(PreprocessorDefinitions) true true - ..\..\common\include + ..\Math; ..\..\Common\Include; $(CudaToolkitIncludeDir); %(AdditionalIncludeDirectories) true