CNTK/MachineLearning/cn/fileutil.cpp

2088 строки
62 KiB
C++

//
// fileutil.cpp - file I/O with error checking
//
// Copyright (c) Microsoft Corporation. All rights reserved.
//
// $Log: /Speech_To_Speech_Translation/dbn/dbn/fileutil.cpp $
//
// 125 1/03/13 8:53p Kaisheny
// Asynchronous SGD using data pipe.
//
// 124 9/30/12 10:46a Fseide
// new optional parameter to fuptodate()--caller can now choose whether a
// missing input file, with target file present, will cause a failure or
// considers the target up-to-date
//
// 123 8/20/12 12:29p V-hansu
// fixed a major bug in freadOrDie() for chunks > 15M units (breaking into
// chunks was broken)
//
// 122 4/01/12 12:02p Fseide
// (expanded an error message)
//
// 121 11/09/11 10:01 Fseide
// added a new overload for fgetfilelines() that returns an array of char*
// instead of strings, to avoid mem alloc
//
// 120 10/27/11 18:52 Fseide
// updated freadOrDie() to smaller chunk size
//
// 119 10/27/11 13:40 Fseide
// freadOrDie() now explicitly breaks up large reads because CRT fread()
// does not handle them (due to a Windows bug)
//
// 118 6/10/11 9:49 Fseide
// new function fgetfilelines() for reading text files
//
// 117 3/07/11 12:13 Fseide
// actually implemented unlinkOrDie() (was a dummy)
//
// 116 12/07/10 10:03 Fseide
// (corrected the buffer size in fsetpos() fro 65336 to 65536)
//
// 115 12/03/10 10:53 Fseide
// fsetpos() optimization when seeking forward within the current read
// buffer
//
// 114 11/18/10 4:32p Kit
// added missing header for errno
//
// 113 11/18/10 9:20 Fseide
// a basic optimization in fsetpos() to avoid rereading the buffer if
// fsetpos() does not actually move the file pointer
//
// 112 11/17/10 15:00 Fseide
// new function fuptodate();
// make_intermediate_dirs() moved to namespace msra::files (all new
// functions should be put in there)
//
// 111 11/12/10 16:43 Fseide
// bug in getfiletime(), totally broken
//
// 110 11/09/10 8:56 Fseide
// some cleanup of make_intermediate_dirs()
//
// 109 11/08/10 17:07 Fseide
// new function make_intermediate_dirs()
//
// 108 11/30/09 1:32p Kit
//
// 107 2/05/09 19:05 Fseide
// fgetline() now returns a non-const pointer, because user may want to
// post-process the line, and the returned value is a user-specified
// buffer anyway
//
// 106 1/16/09 8:59 Fseide
// exported fskipspace()
//
// 105 1/16/09 8:47 Fseide
// (a comment added)
//
// 104 1/15/09 7:38 Fseide
// some magic to unify fgetstring() for char and wchar_t to a single
// template function
//
// 103 1/14/09 19:27 Fseide
// new functions fsetpos() and fgetpos();
// added missing read-error checks to fget(w)string()
//
// 102 1/14/09 12:38 Fseide
// bug fix in fgetline(): missed an error check
//
// 101 1/09/09 7:40 Fseide
// (fixed a warning)
//
// 100 1/08/09 16:38 Fseide
// fopenOrDie() now supports "-" as the pathname, referring to stdin or
// stdout
//
// 99 1/08/09 15:32 Fseide
// new funtion expand_wildcards()
//
// 98 1/05/09 8:44 Fseide
// (added comments)
//
// 97 12/24/08 14:44 Fseide
// added an overflow check to fputwfx()
//
// 96 12/12/08 10:11a Qiluo
// (change marker of banned APIs)
//
// 95 12/11/08 7:40p Qiluo
// (change marker of banned APIs)
//
// 94 12/09/08 6:59p Qiluo
// reverted stringerror => strerror
//
// 93 12/09/08 6:37p Qiluo
// fixed a few compilation bugs
//
// 92 12/09/08 6:28p Qiluo
// strerror => stringerror
//
// 91 12/01/08 2:43p Qiluo
// add markers for banned APIs, and refine the api fixing
//
// 90 11/11/08 7:34p Qiluo
// fix bug in strnlen
//
// 89 11/11/08 18:27 Fseide
// no longer disables C4996
//
// 88 11/11/08 6:04p Qiluo
// recover the old fputstring functions
//
// 87 11/10/08 2:34p Qiluo
// remove the dependency of header "StringUtil.h"
//
// 86 10/31/08 5:08p Qiluo
// remove banned APIs
//
// 85 6/24/08 19:03 Fseide
// added fgetwstring() and fputstring() for wstrings
//
// 84 6/02/08 14:11 Fseide
// fgetwfx() and wputwfx() now a bit more tolerant
//
// 83 08-05-29 18:18 Llu
// fix the interface of fputwav
//
// 82 08-05-29 14:53 Llu
//
// 81 08-05-29 13:53 Llu
// add fputwav revise fgetwav using stl instead of short *
//
// 80 3/19/08 16:13 Fseide
// (better solution to prev. problem)
//
// 79 3/19/08 16:07 Fseide
// (#ifdef'ed out fprintfOrDie() in _MANAGED builds)
//
// 78 10/30/07 16:46 Fseide
//
// 77 3/27/07 13:54 Fseide
// added 'using namespace std;' (was removed from message.h as it does not
// belong there)
//
// 76 1/30/07 1:59p Kit
// Undid updates to fgetline error handling
//
// 70 12/20/06 10:48a Kit
// increased size of line buffer for fgetline because we seem to be
// getting large strings in some rss feeds
//
// 69 06-12-04 18:30 Llu
// (fixed an unnecessary "deprecated string function" warning under VS
// 2005)
//
// 68 11/27/06 11:40 Fseide
// new methods fgetwfx() and fputwfx() for direct access to simple PCM WAV
// files
//
// 67 10/14/06 18:31 Fseide
// added char* version of fexists()
//
// 66 5/14/06 19:58 Fseide
// new function fsetmode()
//
// 65 3/29/06 16:10 Fseide
// increased buffer size in fgetfile() to 64k
//
// 64 3/29/06 15:36 Fseide
// changed to reading entire file instead of line-by-line, not changing
// newlines anymore
//
// 63 3/24/06 4:40p Rogeryu
// workaround a VC 2003 header bug (va_start macro for references) in
// MESSAGE/ERROR functions
//
// 62 3/22/06 3:31p Rogeryu
// (comments changed)
//
// 61 3/21/06 5:21p Rogeryu
// review and fix level2_security OACR warnings
//
// 60 3/21/06 9:26a Rogeryu
// review and fix OACR warnings
//
// 59 06-03-15 15:41 Yushli
// Suppress C4996 Warning per function
//
// 58 06-03-14 12:11 Yushli
// Suppress C4996 Warning on strerror per function
//
// 57 06-03-14 10:33 Yushli
// Suppress C4996 Warning per function.
//
// 56 2/28/06 1:49p Kjchen
// suppress oacr warning
//
// 55 2/24/06 8:03p Kjchen
// depress oacr warnings
//
// 54 2/21/06 11:32a Kit
// aadded filesize64 to support large files
//
// 53 1/10/06 8:23p Rogeryu
// fix a warning
//
// 52 1/09/06 7:12p Rogeryu
// wide version of fgetline
//
// 51 12/20/05 21:15 Fseide
// changed CreateFile() to CreateFileW()
//
// 50 12/19/05 22:50 Fseide
// setfiletime() fixed, now actually works
//
// 49 12/19/05 21:52 Fseide
// fputfile() added in 8-bit string version
//
// 48 12/18/05 17:01 Fseide
// fixed file-handle leaks in error conditions
//
// 47 12/15/05 20:25 Fseide
// added getfiletime(), setfiletime(), and fputfile() for strings
//
// 46 9/27/05 12:22 Fseide
// added wstring version of renameOrDie()
//
// 45 9/22/05 12:26 Fseide
// new method fexists()
//
// 44 9/15/05 11:33 Fseide
// new version of fgetline() that avoids buffer allocations, since this
// seems very expensive esp. when reading a file line by line with
// fgetline()
//
// 43 9/05/05 4:57p F-xyzhao
// renameOrDie(): changed string to std::string
//
// 42 9/05/05 11:00 Fseide
// new method renameOrDie()
//
// 41 8/19/05 18:19 Fseide
// bugfixes in WAVEHEADER::write and prepare
//
// 40 8/19/05 18:02 Fseide
// WAVEHEADER::write() now flushes
//
// 39 8/19/05 17:56 Fseide
// extended WAVEHEADER with write() and update()
//
// 38 8/14/05 16:56 Fseide
// fopenOrDie() now sets large buffer if 'S' option
//
// 37 8/13/05 15:37 Fseide
// added new version of fgetline that takes a buffer
//
// 36 7/28/05 18:04 Fseide
// bug fix in fgetin24 and fputint24
//
// 35 7/26/05 18:54 Fseide
// new functions fgetint24() and fputint24()
//
// 34 5/10/05 14:12 Fseide
// (level-4 warning fixed)
//
// 33 5/10/05 11:57 Fseide
// (level-4 warnings removed)
//
// 32 5/09/05 12:07 Fseide
// fixed for-loop conformance issues
//
// 31 2/27/05 17:41 Fseide
// recovered v29 that somehow got overwritten
//
// 29 2/12/05 15:21 Fseide
// fgetdouble() and fputdouble() added
//
// 28 2/05/05 12:38 Fseide
// new methods fputfile(), fgetfile();
// new overload for filesize()
//
// 27 2/03/05 22:34 Fseide
// added new version of fgetline() that returns an STL string
//
// 26 5/31/04 10:06 Fseide
// new methods fseekOrDie(), ftellOrDie(), unlinkOrDie(), renameOrDie()
//
// 25 3/19/04 4:01p Fseide
// fwriteOrDie(): first argument changed to const
//
// 24 2/21/04 10:26 Fseide
// (compiler warnings eliminated)
//
// 23 2/19/04 9:46p V-xlshi
//
// 22 2/19/04 3:44p V-xlshi
// fgetwavraw and fgetraw function is added, fgetwav is changed but its
// functionality is the same with the old one.
//
// 21 2/03/04 8:17p V-xlshi
//
// 20 9/08/03 22:55 Fseide
// fgetwav() can now read stereo PCM files
//
// 19 8/15/03 15:40 Fseide
// new method filesize()
//
// 18 8/13/03 21:06 Fseide
// new function fputbyte()
//
// 17 8/13/03 15:37 Fseide
// an error msg corrected
//
// 16 8/07/03 22:04 Fseide
// fprintfOrDie() now really dies in case of error
//
// 15 7/30/03 5:09p Fseide
// (eliminated a compiler warning)
//
// 14 03-07-30 14:17 I-rogery
//
// 13 7/25/03 6:07p Fseide
// new functions fgetbyte() and fgetwav()
//
// 12 6/03/03 5:23p Fseide
// (some compiler warnings related to size_t eliminated)
//
// 11 3/27/03 3:42p Fseide
// fwriteOrDie() rewritten to break huge blocks into chunks of 16 MB
// because Windows std C lib can't handle fwrite() with e.g. 100 MB in one
// call
//
// 10 7/23/02 9:00p Jlzhou
//
// 9 7/03/02 9:25p Fseide
// fcompareTag() now uses STRING type for both of its arguments (before,
// it used const char * for one of them)
//
// 8 6/10/02 3:14p Fseide
// new functions fgettoken(), fgetfloat_ascii(), fskipNewline()
//
// 7 6/07/02 7:26p Fseide
// new functions fcheckTag_ascii() and fgetint_ascii()
//
// 6 6/03/02 10:58a Jlzhou
//
// 5 4/15/02 1:12p Fseide
// void fputstring (FILE * f, const TSTRING & str) and fpad() added
//
// 4 4/03/02 3:56p Fseide
// VSS keyword and copyright added
//
// F. Seide 5 Mar 2002
//
#ifndef UNDER_CE // fixed-buffer overloads not available for wince
#ifdef _CRT_SECURE_CPP_OVERLOAD_STANDARD_NAMES // fixed-buffer overloads for strcpy() etc.
#undef _CRT_SECURE_CPP_OVERLOAD_STANDARD_NAMES
#endif
#define _CRT_SECURE_CPP_OVERLOAD_STANDARD_NAMES 1
#endif
#include "basetypes.h"
#include "fileutil.h"
#include "message.h"
#include <stdio.h>
#include <string.h>
#include <ctype.h>
#include "windows.h" // for FILETIME
#include <algorithm> // for std::find
#ifndef UNDER_CE // some headers don't exist under winCE - the appropriate definitions seem to be in stdlib.h
#include <fcntl.h> // for _O_BINARY/TEXT - not needed for wince
#include <io.h> // for _setmode()
#endif
#include <errno.h>
using namespace std;
// ----------------------------------------------------------------------------
// fopenOrDie(): like fopen() but terminate with err msg in case of error.
// A pathname of "-" returns stdout or stdin, depending on mode, and it will
// change the binary mode if 'b' or 't' are given. If you use this, make sure
// not to fclose() such a handle.
// ----------------------------------------------------------------------------
static const wchar_t * strchr (const wchar_t * s, wchar_t v) { return wcschr (s, v); }
// pathname is "-" -- open stdin or stdout. Changes bin mode if 'b' or 't' given.
template<class _T> FILE * fopenStdHandle (const _T * mode)
{
FILE * f = strchr (mode, 'r') ? stdin : stdout;
if (strchr (mode, 'b') || strchr (mode, 't')) // change binary mode
{
// switch to binary mode if not yet (in case it is stdin)
int rc = _setmode (_fileno (f), strchr (mode, 'b') ? _O_BINARY : _O_TEXT);
if (rc == -1)
ERROR ("error switching stream to binary mode: %s", strerror (errno));
}
return f;
}
FILE * fopenOrDie (const STRING & pathname, const char * mode)
{
FILE * f = (pathname[0] == '-') ? fopenStdHandle (mode) : fopen (pathname.c_str(), mode);
if (f == NULL)
{
ERROR ("error opening file '%s': %s", pathname.c_str(), strerror (errno));
return NULL; // keep OACR happy
}
if (strchr (mode, 'S'))
{ // if optimized for sequential access then use large buffer
setvbuf (f, NULL, _IOFBF, 10000000); // OK if it fails
}
return f;
}
FILE * fopenOrDie (const WSTRING & pathname, const wchar_t * mode)
{
FILE * f = (pathname[0] == '-') ? fopenStdHandle (mode) : _wfopen (pathname.c_str(), mode);
if (f == NULL)
{
ERROR ("error opening file '%S': %s", pathname.c_str(), strerror (errno));
return NULL; // keep OACR happy
}
if (strchr (mode, 'S'))
{ // if optimized for sequential access then use large buffer
setvbuf (f, NULL, _IOFBF, 10000000); // OK if it fails
}
return f;
}
// ----------------------------------------------------------------------------
// set mode to binary or text (pass 'b' or 't')
// ----------------------------------------------------------------------------
void fsetmode (FILE * f, char type)
{
if (type != 'b' && type != 't')
{
ERROR ("fsetmode: invalid type '%c'");
}
#ifdef UNDER_CE // winCE and win32 have different return types for _fileno
FILE *fd = _fileno (f); // note: no error check possible
#else
int fd = _fileno (f); // note: no error check possible
#endif
int mode = type == 'b' ? _O_BINARY : _O_TEXT;
int rc = _setmode (fd, mode);
if (rc == -1)
{
ERROR ("error changing file mode: %s", strerror (errno));
}
}
// ----------------------------------------------------------------------------
// freadOrDie(): like fread() but terminate with err msg in case of error
// ----------------------------------------------------------------------------
void freadOrDie (void * ptr, size_t size, size_t count, FILE * f)
{
// \\XXX\C$ reads are limited, with some randomness (e.g. 48 MB), on Windows 7 32 bit, so we break this into chunks of some MB. Meh.
while (count > 0)
{
size_t chunkn = min (count, 15*1024*1024); // BUGBUG: I surely meant this limit to be bytes, not units of 'size'...
size_t n = fread (ptr, size, chunkn, f);
if (n != chunkn)
ERROR ("error reading from file: %s", strerror (errno));
count -= n;
ptr = n * size + (char*) ptr;
}
}
void freadOrDie (void * ptr, size_t size, size_t count, const HANDLE f)
{
// \\XXX\C$ reads are limited, with some randomness (e.g. 48 MB), on Windows 7 32 bit, so we break this into chunks of some MB. Meh.
while (count > 0)
{
size_t chunkn = min (count * size, 15*1024*1024);
DWORD n ;
ReadFile(f, ptr, (DWORD) chunkn, &n, NULL);
if (n != chunkn)
ERROR ("error number for reading from file: %s", GetLastError());
count -= (size_t) (n / size);
ptr = n + (char*) ptr;
}
}
// ----------------------------------------------------------------------------
// fwriteOrDie(): like fwrite() but terminate with err msg in case of error;
// Windows C std lib fwrite() has problems writing >100 MB at a time (fails
// with Invalid Argument error), so we break it into chunks (yak!!)
// ----------------------------------------------------------------------------
void fwriteOrDie (const void * ptr, size_t size, size_t count, FILE * f)
{
const char * p1 = (const char *) ptr;
size_t totalBytes = size * count;
while (totalBytes > 0)
{
size_t wantWrite = totalBytes;
#define LIMIT (16*1024*1024) // limit to 16 MB at a time
if (wantWrite > LIMIT)
{
wantWrite = LIMIT;
}
size_t n = fwrite ((const void *) p1, 1, wantWrite, f);
if (n != wantWrite)
{
ERROR ("error writing to file (ptr=0x%08lx, size=%d,"
" count=%d, writing %d bytes after %d): %s",
ptr, size, count, (int) wantWrite,
(int) (size * count - totalBytes),
strerror (errno));
}
totalBytes -= wantWrite;
p1 += wantWrite;
}
}
void fwriteOrDie (const void * ptr, size_t size, size_t count, const HANDLE f)
{
const char * p1 = (const char *) ptr;
DWORD totalBytes = (DWORD) (size * count);
while (totalBytes > 0)
{
DWORD wantWrite = totalBytes;
#define LIMIT (16*1024*1024) // limit to 16 MB at a time
if (wantWrite > LIMIT)
{
wantWrite = LIMIT;
}
DWORD byteWritten = 0 ;
if (WriteFile(f, (const void *) p1, wantWrite, &byteWritten, NULL) == false)
{
ERROR ("error writing to file (ptr=0x%08lx, size=%d,"
" count=%d, writing %d bytes after %d): %s",
ptr, size, count, (int) wantWrite,
(int) (size * count - totalBytes),
strerror (errno));
}
totalBytes -= wantWrite;
p1 += wantWrite;
}
}
// ----------------------------------------------------------------------------
// fprintfOrDie(): like fprintf() but terminate with err msg in case of error
// ----------------------------------------------------------------------------
#pragma warning(push)
#pragma warning(disable : 4793) // 'vararg' : causes native code generation
void fprintfOrDie (FILE * f, const char * fmt, ...)
{
va_list arg_ptr;
va_start (arg_ptr, fmt);
int rc = vfprintf (f, fmt, arg_ptr);
if (rc < 0)
{
ERROR ("error writing to file: %s", strerror (errno));
}
}
#pragma warning(pop)
// ----------------------------------------------------------------------------
// fflushOrDie(): like fflush() but terminate with err msg in case of error
// ----------------------------------------------------------------------------
void fflushOrDie (FILE * f)
{
int rc = fflush (f);
if (rc != 0)
{
ERROR ("error flushing to file: %s", strerror (errno));
}
}
// ----------------------------------------------------------------------------
// filesize(): determine size of the file in bytes (with open file)
// BUGBUG: how about files > 4 GB?
// ----------------------------------------------------------------------------
size_t filesize (FILE * f)
{
long curPos = ftell (f);
if (curPos == -1L)
{
ERROR ("error determining file position: %s", strerror (errno));
}
int rc = fseek (f, 0, SEEK_END);
if (rc != 0)
{
ERROR ("error seeking to end of file: %s", strerror (errno));
}
long len = ftell (f);
if (len == -1L)
{
ERROR ("error determining file position: %s", strerror (errno));
}
rc = fseek (f, curPos, SEEK_SET);
if (rc != 0)
{
ERROR ("error resetting file position: %s", strerror (errno));
}
return (size_t) len;
}
// filesize(): determine size of the file in bytes (with pathname)
size_t filesize (const wchar_t * pathname)
{
FILE * f = fopenOrDie (pathname, L"rb");
try
{
size_t len = filesize (f);
fclose (f);
return (size_t) len;
}
catch (...)
{
fclose (f);
throw;
}
}
#ifndef UNDER_CE // no 64-bit under winCE
// filesize64(): determine size of the file in bytes (with pathname)
__int64 filesize64 (const wchar_t * pathname)
{
__stat64 fileinfo;
if (_wstat64 (pathname,&fileinfo) == -1)
return 0;
else
return fileinfo.st_size;
}
#endif
// ----------------------------------------------------------------------------
// fseekOrDie(),ftellOrDie(), fget/setpos(): seek functions with error handling
// ----------------------------------------------------------------------------
long fseekOrDie (FILE * f, long offset, int mode)
{
long curPos = ftell (f);
if (curPos == -1L)
{
ERROR ("error seeking: %s", strerror (errno));
}
int rc = fseek (f, offset, mode);
if (rc != 0)
{
ERROR ("error seeking: %s", strerror (errno));
}
return curPos;
}
unsigned __int64 fgetpos (FILE * f)
{
fpos_t post;
int rc = ::fgetpos (f, &post);
if (rc != 0)
ERROR ("error getting file position: %s", strerror (errno));
return post;
}
void fsetpos (FILE * f, unsigned __int64 reqpos)
{
// ::fsetpos() flushes the read buffer. This conflicts with a situation where
// we generally read linearly but skip a few bytes or KB occasionally, as is
// the case in speech recognition tools. This requires a number of optimizations.
unsigned __int64 curpos = fgetpos (f);
unsigned __int64 cureob = curpos + f->_cnt; // UGH: we mess with an internal structure here
while (reqpos >= curpos && reqpos < cureob)
{
// if we made it then do not call fsetpos()
if (reqpos == fgetpos (f))
return;
// if we seek within the existing buffer, then just move to the position by dummy reads
char buf[65536];
size_t n = min ((size_t) reqpos - (size_t) curpos, _countof (buf));
fread (buf, sizeof (buf[0]), n, f); // (this may fail, but really shouldn't)
curpos += n;
// since we mess with f->_cnt, if something unexpected happened to the buffer then back off
if (curpos != fgetpos (f) || curpos + f->_cnt != cureob)
break; // oops
}
// actually perform the seek
fpos_t post = reqpos;
int rc = ::fsetpos (f, &post);
if (rc != 0)
ERROR ("error setting file position: %s", strerror (errno));
}
// ----------------------------------------------------------------------------
// unlinkOrDie(): unlink() with error handling
// ----------------------------------------------------------------------------
void unlinkOrDie (const std::string & pathname)
{
if (_unlink (pathname.c_str()) != 0 && errno != ENOENT) // if file is missing that's what we want
ERROR ("error deleting file '%s': %s", pathname.c_str(), strerror (errno));
}
void unlinkOrDie (const std::wstring & pathname)
{
if (_wunlink (pathname.c_str()) != 0 && errno != ENOENT) // if file is missing that's what we want
ERROR ("error deleting file '%S': %s", pathname.c_str(), strerror (errno));
}
// ----------------------------------------------------------------------------
// renameOrDie(): rename() with error handling
// ----------------------------------------------------------------------------
#ifndef UNDER_CE // CE only supports Unicode APIs
void renameOrDie (const std::string & from, const std::string & to)
{
if (!MoveFileA (from.c_str(),to.c_str()))
ERROR ("error renaming: %s", GetLastError());
}
#endif
void renameOrDie (const std::wstring & from, const std::wstring & to)
{
if (!MoveFileW (from.c_str(),to.c_str()))
ERROR ("error renaming: %s", GetLastError());
}
// ----------------------------------------------------------------------------
// fexists(): test if a file exists
// ----------------------------------------------------------------------------
bool fexists (const wchar_t * pathname)
{
WIN32_FIND_DATAW findFileData;
HANDLE hFind = FindFirstFileW (pathname, &findFileData);
if (hFind != INVALID_HANDLE_VALUE)
{
FindClose (hFind);
return true;
}
else
{
return false;
}
}
#ifndef UNDER_CE // CE only supports Unicode APIs
bool fexists (const char * pathname)
{
WIN32_FIND_DATAA findFileData;
HANDLE hFind = FindFirstFileA (pathname, &findFileData);
if (hFind != INVALID_HANDLE_VALUE)
{
FindClose (hFind);
return true;
}
else
{
return false;
}
}
#endif
// ----------------------------------------------------------------------------
// funicode(): test if a file uses unicode by reading its BOM
// ----------------------------------------------------------------------------
bool funicode (FILE * f)
{
unsigned short testCode;
if (fread (&testCode, sizeof(short), 1, f) == 1 &&
(int)testCode == 0xFEFF)
return true;
fseek (f,0,SEEK_SET);
//rewind (f);
return false;
}
// ----------------------------------------------------------------------------
// fgetline(): like fgets() but terminate with err msg in case of error;
// removes the newline character at the end (like gets());
// Returns 'buf' (always). buf guaranteed to be 0-terminated.
// ----------------------------------------------------------------------------
static inline wchar_t * fgets (wchar_t * buf, int n, FILE * f) { return fgetws (buf, n, f); }
static inline string _utf8 (const string & s) { return s; }
static inline string _utf8 (const wstring & s) { return msra::strfun::utf8 (s); }
static inline size_t strnlen (wchar_t * s, size_t n) { return wcsnlen (s, n); }
#ifdef UNDER_CE // strlen for char * not defined in winCE
static inline size_t strnlen (const char *s, size_t n) { return std::find (s,s+n,'\0') - s; }
#endif
template<class CHAR>
CHAR * fgetline (FILE * f, CHAR * buf, int size)
{
unsigned __int64 filepos = fgetpos (f); // (for error message only)
CHAR * p = fgets (buf, size, f);
if (p == NULL) // EOF reached: next time feof() = true
{
if (ferror (f))
ERROR ("error reading line: %s", strerror (errno));
buf[0] = 0;
return buf;
}
size_t n = strnlen (p, size);
// check for buffer overflow
if (n >= (size_t) size -1)
{
basic_string<CHAR> example (p, n < 100 ? n : 100);
ERROR ("input line too long at file offset %I64d (max. %d characters allowed) [%s ...]",
filepos, size -1, _utf8 (example).c_str());
}
// remove newline at end
if (n > 0 && p[n-1] == '\n') // UNIX and Windows style
{
n--;
p[n] = 0;
if (n > 0 && p[n-1] == '\r') // Windows style
{
n--;
p[n] = 0;
}
}
else if (n > 0 && p[n-1] == '\r') // Mac style
{
n--;
p[n] = 0;
}
return buf;
}
#if 0
const wchar_t * fgetline (FILE * f, wchar_t * buf, int size)
{
wchar_t * p = fgetws (buf, size, f);
if (p == NULL) // EOF reached: next time feof() = true
{
if (ferror (f))
ERROR ("error reading line: %s", strerror (errno));
buf[0] = 0;
return buf;
}
size_t n = wcsnlen (p, size); // SECURITY NOTE: string use has been reviewed
// check for buffer overflow
if (n >= (size_t) size -1)
{
wstring example (buf, min (n, 100));
ERROR ("input line too long at file offset %U64d (max. %d characters allowed) [%S ...]",
fgetpos (f), size -1, example.c_str());
}
// remove newline at end
if (n > 0 && p[n-1] == L'\n') // UNIX and Windows style
{
n--;
p[n] = 0;
if (n > 0 && p[n-1] == L'\r') // Windows style
{
n--;
p[n] = 0;
}
}
else if (n > 0 && p[n-1] == L'\r') // Mac style
{
n--;
p[n] = 0;
}
return buf;
}
#endif
// STL string version
std::string fgetline (FILE * f)
{
fixed_vector<char> buf (1000000);
return fgetline (f, &buf[0], (int) buf.size());
}
// STL string version
std::wstring fgetlinew (FILE * f)
{
fixed_vector<wchar_t> buf (1000000);
return fgetline (f, &buf[0], (int) buf.size());
}
// STL string version avoiding most memory allocations
void fgetline (FILE * f, std::string & s, ARRAY<char> & buf)
{
buf.resize (1000000); // enough? // KIT: increased to 1M to be safe
const char * p = fgetline (f, &buf[0], (int) buf.size());
s.assign (p);
}
void fgetline (FILE * f, std::wstring & s, ARRAY<wchar_t> & buf)
{
buf.resize (1000000); // enough? // KIT: increased to 1M to be safe
const wchar_t * p = fgetline (f, &buf[0], (int) buf.size());
s.assign (p);
}
// char buffer version
void fgetline (FILE * f, ARRAY<char> & buf)
{
const int BUF_SIZE = 1000000; // enough? // KIT: increased to 1M to be safe
buf.resize (BUF_SIZE);
fgetline (f, &buf[0], (int) buf.size());
buf.resize (strnlen (&buf[0], BUF_SIZE) +1); // SECURITY NOTE: string use has been reviewed
}
void fgetline (FILE * f, ARRAY<wchar_t> & buf)
{
const int BUF_SIZE = 1000000; // enough? // KIT: increased to 1M to be safe
buf.resize (BUF_SIZE);
fgetline (f, &buf[0], (int) buf.size());
buf.resize (wcsnlen (&buf[0], BUF_SIZE) +1); // SECURITY NOTE: string use has been reviewed
}
// read a 0-terminated string
const char * fgetstring (FILE * f, __out_z_cap(size) char * buf, int size)
{
int i;
for (i = 0; ; i++)
{
int c = fgetc (f);
if (c == EOF)
ERROR ("error reading string or missing 0: %s", strerror (errno));
if (c == 0) break;
if (i >= size -1)
{
ERROR ("input line too long (max. %d characters allowed)", size -1);
}
buf[i] = (char) c;
}
ASSERT (i < size);
buf[i] = 0;
return buf;
}
const char * fgetstring (const HANDLE f, __out_z_cap(size) char * buf, int size)
{
int i;
for (i = 0; ; i++)
{
char c;
freadOrDie((void*) &c, sizeof(char), 1, f);
if (c == (char) 0) break;
if (i >= size -1)
{
ERROR ("input line too long (max. %d characters allowed)", size -1);
}
buf[i] = (char) c;
}
ASSERT (i < size);
buf[i] = 0;
return buf;
}
// read a 0-terminated wstring
wstring fgetwstring (FILE * f)
{
wstring res;
for (;;)
{
int c = fgetwc (f);
if (c == EOF)
ERROR ("error reading string or missing 0: %s", strerror (errno));
if (c == 0) break;
res.push_back ((wchar_t) c);
}
return res;
}
void fskipspace (FILE * f)
{
for (;;)
{
int c = fgetc (f);
if (c == EOF) // hit the end
{
if (ferror (f))
ERROR ("error reading from file: %s", strerror (errno));
break;
}
if (!isspace (c)) // end of space: undo getting that character
{
int rc = ungetc (c, f);
if (rc != c)
ERROR ("error in ungetc(): %s", strerror (errno));
break;
}
}
}
// fskipNewLine(): skip all white space until end of line incl. the newline
void fskipNewline (FILE * f)
{
char c;
// skip white space
do
{
freadOrDie (&c, sizeof (c), 1, f);
} while (c == ' ' || c == '\t');
if (c == '\r') // Windows-style CR-LF
{
freadOrDie (&c, sizeof (c), 1, f);
}
if (c != '\n')
{
ERROR ("unexpected garbage at end of line");
}
}
// read a space-terminated token
// ...TODO: eat trailing space like fscanf() doessurrounding space)
const char * fgettoken (FILE * f, __out_z_cap(size) char * buf, int size)
{
fskipspace (f); // skip leading space
int c = -1;
int i;
for (i = 0; ; i++)
{
c = fgetc (f);
if (c == EOF) break;
if (isspace (c)) break;
if (i >= size -1)
ERROR ("input token too long (max. %d characters allowed)", size -1);
buf[i] = (char) c;
}
// ... TODO: while (isspace (c)) c = fgetc (f); // skip trailing space
if (c != EOF)
{
int rc = ungetc (c, f);
if (rc != c)
ERROR ("error in ungetc(): %s", strerror (errno));
}
ASSERT (i < size);
buf[i] = 0;
return buf;
}
STRING fgettoken (FILE * f)
{
char buf[80];
return fgettoken (f, buf, sizeof(buf)/sizeof(*buf));
}
// ----------------------------------------------------------------------------
// fputstring(): write a 0-terminated string
// ----------------------------------------------------------------------------
void fputstring (FILE * f, const char * str)
{
fwriteOrDie ((void *) str, sizeof (*str), strnlen (str, SIZE_MAX)+1, f); // SECURITY NOTE: string use has been reviewed
}
void fputstring (const HANDLE f, const char * str)
{
fwriteOrDie ((void *) str, sizeof (*str), strnlen (str, SIZE_MAX)+1, f); // SECURITY NOTE: string use has been reviewed
}
void fputstring (FILE * f, const std::string & str)
{
fputstring (f, str.c_str());
}
void fputstring (FILE * f, const wchar_t * str)
{
fwriteOrDie ((void *) str, sizeof (*str), wcsnlen (str, SIZE_MAX)+1, f); // SECURITY NOTE: string use has been reviewed
}
void fputstring (FILE * f, const std::wstring & str)
{
fputstring (f, str.c_str());
}
// ----------------------------------------------------------------------------
// fgetTag(): read a 4-byte tag & return as a string
// ----------------------------------------------------------------------------
std::string fgetTag (FILE * f)
{
char tag[5];
freadOrDie (&tag[0], sizeof (tag[0]), 4, f);
tag[4] = 0;
return std::string (tag);
}
std::string fgetTag (const HANDLE f)
{
char tag[5];
freadOrDie (&tag[0], sizeof (tag[0]), 4, f);
tag[4] = 0;
return std::string (tag);
}
// ----------------------------------------------------------------------------
// fcheckTag(): read a 4-byte tag & verify it; terminate if wrong tag
// ----------------------------------------------------------------------------
void fcheckTag (FILE * f, const char * expectedTag)
{
fcompareTag (fgetTag (f), expectedTag);
}
void fcheckTag (const HANDLE f, const char * expectedTag)
{
fcompareTag (fgetTag (f), expectedTag);
}
void fcheckTag_ascii (FILE * f, const STRING & expectedTag)
{
char buf[20]; // long enough for a tag
fskipspace (f);
fgettoken (f, buf, sizeof(buf)/sizeof(*buf));
if (expectedTag != buf)
{
ERROR ("invalid tag '%s' found; expected '%s'", buf, expectedTag.c_str());
}
}
// ----------------------------------------------------------------------------
// fcompareTag(): compare two tags; terminate if wrong tag
// ----------------------------------------------------------------------------
void fcompareTag (const STRING & readTag, const STRING & expectedTag)
{
if (readTag != expectedTag)
{
ERROR ("invalid tag '%s' found; expected '%s'",
readTag.c_str(), expectedTag.c_str());
}
}
// ----------------------------------------------------------------------------
// fputTag(): write a 4-byte tag
// ----------------------------------------------------------------------------
void fputTag (FILE * f, const char * tag)
{
const int TAG_LEN = 4;
ASSERT (strnlen (tag, TAG_LEN + 1) == TAG_LEN);
fwriteOrDie ((void *) tag, sizeof (*tag), strnlen (tag, TAG_LEN), f);
}
void fputTag(const HANDLE f, const char * tag)
{
const int TAG_LEN = 4;
ASSERT (strnlen (tag, TAG_LEN + 1) == TAG_LEN);
fwriteOrDie ((void *) tag, sizeof (*tag), strnlen (tag, TAG_LEN), f);
}
// ----------------------------------------------------------------------------
// fskipstring(): skip a 0-terminated string, such as a pad string
// ----------------------------------------------------------------------------
void fskipstring (FILE * f)
{
char c;
do
{
freadOrDie (&c, sizeof (c), 1, f);
}
while (c);
}
// ----------------------------------------------------------------------------
// fpad(): write a 0-terminated string to pad file to a n-byte boundary
// (note: file must be opened in binmode to work properly on DOS/Windows!!!)
// ----------------------------------------------------------------------------
void fpad (FILE * f, int n)
{
// get current writing position
int pos = ftell (f);
if (pos == -1)
{
ERROR ("error in ftell(): %s", strerror (errno));
}
// determine how many bytes are needed (at least 1 for the 0-terminator)
// and create a dummy string of that length incl. terminator
int len = n - (pos % n);
const char dummyString[] = "MSR-Asia: JL+FS";
size_t offset = sizeof(dummyString)/sizeof(dummyString[0]) - len;
ASSERT (offset >= 0);
fputstring (f, dummyString + offset);
}
// ----------------------------------------------------------------------------
// fgetbyte(): read a byte value
// ----------------------------------------------------------------------------
char fgetbyte (FILE * f)
{
char v;
freadOrDie (&v, sizeof (v), 1, f);
return v;
}
// ----------------------------------------------------------------------------
// fgetshort(): read a short value
// ----------------------------------------------------------------------------
short fgetshort (FILE * f)
{
short v;
freadOrDie (&v, sizeof (v), 1, f);
return v;
}
short fgetshort_bigendian (FILE * f)
{
unsigned char b[2];
freadOrDie (&b, sizeof (b), 1, f);
return (short) ((b[0] << 8) + b[1]);
}
// ----------------------------------------------------------------------------
// fgetint24(): read a 3-byte (24-bit) int value
// ----------------------------------------------------------------------------
int fgetint24 (FILE * f)
{
int v;
ASSERT (sizeof (v) == 4);
freadOrDie (&v, sizeof (v) -1, 1, f); // only read 3 lower-order bytes
v <<= 8; // shift up (upper 8 bits uninit'ed)
v >>= 8; // shift down 8 bits with sign-extend
return v;
}
// ----------------------------------------------------------------------------
// fgetint(): read an int value
// ----------------------------------------------------------------------------
int fgetint (FILE * f)
{
int v;
freadOrDie (&v, sizeof (v), 1, f);
return v;
}
int fgetint (const HANDLE f)
{
int v;
freadOrDie (&v, sizeof (v), 1, f);
return v;
}
int fgetint_bigendian (FILE * f)
{
unsigned char b[4];
freadOrDie (&b, sizeof (b), 1, f);
return (int) (((((b[0] << 8) + b[1]) << 8) + b[2]) << 8) + b[3];
}
int fgetint_ascii (FILE * f)
{
fskipspace (f);
int res = 0;
char c;
freadOrDie (&c, sizeof (c), 1, f);
while (isdigit ((unsigned char)c))
{
res = (10 * res) + (c - '0');
freadOrDie (&c, sizeof (c), 1, f);
}
int rc = ungetc (c, f);
if (rc != c)
{
ERROR ("error in ungetc(): %s", strerror (errno));
}
return res;
}
// ----------------------------------------------------------------------------
// fgetfloat(): read a float value
// ----------------------------------------------------------------------------
float fgetfloat (FILE * f)
{
float v;
freadOrDie (&v, sizeof (v), 1, f);
return v;
}
float fgetfloat_bigendian (FILE * f)
{
int bitpattern = fgetint_bigendian (f);
return *((float*) &bitpattern);
}
float fgetfloat_ascii (FILE * f)
{
float val;
fskipspace (f);
int rc = fscanf (f, "%f", &val); // security hint: safe overloads
if (rc == 0)
ERROR ("error reading float value from file (invalid format): %s");
else if (rc == EOF)
ERROR ("error reading from file: %s", strerror (errno));
ASSERT (rc == 1);
return val;
}
// ----------------------------------------------------------------------------
// fgetdouble(): read a double value
// ----------------------------------------------------------------------------
double fgetdouble (FILE * f)
{
double v;
freadOrDie (&v, sizeof (v), 1, f);
return v;
}
// ----------------------------------------------------------------------------
// fgetwav(): read an entire .wav file
// ----------------------------------------------------------------------------
void WAVEHEADER::prepareRest (int sampleCount)
{
FmtLength = 16;
wFormatTag = 1;
nAvgBytesPerSec = nSamplesPerSec * nBlockAlign;
riffchar[0] = 'R';
riffchar[1] = 'I';
riffchar[2] = 'F';
riffchar[3] = 'F';
if (sampleCount != -1)
{
DataLength = sampleCount * nBlockAlign;
RiffLength = 36 + DataLength;
}
else
{
DataLength = 0xffffffff;
RiffLength = 0xffffffff;
}
wavechar[0] = 'W';
wavechar[1] = 'A';
wavechar[2] = 'V';
wavechar[3] = 'E';
wavechar[4] = 'f';
wavechar[5] = 'm';
wavechar[6] = 't';
wavechar[7] = ' ';
datachar[0] = 'd';
datachar[1] = 'a';
datachar[2] = 't';
datachar[3] = 'a';
}
void WAVEHEADER::prepare (unsigned int Fs, int Bits, int Channels, int SampleCount)
{
nChannels = (short) Channels;
nSamplesPerSec = Fs;
nBlockAlign = (short) (Channels * (Bits/8));
nAvgBytesPerSec = Fs * nBlockAlign;
wBitsPerSample = (short) Bits;
prepareRest (SampleCount);
}
void WAVEHEADER::prepare (const WAVEFORMATEX & wfx, int sampleCount /* -1 for unknown */)
{
nChannels = wfx.nChannels;
nSamplesPerSec = wfx.nSamplesPerSec;
nBlockAlign = wfx.nBlockAlign;
wBitsPerSample = wfx.wBitsPerSample;
prepareRest (sampleCount);
}
void WAVEHEADER::write (FILE * f)
{
fputTag (f, "RIFF");
fputint (f, RiffLength);
fputTag (f, "WAVE");
fputTag (f, "fmt ");
fputint (f, FmtLength);
fputshort (f, wFormatTag);
fputshort (f, nChannels);
fputint (f, nSamplesPerSec);
fputint (f, nAvgBytesPerSec);
fputshort (f, nBlockAlign);
fputshort (f, wBitsPerSample);
ASSERT (FmtLength == 16);
ASSERT (wFormatTag == 1);
fputTag (f, "data");
fputint (f, DataLength);
fflushOrDie (f);
}
/*static*/ void WAVEHEADER::update (FILE * f)
{
long curPos = ftell (f);
if (curPos == -1L)
{
ERROR ("error determining file position: %s", strerror (errno));
}
unsigned int len = (unsigned int) filesize (f);
unsigned int RiffLength = len - 8;
unsigned int DataLength = RiffLength - 36;
fseekOrDie (f, 4, SEEK_SET);
fputint (f, RiffLength);
fseekOrDie (f, 40, SEEK_SET);
fputint (f, DataLength);
fseekOrDie (f, curPos, SEEK_SET);
}
unsigned int WAVEHEADER::read (FILE * f, signed short & wRealFormatTag, int & bytesPerSample)
{
// read header
fcheckTag (f, "RIFF");
/*unsigned int riffLen = */ fgetint (f);
fcheckTag (f, "WAVE");
fcheckTag (f, "fmt ");
unsigned int fmtLen = fgetint (f);
wRealFormatTag = fgetshort (f);
if (wRealFormatTag == -2) // MARecorder.exe [Ivan Tashev] puts a -2 for
{ // 8-channel recordings (meaning unknown).
wRealFormatTag = 1; // Workaround: pretend it is 1 (seems safe)
}
(wRealFormatTag == 1 || wRealFormatTag == 7)
|| ERROR ("WAVEHEADER::read: wFormatTag=%d not supported for now", wRealFormatTag);
unsigned short wChannels = fgetshort (f);
unsigned long dwSamplesPerSec = fgetint (f);
unsigned int sampleRate = dwSamplesPerSec;
/*unsigned long dwAvgBytesPerSec = */ fgetint (f);
unsigned short wBlockAlign = fgetshort (f);
unsigned short wBitsPerSample = fgetshort (f);
(wBitsPerSample <= 16) || ERROR ("WAVEHEADER::read: invalid wBitsPerSample %d", wBitsPerSample);
bytesPerSample = wBitsPerSample / 8;
(wBlockAlign == wChannels * bytesPerSample)
|| ERROR ("WAVEHEADER::read: wBlockAlign != wChannels*bytesPerSample not supported");
while (fmtLen > 16) // unused extra garbage in header
{
fgetbyte (f);
fmtLen--;
}
if (wRealFormatTag == 7)
{
(bytesPerSample == 1) || ERROR ("WAVEHEADER::read: invalid wBitsPerSample %d for mulaw", wBitsPerSample);
fcheckTag (f, "fact");
unsigned int factLen = fgetint (f);
while (factLen > 0)
{
fgetbyte (f);
factLen--;
}
}
fcheckTag (f, "data");
unsigned int dataLen = fgetint (f);
unsigned int numSamples = dataLen / wBlockAlign;
// prepare a nice wave header without junk (44 bytes, 16-bit PCM)
prepare (sampleRate, wBitsPerSample, wChannels, numSamples);
return numSamples;
}
static short toolULawToLinear(unsigned char p_ucULawByte)
{
static short anExpLut[8] = { 0, 132, 396, 924, 1980, 4092, 8316, 16764 };
short nSign, nExponent, nMantissa, nSample;
p_ucULawByte=~p_ucULawByte;
nSign=(p_ucULawByte & 0x80);
nExponent=(p_ucULawByte >> 4) & 0x07;
nMantissa=p_ucULawByte & 0x0F;
nSample=anExpLut[nExponent]+(nMantissa<<(nExponent+3));
if(nSign != 0)
nSample = -nSample;
return nSample;
}
// fgetwavraw(): only read data of .wav file. For multi-channel data, samples
// are kept interleaved.
static void fgetwavraw(FILE * f, ARRAY<short> & wav, const WAVEHEADER & wavhd)
{
int bytesPerSample = wavhd.wBitsPerSample / 8; // (sample size on one channel)
wav.resize (wavhd.DataLength / bytesPerSample);
if (wavhd.wFormatTag == 7) // mulaw
{
(wavhd.nChannels == 1) || ERROR ("fgetwav: wChannels=%d not supported for mulaw", wavhd.nChannels);
ARRAY<unsigned char> data;
int numSamples = wavhd.DataLength/wavhd.nBlockAlign;
data.resize (numSamples);
freadOrDie (&data[0], sizeof (data[0]), numSamples, f);
for (int i = 0; i < numSamples; i++)
{
wav[i] = toolULawToLinear (data[i]);
}
}
else if (bytesPerSample == 2)
{ // note: we may be reading an interleaved multi-channel signal.
freadOrDie (&wav[0], sizeof (wav[0]), wav.size(), f);
}
// ... TODO: support 8 bit linear PCM samples (implement when needed; samples scaled to 'short')
else
{
ERROR ("bytesPerSample != 2 is not supported except mulaw format!\n");
}
}
// ----------------------------------------------------------------------------
// fgetwav(): read an entire .wav file. Stereo is mapped to mono.
// ----------------------------------------------------------------------------
void fgetwav (FILE * f, ARRAY<short> & wav, int & sampleRate)
{
WAVEHEADER wavhd; // will be filled in for 16-bit PCM!!
signed short wFormatTag; // real format tag as found in data
int bytesPerSample; // bytes per sample as found in data
unsigned int numSamples = wavhd.read (f, wFormatTag, bytesPerSample);
sampleRate = (int) wavhd.nSamplesPerSec;
if (wavhd.nChannels == 1)
{
fgetwavraw (f, wav, wavhd);
}
else if (wavhd.nChannels == 2)
{
//read raw data
ARRAY<short> buf;
buf.resize(numSamples * 2);
fgetwavraw(f, buf, wavhd);
//map to mono
wav.resize (numSamples);
const short * p = &buf[0];
for (int i = 0; i < (int) numSamples; i++)
{
int l = *p++;
int r = *p++;
int mono = ((l + r) + 1) >> 1;
wav[i] = (short) mono;
}
}
else
{
ERROR ("bytesPerSample/wChannels != 2 needs to be implemented");
}
}
void fgetwav (const wstring & fn, ARRAY<short> & wav, int & sampleRate)
{
auto_file_ptr f = fopenOrDie (fn, L"rbS");
fgetwav (f, wav, sampleRate);
}
// ----------------------------------------------------------------------------
// ... TODO:
// - rename this function!!
// - also change to read header itself and return sample rate and channels
// fgetraw(): read data of multi-channel .wav file, and separate data of multiple channels.
// For example, data[i][j]: i is channel index, 0 means the first
// channel. j is sample index.
// ----------------------------------------------------------------------------
void fgetraw (FILE *f, ARRAY< ARRAY<short> > & data, const WAVEHEADER & wavhd)
{
ARRAY<short> wavraw;
fgetwavraw (f, wavraw, wavhd);
data.resize (wavhd.nChannels);
int numSamples = wavhd.DataLength/wavhd.nBlockAlign;
ASSERT (numSamples == (int) wavraw.size() / wavhd.nChannels);
for (int i = 0; i < wavhd.nChannels; i++)
{
data[i].resize (numSamples);
for (int j = 0; j < numSamples; j++)
{
data[i][j] = wavraw[wavhd.nChannels*j + i];
}
}
}
// ----------------------------------------------------------------------------
// fgetwfx(), fputwfx(): direct access to simple WAV headers
// ----------------------------------------------------------------------------
// read header and skip to first data byte; return #samples
unsigned int fgetwfx (FILE * f, WAVEFORMATEX & wfx)
{
// read header
fcheckTag (f, "RIFF");
/*unsigned int riffLen = */ fgetint (f);
fcheckTag (f, "WAVE");
fcheckTag (f, "fmt ");
wfx.cbSize = sizeof (wfx);
int fmtLen = fgetint (f);
wfx.wFormatTag = fgetshort (f);
if (wfx.wFormatTag == -2) // MARecorder.exe [Ivan Tashev] puts a -2 for
{ // 8-channel recordings (meaning unknown).
wfx.wFormatTag = 1; // Workaround: pretend it is 1 (seems safe)
}
(wfx.wFormatTag == 1 || wfx.wFormatTag == 3 || wfx.wFormatTag == 7)
|| ERROR ("WAVEHEADER::read: wFormatTag=%d not supported for now", wfx.wFormatTag);
wfx.nChannels = fgetshort (f);
wfx.nSamplesPerSec = fgetint (f);
wfx.nAvgBytesPerSec = fgetint (f);
wfx.nBlockAlign = fgetshort (f);
wfx.wBitsPerSample = fgetshort (f);
// unused extra garbage in header
for ( ; fmtLen > 16; fmtLen--) fgetbyte (f);
fcheckTag (f, "data");
unsigned int dataLen = fgetint (f);
unsigned int numSamples = dataLen / wfx.nBlockAlign;
return numSamples;
}
void fputwfx (FILE *f, const WAVEFORMATEX & wfx, unsigned int numSamples)
{
unsigned int DataLength = numSamples * wfx.nBlockAlign;
(DataLength / wfx.nBlockAlign == numSamples)
|| ERROR ("fputwfx: data size exceeds WAV header 32-bit range");
unsigned int RiffLength = 36 + DataLength;
unsigned int FmtLength = 16;
// file header
ASSERT (wfx.cbSize == 0 || wfx.cbSize == FmtLength + 2);
fputTag (f, "RIFF");
fputint (f, RiffLength);
fputTag (f, "WAVE");
// 'fmt ' chunk (to hold wfx)
fputTag (f, "fmt ");
fputint (f, FmtLength);
fputshort (f, wfx.wFormatTag);
fputshort (f, wfx.nChannels);
fputint (f, wfx.nSamplesPerSec);
fputint (f, wfx.nAvgBytesPerSec);
fputshort (f, wfx.nBlockAlign);
fputshort (f, wfx.wBitsPerSample);
// data chunk
fputTag (f, "data");
fputint (f, DataLength);
fflushOrDie (f);
}
// ----------------------------------------------------------------------------
// fputwav(): write an entire .wav file (16 bit PCM)
// ----------------------------------------------------------------------------
void fputwav (FILE * f, const vector<short> & wav, int sampleRate, int nChannels)
{
f;wav;sampleRate;nChannels;
// construct WAVEFORMATEX
WAVEFORMATEX wfx;
wfx.cbSize = 16 + 2; //fmt data + extra data
wfx.nAvgBytesPerSec = (DWORD)(sampleRate * nChannels * 2); //short: 2 bytes per sample
wfx.nBlockAlign = (WORD)nChannels * 2; //short: 2bytes per sample
wfx.nChannels = (WORD)nChannels;
wfx.nSamplesPerSec = sampleRate;
wfx.wBitsPerSample = 16;
wfx.wFormatTag = WAVE_FORMAT_PCM;
//putwfx
fputwfx (f, wfx, (unsigned int) wav.size());
// wrtie the data
fwriteOrDie (&wav[0], sizeof(wav[0]), wav.size(), f);
}
void fputwav (const wstring & fn, const vector<short> & wav, int sampleRate, int nChannels)
{
auto_file_ptr f = fopenOrDie (fn, L"wbS");
fputwav (f, wav, sampleRate, nChannels);
fflushOrDie (f); // after this, fclose() (in destructor of f) cannot fail
}
// ----------------------------------------------------------------------------
// fputbyte(): write a byte value
// ----------------------------------------------------------------------------
void fputbyte (FILE * f, char v)
{
fwriteOrDie (&v, sizeof (v), 1, f);
}
// ----------------------------------------------------------------------------
// fputshort(): write a short value
// ----------------------------------------------------------------------------
void fputshort (FILE * f, short v)
{
fwriteOrDie (&v, sizeof (v), 1, f);
}
// ----------------------------------------------------------------------------
// fputint24(): write a 3-byte (24-bit) int value
// ----------------------------------------------------------------------------
void fputint24 (FILE * f, int v)
{
ASSERT (sizeof (v) == 4);
fwriteOrDie (&v, sizeof (v) -1, 1, f); // write low-order 3 bytes
}
// ----------------------------------------------------------------------------
// fputint(): write an int value
// ----------------------------------------------------------------------------
void fputint (FILE * f, int v)
{
fwriteOrDie (&v, sizeof (v), 1, f);
}
void fputint (const HANDLE f, int v)
{
fwriteOrDie (&v, sizeof (v), 1, f);
}
// ----------------------------------------------------------------------------
// fputfloat(): write a float value
// ----------------------------------------------------------------------------
void fputfloat (FILE * f, float v)
{
fwriteOrDie (&v, sizeof (v), 1, f);
}
// ----------------------------------------------------------------------------
// fputdouble(): write a double value
// ----------------------------------------------------------------------------
void fputdouble (FILE * f, double v)
{
fwriteOrDie (&v, sizeof (v), 1, f);
}
// ----------------------------------------------------------------------------
// fputfile(): write a binary block or a string as a file
// ----------------------------------------------------------------------------
void fputfile (const WSTRING & pathname, const ARRAY<char> & buffer)
{
FILE * f = fopenOrDie (pathname, L"wb");
try
{
if (buffer.size() > 0)
{ // ^^ otherwise buffer[0] is an illegal expression
fwriteOrDie (&buffer[0], sizeof (buffer[0]), buffer.size(), f);
}
fcloseOrDie (f);
}
catch (...)
{
fclose (f);
throw;
}
}
void fputfile (const WSTRING & pathname, const std::wstring & string)
{
FILE * f = fopenOrDie (pathname, L"wb");
try
{
if (string.length() > 0)
{ // ^^ otherwise buffer[0] is an illegal expression
fwriteOrDie (string.c_str(), sizeof (string[0]), string.length(), f);
}
fcloseOrDie (f);
}
catch (...)
{
fclose (f);
throw;
}
}
void fputfile (const WSTRING & pathname, const std::string & string)
{
FILE * f = fopenOrDie (pathname, L"wb");
try
{
if (string.length() > 0)
{ // ^^ otherwise buffer[0] is an illegal expression
fwriteOrDie (string.c_str(), sizeof (string[0]), string.length(), f);
}
fcloseOrDie (f);
}
catch (...)
{
fclose (f);
throw;
}
}
// ----------------------------------------------------------------------------
// fgetfile(): load a file as a binary block
// ----------------------------------------------------------------------------
void fgetfile (const WSTRING & pathname, ARRAY<char> & buffer)
{
FILE * f = fopenOrDie (pathname, L"rb");
size_t len = filesize (f);
buffer.resize (len);
if (buffer.size() > 0)
{ // ^^ otherwise buffer[0] is an illegal expression
freadOrDie (&buffer[0], sizeof (buffer[0]), buffer.size(), f);
}
fclose (f);
}
void fgetfile (FILE * f, ARRAY<char> & buffer)
{ // this version reads until eof
buffer.resize (0);
buffer.reserve (1000000); // avoid too many reallocations
ARRAY<char> inbuf;
inbuf.resize (65536); // read in chunks of this size
while (!feof (f)) // read until eof
{
size_t n = fread (&inbuf[0], sizeof (inbuf[0]), inbuf.size(), f);
if (ferror (f))
{
ERROR ("fgetfile: error reading from file: %s", strerror (errno));
}
buffer.insert (buffer.end(), inbuf.begin(), inbuf.begin() + n);
}
buffer.reserve (buffer.size());
}
// load it into RAM in one huge chunk
static size_t fgetfilechars (const std::wstring & path, vector<char> & buffer)
{
auto_file_ptr f = fopenOrDie (path, L"rb");
size_t len = filesize (f);
buffer.reserve (len +1);
freadOrDie (buffer, len, f);
buffer.push_back (0); // this makes it a proper C string
return len;
}
template<class LINES> static void strtoklines (char * s, LINES & lines)
{
char * context;
for (char * p = strtok_s (s, "\r\n", &context); p; p = strtok_s (NULL, "\r\n", &context))
lines.push_back (p);
}
void msra::files::fgetfilelines (const std::wstring & path, vector<char> & buffer, std::vector<std::string> & lines)
{
// load it into RAM in one huge chunk
const size_t len = fgetfilechars (path, buffer);
// parse into lines
lines.resize (0);
lines.reserve (len / 20);
strtoklines (&buffer[0], lines);
}
// same as above but returning const char* (avoiding the memory allocation)
vector<char*> msra::files::fgetfilelines (const wstring & path, vector<char> & buffer)
{
// load it into RAM in one huge chunk
const size_t len = fgetfilechars (path, buffer);
// parse into lines
vector<char *> lines;
lines.reserve (len / 20);
strtoklines (&buffer[0], lines);
return lines;
}
// ----------------------------------------------------------------------------
// getfiletime(), setfiletime(): access modification time
// ----------------------------------------------------------------------------
bool getfiletime (const wstring & path, FILETIME & time)
{ // return file modification time, false if cannot be determined
WIN32_FIND_DATAW findFileData;
auto_handle hFind (FindFirstFileW (path.c_str(), &findFileData), ::FindClose);
if (hFind != INVALID_HANDLE_VALUE)
{
time = findFileData.ftLastWriteTime;
return true;
}
else
{
return false;
}
}
void setfiletime (const wstring & path, const FILETIME & time)
{ // update the file modification time of an existing file
auto_handle h (CreateFileW (path.c_str(), FILE_WRITE_ATTRIBUTES,
FILE_SHARE_READ|FILE_SHARE_WRITE, NULL,
OPEN_EXISTING, FILE_ATTRIBUTE_NORMAL, NULL));
if (h == INVALID_HANDLE_VALUE)
{
ERROR ("setfiletime: error opening file: %d", GetLastError());
}
BOOL rc = SetFileTime (h, NULL, NULL, &time);
if (!rc)
{
ERROR ("setfiletime: error setting file time information: %d", GetLastError());
}
}
// ----------------------------------------------------------------------------
// expand_wildcards -- wildcard expansion of a path, including directories.
// ----------------------------------------------------------------------------
// Win32-style variant of this function (in case we want to use it some day)
// Returns 0 in case of failure. May throw in case of bad_alloc.
static BOOL ExpandWildcards (wstring path, vector<wstring> & paths)
{
// convert root to DOS filename convention
for (size_t k = 0; k < path.length(); k++) if (path[k] == '/') path[k] = '\\';
// remove terminating backslash
size_t last = path.length() -1;
if (last >= 0 && path[last] == '\\') path.erase (last);
// convert root to long filename convention
//if (path.find (L"\\\\?\\") != 0)
// path = L"\\\\?\\" + root;
// split off everything after first wildcard
size_t wpos = path.find_first_of (L"*?");
if (wpos == 2 && path[0] == '\\' && path[1] == '\\')
wpos = path.find_first_of (L"*?", 4); // 4=skip "\\?\"
if (wpos == wstring::npos)
{ // no wildcard: just return it
paths.push_back (path);
return TRUE;
}
// split off everything afterwards if any
wstring rest; // remaining path after this directory
size_t spos = path.find_first_of (L"\\", wpos +1);
if (spos != wstring::npos)
{
rest = path.substr (spos +1);
path.erase (spos);
}
// crawl folder
WIN32_FIND_DATAW ffdata;
auto_handle hFind (::FindFirstFileW (path.c_str(), &ffdata), ::FindClose);
if (hFind == INVALID_HANDLE_VALUE)
{
DWORD err = ::GetLastError();
if (rest.empty() && err == 2) return TRUE; // no matching file: empty
return FALSE; // another error
}
size_t pos = path.find_last_of (L"\\");
if (pos == wstring::npos) throw std::logic_error ("unexpected missing \\ in path");
wstring parent = path.substr (0, pos);
do
{
// skip this and parent directory
bool isDir = ((ffdata.dwFileAttributes & (FILE_ATTRIBUTE_DIRECTORY | FILE_ATTRIBUTE_REPARSE_POINT)) != 0);
if (isDir && ffdata.cFileName[0] == '.') continue;
wstring filename = parent + L"\\" + ffdata.cFileName;
if (rest.empty())
{
paths.push_back (filename);
}
else if (isDir) // multi-wildcards: further expand
{
BOOL rc = ExpandWildcards (filename + L"\\" + rest, paths);
rc; // error here means no match, e.g. Access Denied to one subfolder
}
} while (::FindNextFileW(hFind, &ffdata) != 0);
return TRUE;
}
void expand_wildcards (const wstring & path, vector<wstring> & paths)
{
BOOL rc = ExpandWildcards (path, paths);
if (!rc)
ERROR ("error in expanding wild cards '%S': %S", path.c_str(), FormatWin32Error (::GetLastError()).c_str());
}
// ----------------------------------------------------------------------------
// make_intermediate_dirs() -- make all intermediate dirs on a path
// ----------------------------------------------------------------------------
static void mkdir (const wstring & path)
{
int rc = _wmkdir (path.c_str());
if (rc >= 0 || errno == EEXIST)
return; // no error or already existing --ok
if (errno == EACCES)
{
// bug in _wmkdir(): returns access_denied if folder exists but read-only --check existence
DWORD att = ::GetFileAttributesW (path.c_str());
if (att != INVALID_FILE_ATTRIBUTES || (att & FILE_ATTRIBUTE_DIRECTORY) != 0)
return; // ok
}
ERROR ("make_intermediate_dirs: error creating intermediate directory %S", path.c_str());
}
// make subdir of a file including parents
void msra::files::make_intermediate_dirs (const wstring & filepath)
{
vector<wchar_t> buf;
buf.resize (filepath.length() +1, 0);
wcscpy_s (&buf[0], buf.size(), filepath.c_str());
wstring subpath;
int skip = 0;
// if share (\\) then the first two levels (machine, share name) cannot be made
if ((buf[0] == '/' && buf[1] == '/') || (buf[0] == '\\' && buf[1] == '\\'))
{
subpath = L"/";
skip = 2; // skip two levels (machine, share)
}
// make all constituents except the filename (to make a dir, include a trailing slash)
for (const wchar_t * p = wcstok (&buf[0], L"/\\"); p; p = wcstok (NULL, L"/\\"))
{
if (subpath != L"" && subpath != L"/" && subpath != L"\\" && skip == 0)
{
mkdir (subpath);
}
else if (skip > 0) skip--; // skip this level
// rebuild the final path
if (subpath != L"") subpath += L"/";
subpath += p;
}
}
// ----------------------------------------------------------------------------
// fuptodate() -- test whether an output file is at least as new as an input file
// ----------------------------------------------------------------------------
// test if file 'target' is not older than 'input' --used for make mode
// 'input' must exist if 'inputrequired'; otherweise if 'target' exists, it is considered up to date
// 'target' may or may not exist
bool msra::files::fuptodate (const wstring & target, const wstring & input, bool inputrequired)
{
FILETIME targettime;
if (!getfiletime (target, targettime)) return false; // target missing: need to update
FILETIME inputtime;
if (!getfiletime (input, inputtime)) return !inputrequired; // input missing: if required, pretend to be out of date as to force caller to fail
ULARGE_INTEGER targett, inputt;
memcpy (&targett, &targettime, sizeof (targett));
memcpy (&inputt, &inputtime, sizeof (inputt));
return !(targett.QuadPart < inputt.QuadPart); // up to date if target not older than input
}