CNTK/MachineLearning/cn/fileutil.cpp

//
// fileutil.cpp - file I/O with error checking
//
//     Copyright (c) Microsoft Corporation.  All rights reserved.
//
// $Log: /Speech_To_Speech_Translation/dbn/dbn/fileutil.cpp $
//
// 125   1/03/13 8:53p Kaisheny
// Asynchronous SGD using data pipe.
//
// 124   9/30/12 10:46a Fseide
// new optional parameter to fuptodate()--caller can now choose whether a
// missing input file, with target file present, will cause a failure or
// considers the target up-to-date
//
// 123   8/20/12 12:29p V-hansu
// fixed a major bug in freadOrDie() for chunks > 15M units (breaking into
// chunks was broken)
//
// 122   4/01/12 12:02p Fseide
// (expanded an error message)
//
// 121   11/09/11 10:01 Fseide
// added a new overload for fgetfilelines() that returns an array of char*
// instead of strings, to avoid mem alloc
//
// 120   10/27/11 18:52 Fseide
// updated freadOrDie() to smaller chunk size
//
// 119   10/27/11 13:40 Fseide
// freadOrDie() now explicitly breaks up large reads because CRT fread()
// does not handle them (due to a Windows bug)
//
// 118   6/10/11 9:49 Fseide
// new function fgetfilelines() for reading text files
//
// 117   3/07/11 12:13 Fseide
// actually implemented unlinkOrDie() (was a dummy)
//
// 116   12/07/10 10:03 Fseide
// (corrected the buffer size in fsetpos() from 65336 to 65536)
//
// 115   12/03/10 10:53 Fseide
// fsetpos() optimization when seeking forward within the current read
// buffer
//
// 114   11/18/10 4:32p Kit
// added missing header for errno
//
// 113   11/18/10 9:20 Fseide
// a basic optimization in fsetpos() to avoid rereading the buffer if
// fsetpos() does not actually move the file pointer
//
// 112   11/17/10 15:00 Fseide
// new function fuptodate();
// make_intermediate_dirs() moved to namespace msra::files (all new
// functions should be put in there)
//
// 111   11/12/10 16:43 Fseide
// bug in getfiletime(), totally broken
//
// 110   11/09/10 8:56 Fseide
// some cleanup of make_intermediate_dirs()
//
// 109   11/08/10 17:07 Fseide
// new function make_intermediate_dirs()
//
// 108   11/30/09 1:32p Kit
//
// 107   2/05/09 19:05 Fseide
// fgetline() now returns a non-const pointer, because user may want to
// post-process the line, and the returned value is a user-specified
// buffer anyway
//
// 106   1/16/09 8:59 Fseide
// exported fskipspace()
//
// 105   1/16/09 8:47 Fseide
// (a comment added)
//
// 104   1/15/09 7:38 Fseide
// some magic to unify fgetstring() for char and wchar_t to a single
// template function
//
// 103   1/14/09 19:27 Fseide
// new functions fsetpos() and fgetpos();
// added missing read-error checks to fget(w)string()
//
// 102   1/14/09 12:38 Fseide
// bug fix in fgetline(): missed an error check
//
// 101   1/09/09 7:40 Fseide
// (fixed a warning)
//
// 100   1/08/09 16:38 Fseide
// fopenOrDie() now supports "-" as the pathname, referring to stdin or
// stdout
//
// 99    1/08/09 15:32 Fseide
// new function expand_wildcards()
//
// 98    1/05/09 8:44 Fseide
// (added comments)
//
// 97    12/24/08 14:44 Fseide
// added an overflow check to fputwfx()
//
// 96    12/12/08 10:11a Qiluo
// (change marker of banned APIs)
//
// 95    12/11/08 7:40p Qiluo
// (change marker of banned APIs)
//
// 94    12/09/08 6:59p Qiluo
// reverted stringerror => strerror
//
// 93    12/09/08 6:37p Qiluo
// fixed a few compilation bugs
//
// 92    12/09/08 6:28p Qiluo
// strerror => stringerror
//
// 91    12/01/08 2:43p Qiluo
// add markers for banned APIs, and refine the api fixing
//
// 90    11/11/08 7:34p Qiluo
// fix bug in strnlen
//
// 89    11/11/08 18:27 Fseide
// no longer disables C4996
//
// 88    11/11/08 6:04p Qiluo
// recover the old fputstring functions
//
// 87    11/10/08 2:34p Qiluo
// remove the dependency of header "StringUtil.h"
//
// 86    10/31/08 5:08p Qiluo
// remove banned APIs
//
// 85    6/24/08 19:03 Fseide
// added fgetwstring() and fputstring() for wstrings
//
// 84    6/02/08 14:11 Fseide
// fgetwfx() and wputwfx() now a bit more tolerant
//
// 83    08-05-29 18:18 Llu
// fix the interface of fputwav
//
// 82    08-05-29 14:53 Llu
//
// 81    08-05-29 13:53 Llu
// add fputwav revise fgetwav using stl instead of short *
//
// 80    3/19/08 16:13 Fseide
// (better solution to prev. problem)
//
// 79    3/19/08 16:07 Fseide
// (#ifdef'ed out fprintfOrDie() in _MANAGED builds)
//
// 78    10/30/07 16:46 Fseide
//
// 77    3/27/07 13:54 Fseide
// added 'using namespace std;' (was removed from message.h as it does not
// belong there)
//
// 76    1/30/07 1:59p Kit
// Undid updates to fgetline error handling
//
// 70    12/20/06 10:48a Kit
// increased size of line buffer for fgetline because we seem to be
// getting large strings in some rss feeds
//
// 69    06-12-04 18:30 Llu
// (fixed an unnecessary "deprecated string function" warning under VS
// 2005)
//
// 68    11/27/06 11:40 Fseide
// new methods fgetwfx() and fputwfx() for direct access to simple PCM WAV
// files
//
// 67    10/14/06 18:31 Fseide
// added char* version of fexists()
//
// 66    5/14/06 19:58 Fseide
// new function fsetmode()
//
// 65    3/29/06 16:10 Fseide
// increased buffer size in fgetfile() to 64k
//
// 64    3/29/06 15:36 Fseide
// changed to reading entire file instead of line-by-line, not changing
// newlines anymore
//
// 63    3/24/06 4:40p Rogeryu
// workaround a VC 2003 header bug (va_start macro for references) in
// MESSAGE/ERROR functions
//
// 62    3/22/06 3:31p Rogeryu
// (comments changed)
//
// 61    3/21/06 5:21p Rogeryu
// review and fix level2_security OACR warnings
//
// 60    3/21/06 9:26a Rogeryu
// review and fix OACR warnings
//
// 59    06-03-15 15:41 Yushli
// Suppress C4996 Warning per function
//
// 58    06-03-14 12:11 Yushli
// Suppress C4996 Warning on strerror per function
//
// 57    06-03-14 10:33 Yushli
// Suppress C4996 Warning per function.
//
// 56    2/28/06 1:49p Kjchen
// suppress oacr warning
//
// 55    2/24/06 8:03p Kjchen
// depress oacr warnings
//
// 54    2/21/06 11:32a Kit
// added filesize64 to support large files
//
// 53    1/10/06 8:23p Rogeryu
// fix a warning
//
// 52    1/09/06 7:12p Rogeryu
// wide version of fgetline
//
// 51    12/20/05 21:15 Fseide
// changed CreateFile() to CreateFileW()
//
// 50    12/19/05 22:50 Fseide
// setfiletime() fixed, now actually works
//
// 49    12/19/05 21:52 Fseide
// fputfile() added in 8-bit string version
//
// 48    12/18/05 17:01 Fseide
// fixed file-handle leaks in error conditions
//
// 47    12/15/05 20:25 Fseide
// added getfiletime(), setfiletime(), and fputfile() for strings
//
// 46    9/27/05 12:22 Fseide
// added wstring version of renameOrDie()
//
// 45    9/22/05 12:26 Fseide
// new method fexists()
//
// 44    9/15/05 11:33 Fseide
// new version of fgetline() that avoids buffer allocations, since this
// seems very expensive esp. when reading a file line by line with
// fgetline()
//
// 43    9/05/05 4:57p F-xyzhao
// renameOrDie(): changed string to std::string
//
// 42    9/05/05 11:00 Fseide
// new method renameOrDie()
//
// 41    8/19/05 18:19 Fseide
// bugfixes in WAVEHEADER::write and prepare
//
// 40    8/19/05 18:02 Fseide
// WAVEHEADER::write() now flushes
//
// 39    8/19/05 17:56 Fseide
// extended WAVEHEADER with write() and update()
//
// 38    8/14/05 16:56 Fseide
// fopenOrDie() now sets large buffer if 'S' option
//
// 37    8/13/05 15:37 Fseide
// added new version of fgetline that takes a buffer
//
// 36    7/28/05 18:04 Fseide
// bug fix in fgetin24 and fputint24
//
// 35    7/26/05 18:54 Fseide
// new functions fgetint24() and fputint24()
//
// 34    5/10/05 14:12 Fseide
// (level-4 warning fixed)
//
// 33    5/10/05 11:57 Fseide
// (level-4 warnings removed)
//
// 32    5/09/05 12:07 Fseide
// fixed for-loop conformance issues
//
// 31    2/27/05 17:41 Fseide
// recovered v29 that somehow got overwritten
//
// 29    2/12/05 15:21 Fseide
// fgetdouble() and fputdouble() added
//
// 28    2/05/05 12:38 Fseide
// new methods fputfile(), fgetfile();
// new overload for filesize()
//
// 27    2/03/05 22:34 Fseide
// added new version of fgetline() that returns an STL string
//
// 26    5/31/04 10:06 Fseide
// new methods fseekOrDie(), ftellOrDie(), unlinkOrDie(), renameOrDie()
//
// 25    3/19/04 4:01p Fseide
// fwriteOrDie(): first argument changed to const
//
// 24    2/21/04 10:26 Fseide
// (compiler warnings eliminated)
//
// 23    2/19/04 9:46p V-xlshi
//
// 22    2/19/04 3:44p V-xlshi
// fgetwavraw and fgetraw function is added, fgetwav is changed but its
// functionality is the same with the old one.
//
// 21    2/03/04 8:17p V-xlshi
//
// 20    9/08/03 22:55 Fseide
// fgetwav() can now read stereo PCM files
//
// 19    8/15/03 15:40 Fseide
// new method filesize()
//
// 18    8/13/03 21:06 Fseide
// new function fputbyte()
//
// 17    8/13/03 15:37 Fseide
// an error msg corrected
//
// 16    8/07/03 22:04 Fseide
// fprintfOrDie() now really dies in case of error
//
// 15    7/30/03 5:09p Fseide
// (eliminated a compiler warning)
//
// 14    03-07-30 14:17 I-rogery
//
// 13    7/25/03 6:07p Fseide
// new functions fgetbyte() and fgetwav()
//
// 12    6/03/03 5:23p Fseide
// (some compiler warnings related to size_t eliminated)
//
// 11    3/27/03 3:42p Fseide
// fwriteOrDie() rewritten to break huge blocks into chunks of 16 MB
// because Windows std C lib can't handle fwrite() with e.g. 100 MB in one
// call
//
// 10    7/23/02 9:00p Jlzhou
//
// 9     7/03/02 9:25p Fseide
// fcompareTag() now uses STRING type for both of its arguments (before,
// it used const char * for one of them)
//
// 8     6/10/02 3:14p Fseide
// new functions fgettoken(), fgetfloat_ascii(), fskipNewline()
//
// 7     6/07/02 7:26p Fseide
// new functions fcheckTag_ascii() and fgetint_ascii()
//
// 6     6/03/02 10:58a Jlzhou
//
// 5     4/15/02 1:12p Fseide
// void fputstring (FILE * f, const TSTRING & str) and fpad() added
//
// 4     4/03/02 3:56p Fseide
// VSS keyword and copyright added
//
// F. Seide 5 Mar 2002
//

#ifndef UNDER_CE    // fixed-buffer overloads not available for wince
#ifdef _CRT_SECURE_CPP_OVERLOAD_STANDARD_NAMES  // fixed-buffer overloads for strcpy() etc.
#undef _CRT_SECURE_CPP_OVERLOAD_STANDARD_NAMES
#endif
#define _CRT_SECURE_CPP_OVERLOAD_STANDARD_NAMES 1
#endif

#include "basetypes.h"
#include "fileutil.h"
#include "message.h"
#include <stdio.h>
#include <string.h>
#include <wchar.h>      // for wint_t and WEOF
#include <ctype.h>
#include "windows.h"    // for FILETIME
#include <algorithm>    // for std::find

#ifndef UNDER_CE  // some headers don't exist under winCE - the appropriate definitions seem to be in stdlib.h
#include <fcntl.h>      // for _O_BINARY/TEXT - not needed for wince
#include <io.h>         // for _setmode()
#endif

#include <errno.h>

using namespace std;

// ----------------------------------------------------------------------------
// fopenOrDie(): like fopen() but terminate with err msg in case of error.
// A pathname of "-" returns stdout or stdin, depending on mode, and it will
// change the binary mode if 'b' or 't' are given. If you use this, make sure
// not to fclose() such a handle.
// ----------------------------------------------------------------------------

// Wide-character overload of strchr(), so that code templated on the character
// type can search a mode string with the same call for char and wchar_t.
static const wchar_t * strchr (const wchar_t * s, wchar_t v)
{
    const wchar_t * found = wcschr (s, v);
    return found;
}

// Stand-in for opening the pathname "-": yields stdin if 'mode' contains 'r',
// stdout otherwise. If 'mode' contains 'b' or 't', the stream's translation mode
// is changed first ('b' wins if both are present).
template<class MODECHAR> FILE * fopenStdHandle (const MODECHAR * mode)
{
    const bool forReading = strchr (mode, 'r') != NULL;
    const bool wantBinary = strchr (mode, 'b') != NULL;
    const bool wantText   = strchr (mode, 't') != NULL;

    FILE * stream = forReading ? stdin : stdout;
    if (!wantBinary && !wantText)
        return stream;                  // no mode change requested

    // switch the translation mode of the std stream as requested
    if (_setmode (_fileno (stream), wantBinary ? _O_BINARY : _O_TEXT) == -1)
        ERROR ("error switching stream to binary mode: %s", strerror (errno));
    return stream;
}

FILE * fopenOrDie (const STRING & pathname, const char * mode)
{
    FILE * f = (pathname[0] == '-') ? fopenStdHandle (mode) : fopen (pathname.c_str(), mode);
    if (f == NULL)
    {
	ERROR ("error opening file '%s': %s", pathname.c_str(), strerror (errno));
        return NULL;    // keep OACR happy
    }
    if (strchr (mode, 'S'))
    {   // if optimized for sequential access then use large buffer
	setvbuf (f, NULL, _IOFBF, 10000000);    // OK if it fails
    }
    return f;
}

FILE * fopenOrDie (const WSTRING & pathname, const wchar_t * mode)
{
    FILE * f = (pathname[0] == '-') ? fopenStdHandle (mode) : _wfopen (pathname.c_str(), mode);
    if (f == NULL)
    {
        ERROR ("error opening file '%S': %s", pathname.c_str(), strerror (errno));
        return NULL;    // keep OACR happy
    }
    if (strchr (mode, 'S'))
    {   // if optimized for sequential access then use large buffer
        setvbuf (f, NULL, _IOFBF, 10000000);    // OK if it fails
    }
    return f;
}

// ----------------------------------------------------------------------------
// set mode to binary or text (pass 'b' or 't')
// ----------------------------------------------------------------------------

void fsetmode (FILE * f, char type)
{
    if (type != 'b' && type != 't')
    {
        ERROR ("fsetmode: invalid type '%c'");
    }
#ifdef UNDER_CE // winCE and win32 have different return types for _fileno
    FILE *fd = _fileno (f);   // note: no error check possible
#else
    int fd = _fileno (f);   // note: no error check possible
#endif
    int mode = type == 'b' ? _O_BINARY : _O_TEXT;
    int rc = _setmode (fd, mode);
    if (rc == -1)
    {
	ERROR ("error changing file mode: %s", strerror (errno));
    }
}

// ----------------------------------------------------------------------------
// freadOrDie(): like fread() but terminate with err msg in case of error
// ----------------------------------------------------------------------------

void freadOrDie (void * ptr, size_t size, size_t count, FILE * f)
{
    // \\XXX\C$ reads are limited, with some randomness (e.g. 48 MB), on Windows 7 32 bit, so we break this into chunks of some MB. Meh.
    while (count > 0)
    {
        size_t chunkn = min (count, 15*1024*1024);  // BUGBUG: I surely meant this limit to be bytes, not units of 'size'...
        size_t n = fread (ptr, size, chunkn, f);
        if (n != chunkn)
            ERROR ("error reading from file: %s", strerror (errno));
        count -= n;
        ptr = n * size + (char*) ptr;
    }
}

void freadOrDie (void * ptr, size_t size, size_t count, const HANDLE f)
{
    // \\XXX\C$ reads are limited, with some randomness (e.g. 48 MB), on Windows 7 32 bit, so we break this into chunks of some MB. Meh.
    while (count > 0)
    {
        size_t chunkn = min (count * size, 15*1024*1024);
		DWORD n ;
    	ReadFile(f, ptr, (DWORD) chunkn, &n, NULL);
	    if (n != chunkn)
            ERROR ("error number for reading from file: %s", GetLastError());
        count -= (size_t) (n / size);
        ptr = n + (char*) ptr;
    }
}

// ----------------------------------------------------------------------------
// fwriteOrDie(): like fwrite() but terminate with err msg in case of error;
// Windows C std lib fwrite() has problems writing >100 MB at a time (fails
// with Invalid Argument error), so we break it into chunks (yak!!)
// ----------------------------------------------------------------------------

void fwriteOrDie (const void * ptr, size_t size, size_t count, FILE * f)
{
    const char * p1 = (const char *) ptr;
    size_t totalBytes = size * count;
    while (totalBytes > 0)
    {
        size_t wantWrite = totalBytes;
#define LIMIT (16*1024*1024)    // limit to 16 MB at a time
        if (wantWrite > LIMIT)
        {
            wantWrite = LIMIT;
        }
        size_t n = fwrite ((const void *) p1, 1, wantWrite, f);
        if (n != wantWrite)
        {
            ERROR ("error writing to file (ptr=0x%08lx, size=%d,"
                " count=%d, writing %d bytes after %d): %s",
                ptr, size, count, (int) wantWrite,
                (int) (size * count - totalBytes),
                strerror (errno));
        }
        totalBytes -= wantWrite;
        p1 += wantWrite;
    }
}

void fwriteOrDie (const void * ptr, size_t size, size_t count, const HANDLE f)
{
    const char * p1 = (const char *) ptr;
    DWORD totalBytes = (DWORD) (size * count);
    while (totalBytes > 0)
    {
        DWORD wantWrite = totalBytes;
#define LIMIT (16*1024*1024)    // limit to 16 MB at a time
        if (wantWrite > LIMIT)
        {
            wantWrite = LIMIT;
        }
		DWORD byteWritten = 0 ;
        if (WriteFile(f, (const void *) p1, wantWrite, &byteWritten, NULL) == false)
        {
            ERROR ("error writing to file (ptr=0x%08lx, size=%d,"
                " count=%d, writing %d bytes after %d): %s",
                ptr, size, count, (int) wantWrite,
                (int) (size * count - totalBytes),
                strerror (errno));
        }
        totalBytes -= wantWrite;
        p1 += wantWrite;
    }
}


// ----------------------------------------------------------------------------
// fprintfOrDie(): like fprintf() but terminate with err msg in case of error
// ----------------------------------------------------------------------------

#pragma warning(push)
#pragma warning(disable : 4793) // 'vararg' : causes native code generation
// Formats the variable arguments according to 'fmt' and writes them to 'f' via
// vfprintf(); calls ERROR() if vfprintf() reports a failure (negative result).
void fprintfOrDie (FILE * f, const char * fmt, ...)
{
    va_list arg_ptr;
    va_start (arg_ptr, fmt);
    int rc = vfprintf (f, fmt, arg_ptr);
    va_end (arg_ptr);       // every va_start() must be paired with va_end()
    if (rc < 0)
    {
        ERROR ("error writing to file: %s", strerror (errno));
    }
}
#pragma warning(pop)

// ----------------------------------------------------------------------------
// fflushOrDie(): like fflush() but terminate with err msg in case of error
// ----------------------------------------------------------------------------

// Flushes 'f'; calls ERROR() with the errno text if fflush() reports a failure.
void fflushOrDie (FILE * f)
{
    if (fflush (f) == 0)
        return;             // flushed fine
    ERROR ("error flushing to file: %s", strerror (errno));
}

// ----------------------------------------------------------------------------
// filesize(): determine size of the file in bytes (with open file)
// BUGBUG: how about files > 4 GB?
// ----------------------------------------------------------------------------
// Returns the size of the open file 'f' by seeking to its end and reading the
// position there; the original file position is restored before returning.
// Each failing step calls ERROR().
size_t filesize (FILE * f)
{
    // remember where we are
    const long savedPos = ftell (f);
    if (savedPos == -1L)
        ERROR ("error determining file position: %s", strerror (errno));

    // the position at the end of the file is its length
    if (fseek (f, 0, SEEK_END) != 0)
        ERROR ("error seeking to end of file: %s", strerror (errno));
    const long endPos = ftell (f);
    if (endPos == -1L)
        ERROR ("error determining file position: %s", strerror (errno));

    // go back to where we were
    if (fseek (f, savedPos, SEEK_SET) != 0)
        ERROR ("error resetting file position: %s", strerror (errno));
    return (size_t) endPos;
}

// filesize(): determine size of the file in bytes (with pathname).
// The file is opened in binary read mode, measured, and closed again; it is
// closed as well when measuring throws, and the exception is passed on.
size_t filesize (const wchar_t * pathname)
{
    FILE * file = fopenOrDie (pathname, L"rb");
    size_t numBytes;
    try
    {
        numBytes = filesize (file);
    }
    catch (...)
    {
        fclose (file);
        throw;
    }
    fclose (file);
    return numBytes;
}

#ifndef UNDER_CE    // no 64-bit under winCE

// filesize64(): determine size of the file in bytes (with pathname), as a 64-bit
// value obtained from _wstat64(). Returns 0 if _wstat64() fails.
__int64 filesize64 (const wchar_t * pathname)
{
    __stat64 info;
    const bool statOk = _wstat64 (pathname, &info) != -1;
    if (statOk)
        return info.st_size;
    return 0;
}
#endif

// ----------------------------------------------------------------------------
// fseekOrDie(),ftellOrDie(), fget/setpos(): seek functions with error handling
// ----------------------------------------------------------------------------

// Seeks 'f' to 'offset' relative to 'mode' (as for fseek()) and returns the file
// position the stream had before the seek. Calls ERROR() if either the position
// query or the seek fails.
long fseekOrDie (FILE * f, long offset, int mode)
{
    const long previousPos = ftell (f);
    if (previousPos == -1L)
        ERROR ("error seeking: %s", strerror (errno));
    if (fseek (f, offset, mode) != 0)
        ERROR ("error seeking: %s", strerror (errno));
    return previousPos;
}

// Returns the current position of 'f' as obtained from the CRT's ::fgetpos();
// calls ERROR() if the position cannot be determined.
unsigned __int64 fgetpos (FILE * f)
{
    fpos_t position;
    if (::fgetpos (f, &position) != 0)
        ERROR ("error getting file position: %s", strerror (errno));
    return position;
}

// Moves the file position of 'f' to 'reqpos'.
// If 'reqpos' lies at or ahead of the current position and before curpos + f->_cnt,
// the position is reached by dummy reads instead of ::fsetpos(); otherwise, or if
// the consistency check inside the loop fails, ::fsetpos() is called.
// Calls ERROR() if ::fsetpos() fails.
void fsetpos (FILE * f, unsigned __int64 reqpos)
{
    // ::fsetpos() flushes the read buffer. This conflicts with a situation where
    // we generally read linearly but skip a few bytes or KB occasionally, as is
    // the case in speech recognition tools. This requires a number of optimizations.

    unsigned __int64 curpos = fgetpos (f);
    // NOTE(review): f->_cnt is presumably the number of unread bytes left in the CRT read buffer,
    // making 'cureob' the file position of the end of that buffer -- confirm against the CRT in use
    unsigned __int64 cureob = curpos + f->_cnt; // UGH: we mess with an internal structure here
    while (reqpos >= curpos && reqpos < cureob)
    {
        // if we made it then do not call fsetpos()
        if (reqpos == fgetpos (f))
            return;

        // if we seek within the existing buffer, then just move to the position by dummy reads
        char buf[65536];
        // skip at most one 'buf' worth per iteration; the loop repeats for larger distances
        size_t n = min ((size_t) reqpos - (size_t) curpos, _countof (buf));
        fread (buf, sizeof (buf[0]), n, f);     // (this may fail, but really shouldn't)
        curpos += n;

        // since we mess with f->_cnt, if something unexpected happened to the buffer then back off
        if (curpos != fgetpos (f) || curpos + f->_cnt != cureob)
            break;                              // oops
    }

    // actually perform the seek
    fpos_t post = reqpos;
    int rc = ::fsetpos (f, &post);
    if (rc != 0)
        ERROR ("error setting file position: %s", strerror (errno));
}

// ----------------------------------------------------------------------------
// unlinkOrDie(): unlink() with error handling
// ----------------------------------------------------------------------------

void unlinkOrDie (const std::string & pathname)
{
    if (_unlink (pathname.c_str()) != 0 && errno != ENOENT)     // if file is missing that's what we want
	ERROR ("error deleting file '%s': %s", pathname.c_str(), strerror (errno));
}
void unlinkOrDie (const std::wstring & pathname)
{
    if (_wunlink (pathname.c_str()) != 0 && errno != ENOENT)    // if file is missing that's what we want
	ERROR ("error deleting file '%S': %s", pathname.c_str(), strerror (errno));
}

// ----------------------------------------------------------------------------
// renameOrDie(): rename() with error handling
// ----------------------------------------------------------------------------

#ifndef UNDER_CE // CE only supports Unicode APIs
void renameOrDie (const std::string & from, const std::string & to)
{
    if (!MoveFileA (from.c_str(),to.c_str()))
	ERROR ("error renaming: %s", GetLastError());
}
#endif

void renameOrDie (const std::wstring & from, const std::wstring & to)
{
    if (!MoveFileW (from.c_str(),to.c_str()))
	ERROR ("error renaming: %s", GetLastError());
}

// ----------------------------------------------------------------------------
// fexists(): test if a file exists
// ----------------------------------------------------------------------------

bool fexists (const wchar_t * pathname)
{
    WIN32_FIND_DATAW findFileData;
    HANDLE hFind = FindFirstFileW (pathname, &findFileData);
    if (hFind != INVALID_HANDLE_VALUE)
    {
        FindClose (hFind);
        return true;
    }
    else
    {
        return false;
    }
}

#ifndef UNDER_CE // CE only supports Unicode APIs
bool fexists (const char * pathname)
{
    WIN32_FIND_DATAA findFileData;
    HANDLE hFind = FindFirstFileA (pathname, &findFileData);
    if (hFind != INVALID_HANDLE_VALUE)
    {
        FindClose (hFind);
        return true;
    }
    else
    {
        return false;
    }
}
#endif

// ----------------------------------------------------------------------------
// funicode(): test if a file uses unicode by reading its BOM
// ----------------------------------------------------------------------------

// Reads one 16-bit unit from 'f' and returns true if it equals the byte-order
// mark 0xFEFF (the stream then stays positioned right after it). Otherwise the
// stream is moved back to the start of the file and false is returned.
bool funicode (FILE * f)
{
    unsigned short firstUnit = 0;
    const bool unitRead = fread (&firstUnit, sizeof(short), 1, f) == 1;
    const bool hasBom = unitRead && (int)firstUnit == 0xFEFF;
    if (!hasBom)
        fseek (f,0,SEEK_SET);   // no BOM: the consumed bytes are ordinary content
    return hasBom;
}

// ----------------------------------------------------------------------------
// fgetline(): like fgets() but terminate with err msg in case of error;
// removes the newline character at the end (like gets());
// Returns 'buf' (always). buf guaranteed to be 0-terminated.
// ----------------------------------------------------------------------------

// Wide-character overload of fgets() (forwards to fgetws()), so that code templated
// on the character type can call fgets() for both char and wchar_t buffers.
static inline wchar_t * fgets (wchar_t * buf, int n, FILE * f)
{
    wchar_t * line = fgetws (buf, n, f);
    return line;
}
// _utf8(): produce the narrow string that is handed to '%s' in error messages.
// A narrow string is returned unchanged ...
static inline string _utf8 (const string & s)
{
    return s;
}
// ... and a wide string is converted with msra::strfun::utf8().
static inline string _utf8 (const wstring & s)
{
    return msra::strfun::utf8 (s);
}
// Wide-character overload of strnlen() (forwards to wcsnlen()): length of 's',
// but never more than 'n'.
static inline size_t strnlen (wchar_t * s, size_t n)
{
    const size_t len = wcsnlen (s, n);
    return len;
}

#ifdef UNDER_CE     // strlen for char * not defined in winCE
// Replacement strnlen() for char strings: the distance from 's' to the first 0
// character within the first 'n' characters, or 'n' if there is none.
static inline size_t strnlen (const char *s, size_t n)
{
    const char * end = std::find (s, s+n, '\0');
    return end - s;
}
#endif

template<class CHAR>
CHAR * fgetline (FILE * f, CHAR * buf, int size)
{

    unsigned __int64 filepos = fgetpos (f); // (for error message only)
    CHAR * p = fgets (buf, size, f);
    if (p == NULL)			// EOF reached: next time feof() = true
    {
        if (ferror (f))
            ERROR ("error reading line: %s", strerror (errno));
        buf[0] = 0;
        return buf;
    }
    size_t n = strnlen (p, size);

    // check for buffer overflow

    if (n >= (size_t) size -1)
    {
        basic_string<CHAR> example (p, n < 100 ? n : 100);
        ERROR ("input line too long at file offset %I64d (max. %d characters allowed) [%s ...]",
               filepos, size -1, _utf8 (example).c_str());
    }

    // remove newline at end

    if (n > 0 && p[n-1] == '\n')	// UNIX and Windows style
    {
        n--;
        p[n] = 0;
        if (n > 0 && p[n-1] == '\r')	// Windows style
        {
            n--;
            p[n] = 0;
        }
    }
    else if (n > 0 && p[n-1] == '\r')	// Mac style
    {
        n--;
        p[n] = 0;
    }

    return buf;
}

#if 0
// Disabled (inside '#if 0'): dedicated wchar_t version of fgetline().
// Reads one line with fgetws() into 'buf' ('size' characters), calls ERROR() on a
// read error or when the line fills the buffer, strips the trailing "\n", "\r\n"
// or "\r", and returns 'buf' (empty string at end of file).
const wchar_t * fgetline (FILE * f, wchar_t * buf, int size)
{
    wchar_t * p = fgetws (buf, size, f);
    if (p == NULL)			// EOF reached: next time feof() = true
    {
        if (ferror (f))
            ERROR ("error reading line: %s", strerror (errno));
        buf[0] = 0;
        return buf;
    }
    size_t n = wcsnlen (p, size); // SECURITY NOTE: string use has been reviewed

    // check for buffer overflow

    if (n >= (size_t) size -1)
    {
        // show at most the first 100 characters of the offending line in the message
        wstring example (buf, min (n, 100));
        ERROR ("input line too long at file offset %U64d (max. %d characters allowed) [%S ...]",
               fgetpos (f), size -1, example.c_str());
    }

    // remove newline at end

    if (n > 0 && p[n-1] == L'\n')	// UNIX and Windows style
    {
        n--;
        p[n] = 0;
        if (n > 0 && p[n-1] == L'\r')	// Windows style
        {
            n--;
            p[n] = 0;
        }
    }
    else if (n > 0 && p[n-1] == L'\r')	// Mac style
    {
        n--;
        p[n] = 0;
    }

    return buf;
}
#endif

// STL string version
std::string fgetline (FILE * f)
{
    fixed_vector<char> buf (1000000);
    return fgetline (f, &buf[0], (int) buf.size());
}

// STL string version
std::wstring fgetlinew (FILE * f)
{
    fixed_vector<wchar_t> buf (1000000);
    return fgetline (f, &buf[0], (int) buf.size());
}

// STL string version avoiding most memory allocations: the caller supplies the
// work buffer 'buf' (resized here to 1000000 characters) and the line is stored in 's'.
void fgetline (FILE * f, std::string & s, ARRAY<char> & buf)
{
    buf.resize (1000000);    // enough? // KIT: increased to 1M to be safe
    const int capacity = (int) buf.size();
    s.assign (fgetline (f, &buf[0], capacity));
}

// Wide-character counterpart: the caller supplies the work buffer 'buf' (resized
// here to 1000000 characters) and the line is stored in 's'.
void fgetline (FILE * f, std::wstring & s, ARRAY<wchar_t> & buf)
{
    buf.resize (1000000);    // enough? // KIT: increased to 1M to be safe
    const int capacity = (int) buf.size();
    s.assign (fgetline (f, &buf[0], capacity));
}

// char buffer version: reads one line into 'buf', which is first grown to 1000000
// characters and afterwards shrunk to the line length plus the terminating 0.
void fgetline (FILE * f, ARRAY<char> & buf)
{
    const int maxChars = 1000000;    // enough? // KIT: increased to 1M to be safe
    buf.resize (maxChars);
    char * line = &buf[0];
    fgetline (f, line, (int) buf.size());
    const size_t len = strnlen (line, maxChars); // SECURITY NOTE: string use has been reviewed
    buf.resize (len +1);
}

// wchar_t buffer version: reads one line into 'buf', which is first grown to 1000000
// characters and afterwards shrunk to the line length plus the terminating 0.
void fgetline (FILE * f, ARRAY<wchar_t> & buf)
{
    const int maxChars = 1000000;    // enough? // KIT: increased to 1M to be safe
    buf.resize (maxChars);
    wchar_t * line = &buf[0];
    fgetline (f, line, (int) buf.size());
    const size_t len = wcsnlen (line, maxChars); // SECURITY NOTE: string use has been reviewed
    buf.resize (len +1);
}

// read a 0-terminated string from 'f' into 'buf' (capacity 'size' characters incl.
// the terminator) and return 'buf'. Calls ERROR() if the file ends before the
// terminating 0 or if the string does not fit.
const char * fgetstring (FILE * f, __out_z_cap(size) char * buf, int size)
{
    int len = 0;
    for (;;)
    {
        const int ch = fgetc (f);
        if (ch == EOF)
            ERROR ("error reading string or missing 0: %s", strerror (errno));
        if (ch == 0)
            break;                  // terminator found
        if (len >= size -1)
        {
            ERROR ("input line too long (max. %d characters allowed)", size -1);
        }
        buf[len] = (char) ch;
        len++;
    }
    ASSERT (len < size);
    buf[len] = 0;
    return buf;
}

// HANDLE version: reads a 0-terminated string byte by byte with freadOrDie() into
// 'buf' (capacity 'size' characters incl. the terminator) and returns 'buf'.
// Calls ERROR() if the string does not fit.
const char * fgetstring (const HANDLE f, __out_z_cap(size) char * buf, int size)
{
    int len = 0;
    for (;;)
    {
        char ch;
        freadOrDie((void*) &ch, sizeof(char), 1, f);
        if (ch == (char) 0)
            break;                  // terminator found
        if (len >= size -1)
        {
            ERROR ("input line too long (max. %d characters allowed)", size -1);
        }
        buf[len] = (char) ch;
        len++;
    }
    ASSERT (len < size);
    buf[len] = 0;
    return buf;
}

// read a 0-terminated wstring from 'f' and return it (without the terminator).
// Calls ERROR() if the file ends, or a read error occurs, before the terminating 0.
wstring fgetwstring (FILE * f)
{
    wstring res;
    for (;;)
    {
        // fgetwc() returns wint_t and reports end-of-file/error as WEOF. Storing the
        // result in an int and comparing it with EOF (-1) can never match where wint_t
        // is an unsigned 16-bit type, so the value must stay a wint_t.
        const wint_t c = fgetwc (f);
        if (c == WEOF)
            ERROR ("error reading string or missing 0: %s", strerror (errno));
        if (c == 0) break;
        res.push_back ((wchar_t) c);
    }
    return res;
}

// Skips white-space characters (as classified by isspace()) in 'f'. The first
// other character is pushed back so the next read sees it. Stops silently at end
// of file; calls ERROR() on a read error or if the push-back fails.
void fskipspace (FILE * f)
{
    int ch;
    while ((ch = fgetc (f)) != EOF)
    {
        if (isspace (ch))
            continue;               // still inside the white space
        // end of space: undo getting that character
        if (ungetc (ch, f) != ch)
            ERROR ("error in ungetc(): %s", strerror (errno));
        return;
    }
    // hit the end
    if (ferror (f))
        ERROR ("error reading from file: %s", strerror (errno));
}

// fskipNewline(): skip spaces and tabs up to the end of the line, incl. the newline (LF or CR-LF)
// Consumes spaces and tabs, then an optional '\r', and expects a '\n' next;
// calls ERROR() if any other character is found there.
void fskipNewline (FILE * f)
{
    char ch;

    // skip white space: read until the first character that is neither space nor tab
    for (;;)
    {
        freadOrDie (&ch, sizeof (ch), 1, f);
        if (ch != ' ' && ch != '\t')
            break;
    }

    // Windows-style CR-LF: step over the CR
    if (ch == '\r')
        freadOrDie (&ch, sizeof (ch), 1, f);

    if (ch == '\n')
        return;                     // line end consumed
    ERROR ("unexpected garbage at end of line");
}

// read a space-terminated token
// ...TODO: eat trailing space like fscanf() does (i.e. skip surrounding space)
// Skips leading white space, then copies characters up to (not including) the next
// white-space character or end of file into 'buf' (capacity 'size' incl. the
// terminating 0). The terminating white-space character is pushed back.
// Returns 'buf'; calls ERROR() if the token does not fit or the push-back fails.
const char * fgettoken (FILE * f, __out_z_cap(size) char * buf, int size)
{
    fskipspace (f);                         // skip leading space

    int len = 0;
    int ch = fgetc (f);
    while (ch != EOF && !isspace (ch))
    {
        if (len >= size -1)
            ERROR ("input token too long (max. %d characters allowed)", size -1);
        buf[len++] = (char) ch;
        ch = fgetc (f);
    }
    // ... TODO: while (isspace (c)) c = fgetc (f);      // skip trailing space

    // give the delimiter back to the stream
    if (ch != EOF)
    {
        if (ungetc (ch, f) != ch)
            ERROR ("error in ungetc(): %s", strerror (errno));
    }
    ASSERT (len < size);
    buf[len] = 0;
    return buf;
}

// Convenience version: reads a space-terminated token of at most 79 characters
// through a local buffer and returns it as a STRING.
STRING fgettoken (FILE * f)
{
    char token[80];
    const size_t capacity = sizeof(token)/sizeof(token[0]);
    return fgettoken (f, token, capacity);
}

// ----------------------------------------------------------------------------
// fputstring(): write a 0-terminated string
// ----------------------------------------------------------------------------

// Writes the characters of 'str' followed by the terminating 0 to 'f'.
void fputstring (FILE * f, const char * str)
{
    const size_t numChars = strnlen (str, SIZE_MAX)+1;  // +1: include the terminating 0 // SECURITY NOTE: string use has been reviewed
    fwriteOrDie ((void *) str, sizeof (*str), numChars, f);
}

// Writes the characters of 'str' followed by the terminating 0 to the handle 'f'.
void fputstring (const HANDLE f, const char * str)
{
    const size_t numChars = strnlen (str, SIZE_MAX)+1;  // +1: include the terminating 0 // SECURITY NOTE: string use has been reviewed
    fwriteOrDie ((void *) str, sizeof (*str), numChars, f);
}

// std::string version: writes the string's characters plus a terminating 0 to 'f'
// by forwarding its C string to the const char * overload.
void fputstring (FILE * f, const std::string & str)
{
    const char * text = str.c_str();
    fputstring (f, text);
}

// Writes the wide characters of 'str' followed by the terminating 0 to 'f'.
void fputstring (FILE * f, const wchar_t * str)
{
    const size_t numChars = wcsnlen (str, SIZE_MAX)+1;  // +1: include the terminating 0 // SECURITY NOTE: string use has been reviewed
    fwriteOrDie ((void *) str, sizeof (*str), numChars, f);
}

// Write a std::wstring as a 0-terminated wide string by forwarding its
// C-string view to the const wchar_t * overload.
void fputstring (FILE * f, const std::wstring & str)
{
    const wchar_t * chars = str.c_str();
    fputstring (f, chars);
}


// ----------------------------------------------------------------------------
// fgetTag(): read a 4-byte tag & return as a string
// ----------------------------------------------------------------------------

// Read 4 bytes from 'f' and return them as a string.
// (The string ends early if one of the 4 bytes is 0.)
std::string fgetTag (FILE * f)
{
    char tagChars[4 + 1] = { 0 };       // 4 tag bytes plus 0-terminator
    freadOrDie (&tagChars[0], sizeof (tagChars[0]), 4, f);
    return std::string (tagChars);
}

// Read 4 bytes from HANDLE 'f' and return them as a string.
// (The string ends early if one of the 4 bytes is 0.)
std::string fgetTag (const HANDLE f)
{
    char tagChars[4 + 1] = { 0 };       // 4 tag bytes plus 0-terminator
    freadOrDie (&tagChars[0], sizeof (tagChars[0]), 4, f);
    return std::string (tagChars);
}

// ----------------------------------------------------------------------------
// fcheckTag(): read a 4-byte tag & verify it; terminate if wrong tag
// ----------------------------------------------------------------------------

// Read the next 4-byte tag from 'f' and raise an error (via fcompareTag())
// unless it equals 'expectedTag'.
void fcheckTag (FILE * f, const char * expectedTag)
{
    const std::string actualTag = fgetTag (f);
    fcompareTag (actualTag, expectedTag);
}


// Read the next 4-byte tag from HANDLE 'f' and raise an error (via
// fcompareTag()) unless it equals 'expectedTag'.
void fcheckTag (const HANDLE f, const char * expectedTag)
{
    const std::string actualTag = fgetTag (f);
    fcompareTag (actualTag, expectedTag);
}

void fcheckTag_ascii (FILE * f, const STRING & expectedTag)
{
    char buf[20];	// long enough for a tag
    fskipspace (f);
    fgettoken (f, buf, sizeof(buf)/sizeof(*buf));
    if (expectedTag != buf)
    {
        ERROR ("invalid tag '%s' found; expected '%s'", buf, expectedTag.c_str());
    }
}

// ----------------------------------------------------------------------------
// fcompareTag(): compare two tags; terminate if wrong tag
// ----------------------------------------------------------------------------

// Raise an error naming both tags if 'readTag' differs from 'expectedTag';
// otherwise do nothing.
void fcompareTag (const STRING & readTag, const STRING & expectedTag)
{
    const bool mismatch = (readTag != expectedTag);
    if (mismatch)
        ERROR ("invalid tag '%s' found; expected '%s'",
               readTag.c_str(), expectedTag.c_str());
}

// ----------------------------------------------------------------------------
// fputTag(): write a 4-byte tag
// ----------------------------------------------------------------------------

// Write a tag to 'f' without a 0-terminator. 'tag' is expected to be exactly
// 4 characters long (asserted); at most 4 characters are written.
void fputTag (FILE * f, const char * tag)
{
    const int tagChars = 4;
    ASSERT (strnlen (tag, tagChars + 1) == tagChars);
    const size_t count = strnlen (tag, tagChars);       // never more than 4
    fwriteOrDie ((void *) tag, sizeof (*tag), count, f);
}

// Write a tag to HANDLE 'f' without a 0-terminator. 'tag' is expected to be
// exactly 4 characters long (asserted); at most 4 characters are written.
void fputTag(const HANDLE f, const char * tag)
{
    const int tagChars = 4;
    ASSERT (strnlen (tag, tagChars + 1) == tagChars);
    const size_t count = strnlen (tag, tagChars);       // never more than 4
    fwriteOrDie ((void *) tag, sizeof (*tag), count, f);
}

// ----------------------------------------------------------------------------
// fskipstring(): skip a 0-terminated string, such as a pad string
// ----------------------------------------------------------------------------

// Read and discard bytes from 'f' up to and including the next 0 byte.
void fskipstring (FILE * f)
{
    for (;;)
    {
        char ch;
        freadOrDie (&ch, sizeof (ch), 1, f);
        if (ch == 0)        // terminator consumed: done
            break;
    }
}

// ----------------------------------------------------------------------------
// fpad(): write a 0-terminated string to pad file to a n-byte boundary
// (note: file must be opened in binmode to work properly on DOS/Windows!!!)
// ----------------------------------------------------------------------------
void fpad (FILE * f, int n)
{
    // get current writing position
    int pos = ftell (f);
    if (pos == -1)
    {
	ERROR ("error in ftell(): %s", strerror (errno));
    }
    // determine how many bytes are needed (at least 1 for the 0-terminator)
    // and create a dummy string of that length incl. terminator
    int len = n - (pos % n);
    const char dummyString[] = "MSR-Asia: JL+FS";
    size_t offset = sizeof(dummyString)/sizeof(dummyString[0]) - len;
    ASSERT (offset >= 0);
    fputstring (f, dummyString + offset);
}
// ----------------------------------------------------------------------------
// fgetbyte(): read a byte value
// ----------------------------------------------------------------------------

// Read a single char from 'f' and return it.
char fgetbyte (FILE * f)
{
    char value;
    const size_t elementSize = sizeof (value);
    freadOrDie (&value, elementSize, 1, f);
    return value;
}

// ----------------------------------------------------------------------------
// fgetshort(): read a short value
// ----------------------------------------------------------------------------

// Read one short from 'f' in the machine's native byte order and return it.
short fgetshort (FILE * f)
{
    short value;
    const size_t elementSize = sizeof (value);
    freadOrDie (&value, elementSize, 1, f);
    return value;
}

// Read a 2-byte value stored most-significant byte first and return it as a short.
short fgetshort_bigendian (FILE * f)
{
    unsigned char bytes[2];
    freadOrDie (&bytes, sizeof (bytes), 1, f);
    const int highByte = bytes[0];
    const int lowByte  = bytes[1];
    return (short) ((highByte << 8) + lowByte);
}

// ----------------------------------------------------------------------------
// fgetint24(): read a 3-byte (24-bit) int value
// ----------------------------------------------------------------------------

// Read a 3-byte signed integer from 'f' and return it sign-extended to int.
// The 3 bytes are read into the first 3 bytes of the int's storage, so this
// yields the intended value only where those are the low-order bytes
// (little-endian layout).
int fgetint24 (FILE * f)
{
    int v = 0;                              // initialized: the 4th byte is never read from the file
    ASSERT (sizeof (v) == 4);
    freadOrDie (&v, sizeof (v) -1, 1, f);   // only read 3 lower-order bytes
    // move bit 23 (the 24-bit sign) into bit 31; done on unsigned to avoid signed overflow
    v = (int) ((unsigned int) v << 8);
    v >>= 8;                                // shift down 8 bits with sign-extend
    return v;
}

// ----------------------------------------------------------------------------
// fgetint(): read an int value
// ----------------------------------------------------------------------------

// Read one int from 'f' in the machine's native byte order and return it.
int fgetint (FILE * f)
{
    int value;
    const size_t elementSize = sizeof (value);
    freadOrDie (&value, elementSize, 1, f);
    return value;
}

int fgetint (const HANDLE f)
{
    int v;
    freadOrDie (&v, sizeof (v), 1, f);
    return v;
}

// Read a 4-byte value stored most-significant byte first and return it as an int.
int fgetint_bigendian (FILE * f)
{
    unsigned char b[4];
    freadOrDie (&b, sizeof (b), 1, f);
    // assemble in unsigned arithmetic: shifting a byte >= 0x80 up by 24 bits
    // would overflow a signed int
    const unsigned int bits = ((unsigned int) b[0] << 24)
                            | ((unsigned int) b[1] << 16)
                            | ((unsigned int) b[2] << 8)
                            |  (unsigned int) b[3];
    return (int) bits;
}

// Read an unsigned decimal integer in text form from 'f'.
//  - leading white space is skipped
//  - digits are accumulated until the first non-digit, which is pushed back
//  - returns 0 if no digit is found; no sign and no overflow handling
//  - a character must follow the last digit, since each read is via freadOrDie()
int fgetint_ascii (FILE * f)
{
    fskipspace (f);
    int res = 0;
    char c;
    freadOrDie (&c, sizeof (c), 1, f);
    while (isdigit ((unsigned char)c))
    {
        res = (10 * res) + (c - '0');
        freadOrDie (&c, sizeof (c), 1, f);
    }
    // ungetc() takes and returns the character as an unsigned char value.
    // Passing a plain (possibly negative) char would mis-compare for bytes >= 0x80
    // and would be mistaken for EOF for byte 0xFF.
    const int pushed = (unsigned char) c;
    int rc = ungetc (pushed, f);
    if (rc != pushed)
    {
        ERROR ("error in ungetc(): %s", strerror (errno));
    }
    return res;
}

// ----------------------------------------------------------------------------
// fgetfloat(): read a float value
// ----------------------------------------------------------------------------

// Read one float from 'f' in the machine's native binary representation and return it.
float fgetfloat (FILE * f)
{
    float value;
    const size_t elementSize = sizeof (value);
    freadOrDie (&value, elementSize, 1, f);
    return value;
}

float fgetfloat_bigendian (FILE * f)
{
    int bitpattern = fgetint_bigendian (f);
    return *((float*) &bitpattern);
}

float fgetfloat_ascii (FILE * f)
{
    float val;
    fskipspace (f);
    int rc = fscanf (f, "%f", &val); // security hint: safe overloads
    if (rc == 0)
	ERROR ("error reading float value from file (invalid format): %s");
    else if (rc == EOF)
	ERROR ("error reading from file: %s", strerror (errno));
    ASSERT (rc == 1);
    return val;
}

// ----------------------------------------------------------------------------
// fgetdouble(): read a double value
// ----------------------------------------------------------------------------

// Read one double from 'f' in the machine's native binary representation and return it.
double fgetdouble (FILE * f)
{
    double value;
    const size_t elementSize = sizeof (value);
    freadOrDie (&value, elementSize, 1, f);
    return value;
}

// ----------------------------------------------------------------------------
// fgetwav(): read an entire .wav file
// ----------------------------------------------------------------------------

// Fill in all header fields that follow from the format fields already set
// (nSamplesPerSec, nBlockAlign): chunk identifiers, format chunk length (16),
// format tag (1), average byte rate, and the two length fields.
// 'sampleCount' == -1 means "length unknown"; both lengths are then set to
// 0xffffffff.
void WAVEHEADER::prepareRest (int sampleCount)
{
    // chunk identifiers, stored without 0-terminators
    const char riffId[]    = "RIFF";
    const char waveFmtId[] = "WAVEfmt ";
    const char dataId[]    = "data";
    for (int k = 0; k < 4; k++) riffchar[k] = riffId[k];
    for (int k = 0; k < 8; k++) wavechar[k] = waveFmtId[k];
    for (int k = 0; k < 4; k++) datachar[k] = dataId[k];

    // fixed-value fields
    FmtLength       = 16;
    wFormatTag      = 1;
    nAvgBytesPerSec = nSamplesPerSec * nBlockAlign;

    // length fields
    if (sampleCount == -1)
    {
        DataLength  = 0xffffffff;
        RiffLength  = 0xffffffff;
    }
    else
    {
        DataLength  = sampleCount * nBlockAlign;
        RiffLength  = 36 + DataLength;      // 36 = header bytes counted by the RIFF length
    }
}

// Set up the header from individual format values:
//  Fs = samples per second, Bits = bits per sample on one channel,
//  Channels = number of channels, SampleCount = number of sample frames
//  (-1 if unknown, see prepareRest()).
void WAVEHEADER::prepare (unsigned int Fs, int Bits, int Channels, int SampleCount)
{
    const int bytesPerChannelSample = Bits/8;

    nSamplesPerSec  = Fs;
    wBitsPerSample  = (short) Bits;
    nChannels       = (short) Channels;
    nBlockAlign     = (short) (Channels * bytesPerChannelSample);
    nAvgBytesPerSec = Fs * nBlockAlign;

    // derive the remaining fields
    prepareRest (SampleCount);
}

// Set up the header from a WAVEFORMATEX: channel count, sample rate, block
// size and bits per sample are copied; everything else is derived by
// prepareRest() (which sets the format tag to 1 regardless of wfx.wFormatTag).
void WAVEHEADER::prepare (const WAVEFORMATEX & wfx, int sampleCount /* -1 for unknown */)
{
    wBitsPerSample  = wfx.wBitsPerSample;
    nBlockAlign     = wfx.nBlockAlign;
    nSamplesPerSec  = wfx.nSamplesPerSec;
    nChannels       = wfx.nChannels;

    // derive the remaining fields
    prepareRest (sampleCount);
}

// Write this header to 'f' as a 44-byte RIFF/WAVE header and flush the file.
// The field order below is the on-disk layout; do not reorder.
void WAVEHEADER::write (FILE * f)
{
    // RIFF chunk header
    fputTag (f, "RIFF");
    fputint (f, RiffLength);
    fputTag (f, "WAVE");
    // 'fmt ' chunk
    fputTag (f, "fmt ");
    fputint (f, FmtLength);
    fputshort (f, wFormatTag);
    fputshort (f, nChannels);
    fputint (f, nSamplesPerSec);
    fputint (f, nAvgBytesPerSec);
    fputshort (f, nBlockAlign);
    fputshort (f, wBitsPerSample);
    // only the plain layout is written: 16-byte format chunk, format tag 1
    // (note: these checks run after the fields above have been written)
    ASSERT (FmtLength == 16);
    ASSERT (wFormatTag == 1);
    // 'data' chunk header; the sample data itself is written by the caller
    fputTag (f, "data");
    fputint (f, DataLength);
    fflushOrDie (f);
}

// Patch the two length fields of a header already written to 'f' so that
// they match the current file size, then restore the file position.
// Assumes the 44-byte layout produced by write(): RIFF length at offset 4,
// data length at offset 40.
/*static*/ void WAVEHEADER::update (FILE * f)
{
    // remember where we are so that writing can continue afterwards
    const long savedPos = ftell (f);
    if (savedPos == -1L)
    {
        ERROR ("error determining file position: %s", strerror (errno));
    }

    const unsigned int totalBytes = (unsigned int) filesize (f);
    const unsigned int riffBytes  = totalBytes - 8;     // everything after tag and length field
    const unsigned int dataBytes  = riffBytes - 36;     // everything after the 44-byte header

    fseekOrDie (f, 4, SEEK_SET);
    fputint (f, riffBytes);
    fseekOrDie (f, 40, SEEK_SET);
    fputint (f, dataBytes);

    fseekOrDie (f, savedPos, SEEK_SET);
}

// Parse a RIFF/WAVE header from 'f' and leave the file positioned at the
// first data byte.
//  - wRealFormatTag receives the format tag found in the file (1 or 7; a
//    value of -2 is mapped to 1)
//  - bytesPerSample receives the per-channel sample size found in the file
//  - returns the number of sample frames (data length / block size)
// This object is then re-initialized through prepare(), which always sets
// wFormatTag to 1; the file's real tag is available only via wRealFormatTag.
// Unsupported or inconsistent headers raise an error.
unsigned int WAVEHEADER::read (FILE * f, signed short & wRealFormatTag, int & bytesPerSample)
{
    // read header
    fcheckTag (f, "RIFF");
    /*unsigned int riffLen = */ fgetint (f);
    fcheckTag (f, "WAVE");
    fcheckTag (f, "fmt ");
    unsigned int fmtLen = fgetint (f);
    wRealFormatTag = fgetshort (f);
    if (wRealFormatTag == -2)   // MARecorder.exe [Ivan Tashev] puts a -2 for
    {                           // 8-channel recordings (meaning unknown).
        wRealFormatTag = 1;     // Workaround: pretend it is 1 (seems safe)
    }
    (wRealFormatTag == 1 || wRealFormatTag == 7)
        || ERROR ("WAVEHEADER::read: wFormatTag=%d not supported for now", wRealFormatTag);
    unsigned short wChannels = fgetshort (f);
    unsigned long dwSamplesPerSec = fgetint (f);
    unsigned int sampleRate = dwSamplesPerSec;
    /*unsigned long dwAvgBytesPerSec = */ fgetint (f);
    unsigned short wBlockAlign = fgetshort (f);
    unsigned short wBitsPerSample = fgetshort (f);
    (wBitsPerSample <= 16) || ERROR ("WAVEHEADER::read: invalid wBitsPerSample %d", wBitsPerSample);
    bytesPerSample = wBitsPerSample / 8;
    (wBlockAlign == wChannels * bytesPerSample)
        || ERROR ("WAVEHEADER::read: wBlockAlign != wChannels*bytesPerSample not supported");
    // a header with 0 channels or fewer than 8 bits passes the check above with a
    // block size of 0, which would divide by zero when computing the sample count
    (wBlockAlign != 0)
        || ERROR ("WAVEHEADER::read: invalid wBlockAlign 0");
    while (fmtLen > 16) // unused extra garbage in header
    {
        fgetbyte (f);
        fmtLen--;
    }
    if (wRealFormatTag == 7)
    {
        (bytesPerSample == 1) || ERROR ("WAVEHEADER::read: invalid wBitsPerSample %d for mulaw", wBitsPerSample);
        // for this format a 'fact' chunk is expected next; its content is skipped
        fcheckTag (f, "fact");
        unsigned int factLen = fgetint (f);
        while (factLen > 0)
        {
            fgetbyte (f);
            factLen--;
        }
    }
    fcheckTag (f, "data");
    unsigned int dataLen = fgetint (f);
    unsigned int numSamples = dataLen / wBlockAlign;

    // prepare a nice wave header without junk (44 bytes, 16-bit PCM)
    prepare (sampleRate, wBitsPerSample, wChannels, numSamples);

    return numSamples;
}

// Expand one mu-law encoded byte to a 16-bit linear sample.
// After inverting all bits, the byte holds: bit 7 = sign, bits 6..4 = segment
// (exponent), bits 3..0 = mantissa.
static short toolULawToLinear(unsigned char p_ucULawByte)
{
    // magnitude at the start of each of the 8 segments
    static short segmentBase[8] = { 0, 132, 396, 924, 1980, 4092, 8316, 16764 };

    const unsigned char inverted = (unsigned char) ~p_ucULawByte;
    const short exponent = (short) ((inverted >> 4) & 0x07);
    const short mantissa = (short) (inverted & 0x0F);

    // segment start plus mantissa scaled by the segment's step size
    const short magnitude = (short) (segmentBase[exponent] + (mantissa << (exponent + 3)));

    const bool negative = (inverted & 0x80) != 0;
    return negative ? (short) -magnitude : magnitude;
}

// fgetwavraw(): only read data of .wav file. For multi-channel data, samples
// are kept interleaved.
static void fgetwavraw(FILE * f, ARRAY<short> & wav, const WAVEHEADER & wavhd)
{
    int bytesPerSample = wavhd.wBitsPerSample / 8;  // (sample size on one channel)
    wav.resize (wavhd.DataLength / bytesPerSample);
    if (wavhd.wFormatTag == 7)    // mulaw
    {
        (wavhd.nChannels == 1) || ERROR ("fgetwav: wChannels=%d not supported for mulaw", wavhd.nChannels);
        ARRAY<unsigned char> data;
        int numSamples = wavhd.DataLength/wavhd.nBlockAlign;
        data.resize (numSamples);
        freadOrDie (&data[0], sizeof (data[0]), numSamples, f);
        for (int i = 0; i < numSamples; i++)
        {
            wav[i] = toolULawToLinear (data[i]);
        }
    }
    else if (bytesPerSample == 2)
    {   // note: we may be reading an interleaved multi-channel signal.
        freadOrDie (&wav[0], sizeof (wav[0]), wav.size(), f);
    }
    // ... TODO: support 8 bit linear PCM samples (implement when needed; samples scaled to 'short')
    else
    {
        ERROR ("bytesPerSample != 2 is not supported except mulaw format!\n");
    }
}

// ----------------------------------------------------------------------------
// fgetwav(): read an entire .wav file. Stereo is mapped to mono.
// ----------------------------------------------------------------------------

// Read an entire .wav file from 'f': parse the header, then load the samples
// into 'wav' as 16-bit values and return the sample rate in 'sampleRate'.
//  - 1 channel: samples are returned as read
//  - 2 channels: left and right are averaged (rounding up) into one channel
//  - any other channel count raises an error
void fgetwav (FILE * f, ARRAY<short> & wav, int & sampleRate)
{
    WAVEHEADER wavhd;           // will be filled in for 16-bit PCM!!
    signed short wFormatTag;    // real format tag as found in data
    int bytesPerSample;         // bytes per sample as found in data

    unsigned int numSamples = wavhd.read (f, wFormatTag, bytesPerSample);
    sampleRate = (int) wavhd.nSamplesPerSec;

    // read() leaves wavhd.wFormatTag at 1 whatever the file said, which made
    // the mu-law branch of fgetwavraw() unreachable from here. Put the real
    // tag back so that the data is decoded according to its actual format.
    wavhd.wFormatTag = wFormatTag;

    if (wavhd.nChannels == 1)
    {
        fgetwavraw (f, wav, wavhd);
    }
    else if (wavhd.nChannels == 2)
    {
        //read raw data (interleaved: left, right, left, right, ...)
        ARRAY<short> buf;
        buf.resize(numSamples * 2);
        fgetwavraw(f, buf, wavhd);

        //map to mono
        wav.resize (numSamples);
        for (int i = 0; i < (int) numSamples; i++)
        {
            // indexing (rather than a pointer to buf[0]) stays valid for an empty file
            int l = buf[2 * i];
            int r = buf[2 * i + 1];
            int mono = ((l + r) + 1) >> 1;
            wav[i] = (short) mono;
        }
    }
    else
    {
        ERROR ("bytesPerSample/wChannels != 2 needs to be implemented");
    }
}

void fgetwav (const wstring & fn, ARRAY<short> & wav, int & sampleRate)
{
    auto_file_ptr f = fopenOrDie (fn, L"rbS");
    fgetwav (f, wav, sampleRate);
}

// ----------------------------------------------------------------------------
// ... TODO:
//  - rename this function!!
//  - also change to read header itself and return sample rate and channels
// fgetraw(): read data of multi-channel .wav file, and separate data of multiple channels.
//            For example, data[i][j]: i is channel index, 0 means the first
//            channel. j is sample index.
// ----------------------------------------------------------------------------

void fgetraw (FILE *f, ARRAY< ARRAY<short> > & data, const WAVEHEADER & wavhd)
{
    ARRAY<short> wavraw;
    fgetwavraw (f, wavraw, wavhd);
    data.resize (wavhd.nChannels);
    int numSamples = wavhd.DataLength/wavhd.nBlockAlign;
    ASSERT (numSamples == (int) wavraw.size() / wavhd.nChannels);

    for (int i = 0; i < wavhd.nChannels; i++)
    {
        data[i].resize (numSamples);

        for (int j = 0; j < numSamples; j++)
        {
            data[i][j] = wavraw[wavhd.nChannels*j + i];
        }
    }
}

// ----------------------------------------------------------------------------
// fgetwfx(), fputwfx(): direct access to simple WAV headers
// ----------------------------------------------------------------------------

// read header and skip to first data byte; return #samples
unsigned int fgetwfx (FILE * f, WAVEFORMATEX & wfx)
{
    // read header
    fcheckTag (f, "RIFF");
    /*unsigned int riffLen = */ fgetint (f);
    fcheckTag (f, "WAVE");
    fcheckTag (f, "fmt ");
    wfx.cbSize = sizeof (wfx);
    int fmtLen = fgetint (f);
    wfx.wFormatTag = fgetshort (f);
    if (wfx.wFormatTag == -2)   // MARecorder.exe [Ivan Tashev] puts a -2 for
    {                           // 8-channel recordings (meaning unknown).
        wfx.wFormatTag = 1;     // Workaround: pretend it is 1 (seems safe)
    }
    (wfx.wFormatTag == 1 || wfx.wFormatTag == 3 || wfx.wFormatTag == 7)
        || ERROR ("WAVEHEADER::read: wFormatTag=%d not supported for now", wfx.wFormatTag);
    wfx.nChannels = fgetshort (f);
    wfx.nSamplesPerSec = fgetint (f);
    wfx.nAvgBytesPerSec = fgetint (f);
    wfx.nBlockAlign = fgetshort (f);
    wfx.wBitsPerSample = fgetshort (f);
    // unused extra garbage in header
    for ( ; fmtLen > 16; fmtLen--) fgetbyte (f);
    fcheckTag (f, "data");
    unsigned int dataLen = fgetint (f);
    unsigned int numSamples = dataLen / wfx.nBlockAlign;
    return numSamples;
}

// Write a 44-byte RIFF/WAVE header for 'numSamples' sample frames in the
// format described by 'wfx', then flush. The sample data itself is written
// by the caller. Raises an error if the data size does not fit in 32 bits.
void fputwfx (FILE *f, const WAVEFORMATEX & wfx, unsigned int numSamples)
{
    // size of the sample data; dividing back detects 32-bit wrap-around
    const unsigned int dataBytes = numSamples * wfx.nBlockAlign;
    (dataBytes / wfx.nBlockAlign == numSamples)
        || ERROR ("fputwfx: data size exceeds WAV header 32-bit range");
    const unsigned int riffBytes = 36 + dataBytes;
    const unsigned int fmtBytes  = 16;
    ASSERT (wfx.cbSize == 0 || wfx.cbSize == fmtBytes + 2);

    // file header
    fputTag (f, "RIFF");
    fputint (f, riffBytes);
    fputTag (f, "WAVE");

    // 'fmt ' chunk (to hold wfx)
    fputTag (f, "fmt ");
    fputint (f, fmtBytes);
    fputshort (f, wfx.wFormatTag);
    fputshort (f, wfx.nChannels);
    fputint (f, wfx.nSamplesPerSec);
    fputint (f, wfx.nAvgBytesPerSec);
    fputshort (f, wfx.nBlockAlign);
    fputshort (f, wfx.wBitsPerSample);

    // data chunk
    fputTag (f, "data");
    fputint (f, dataBytes);

    fflushOrDie (f);
}

// ----------------------------------------------------------------------------
// fputwav(): write an entire .wav file (16 bit PCM)
// ----------------------------------------------------------------------------

void fputwav (FILE * f, const vector<short> & wav, int sampleRate, int nChannels)
{
    f;wav;sampleRate;nChannels;
    // construct WAVEFORMATEX
    WAVEFORMATEX wfx;
    wfx.cbSize = 16 + 2;  //fmt data + extra data
    wfx.nAvgBytesPerSec = (DWORD)(sampleRate * nChannels * 2); //short: 2 bytes per sample
    wfx.nBlockAlign = (WORD)nChannels * 2; //short: 2bytes per sample
    wfx.nChannels = (WORD)nChannels;
    wfx.nSamplesPerSec = sampleRate;
    wfx.wBitsPerSample = 16;
    wfx.wFormatTag = WAVE_FORMAT_PCM;
    //putwfx
    fputwfx (f, wfx, (unsigned int) wav.size());
    // wrtie the data
    fwriteOrDie (&wav[0], sizeof(wav[0]), wav.size(), f);
}

void fputwav (const wstring & fn, const vector<short> & wav, int sampleRate, int nChannels)
{
    auto_file_ptr f = fopenOrDie (fn, L"wbS");
    fputwav (f, wav, sampleRate, nChannels);
    fflushOrDie (f);    // after this, fclose() (in destructor of f) cannot fail
}

// ----------------------------------------------------------------------------
// fputbyte(): write a byte value
// ----------------------------------------------------------------------------

// Write the single char 'v' to 'f'.
void fputbyte (FILE * f, char v)
{
    const size_t elementSize = sizeof (v);
    fwriteOrDie (&v, elementSize, 1, f);
}

// ----------------------------------------------------------------------------
// fputshort(): write a short value
// ----------------------------------------------------------------------------

// Write the short 'v' to 'f' in the machine's native byte order.
void fputshort (FILE * f, short v)
{
    const size_t elementSize = sizeof (v);
    fwriteOrDie (&v, elementSize, 1, f);
}

// ----------------------------------------------------------------------------
// fputint24(): write a 3-byte (24-bit) int value
// ----------------------------------------------------------------------------

// Write the first 3 bytes of the in-memory representation of 'v' to 'f'.
// These are the 3 low-order bytes only on a little-endian layout.
void fputint24 (FILE * f, int v)
{
    ASSERT (sizeof (v) == 4);
    const size_t bytesToWrite = sizeof (v) -1;  // 3 of the 4 bytes
    fwriteOrDie (&v, bytesToWrite, 1, f);       // write low-order 3 bytes
}

// ----------------------------------------------------------------------------
// fputint(): write an int value
// ----------------------------------------------------------------------------

// Write the int 'v' to 'f' in the machine's native byte order.
void fputint (FILE * f, int v)
{
    const size_t elementSize = sizeof (v);
    fwriteOrDie (&v, elementSize, 1, f);
}

void fputint (const HANDLE f, int v)
{
    fwriteOrDie (&v, sizeof (v), 1, f);
}

// ----------------------------------------------------------------------------
// fputfloat(): write a float value
// ----------------------------------------------------------------------------

// Write the float 'v' to 'f' in the machine's native binary representation.
void fputfloat (FILE * f, float v)
{
    const size_t elementSize = sizeof (v);
    fwriteOrDie (&v, elementSize, 1, f);
}

// ----------------------------------------------------------------------------
// fputdouble(): write a double value
// ----------------------------------------------------------------------------

// Write the double 'v' to 'f' in the machine's native binary representation.
void fputdouble (FILE * f, double v)
{
    const size_t elementSize = sizeof (v);
    fwriteOrDie (&v, elementSize, 1, f);
}

// ----------------------------------------------------------------------------
// fputfile(): write a binary block or a string as a file
// ----------------------------------------------------------------------------

// Create/overwrite the file 'pathname' with the bytes in 'buffer'.
// An empty buffer produces an empty file. Errors are reported by exception.
void fputfile (const WSTRING & pathname, const ARRAY<char> & buffer)
{
    FILE * f = fopenOrDie (pathname, L"wb");
    try
    {
        if (buffer.size() > 0)
        {   // ^^ otherwise buffer[0] is an illegal expression
            fwriteOrDie (&buffer[0], sizeof (buffer[0]), buffer.size(), f);
        }
    }
    catch (...)
    {
        fclose (f);     // write failed: release the file, then pass the error on
        throw;
    }
    // Close outside the try block: if closing fails and throws, the handler
    // above must not call fclose() a second time on the same stream.
    // NOTE(review): assumes fcloseOrDie() calls fclose() before throwing -- confirm
    fcloseOrDie (f);
}

// Create/overwrite the file 'pathname' with the wide characters of 'string'
// (written in binary, sizeof (wchar_t) bytes each, no terminator).
// An empty string produces an empty file. Errors are reported by exception.
void fputfile (const WSTRING & pathname, const std::wstring & string)
{
    FILE * f = fopenOrDie (pathname, L"wb");
    try
    {
        if (string.length() > 0)
        {   // ^^ nothing to write otherwise
            fwriteOrDie (string.c_str(), sizeof (string[0]), string.length(), f);
        }
    }
    catch (...)
    {
        fclose (f);     // write failed: release the file, then pass the error on
        throw;
    }
    // Close outside the try block: if closing fails and throws, the handler
    // above must not call fclose() a second time on the same stream.
    // NOTE(review): assumes fcloseOrDie() calls fclose() before throwing -- confirm
    fcloseOrDie (f);
}

// Create/overwrite the file 'pathname' with the characters of 'string'
// (written in binary, no terminator).
// An empty string produces an empty file. Errors are reported by exception.
void fputfile (const WSTRING & pathname, const std::string & string)
{
    FILE * f = fopenOrDie (pathname, L"wb");
    try
    {
        if (string.length() > 0)
        {   // ^^ nothing to write otherwise
            fwriteOrDie (string.c_str(), sizeof (string[0]), string.length(), f);
        }
    }
    catch (...)
    {
        fclose (f);     // write failed: release the file, then pass the error on
        throw;
    }
    // Close outside the try block: if closing fails and throws, the handler
    // above must not call fclose() a second time on the same stream.
    // NOTE(review): assumes fcloseOrDie() calls fclose() before throwing -- confirm
    fcloseOrDie (f);
}

// ----------------------------------------------------------------------------
// fgetfile(): load a file as a binary block
// ----------------------------------------------------------------------------

// Load the entire file 'pathname' into 'buffer' (resized to the file size).
// Errors are reported by exception; the file is closed in either case.
void fgetfile (const WSTRING & pathname, ARRAY<char> & buffer)
{
    FILE * f = fopenOrDie (pathname, L"rb");
    try
    {
        size_t len = filesize (f);
        buffer.resize (len);
        if (buffer.size() > 0)
        {   // ^^ otherwise buffer[0] is an illegal expression
            freadOrDie (&buffer[0], sizeof (buffer[0]), buffer.size(), f);
        }
    }
    catch (...)
    {
        fclose (f);     // do not leak the file if sizing, allocating, or reading fails
        throw;
    }
    fclose (f);
}

void fgetfile (FILE * f, ARRAY<char> & buffer)
{   // this version reads until eof
    buffer.resize (0);
    buffer.reserve (1000000);   // avoid too many reallocations
    ARRAY<char> inbuf;
    inbuf.resize (65536);         // read in chunks of this size
    while (!feof (f))           // read until eof
    {
        size_t n = fread (&inbuf[0], sizeof (inbuf[0]), inbuf.size(), f);
        if (ferror (f))
        {
            ERROR ("fgetfile: error reading from file: %s", strerror (errno));
        }
        buffer.insert (buffer.end(), inbuf.begin(), inbuf.begin() + n);
    }
    buffer.reserve (buffer.size());
}

// load it into RAM in one huge chunk
static size_t fgetfilechars (const std::wstring & path, vector<char> & buffer)
{
    auto_file_ptr f = fopenOrDie (path, L"rb");
    size_t len = filesize (f);
    buffer.reserve (len +1);
    freadOrDie (buffer, len, f);
    buffer.push_back (0);           // this makes it a proper C string
    return len;
}

// Split the 0-terminated text 's' in place at CR/LF characters and append a
// pointer to each resulting piece to 'lines'. The pointers refer into 's'.
// (As strtok_s() is used, empty lines produce no entry.)
template<class LINES> static void strtoklines (char * s, LINES & lines)
{
    const char * const lineBreaks = "\r\n";
    char * state;                       // tokenizer state between calls
    char * line = strtok_s (s, lineBreaks, &state);
    while (line)
    {
        lines.push_back (line);
        line = strtok_s (NULL, lineBreaks, &state);
    }
}

// Read the text file 'path' and return its lines as strings in 'lines'
// (previous content is discarded). 'buffer' receives the raw file content,
// which gets split in place.
void msra::files::fgetfilelines (const std::wstring & path, vector<char> & buffer, std::vector<std::string> & lines)
{
    // load it into RAM in one huge chunk
    const size_t numBytes = fgetfilechars (path, buffer);

    // parse into lines
    lines.clear();
    lines.reserve (numBytes / 20);      // initial guess of the line count
    char * text = &buffer[0];
    strtoklines (text, lines);
}

// same as above but returning const char* (avoiding the memory allocation)
// Read the text file 'path' and return its lines as pointers into 'buffer',
// which receives the raw file content and gets split in place. The pointers
// are valid only as long as 'buffer' is kept unchanged.
vector<char*> msra::files::fgetfilelines (const wstring & path, vector<char> & buffer)
{
    // load it into RAM in one huge chunk
    const size_t numBytes = fgetfilechars (path, buffer);

    // parse into lines
    vector<char *> linePtrs;
    linePtrs.reserve (numBytes / 20);   // initial guess of the line count
    char * text = &buffer[0];
    strtoklines (text, linePtrs);
    return linePtrs;
}

// ----------------------------------------------------------------------------
// getfiletime(), setfiletime(): access modification time
// ----------------------------------------------------------------------------

bool getfiletime (const wstring & path, FILETIME & time)
{   // return file modification time, false if cannot be determined
    WIN32_FIND_DATAW findFileData;
    auto_handle hFind (FindFirstFileW (path.c_str(), &findFileData), ::FindClose);
    if (hFind != INVALID_HANDLE_VALUE)
    {
        time = findFileData.ftLastWriteTime;
        return true;
    }
    else
    {
        return false;
    }
}

void setfiletime (const wstring & path, const FILETIME & time)
{   // update the file modification time of an existing file
    auto_handle h (CreateFileW (path.c_str(), FILE_WRITE_ATTRIBUTES,
                                FILE_SHARE_READ|FILE_SHARE_WRITE, NULL,
                                OPEN_EXISTING, FILE_ATTRIBUTE_NORMAL, NULL));
    if (h == INVALID_HANDLE_VALUE)
    {
        ERROR ("setfiletime: error opening file: %d", GetLastError());
    }
    BOOL rc = SetFileTime (h, NULL, NULL, &time);
    if (!rc)
    {
        ERROR ("setfiletime: error setting file time information: %d", GetLastError());
    }
}

// ----------------------------------------------------------------------------
// expand_wildcards -- wildcard expansion of a path, including directories.
// ----------------------------------------------------------------------------

// Win32-style variant of this function (in case we want to use it some day)
// Returns 0 in case of failure. May throw in case of bad_alloc.
// Expand a path that may contain wildcards (* and ?) in any of its
// components and append all matches to 'paths'.
//  - forward slashes are treated as backslashes; one trailing backslash is dropped
//  - a path without wildcards is appended as is (without checking existence)
//  - wildcards in directory components are expanded recursively
//  - directory entries whose name begins with '.' are skipped
// Returns TRUE on success (including "no match" in the last component) and
// FALSE on other failures, with the cause left in ::GetLastError().
static BOOL ExpandWildcards (wstring path, vector<wstring> & paths)
{
    // convert root to DOS filename convention
    for (size_t k = 0; k < path.length(); k++) if (path[k] == '/') path[k] = '\\';

    // remove terminating backslash
    // (tested via empty(): an index of length-1 wraps around for an empty path)
    if (!path.empty() && path[path.length() -1] == '\\') path.erase (path.length() -1);

    // convert root to long filename convention
    //if (path.find (L"\\\\?\\") != 0)
    //    path = L"\\\\?\\" + root;

    // split off everything after first wildcard
    size_t wpos = path.find_first_of (L"*?");
    if (wpos == 2 && path[0] == '\\' && path[1] == '\\')
        wpos = path.find_first_of (L"*?", 4);   // 4=skip "\\?\"
    if (wpos == wstring::npos)
    {   // no wildcard: just return it
        paths.push_back (path);
        return TRUE;
    }

    // split off everything afterwards if any
    wstring rest;   // remaining path after this directory
    size_t spos = path.find_first_of (L"\\", wpos +1);
    if (spos != wstring::npos)
    {
        rest = path.substr (spos +1);
        path.erase (spos);
    }

    // crawl folder
    WIN32_FIND_DATAW ffdata;
    auto_handle hFind (::FindFirstFileW (path.c_str(), &ffdata), ::FindClose);
    if (hFind == INVALID_HANDLE_VALUE)
    {
        DWORD err = ::GetLastError();
        if (rest.empty() && err == 2) return TRUE;  // no matching file: empty (2 = ERROR_FILE_NOT_FOUND)
        return FALSE;                   // another error
    }
    size_t pos = path.find_last_of (L"\\");
    if (pos == wstring::npos) throw std::logic_error ("unexpected missing \\ in path");
    wstring parent = path.substr (0, pos);
    do
    {
        // skip this and parent directory
        // (note: this skips every directory whose name starts with '.')
        bool isDir = ((ffdata.dwFileAttributes & (FILE_ATTRIBUTE_DIRECTORY | FILE_ATTRIBUTE_REPARSE_POINT)) != 0);
        if (isDir && ffdata.cFileName[0] == '.') continue;

        wstring filename = parent + L"\\" + ffdata.cFileName;
        if (rest.empty())
        {
            paths.push_back (filename);
        }
        else if (isDir)     // multi-wildcards: further expand
        {
            BOOL rc = ExpandWildcards (filename + L"\\" + rest, paths);
            rc; // error here means no match, e.g. Access Denied to one subfolder
        }
    } while (::FindNextFileW(hFind, &ffdata) != 0);
    return TRUE;
}

void expand_wildcards (const wstring & path, vector<wstring> & paths)
{
    BOOL rc = ExpandWildcards (path, paths);
    if (!rc)
        ERROR ("error in expanding wild cards '%S': %S", path.c_str(), FormatWin32Error (::GetLastError()).c_str());
}

// ----------------------------------------------------------------------------
// make_intermediate_dirs() -- make all intermediate dirs on a path
// ----------------------------------------------------------------------------

// Create the directory 'path'. It is not an error if it already exists.
// Raises an error if the directory can neither be created nor found.
static void mkdir (const wstring & path)
{
    int rc = _wmkdir (path.c_str());
    if (rc >= 0 || errno == EEXIST)
        return;     // no error or already existing --ok
    if (errno == EACCES)
    {
        // bug in _wmkdir(): returns access_denied if folder exists but read-only --check existence
        DWORD att = ::GetFileAttributesW (path.c_str());
        // Both conditions must hold: the lookup succeeded AND it is a directory.
        // (With '||' this test was always true, since the invalid value has all
        // bits set, so a genuine access-denied error was silently accepted.)
        if (att != INVALID_FILE_ATTRIBUTES && (att & FILE_ATTRIBUTE_DIRECTORY) != 0)
            return; // ok
    }
    ERROR ("make_intermediate_dirs: error creating intermediate directory %S", path.c_str());
}

// make subdir of a file including parents
// make subdir of a file including parents
// Splits 'filepath' at '/' and '\' and creates each directory level in turn.
// The last component is taken to be the file name and is never created; to
// create a directory path, pass it with a trailing slash.
// Errors from creating a directory are raised by mkdir().
void msra::files::make_intermediate_dirs (const wstring & filepath)
{
    // work on a writable copy, since wcstok() modifies the string it splits
    vector<wchar_t> buf;
    buf.resize (filepath.length() +1, 0);
    wcscpy_s (&buf[0], buf.size(), filepath.c_str());
    wstring subpath;        // path rebuilt so far, using '/' as the separator
    int skip = 0;           // number of leading levels for which mkdir() is suppressed
    // if share (\\) then the first two levels (machine, share name) cannot be made
    if ((buf[0] == '/' && buf[1] == '/') || (buf[0] == '\\' && buf[1] == '\\'))
    {
        subpath = L"/";     // with the separator added in the loop this yields the leading "//"
        skip = 2;           // skip two levels (machine, share)
    }
    // make all constituents except the filename (to make a dir, include a trailing slash)
    for (const wchar_t * p = wcstok (&buf[0], L"/\\"); p; p = wcstok (NULL, L"/\\"))
    {
        // 'subpath' holds the path up to but excluding component 'p': creating it
        // here, before appending 'p', is what leaves out the final component
        // NOTE(review): for a share path, 'skip' reaches 0 when subpath is
        // "//machine/share", so mkdir() is still called for the share level -- confirm intended
        if (subpath != L"" && subpath != L"/" && subpath != L"\\" && skip == 0)
        {
            mkdir (subpath);
        }
        else if (skip > 0) skip--;  // skip this level
        // rebuild the final path
        if (subpath != L"") subpath += L"/";
        subpath += p;
    }
}

// ----------------------------------------------------------------------------
// fuptodate() -- test whether an output file is at least as new as an input file
// ----------------------------------------------------------------------------

// test if file 'target' is not older than 'input' --used for make mode
// 'input' must exist if 'inputrequired'; otherwise, if 'target' exists, it is considered up to date
// 'target' may or may not exist
bool msra::files::fuptodate (const wstring & target, const wstring & input, bool inputrequired)
{
    FILETIME targettime;
    if (!getfiletime (target, targettime)) return false;        // target missing: need to update
    FILETIME inputtime;
    if (!getfiletime (input, inputtime)) return !inputrequired; // input missing: if required, pretend to be out of date as to force caller to fail
    ULARGE_INTEGER targett, inputt;
    memcpy (&targett, &targettime, sizeof (targett));
    memcpy (&inputt,  &inputtime, sizeof (inputt));
    return !(targett.QuadPart < inputt.QuadPart);               // up to date if target not older than input
}