1741 строка
57 KiB
C++
1741 строка
57 KiB
C++
//
|
|
// <copyright file="FileUtil.cpp" company="Microsoft">
|
|
// Copyright (c) Microsoft Corporation. All rights reserved.
|
|
// </copyright>
|
|
//
|
|
|
|
#ifndef _CRT_SECURE_NO_WARNINGS
|
|
#define _CRT_SECURE_NO_WARNINGS // "secure" CRT not available on all platforms --add this at the top of all CPP files that give "function or variable may be unsafe" warnings
|
|
#endif
|
|
|
|
#define _CRT_NONSTDC_NO_DEPRECATE // make VS accept POSIX functions without _
|
|
#pragma warning (disable: 4996) // ^^ this does not seem to work--TODO: make it work
|
|
#define _FILE_OFFSET_BITS 64 // to force fseeko() and ftello() 64 bit in Linux
|
|
|
|
#ifndef UNDER_CE // fixed-buffer overloads not available for wince
|
|
#ifdef _CRT_SECURE_CPP_OVERLOAD_STANDARD_NAMES // fixed-buffer overloads for strcpy() etc.
|
|
#undef _CRT_SECURE_CPP_OVERLOAD_STANDARD_NAMES
|
|
#endif
|
|
#define _CRT_SECURE_CPP_OVERLOAD_STANDARD_NAMES 1
|
|
#endif
|
|
#include "Basics.h"
|
|
#include "fileutil.h"
|
|
#ifdef __unix__
|
|
#include <sys/types.h>
|
|
#include <sys/stat.h>
|
|
#include <unistd.h>
|
|
#endif
|
|
#include <stdio.h>
|
|
#include <string.h>
|
|
#include <ctype.h>
|
|
#include <stdint.h>
|
|
#include <assert.h>
|
|
#ifdef _WIN32
|
|
#include "Windows.h" // for FILETIME
|
|
#endif
|
|
#include <algorithm> // for std::find
|
|
#include <limits.h>
|
|
#include <memory>
|
|
#ifndef UNDER_CE // some headers don't exist under winCE - the appropriate definitions seem to be in stdlib.h
|
|
#if defined(_WIN32) || defined(__CYGWIN__)
|
|
#include <fcntl.h> // for _O_BINARY/TEXT - not needed for wince
|
|
#include <io.h> // for _setmode()
|
|
#define SET_BINARY_MODE(handle) setmode(handle, _O_BINARY)
|
|
#define SET_TEXT_MODE(handle) setmode(handle, _O_TEXT)
|
|
#else
|
|
#define SET_BINARY_MODE(handle) ((int)0)
|
|
#define SET_TEXT_MODE(handle) ((int)0)
|
|
#endif
|
|
#endif
|
|
|
|
#define __out_z_cap(x) // a fake SAL annotation; this may come in handy some day if we try static code analysis, so I don't want to delete it
|
|
|
|
#include <errno.h>
|
|
|
|
using namespace std;
|
|
using namespace Microsoft::MSR::CNTK;
|
|
|
|
// ----------------------------------------------------------------------------
|
|
// some mappings for non-Windows builds
|
|
// ----------------------------------------------------------------------------
|
|
|
|
// Explicit specializations of GetScanFormatString(): one per supported value type.
// Each returns the wide-character conversion specification used to scan a value of
// that type; the leading blank makes the scan skip any preceding white space.
template <>
const wchar_t* GetScanFormatString(char) { return L" %hc"; }

template <>
const wchar_t* GetScanFormatString(wchar_t) { return L" %lc"; }

template <>
const wchar_t* GetScanFormatString(short) { return L" %hi"; }

template <>
const wchar_t* GetScanFormatString(int) { return L" %i"; }

template <>
const wchar_t* GetScanFormatString(long) { return L" %li"; }

template <>
const wchar_t* GetScanFormatString(unsigned short) { return L" %hu"; }

template <>
const wchar_t* GetScanFormatString(unsigned int) { return L" %u"; }

// (disabled) template <> const wchar_t* GetScanFormatString(unsigned long) {return L" %lu";}

template <>
const wchar_t* GetScanFormatString(float) { return L" %g"; }

template <>
const wchar_t* GetScanFormatString(double) { return L" %lg"; }

template <>
const wchar_t* GetScanFormatString(size_t) { return L" %llu"; }

template <>
const wchar_t* GetScanFormatString(long long) { return L" %lli"; }
|
|
|
|
// Explicit specializations of GetFormatString(): one per supported value type.
// Each returns the wide-character format specification associated with that type;
// float and double are formatted with 9 and 17 significant digits respectively.
template <>
const wchar_t* GetFormatString(char) { return L" %hc"; }

template <>
const wchar_t* GetFormatString(wchar_t) { return L" %lc"; }

template <>
const wchar_t* GetFormatString(short) { return L" %hi"; }

template <>
const wchar_t* GetFormatString(int) { return L" %i"; }

template <>
const wchar_t* GetFormatString(long) { return L" %li"; }

template <>
const wchar_t* GetFormatString(unsigned short) { return L" %hu"; }

template <>
const wchar_t* GetFormatString(unsigned int) { return L" %u"; }

// (disabled) template <> const wchar_t* GetFormatString(unsigned long) {return L" %lu";}

template <>
const wchar_t* GetFormatString(float) { return L" %.9g"; }

template <>
const wchar_t* GetFormatString(double) { return L" %.17g"; }

template <>
const wchar_t* GetFormatString(size_t) { return L" %llu"; }

template <>
const wchar_t* GetFormatString(long long) { return L" %lli"; }

// zero-terminated narrow and wide strings
template <>
const wchar_t* GetFormatString(const char*) { return L" %hs"; }

template <>
const wchar_t* GetFormatString(const wchar_t*) { return L" %ls"; }
|
|
|
|
// ----------------------------------------------------------------------------
|
|
// fgetText() specializations for fwscanf differences: get a value from a text file
|
|
// ----------------------------------------------------------------------------
|
|
void fgetText(FILE * f, char& v)
|
|
{
|
|
const wchar_t* formatString = GetFormatString(v);
|
|
int rc = fwscanf(f, formatString, &v);
|
|
if (rc == 0)
|
|
RuntimeError ("error reading value from file (invalid format): %s", formatString);
|
|
else if (rc == EOF)
|
|
RuntimeError ("error reading from file: %s", strerror (errno));
|
|
assert(rc == 1);
|
|
}
|
|
void fgetText(FILE * f, wchar_t& v)
|
|
{
|
|
const wchar_t* formatString = GetFormatString(v);
|
|
int rc = fwscanf(f, formatString, &v);
|
|
if (rc == 0)
|
|
RuntimeError ("error reading value from file (invalid format): %s", formatString);
|
|
else if (rc == EOF)
|
|
RuntimeError ("error reading from file: %s", strerror (errno));
|
|
assert(rc == 1);
|
|
}
|
|
|
|
// ----------------------------------------------------------------------------
|
|
// fopenOrDie(): like fopen() but terminate with err msg in case of error.
|
|
// A pathname of "-" returns stdout or stdin, depending on mode, and it will
|
|
// change the binary mode if 'b' or 't' are given. If you use this, make sure
|
|
// not to fclose() such a handle.
|
|
// ----------------------------------------------------------------------------
|
|
|
|
// Wide-character overload of strchr() so that code templated on the character
// type can use one spelling; simply forwards to wcschr().
static const wchar_t * strchr (const wchar_t * text, wchar_t wanted)
{
    const wchar_t * hit = wcschr (text, wanted);
    return hit;
}
|
|
|
|
// pathname is "-" -- open stdin or stdout. Changes bin mode if 'b' or 't' given.
|
|
template<class _T> FILE * fopenStdHandle (const _T * mode)
|
|
{
|
|
FILE * f = strchr (mode, 'r') ? stdin : stdout;
|
|
if (strchr (mode, 'b') || strchr (mode, 't')) // change binary mode
|
|
fsetmode (f, strchr(mode, 'b') ? 'b' : 't');
|
|
return f;
|
|
}
|
|
|
|
FILE * fopenOrDie (const string & pathname, const char * mode)
|
|
{
|
|
FILE * f = (pathname[0] == '-') ? fopenStdHandle (mode) : fopen (pathname.c_str(), mode);
|
|
if (f == NULL)
|
|
{
|
|
RuntimeError ("error opening file '%s': %s", pathname.c_str(), strerror (errno));
|
|
}
|
|
if (strchr (mode, 'S'))
|
|
{ // if optimized for sequential access then use large buffer
|
|
setvbuf (f, NULL, _IOFBF, 10000000); // OK if it fails
|
|
}
|
|
return f;
|
|
}
|
|
|
|
FILE * fopenOrDie (const wstring & pathname, const wchar_t * mode)
|
|
{
|
|
FILE * f = (pathname[0] == '-') ? fopenStdHandle (mode) : _wfopen (pathname.c_str(), mode);
|
|
if (f == NULL)
|
|
{
|
|
RuntimeError ("error opening file '%S': %s", pathname.c_str(), strerror (errno));
|
|
}
|
|
if (strchr (mode, 'S'))
|
|
{ // if optimized for sequential access then use large buffer
|
|
setvbuf (f, NULL, _IOFBF, 10000000); // OK if it fails
|
|
}
|
|
return f;
|
|
}
|
|
|
|
// ----------------------------------------------------------------------------
|
|
// set mode to binary or text (pass 'b' or 't')
|
|
// ----------------------------------------------------------------------------
|
|
|
|
void fsetmode(FILE * f, char type)
|
|
{
|
|
if (type != 'b' && type != 't')
|
|
{
|
|
RuntimeError ("fsetmode: invalid type '%c'", type);
|
|
}
|
|
#ifdef UNDER_CE // winCE and win32 have different return types for _fileno
|
|
FILE *fd = fileno (f); // note: no error check possible
|
|
#else
|
|
int fd = fileno (f); // note: no error check possible
|
|
#endif
|
|
int rc = ( type == 'b' ? SET_BINARY_MODE(fd) : SET_TEXT_MODE(fd) );
|
|
if (rc == -1)
|
|
{
|
|
RuntimeError ("error changing file mode: %s", strerror (errno));
|
|
}
|
|
}
|
|
|
|
// ----------------------------------------------------------------------------
|
|
// freadOrDie(): like fread() but terminate with err msg in case of error
|
|
// ----------------------------------------------------------------------------
|
|
|
|
void freadOrDie (void * ptr, size_t size, size_t count, FILE * f)
|
|
{
|
|
// \\XXX\C$ reads are limited, with some randomness (e.g. 48 MB), on Windows 7 32 bit, so we break this into chunks of some MB. Meh.
|
|
while (count > 0)
|
|
{
|
|
size_t chunkn = min (count, (size_t)15*1024*1024); // BUGBUG: I surely meant this limit to be bytes, not units of 'size'...
|
|
size_t n = fread (ptr, size, chunkn, f);
|
|
if (n != chunkn)
|
|
RuntimeError ("error reading from file: %s", strerror (errno));
|
|
count -= n;
|
|
ptr = n * size + (char*) ptr;
|
|
}
|
|
}
|
|
|
|
// ----------------------------------------------------------------------------
|
|
// fwriteOrDie(): like fwrite() but terminate with err msg in case of error;
|
|
// Windows C std lib fwrite() has problems writing >100 MB at a time (fails
|
|
// with Invalid Argument error), so we break it into chunks (yak!!)
|
|
// ----------------------------------------------------------------------------
|
|
|
|
void fwriteOrDie (const void * ptr, size_t size, size_t count, FILE * f)
|
|
{
|
|
const char * p1 = (const char *) ptr;
|
|
size_t totalBytes = size * count;
|
|
while (totalBytes > 0)
|
|
{
|
|
size_t wantWrite = totalBytes;
|
|
#define LIMIT (16*1024*1024) // limit to 16 MB at a time
|
|
if (wantWrite > LIMIT)
|
|
{
|
|
wantWrite = LIMIT;
|
|
}
|
|
size_t n = fwrite ((const void *) p1, 1, wantWrite, f);
|
|
if (n != wantWrite)
|
|
{
|
|
RuntimeError ("error writing to file (ptr=0x%08lx, size=%d,"
|
|
" count=%d, writing %d bytes after %d): %s",
|
|
ptr, size, count, (int) wantWrite,
|
|
(int) (size * count - totalBytes),
|
|
strerror (errno));
|
|
}
|
|
totalBytes -= wantWrite;
|
|
p1 += wantWrite;
|
|
}
|
|
}
|
|
|
|
long fseekOrDie (FILE * f, long offset, int mode)
|
|
{
|
|
long curPos = ftell (f);
|
|
if (curPos == -1L)
|
|
{
|
|
RuntimeError ("error seeking: %s", strerror (errno));
|
|
}
|
|
int rc = fseek (f, offset, mode);
|
|
if (rc != 0)
|
|
{
|
|
RuntimeError ("error seeking: %s", strerror (errno));
|
|
}
|
|
return curPos;
|
|
}
|
|
|
|
// ----------------------------------------------------------------------------
|
|
// fprintfOrDie(): like fprintf() but terminate with err msg in case of error
|
|
// ----------------------------------------------------------------------------
|
|
|
|
#pragma warning(push)
|
|
#pragma warning(disable : 4793) // 'vararg' : causes native code generation
|
|
void fprintfOrDie (FILE * f, const char * fmt, ...)
|
|
{
|
|
va_list arg_ptr;
|
|
va_start (arg_ptr, fmt);
|
|
int rc = vfprintf (f, fmt, arg_ptr);
|
|
if (rc < 0)
|
|
{
|
|
RuntimeError ("error writing to file: %s", strerror (errno));
|
|
}
|
|
}
|
|
#pragma warning(pop)
|
|
|
|
// ----------------------------------------------------------------------------
|
|
// fflushOrDie(): like fflush() but terminate with err msg in case of error
|
|
// ----------------------------------------------------------------------------
|
|
|
|
void fflushOrDie (FILE * f)
|
|
{
|
|
int rc = fflush (f);
|
|
if (rc != 0)
|
|
{
|
|
RuntimeError ("error flushing to file: %s", strerror (errno));
|
|
}
|
|
}
|
|
|
|
// ----------------------------------------------------------------------------
|
|
// filesize(): determine size of the file in bytes (with open file)
|
|
// ----------------------------------------------------------------------------
|
|
size_t filesize (FILE * f)
|
|
{
|
|
#ifdef _WIN32
|
|
size_t curPos = _ftelli64(f);
|
|
if (curPos == -1L)
|
|
{
|
|
RuntimeError ("error determining file position: %s", strerror (errno));
|
|
}
|
|
int rc = _fseeki64 (f, 0, SEEK_END);
|
|
if (rc != 0)
|
|
RuntimeError ("error seeking to end of file: %s", strerror (errno));
|
|
size_t len = _ftelli64 (f);
|
|
if (len == -1L)
|
|
RuntimeError ("error determining file position: %s", strerror (errno));
|
|
rc = _fseeki64 (f, curPos, SEEK_SET);
|
|
if (rc != 0)
|
|
RuntimeError ("error resetting file position: %s", strerror (errno));
|
|
return len;
|
|
#else // TODO: test this
|
|
struct stat stat_buf;
|
|
int rc = fstat(fileno(f), &stat_buf);
|
|
if (rc != 0)
|
|
RuntimeError("error determining length of file: %s", strerror(errno));
|
|
static_assert (sizeof(stat_buf.st_size)>=sizeof(uint64_t), "struct stat not compiled for 64-bit mode");
|
|
return stat_buf.st_size;
|
|
#endif
|
|
}
|
|
|
|
// filesize(): determine size of the file in bytes (with pathname)
|
|
size_t filesize (const wchar_t * pathname)
|
|
{
|
|
FILE * f = fopenOrDie (pathname, L"rb");
|
|
try
|
|
{
|
|
size_t len = filesize (f);
|
|
fclose (f);
|
|
return (size_t) len;
|
|
}
|
|
catch (...)
|
|
{
|
|
fclose (f);
|
|
throw;
|
|
}
|
|
}
|
|
|
|
#ifndef UNDER_CE // no 64-bit under winCE
|
|
|
|
// filesize64(): determine size of the file in bytes (with pathname)
|
|
int64_t filesize64 (const wchar_t * pathname)
|
|
{
|
|
#ifdef _WIN32
|
|
struct _stat64 fileinfo;
|
|
if (_wstat64 (pathname,&fileinfo) == -1)
|
|
return 0;
|
|
else
|
|
return fileinfo.st_size;
|
|
#else
|
|
return filesize (pathname);
|
|
#endif
|
|
}
|
|
#endif
|
|
|
|
// ----------------------------------------------------------------------------
|
|
// fget/setpos(): seek functions with error handling
|
|
// ----------------------------------------------------------------------------
|
|
|
|
uint64_t fgetpos (FILE * f)
|
|
{
|
|
#ifdef _MSC_VER // standard does not allow to cast between fpos_t and integer numbers, and indeed it does not work on Linux (but on Windows and GCC)
|
|
fpos_t post;
|
|
int rc = ::fgetpos (f, &post);
|
|
if (rc != 0)
|
|
RuntimeError ("error getting file position: %s", strerror (errno));
|
|
#else
|
|
auto pos = ftello (f);
|
|
uint64_t post = (uint64_t) pos;
|
|
static_assert (sizeof (post) >= sizeof (pos), "64-bit file offsets not enabled");
|
|
if ((decltype (pos)) post != pos)
|
|
LogicError("64-bit file offsets not enabled");
|
|
#endif
|
|
return post;
|
|
}
|
|
|
|
void fsetpos (FILE * f, uint64_t reqpos)
|
|
{
|
|
#ifdef _MSC_VER // standard does not allow to cast between fpos_t and integer numbers, and indeed it does not work on Linux (but on Windows and GCC)
|
|
#ifdef _MSC_VER // special hack for VS CRT
|
|
// Visual Studio's ::fsetpos() flushes the read buffer. This conflicts with a situation where
|
|
// we generally read linearly but skip a few bytes or KB occasionally, as is
|
|
// the case in speech recognition tools. This requires a number of optimizations.
|
|
|
|
uint64_t curpos = fgetpos (f);
|
|
uint64_t cureob = curpos + f->_cnt; // UGH: we mess with an internal structure here
|
|
while (reqpos >= curpos && reqpos < cureob)
|
|
{
|
|
// if we made it then do not call fsetpos()
|
|
if (reqpos == fgetpos (f))
|
|
return;
|
|
|
|
// if we seek within the existing buffer, then just move to the position by dummy reads
|
|
char buf[65536];
|
|
size_t n = min ((size_t) reqpos - (size_t) curpos, _countof (buf));
|
|
fread (buf, sizeof (buf[0]), n, f); // (this may fail, but really shouldn't)
|
|
curpos += n;
|
|
|
|
// since we mess with f->_cnt, if something unexpected happened to the buffer then back off
|
|
if (curpos != fgetpos (f) || curpos + f->_cnt != cureob)
|
|
break; // oops
|
|
}
|
|
#endif // end special hack for VS CRT
|
|
|
|
// actually perform the seek
|
|
fpos_t post = reqpos;
|
|
int rc = ::fsetpos(f, &post);
|
|
#else // assuming __unix__
|
|
off_t post = (off_t) reqpos;
|
|
static_assert (sizeof (off_t) >= sizeof (reqpos), "64-bit file offsets not enabled");
|
|
if ((decltype (reqpos)) post != reqpos)
|
|
LogicError("64-bit file offsets not enabled");
|
|
int rc = fseeko(f, post, SEEK_SET);
|
|
#endif
|
|
if (rc != 0)
|
|
RuntimeError("error setting file position: %s", strerror(errno));
|
|
}
|
|
|
|
// ----------------------------------------------------------------------------
|
|
// unlinkOrDie(): unlink() with error handling
|
|
// ----------------------------------------------------------------------------
|
|
|
|
void unlinkOrDie (const std::string & pathname)
|
|
{
|
|
if (unlink (pathname.c_str()) != 0 && errno != ENOENT) // if file is missing that's what we want
|
|
RuntimeError ("error deleting file '%s': %s", pathname.c_str(), strerror (errno));
|
|
}
|
|
void unlinkOrDie (const std::wstring & pathname)
|
|
{
|
|
if (_wunlink (pathname.c_str()) != 0 && errno != ENOENT) // if file is missing that's what we want
|
|
RuntimeError ("error deleting file '%S': %s", pathname.c_str(), strerror (errno));
|
|
}
|
|
|
|
// ----------------------------------------------------------------------------
|
|
// renameOrDie(): rename() with error handling
|
|
// ----------------------------------------------------------------------------
|
|
|
|
void renameOrDie (const std::string & from, const std::string & to)
|
|
{
|
|
#ifdef _WIN32
|
|
// deleting destination file if exits (to match Linux semantic)
|
|
if (fexists(to.c_str()) && !DeleteFileA(to.c_str()))
|
|
RuntimeError("error deleting file: '%s': %d", to.c_str(), GetLastError());
|
|
|
|
if (!MoveFileA (from.c_str(), to.c_str()))
|
|
RuntimeError("error renaming file '%s': %d", from.c_str(), GetLastError());
|
|
#else
|
|
if (rename (from.c_str(), to.c_str()) != 0)
|
|
RuntimeError("error renaming file '%s': %s", from.c_str(), strerror(errno));
|
|
#endif
|
|
}
|
|
|
|
void renameOrDie (const std::wstring & from, const std::wstring & to)
|
|
{
|
|
#ifdef _WIN32
|
|
// deleting destination file if exits (to match Linux semantic)
|
|
if (fexists(to.c_str()) && !DeleteFileW(to.c_str()))
|
|
RuntimeError("error deleting file '%S': %d", to.c_str(), GetLastError());
|
|
|
|
if (!MoveFileW(from.c_str(), to.c_str()))
|
|
RuntimeError ("error renaming file '%S': %d", from.c_str(), GetLastError());
|
|
#else
|
|
renameOrDie (charpath(from), charpath(to));
|
|
#endif
|
|
}
|
|
|
|
// ----------------------------------------------------------------------------
|
|
// fputstring(): write a 0-terminated string
|
|
// ----------------------------------------------------------------------------
|
|
|
|
void fputstring (FILE * f, const char * str)
|
|
{
|
|
fwriteOrDie ((void *) str, sizeof (*str), strnlen (str, SIZE_MAX)+1, f); // SECURITY NOTE: string use has been reviewed
|
|
}
|
|
|
|
// std::string convenience overload: writes the string's c_str() form (characters
// up to the first 0, plus the terminator) via the 'const char *' overload.
void fputstring (FILE * f, const std::string & str)
{
    const char * text = str.c_str();
    fputstring (f, text);
}
|
|
|
|
#ifdef _WIN32
|
|
#pragma warning(push)
|
|
#pragma warning(disable : 4127)
|
|
#endif
|
|
void fputstring (FILE * f, const wchar_t * str)
|
|
{
|
|
if (sizeof (*str) == 2)
|
|
{
|
|
fwriteOrDie ((void *) str, sizeof (*str), wcsnlen (str, SIZE_MAX)+1, f); // SECURITY NOTE: string use has been reviewed
|
|
} else if (sizeof (*str) == 4)
|
|
{
|
|
size_t strLen = wcsnlen(str, SIZE_MAX);
|
|
std::unique_ptr<char16_t[]> str16(new char16_t[strLen + 1]);
|
|
for (int i = 0; i < strLen; i++)
|
|
{
|
|
str16[i] = (char16_t) str[i];
|
|
}
|
|
str16[strLen] = 0;
|
|
fwriteOrDie((void *)str16.get(), sizeof (*str) / 2, strLen + 1, f); // SECURITY NOTE: string use has been reviewed
|
|
} else
|
|
{
|
|
RuntimeError("error: unknown encoding\n");
|
|
}
|
|
}
|
|
#ifdef _WIN32
|
|
#pragma warning(pop)
|
|
#endif
|
|
|
|
// std::wstring convenience overload: writes the string's c_str() form via the
// 'const wchar_t *' overload.
void fputstring (FILE * f, const std::wstring & str)
{
    const wchar_t * text = str.c_str();
    fputstring (f, text);
}
|
|
|
|
// ----------------------------------------------------------------------------
|
|
// fexists(): test if a file exists
|
|
// ----------------------------------------------------------------------------
|
|
|
|
bool fexists (const wchar_t * pathname)
|
|
{
|
|
#ifdef _MSC_VER
|
|
WIN32_FIND_DATAW findFileData;
|
|
HANDLE hFind = FindFirstFileW (pathname, &findFileData);
|
|
if (hFind != INVALID_HANDLE_VALUE)
|
|
{
|
|
FindClose (hFind);
|
|
return true;
|
|
}
|
|
else
|
|
{
|
|
return false;
|
|
}
|
|
#else
|
|
auto_file_ptr f (_wfopen (pathname, L"r"));
|
|
return f != nullptr;
|
|
#endif
|
|
}
|
|
|
|
bool fexists (const char * pathname)
|
|
{
|
|
#ifdef _MSC_VER
|
|
WIN32_FIND_DATAA findFileData;
|
|
HANDLE hFind = FindFirstFileA (pathname, &findFileData);
|
|
if (hFind != INVALID_HANDLE_VALUE)
|
|
{
|
|
FindClose (hFind);
|
|
return true;
|
|
}
|
|
else
|
|
{
|
|
return false;
|
|
}
|
|
#else
|
|
auto_file_ptr f (fopen (pathname, "r"));
|
|
return f != nullptr;
|
|
#endif
|
|
}
|
|
|
|
// ----------------------------------------------------------------------------
|
|
// funicode(): test if a file uses unicode by reading its BOM
|
|
// ----------------------------------------------------------------------------
|
|
|
|
// Tests for a byte-order mark: reads two bytes from 'f' as a native unsigned
// short and compares them with 0xFEFF.
// Returns true on a match, leaving the stream positioned after those two bytes.
// Otherwise (no match, or fewer than two bytes available) the stream is moved to
// offset 0 and false is returned.
bool funicode (FILE * f)
{
    unsigned short bom;
    const bool gotWord = (fread (&bom, sizeof(short), 1, f) == 1);
    if (gotWord && (int) bom == 0xFEFF)
        return true;

    // not a BOM: go back to the start of the file
    fseek (f, 0, SEEK_SET);
    return false;
}
|
|
|
|
// ----------------------------------------------------------------------------
|
|
// fgetline(): like fgets() but terminate with err msg in case of error;
|
|
// removes the newline character at the end (like gets());
|
|
// Returns 'buf' (always). buf guaranteed to be 0-terminated.
|
|
// ----------------------------------------------------------------------------
|
|
|
|
// strnlen() fallback for environments whose C library lacks it:
//  - Cygwin, which we use to quick-check GCC builds under Windows (although it is
//    not a real target platform)
//  - winCE
// Returns the number of characters before the first 0, but never more than 'n'.
#if defined(__CYGWIN__) || defined(UNDER_CE)
static inline size_t strnlen (const char *s, size_t n)
{
    // Count with an index instead of forming 's + n': callers in this file pass
    // SIZE_MAX as the limit, for which that end pointer would overflow.
    size_t len = 0;
    while (len < n && s[len] != '\0')
        len++;
    return len;
}
#endif
|
|
|
|
// Wide-character overloads so that the fgetline() template can use the same
// spellings for both character types.

// fgets() for wchar_t buffers: forwards to fgetws()
static inline wchar_t * fgets(wchar_t * buf, int n, FILE * f)
{
    wchar_t * line = fgetws(buf, n, f);
    return line;
}

// strnlen() for wchar_t strings: forwards to wcsnlen()
static inline size_t strnlen(wchar_t * s, size_t n)
{
    const size_t len = wcsnlen(s, n);
    return len;
}
|
|
|
|
template<class CHAR>
|
|
CHAR * fgetline (FILE * f, CHAR * buf, int size)
|
|
{
|
|
// TODO: we should redefine this to write UTF-16 (which matters on GCC which defines wchar_t as 32 bit)
|
|
CHAR * p = fgets(buf, size, f);
|
|
if (p == NULL) // EOF reached: next time feof() = true
|
|
{
|
|
if (ferror (f))
|
|
RuntimeError ("error reading line: %s", strerror (errno));
|
|
buf[0] = 0;
|
|
return buf;
|
|
}
|
|
size_t n = strnlen (p, size);
|
|
|
|
// check for buffer overflow
|
|
|
|
if (n >= (size_t) size -1)
|
|
{
|
|
basic_string<CHAR> example (p, n < 100 ? n : 100);
|
|
uint64_t filepos = fgetpos(f); // (for error message only)
|
|
RuntimeError("input line too long at file offset %I64d (max. %d characters allowed) [%s ...]", filepos, size - 1, msra::strfun::utf8(example).c_str());
|
|
}
|
|
|
|
// remove newline at end
|
|
|
|
if (n > 0 && p[n-1] == '\n') // UNIX and Windows style
|
|
{
|
|
n--;
|
|
p[n] = 0;
|
|
if (n > 0 && p[n-1] == '\r') // Windows style
|
|
{
|
|
n--;
|
|
p[n] = 0;
|
|
}
|
|
}
|
|
else if (n > 0 && p[n-1] == '\r') // Mac style
|
|
{
|
|
n--;
|
|
p[n] = 0;
|
|
}
|
|
|
|
return buf;
|
|
}
|
|
|
|
// NOTE: a non-template 'const wchar_t * fgetline (FILE *, wchar_t *, int)' used to
// be kept here under '#if 0'. It was never compiled, duplicated the
// fgetline<CHAR>() template and carried an invalid "%U64d" format, so it was removed.
|
|
|
|
// STL string version
|
|
std::string fgetline (FILE * f)
|
|
{
|
|
fixed_vector<char> buf (1000000);
|
|
return fgetline (f, &buf[0], (int) buf.size());
|
|
}
|
|
|
|
// STL string version
|
|
std::wstring fgetlinew (FILE * f)
|
|
{
|
|
fixed_vector<wchar_t> buf (1000000);
|
|
return fgetline (f, &buf[0], (int) buf.size());
|
|
}
|
|
|
|
// STL string version avoiding most memory allocations
|
|
void fgetline (FILE * f, std::string & s, std::vector<char> & buf)
|
|
{
|
|
buf.resize (1000000); // enough? // KIT: increased to 1M to be safe
|
|
const char * p = fgetline (f, &buf[0], (int) buf.size());
|
|
s.assign (p);
|
|
}
|
|
|
|
void fgetline (FILE * f, std::wstring & s, std::vector<wchar_t> & buf)
|
|
{
|
|
buf.resize (1000000); // enough? // KIT: increased to 1M to be safe
|
|
const wchar_t * p = fgetline (f, &buf[0], (int) buf.size());
|
|
s.assign (p);
|
|
}
|
|
|
|
// char buffer version
|
|
void fgetline (FILE * f, std::vector<char> & buf)
|
|
{
|
|
const int BUF_SIZE = 1000000; // enough? // KIT: increased to 1M to be safe
|
|
buf.resize (BUF_SIZE);
|
|
fgetline (f, &buf[0], (int) buf.size());
|
|
buf.resize (strnlen (&buf[0], BUF_SIZE) +1); // SECURITY NOTE: string use has been reviewed
|
|
}
|
|
|
|
void fgetline (FILE * f, std::vector<wchar_t> & buf)
|
|
{
|
|
const int BUF_SIZE = 1000000; // enough? // KIT: increased to 1M to be safe
|
|
buf.resize (BUF_SIZE);
|
|
fgetline (f, &buf[0], (int) buf.size());
|
|
buf.resize (wcsnlen (&buf[0], BUF_SIZE) +1); // SECURITY NOTE: string use has been reviewed
|
|
}
|
|
|
|
// read a 0-terminated string
|
|
const char * fgetstring (FILE * f, __out_z_cap(size) char * buf, int size)
|
|
{
|
|
int i;
|
|
for (i = 0; ; i++)
|
|
{
|
|
int c = fgetc(f);
|
|
if (c == EOF)
|
|
RuntimeError ("error reading string or missing 0: %s", strerror(errno));
|
|
if (c == 0) break;
|
|
if (i >= size - 1)
|
|
RuntimeError ("input line too long (max. %d characters allowed)", size - 1);
|
|
buf[i] = (char)c;
|
|
}
|
|
assert (i < size);
|
|
buf[i] = 0;
|
|
return buf;
|
|
}
|
|
|
|
// read a 0-terminated wstring
|
|
string fgetstring (FILE * f)
|
|
{
|
|
string res;
|
|
for (;;)
|
|
{
|
|
char c = (char)fgetc (f);
|
|
if (c == EOF)
|
|
RuntimeError ("error reading string or missing 0: %s", strerror (errno));
|
|
if (c == 0) break;
|
|
res.push_back (c);
|
|
}
|
|
return res;
|
|
}
|
|
|
|
// read a 0-terminated string
|
|
const wchar_t * fgetstring (FILE * f, __out_z_cap(size) wchar_t * buf, int size)
|
|
{
|
|
int i;
|
|
for (i = 0; ; i++)
|
|
{
|
|
// TODO: we should redefine this to write UTF-16 (which matters on GCC which defines wchar_t as 32 bit)
|
|
wint_t c = fgetwc(f);
|
|
if (c == WEOF)
|
|
RuntimeError ("error reading string or missing 0: %s", strerror(errno));
|
|
if (c == 0) break;
|
|
if (i >= size - 1)
|
|
{
|
|
RuntimeError ("input line too long (max. %d wchar_tacters allowed)", size - 1);
|
|
}
|
|
buf[i] = (wchar_t)c;
|
|
}
|
|
assert(i < size);
|
|
buf[i] = 0;
|
|
return buf;
|
|
}
|
|
|
|
#if (_MSC_VER < 1800)
|
|
// read a 0-terminated wstring
|
|
wstring fgetwstring (FILE * f)
|
|
{
|
|
// TODO: we should redefine this to write UTF-16 (which matters on GCC which defines wchar_t as 32 bit)
|
|
wstring res;
|
|
for (;;)
|
|
{
|
|
//
|
|
// there is a known vc++ runtime bug: Microsoft Connect 768113
|
|
// fgetwc can skip a byte in certain condition
|
|
// this is already fixed in update release to VS 2012
|
|
// for now the workaround is to use fgetc twice to simulate fgetwc
|
|
//
|
|
//wint_t c = fgetwc (f);
|
|
int c1 = fgetc(f);
|
|
int c2 = fgetc(f);
|
|
|
|
// synthetic fgetc output to simulate fgetwc
|
|
// note the order below works only for little endian
|
|
wint_t c = (wint_t)((c2 << 8) | c1);
|
|
if (c == WEOF)
|
|
RuntimeError ("error reading string or missing 0: %s", strerror (errno));
|
|
if (c == 0) break;
|
|
res.push_back ((wchar_t) c);
|
|
}
|
|
return res;
|
|
}
|
|
|
|
#else
|
|
// read a 0-terminated wstring
|
|
wstring fgetwstring (FILE * f)
|
|
{
|
|
// TODO: we should redefine this to write UTF-16 (which matters on GCC which defines wchar_t as 32 bit)
|
|
wstring res;
|
|
for (;;)
|
|
{
|
|
wint_t c = fgetwc(f);
|
|
if (c == WEOF)
|
|
RuntimeError ("error reading string or missing 0: %s", strerror(errno));
|
|
if (c == 0) break;
|
|
res.push_back((wchar_t)c);
|
|
}
|
|
return res;
|
|
}
|
|
#endif
|
|
|
|
bool fskipspace (FILE * f)
|
|
{
|
|
int count=0;
|
|
for (;;count++)
|
|
{
|
|
int c = fgetc (f);
|
|
if (c == EOF) // hit the end
|
|
{
|
|
if (ferror(f))
|
|
RuntimeError ("error reading from file: %s", strerror(errno));
|
|
break;
|
|
}
|
|
if (!isspace (c)) // end of space: undo getting that character
|
|
{
|
|
int rc = ungetc(c, f);
|
|
if (rc != c)
|
|
RuntimeError ("error in ungetc(): %s", strerror(errno));
|
|
break;
|
|
}
|
|
}
|
|
return count>0;
|
|
}
|
|
|
|
bool fskipwspace (FILE * f)
|
|
{
|
|
// TODO: we should redefine this to write UTF-16 (which matters on GCC which defines wchar_t as 32 bit)
|
|
int count = 0;
|
|
for (;;count++)
|
|
{
|
|
wint_t c = fgetwc (f);
|
|
if (c == WEOF) // hit the end
|
|
{
|
|
if (ferror (f))
|
|
RuntimeError ("error reading from file: %s", strerror (errno));
|
|
break;
|
|
}
|
|
if (!iswspace (c)) // end of space: undo getting that character
|
|
{
|
|
wint_t rc = ungetwc (c, f);
|
|
if (rc != c)
|
|
RuntimeError ("error in ungetc(): %s", strerror (errno));
|
|
break;
|
|
}
|
|
}
|
|
return count > 0;
|
|
}
|
|
|
|
// fskipNewLine(): skip all white space until end of line incl. the newline
|
|
// skip - skip the end of line if true, otherwise leave the end of line (but eat any leading space)
|
|
int fskipNewline (FILE * f, bool skip)
|
|
{
|
|
int c;
|
|
bool found = false;
|
|
|
|
// skip white space
|
|
|
|
do
|
|
{
|
|
c = fgetc(f);
|
|
} while (c == ' ' || c == '\t');
|
|
|
|
if (c == '\r' || c == '\n') // Accept any type of newline
|
|
{
|
|
found = true;
|
|
if (skip)
|
|
c = fgetc(f);
|
|
}
|
|
|
|
if ((found && !skip) ||
|
|
!(c == '\r' || c == '\n'))
|
|
{
|
|
// if we found an EOF, return that unless there was a newline before the EOF
|
|
if (c == EOF)
|
|
return found?(int)true:EOF;
|
|
int rc = ungetc (c, f);
|
|
if (rc != c)
|
|
RuntimeError ("error in ungetc(): %s", strerror (errno));
|
|
return (int)found;
|
|
}
|
|
// if we get here we saw a newline
|
|
return (int)true;
|
|
}
|
|
|
|
// fskipwNewLine(): skip all white space until end of line incl. the newline
|
|
// skip - skip the end of line if true, otherwise leave the end of line (but eat any leading space)
|
|
int fskipwNewline (FILE * f, bool skip)
|
|
{
|
|
// TODO: we should redefine this to write UTF-16 (which matters on GCC which defines wchar_t as 32 bit)
|
|
wint_t c;
|
|
bool found = false;
|
|
// skip white space
|
|
|
|
do
|
|
{
|
|
c = fgetwc(f);
|
|
} while (c == L' ' || c == L'\t');
|
|
|
|
if (c == L'\r' || c == L'\n') // accept any style of newline
|
|
{
|
|
found = true;
|
|
if (skip)
|
|
c = fgetwc(f);
|
|
}
|
|
|
|
if ((found && !skip)
|
|
||!(c == L'\r' || c == L'\n'))
|
|
{
|
|
if (c == WEOF)
|
|
return found?(int)true:EOF;
|
|
wint_t rc = ungetwc (c, f);
|
|
if (rc != c)
|
|
RuntimeError ("error in ungetwc(): %s", strerror (errno));
|
|
return (int)found;
|
|
}
|
|
// if we get here we saw a double newline
|
|
return (int)true;
|
|
}
|
|
|
|
// read a space-terminated token
|
|
// ...TODO: eat trailing space like fscanf() doessurrounding space)
|
|
const char * fgettoken (FILE * f, __out_z_cap(size) char * buf, int size)
|
|
{
|
|
fskipspace (f); // skip leading space
|
|
int c = -1;
|
|
int i;
|
|
for (i = 0; ; i++)
|
|
{
|
|
c = fgetc (f);
|
|
if (c == EOF) break;
|
|
if (isspace (c)) break;
|
|
if (i >= size -1)
|
|
RuntimeError ("input token too long (max. %d characters allowed)", size -1);
|
|
buf[i] = (char) c;
|
|
}
|
|
// ... TODO: while (IsWhiteSpace (c)) c = fgetc (f); // skip trailing space
|
|
if (c != EOF)
|
|
{
|
|
int rc = ungetc (c, f);
|
|
if (rc != c)
|
|
RuntimeError ("error in ungetc(): %s", strerror (errno));
|
|
}
|
|
assert (i < size);
|
|
buf[i] = 0;
|
|
return buf;
|
|
}
|
|
|
|
// fgettoken(): convenience overload returning the next token as a string
// (the internal buffer limits a token to 79 characters)
string fgettoken (FILE * f)
{
    const int capacity = 80;
    char token[capacity];
    fgettoken (f, token, capacity);
    return string (token);
}
|
|
|
|
// read a space-terminated token
|
|
const wchar_t * fgettoken (FILE * f, __out_z_cap(size) wchar_t * buf, int size)
|
|
{
|
|
// TODO: we should redefine this to write UTF-16 (which matters on GCC which defines wchar_t as 32 bit)
|
|
fskipwspace(f); // skip leading space
|
|
wint_t c = WEOF;
|
|
int i;
|
|
for (i = 0; ; i++)
|
|
{
|
|
c = fgetwc (f);
|
|
if (c == WEOF) break;
|
|
if (iswspace (c)) break;
|
|
if (i >= size -1)
|
|
RuntimeError ("input token too long (max. %d wchar_tacters allowed)", size -1);
|
|
buf[i] = (wchar_t) c;
|
|
}
|
|
// ... TODO: while (IsWhiteSpace (c)) c = fgetc (f); // skip trailing space
|
|
if (c != WEOF)
|
|
{
|
|
int rc = ungetwc (c, f);
|
|
if (rc != c)
|
|
RuntimeError ("error in ungetwc(): %s", strerror (errno));
|
|
}
|
|
assert (i < size);
|
|
buf[i] = 0;
|
|
return buf;
|
|
}
|
|
|
|
// fgetwtoken(): convenience wrapper returning the next wide-character token as a wstring
// (the internal buffer limits a token to 79 characters)
wstring fgetwtoken (FILE * f)
{
    const int capacity = 80;
    wchar_t token[capacity];
    fgettoken (f, token, capacity);
    return wstring (token);
}
|
|
|
|
template <>
|
|
int ftrygetText<bool>(FILE * f, bool& v)
|
|
{
|
|
wchar_t c;
|
|
int rc = ftrygetText(f, c);
|
|
v = (c == L'T');
|
|
return rc;
|
|
}
|
|
|
|
// ----------------------------------------------------------------------------
|
|
// fputText(): write a bool out as character
|
|
// ----------------------------------------------------------------------------
|
|
template <>
|
|
void fputText<bool>(FILE * f, bool v)
|
|
{
|
|
fputText(f, v?L'T':L'F');
|
|
}
|
|
|
|
// ----------------------------------------------------------------------------
|
|
// fgetTag(): read a 4-byte tag & return as a string
|
|
// ----------------------------------------------------------------------------
|
|
|
|
std::string fgetTag (FILE * f)
|
|
{
|
|
char tag[5];
|
|
freadOrDie (&tag[0], sizeof (tag[0]), 4, f);
|
|
tag[4] = 0;
|
|
return std::string (tag);
|
|
}
|
|
|
|
// ----------------------------------------------------------------------------
|
|
// fcheckTag(): read a 4-byte tag & verify it; terminate if wrong tag
|
|
// ----------------------------------------------------------------------------
|
|
|
|
void fcheckTag (FILE * f, const char * expectedTag)
|
|
{
|
|
fcompareTag (fgetTag (f), expectedTag);
|
|
}
|
|
|
|
|
|
void fcheckTag_ascii (FILE * f, const string & expectedTag)
|
|
{
|
|
char buf[20]; // long enough for a tag
|
|
fskipspace (f);
|
|
fgettoken (f, buf, sizeof(buf)/sizeof(*buf));
|
|
if (expectedTag != buf)
|
|
{
|
|
RuntimeError ("invalid tag '%s' found; expected '%s'", buf, expectedTag.c_str());
|
|
}
|
|
}
|
|
|
|
// ----------------------------------------------------------------------------
|
|
// fcompareTag(): compare two tags; terminate if wrong tag
|
|
// ----------------------------------------------------------------------------
|
|
|
|
void fcompareTag (const string & readTag, const string & expectedTag)
|
|
{
|
|
if (readTag != expectedTag)
|
|
{
|
|
RuntimeError ("invalid tag '%s' found; expected '%s'",
|
|
readTag.c_str(), expectedTag.c_str());
|
|
}
|
|
}
|
|
|
|
// ----------------------------------------------------------------------------
|
|
// fputTag(): write a 4-byte tag
|
|
// ----------------------------------------------------------------------------
|
|
|
|
void fputTag (FILE * f, const char * tag)
|
|
{
|
|
const int TAG_LEN = 4;
|
|
assert (strnlen (tag, TAG_LEN + 1) == TAG_LEN);
|
|
fwriteOrDie ((void *) tag, sizeof (*tag), strnlen (tag, TAG_LEN), f);
|
|
}
|
|
|
|
// ----------------------------------------------------------------------------
|
|
// fskipstring(): skip a 0-terminated string, such as a pad string
|
|
// ----------------------------------------------------------------------------
|
|
|
|
void fskipstring (FILE * f)
|
|
{
|
|
char c;
|
|
do
|
|
{
|
|
freadOrDie(&c, sizeof (c), 1, f);
|
|
}
|
|
while (c);
|
|
}
|
|
|
|
// ----------------------------------------------------------------------------
|
|
// fpad(): write a 0-terminated string to pad file to a n-byte boundary
|
|
// (note: file must be opened in binmode to work properly on DOS/Windows!!!)
|
|
// ----------------------------------------------------------------------------
|
|
void fpad (FILE * f, int n)
|
|
{
|
|
// get current writing position
|
|
int pos = ftell (f);
|
|
if (pos == -1)
|
|
{
|
|
RuntimeError ("error in ftell(): %s", strerror (errno));
|
|
}
|
|
// determine how many bytes are needed (at least 1 for the 0-terminator)
|
|
// and create a dummy string of that length incl. terminator
|
|
int len = n - (pos % n);
|
|
const char dummyString[] = "MSR-Asia: JL+FS";
|
|
size_t offset = sizeof(dummyString)/sizeof(dummyString[0]) - len;
|
|
assert (offset >= 0);
|
|
fputstring (f, dummyString + offset);
|
|
}
|
|
|
|
// ----------------------------------------------------------------------------
|
|
// fgetbyte(): read a byte value
|
|
// ----------------------------------------------------------------------------
|
|
|
|
char fgetbyte (FILE * f)
|
|
{
|
|
char v;
|
|
freadOrDie (&v, sizeof (v), 1, f);
|
|
return v;
|
|
}
|
|
|
|
// ----------------------------------------------------------------------------
|
|
// fgetshort(): read a short value
|
|
// ----------------------------------------------------------------------------
|
|
|
|
short fgetshort (FILE * f)
|
|
{
|
|
short v;
|
|
freadOrDie (&v, sizeof (v), 1, f);
|
|
return v;
|
|
}
|
|
|
|
short fgetshort_bigendian (FILE * f)
|
|
{
|
|
unsigned char b[2];
|
|
freadOrDie (&b, sizeof (b), 1, f);
|
|
return (short) ((b[0] << 8) + b[1]);
|
|
}
|
|
|
|
// ----------------------------------------------------------------------------
|
|
// fgetint24(): read a 3-byte (24-bit) int value
|
|
// ----------------------------------------------------------------------------
|
|
|
|
int fgetint24 (FILE * f)
|
|
{
|
|
int v;
|
|
assert (sizeof (v) == 4);
|
|
freadOrDie (&v, sizeof (v) -1, 1, f); // only read 3 lower-order bytes
|
|
v <<= 8; // shift up (upper 8 bits uninit'ed)
|
|
v >>= 8; // shift down 8 bits with sign-extend
|
|
return v;
|
|
}
|
|
|
|
// ----------------------------------------------------------------------------
|
|
// fgetint(): read an int value
|
|
// ----------------------------------------------------------------------------
|
|
|
|
int fgetint (FILE * f)
|
|
{
|
|
int v;
|
|
freadOrDie (&v, sizeof (v), 1, f);
|
|
return v;
|
|
}
|
|
|
|
int fgetint_bigendian (FILE * f)
|
|
{
|
|
unsigned char b[4];
|
|
freadOrDie (&b, sizeof (b), 1, f);
|
|
return (int) (((((b[0] << 8) + b[1]) << 8) + b[2]) << 8) + b[3];
|
|
}
|
|
|
|
int fgetint_ascii (FILE * f)
|
|
{
|
|
fskipspace (f);
|
|
int res = 0;
|
|
char c;
|
|
freadOrDie (&c, sizeof (c), 1, f);
|
|
while (isdigit ((unsigned char)c))
|
|
{
|
|
res = (10 * res) + (c - '0');
|
|
freadOrDie (&c, sizeof (c), 1, f);
|
|
}
|
|
int rc = ungetc (c, f);
|
|
if (rc != c)
|
|
{
|
|
RuntimeError ("error in ungetc(): %s", strerror (errno));
|
|
}
|
|
return res;
|
|
}
|
|
|
|
// ----------------------------------------------------------------------------
|
|
// fgetlong(): read an long value
|
|
// ----------------------------------------------------------------------------
|
|
|
|
long fgetlong (FILE * f)
|
|
{
|
|
long v;
|
|
freadOrDie (&v, sizeof (v), 1, f);
|
|
return v;
|
|
}
|
|
|
|
// ----------------------------------------------------------------------------
|
|
// fgetfloat(): read a float value
|
|
// ----------------------------------------------------------------------------
|
|
|
|
float fgetfloat (FILE * f)
|
|
{
|
|
float v;
|
|
freadOrDie (&v, sizeof (v), 1, f);
|
|
return v;
|
|
}
|
|
|
|
float fgetfloat_bigendian (FILE * f)
|
|
{
|
|
int bitpattern = fgetint_bigendian (f);
|
|
return *((float*) &bitpattern);
|
|
}
|
|
|
|
float fgetfloat_ascii (FILE * f)
|
|
{
|
|
float val;
|
|
fskipspace (f);
|
|
int rc = fscanf (f, "%f", &val); // security hint: safe overloads
|
|
if (rc == 0)
|
|
RuntimeError ("error reading float value from file (invalid format): %s");
|
|
else if (rc == EOF)
|
|
RuntimeError ("error reading from file: %s", strerror (errno));
|
|
assert (rc == 1);
|
|
return val;
|
|
}
|
|
|
|
// ----------------------------------------------------------------------------
|
|
// fgetdouble(): read a double value
|
|
// ----------------------------------------------------------------------------
|
|
|
|
double fgetdouble (FILE * f)
|
|
{
|
|
double v;
|
|
freadOrDie (&v, sizeof (v), 1, f);
|
|
return v;
|
|
}
|
|
|
|
// ----------------------------------------------------------------------------
|
|
// fputbyte(): write a byte value
|
|
// ----------------------------------------------------------------------------
|
|
|
|
void fputbyte (FILE * f, char v)
|
|
{
|
|
fwriteOrDie (&v, sizeof (v), 1, f);
|
|
}
|
|
|
|
// ----------------------------------------------------------------------------
|
|
// fputshort(): write a short value
|
|
// ----------------------------------------------------------------------------
|
|
|
|
void fputshort (FILE * f, short v)
|
|
{
|
|
fwriteOrDie (&v, sizeof (v), 1, f);
|
|
}
|
|
|
|
// ----------------------------------------------------------------------------
|
|
// fputint24(): write a 3-byte (24-bit) int value
|
|
// ----------------------------------------------------------------------------
|
|
|
|
void fputint24 (FILE * f, int v)
|
|
{
|
|
assert (sizeof (v) == 4);
|
|
fwriteOrDie (&v, sizeof (v) -1, 1, f); // write low-order 3 bytes
|
|
}
|
|
|
|
// ----------------------------------------------------------------------------
|
|
// fputint(): write an int value
|
|
// ----------------------------------------------------------------------------
|
|
|
|
void fputint (FILE * f, int v)
|
|
{
|
|
fwriteOrDie (&v, sizeof (v), 1, f);
|
|
}
|
|
|
|
// ----------------------------------------------------------------------------
|
|
// fputlong(): write an long value
|
|
// ----------------------------------------------------------------------------
|
|
|
|
void fputlong (FILE * f, long v)
|
|
{
|
|
fwriteOrDie (&v, sizeof (v), 1, f);
|
|
}
|
|
|
|
// ----------------------------------------------------------------------------
|
|
// fputfloat(): write a float value
|
|
// ----------------------------------------------------------------------------
|
|
|
|
void fputfloat (FILE * f, float v)
|
|
{
|
|
fwriteOrDie (&v, sizeof (v), 1, f);
|
|
}
|
|
|
|
// ----------------------------------------------------------------------------
|
|
// fputdouble(): write a double value
|
|
// ----------------------------------------------------------------------------
|
|
|
|
void fputdouble (FILE * f, double v)
|
|
{
|
|
fwriteOrDie (&v, sizeof (v), 1, f);
|
|
}
|
|
|
|
// ----------------------------------------------------------------------------
|
|
// fputfile(): write a binary block or a string as a file
|
|
// ----------------------------------------------------------------------------
|
|
|
|
void fputfile (const wstring & pathname, const std::vector<char> & buffer)
|
|
{
|
|
FILE * f = fopenOrDie (pathname, L"wb");
|
|
try
|
|
{
|
|
if (buffer.size() > 0)
|
|
{ // ^^ otherwise buffer[0] is an illegal expression
|
|
fwriteOrDie (&buffer[0], sizeof (buffer[0]), buffer.size(), f);
|
|
}
|
|
fcloseOrDie (f);
|
|
}
|
|
catch (...)
|
|
{
|
|
fclose (f);
|
|
throw;
|
|
}
|
|
}
|
|
|
|
void fputfile (const wstring & pathname, const std::wstring & string)
|
|
{
|
|
FILE * f = fopenOrDie (pathname, L"wb");
|
|
try
|
|
{
|
|
if (string.length() > 0)
|
|
{ // ^^ otherwise buffer[0] is an illegal expression
|
|
fwriteOrDie (string.c_str(), sizeof (string[0]), string.length(), f);
|
|
}
|
|
fcloseOrDie (f);
|
|
}
|
|
catch (...)
|
|
{
|
|
fclose (f);
|
|
throw;
|
|
}
|
|
}
|
|
|
|
void fputfile (const wstring & pathname, const std::string & string)
|
|
{
|
|
FILE * f = fopenOrDie (pathname, L"wb");
|
|
try
|
|
{
|
|
if (string.length() > 0)
|
|
{ // ^^ otherwise buffer[0] is an illegal expression
|
|
fwriteOrDie (string.c_str(), sizeof (string[0]), string.length(), f);
|
|
}
|
|
fcloseOrDie (f);
|
|
}
|
|
catch (...)
|
|
{
|
|
fclose (f);
|
|
throw;
|
|
}
|
|
}
|
|
|
|
// ----------------------------------------------------------------------------
|
|
// fgetfile(): load a file as a binary block
|
|
// ----------------------------------------------------------------------------
|
|
|
|
void fgetfile (const wstring & pathname, std::vector<char> & buffer)
|
|
{
|
|
FILE * f = fopenOrDie (pathname, L"rb");
|
|
size_t len = filesize (f);
|
|
buffer.resize (len);
|
|
if (buffer.size() > 0)
|
|
{ // ^^ otherwise buffer[0] is an illegal expression
|
|
freadOrDie (&buffer[0], sizeof (buffer[0]), buffer.size(), f);
|
|
}
|
|
fclose (f);
|
|
}
|
|
|
|
void fgetfile (FILE * f, std::vector<char> & buffer)
|
|
{ // this version reads until eof
|
|
buffer.resize (0);
|
|
buffer.reserve (1000000); // avoid too many reallocations
|
|
std::vector<char> inbuf;
|
|
inbuf.resize (65536); // read in chunks of this size
|
|
while (!feof (f)) // read until eof
|
|
{
|
|
size_t n = fread (&inbuf[0], sizeof (inbuf[0]), inbuf.size(), f);
|
|
if (ferror (f))
|
|
{
|
|
RuntimeError ("fgetfile: error reading from file: %s", strerror (errno));
|
|
}
|
|
buffer.insert (buffer.end(), inbuf.begin(), inbuf.begin() + n);
|
|
}
|
|
buffer.reserve (buffer.size());
|
|
}
|
|
|
|
// load it into RAM in one huge chunk
|
|
static size_t fgetfilechars (const std::wstring & path, vector<char> & buffer)
|
|
{
|
|
auto_file_ptr f (fopenOrDie (path, L"rb"));
|
|
size_t len = filesize (f);
|
|
buffer.reserve (len +1);
|
|
freadOrDie (buffer, len, f);
|
|
buffer.push_back (0); // this makes it a proper C string
|
|
return len;
|
|
}
|
|
|
|
// strtoklines(): split a 0-terminated text buffer into lines and append them to 'lines'
// The buffer is tokenized in place with strtok() on '\r' and '\n': the delimiters
// are overwritten with 0 and empty lines are not reported.
template<class LINES> static void strtoklines (char * s, LINES & lines)
{
    const char * const delimiters = "\r\n";
    char * line = strtok (s, delimiters);
    while (line != NULL)
    {
        lines.push_back (line);
        line = strtok (NULL, delimiters);
    }
}
|
|
|
|
void msra::files::fgetfilelines (const std::wstring & path, vector<char> & buffer, std::vector<std::string> & lines)
|
|
{
|
|
// load it into RAM in one huge chunk
|
|
const size_t len = fgetfilechars (path, buffer);
|
|
|
|
// parse into lines
|
|
lines.resize (0);
|
|
lines.reserve (len / 20);
|
|
strtoklines (&buffer[0], lines);
|
|
}
|
|
|
|
// same as above but returning const char* (avoiding the memory allocation)
|
|
vector<char*> msra::files::fgetfilelines (const wstring & path, vector<char> & buffer)
|
|
{
|
|
// load it into RAM in one huge chunk
|
|
const size_t len = fgetfilechars (path, buffer);
|
|
|
|
// parse into lines
|
|
vector<char *> lines;
|
|
lines.reserve (len / 20);
|
|
strtoklines (&buffer[0], lines);
|
|
return lines;
|
|
}
|
|
|
|
// ----------------------------------------------------------------------------
|
|
// getfiletime(): access modification time
|
|
// ----------------------------------------------------------------------------
|
|
|
|
#ifndef _FILETIME_
// no Win32 FILETIME has been declared: represent file times as a plain time_t
//typedef struct _FILETIME { DWORD dwLowDateTime; DWORD dwHighDateTime; };   // from minwindef.h
typedef time_t FILETIME;
#else
// Win32 FILETIME consists of two DWORDs: the high parts decide unless they are equal
bool operator>= (const FILETIME & targettime, const FILETIME & inputtime)   // for use in fuptodate()
{
    if (targettime.dwHighDateTime != inputtime.dwHighDateTime)
        return targettime.dwHighDateTime > inputtime.dwHighDateTime;
    return targettime.dwLowDateTime >= inputtime.dwLowDateTime;
}
#endif
|
|
|
|
bool getfiletime (const wstring & path, FILETIME & time)
|
|
{ // return file modification time, false if cannot be determined
|
|
#ifdef _WIN32
|
|
WIN32_FIND_DATAW findFileData;
|
|
auto_handle hFind (FindFirstFileW (path.c_str(), &findFileData), ::FindClose);
|
|
if (hFind != INVALID_HANDLE_VALUE)
|
|
{
|
|
time = findFileData.ftLastWriteTime;
|
|
return true;
|
|
}
|
|
else
|
|
return false;
|
|
#else // TODO: test this; e.g. does st_mtime have the desired resolution?
|
|
struct stat buf;
|
|
int result;
|
|
|
|
// Get data associated with "crt_stat.c":
|
|
result = stat(charpath(path), &buf);
|
|
// Check if statistics are valid:
|
|
if (result != 0)
|
|
return false;
|
|
|
|
time = buf.st_mtime;
|
|
return true;
|
|
#endif
|
|
}
|
|
|
|
#if 0
// setfiletime(): set the modification time of an existing file (Win32 only; currently disabled)
// Raises a RuntimeError carrying the Win32 error code if the file cannot be opened
// or the time cannot be set.
void setfiletime (const wstring & path, const FILETIME & time)
{
    // open for attribute writing only, without locking out other users of the file
    const DWORD shareMode = FILE_SHARE_READ|FILE_SHARE_WRITE;
    auto_handle h (CreateFileW (path.c_str(), FILE_WRITE_ATTRIBUTES, shareMode, NULL,
                                OPEN_EXISTING, FILE_ATTRIBUTE_NORMAL, NULL));
    if (h == INVALID_HANDLE_VALUE)
        RuntimeError ("setfiletime: error opening file: %d", GetLastError());

    // creation and access time are left alone (NULL)
    if (!SetFileTime (h, NULL, NULL, &time))
        RuntimeError ("setfiletime: error setting file time information: %d", GetLastError());
}
#endif
|
|
|
|
#if 0
|
|
// ----------------------------------------------------------------------------
|
|
// expand_wildcards -- wildcard expansion of a path, including directories.
|
|
// ----------------------------------------------------------------------------
|
|
|
|
// Win32-style variant of this function (in case we want to use it some day)
|
|
// Returns 0 in case of failure. May throw in case of bad_alloc.
|
|
// ExpandWildcards(): append all paths matching 'path' to 'paths'
// Wildcards (* and ?) may occur in several path components; each such level is
// enumerated with FindFirstFileW()/FindNextFileW() and expanded recursively.
// A path without wildcard is appended as is, without checking that it exists.
// Directories (and reparse points) whose name starts with '.' are skipped.
// Returns FALSE if the enumeration of the first wildcard level fails; failures in
// nested levels are ignored (treated as "no match").
static BOOL ExpandWildcards (wstring path, vector<wstring> & paths)
{
    // convert root to DOS filename convention
    std::replace (path.begin(), path.end(), L'/', L'\\');

    // remove terminating backslash (an empty path has none to remove)
    if (!path.empty() && path[path.length() -1] == L'\\')
        path.erase (path.length() -1);

    // convert root to long filename convention
    //if (path.find (L"\\\\?\\") != 0)
    //    path = L"\\\\?\\" + root;

    // find the first wildcard
    size_t wpos = path.find_first_of (L"*?");
    if (wpos == 2 && path[0] == L'\\' && path[1] == L'\\')
        wpos = path.find_first_of (L"*?", 4);       // 4=skip "\\?\"
    if (wpos == wstring::npos)
    {   // no wildcard: just return it
        paths.push_back (path);
        return TRUE;
    }

    // split off everything after the component that holds the wildcard, if any
    wstring rest;                                   // remaining path after this directory
    const size_t spos = path.find_first_of (L"\\", wpos +1);
    if (spos != wstring::npos)
    {
        rest = path.substr (spos +1);
        path.erase (spos);
    }

    // crawl folder
    WIN32_FIND_DATAW ffdata;
    auto_handle hFind (::FindFirstFileW (path.c_str(), &ffdata), ::FindClose);
    if (hFind == INVALID_HANDLE_VALUE)
    {
        const DWORD err = ::GetLastError();
        if (rest.empty() && err == 2) return TRUE;  // 2 = file not found, i.e. no matching file: empty
        return FALSE;                               // another error
    }
    const size_t pos = path.find_last_of (L"\\");
    if (pos == wstring::npos) LogicError ("unexpected missing \\ in path");
    const wstring parent = path.substr (0, pos);
    do
    {
        // skip this and parent directory (in fact any directory named ".something")
        const bool isDir = ((ffdata.dwFileAttributes & (FILE_ATTRIBUTE_DIRECTORY | FILE_ATTRIBUTE_REPARSE_POINT)) != 0);
        if (isDir && ffdata.cFileName[0] == '.') continue;

        const wstring filename = parent + L"\\" + ffdata.cFileName;
        if (rest.empty())
        {
            paths.push_back (filename);
        }
        else if (isDir)     // multi-wildcards: further expand
        {
            // error here means no match, e.g. Access Denied to one subfolder
            (void) ExpandWildcards (filename + L"\\" + rest, paths);
        }
    } while (::FindNextFileW(hFind, &ffdata) != 0);
    return TRUE;
}
|
|
|
|
void expand_wildcards (const wstring & path, vector<wstring> & paths)
|
|
{
|
|
BOOL rc = ExpandWildcards (path, paths);
|
|
if (!rc)
|
|
RuntimeError ("error in expanding wild cards '%S': %S", path.c_str(), FormatWin32Error(::GetLastError()).c_str());
|
|
}
|
|
#endif
|
|
|
|
// ----------------------------------------------------------------------------
|
|
// make_intermediate_dirs() -- make all intermediate dirs on a path
|
|
// ----------------------------------------------------------------------------
|
|
|
|
static void mkdir (const wstring & path)
|
|
{
|
|
int rc = _wmkdir (path.c_str());
|
|
if (rc >= 0 || errno == EEXIST)
|
|
return; // no error or already existing --ok
|
|
#ifdef _WIN32 // bug in _wmkdir(): returns access_denied if folder exists but read-only --check existence
|
|
if (errno == EACCES)
|
|
{
|
|
DWORD att = ::GetFileAttributesW (path.c_str());
|
|
if (att != INVALID_FILE_ATTRIBUTES || (att & FILE_ATTRIBUTE_DIRECTORY) != 0)
|
|
return; // ok
|
|
}
|
|
#endif
|
|
RuntimeError ("mkdir: error creating intermediate directory %S", path.c_str());
|
|
}
|
|
|
|
// make subdir of a file including parents
|
|
void msra::files::make_intermediate_dirs (const wstring & filepath)
|
|
{
|
|
vector<wchar_t> buf;
|
|
buf.resize (filepath.length() +1, 0);
|
|
wcscpy (&buf[0], filepath.c_str());
|
|
wstring subpath;
|
|
int skip = 0;
|
|
#ifdef _WIN32
|
|
// On windows, if share (\\) then the first two levels (machine, share name) cannot be made.
|
|
if ((buf[0] == '/' && buf[1] == '/') || (buf[0] == '\\' && buf[1] == '\\'))
|
|
{
|
|
subpath = L"/";
|
|
skip = 2; // skip two levels (machine, share)
|
|
}
|
|
#else
|
|
// On unix, if the filepath starts with '/' then it is absolute
|
|
// path and the created sub-paths should also start with '/'
|
|
if (buf[0] == '/')
|
|
{
|
|
subpath = L"/";
|
|
}
|
|
#endif
|
|
// make all constituents except the filename (to make a dir, include a trailing slash)
|
|
wchar_t * context = nullptr;
|
|
for (const wchar_t * p = wcstok_s (&buf[0], L"/\\", &context); p; p = wcstok_s (NULL, L"/\\", &context))
|
|
{
|
|
if (subpath != L"" && subpath != L"/" && subpath != L"\\" && skip == 0)
|
|
{
|
|
mkdir (subpath);
|
|
}
|
|
else if (skip > 0) skip--; // skip this level
|
|
// rebuild the final path
|
|
if (subpath != L"") subpath += L"/";
|
|
subpath += p;
|
|
}
|
|
}
|
|
|
|
// ----------------------------------------------------------------------------
|
|
// fuptodate() -- test whether an output file is at least as new as an input file
|
|
// ----------------------------------------------------------------------------
|
|
|
|
// test if file 'target' is not older than 'input' --used for make mode
|
|
// 'input' must exist if 'inputrequired'; otherweise if 'target' exists, it is considered up to date
|
|
// 'target' may or may not exist
|
|
bool msra::files::fuptodate (const wstring & target, const wstring & input, bool inputrequired)
|
|
{
|
|
FILETIME targettime;
|
|
if (!getfiletime (target, targettime)) return false; // target missing: need to update
|
|
FILETIME inputtime;
|
|
if (!getfiletime (input, inputtime)) return !inputrequired; // input missing: if required, pretend to be out of date as to force caller to fail
|
|
// up to date if target has higher time stamp
|
|
return targettime >= inputtime; // note: uses an overload for WIN32 FILETIME (in Linux, FILETIME=time_t=size_t)
|
|
}
|
|
|
|
/// separate string by separator
|
|
vector<string> sep_string(const string & istr, const string & sep)
|
|
{
|
|
string str = istr;
|
|
str = trim(str);
|
|
vector<string> vstr;
|
|
string csub;
|
|
size_t ifound = 0;
|
|
size_t ifoundlast = ifound;
|
|
ifound = str.find(sep, ifound);
|
|
while (ifound != std::string::npos)
|
|
{
|
|
csub = str.substr(ifoundlast, ifound - ifoundlast);
|
|
vstr.push_back(trim(csub));
|
|
|
|
ifoundlast = ifound+1;
|
|
ifound = str.find(sep, ifoundlast);
|
|
}
|
|
csub = str.substr(ifoundlast, str.length() - ifoundlast);
|
|
vstr.push_back(trim(csub));
|
|
|
|
return vstr;
|
|
}
|
|
|
|
/// separate string by separator
|
|
vector<wstring> wsep_string(const wstring & istr, const wstring & sep)
|
|
{
|
|
wstring str = istr;
|
|
str = wtrim(str);
|
|
vector<wstring> vstr;
|
|
wstring csub;
|
|
size_t ifound = 0;
|
|
size_t ifoundlast = ifound;
|
|
ifound = str.find(sep, ifound);
|
|
while (ifound != std::wstring::npos)
|
|
{
|
|
csub = str.substr(ifoundlast, ifound - ifoundlast);
|
|
vstr.push_back(wtrim(csub));
|
|
|
|
ifoundlast = ifound + 1;
|
|
ifound = str.find(sep, ifoundlast);
|
|
}
|
|
csub = str.substr(ifoundlast, str.length() - ifoundlast);
|
|
vstr.push_back(wtrim(csub));
|
|
|
|
return vstr;
|
|
}
|
|
// wcstombs(): convert a wide string to a multi-byte string in the current C locale
// The conversion stops at the first embedded 0. If a character cannot be converted,
// whatever was converted up to that point is returned.
static inline std::string wcstombs(const std::wstring & p)  // output: MBCS
{
    const size_t len = p.length();
    // One wide character can take up to MB_CUR_MAX bytes in the current locale;
    // the extra byte is never written to and therefore always terminates the result.
    const size_t capacity = len * (size_t)MB_CUR_MAX + 1;
    std::vector<char> buf(capacity, 0);
    ::wcstombs(&buf[0], p.c_str(), capacity - 1);
    return std::string(&buf[0]);
}
|
|
static inline std::wstring mbstowcs(const std::string & p) // input: MBCS
|
|
{
|
|
size_t len = p.length();
|
|
msra::basetypes::fixed_vector<wchar_t> buf(len + 1); // max: >1 mb chars => 1 wchar
|
|
std::fill(buf.begin(), buf.end(), (wchar_t)0);
|
|
OACR_WARNING_SUPPRESS(UNSAFE_STRING_FUNCTION, "Reviewed OK. size checked. [rogeryu 2006/03/21]");
|
|
::mbstowcs(&buf[0], p.c_str(), len + 1);
|
|
return std::wstring(&buf[0]);
|
|
}
|
|
|
|
// s2ws(): convert a narrow string to a wide string
// On unix the input is interpreted in the current C locale (via mbstowcs());
// elsewhere it is interpreted as UTF-8.
wstring s2ws(const string& str)
{
#ifdef __unix__
    return mbstowcs(str);
#else
    std::wstring_convert<std::codecvt_utf8<wchar_t>, wchar_t> utf8Converter;
    return utf8Converter.from_bytes(str);
#endif
}
|
|
|
|
// ws2s(): convert a wide string to a narrow string
// On unix the output is encoded in the current C locale (via wcstombs());
// elsewhere it is encoded as UTF-8.
string ws2s(const wstring& wstr)
{
#ifdef __unix__
    return wcstombs(wstr);
#else
    std::wstring_convert<std::codecvt_utf8<wchar_t>, wchar_t> utf8Converter;
    return utf8Converter.to_bytes(wstr);
#endif
}
|