CNTK/Common/fileutil.cpp

1741 строка
57 KiB
C++

//
// <copyright file="FileUtil.cpp" company="Microsoft">
// Copyright (c) Microsoft Corporation. All rights reserved.
// </copyright>
//
#ifndef _CRT_SECURE_NO_WARNINGS
#define _CRT_SECURE_NO_WARNINGS // "secure" CRT not available on all platforms --add this at the top of all CPP files that give "function or variable may be unsafe" warnings
#endif
#define _CRT_NONSTDC_NO_DEPRECATE // make VS accept POSIX functions without _
#pragma warning (disable: 4996) // ^^ this does not seem to work--TODO: make it work
#define _FILE_OFFSET_BITS 64 // to force fseeko() and ftello() 64 bit in Linux
#ifndef UNDER_CE // fixed-buffer overloads not available for wince
#ifdef _CRT_SECURE_CPP_OVERLOAD_STANDARD_NAMES // fixed-buffer overloads for strcpy() etc.
#undef _CRT_SECURE_CPP_OVERLOAD_STANDARD_NAMES
#endif
#define _CRT_SECURE_CPP_OVERLOAD_STANDARD_NAMES 1
#endif
#include "Basics.h"
#include "fileutil.h"
#ifdef __unix__
#include <sys/types.h>
#include <sys/stat.h>
#include <unistd.h>
#endif
#include <stdio.h>
#include <string.h>
#include <ctype.h>
#include <stdint.h>
#include <assert.h>
#ifdef _WIN32
#include "Windows.h" // for FILETIME
#endif
#include <algorithm> // for std::find
#include <limits.h>
#include <memory>
#ifndef UNDER_CE // some headers don't exist under winCE - the appropriate definitions seem to be in stdlib.h
#if defined(_WIN32) || defined(__CYGWIN__)
#include <fcntl.h> // for _O_BINARY/TEXT - not needed for wince
#include <io.h> // for _setmode()
#define SET_BINARY_MODE(handle) setmode(handle, _O_BINARY)
#define SET_TEXT_MODE(handle) setmode(handle, _O_TEXT)
#else
#define SET_BINARY_MODE(handle) ((int)0)
#define SET_TEXT_MODE(handle) ((int)0)
#endif
#endif
#define __out_z_cap(x) // a fake SAL annotation; this may come in handy some day if we try static code analysis, so I don't want to delete it
#include <errno.h>
using namespace std;
using namespace Microsoft::MSR::CNTK;
// ----------------------------------------------------------------------------
// some mappings for non-Windows builds
// ----------------------------------------------------------------------------
template <> const wchar_t* GetScanFormatString(char) {return L" %hc";}
template <> const wchar_t* GetScanFormatString(wchar_t) {return L" %lc";}
template <> const wchar_t* GetScanFormatString(short) {return L" %hi";}
template <> const wchar_t* GetScanFormatString(int) {return L" %i";}
template <> const wchar_t* GetScanFormatString(long) {return L" %li";}
template <> const wchar_t* GetScanFormatString(unsigned short) {return L" %hu";}
template <> const wchar_t* GetScanFormatString(unsigned int) {return L" %u";}
//template <> const wchar_t* GetScanFormatString(unsigned long) {return L" %lu";}
template <> const wchar_t* GetScanFormatString(float) {return L" %g";}
template <> const wchar_t* GetScanFormatString(double) {return L" %lg";}
template <> const wchar_t* GetScanFormatString(size_t) {return L" %llu";}
template <> const wchar_t* GetScanFormatString(long long) {return L" %lli";}
template <> const wchar_t* GetFormatString(char) {return L" %hc";}
template <> const wchar_t* GetFormatString(wchar_t) {return L" %lc";}
template <> const wchar_t* GetFormatString(short) {return L" %hi";}
template <> const wchar_t* GetFormatString(int) {return L" %i";}
template <> const wchar_t* GetFormatString(long) {return L" %li";}
template <> const wchar_t* GetFormatString(unsigned short) {return L" %hu";}
template <> const wchar_t* GetFormatString(unsigned int) {return L" %u";}
//template <> const wchar_t* GetFormatString(unsigned long) {return L" %lu";}
template <> const wchar_t* GetFormatString(float) {return L" %.9g";}
template <> const wchar_t* GetFormatString(double) {return L" %.17g";}
template <> const wchar_t* GetFormatString(size_t) {return L" %llu";}
template <> const wchar_t* GetFormatString(long long) {return L" %lli";}
template <> const wchar_t* GetFormatString(const char*) {return L" %hs";}
template <> const wchar_t* GetFormatString(const wchar_t*) {return L" %ls";}
// ----------------------------------------------------------------------------
// fgetText() specializations for fwscanf differences: get a value from a text file
// ----------------------------------------------------------------------------
void fgetText(FILE * f, char& v)
{
const wchar_t* formatString = GetFormatString(v);
int rc = fwscanf(f, formatString, &v);
if (rc == 0)
RuntimeError ("error reading value from file (invalid format): %s", formatString);
else if (rc == EOF)
RuntimeError ("error reading from file: %s", strerror (errno));
assert(rc == 1);
}
void fgetText(FILE * f, wchar_t& v)
{
const wchar_t* formatString = GetFormatString(v);
int rc = fwscanf(f, formatString, &v);
if (rc == 0)
RuntimeError ("error reading value from file (invalid format): %s", formatString);
else if (rc == EOF)
RuntimeError ("error reading from file: %s", strerror (errno));
assert(rc == 1);
}
// ----------------------------------------------------------------------------
// fopenOrDie(): like fopen() but terminate with err msg in case of error.
// A pathname of "-" returns stdout or stdin, depending on mode, and it will
// change the binary mode if 'b' or 't' are given. If you use this, make sure
// not to fclose() such a handle.
// ----------------------------------------------------------------------------
static const wchar_t * strchr (const wchar_t * s, wchar_t v) { return wcschr (s, v); }
// pathname is "-" -- open stdin or stdout. Changes bin mode if 'b' or 't' given.
template<class _T> FILE * fopenStdHandle (const _T * mode)
{
FILE * f = strchr (mode, 'r') ? stdin : stdout;
if (strchr (mode, 'b') || strchr (mode, 't')) // change binary mode
fsetmode (f, strchr(mode, 'b') ? 'b' : 't');
return f;
}
FILE * fopenOrDie (const string & pathname, const char * mode)
{
FILE * f = (pathname[0] == '-') ? fopenStdHandle (mode) : fopen (pathname.c_str(), mode);
if (f == NULL)
{
RuntimeError ("error opening file '%s': %s", pathname.c_str(), strerror (errno));
}
if (strchr (mode, 'S'))
{ // if optimized for sequential access then use large buffer
setvbuf (f, NULL, _IOFBF, 10000000); // OK if it fails
}
return f;
}
FILE * fopenOrDie (const wstring & pathname, const wchar_t * mode)
{
FILE * f = (pathname[0] == '-') ? fopenStdHandle (mode) : _wfopen (pathname.c_str(), mode);
if (f == NULL)
{
RuntimeError ("error opening file '%S': %s", pathname.c_str(), strerror (errno));
}
if (strchr (mode, 'S'))
{ // if optimized for sequential access then use large buffer
setvbuf (f, NULL, _IOFBF, 10000000); // OK if it fails
}
return f;
}
// ----------------------------------------------------------------------------
// set mode to binary or text (pass 'b' or 't')
// ----------------------------------------------------------------------------
void fsetmode(FILE * f, char type)
{
if (type != 'b' && type != 't')
{
RuntimeError ("fsetmode: invalid type '%c'", type);
}
#ifdef UNDER_CE // winCE and win32 have different return types for _fileno
FILE *fd = fileno (f); // note: no error check possible
#else
int fd = fileno (f); // note: no error check possible
#endif
int rc = ( type == 'b' ? SET_BINARY_MODE(fd) : SET_TEXT_MODE(fd) );
if (rc == -1)
{
RuntimeError ("error changing file mode: %s", strerror (errno));
}
}
// ----------------------------------------------------------------------------
// freadOrDie(): like fread() but terminate with err msg in case of error
// ----------------------------------------------------------------------------
void freadOrDie (void * ptr, size_t size, size_t count, FILE * f)
{
// \\XXX\C$ reads are limited, with some randomness (e.g. 48 MB), on Windows 7 32 bit, so we break this into chunks of some MB. Meh.
while (count > 0)
{
size_t chunkn = min (count, (size_t)15*1024*1024); // BUGBUG: I surely meant this limit to be bytes, not units of 'size'...
size_t n = fread (ptr, size, chunkn, f);
if (n != chunkn)
RuntimeError ("error reading from file: %s", strerror (errno));
count -= n;
ptr = n * size + (char*) ptr;
}
}
// ----------------------------------------------------------------------------
// fwriteOrDie(): like fwrite() but terminate with err msg in case of error;
// Windows C std lib fwrite() has problems writing >100 MB at a time (fails
// with Invalid Argument error), so we break it into chunks (yak!!)
// ----------------------------------------------------------------------------
void fwriteOrDie (const void * ptr, size_t size, size_t count, FILE * f)
{
const char * p1 = (const char *) ptr;
size_t totalBytes = size * count;
while (totalBytes > 0)
{
size_t wantWrite = totalBytes;
#define LIMIT (16*1024*1024) // limit to 16 MB at a time
if (wantWrite > LIMIT)
{
wantWrite = LIMIT;
}
size_t n = fwrite ((const void *) p1, 1, wantWrite, f);
if (n != wantWrite)
{
RuntimeError ("error writing to file (ptr=0x%08lx, size=%d,"
" count=%d, writing %d bytes after %d): %s",
ptr, size, count, (int) wantWrite,
(int) (size * count - totalBytes),
strerror (errno));
}
totalBytes -= wantWrite;
p1 += wantWrite;
}
}
long fseekOrDie (FILE * f, long offset, int mode)
{
long curPos = ftell (f);
if (curPos == -1L)
{
RuntimeError ("error seeking: %s", strerror (errno));
}
int rc = fseek (f, offset, mode);
if (rc != 0)
{
RuntimeError ("error seeking: %s", strerror (errno));
}
return curPos;
}
// ----------------------------------------------------------------------------
// fprintfOrDie(): like fprintf() but terminate with err msg in case of error
// ----------------------------------------------------------------------------
#pragma warning(push)
#pragma warning(disable : 4793) // 'vararg' : causes native code generation
void fprintfOrDie (FILE * f, const char * fmt, ...)
{
va_list arg_ptr;
va_start (arg_ptr, fmt);
int rc = vfprintf (f, fmt, arg_ptr);
if (rc < 0)
{
RuntimeError ("error writing to file: %s", strerror (errno));
}
}
#pragma warning(pop)
// ----------------------------------------------------------------------------
// fflushOrDie(): like fflush() but terminate with err msg in case of error
// ----------------------------------------------------------------------------
void fflushOrDie (FILE * f)
{
int rc = fflush (f);
if (rc != 0)
{
RuntimeError ("error flushing to file: %s", strerror (errno));
}
}
// ----------------------------------------------------------------------------
// filesize(): determine size of the file in bytes (with open file)
// ----------------------------------------------------------------------------
size_t filesize (FILE * f)
{
#ifdef _WIN32
size_t curPos = _ftelli64(f);
if (curPos == -1L)
{
RuntimeError ("error determining file position: %s", strerror (errno));
}
int rc = _fseeki64 (f, 0, SEEK_END);
if (rc != 0)
RuntimeError ("error seeking to end of file: %s", strerror (errno));
size_t len = _ftelli64 (f);
if (len == -1L)
RuntimeError ("error determining file position: %s", strerror (errno));
rc = _fseeki64 (f, curPos, SEEK_SET);
if (rc != 0)
RuntimeError ("error resetting file position: %s", strerror (errno));
return len;
#else // TODO: test this
struct stat stat_buf;
int rc = fstat(fileno(f), &stat_buf);
if (rc != 0)
RuntimeError("error determining length of file: %s", strerror(errno));
static_assert (sizeof(stat_buf.st_size)>=sizeof(uint64_t), "struct stat not compiled for 64-bit mode");
return stat_buf.st_size;
#endif
}
// filesize(): determine size of the file in bytes (with pathname)
size_t filesize (const wchar_t * pathname)
{
FILE * f = fopenOrDie (pathname, L"rb");
try
{
size_t len = filesize (f);
fclose (f);
return (size_t) len;
}
catch (...)
{
fclose (f);
throw;
}
}
#ifndef UNDER_CE // no 64-bit under winCE
// filesize64(): determine size of the file in bytes (with pathname)
int64_t filesize64 (const wchar_t * pathname)
{
#ifdef _WIN32
struct _stat64 fileinfo;
if (_wstat64 (pathname,&fileinfo) == -1)
return 0;
else
return fileinfo.st_size;
#else
return filesize (pathname);
#endif
}
#endif
// ----------------------------------------------------------------------------
// fget/setpos(): seek functions with error handling
// ----------------------------------------------------------------------------
uint64_t fgetpos (FILE * f)
{
#ifdef _MSC_VER // standard does not allow to cast between fpos_t and integer numbers, and indeed it does not work on Linux (but on Windows and GCC)
fpos_t post;
int rc = ::fgetpos (f, &post);
if (rc != 0)
RuntimeError ("error getting file position: %s", strerror (errno));
#else
auto pos = ftello (f);
uint64_t post = (uint64_t) pos;
static_assert (sizeof (post) >= sizeof (pos), "64-bit file offsets not enabled");
if ((decltype (pos)) post != pos)
LogicError("64-bit file offsets not enabled");
#endif
return post;
}
void fsetpos (FILE * f, uint64_t reqpos)
{
#ifdef _MSC_VER // standard does not allow to cast between fpos_t and integer numbers, and indeed it does not work on Linux (but on Windows and GCC)
#ifdef _MSC_VER // special hack for VS CRT
// Visual Studio's ::fsetpos() flushes the read buffer. This conflicts with a situation where
// we generally read linearly but skip a few bytes or KB occasionally, as is
// the case in speech recognition tools. This requires a number of optimizations.
uint64_t curpos = fgetpos (f);
uint64_t cureob = curpos + f->_cnt; // UGH: we mess with an internal structure here
while (reqpos >= curpos && reqpos < cureob)
{
// if we made it then do not call fsetpos()
if (reqpos == fgetpos (f))
return;
// if we seek within the existing buffer, then just move to the position by dummy reads
char buf[65536];
size_t n = min ((size_t) reqpos - (size_t) curpos, _countof (buf));
fread (buf, sizeof (buf[0]), n, f); // (this may fail, but really shouldn't)
curpos += n;
// since we mess with f->_cnt, if something unexpected happened to the buffer then back off
if (curpos != fgetpos (f) || curpos + f->_cnt != cureob)
break; // oops
}
#endif // end special hack for VS CRT
// actually perform the seek
fpos_t post = reqpos;
int rc = ::fsetpos(f, &post);
#else // assuming __unix__
off_t post = (off_t) reqpos;
static_assert (sizeof (off_t) >= sizeof (reqpos), "64-bit file offsets not enabled");
if ((decltype (reqpos)) post != reqpos)
LogicError("64-bit file offsets not enabled");
int rc = fseeko(f, post, SEEK_SET);
#endif
if (rc != 0)
RuntimeError("error setting file position: %s", strerror(errno));
}
// ----------------------------------------------------------------------------
// unlinkOrDie(): unlink() with error handling
// ----------------------------------------------------------------------------
void unlinkOrDie (const std::string & pathname)
{
if (unlink (pathname.c_str()) != 0 && errno != ENOENT) // if file is missing that's what we want
RuntimeError ("error deleting file '%s': %s", pathname.c_str(), strerror (errno));
}
void unlinkOrDie (const std::wstring & pathname)
{
if (_wunlink (pathname.c_str()) != 0 && errno != ENOENT) // if file is missing that's what we want
RuntimeError ("error deleting file '%S': %s", pathname.c_str(), strerror (errno));
}
// ----------------------------------------------------------------------------
// renameOrDie(): rename() with error handling
// ----------------------------------------------------------------------------
void renameOrDie (const std::string & from, const std::string & to)
{
#ifdef _WIN32
// deleting destination file if exits (to match Linux semantic)
if (fexists(to.c_str()) && !DeleteFileA(to.c_str()))
RuntimeError("error deleting file: '%s': %d", to.c_str(), GetLastError());
if (!MoveFileA (from.c_str(), to.c_str()))
RuntimeError("error renaming file '%s': %d", from.c_str(), GetLastError());
#else
if (rename (from.c_str(), to.c_str()) != 0)
RuntimeError("error renaming file '%s': %s", from.c_str(), strerror(errno));
#endif
}
void renameOrDie (const std::wstring & from, const std::wstring & to)
{
#ifdef _WIN32
// deleting destination file if exits (to match Linux semantic)
if (fexists(to.c_str()) && !DeleteFileW(to.c_str()))
RuntimeError("error deleting file '%S': %d", to.c_str(), GetLastError());
if (!MoveFileW(from.c_str(), to.c_str()))
RuntimeError ("error renaming file '%S': %d", from.c_str(), GetLastError());
#else
renameOrDie (charpath(from), charpath(to));
#endif
}
// ----------------------------------------------------------------------------
// fputstring(): write a 0-terminated string
// ----------------------------------------------------------------------------
void fputstring (FILE * f, const char * str)
{
fwriteOrDie ((void *) str, sizeof (*str), strnlen (str, SIZE_MAX)+1, f); // SECURITY NOTE: string use has been reviewed
}
void fputstring (FILE * f, const std::string & str)
{
fputstring (f, str.c_str());
}
#ifdef _WIN32
#pragma warning(push)
#pragma warning(disable : 4127)
#endif
void fputstring (FILE * f, const wchar_t * str)
{
if (sizeof (*str) == 2)
{
fwriteOrDie ((void *) str, sizeof (*str), wcsnlen (str, SIZE_MAX)+1, f); // SECURITY NOTE: string use has been reviewed
} else if (sizeof (*str) == 4)
{
size_t strLen = wcsnlen(str, SIZE_MAX);
std::unique_ptr<char16_t[]> str16(new char16_t[strLen + 1]);
for (int i = 0; i < strLen; i++)
{
str16[i] = (char16_t) str[i];
}
str16[strLen] = 0;
fwriteOrDie((void *)str16.get(), sizeof (*str) / 2, strLen + 1, f); // SECURITY NOTE: string use has been reviewed
} else
{
RuntimeError("error: unknown encoding\n");
}
}
#ifdef _WIN32
#pragma warning(pop)
#endif
void fputstring (FILE * f, const std::wstring & str)
{
fputstring (f, str.c_str());
}
// ----------------------------------------------------------------------------
// fexists(): test if a file exists
// ----------------------------------------------------------------------------
bool fexists (const wchar_t * pathname)
{
#ifdef _MSC_VER
WIN32_FIND_DATAW findFileData;
HANDLE hFind = FindFirstFileW (pathname, &findFileData);
if (hFind != INVALID_HANDLE_VALUE)
{
FindClose (hFind);
return true;
}
else
{
return false;
}
#else
auto_file_ptr f (_wfopen (pathname, L"r"));
return f != nullptr;
#endif
}
bool fexists (const char * pathname)
{
#ifdef _MSC_VER
WIN32_FIND_DATAA findFileData;
HANDLE hFind = FindFirstFileA (pathname, &findFileData);
if (hFind != INVALID_HANDLE_VALUE)
{
FindClose (hFind);
return true;
}
else
{
return false;
}
#else
auto_file_ptr f (fopen (pathname, "r"));
return f != nullptr;
#endif
}
// ----------------------------------------------------------------------------
// funicode(): test if a file uses unicode by reading its BOM
// ----------------------------------------------------------------------------
bool funicode (FILE * f)
{
unsigned short testCode;
if (fread (&testCode, sizeof(short), 1, f) == 1 &&
(int)testCode == 0xFEFF)
return true;
fseek (f,0,SEEK_SET);
//rewind (f);
return false;
}
// ----------------------------------------------------------------------------
// fgetline(): like fgets() but terminate with err msg in case of error;
// removes the newline character at the end (like gets());
// Returns 'buf' (always). buf guaranteed to be 0-terminated.
// ----------------------------------------------------------------------------
#ifdef __CYGWIN__ // strnlen() is somehow missing in Cygwin, which we use to quick-check GCC builds under Windows (although it is not a real target platform)
static inline size_t strnlen (const char *s, size_t n) { return std::find (s, s + n, '\0') - s; }
#endif
#ifdef UNDER_CE // strlen for char * not defined in winCE
static inline size_t strnlen (const char *s, size_t n) { return std::find (s, s + n, '\0') - s; }
#endif
static inline wchar_t * fgets(wchar_t * buf, int n, FILE * f) { return fgetws(buf, n, f); }
static inline size_t strnlen(wchar_t * s, size_t n) { return wcsnlen(s, n); }
template<class CHAR>
CHAR * fgetline (FILE * f, CHAR * buf, int size)
{
// TODO: we should redefine this to write UTF-16 (which matters on GCC which defines wchar_t as 32 bit)
CHAR * p = fgets(buf, size, f);
if (p == NULL) // EOF reached: next time feof() = true
{
if (ferror (f))
RuntimeError ("error reading line: %s", strerror (errno));
buf[0] = 0;
return buf;
}
size_t n = strnlen (p, size);
// check for buffer overflow
if (n >= (size_t) size -1)
{
basic_string<CHAR> example (p, n < 100 ? n : 100);
uint64_t filepos = fgetpos(f); // (for error message only)
RuntimeError("input line too long at file offset %I64d (max. %d characters allowed) [%s ...]", filepos, size - 1, msra::strfun::utf8(example).c_str());
}
// remove newline at end
if (n > 0 && p[n-1] == '\n') // UNIX and Windows style
{
n--;
p[n] = 0;
if (n > 0 && p[n-1] == '\r') // Windows style
{
n--;
p[n] = 0;
}
}
else if (n > 0 && p[n-1] == '\r') // Mac style
{
n--;
p[n] = 0;
}
return buf;
}
#if 0
const wchar_t * fgetline (FILE * f, wchar_t * buf, int size)
{
wchar_t * p = fgetws (buf, size, f);
if (p == NULL) // EOF reached: next time feof() = true
{
if (ferror (f))
RuntimeError ("error reading line: %s", strerror (errno));
buf[0] = 0;
return buf;
}
size_t n = wcsnlen (p, size); // SECURITY NOTE: string use has been reviewed
// check for buffer overflow
if (n >= (size_t) size -1)
{
wstring example (buf, min (n, 100));
RuntimeError ("input line too long at file offset %U64d (max. %d characters allowed) [%S ...]",
fgetpos (f), size -1, example.c_str());
}
// remove newline at end
if (n > 0 && p[n-1] == L'\n') // UNIX and Windows style
{
n--;
p[n] = 0;
if (n > 0 && p[n-1] == L'\r') // Windows style
{
n--;
p[n] = 0;
}
}
else if (n > 0 && p[n-1] == L'\r') // Mac style
{
n--;
p[n] = 0;
}
return buf;
}
#endif
// STL string version
std::string fgetline (FILE * f)
{
fixed_vector<char> buf (1000000);
return fgetline (f, &buf[0], (int) buf.size());
}
// STL string version
std::wstring fgetlinew (FILE * f)
{
fixed_vector<wchar_t> buf (1000000);
return fgetline (f, &buf[0], (int) buf.size());
}
// STL string version avoiding most memory allocations
void fgetline (FILE * f, std::string & s, std::vector<char> & buf)
{
buf.resize (1000000); // enough? // KIT: increased to 1M to be safe
const char * p = fgetline (f, &buf[0], (int) buf.size());
s.assign (p);
}
void fgetline (FILE * f, std::wstring & s, std::vector<wchar_t> & buf)
{
buf.resize (1000000); // enough? // KIT: increased to 1M to be safe
const wchar_t * p = fgetline (f, &buf[0], (int) buf.size());
s.assign (p);
}
// char buffer version
void fgetline (FILE * f, std::vector<char> & buf)
{
const int BUF_SIZE = 1000000; // enough? // KIT: increased to 1M to be safe
buf.resize (BUF_SIZE);
fgetline (f, &buf[0], (int) buf.size());
buf.resize (strnlen (&buf[0], BUF_SIZE) +1); // SECURITY NOTE: string use has been reviewed
}
void fgetline (FILE * f, std::vector<wchar_t> & buf)
{
const int BUF_SIZE = 1000000; // enough? // KIT: increased to 1M to be safe
buf.resize (BUF_SIZE);
fgetline (f, &buf[0], (int) buf.size());
buf.resize (wcsnlen (&buf[0], BUF_SIZE) +1); // SECURITY NOTE: string use has been reviewed
}
// read a 0-terminated string
const char * fgetstring (FILE * f, __out_z_cap(size) char * buf, int size)
{
int i;
for (i = 0; ; i++)
{
int c = fgetc(f);
if (c == EOF)
RuntimeError ("error reading string or missing 0: %s", strerror(errno));
if (c == 0) break;
if (i >= size - 1)
RuntimeError ("input line too long (max. %d characters allowed)", size - 1);
buf[i] = (char)c;
}
assert (i < size);
buf[i] = 0;
return buf;
}
// read a 0-terminated wstring
string fgetstring (FILE * f)
{
string res;
for (;;)
{
char c = (char)fgetc (f);
if (c == EOF)
RuntimeError ("error reading string or missing 0: %s", strerror (errno));
if (c == 0) break;
res.push_back (c);
}
return res;
}
// read a 0-terminated string
const wchar_t * fgetstring (FILE * f, __out_z_cap(size) wchar_t * buf, int size)
{
int i;
for (i = 0; ; i++)
{
// TODO: we should redefine this to write UTF-16 (which matters on GCC which defines wchar_t as 32 bit)
wint_t c = fgetwc(f);
if (c == WEOF)
RuntimeError ("error reading string or missing 0: %s", strerror(errno));
if (c == 0) break;
if (i >= size - 1)
{
RuntimeError ("input line too long (max. %d wchar_tacters allowed)", size - 1);
}
buf[i] = (wchar_t)c;
}
assert(i < size);
buf[i] = 0;
return buf;
}
#if (_MSC_VER < 1800)
// read a 0-terminated wstring
wstring fgetwstring (FILE * f)
{
// TODO: we should redefine this to write UTF-16 (which matters on GCC which defines wchar_t as 32 bit)
wstring res;
for (;;)
{
//
// there is a known vc++ runtime bug: Microsoft Connect 768113
// fgetwc can skip a byte in certain condition
// this is already fixed in update release to VS 2012
// for now the workaround is to use fgetc twice to simulate fgetwc
//
//wint_t c = fgetwc (f);
int c1 = fgetc(f);
int c2 = fgetc(f);
// synthetic fgetc output to simulate fgetwc
// note the order below works only for little endian
wint_t c = (wint_t)((c2 << 8) | c1);
if (c == WEOF)
RuntimeError ("error reading string or missing 0: %s", strerror (errno));
if (c == 0) break;
res.push_back ((wchar_t) c);
}
return res;
}
#else
// read a 0-terminated wstring
wstring fgetwstring (FILE * f)
{
// TODO: we should redefine this to write UTF-16 (which matters on GCC which defines wchar_t as 32 bit)
wstring res;
for (;;)
{
wint_t c = fgetwc(f);
if (c == WEOF)
RuntimeError ("error reading string or missing 0: %s", strerror(errno));
if (c == 0) break;
res.push_back((wchar_t)c);
}
return res;
}
#endif
bool fskipspace (FILE * f)
{
int count=0;
for (;;count++)
{
int c = fgetc (f);
if (c == EOF) // hit the end
{
if (ferror(f))
RuntimeError ("error reading from file: %s", strerror(errno));
break;
}
if (!isspace (c)) // end of space: undo getting that character
{
int rc = ungetc(c, f);
if (rc != c)
RuntimeError ("error in ungetc(): %s", strerror(errno));
break;
}
}
return count>0;
}
bool fskipwspace (FILE * f)
{
// TODO: we should redefine this to write UTF-16 (which matters on GCC which defines wchar_t as 32 bit)
int count = 0;
for (;;count++)
{
wint_t c = fgetwc (f);
if (c == WEOF) // hit the end
{
if (ferror (f))
RuntimeError ("error reading from file: %s", strerror (errno));
break;
}
if (!iswspace (c)) // end of space: undo getting that character
{
wint_t rc = ungetwc (c, f);
if (rc != c)
RuntimeError ("error in ungetc(): %s", strerror (errno));
break;
}
}
return count > 0;
}
// fskipNewLine(): skip all white space until end of line incl. the newline
// skip - skip the end of line if true, otherwise leave the end of line (but eat any leading space)
int fskipNewline (FILE * f, bool skip)
{
int c;
bool found = false;
// skip white space
do
{
c = fgetc(f);
} while (c == ' ' || c == '\t');
if (c == '\r' || c == '\n') // Accept any type of newline
{
found = true;
if (skip)
c = fgetc(f);
}
if ((found && !skip) ||
!(c == '\r' || c == '\n'))
{
// if we found an EOF, return that unless there was a newline before the EOF
if (c == EOF)
return found?(int)true:EOF;
int rc = ungetc (c, f);
if (rc != c)
RuntimeError ("error in ungetc(): %s", strerror (errno));
return (int)found;
}
// if we get here we saw a newline
return (int)true;
}
// fskipwNewLine(): skip all white space until end of line incl. the newline
// skip - skip the end of line if true, otherwise leave the end of line (but eat any leading space)
int fskipwNewline (FILE * f, bool skip)
{
// TODO: we should redefine this to write UTF-16 (which matters on GCC which defines wchar_t as 32 bit)
wint_t c;
bool found = false;
// skip white space
do
{
c = fgetwc(f);
} while (c == L' ' || c == L'\t');
if (c == L'\r' || c == L'\n') // accept any style of newline
{
found = true;
if (skip)
c = fgetwc(f);
}
if ((found && !skip)
||!(c == L'\r' || c == L'\n'))
{
if (c == WEOF)
return found?(int)true:EOF;
wint_t rc = ungetwc (c, f);
if (rc != c)
RuntimeError ("error in ungetwc(): %s", strerror (errno));
return (int)found;
}
// if we get here we saw a double newline
return (int)true;
}
// read a space-terminated token
// ...TODO: eat trailing space like fscanf() doessurrounding space)
const char * fgettoken (FILE * f, __out_z_cap(size) char * buf, int size)
{
fskipspace (f); // skip leading space
int c = -1;
int i;
for (i = 0; ; i++)
{
c = fgetc (f);
if (c == EOF) break;
if (isspace (c)) break;
if (i >= size -1)
RuntimeError ("input token too long (max. %d characters allowed)", size -1);
buf[i] = (char) c;
}
// ... TODO: while (IsWhiteSpace (c)) c = fgetc (f); // skip trailing space
if (c != EOF)
{
int rc = ungetc (c, f);
if (rc != c)
RuntimeError ("error in ungetc(): %s", strerror (errno));
}
assert (i < size);
buf[i] = 0;
return buf;
}
string fgettoken (FILE * f)
{
char buf[80];
return fgettoken (f, buf, sizeof(buf)/sizeof(*buf));
}
// read a space-terminated token
const wchar_t * fgettoken (FILE * f, __out_z_cap(size) wchar_t * buf, int size)
{
// TODO: we should redefine this to write UTF-16 (which matters on GCC which defines wchar_t as 32 bit)
fskipwspace(f); // skip leading space
wint_t c = WEOF;
int i;
for (i = 0; ; i++)
{
c = fgetwc (f);
if (c == WEOF) break;
if (iswspace (c)) break;
if (i >= size -1)
RuntimeError ("input token too long (max. %d wchar_tacters allowed)", size -1);
buf[i] = (wchar_t) c;
}
// ... TODO: while (IsWhiteSpace (c)) c = fgetc (f); // skip trailing space
if (c != WEOF)
{
int rc = ungetwc (c, f);
if (rc != c)
RuntimeError ("error in ungetwc(): %s", strerror (errno));
}
assert (i < size);
buf[i] = 0;
return buf;
}
wstring fgetwtoken (FILE * f)
{
wchar_t buf[80];
return fgettoken(f, buf, sizeof(buf) / sizeof(*buf));
}
template <>
int ftrygetText<bool>(FILE * f, bool& v)
{
wchar_t c;
int rc = ftrygetText(f, c);
v = (c == L'T');
return rc;
}
// ----------------------------------------------------------------------------
// fputText(): write a bool out as character
// ----------------------------------------------------------------------------
template <>
void fputText<bool>(FILE * f, bool v)
{
fputText(f, v?L'T':L'F');
}
// ----------------------------------------------------------------------------
// fgetTag(): read a 4-byte tag & return as a string
// ----------------------------------------------------------------------------
std::string fgetTag (FILE * f)
{
char tag[5];
freadOrDie (&tag[0], sizeof (tag[0]), 4, f);
tag[4] = 0;
return std::string (tag);
}
// ----------------------------------------------------------------------------
// fcheckTag(): read a 4-byte tag & verify it; terminate if wrong tag
// ----------------------------------------------------------------------------
void fcheckTag (FILE * f, const char * expectedTag)
{
fcompareTag (fgetTag (f), expectedTag);
}
void fcheckTag_ascii (FILE * f, const string & expectedTag)
{
char buf[20]; // long enough for a tag
fskipspace (f);
fgettoken (f, buf, sizeof(buf)/sizeof(*buf));
if (expectedTag != buf)
{
RuntimeError ("invalid tag '%s' found; expected '%s'", buf, expectedTag.c_str());
}
}
// ----------------------------------------------------------------------------
// fcompareTag(): compare two tags; terminate if wrong tag
// ----------------------------------------------------------------------------
void fcompareTag (const string & readTag, const string & expectedTag)
{
if (readTag != expectedTag)
{
RuntimeError ("invalid tag '%s' found; expected '%s'",
readTag.c_str(), expectedTag.c_str());
}
}
// ----------------------------------------------------------------------------
// fputTag(): write a 4-byte tag
// ----------------------------------------------------------------------------
void fputTag (FILE * f, const char * tag)
{
const int TAG_LEN = 4;
assert (strnlen (tag, TAG_LEN + 1) == TAG_LEN);
fwriteOrDie ((void *) tag, sizeof (*tag), strnlen (tag, TAG_LEN), f);
}
// ----------------------------------------------------------------------------
// fskipstring(): skip a 0-terminated string, such as a pad string
// ----------------------------------------------------------------------------
void fskipstring (FILE * f)
{
char c;
do
{
freadOrDie(&c, sizeof (c), 1, f);
}
while (c);
}
// ----------------------------------------------------------------------------
// fpad(): write a 0-terminated string to pad file to a n-byte boundary
// (note: file must be opened in binmode to work properly on DOS/Windows!!!)
// ----------------------------------------------------------------------------
void fpad (FILE * f, int n)
{
// get current writing position
int pos = ftell (f);
if (pos == -1)
{
RuntimeError ("error in ftell(): %s", strerror (errno));
}
// determine how many bytes are needed (at least 1 for the 0-terminator)
// and create a dummy string of that length incl. terminator
int len = n - (pos % n);
const char dummyString[] = "MSR-Asia: JL+FS";
size_t offset = sizeof(dummyString)/sizeof(dummyString[0]) - len;
assert (offset >= 0);
fputstring (f, dummyString + offset);
}
// ----------------------------------------------------------------------------
// fgetbyte(): read a byte value
// ----------------------------------------------------------------------------
char fgetbyte (FILE * f)
{
char v;
freadOrDie (&v, sizeof (v), 1, f);
return v;
}
// ----------------------------------------------------------------------------
// fgetshort(): read a short value
// ----------------------------------------------------------------------------
short fgetshort (FILE * f)
{
short v;
freadOrDie (&v, sizeof (v), 1, f);
return v;
}
short fgetshort_bigendian (FILE * f)
{
unsigned char b[2];
freadOrDie (&b, sizeof (b), 1, f);
return (short) ((b[0] << 8) + b[1]);
}
// ----------------------------------------------------------------------------
// fgetint24(): read a 3-byte (24-bit) int value
// ----------------------------------------------------------------------------
int fgetint24 (FILE * f)
{
int v;
assert (sizeof (v) == 4);
freadOrDie (&v, sizeof (v) -1, 1, f); // only read 3 lower-order bytes
v <<= 8; // shift up (upper 8 bits uninit'ed)
v >>= 8; // shift down 8 bits with sign-extend
return v;
}
// ----------------------------------------------------------------------------
// fgetint(): read an int value
// ----------------------------------------------------------------------------
int fgetint (FILE * f)
{
int v;
freadOrDie (&v, sizeof (v), 1, f);
return v;
}
int fgetint_bigendian (FILE * f)
{
unsigned char b[4];
freadOrDie (&b, sizeof (b), 1, f);
return (int) (((((b[0] << 8) + b[1]) << 8) + b[2]) << 8) + b[3];
}
int fgetint_ascii (FILE * f)
{
fskipspace (f);
int res = 0;
char c;
freadOrDie (&c, sizeof (c), 1, f);
while (isdigit ((unsigned char)c))
{
res = (10 * res) + (c - '0');
freadOrDie (&c, sizeof (c), 1, f);
}
int rc = ungetc (c, f);
if (rc != c)
{
RuntimeError ("error in ungetc(): %s", strerror (errno));
}
return res;
}
// ----------------------------------------------------------------------------
// fgetlong(): read an long value
// ----------------------------------------------------------------------------
long fgetlong (FILE * f)
{
long v;
freadOrDie (&v, sizeof (v), 1, f);
return v;
}
// ----------------------------------------------------------------------------
// fgetfloat(): read a float value
// ----------------------------------------------------------------------------
float fgetfloat (FILE * f)
{
float v;
freadOrDie (&v, sizeof (v), 1, f);
return v;
}
float fgetfloat_bigendian (FILE * f)
{
int bitpattern = fgetint_bigendian (f);
return *((float*) &bitpattern);
}
float fgetfloat_ascii (FILE * f)
{
float val;
fskipspace (f);
int rc = fscanf (f, "%f", &val); // security hint: safe overloads
if (rc == 0)
RuntimeError ("error reading float value from file (invalid format): %s");
else if (rc == EOF)
RuntimeError ("error reading from file: %s", strerror (errno));
assert (rc == 1);
return val;
}
// ----------------------------------------------------------------------------
// fgetdouble(): read a double value
// ----------------------------------------------------------------------------
double fgetdouble (FILE * f)
{
double v;
freadOrDie (&v, sizeof (v), 1, f);
return v;
}
// ----------------------------------------------------------------------------
// fputbyte(): write a byte value
// ----------------------------------------------------------------------------
void fputbyte (FILE * f, char v)
{
fwriteOrDie (&v, sizeof (v), 1, f);
}
// ----------------------------------------------------------------------------
// fputshort(): write a short value
// ----------------------------------------------------------------------------
void fputshort (FILE * f, short v)
{
fwriteOrDie (&v, sizeof (v), 1, f);
}
// ----------------------------------------------------------------------------
// fputint24(): write a 3-byte (24-bit) int value
// ----------------------------------------------------------------------------
void fputint24 (FILE * f, int v)
{
assert (sizeof (v) == 4);
fwriteOrDie (&v, sizeof (v) -1, 1, f); // write low-order 3 bytes
}
// ----------------------------------------------------------------------------
// fputint(): write an int value
// ----------------------------------------------------------------------------
void fputint (FILE * f, int v)
{
fwriteOrDie (&v, sizeof (v), 1, f);
}
// ----------------------------------------------------------------------------
// fputlong(): write an long value
// ----------------------------------------------------------------------------
void fputlong (FILE * f, long v)
{
fwriteOrDie (&v, sizeof (v), 1, f);
}
// ----------------------------------------------------------------------------
// fputfloat(): write a float value
// ----------------------------------------------------------------------------
void fputfloat (FILE * f, float v)
{
fwriteOrDie (&v, sizeof (v), 1, f);
}
// ----------------------------------------------------------------------------
// fputdouble(): write a double value
// ----------------------------------------------------------------------------
void fputdouble (FILE * f, double v)
{
fwriteOrDie (&v, sizeof (v), 1, f);
}
// ----------------------------------------------------------------------------
// fputfile(): write a binary block or a string as a file
// ----------------------------------------------------------------------------
void fputfile (const wstring & pathname, const std::vector<char> & buffer)
{
FILE * f = fopenOrDie (pathname, L"wb");
try
{
if (buffer.size() > 0)
{ // ^^ otherwise buffer[0] is an illegal expression
fwriteOrDie (&buffer[0], sizeof (buffer[0]), buffer.size(), f);
}
fcloseOrDie (f);
}
catch (...)
{
fclose (f);
throw;
}
}
void fputfile (const wstring & pathname, const std::wstring & string)
{
FILE * f = fopenOrDie (pathname, L"wb");
try
{
if (string.length() > 0)
{ // ^^ otherwise buffer[0] is an illegal expression
fwriteOrDie (string.c_str(), sizeof (string[0]), string.length(), f);
}
fcloseOrDie (f);
}
catch (...)
{
fclose (f);
throw;
}
}
void fputfile (const wstring & pathname, const std::string & string)
{
FILE * f = fopenOrDie (pathname, L"wb");
try
{
if (string.length() > 0)
{ // ^^ otherwise buffer[0] is an illegal expression
fwriteOrDie (string.c_str(), sizeof (string[0]), string.length(), f);
}
fcloseOrDie (f);
}
catch (...)
{
fclose (f);
throw;
}
}
// ----------------------------------------------------------------------------
// fgetfile(): load a file as a binary block
// ----------------------------------------------------------------------------
void fgetfile (const wstring & pathname, std::vector<char> & buffer)
{
FILE * f = fopenOrDie (pathname, L"rb");
size_t len = filesize (f);
buffer.resize (len);
if (buffer.size() > 0)
{ // ^^ otherwise buffer[0] is an illegal expression
freadOrDie (&buffer[0], sizeof (buffer[0]), buffer.size(), f);
}
fclose (f);
}
void fgetfile (FILE * f, std::vector<char> & buffer)
{ // this version reads until eof
buffer.resize (0);
buffer.reserve (1000000); // avoid too many reallocations
std::vector<char> inbuf;
inbuf.resize (65536); // read in chunks of this size
while (!feof (f)) // read until eof
{
size_t n = fread (&inbuf[0], sizeof (inbuf[0]), inbuf.size(), f);
if (ferror (f))
{
RuntimeError ("fgetfile: error reading from file: %s", strerror (errno));
}
buffer.insert (buffer.end(), inbuf.begin(), inbuf.begin() + n);
}
buffer.reserve (buffer.size());
}
// load it into RAM in one huge chunk
static size_t fgetfilechars (const std::wstring & path, vector<char> & buffer)
{
auto_file_ptr f (fopenOrDie (path, L"rb"));
size_t len = filesize (f);
buffer.reserve (len +1);
freadOrDie (buffer, len, f);
buffer.push_back (0); // this makes it a proper C string
return len;
}
template<class LINES> static void strtoklines (char * s, LINES & lines)
{
for (char * p = strtok (s, "\r\n"); p; p = strtok (NULL, "\r\n"))
lines.push_back (p);
}
void msra::files::fgetfilelines (const std::wstring & path, vector<char> & buffer, std::vector<std::string> & lines)
{
// load it into RAM in one huge chunk
const size_t len = fgetfilechars (path, buffer);
// parse into lines
lines.resize (0);
lines.reserve (len / 20);
strtoklines (&buffer[0], lines);
}
// same as above but returning const char* (avoiding the memory allocation)
vector<char*> msra::files::fgetfilelines (const wstring & path, vector<char> & buffer)
{
// load it into RAM in one huge chunk
const size_t len = fgetfilechars (path, buffer);
// parse into lines
vector<char *> lines;
lines.reserve (len / 20);
strtoklines (&buffer[0], lines);
return lines;
}
// ----------------------------------------------------------------------------
// getfiletime(): access modification time
// ----------------------------------------------------------------------------
#ifndef _FILETIME_
//typedef struct _FILETIME { DWORD dwLowDateTime; DWORD dwHighDateTime; }; // from minwindef.h
typedef time_t FILETIME;
#else
bool operator>= (const FILETIME & targettime, const FILETIME & inputtime) // for use in fuptodate()
{
return (targettime.dwHighDateTime > inputtime.dwHighDateTime) ||
(targettime.dwHighDateTime == inputtime.dwHighDateTime && targettime.dwLowDateTime >= inputtime.dwLowDateTime);
}
#endif
bool getfiletime (const wstring & path, FILETIME & time)
{ // return file modification time, false if cannot be determined
#ifdef _WIN32
WIN32_FIND_DATAW findFileData;
auto_handle hFind (FindFirstFileW (path.c_str(), &findFileData), ::FindClose);
if (hFind != INVALID_HANDLE_VALUE)
{
time = findFileData.ftLastWriteTime;
return true;
}
else
return false;
#else // TODO: test this; e.g. does st_mtime have the desired resolution?
struct stat buf;
int result;
// Get data associated with "crt_stat.c":
result = stat(charpath(path), &buf);
// Check if statistics are valid:
if (result != 0)
return false;
time = buf.st_mtime;
return true;
#endif
}
#if 0
void setfiletime (const wstring & path, const FILETIME & time)
{ // update the file modification time of an existing file
auto_handle h (CreateFileW (path.c_str(), FILE_WRITE_ATTRIBUTES,
FILE_SHARE_READ|FILE_SHARE_WRITE, NULL,
OPEN_EXISTING, FILE_ATTRIBUTE_NORMAL, NULL));
if (h == INVALID_HANDLE_VALUE)
{
RuntimeError ("setfiletime: error opening file: %d", GetLastError());
}
BOOL rc = SetFileTime (h, NULL, NULL, &time);
if (!rc)
{
RuntimeError ("setfiletime: error setting file time information: %d", GetLastError());
}
}
#endif
#if 0
// ----------------------------------------------------------------------------
// expand_wildcards -- wildcard expansion of a path, including directories.
// ----------------------------------------------------------------------------
// Win32-style variant of this function (in case we want to use it some day)
// Returns 0 in case of failure. May throw in case of bad_alloc.
static BOOL ExpandWildcards (wstring path, vector<wstring> & paths)
{
// convert root to DOS filename convention
for (size_t k = 0; k < path.length(); k++) if (path[k] == '/') path[k] = '\\';
// remove terminating backslash
size_t last = path.length() -1;
if (last >= 0 && path[last] == '\\') path.erase (last);
// convert root to long filename convention
//if (path.find (L"\\\\?\\") != 0)
// path = L"\\\\?\\" + root;
// split off everything after first wildcard
size_t wpos = path.find_first_of (L"*?");
if (wpos == 2 && path[0] == '\\' && path[1] == '\\')
wpos = path.find_first_of (L"*?", 4); // 4=skip "\\?\"
if (wpos == wstring::npos)
{ // no wildcard: just return it
paths.push_back (path);
return TRUE;
}
// split off everything afterwards if any
wstring rest; // remaining path after this directory
size_t spos = path.find_first_of (L"\\", wpos +1);
if (spos != wstring::npos)
{
rest = path.substr (spos +1);
path.erase (spos);
}
// crawl folder
WIN32_FIND_DATAW ffdata;
auto_handle hFind (::FindFirstFileW (path.c_str(), &ffdata), ::FindClose);
if (hFind == INVALID_HANDLE_VALUE)
{
DWORD err = ::GetLastError();
if (rest.empty() && err == 2) return TRUE; // no matching file: empty
return FALSE; // another error
}
size_t pos = path.find_last_of (L"\\");
if (pos == wstring::npos) LogicError ("unexpected missing \\ in path");
wstring parent = path.substr (0, pos);
do
{
// skip this and parent directory
bool isDir = ((ffdata.dwFileAttributes & (FILE_ATTRIBUTE_DIRECTORY | FILE_ATTRIBUTE_REPARSE_POINT)) != 0);
if (isDir && ffdata.cFileName[0] == '.') continue;
wstring filename = parent + L"\\" + ffdata.cFileName;
if (rest.empty())
{
paths.push_back (filename);
}
else if (isDir) // multi-wildcards: further expand
{
BOOL rc = ExpandWildcards (filename + L"\\" + rest, paths);
rc; // error here means no match, e.g. Access Denied to one subfolder
}
} while (::FindNextFileW(hFind, &ffdata) != 0);
return TRUE;
}
void expand_wildcards (const wstring & path, vector<wstring> & paths)
{
BOOL rc = ExpandWildcards (path, paths);
if (!rc)
RuntimeError ("error in expanding wild cards '%S': %S", path.c_str(), FormatWin32Error(::GetLastError()).c_str());
}
#endif
// ----------------------------------------------------------------------------
// make_intermediate_dirs() -- make all intermediate dirs on a path
// ----------------------------------------------------------------------------
static void mkdir (const wstring & path)
{
int rc = _wmkdir (path.c_str());
if (rc >= 0 || errno == EEXIST)
return; // no error or already existing --ok
#ifdef _WIN32 // bug in _wmkdir(): returns access_denied if folder exists but read-only --check existence
if (errno == EACCES)
{
DWORD att = ::GetFileAttributesW (path.c_str());
if (att != INVALID_FILE_ATTRIBUTES || (att & FILE_ATTRIBUTE_DIRECTORY) != 0)
return; // ok
}
#endif
RuntimeError ("mkdir: error creating intermediate directory %S", path.c_str());
}
// make subdir of a file including parents
void msra::files::make_intermediate_dirs (const wstring & filepath)
{
vector<wchar_t> buf;
buf.resize (filepath.length() +1, 0);
wcscpy (&buf[0], filepath.c_str());
wstring subpath;
int skip = 0;
#ifdef _WIN32
// On windows, if share (\\) then the first two levels (machine, share name) cannot be made.
if ((buf[0] == '/' && buf[1] == '/') || (buf[0] == '\\' && buf[1] == '\\'))
{
subpath = L"/";
skip = 2; // skip two levels (machine, share)
}
#else
// On unix, if the filepath starts with '/' then it is absolute
// path and the created sub-paths should also start with '/'
if (buf[0] == '/')
{
subpath = L"/";
}
#endif
// make all constituents except the filename (to make a dir, include a trailing slash)
wchar_t * context = nullptr;
for (const wchar_t * p = wcstok_s (&buf[0], L"/\\", &context); p; p = wcstok_s (NULL, L"/\\", &context))
{
if (subpath != L"" && subpath != L"/" && subpath != L"\\" && skip == 0)
{
mkdir (subpath);
}
else if (skip > 0) skip--; // skip this level
// rebuild the final path
if (subpath != L"") subpath += L"/";
subpath += p;
}
}
// ----------------------------------------------------------------------------
// fuptodate() -- test whether an output file is at least as new as an input file
// ----------------------------------------------------------------------------
// test if file 'target' is not older than 'input' --used for make mode
// 'input' must exist if 'inputrequired'; otherweise if 'target' exists, it is considered up to date
// 'target' may or may not exist
bool msra::files::fuptodate (const wstring & target, const wstring & input, bool inputrequired)
{
FILETIME targettime;
if (!getfiletime (target, targettime)) return false; // target missing: need to update
FILETIME inputtime;
if (!getfiletime (input, inputtime)) return !inputrequired; // input missing: if required, pretend to be out of date as to force caller to fail
// up to date if target has higher time stamp
return targettime >= inputtime; // note: uses an overload for WIN32 FILETIME (in Linux, FILETIME=time_t=size_t)
}
/// separate string by separator
vector<string> sep_string(const string & istr, const string & sep)
{
string str = istr;
str = trim(str);
vector<string> vstr;
string csub;
size_t ifound = 0;
size_t ifoundlast = ifound;
ifound = str.find(sep, ifound);
while (ifound != std::string::npos)
{
csub = str.substr(ifoundlast, ifound - ifoundlast);
vstr.push_back(trim(csub));
ifoundlast = ifound+1;
ifound = str.find(sep, ifoundlast);
}
csub = str.substr(ifoundlast, str.length() - ifoundlast);
vstr.push_back(trim(csub));
return vstr;
}
/// separate string by separator
vector<wstring> wsep_string(const wstring & istr, const wstring & sep)
{
wstring str = istr;
str = wtrim(str);
vector<wstring> vstr;
wstring csub;
size_t ifound = 0;
size_t ifoundlast = ifound;
ifound = str.find(sep, ifound);
while (ifound != std::wstring::npos)
{
csub = str.substr(ifoundlast, ifound - ifoundlast);
vstr.push_back(wtrim(csub));
ifoundlast = ifound + 1;
ifound = str.find(sep, ifoundlast);
}
csub = str.substr(ifoundlast, str.length() - ifoundlast);
vstr.push_back(wtrim(csub));
return vstr;
}
static inline std::string wcstombs(const std::wstring & p) // output: MBCS
{
size_t len = p.length();
msra::basetypes::fixed_vector<char> buf(2 * len + 1); // max: 1 wchar => 2 mb chars
std::fill(buf.begin(), buf.end(), 0);
::wcstombs(&buf[0], p.c_str(), 2 * len + 1);
return std::string(&buf[0]);
}
static inline std::wstring mbstowcs(const std::string & p) // input: MBCS
{
size_t len = p.length();
msra::basetypes::fixed_vector<wchar_t> buf(len + 1); // max: >1 mb chars => 1 wchar
std::fill(buf.begin(), buf.end(), (wchar_t)0);
OACR_WARNING_SUPPRESS(UNSAFE_STRING_FUNCTION, "Reviewed OK. size checked. [rogeryu 2006/03/21]");
::mbstowcs(&buf[0], p.c_str(), len + 1);
return std::wstring(&buf[0]);
}
wstring s2ws(const string& str)
{
#ifdef __unix__
return mbstowcs(str);
#else
typedef std::codecvt_utf8<wchar_t> convert_typeX;
std::wstring_convert<convert_typeX, wchar_t> converterX;
return converterX.from_bytes(str);
#endif
}
string ws2s(const wstring& wstr)
{
#ifdef __unix__
return wcstombs(wstr);
#else
typedef codecvt_utf8<wchar_t> convert_typeX;
wstring_convert<convert_typeX, wchar_t> converterX;
return converterX.to_bytes(wstr);
#endif
}