new class (w)cstring to make passing STL strings to C functions easier;

consolidating some mappings for VS-only functions in basetypes.h
This commit is contained in:
Frank Seide 2014-10-31 11:42:30 -07:00
Родитель ff71e32586
Коммит b0aef3dd2f
4 изменённых файлов: 57 добавлений и 79 удалений

Просмотреть файл

@ -8,6 +8,7 @@
#include "stdafx.h"
#define DATAREADER_LOCAL
#include "basetypes.h"
#include "DataReader.h"
namespace Microsoft { namespace MSR { namespace CNTK {

Просмотреть файл

@ -102,36 +102,57 @@ using namespace std;
#define strerror(x) "strerror error but can't report error number sorry!"
#endif
// NOTE: everything between this '#if 0' and its matching '#endif' is compiled out.
// The section holds empty fallback definitions for SAL annotation macros, plus a
// mapping of __override to 'virtual', each guarded so it only applies when the
// macro is not already defined.
#if 0
#ifndef __in // dummies for sal annotations if compiler does not support it
#define __in
#define __inout_z
#define __in_count(x)
#define __inout_cap(x)
#define __inout_cap_c(x)
#endif
#ifndef __out_z_cap // non-VS2005 annotations
#define __out_cap(x)
#define __out_z_cap(x)
#define __out_cap_c(x)
#endif
#ifndef __override // and some more non-std extensions required by Office
#define __override virtual
#endif
#endif
// disable warnings for which fixing would make code less readable
#pragma warning(disable : 4290) // throw() declaration ignored
#pragma warning(disable : 4244) // conversion from typeA to typeB, possible loss of data
// ----------------------------------------------------------------------------
// basic macros
// (w)cstring -- helper class like std::string but with auto-cast to char*
// ----------------------------------------------------------------------------
#define SAFE_DELETE(p) { if(p) { delete (p); (p)=NULL; } }
#define SAFE_RELEASE(p) { if(p) { (p)->Release(); (p)=NULL; } } // nasty! use CComPtr<>
namespace msra { namespace strfun {
    // basic_cstring<C> -- a std::basic_string<C> that additionally converts implicitly to 'const C *'.
    // Intended as a return type: the result can be handed directly to a C function that expects a
    // 0-terminated string, or be used like a regular std::basic_string<C>.
    // The pointer obtained through the conversion is that of c_str(), i.e. it is only valid while
    // the basic_cstring object it came from is alive and unmodified.
    template<typename C> struct basic_cstring : std::basic_string<C>
    {
        // construct from anything a std::basic_string<C> can be constructed from (string object, C string, ...)
        // The base class is named explicitly: it is a dependent base, so an unqualified 'basic_string' is not found by standard name lookup.
        template<typename S> basic_cstring (const S & p) : std::basic_string<C> (p) { }
        // implicit conversion to a 0-terminated C string of the matching character type
        // ('this->' is needed because c_str() lives in the dependent base class)
        operator const C * () const { return this->c_str(); }
    };
    typedef basic_cstring<char> cstring;        // narrow version, converts to const char *
    typedef basic_cstring<wchar_t> wcstring;    // wide version, converts to const wchar_t *
}}
// ----------------------------------------------------------------------------
// some mappings for non-Windows builds
// ----------------------------------------------------------------------------
#ifndef _MSC_VER // add some functions that are VS-only
// --- basic file functions
// convert a wchar_t path to what gets passed to CRT functions that take narrow characters
// This is needed for the Linux CRT which does not accept wide-char strings for pathnames anywhere.
// Always use this function for mapping the paths.
// charpath() -- map a wide-character string to the narrow string passed to narrow-character CRT functions
// Returns a cstring, which converts implicitly to 'const char *'.
// Throws std::runtime_error if the string cannot be converted in the current locale.
// Declared 'static inline' like the other helpers in this section, since it is defined in a header.
static inline msra::strfun::cstring charpath (const std::wstring & p)
{
#ifdef _WIN32
    // non-MSVC compiler targeting Windows: convert UTF-16 to UTF-8
    return std::wstring_convert<std::codecvt_utf8_utf16<wchar_t>>().to_bytes(p);
#else
    // convert according to the current C locale; one wide character expands to at most MB_CUR_MAX bytes
    const size_t maxBytes = p.length() * MB_CUR_MAX;
    std::vector<char> buf(maxBytes + 1, 0);     // the extra element is never written, so the result is always 0-terminated
    const size_t rc = ::wcstombs(buf.data(), p.c_str(), maxBytes);
    if (rc == (size_t) -1)                      // a character has no representation in the current locale
        throw std::runtime_error("charpath: cannot convert wide-character string to multi-byte string in current locale");
    return msra::strfun::cstring (&buf[0]);
#endif
}
// _wfopen() -- replacement for the VS-only wide-character fopen()
// Both the path and the mode string are mapped through charpath() and then passed to fopen().
static inline FILE* _wfopen(const wchar_t * path, const wchar_t * mode)
{
    const auto narrowPath = charpath(path);
    const auto narrowMode = charpath(mode);
    return fopen(narrowPath, narrowMode);
}
// --- basic string functions
// wcstok_s() -- VS name for the wide-character tokenizer; all three arguments are forwarded to wcstok()
static inline wchar_t* wcstok_s(wchar_t* s, const wchar_t* delim, wchar_t** ptr)
{
    wchar_t* const token = ::wcstok(s, delim, ptr);
    return token;
}
#endif
// ----------------------------------------------------------------------------
// basic macros --TODO: do we need those? delete what we don't need
// ----------------------------------------------------------------------------
//#define SAFE_DELETE(p) { if(p) { delete (p); (p)=NULL; } }
//#define SAFE_RELEASE(p) { if(p) { (p)->Release(); (p)=NULL; } } // nasty! use CComPtr<>
#ifndef ASSERT
#define ASSERT assert
#endif
@ -584,45 +605,13 @@ static inline std::wstring mbstowcs(const std::string & p) // input: MBCS
#pragma warning(pop)
#ifdef _WIN32
// utf8 -- a std::string whose constructor fills it with the UTF-8 conversion of a wide (UTF-16) string.
// Used like a function call: utf8(wstr) yields an object that is a std::string.
struct utf8 : std::string { utf8 (const std::wstring & p) // utf-16 to -8
{
#if 1
// active implementation: convert with the standard codecvt facet and assign the result to the std::string base
std::wstring_convert<std::codecvt_utf8_utf16<wchar_t>> cv;
(*(std::string*)this) = cv.to_bytes(p);
#else // old version, delete once we know it works
// disabled implementation based on the Win32 WideCharToMultiByte() API
size_t len = p.length();
if (len == 0) { return;} // empty string
msra::basetypes::fixed_vector<char> buf (3 * len + 1); // max: 1 wchar => up to 3 mb chars
// ... TODO: this fill() should be unnecessary (a 0 is appended)--but verify
std::fill (buf.begin (), buf.end (), 0);
int rc = WideCharToMultiByte (CP_UTF8, 0, p.c_str(), (int) len,
&buf[0], (int) buf.size(), NULL, NULL);
// a return value of 0 is treated as conversion failure
if (rc == 0) throw std::runtime_error ("WideCharToMultiByte");
// buf is read as a 0-terminated string (it was zero-filled above)
(*(std::string*)this) = &buf[0];
#endif
}};
// utf16 -- a std::wstring whose constructor fills it with the wide (UTF-16) conversion of a UTF-8 string.
// Used like a function call: utf16(str) yields an object that is a std::wstring.
struct utf16 : std::wstring { utf16 (const std::string & p) // utf-8 to -16
{
#if 1
// active implementation: convert with the standard codecvt facet and assign the result to the std::wstring base
std::wstring_convert<std::codecvt_utf8_utf16<wchar_t>> cv;
(*(std::wstring*)this) = cv.from_bytes(p);
#else // old version, delete once we know it works
// disabled implementation based on the Win32 MultiByteToWideChar() API
size_t len = p.length();
if (len == 0) { return;} // empty string
msra::basetypes::fixed_vector<wchar_t> buf (len + 1);
// ... TODO: this fill() should be unnecessary (a 0 is appended)--but verify
std::fill(buf.begin(), buf.end(), (wchar_t)0);
int rc = MultiByteToWideChar(CP_UTF8, 0, p.c_str(), (int)len,
&buf[0], (int)buf.size());
// a return value of 0 is treated as conversion failure
if (rc == 0) throw std::runtime_error("MultiByteToWideChar");
// NOTE(review): compares a signed int against buf.size(); presumably checks that room for the terminating 0 remains
ASSERT(rc < buf.size());
// buf is read as a 0-terminated string (it was zero-filled above)
(*(std::wstring*)this) = &buf[0];
#endif
}};
// utf8() -- convert a wide (UTF-16) string to UTF-8; the result is a cstring, i.e. also usable as 'const char *'
static inline cstring utf8(const std::wstring & p)
{
    std::wstring_convert<std::codecvt_utf8_utf16<wchar_t>> converter;
    return converter.to_bytes(p);
}
// utf16() -- convert a UTF-8 string to a wide (UTF-16) string; the result is a wcstring
static inline wcstring utf16 (const std::string & p)
{
    std::wstring_convert<std::codecvt_utf8_utf16<wchar_t>> converter;
    return converter.from_bytes(p);
}
#else // BUGBUG: we cannot compile the above on Cygwin GCC, so for now fake it using the mbs functions, which will only work for 7-bit ASCII strings
// Fallback versions of utf8()/utf16() for the branch where the codecvt-based versions are not used.
// They forward to msra::strfun::wcstombs() / msra::strfun::mbstowcs() instead of doing a real UTF-8 conversion.
// NOTE(review): each function appears twice below with an identical signature (one passing the string
// object, one passing its c_str()); presumably only one pair is meant to remain -- confirm.
static inline std::string utf8(const std::wstring & p) { return msra::strfun::wcstombs (p); } // output: UTF-8... not really
static inline std::wstring utf16(const std::string & p) { return msra::strfun::mbstowcs (p); } // input: UTF-8... not really
static inline std::string utf8(const std::wstring & p) { return msra::strfun::wcstombs (p.c_str()); } // output: UTF-8... not really
static inline std::wstring utf16(const std::string & p) { return msra::strfun::mbstowcs(p.c_str()); } // input: UTF-8... not really
#endif
// utf8() overload for narrow strings: no conversion, the argument is returned as a cstring
// (useful in templated functions that call utf8() on either string type)
static inline cstring utf8(const std::string & p)
{
    return p;
}
// split and join -- tokenize a string like strtok() would, join() strings together
template<class _T> static inline std::vector<std::basic_string<_T>> split (const std::basic_string<_T> & s, const _T * delim)
@ -737,10 +726,6 @@ public:
// wrappers for some basic types (files, handles, timer)
// ----------------------------------------------------------------------------
#ifndef _MSC_VER // add some functions that are VS-only
// _wfopen() replacement for non-VS compilers: converts both arguments to narrow strings and calls fopen().
// NOTE(review): the path goes through msra::strfun::wcstombs() while the mode goes through msra::strfun::utf8();
// another _wfopen() built on charpath() also appears in this header -- confirm which one is meant to stay.
static inline FILE* _wfopen(const wchar_t * path, const wchar_t * mode) { return fopen(msra::strfun::wcstombs(path).c_str(), msra::strfun::utf8(mode).c_str()); }
#endif
namespace msra { namespace basetypes {
// FILE* with auto-close; use auto_file_ptr instead of FILE*.

Просмотреть файл

@ -402,7 +402,7 @@ void renameOrDie (const std::wstring & from, const std::wstring & to)
if (!MoveFileW(from.c_str(), to.c_str()))
RuntimeError ("error renaming: %s", GetLastError());
#else
renameOrDie (msra::strfun::utf8(from), msra::strfun::utf8(to));
renameOrDie (charpath(from), charpath(to));
#endif
}
@ -471,12 +471,6 @@ bool funicode (FILE * f)
// Returns 'buf' (always). buf guaranteed to be 0-terminated.
// ----------------------------------------------------------------------------
// TODO: we should redefine this to write UTF-16 (which matters on GCC which defines wchar_t as 32 bit)
// overload of fgets() for wide-character buffers; forwards to fgetws()
static inline wchar_t * fgets(wchar_t * buf, int n, FILE * f) { return fgetws(buf, n, f); }
// _utf8() -- yields a narrow string from either string type: a narrow string is returned unchanged...
static inline string _utf8 (const string & s) { return s; }
// ...and a wide string is converted with msra::strfun::utf8()
static inline string _utf8 (const wstring & s) { return msra::strfun::utf8 (s); }
// overload of strnlen() for wide-character strings; forwards to wcsnlen()
static inline size_t strnlen (wchar_t * s, size_t n) { return wcsnlen (s, n); }
#ifndef _MSC_VER // strnlen is VS proprietary
// strnlen() -- length of the 0-terminated string s, but never more than n
// At most the first n characters of s are examined, so s need not be 0-terminated
// as long as it has at least n readable characters.
static inline size_t strnlen(const char * s, size_t n)
{
    size_t len = 0;
    while (len < n && s[len] != '\0')   // stop at the terminator or at the limit, whichever comes first
        ++len;
    return len;
}
#endif
@ -485,6 +479,9 @@ static inline size_t strnlen(const char * s, size_t /*n*/) { return strlen(s); }
static inline size_t strnlen (const char *s, size_t n) { return std::find (s,s+n,'\0') - s; }
#endif
// fgets() overload for wide-character buffers, so that code templated on the
// character type can call fgets() for both; forwards to fgetws()
static inline wchar_t * fgets(wchar_t * buf, int n, FILE * f)
{
    wchar_t * const result = fgetws(buf, n, f);
    return result;
}
// strnlen() overload for wide-character strings, so that code templated on the
// character type can call strnlen() for both; forwards to wcsnlen()
static inline size_t strnlen(wchar_t * s, size_t n)
{
    const size_t len = wcsnlen(s, n);
    return len;
}
template<class CHAR>
CHAR * fgetline (FILE * f, CHAR * buf, int size)
{
@ -504,9 +501,8 @@ CHAR * fgetline (FILE * f, CHAR * buf, int size)
if (n >= (size_t) size -1)
{
basic_string<CHAR> example (p, n < 100 ? n : 100);
uint64_t filepos = fgetpos(f); // (for error message only)
RuntimeError ("input line too long at file offset %I64d (max. %d characters allowed) [%s ...]",
filepos, size -1, _utf8 (example).c_str());
uint64_t filepos = fgetpos(f); // (for error message only)
RuntimeError("input line too long at file offset %I64d (max. %d characters allowed) [%s ...]", filepos, size - 1, msra::strfun::utf8(example).c_str());
}
// remove newline at end
@ -1539,10 +1535,6 @@ static void mkdir (const wstring & path)
RuntimeError ("mkdir: error creating intermediate directory %S", path.c_str());
}
#ifndef _MSC_VER
// wcstok_s() for non-VS compilers: forwards all three arguments to wcstok().
// NOTE(review): a 'static inline' wcstok_s() with the same signature also appears in the shared header
// in this text; presumably only one of the two definitions is meant to remain -- confirm.
wchar_t* wcstok_s(wchar_t* s, const wchar_t* delim, wchar_t** ptr) { return wcstok (s, delim, ptr); }
#endif
// make subdir of a file including parents
void msra::files::make_intermediate_dirs (const wstring & filepath)
{

Просмотреть файл

@ -432,7 +432,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
return m_name;
// if name is not set yet, get it from the description header
msra::strfun::utf16 nameDescription(m_sectionHeader->nameDescription);
std::wstring nameDescription (msra::strfun::utf16(m_sectionHeader->nameDescription));
auto firstColon = nameDescription.find_first_of(L':');
if (firstColon != npos && nameDescription.size() >= firstColon)
{