File now supports pipes ("cmd|", "|cmd") and "-" (stdin/stdout) for input and output;

stderr redirection now uses File class instead of auto_file_ptr, allowing e.g. stderr="|tee path.log";
new method File::Flush();
class File implementation now uses RuntimeError() and LogicError() instead of throw std::runtime_error() and logic_error();
moved RuntimeError() and class Plugin from basetypes.h to Basics.h;
new VC++ CRT emulation function _wpopen() (Linux only)
This commit is contained in:
Frank Seide 2015-06-29 19:24:18 -07:00
Родитель aae4d9c240
Коммит 234c331405
9 изменённых файлов: 205 добавлений и 126 удалений

Просмотреть файл

@ -193,16 +193,16 @@ namespace Microsoft { namespace MSR { namespace CNTK {
{
File file(filePath, fileOptionsRead);
// initialize with file name
std::string path = msra::strfun::utf8(filePath);
auto location = path.find_last_of("/\\");
// initialize configName with file name
std::string configName = msra::strfun::utf8(filePath);
auto location = configName.find_last_of("/\\");
if (location != npos)
path = path.substr(location+1);
m_configName = move(path);
configName = configName.substr(location+1);
m_configName = move(configName);
// read the entire file into a string
// CONSIDER: should the File API support this, instead of line by line?
size_t fileLength = file.Size();
// CONSIDER: should the File API support this, instead of us having to call it line by line?
size_t fileLength = file.CanSeek() ? file.Size() : 0;
string str;
string configFile;
configFile.reserve(fileLength);

Просмотреть файл

@ -7,6 +7,7 @@
#ifndef _CRT_SECURE_NO_WARNINGS
#define _CRT_SECURE_NO_WARNINGS // "secure" CRT not available on all platforms --add this at the top of all CPP files that give "function or variable may be unsafe" warnings
#endif
#define _CRT_NONSTDC_NO_DEPRECATE // make VS accept POSIX functions without _
#include "Basics.h"
#define FORMAT_SPECIALIZE // to get the specialized version of the format routines
@ -22,7 +23,7 @@
#include <unistd.h>
#endif
namespace Microsoft{ namespace MSR { namespace CNTK {
namespace Microsoft { namespace MSR { namespace CNTK {
// File creation
// filename - the path
@ -43,16 +44,32 @@ File::File(const wchar_t* filename, int fileOptions)
Init(filename, fileOptions);
}
// all constructors call this
void File::Init(const wchar_t* filename, int fileOptions)
{
msra::files::make_intermediate_dirs(filename);
m_filename = filename;
m_options = fileOptions;
if (m_filename.empty())
RuntimeError("File: filename is empty");
const auto outputPipe = (m_filename.front() == '|');
const auto inputPipe = (m_filename.back() == '|');
// translate the options string into a string for fopen()
wstring options = fileOptions&fileOptionsRead?L"r":L"";
if (fileOptions&fileOptionsWrite)
const auto reading = !!(fileOptions & fileOptionsRead);
const auto writing = !!(fileOptions & fileOptionsWrite);
if (!reading && !writing)
RuntimeError("File: either fileOptionsRead or fileOptionsWrite must be specified");
// convert fileOptions to fopen()'s mode string
wstring options = reading ? L"r" : L"";
if (writing)
{
// if we already are reading the file, change to read/write
options.clear();
options.append(L"w+");
options.append(L"w");
if (!outputPipe && m_filename != L"-")
{
options.append(L"+");
msra::files::make_intermediate_dirs(m_filename.c_str()); // writing to regular file -> also create the intermediate directories as a convenience
}
}
if (fileOptions&fileOptionsBinary)
{
@ -71,13 +88,40 @@ void File::Init(const wchar_t* filename, int fileOptions)
// add sequential flag to allocate big read buffer
if (fileOptions & fileOptionsSequential)
options += L"S";
attempt([=](){m_file = fopenOrDie(filename, options.c_str());});
m_options = fileOptions;
m_size = filesize(m_file);
// now open the file
// Special path syntax understood here:
// - "-" refers to stdin or stdout
// - "|cmd" writes to a pipe
// - "cmd|" reads from a pipe
m_pcloseNeeded = false;
m_seekable = false;
if (m_filename == L"-") // stdin/stdout
{
if (writing && reading)
RuntimeError("File: path '-' fileOptionsRead and fileOptionsWrite at once");
m_file = writing ? stdout : stdin;
}
else if (outputPipe || inputPipe) // pipe syntax
{
if (inputPipe && outputPipe)
RuntimeError("File: pipes cannot specify fileOptionsRead and fileOptionsWrite at once");
if (inputPipe != reading)
RuntimeError("File: pipes must use consistent fileOptionsRead/fileOptionsWrite");
const auto command = inputPipe ? m_filename.substr(0, m_filename.size() - 1) : m_filename.substr(1);
m_file = _wpopen(command.c_str(), options.c_str());
if (!m_file)
RuntimeError("File: error exexuting pipe command '%S': %s", command.c_str(), strerror(errno));
m_pcloseNeeded = true;
}
else attempt([=]() // regular file: use a retry loop
{
m_file = fopenOrDie(filename, options.c_str());
m_seekable = true;
});
}
void File::goToDelimiter(int delim)
// skip to given delimiter character
void File::SkipToDelimiter(int delim)
{
int ch=0;
@ -85,7 +129,7 @@ void File::goToDelimiter(int delim)
ch=fgetc(m_file);
if (feof(m_file)) {
printf("Unexpected end of file\n");
throw std::logic_error("Unexpected end of file\n");
LogicError("Unexpected end of file\n");
}
}
}
@ -97,9 +141,18 @@ bool File::IsTextBased()
// File Destructor
// closes the file
// Note: this does not check for errors. Use Flush() before closing a file you are writing.
File::~File(void)
{
attempt([=] {fcloseOrDie(m_file);});
if (m_pcloseNeeded)
_pclose(m_file);
else if (m_file != stdin && m_file != stdout && m_file != stderr)
fclose(m_file); // (since destructors may not throw, we ignore the return code here)
}
// Flush - flush the underlying stream (m_file) by delegating to fflushOrDie()
// Call this before destroying a File that was written to: the destructor ignores close errors.
// NOTE(review): presumably fflushOrDie() raises an error on failure -- its definition is not in this file; confirm.
void File::Flush()
{
fflushOrDie(m_file);
}
// GetLine - get a line from the file
@ -208,7 +261,8 @@ File& File::PutMarker(FileMarker marker, const std::wstring& section)
// val - value to read from the file
File& File::operator>>(std::wstring& val)
{
attempt([&]{
attempt([&]
{
if (IsTextBased())
val = fgetwtoken(m_file);
else
@ -221,7 +275,8 @@ File& File::operator>>(std::wstring& val)
// val - value to read from the file
File& File::operator>>(std::string& val)
{
attempt([&]{
attempt([&]
{
if (IsTextBased())
val = fgettoken(m_file);
else
@ -341,7 +396,8 @@ void File::WriteString(const wchar_t* str, int size)
// size - size of the string string buffer
void File::ReadString(wchar_t* str, int size)
{
attempt([&]{
attempt([&]
{
if (IsTextBased())
fgettoken(m_file, str, size);
else
@ -390,6 +446,8 @@ bool File::IsUnicodeBOM(bool skip)
// WARNING: calling this will reset the EOF marker, so do so with care
size_t File::Size()
{
    // a size can only be determined for seekable streams; report anything else as an error
    const bool seekable = CanSeek();
    if (!seekable)
    {
        RuntimeError("File: attempted to get Size() on non-seekable stream");
    }
    // seekable stream: let filesize() determine the size from the open handle
    return filesize(m_file);
}
@ -469,7 +527,7 @@ File& File::operator>>(FileMarker marker)
break;
case fileMarkerEndFile: // end of file marker, should we throw if it's not the end of the file?
if (!IsEOF())
throw std::runtime_error("fileMarkerEndFile not found");
RuntimeError("fileMarkerEndFile not found");
break;
case fileMarkerBeginList: // Beginning of list marker
// no marker written unless an list with a count header
@ -483,7 +541,7 @@ File& File::operator>>(FileMarker marker)
{
int found = EndOfLineOrEOF(true);
if (found != (int)true) // EOF can also be returned
throw std::runtime_error("Newline not found");
RuntimeError("Newline not found");
}
break;
case fileMarkerBeginSection: // beginning of section
@ -556,7 +614,7 @@ File& File::GetMarker(FileMarker marker, const std::string& section)
string str;
*this >> str;
if (str != section)
throw std::runtime_error(std::string("section name mismatch ") + str + " != " + section);
RuntimeError(std::string("section name mismatch ") + str + " != " + section);
return *this;
}
@ -569,7 +627,7 @@ File& File::GetMarker(FileMarker marker, const std::wstring& section)
wstring str;
*this >> str;
if (str != section)
throw std::runtime_error(std::string("section name mismatch ") + msra::strfun::utf8(str) + " != " + msra::strfun::utf8(section));
RuntimeError(std::string("section name mismatch ") + msra::strfun::utf8(str) + " != " + msra::strfun::utf8(section));
return *this;
}
@ -620,6 +678,8 @@ bool File::TryGetMarker(FileMarker marker, const std::string& section)
// GetPosition - Get position in a file
uint64_t File::GetPosition()
{
    // a position is only meaningful on a seekable stream; report anything else as an error
    const bool seekable = CanSeek();
    if (!seekable)
    {
        RuntimeError("File: attempted to GetPosition() on non-seekable stream");
    }
    // seekable stream: query the current position of the open handle
    return fgetpos(m_file);
}
@ -627,7 +687,9 @@ uint64_t File::GetPosition()
// pos - position in the file
void File::SetPosition(uint64_t pos)
{
fsetpos (m_file, pos);
if (!CanSeek())
RuntimeError("File: attempted to SetPosition() on non-seekable stream");
fsetpos(m_file, pos);
}
}}}

Просмотреть файл

@ -25,7 +25,95 @@ namespace Microsoft { namespace MSR { namespace CNTK {
bool operator()(const std::wstring& left, const std::wstring& right) { return _wcsicmp(left.c_str(), right.c_str()) < 0; }
};
}}}
// RuntimeError - throw a std::runtime_error with a printf-style formatted error string
//  format - printf-style format string
//  ...    - arguments consumed by 'format'
// The formatted message is truncated to what fits into the 1024-byte buffer (terminator included).
// Never returns: always throws std::runtime_error.
#ifdef _MSC_VER
__declspec(noreturn)
#endif
static inline void RuntimeError(const char * format, ...)
{
    va_list args;
    char buffer[1024];
    va_start(args, format);
    vsnprintf(buffer, sizeof(buffer), format, args);    // bounded write: an over-long message must not overrun the stack buffer
    va_end(args);                                       // every va_start() must be paired with va_end() before leaving the function
    buffer[sizeof(buffer) - 1] = 0;                     // guarantee termination even if the CRT does not terminate on truncation
    throw std::runtime_error(buffer);
}
// RuntimeError - overload taking a complete message; the text is passed through "%s",
// so '%' characters inside 'message' are not interpreted as format directives
static inline void RuntimeError(const std::string & message) { RuntimeError("%s", message.c_str()); }
// LogicError - throw a std::logic_error with a printf-style formatted error string
//  format - printf-style format string
//  ...    - arguments consumed by 'format'
// The formatted message is truncated to what fits into the 1024-byte buffer (terminator included).
// Never returns: always throws std::logic_error.
#ifdef _MSC_VER
__declspec(noreturn)
#endif
static inline void LogicError(const char * format, ...)
{
    va_list args;
    char buffer[1024];
    va_start(args, format);
    vsnprintf(buffer, sizeof(buffer), format, args);    // bounded write: an over-long message must not overrun the stack buffer
    va_end(args);                                       // every va_start() must be paired with va_end() before leaving the function
    buffer[sizeof(buffer) - 1] = 0;                     // guarantee termination even if the CRT does not terminate on truncation
    throw std::logic_error(buffer);
}
// LogicError - overload taking a complete message; the text is passed through "%s",
// so '%' characters inside 'message' are not interpreted as format directives.
// Forwards to LogicError() (not RuntimeError()) so that callers catching std::logic_error see it.
static inline void LogicError(const std::string & message) { LogicError("%s", message.c_str()); }
// ----------------------------------------------------------------------------
// dynamic loading of modules --TODO: not Basics, should move to its own header
// ----------------------------------------------------------------------------
#ifdef _WIN32
// Plugin (Windows variant) - loads a DLL by name and looks up one exported symbol in it
class Plugin
{
HMODULE m_hModule; // module handle for the writer DLL
std::wstring m_dllName; // name of the writer DLL
public:
// starts out with no module loaded
Plugin() { m_hModule = NULL; }
// Load - load the module "<plugin>.dll" and return the address of the export named 'proc'
//  plugin - module name without the ".dll" extension
//  proc   - name of the exported symbol to look up
// Throws (via RuntimeError) if LoadLibrary() fails.
// The result of GetProcAddress() is returned unchecked, i.e. the caller must test it.
template<class STRING> // accepts char (UTF-8) and wide string
FARPROC Load(const STRING & plugin, const std::string & proc)
{
m_dllName = msra::strfun::utf16(plugin);
m_dllName += L".dll";
m_hModule = LoadLibrary(m_dllName.c_str());
if (m_hModule == NULL)
Microsoft::MSR::CNTK::RuntimeError("Plugin not found: %s", msra::strfun::utf8(m_dllName).c_str());
// create a variable of each type just to call the proper templated version
return GetProcAddress(m_hModule, proc.c_str());
}
// intentionally empty: the module is never unloaded (see the disabled destructor below)
~Plugin(){}
// removed because this causes the exception messages to be lost (exception vftables are unloaded when DLL is unloaded)
// ~Plugin() { if (m_hModule) FreeLibrary(m_hModule); }
};
#else
// Plugin (non-Windows variant) - loads a shared object by name and looks up one symbol in it
class Plugin
{
private:
void *handle; // module handle; set to NULL by the constructor
public:
Plugin()
{
handle = NULL;
}
// Load - dlopen() the module "<plugin>.so" (lazy binding) and return the address of symbol 'proc'
//  plugin - module name without the ".so" extension
//  proc   - name of the symbol to look up
// Throws (via RuntimeError) if dlopen() fails.
// The result of dlsym() is returned unchecked, i.e. the caller must test it.
template<class STRING> // accepts char (UTF-8) and wide string
void * Load(const STRING & plugin, const std::string & proc)
{
string soName = msra::strfun::utf8(plugin);
soName = soName + ".so";
// NOTE(review): this local 'handle' shadows the member of the same name, so the member stays NULL
// and the dlclose() in the destructor is never reached. The Windows variant deliberately never
// unloads its module -- confirm whether keeping the module loaded is intended here as well.
void *handle = dlopen(soName.c_str(), RTLD_LAZY);
if (handle == NULL)
RuntimeError("Plugin not found: %s", soName.c_str());
return dlsym(handle, proc.c_str());
}
// closes the module only if the member handle is non-NULL (see the note in Load())
~Plugin() {
if (handle != NULL)
dlclose(handle);
}
};
#endif
}
}
}
// ===========================================================================
// emulation of some MSVC proprietary CRT
@ -33,6 +121,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
#ifndef _MSC_VER
// _wsystem - emulation of MSVC's _wsystem(): passes the wide-string command through msra::strfun::utf8() and runs it with system()
static inline int _wsystem(const wchar_t *command) { return system(msra::strfun::utf8(command).c_str()); }
// _wpopen - emulation of MSVC's _wpopen() on top of POSIX popen()
//  command - command line to run, as a wide string (passed through msra::strfun::utf8() for popen())
//  mode    - popen() mode as a wide string, e.g. L"r" or L"w"
// Returns the stream connected to the child process, or NULL on failure (the popen() result is passed through).
static inline FILE * _wpopen(const wchar_t * command, const wchar_t * mode) { return popen(msra::strfun::utf8(command).c_str(), msra::strfun::utf8(mode).c_str()); }
#endif
#endif // _BASICS_H_

Просмотреть файл

@ -18,6 +18,8 @@
namespace Microsoft{ namespace MSR { namespace CNTK {
using namespace std;
// file options, Type of textfile to use
enum FileOptions
{
@ -86,9 +88,11 @@ template<typename FUNCTION> static void attempt (const FUNCTION & body)
class File
{
private:
FILE* m_file;
size_t m_size;
int m_options; // FileOptions ored togther
std::wstring m_filename;
FILE* m_file; // file handle
bool m_pcloseNeeded; // was opened with popen(), use pclose() when destructing
bool m_seekable; // this stream is seekable
int m_options; // FileOptions ored togther
void Init(const wchar_t* filename, int fileOptions);
public:
@ -97,13 +101,16 @@ public:
File(const wchar_t* filename, int fileOptions);
~File(void);
void Flush();
bool CanSeek() const { return m_seekable; }
size_t Size();
uint64_t GetPosition();
void SetPosition(uint64_t pos);
void goToDelimiter(int delim);
void SkipToDelimiter(int delim);
bool IsTextBased();
size_t Size();
bool IsUnicodeBOM(bool skip=false);
bool IsEOF();
bool IsWhiteSpace(bool skip=false);
@ -230,6 +237,8 @@ public:
}
return *this;
}
operator FILE*() const { return m_file; }
};
}}}

Просмотреть файл

@ -1019,90 +1019,6 @@ using namespace msra::basetypes; // for compatibility
#pragma warning (pop)
// RuntimeError - throw a std::runtime_error with a formatted error string
#ifdef _MSC_VER
__declspec(noreturn)
#endif
static inline void RuntimeError(const char * format, ...)
{
va_list args;
char buffer[1024];
va_start (args, format);
vsprintf (buffer, format, args);
throw std::runtime_error(buffer);
};
// LogicError - throw a std::logic_error with a formatted error string
#ifdef _MSC_VER
__declspec(noreturn)
#endif
static inline void LogicError(const char * format, ...)
{
va_list args;
char buffer[1024];
va_start(args, format);
vsprintf(buffer, format, args);
throw std::logic_error(buffer);
};
// ----------------------------------------------------------------------------
// dynamic loading of modules
// ----------------------------------------------------------------------------
#ifdef _WIN32
class Plugin
{
HMODULE m_hModule; // module handle for the writer DLL
std::wstring m_dllName; // name of the writer DLL
public:
Plugin() { m_hModule = NULL; }
template<class STRING> // accepts char (UTF-8) and wide string
FARPROC Load(const STRING & plugin, const std::string & proc)
{
m_dllName = msra::strfun::utf16(plugin);
m_dllName += L".dll";
m_hModule = LoadLibrary(m_dllName.c_str());
if (m_hModule == NULL)
RuntimeError("Plugin not found: %s", msra::strfun::utf8(m_dllName).c_str());
// create a variable of each type just to call the proper templated version
return GetProcAddress(m_hModule, proc.c_str());
}
~Plugin(){}
// removed because this causes the exception messages to be lost (exception vftables are unloaded when DLL is unloaded)
// ~Plugin() { if (m_hModule) FreeLibrary(m_hModule); }
};
#else
class Plugin
{
private:
void *handle;
public:
Plugin()
{
handle = NULL;
}
template<class STRING> // accepts char (UTF-8) and wide string
void * Load(const STRING & plugin, const std::string & proc)
{
string soName = msra::strfun::utf8(plugin);
soName = soName + ".so";
void *handle = dlopen(soName.c_str(), RTLD_LAZY);
if (handle == NULL)
RuntimeError("Plugin not found: %s", soName.c_str());
return dlsym(handle, proc.c_str());
}
~Plugin() {
if (handle != NULL)
dlclose(handle);
}
};
#endif
#if 0 // construction site
// ----------------------------------------------------------------------------
// class RegisterModule

Просмотреть файл

@ -54,6 +54,7 @@
#include <errno.h>
using namespace std;
using namespace Microsoft::MSR::CNTK;
// ----------------------------------------------------------------------------
// some mappings for non-Windows builds

Просмотреть файл

@ -7,12 +7,14 @@
//
#include "stdafx.h"
#include "Basics.h"
#include "SequenceParser.h"
#include <stdexcept>
#include <stdint.h>
#include "Basics.h"
#include "SequenceParser.h"
#include "fileutil.h"
using namespace Microsoft::MSR::CNTK;
// SetState for a particular value
template <typename NumType, typename LabelType>
void SequenceParser<NumType, LabelType>::SetState(int value, ParseState m_current_state, ParseState next_state)

Просмотреть файл

@ -12,6 +12,7 @@
#include <fstream>
#include <map>
#include <stdint.h>
#include "Basics.h"
#include "fileutil.h"
using namespace std;
@ -232,10 +233,10 @@ public:
errno_t err = _wfopen_s( &m_pFile, fileName, L"rb" );
if (err)
RuntimeError("SequenceParser::ParseInit - error opening file");
Microsoft::MSR::CNTK::RuntimeError("SequenceParser::ParseInit - error opening file");
int rc = _fseeki64(m_pFile, 0, SEEK_END);
if (rc)
RuntimeError("SequenceParser::ParseInit - error seeking in file");
Microsoft::MSR::CNTK::RuntimeError("SequenceParser::ParseInit - error seeking in file");
m_fileSize = GetFilePosition();
m_fileBuffer = new BYTE[m_bufferSize];
@ -543,7 +544,7 @@ public:
if (mFile) fclose(mFile);
if (_wfopen_s(&mFile, fileName, L"rt") != 0)
RuntimeError("cannot open file %s", fileName);
Microsoft::MSR::CNTK::RuntimeError("cannot open file %s", fileName);
}
void ParseReset()

Просмотреть файл

@ -68,11 +68,11 @@ struct compare_second
void RedirectStdErr(wstring logpath)
{
fprintf(stderr, "Redirecting stderr to file %S\n", logpath.c_str());
msra::files::make_intermediate_dirs(logpath);
auto_file_ptr f(logpath.c_str(), "wb");
if (dup2(fileno(f), 2) == -1)
auto f = make_shared<File>(logpath.c_str(), fileOptionsWrite | fileOptionsText);
if (dup2(fileno(*f), 2) == -1)
RuntimeError("unexpected failure to redirect stderr to log file");
setvbuf(stderr, NULL, _IONBF, 16384); // unbuffer it
static auto fKept = f; // keep it around (until it gets changed)
}
std::string WCharToString(const wchar_t* wst)
@ -1017,7 +1017,6 @@ void PrintUsageInfo()
int wmain(int argc, wchar_t* argv[])
{
try
{
#ifdef MPI_SUPPORT