diff --git a/Common/ConfigFile.cpp b/Common/ConfigFile.cpp index 8bfe6149d..1bde59bcc 100644 --- a/Common/ConfigFile.cpp +++ b/Common/ConfigFile.cpp @@ -193,16 +193,16 @@ namespace Microsoft { namespace MSR { namespace CNTK { { File file(filePath, fileOptionsRead); - // initialize with file name - std::string path = msra::strfun::utf8(filePath); - auto location = path.find_last_of("/\\"); + // initialize configName with file name + std::string configName = msra::strfun::utf8(filePath); + auto location = configName.find_last_of("/\\"); if (location != npos) - path = path.substr(location+1); - m_configName = move(path); + configName = configName.substr(location+1); + m_configName = move(configName); // read the entire file into a string - // CONSIDER: should the File API support this, instead of line by line? - size_t fileLength = file.Size(); + // CONSIDER: should the File API support this, instead of us having to call it line by line? + size_t fileLength = file.CanSeek() ? file.Size() : 0; string str; string configFile; configFile.reserve(fileLength); diff --git a/Common/File.cpp b/Common/File.cpp index 3ca67301d..bae1e61f6 100644 --- a/Common/File.cpp +++ b/Common/File.cpp @@ -7,6 +7,7 @@ #ifndef _CRT_SECURE_NO_WARNINGS #define _CRT_SECURE_NO_WARNINGS // "secure" CRT not available on all platforms --add this at the top of all CPP files that give "function or variable may be unsafe" warnings #endif +#define _CRT_NONSTDC_NO_DEPRECATE // make VS accept POSIX functions without _ #include "Basics.h" #define FORMAT_SPECIALIZE // to get the specialized version of the format routines @@ -22,7 +23,7 @@ #include #endif -namespace Microsoft{ namespace MSR { namespace CNTK { +namespace Microsoft { namespace MSR { namespace CNTK { // File creation // filename - the path @@ -43,16 +44,32 @@ File::File(const wchar_t* filename, int fileOptions) Init(filename, fileOptions); } +// all constructors call this void File::Init(const wchar_t* filename, int fileOptions) { - 
msra::files::make_intermediate_dirs(filename); + m_filename = filename; + m_options = fileOptions; + if (m_filename.empty()) + RuntimeError("File: filename is empty"); + const auto outputPipe = (m_filename.front() == '|'); + const auto inputPipe = (m_filename.back() == '|'); // translate the options string into a string for fopen() - wstring options = fileOptions&fileOptionsRead?L"r":L""; - if (fileOptions&fileOptionsWrite) + const auto reading = !!(fileOptions & fileOptionsRead); + const auto writing = !!(fileOptions & fileOptionsWrite); + if (!reading && !writing) + RuntimeError("File: either fileOptionsRead or fileOptionsWrite must be specified"); + // convert fileOptions to fopen()'s mode string + wstring options = reading ? L"r" : L""; + if (writing) { // if we already are reading the file, change to read/write options.clear(); - options.append(L"w+"); + options.append(L"w"); + if (!outputPipe && m_filename != L"-") + { + options.append(L"+"); + msra::files::make_intermediate_dirs(m_filename.c_str()); // writing to regular file -> also create the intermediate directories as a convenience + } } if (fileOptions&fileOptionsBinary) { @@ -71,13 +88,40 @@ void File::Init(const wchar_t* filename, int fileOptions) // add sequential flag to allocate big read buffer if (fileOptions & fileOptionsSequential) options += L"S"; - - attempt([=](){m_file = fopenOrDie(filename, options.c_str());}); - m_options = fileOptions; - m_size = filesize(m_file); + // now open the file + // Special path syntax understood here: + // - "-" refers to stdin or stdout + // - "|cmd" writes to a pipe + // - "cmd|" reads from a pipe + m_pcloseNeeded = false; + m_seekable = false; + if (m_filename == L"-") // stdin/stdout + { + if (writing && reading) + RuntimeError("File: path '-' fileOptionsRead and fileOptionsWrite at once"); + m_file = writing ? 
stdout : stdin; + } + else if (outputPipe || inputPipe) // pipe syntax + { + if (inputPipe && outputPipe) + RuntimeError("File: pipes cannot specify fileOptionsRead and fileOptionsWrite at once"); + if (inputPipe != reading) + RuntimeError("File: pipes must use consistent fileOptionsRead/fileOptionsWrite"); + const auto command = inputPipe ? m_filename.substr(0, m_filename.size() - 1) : m_filename.substr(1); + m_file = _wpopen(command.c_str(), options.c_str()); + if (!m_file) + RuntimeError("File: error exexuting pipe command '%S': %s", command.c_str(), strerror(errno)); + m_pcloseNeeded = true; + } + else attempt([=]() // regular file: use a retry loop + { + m_file = fopenOrDie(filename, options.c_str()); + m_seekable = true; + }); } -void File::goToDelimiter(int delim) +// skip to given delimiter character +void File::SkipToDelimiter(int delim) { int ch=0; @@ -85,7 +129,7 @@ void File::goToDelimiter(int delim) ch=fgetc(m_file); if (feof(m_file)) { printf("Unexpected end of file\n"); - throw std::logic_error("Unexpected end of file\n"); + LogicError("Unexpected end of file\n"); } } } @@ -97,9 +141,18 @@ bool File::IsTextBased() // File Destructor // closes the file +// Note: this does not check for errors. Use Flush() before closing a file you are writing. 
File::~File(void) { - attempt([=] {fcloseOrDie(m_file);}); + if (m_pcloseNeeded) + _pclose(m_file); + else if (m_file != stdin && m_file != stdout && m_file != stderr) + fclose(m_file); // (since destructors may not throw, we ignore the return code here) +} + +void File::Flush() +{ + fflushOrDie(m_file); } // GetLine - get a line from the file @@ -208,7 +261,8 @@ File& File::PutMarker(FileMarker marker, const std::wstring& section) // val - value to read from the file File& File::operator>>(std::wstring& val) { - attempt([&]{ + attempt([&] + { if (IsTextBased()) val = fgetwtoken(m_file); else @@ -221,7 +275,8 @@ File& File::operator>>(std::wstring& val) // val - value to read from the file File& File::operator>>(std::string& val) { - attempt([&]{ + attempt([&] + { if (IsTextBased()) val = fgettoken(m_file); else @@ -341,7 +396,8 @@ void File::WriteString(const wchar_t* str, int size) // size - size of the string string buffer void File::ReadString(wchar_t* str, int size) { - attempt([&]{ + attempt([&] + { if (IsTextBased()) fgettoken(m_file, str, size); else @@ -390,6 +446,8 @@ bool File::IsUnicodeBOM(bool skip) // WARNING: calling this will reset the EOF marker, so do so with care size_t File::Size() { + if (!CanSeek()) + RuntimeError("File: attempted to get Size() on non-seekable stream"); return filesize(m_file); } @@ -469,7 +527,7 @@ File& File::operator>>(FileMarker marker) break; case fileMarkerEndFile: // end of file marker, should we throw if it's not the end of the file? 
if (!IsEOF()) - throw std::runtime_error("fileMarkerEndFile not found"); + RuntimeError("fileMarkerEndFile not found"); break; case fileMarkerBeginList: // Beginning of list marker // no marker written unless an list with a count header @@ -483,7 +541,7 @@ File& File::operator>>(FileMarker marker) { int found = EndOfLineOrEOF(true); if (found != (int)true) // EOF can also be returned - throw std::runtime_error("Newline not found"); + RuntimeError("Newline not found"); } break; case fileMarkerBeginSection: // beginning of section @@ -556,7 +614,7 @@ File& File::GetMarker(FileMarker marker, const std::string& section) string str; *this >> str; if (str != section) - throw std::runtime_error(std::string("section name mismatch ") + str + " != " + section); + RuntimeError(std::string("section name mismatch ") + str + " != " + section); return *this; } @@ -569,7 +627,7 @@ File& File::GetMarker(FileMarker marker, const std::wstring& section) wstring str; *this >> str; if (str != section) - throw std::runtime_error(std::string("section name mismatch ") + msra::strfun::utf8(str) + " != " + msra::strfun::utf8(section)); + RuntimeError(std::string("section name mismatch ") + msra::strfun::utf8(str) + " != " + msra::strfun::utf8(section)); return *this; } @@ -620,6 +678,8 @@ bool File::TryGetMarker(FileMarker marker, const std::string& section) // GetPosition - Get position in a file uint64_t File::GetPosition() { + if (!CanSeek()) + RuntimeError("File: attempted to GetPosition() on non-seekable stream"); return fgetpos(m_file); } @@ -627,7 +687,9 @@ uint64_t File::GetPosition() // pos - position in the file void File::SetPosition(uint64_t pos) { - fsetpos (m_file, pos); + if (!CanSeek()) + RuntimeError("File: attempted to SetPosition() on non-seekable stream"); + fsetpos(m_file, pos); } }}} diff --git a/Common/Include/Basics.h b/Common/Include/Basics.h index 559c55354..a04f9865f 100644 --- a/Common/Include/Basics.h +++ b/Common/Include/Basics.h @@ -25,7 +25,95 @@ namespace 
Microsoft { namespace MSR { namespace CNTK { bool operator()(const std::wstring& left, const std::wstring& right) { return _wcsicmp(left.c_str(), right.c_str()) < 0; } }; -}}} + // RuntimeError - throw a std::runtime_error with a formatted error string +#ifdef _MSC_VER + __declspec(noreturn) +#endif + static inline void RuntimeError(const char * format, ...) + { + va_list args; + char buffer[1024]; + + va_start(args, format); + vsprintf(buffer, format, args); + throw std::runtime_error(buffer); + }; + static inline void RuntimeError(const string & message) { RuntimeError("%s", message.c_str()); } + + // LogicError - throw a std::logic_error with a formatted error string +#ifdef _MSC_VER + __declspec(noreturn) +#endif + static inline void LogicError(const char * format, ...) + { + va_list args; + char buffer[1024]; + + va_start(args, format); + vsprintf(buffer, format, args); + throw std::logic_error(buffer); + }; + static inline void LogicError(const string & message) { LogicError("%s", message.c_str()); } + + // ---------------------------------------------------------------------------- + // dynamic loading of modules --TODO: not Basics, should move to its own header + // ---------------------------------------------------------------------------- + +#ifdef _WIN32 + class Plugin + { + HMODULE m_hModule; // module handle for the writer DLL + std::wstring m_dllName; // name of the writer DLL + public: + Plugin() { m_hModule = NULL; } + template // accepts char (UTF-8) and wide string + FARPROC Load(const STRING & plugin, const std::string & proc) + { + m_dllName = msra::strfun::utf16(plugin); + m_dllName += L".dll"; + m_hModule = LoadLibrary(m_dllName.c_str()); + if (m_hModule == NULL) + Microsoft::MSR::CNTK::RuntimeError("Plugin not found: %s", msra::strfun::utf8(m_dllName).c_str()); + + // create a variable of each type just to call the proper templated version + return GetProcAddress(m_hModule, proc.c_str()); + } + ~Plugin(){} + // removed because this causes 
the exception messages to be lost (exception vftables are unloaded when DLL is unloaded) + // ~Plugin() { if (m_hModule) FreeLibrary(m_hModule); } + }; +#else + class Plugin + { + private: + void *handle; + public: + Plugin() + { + handle = NULL; + } + + template // accepts char (UTF-8) and wide string + void * Load(const STRING & plugin, const std::string & proc) + { + string soName = msra::strfun::utf8(plugin); + soName = soName + ".so"; + void *handle = dlopen(soName.c_str(), RTLD_LAZY); + if (handle == NULL) + RuntimeError("Plugin not found: %s", soName.c_str()); + return dlsym(handle, proc.c_str()); + } + + ~Plugin() { + if (handle != NULL) + dlclose(handle); + } + }; +#endif + +} +} +} // =========================================================================== // emulation of some MSVC proprietary CRT @@ -33,6 +121,7 @@ namespace Microsoft { namespace MSR { namespace CNTK { #ifndef _MSC_VER static inline int _wsystem(const wchar_t *command) { return system(msra::strfun::utf8(command).c_str()); } +static inline FILE * _wpopen(const wchar_t * command, const wchar_t * mode) { return popen(msra::strfun::utf8(command).c_str(), msra::strfun::utf8(mode).c_str()); } #endif #endif // _BASICS_H_ diff --git a/Common/Include/File.h b/Common/Include/File.h index b6c5296e3..0665e2035 100644 --- a/Common/Include/File.h +++ b/Common/Include/File.h @@ -18,6 +18,8 @@ namespace Microsoft{ namespace MSR { namespace CNTK { +using namespace std; + // file options, Type of textfile to use enum FileOptions { @@ -86,9 +88,11 @@ template static void attempt (const FUNCTION & body) class File { private: - FILE* m_file; - size_t m_size; - int m_options; // FileOptions ored togther + std::wstring m_filename; + FILE* m_file; // file handle + bool m_pcloseNeeded; // was opened with popen(), use pclose() when destructing + bool m_seekable; // this stream is seekable + int m_options; // FileOptions ored togther void Init(const wchar_t* filename, int fileOptions); public: @@ -97,13 +101,16 @@ 
public: File(const wchar_t* filename, int fileOptions); ~File(void); + void Flush(); + + bool CanSeek() const { return m_seekable; } + size_t Size(); uint64_t GetPosition(); void SetPosition(uint64_t pos); - void goToDelimiter(int delim); + void SkipToDelimiter(int delim); bool IsTextBased(); - size_t Size(); bool IsUnicodeBOM(bool skip=false); bool IsEOF(); bool IsWhiteSpace(bool skip=false); @@ -230,6 +237,8 @@ public: } return *this; } + + operator FILE*() const { return m_file; } }; }}} diff --git a/Common/Include/basetypes.h b/Common/Include/basetypes.h index f65d6d308..76b3bf88c 100644 --- a/Common/Include/basetypes.h +++ b/Common/Include/basetypes.h @@ -1019,90 +1019,6 @@ using namespace msra::basetypes; // for compatibility #pragma warning (pop) -// RuntimeError - throw a std::runtime_error with a formatted error string -#ifdef _MSC_VER -__declspec(noreturn) -#endif -static inline void RuntimeError(const char * format, ...) -{ - va_list args; - char buffer[1024]; - - va_start (args, format); - vsprintf (buffer, format, args); - throw std::runtime_error(buffer); -}; - -// LogicError - throw a std::logic_error with a formatted error string -#ifdef _MSC_VER -__declspec(noreturn) -#endif -static inline void LogicError(const char * format, ...) 
-{ - va_list args; - char buffer[1024]; - - va_start(args, format); - vsprintf(buffer, format, args); - throw std::logic_error(buffer); -}; - -// ---------------------------------------------------------------------------- -// dynamic loading of modules -// ---------------------------------------------------------------------------- - -#ifdef _WIN32 -class Plugin -{ - HMODULE m_hModule; // module handle for the writer DLL - std::wstring m_dllName; // name of the writer DLL -public: - Plugin() { m_hModule = NULL; } - template // accepts char (UTF-8) and wide string - FARPROC Load(const STRING & plugin, const std::string & proc) - { - m_dllName = msra::strfun::utf16(plugin); - m_dllName += L".dll"; - m_hModule = LoadLibrary(m_dllName.c_str()); - if (m_hModule == NULL) - RuntimeError("Plugin not found: %s", msra::strfun::utf8(m_dllName).c_str()); - - // create a variable of each type just to call the proper templated version - return GetProcAddress(m_hModule, proc.c_str()); - } - ~Plugin(){} - // removed because this causes the exception messages to be lost (exception vftables are unloaded when DLL is unloaded) - // ~Plugin() { if (m_hModule) FreeLibrary(m_hModule); } -}; -#else -class Plugin -{ -private: - void *handle; -public: - Plugin() - { - handle = NULL; - } - - template // accepts char (UTF-8) and wide string - void * Load(const STRING & plugin, const std::string & proc) - { - string soName = msra::strfun::utf8(plugin); - soName = soName + ".so"; - void *handle = dlopen(soName.c_str(), RTLD_LAZY); - if (handle == NULL) - RuntimeError("Plugin not found: %s", soName.c_str()); - return dlsym(handle, proc.c_str()); - } - - ~Plugin() { - if (handle != NULL) - dlclose(handle); - } -}; -#endif - #if 0 // construction site // ---------------------------------------------------------------------------- // class RegisterModule diff --git a/Common/fileutil.cpp b/Common/fileutil.cpp index 19e1caed7..c11b722fd 100644 --- a/Common/fileutil.cpp +++ b/Common/fileutil.cpp @@ 
-54,6 +54,7 @@ #include using namespace std; +using namespace Microsoft::MSR::CNTK; // ---------------------------------------------------------------------------- // some mappings for non-Windows builds diff --git a/DataReader/LMSequenceReader/SequenceParser.cpp b/DataReader/LMSequenceReader/SequenceParser.cpp index c04861560..0c6c1f6ab 100644 --- a/DataReader/LMSequenceReader/SequenceParser.cpp +++ b/DataReader/LMSequenceReader/SequenceParser.cpp @@ -7,12 +7,14 @@ // #include "stdafx.h" -#include "Basics.h" -#include "SequenceParser.h" #include #include +#include "Basics.h" +#include "SequenceParser.h" #include "fileutil.h" +using namespace Microsoft::MSR::CNTK; + // SetState for a particular value template void SequenceParser::SetState(int value, ParseState m_current_state, ParseState next_state) diff --git a/DataReader/LMSequenceReader/SequenceParser.h b/DataReader/LMSequenceReader/SequenceParser.h index 219d37bc7..6d89ced90 100644 --- a/DataReader/LMSequenceReader/SequenceParser.h +++ b/DataReader/LMSequenceReader/SequenceParser.h @@ -12,6 +12,7 @@ #include #include #include +#include "Basics.h" #include "fileutil.h" using namespace std; @@ -232,10 +233,10 @@ public: errno_t err = _wfopen_s( &m_pFile, fileName, L"rb" ); if (err) - RuntimeError("SequenceParser::ParseInit - error opening file"); + Microsoft::MSR::CNTK::RuntimeError("SequenceParser::ParseInit - error opening file"); int rc = _fseeki64(m_pFile, 0, SEEK_END); if (rc) - RuntimeError("SequenceParser::ParseInit - error seeking in file"); + Microsoft::MSR::CNTK::RuntimeError("SequenceParser::ParseInit - error seeking in file"); m_fileSize = GetFilePosition(); m_fileBuffer = new BYTE[m_bufferSize]; @@ -543,7 +544,7 @@ public: if (mFile) fclose(mFile); if (_wfopen_s(&mFile, fileName, L"rt") != 0) - RuntimeError("cannot open file %s", fileName); + Microsoft::MSR::CNTK::RuntimeError("cannot open file %s", fileName); } void ParseReset() diff --git a/MachineLearning/CNTK/CNTK.cpp 
b/MachineLearning/CNTK/CNTK.cpp index b565c9bc2..37f71a191 100644 --- a/MachineLearning/CNTK/CNTK.cpp +++ b/MachineLearning/CNTK/CNTK.cpp @@ -68,11 +68,11 @@ struct compare_second void RedirectStdErr(wstring logpath) { fprintf(stderr, "Redirecting stderr to file %S\n", logpath.c_str()); - msra::files::make_intermediate_dirs(logpath); - auto_file_ptr f(logpath.c_str(), "wb"); - if (dup2(fileno(f), 2) == -1) + auto f = make_shared(logpath.c_str(), fileOptionsWrite | fileOptionsText); + if (dup2(fileno(*f), 2) == -1) RuntimeError("unexpected failure to redirect stderr to log file"); setvbuf(stderr, NULL, _IONBF, 16384); // unbuffer it + static auto fKept = f; // keep it around (until it gets changed) } std::string WCharToString(const wchar_t* wst) @@ -1017,7 +1017,6 @@ void PrintUsageInfo() int wmain(int argc, wchar_t* argv[]) { - try { #ifdef MPI_SUPPORT