// // // Copyright (c) Microsoft Corporation. All rights reserved. // // // ConfigFile.cpp : Defines the configuration file loader. // #ifndef _CRT_SECURE_NO_WARNINGS #define _CRT_SECURE_NO_WARNINGS // "secure" CRT not available on all platforms --add this at the top of all CPP files that give "function or variable may be unsafe" warnings #endif #include "File.h" #include "commandArgUtil.h" #include "ScriptableObjects.h" namespace Microsoft { namespace MSR { namespace CNTK { // ParseCommandLine - parse the command line parameters // argc - count of arguments // argv - array of argument parameters // config - config to return std::string ConfigParameters::ParseCommandLine(int argc, wchar_t* argv[], ConfigParameters& config) { config.SetName(std::string("global")); // This vector keeps track of the config files we have already read std::vector resolvedConfigFiles; std::string configString; // start at 1, because 0 is the name of the EXE for (int i=1; i < argc; ++i) { wstring str = argv[i]; // allow to change current directory, for easier debugging wstring cdDescriptor = L"currentDirectory="; if (_wcsnicmp(cdDescriptor.c_str(), str.c_str(), cdDescriptor.length()) == 0) { wstring dir = str.substr(cdDescriptor.length()); if(_wchdir(dir.c_str()) != 0) InvalidArgument("Failed to set the current directory to '%ls'", dir.c_str()); } // see if they are loading a config file wstring configDescriptor = L"configFile="; int compare = _wcsnicmp(configDescriptor.c_str(), str.c_str(), configDescriptor.length()); // no config file, parse as regular argument if (compare) { configString += (msra::strfun::utf8(str) + "\n"); } else // One or more config file paths specified in a "+"-separated list. { const std::string filePaths = msra::strfun::utf8(str.substr(configDescriptor.length())); std::vector filePathsVec = msra::strfun::split(filePaths, "+"); for (auto filePath : filePathsVec) { if (std::find(resolvedConfigFiles.begin(), resolvedConfigFiles.end(), filePath) == resolvedConfigFiles.end()) { // if haven't already read this file, read it resolvedConfigFiles.push_back(filePath); configString += config.ReadConfigFile(filePath); } else RuntimeError("Cannot specify same config file multiple times at the command line."); } } } // now, configString is a concatenation of lines, including parameters from the command line, with comments stripped // expand any lines of the form include= configString = config.ResolveIncludeStatements(configString, resolvedConfigFiles); // convert into a ConfigDictionary--top-level expressions of the form var=val; if val is a block in braces, it is kept verbatim (not parsed inside) config.FileParse(configString); return configString; } // ResolveIncludeStatements - this function takes a config string, and looks for all lines of the // form "include=configPaths", where 'configPaths' is a "+" separated list of paths to config files. // If it encounters one of these lines, it reads the config files listed in 'configPaths' (in the specified order), // and includes the body of each file in the string which is eventually returned by this function. If the included // config file includes other config files, this function will recursively include those files as well. // configString - the config string within which to look for "include" statements // resolvedConfigFiles - the paths to all the config files that have already been resolved. This vector is used to prevent include loops, // and to prevent files from being included multiple times. // returns: The config string, with all the "include" statements replaced with the bodies of the specified config files. std::string ConfigParser::ResolveIncludeStatements(const std::string &configString, std::vector &resolvedConfigFiles) { std::vector lines = msra::strfun::split(configString, "\n"); std::string includeKeyword = "include="; std::size_t includeKeywordSize = includeKeyword.size(); std::string newConfigString; for (std::string line : lines) { if (line.compare(0, includeKeywordSize, includeKeyword) == 0) { std::string filePaths = line.substr(includeKeywordSize, line.size() - includeKeywordSize); if (filePaths.find(openBraceVar) != std::string::npos) { RuntimeError("Variable usage (eg, \"$varName$\") not supported in \"include\" statements. Explicit path to config file must be provided"); } std::vector filePathVec = msra::strfun::split (filePaths, "+"); for (auto filePath : filePathVec) { // if file hasn't already been resolved (the resolvedPaths vector doesn't contain it), resolve it. if (std::find(resolvedConfigFiles.begin(), resolvedConfigFiles.end(), filePath) == resolvedConfigFiles.end()) { // Recursively resolve the include statements in the included config files. // Ensure that the same config file isn't included twice, by keeping track of the config // files that have already been resolved in the resolvedPaths vector. resolvedConfigFiles.push_back(filePath); newConfigString += ResolveIncludeStatements(ReadConfigFile(filePath), resolvedConfigFiles); } else { // We already resolved this path. Write a warning so that user is aware of this. // TODO: This message is written to stderr before stderr gets redirected to the specified file. Fix this. fprintf(stderr, "Warning: Config file included multiple times. Not including config file again: %s", filePath.c_str()); } } } else { newConfigString += (line + "\n"); } } return newConfigString; } // LoadConfigFiles - load multiple configuration file, and adds to config parameters // filePaths - A "+" delimited list of file paths, corresponding to config files to load // configStringToAppend - A config string which should be processed together with the config files void ConfigParser::LoadConfigFiles(const std::wstring &filePaths, const std::string *configStringToAppend) { std::string configString = ReadConfigFiles(filePaths); if(configStringToAppend != nullptr) { configString += *configStringToAppend; } FileParse(configString); } // LoadConfigFileAndResolveVariables - load a configuration file, and add to config parameters. // If the config file contains references to variables, which are defined in the 'config' ConfigParameters, // then this method will resolve those variables. This method is meant for the processing of NDL/MEL config files, // in order to allow them to access variables defined in the primary config file via $varName$ syntax. // filePath - filePath to the file to load // config - These ConfigParameters are used in order to resolve the $varName$ instances in the config file. void ConfigParser::LoadConfigFileAndResolveVariables(const std::wstring &filePath, const ConfigParameters& config) { // read file, resolve variables, and then parse. std::string fileContents = ReadConfigFile(filePath); fileContents = config.ResolveVariables(fileContents); FileParse(fileContents); } // LoadConfigFile - load a configuration file, and add to config parameters // filePath - filePath to the file to read void ConfigParser::LoadConfigFile(const std::wstring &filePath) { // read and then parse FileParse(ReadConfigFile(filePath)); } // Same as "ReadConfigFiles" function below, but takes as input string instead of wstring std::string ConfigParser::ReadConfigFiles(const std::string &filePaths) { return ReadConfigFiles(msra::strfun::utf16(filePaths)); } // ReadConfigFiles - reads multiple config files, concatenates the content from each file, and returns a string // filePaths - A "+" delimited list of file paths, corresponding to config files to read // returns: a string with the concatentated file contents std::string ConfigParser::ReadConfigFiles(const std::wstring &filePaths) { std::string configString; std::vector filePathVec = msra::strfun::split (filePaths, L"+"); for (auto filePath : filePathVec) { configString += ReadConfigFile(filePath); } return configString; } // Same as "ReadConfigFile" function below, but takes as input string instead of wstring std::string ConfigParser::ReadConfigFile(const std::string &filePath) { return ReadConfigFile(msra::strfun::utf16(filePath)); } // ReadConfigFile - read a configuration file, and return all lines, stripped of comments, concatenated by newlines, as one long string (no other processing, expansion etc.) // filePath - the path to the config file to read // returns: a string with the concatentated file contents std::string ConfigParser::ReadConfigFile(const std::wstring &filePath) { File file(filePath, fileOptionsRead); // initialize configName with file name std::string configName = msra::strfun::utf8(filePath); auto location = configName.find_last_of("/\\"); if (location != npos) configName = configName.substr(location+1); m_configName = move(configName); // read the entire file into a string // CONSIDER: should the File API support this, instead of us having to call it line by line? size_t fileLength = file.CanSeek() ? file.Size() : 0; string str; string configFile; configFile.reserve(fileLength); while (!file.IsEOF()) { file.GetLine(str); str = StripComments(str); if (str != "") { configFile.append(str); configFile.append("\n"); } } return configFile; } // GetFileConfigNames - determine the names of the features and labels sections in the config file // features - [in,out] a vector of feature name strings // labels - [in,out] a vector of label name strings template void GetFileConfigNames(const ConfigRecordType& config, std::vector& features, std::vector& labels) { for (const auto & id : config.GetMemberIds()) { if (!config.CanBeConfigRecord(id)) continue; const ConfigRecordType & temp = config(id); // see if we have a config parameters that contains a "dim" element, it's a sub key, use it if (temp.ExistsCurrent(L"dim")) { if (temp.ExistsCurrent(L"labelMappingFile") || temp.ExistsCurrent(L"labelDim") || temp.ExistsCurrent(L"labelType") || (temp.ExistsCurrent(L"sectionType") && (const wstring&)temp(L"sectionType") == L"labels")) { labels.push_back(id); } else { features.push_back(id); } } } } template void GetFileConfigNames(const ConfigParameters &, std::vector& features, std::vector& labels); template void GetFileConfigNames(const ScriptableObjects::IConfigRecord &, std::vector& features, std::vector& labels); // FindConfigNames - determine the names of the heirarchy of sections in the config file that contain a particular key // config - configuration to search // key - string we ar searching for in each config section // names - [in,out] a vector of section names in "path" format (i.e. base\subsection) template void FindConfigNames(const ConfigRecordType& config, std::string key, std::vector& names) { wstring wkey = wstring(key.begin(), key.end()); for (const auto & id : config.GetMemberIds()) { if (!config.CanBeConfigRecord(id)) continue; const ConfigRecordType & temp = config(id); // see if we have a config parameters that contains a "key" element, if so use it if (temp.ExistsCurrent(wkey.c_str())) { names.push_back(id); } } } template void FindConfigNames(const ConfigParameters &, std::string key, std::vector& names); template void FindConfigNames(const ScriptableObjects::IConfigRecord &, std::string key, std::vector& names); // Trim - trim white space off the start and end of the string // str - string to trim // NOTE: if the entire string is empty, then the string will be set to an empty string void Trim(std::string& str) { auto found = str.find_first_not_of(" \t"); if (found == npos) { str.erase(0); return; } str.erase(0, found); found = str.find_last_not_of(" \t"); if (found != npos) str.erase(found+1); } // TrimQuotes - trim surrounding quotation marks // str - string to trim void TrimQuotes(std::string& str) { if (str.empty()) return; if (str.front() == '"' && str.back() == '"') str = str.substr(1, str.size() - 2); } }}}