// pict/cli/mparser.cpp
//
// 649 lines
// 18 KiB
// C++

#include <climits>
#include <fstream>
#include <sstream>
#include "model.h"
using namespace std;
// Single-character markers recognized by the model-file parser in this file.

// a line whose first non-blank character is this one is a comment
const wchar_t COMMENT_CHAR = L'#';
// separates a parameter name from the list of its values
const wchar_t PARAMNAME_SEP = L':';
// introduces the optional custom order of a parameter: "param @ N"
const wchar_t PARAM_ORDER = L'@';
// a value written as <name> is a reference to another, already defined parameter
const wchar_t PARAM_REF_BEGIN = L'<';
const wchar_t PARAM_REF_END = L'>';
// an optional weight is written in parentheses at the end of a value: "Val (3)"
const wchar_t WEIGHT_BEGIN = L'(';
const wchar_t WEIGHT_END = L')';
// a submodel/cluster definition has the form "{ p1, p2, ... } @ N"
const wchar_t SET_BEGIN = L'{';
const wchar_t SET_END = L'}';
const wchar_t SET_ORDER = L'@';
const wchar_t SET_SEP = L','; // default separator of param names in submodel/cluster definition
// a parameter whose name starts with this character is flagged as a result parameter
const wchar_t RESULT_PARAM_PREFIX = L'$';

// Patterns used by lineIsConstraint(): the first one is compared with the whole
// (upper-cased, trimmed) line, the remaining ones are handed to patternMatch().
// NOTE(review): '*' presumably acts as a wildcard in patternMatch() - confirm.
// note: keep it consistent with cpsyntax
// TODO: change the way we detect the constraints, this is error-prone
wstring CONSTRAINT_PATTERN1 = L"IF";
wstring CONSTRAINT_PATTERN2 = L"IF*[*]*";
wstring CONSTRAINT_PATTERN3 = L"[*]*";
wstring CONSTRAINT_PATTERN4 = L"(*[*]*";
wstring CONSTRAINT_PATTERN5 = L"IF*ISNEGATIVE";
wstring CONSTRAINT_PATTERN6 = L"IF*ISNEGATIVE*(*";
wstring CONSTRAINT_PATTERN7 = L"IF*ISPOSITIVE";
wstring CONSTRAINT_PATTERN8 = L"IF*ISPOSITIVE*(*";
//
// Tells whether a line is a comment: after trimming, its first character
// must be COMMENT_CHAR. A line that trims down to nothing is not a comment.
//
bool lineIsComment( wstring& line )
{
    const wstring stripped = trim( line );
    return( ! stripped.empty() && stripped.front() == COMMENT_CHAR );
}
//
// Detects whether a line is a constraint. The line is upper-cased and trimmed,
// then either has to be exactly CONSTRAINT_PATTERN1 or has to match one of
// the remaining CONSTRAINT_PATTERNn patterns.
// TODO: have better detection here
//
bool lineIsConstraint( wstring& line )
{
    wstring candidate = line;
    toUpper( candidate );
    candidate = trim( candidate );

    // a line made of nothing but "IF" is a constraint
    if( stringCompare( candidate, CONSTRAINT_PATTERN1, false ) == 0 )
    {
        return( true );
    }

    // otherwise the first matching pattern decides
    wstring* const patterns[] =
    {
        &CONSTRAINT_PATTERN2, &CONSTRAINT_PATTERN3, &CONSTRAINT_PATTERN4,
        &CONSTRAINT_PATTERN5, &CONSTRAINT_PATTERN6, &CONSTRAINT_PATTERN7,
        &CONSTRAINT_PATTERN8
    };
    for( wstring* pattern : patterns )
    {
        if( patternMatch( *pattern, candidate ))
        {
            return( true );
        }
    }
    return( false );
}
//
// Detects whether a line is a submodel or a cluster definition:
// once trimmed it must start with SET_BEGIN and contain SET_END somewhere.
//
bool lineIsParamSet( wstring& line )
{
    const wstring text = trim( line );
    const bool opensSet = ! text.empty() && text.front() == SET_BEGIN;
    return( opensSet && text.find( SET_END ) != wstring::npos );
}
//
// Reads one line from a file.
//
// Characters are appended to 'line' until L'\n', L'\0' or the end of the
// stream is met; the terminating character is consumed but not stored.
//
// Returns false, with 'line' empty, when nothing can be read any more because
// the stream has already reached its end or is in a failed state.
// Returns true otherwise, also when the line that was read is empty.
//
bool readLineFromFile( std::wifstream& file, std::wstring& line )
{
    line.clear();

    // Testing eof() alone is not enough: a stream that failed without reaching
    // its end (file never opened, character conversion error) would make every
    // get() fail and the read loop would never terminate.
    if( ! file.good() )
    {
        return( false );
    }

    wchar_t c = L'\0';
    while( file.get( c ) && c != L'\n' && c != L'\0' )
    {
        line += c;
    }
    return( true );
}
//
// Reads one parameter definition and appends it to Parameters.
// A definition has the following format:
//   param [@ N] : val1, ~val2, val3a | val3b, val4 (W)
//
// - the name is separated from the values by PARAMNAME_SEP or, when that is
//   missing, by the first ValuesDelim
// - "@ N" sets a custom order; it is applied only when N is a positive number
// - a value of the form <name> pulls in all values of an existing parameter
// - "(W)" at the end of a value is its weight; W has to be a number of at
//   least 1 that fits in an int, otherwise the text stays a part of the value
// - names of one value are separated by NamesDelim; InvalidPrefix in front of
//   the first name marks the value as negative
//
// Returns false (after printing an error) when the line has no separator at
// all, true otherwise.
//
bool CModelData::readParameter( wstring& line )
{
    CModelParameter parameter;

    // param name can be separated by PARAMNAME_SEP or by the values delimiter
    wstring::size_type paramSep = line.find( PARAMNAME_SEP );
    if( paramSep == wstring::npos )
    {
        paramSep = line.find( ValuesDelim );
        if( paramSep == wstring::npos )
        {
            PrintMessage( InputDataError, L"Parameter", line.c_str(), L"should have at least one value defined" );
            return( false );
        }
    }

    wstring name = trim( line.substr( 0, paramSep ));
    unsigned int order = static_cast< unsigned int >( UNDEFINED_ORDER );

    // check if this param has custom-order defined
    wstrings nameAndOrder;
    split( name, PARAM_ORDER, nameAndOrder );

    double d = 0;
    if( nameAndOrder.size() == 2 && stringToNumber( nameAndOrder[ 1 ], d ))
    {
        name = trim( nameAndOrder[ 0 ]);
        // converting an out-of-range double to an integer is undefined,
        // so only values representable as unsigned int are accepted
        if( d > 0 && d <= static_cast< double >( UINT_MAX ))
        {
            order = static_cast< unsigned int >( d );
        }
    }

    parameter.Name  = name;
    parameter.Order = order;
    if( ! parameter.Name.empty() && parameter.Name[ 0 ] == RESULT_PARAM_PREFIX )
    {
        parameter.IsResultParameter = true;
    }

    // now get the values
    wstring rawValues = line.substr( paramSep + 1, line.length() - paramSep - 1 );
    wstrings values;
    split( rawValues, ValuesDelim, values );

    for( auto & val : values )
    {
        val = trim( val );

        //
        // if it is in a form <text> it is a reference to another parameter
        // find an existing parameter and add all its values here instead
        //
        vector< CModelParameter >::iterator refParam;
        if( ! val.empty()
         && val.front() == PARAM_REF_BEGIN
         && val.back()  == PARAM_REF_END
         && ( refParam = FindParameterByName( val.substr( 1, val.length() - 2 ))) != Parameters.end() )
        {
            __push_back( parameter.Values, refParam->Values.begin(), refParam->Values.end() );
            continue;
        }

        //
        // value weight
        // Param: Val1 (3), Val21|Val22 (2), Val3
        //
        int weight = 1;
        const size_t weightBegin = val.find_last_of( WEIGHT_BEGIN );
        const size_t weightEnd   = val.find_last_of( WEIGHT_END );

        // '(' must exist, ')' must be the last character
        if( weightBegin != wstring::npos && weightEnd == val.length() - 1 )
        {
            wstring weightStr = trim( val.substr( weightBegin + 1, weightEnd - weightBegin - 1 ));
            double weightDbl = 0;

            // The text in parentheses must be a positive number that fits in an int.
            // The range is checked on the double itself: casting a negative or
            // too large double to an integer type is undefined behaviour and
            // used to let negative weights through.
            if( stringToNumber( weightStr, weightDbl )
             && weightDbl >= 1.0
             && weightDbl <= static_cast< double >( INT_MAX ))
            {
                weight = static_cast< int >( weightDbl );
                // trim the weight off the value
                val.erase( weightBegin, wstring::npos );
                val = trim( val );
            }
        }

        //
        // names
        //
        wstrings names;
        split( val, NamesDelim, names );
        for( auto & alias : names )
        {
            alias = trim( alias );
        }

        // only the first name determines the negativity of a value
        bool positive = true;
        if( ! names.empty()
         && ! names.front().empty()
         && names.front()[ 0 ] == InvalidPrefix )
        {
            positive = false;
            names.front() = trim( names.front().substr( 1 ));
        }
        if( ! positive )
        {
            m_hasNegativeValues = true;
        }

        CModelValue value( names, weight, positive );
        parameter.Values.push_back( value );
    }

    Parameters.push_back( parameter );
    return( true );
}
//
// Appends to unmatchedParams every name from paramsOfSubmodel that is not the
// name of any entry of Parameters; names are compared with stringCompare()
// honoring CaseSensitive. unmatchedParams is not cleared first.
//
void CModelData::getUnmatchedParameterNames( wstrings& paramsOfSubmodel, wstrings& unmatchedParams )
{
    for( auto & candidate : paramsOfSubmodel )
    {
        // walk the known parameters until one carries the same name
        auto known = Parameters.begin();
        while( known != Parameters.end()
            && 0 != stringCompare( candidate, known->Name, CaseSensitive ))
        {
            ++known;
        }

        if( known == Parameters.end() )
        {
            unmatchedParams.push_back( candidate );
        }
    }
}
//
// Reads one submodel/cluster definition and appends it to Submodels.
// It's always in a form of
//   { paramName1, paramName2, ... } @ N
// where "@ N" is optional.
//
// Parameter names are resolved in two attempts: first split by SET_SEP and,
// if that leaves any name unknown, split again by ValuesDelim. When names are
// still unknown a warning is printed, the definition is skipped and true is
// returned. Duplicate names are removed with a warning.
//
// Returns false (after printing an error) when the braces are missing, the
// set is empty, something other than SET_ORDER follows the closing brace, or
// the order is not a number between 1 and INT_MAX.
//
bool CModelData::readParamSet( wstring& line )
{
    const wstring STD_MSG = L"Submodel definition is incorrect: " + line;

    // {
    wstring::iterator next = line.begin();
    wstring::iterator begin = findFirstNonWhitespace( next, line.end() );
    if( begin == line.end() || *begin != SET_BEGIN )
    {
        PrintMessage( InputDataError, STD_MSG.data() );
        return( false );
    }
    ++begin;

    // find }
    wstring::iterator end = find( begin, line.end(), SET_END );
    if( end == line.end() )
    {
        PrintMessage( InputDataError, STD_MSG.data() );
        return( false );
    }

    // params in the middle
    wstring setp;
    setp.assign( begin, end );
    setp = trim( setp );
    if( setp.empty() )
    {
        PrintMessage( InputDataError, STD_MSG.data() );
        return( false );
    }

    //
    // Two attempts to resolve submodel names:
    // 1. Use a comma as a separator
    // 2. If 1 fails to produce matching names, use ModelData.ValuesDelim as a separator
    //
    wstrings setParams;
    split( setp, SET_SEP, setParams );
    transform( setParams.begin(), setParams.end(), setParams.begin(), trim );

    wstrings unmatched;
    getUnmatchedParameterNames( setParams, unmatched );
    if( ! unmatched.empty() )
    {
        setParams.clear();
        unmatched.clear();

        split( setp, ValuesDelim, setParams );
        transform( setParams.begin(), setParams.end(), setParams.begin(), trim );

        getUnmatchedParameterNames( setParams, unmatched );
        if( ! unmatched.empty() )
        {
            PrintMessage( InputDataWarning, L"Submodel defintion", trim( line ).data(), L"contains unknown parameter. Skipping..." );
            return( true ); // just a warning so don't exit
        }
    }

    // remove duplicates
    // NOTE(review): duplicates are detected case-insensitively while names are
    // matched honoring CaseSensitive - confirm this is intended for models
    // whose parameter names differ only by case.
    sort( setParams.begin(), setParams.end(), stringCaseInsensitiveLess );
    wstrings::iterator newEnd = unique( setParams.begin(), setParams.end(), stringCaseInsensitiveEquals );
    if( setParams.end() != newEnd )
    {
        PrintMessage( InputDataWarning, L"Submodel defintion", trim( line ).data(), L"contains duplicate parameters. Removing duplicates..." );
        setParams.erase( newEnd, setParams.end() );
    }

    CModelSubmodel submodel;

    // match to names, set up the structure
    for( auto & cparam : setParams )
    {
        bool found = false;
        unsigned int index = 0;
        for( auto & param : Parameters )
        {
            if( 0 == stringCompare( cparam, param.Name, CaseSensitive ))
            {
                found = true;
                break;
            }
            ++index;
        }

        // at this point we should always match the name
        assert( found );
        submodel.Parameters.push_back( index );
    }

    // @
    ++end;
    wstring::iterator at = findFirstNonWhitespace( end, line.end() );

    // if this is the end of the line then the order will be assigned later
    if( at != line.end() )
    {
        // anything other than @, quit
        if( *at != SET_ORDER )
        {
            PrintMessage( InputDataError, STD_MSG.data() );
            return( false );
        }
        ++at;

        // number
        wstring numberText;
        numberText.assign( at, line.end() );

        // The order has to be a number of at least 1 that fits in an int.
        // The range is checked on the double itself because casting an
        // out-of-range double to int is undefined behaviour.
        double number = 0;
        const bool isNumber = stringToNumber( numberText, number );
        if( ! ( isNumber
             && number >= 1.0
             && number <= static_cast< double >( INT_MAX )))
        {
            PrintMessage( InputDataError, STD_MSG.data() );
            return( false );
        }
        submodel.Order = static_cast< int >( number );
    }

    Submodels.push_back( submodel );
    return( true );
}
//
//
//
bool CModelData::readModel( const wstring& filePath )
{
// Some implementations of wifstream only allow ANSI strings as file names so converting before using
string ansiFilePath = wideCharToAnsi( filePath );
wifstream file( ansiFilePath );
if ( !file )
{
PrintMessage( InputDataError, L"Couldn't open file:", filePath.data() );
return( false );
}
wstring line;
// read definition of parameters
bool firstLine = true;
while( true )
{
// skip not important stuff
if ( lineIsEmpty( line ) || lineIsComment( line ))
{
if ( ! readLineFromFile( file, line )) return( true );
continue;
}
if ( firstLine )
{
m_encoding = getEncodingType( line );
if ( m_encoding != EncodingType::ANSI
&& m_encoding != EncodingType::UTF8 )
{
PrintMessage( InputDataError, L"Only ANSI and UTF-8 are supported" );
return( false );
}
firstLine = false;
}
// continue reading until a submodel/cluster or a constraint
if ( lineIsParamSet( line ) || lineIsConstraint( line )) break;
if ( ! readParameter( line )) return( false );
if ( ! readLineFromFile( file, line )) return( true );
}
// read submodels
if ( lineIsParamSet( line ))
{
while( true )
{
// skip not important stuff
if ( lineIsEmpty( line ) || lineIsComment( line ))
{
if ( ! readLineFromFile( file, line )) return( true );
continue;
}
// continue reading until a constraint
if ( lineIsConstraint( line )) break;
if ( ! readParamSet( line )) return( false );
if ( ! readLineFromFile( file, line )) return( true );
}
}
// anything that's left is constraints
while( true )
{
// if only a line is not empty or not a comment,
// it's got to be a part of constraints definition
if ( ! ( lineIsEmpty( line ) || lineIsComment( line )))
{
ConstraintPredicates += line;
}
if ( ! readLineFromFile( file, line )) return( true );
}
return( true );
}
//
// Reads the model file and then validates the parameters that were read.
// Validation is not attempted when reading fails.
// Returns true only if both steps succeed.
//
bool CModelData::ReadModel( const wstring& filePath )
{
    return( readModel( filePath ) && ValidateParams() );
}
//
//
//
bool CModelData::ReadRowSeedFile( const wstring& filePath )
{
if( trim( filePath ).empty() ) return( true );
// Some implementations of wifstream only allow ANSI strings as file names so converting before using
string ansiFilePath = wideCharToAnsi( filePath );
wifstream file( ansiFilePath );
if ( !file )
{
PrintMessage( InputDataError, L"Couldn't open file:", filePath.data() );
return( false );
}
wstring line;
// parameter names
bool fileEmpty = false;
if ( readLineFromFile( file, line ))
{
if ( trim( line ).empty() ) fileEmpty = true;
}
else
{
fileEmpty = true;
}
if ( fileEmpty )
{
PrintMessage( RowSeedsWarning, L"Seeding file is empty" );
return( true );
}
EncodingType encoding = getEncodingType( line );
if ( encoding != EncodingType::ANSI
&& encoding != EncodingType::UTF8 )
{
PrintMessage( RowSeedsError, L"Only ANSI and UTF-8 are supported" );
return( false );
}
vector< vector<CModelParameter>::iterator > parameters;
wstrings params;
split( line, RESULT_DELIMITER, params );
for( auto & param : params )
{
vector<CModelParameter>::iterator found = FindParameterByName( param );
if ( found == Parameters.end())
{
PrintMessage( RowSeedsWarning, L"Parameter",
param.data(),
L"not found in the model. Skipping..." );
}
parameters.push_back( found );
}
// if any parameter equals to ModelData.Parameters.end()
// this parameter could not be found in the model
while( readLineFromFile( file, line ))
{
if ( trim(line).empty() ) break;
wstrings values;
split( line, RESULT_DELIMITER, values );
unsigned int n_param = 0;
CModelRowSeed rowSeed;
for ( wstrings::iterator i_value = values.begin();
i_value != values.end();
++i_value, ++n_param )
{
// There could be fewer parameter names (in the first line)
// than there is values in the following lines. This has
// to be detected and a warning issued
if ( n_param < (unsigned int) parameters.size() && parameters[ n_param ] != Parameters.end() )
{
CModelParameter &param = *(parameters[ n_param ]);
// remove the negative marker and match up the raw name
if ( i_value->length() > 0 && (*i_value)[ 0 ] == InvalidPrefix )
{
*i_value = trim( i_value->substr( 1, i_value->length() - 1 ));
}
// if any value could not be found, the whole seed row is not invalid
// we just remove that one offending value and the rest of the row can
// stay intact; we cannot really warn about this as in a model with
// submodels this is very normal
int found = param.GetValueOrdinal( *i_value, CaseSensitive );
if ( found == -1 )
{
if ( ! i_value->empty() )
{
PrintMessage( RowSeedsWarning, L"Value",
i_value->data(),
L"not found in the model. Skipping this value..." );
}
}
else
{
// we don't care about result parameters as we should not seed we expected results
if ( ! param.IsResultParameter )
{
rowSeed.push_back( make_pair( param.Name, *i_value ));
}
}
}
}
if ( ! rowSeed.empty() )
{
RowSeeds.push_back( rowSeed );
}
}
if( ! ValidateRowSeeds())
{
return( false );
}
return( true );
}