/* gecko-dev/lib/libpics/csparse.h (349 lines, 9.7 KiB, C) */

/* W3 Copyright statement
Copyright 1995 by: Massachusetts Institute of Technology (MIT), INRIA</H2>
This W3C software is being provided by the copyright holders under the
following license. By obtaining, using and/or copying this software,
you agree that you have read, understood, and will comply with the
following terms and conditions:
Permission to use, copy, modify, and distribute this software and its
documentation for any purpose and without fee or royalty is hereby
granted, provided that the full text of this NOTICE appears on
<EM>ALL</EM> copies of the software and documentation or portions
thereof, including modifications, that you make.
<B>THIS SOFTWARE IS PROVIDED "AS IS," AND COPYRIGHT HOLDERS MAKE NO
REPRESENTATIONS OR WARRANTIES, EXPRESS OR IMPLIED. BY WAY OF EXAMPLE,
BUT NOT LIMITATION, COPYRIGHT HOLDERS MAKE NO REPRESENTATIONS OR
WARRANTIES OF MERCHANTABILITY OR FITNESS FOR ANY PARTICULAR PURPOSE OR
THAT THE USE OF THE SOFTWARE OR DOCUMENTATION WILL NOT INFRINGE ANY
THIRD PARTY PATENTS, COPYRIGHTS, TRADEMARKS OR OTHER RIGHTS.
COPYRIGHT HOLDERS WILL BEAR NO LIABILITY FOR ANY USE OF THIS SOFTWARE
OR DOCUMENTATION.
The name and trademarks of copyright holders may NOT be used
in advertising or publicity pertaining to the software without
specific, written prior permission. Title to copyright in this
software and any associated documentation will at all times remain
with copyright holders.
*/
/* Parser for libpics
PARSER FOR LIBPICS
*/
/*
** (c) COPYRIGHT MIT 1996.
** Please first read the full copyright statement in the file COPYRIGH.
*/
/*
This module provides the interface to CSParse.c. The parser is used to parse labels,
machine-readable descriptions, and users. The application creates one of these and
iteratively calls CSParse_parseChunk until it returns done or an error.
*/
#ifndef CSPARSE_H
#define CSPARSE_H
#include "cslutils.h"
#include "htchunk.h"
/*
NOWIN
tells CSParse where it is in the task of tokenizing
*/
/* Tokenizer position marker; every enumerator keeps its original value (0..7). */
typedef enum {
    NowIn_INVALID    = 0,
    NowIn_NEEDOPEN   = 1,
    NowIn_ENGINE     = 2,
    NowIn_NEEDCLOSE  = 3,
    NowIn_END        = 4,
    NowIn_MATCHCLOSE = 5,
    NowIn_ERROR      = 6,
    NowIn_CHAIN      = 7
} NowIn_t;
/*
Construction/Destruction
The parse objects are never created by the application, but instead by one of the
objects that it is used to parse.
*/
/* Constructor/destructor pair for a CSParse_t (layout: struct CSParse_s in this header).
   Both functions are only declared here. Whether CSParse_new can return NULL and whether
   CSParse_delete accepts NULL is not visible in this header - TODO confirm in CSParse.c. */
extern CSParse_t * CSParse_new(void);
extern void CSParse_delete(CSParse_t * me);
/*
some handy definitions
*/
/* Bracketing characters, as character constants. */
#define LPAREN    '('
#define RPAREN    ')'
#define LCURLY    '{'
#define RCURLY    '}'
#define LBRACKET  '['
#define RBRACKET  ']'

/* Quote characters, spelled as hex codes to avoid confusing parens checking editors. */
#define SQUOTE    0x27   /* single quote */
#define DQUOTE    0x22   /* double quote */

/* String forms of the parentheses. */
#define LPARENSTR "("
#define RPARENSTR ")"
/*
 * raysize(A) - number of elements in the array A (a size_t constant expression).
 * A must be a real array object: for a pointer (including an array parameter that
 * has decayed to a pointer) the result is sizeof(pointer)/sizeof(element), which
 * is not an element count.
 * The argument is parenthesized inside the expansion so the macro does not rely
 * on operator precedence of whatever expression is passed in.
 */
#define raysize(A) (sizeof(A) / sizeof((A)[0]))
/*
SUBPARSER DATA
PUNCT
valid punctuation
*/
/* Bit mask of punctuation; single bits first, then the named combinations. */
typedef enum {
    /* individual bits */
    Punct_ZERO   = 0x1,
    Punct_WHITE  = 0x2,
    Punct_LPAREN = 0x4,
    Punct_RPAREN = 0x8,

    /* combinations of the bits above */
    Punct_WHITE_LPAREN        = (Punct_WHITE | Punct_LPAREN),
    Punct_LPAREN_RPAREN       = (Punct_LPAREN | Punct_RPAREN),
    Punct_WHITE_LPAREN_RPAREN = (Punct_WHITE | Punct_LPAREN | Punct_RPAREN),
    Punct_ALL                 = 0xf
} Punct_t;
/*
SUBSTATE
Enumerated bits that are used to mark a parsing state. Because they are bits, as
opposed to sequential numbers, a StateToken may OR more than one together and serve
more than one state. They must have identical outcomes if this is to be exploited.
By convention, the following SubState names are used:
X - has no state
N - is a newly created object
A-H - substate definitions. Because they are non-conflicting bits, a subparser may have
options that sit in more than one state. For instance, the string "error" may be matched in
states A and C with:
{"error test", SubState_A|SubState_C, Punct_LPAREN, 0, "error"}
*probs* I meant to keep these 16 bit compatible, but ran up short at the end of one
StateToken list. This can be fixed if anyone needs a 16 bit enum.
*/
/* Substate bit flags, listed in ascending bit order; all values are unchanged. */
typedef enum {
    SubState_A = 0x0001,
    SubState_B = 0x0002,
    SubState_C = 0x0004,
    SubState_D = 0x0008,
    SubState_E = 0x0010,
    SubState_F = 0x0020,
    SubState_G = 0x0040,
    SubState_H = 0x0080,
    SubState_I = 0x0100,
    SubState_N = 0x4000,   /* a newly created object */
    SubState_X = -1        /* has no state */
} SubState_t;
/*
forward declaration for StateToken_t
*/
/* Incomplete type at this point: lets Check_t take a StateToken_t * before
   struct StateToken_s is defined further down in this header. */
typedef struct StateToken_s StateToken_t;
/*
ENGINE
called by CSParse to process tokens and punctuation
*/
/* Function type (not a pointer type): ParseContext_t.engineOf stores an Engine_t *.
   An engine receives the parser, a demark character and an opaque pointer, and
   returns a NowIn_t. What pVoid points to is not visible in this header - TODO confirm. */
typedef NowIn_t Engine_t(CSParse_t * pCSParse, char demark, void * pVoid);
/*
Engine employed by the Label, MacRed, and User parsers
*/
/* This is a function declaration written through the Engine_t function-type typedef,
   i.e. a prototype with the Engine_t signature - it does not define a variable. */
Engine_t CSParse_targetParser;
/*
SUBSTATE METHODS
All methods return a StateRet.
Check
see if a value is legitimate, may also record it
*/
/* Function type for token checkers; StateToken_s.pCheck holds a pointer to one.
   Arguments: the parser, a StateToken, the token text, and the demark character
   (presumably the punctuation that delimited the token - confirm in CSParse.c). */
typedef StateRet_t Check_t(CSParse_t * pCSParse, StateToken_t * pStateToken,
char * token, char demark);
/*
Punctuation checker to be employed by Check_t functions
*/
/* Takes a Punct_t mask of valid punctuation and a demark character.
   NOTE(review): presumably returns true when demark is NOT allowed by
   validPunctuation - the definition is not in this header, confirm there. */
extern BOOL Punct_badDemark(Punct_t validPunctuation, char demark);
/*
Open
create a new data structure to be filled by the parser
*/
/* Function type; TargetObject_s.pOpen points to a function with this signature. */
typedef StateRet_t Open_t(CSParse_t * pCSParse, char * token, char demark);
/*
Close
tell the state that the data structure is no longer current
*/
/* Function type; TargetObject_s.pClose points to a function with this signature. */
typedef StateRet_t Close_t(CSParse_t * pCSParse, char * token, char demark);
/*
Prep
get ready for next state
*/
/* Function type; StateToken_s.pPrep points to a function with this signature. */
typedef StateRet_t Prep_t(CSParse_t * pCSParse, char * token, char demark);
/*
Destroy
something went wrong, throw away the current object
*/
/* Function type; TargetObject_s.pDestroy points to a function with this signature.
   Unlike the other substate methods it returns nothing. */
typedef void Destroy_t(CSParse_t * pCSParse);
/*
COMMAND
substate commands
open - call the open function for the current data structure
close - call the close
chain - call again on the next state without re-reading data
notoken - clear the token before a chain (so next state just gets punct)
matchany - match any string
*/
/* Substate command bits; single bits first, then the named combinations. */
typedef enum {
    Command_NONE     = 0x00,
    Command_OPEN     = 0x01,   /* call the open function for the current data structure */
    Command_CLOSE    = 0x02,   /* call the close */
    Command_CHAIN    = 0x04,   /* call again on the next state without re-reading data */
    Command_NOTOKEN  = 0x08,   /* clear the token before a chain */
    Command_MATCHANY = 0x10,   /* match any string */

    /* combinations of the bits above */
    Command_CLOSE_CHAIN          = (Command_CLOSE | Command_CHAIN),
    Command_CHAIN_NOTOKEN        = (Command_CHAIN | Command_NOTOKEN),
    Command_CLOSE_CHAIN_NOTOKEN  = (Command_CLOSE | Command_CHAIN | Command_NOTOKEN),
    Command_MATCHANY_OPEN_CHAIN  = (Command_MATCHANY | Command_OPEN | Command_CHAIN),
    Command_MATCHANY_CLOSE       = (Command_MATCHANY | Command_CLOSE),
    Command_MATCHANY_CLOSE_CHAIN = (Command_MATCHANY | Command_CLOSE | Command_CHAIN)
} Command_t;
/*
STATETOKEN STRUCTURE
Contains all the information about what tokens are expected in what substates. The
StateTokens are kept in an array referenced by a TargetObject.
*/
/* One entry of a TargetObject's token table. Field order is significant:
   tables are written as positional initializers. */
struct StateToken_s {
    /* some useful text that describes the state - useful for debugging */
    char * note;
    /* SubState_t bits; may be several OR'ed together */
    SubState_t validSubStates;
    /* Punct_t mask of valid punctuation */
    Punct_t validPunctuation;
    /* call this function to check the token */
    Check_t * pCheck;
    /* or compare to this name */
    char * name1;
    /* many strings have 2 spellings ("ratings" vs. "r") */
    char * name2;
    /* whether target change implies diving or climbing from current state */
    CSParseTC_t targetChange;
    TargetObject_t * pNextTargetObject;
    SubState_t nextSubState;
    /* open, close, chain, etc. */
    Command_t command;
    /* prepare for next state */
    Prep_t * pPrep;
};
/*
TARGETOBJECT STRUCTURE
Methods and a list of StateTokens associated with a data structure. The methods know
how to read data into the current object and the StateTokens tell when to proceed to the
next object.
*/
/* Callbacks plus the StateToken table for one kind of data structure. */
struct TargetObject_s {
    char * note;
    /* call this function to open the structure */
    Open_t * pOpen;
    /* call this function to close the structure */
    Close_t * pClose;
    /* Destroy_t callback */
    Destroy_t * pDestroy;
    /* array of sub states */
    StateToken_t * stateTokens;
    /* number of sub states, i.e. entries in stateTokens */
    int stateTokenCount;
    /* target change signal for opening this parse state */
    CSParseTC_t targetChange;
};
/*
VALTARGET
*/
/* Pointer to the value being read. All members share storage, so only one is
   meaningful at a time (presumably selected by ValType_t - confirm in CSParse.c). */
typedef union {
    BVal_t *  pTargetBVal;
    FVal_t *  pTargetFVal;
    SVal_t *  pTargetSVal;
    DVal_t *  pTargetDVal;
    HTList ** pTargetList;
} ValTarget_t;
/*
VALTYPE
Write down what value is to be read, and what type it is
*/
/* Kind of value to be read; values are the original implicit 0..5 sequence. */
typedef enum {
    ValType_NONE    = 0,
    ValType_BVAL    = 1,
    ValType_FVAL    = 2,
    ValType_SVAL    = 3,
    ValType_DVAL    = 4,
    ValType_COMMENT = 5
} ValType_t;
/*
PARSECONTEXT
Part of a CSParse. The boundary is a little fuzzy. Maybe it should not exist.
*/
/* Per-parse context that struct CSParse_s points to through pParseContext. */
typedef struct {
    /* callbacks */
    Engine_t *               engineOf;
    TargetChangeCallback_t * pTargetChangeCallback;
    ParseErrorHandler_t *    pParseErrorHandler;

    /* for reading [BFSD]Val_t */
    ValTarget_t valTarget;
    ValType_t   valType;

    char * pTokenError;
    BOOL   observeQuotes;
    BOOL   observedQuotes;
    char * legalChars;
    int    legalCharCount;
} ParseContext_t;
/*
CSPARSE STRUCTURE
Full parser state and pointer to the object that it is reading.
*/
/* Complete parser state, together with a pointer to the object being filled. */
struct CSParse_s {
    char      quoteState;
    NowIn_t   nowIn;
    HTChunk * token;
    char      demark;
    int       offset;
    int       depth;
    ParseContext_t * pParseContext;

    /* all the types this parse engine fills */
    union {
        CSMachRead_t * pCSMachRead;   /* defined in CSMacRed.c */
        CSLabel_t *    pCSLabel;      /* defined in CSLabel.c */
        CSUser_t *     pCSUser;       /* defined in CSUser.c */
    } target;

    TargetObject_t * pTargetObject;
    SubState_t       currentSubState;
    StateToken_t *   pStateToken;
};
/*
*/
#endif /* CSPARSE_H */
/*
End of Declaration */