/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ /* vim: set ts=8 sts=2 et sw=2 tw=80: */ /* This Source Code Form is subject to the terms of the Mozilla Public * License, v. 2.0. If a copy of the MPL was not distributed with this * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ /* tokenization of CSS style sheets */ #ifndef nsCSSScanner_h___ #define nsCSSScanner_h___ #include "nsString.h" namespace mozilla { namespace css { class ErrorReporter; } // namespace css } // namespace mozilla // Token types; in close but not perfect correspondence to the token // categorization in section 4.1.1 of CSS2.1. (The deviations are all // the fault of css3-selectors, which has requirements that can only be // met by changing the generic tokenization.) The comment on each line // illustrates the form of each identifier. enum nsCSSTokenType { // White space of any kind. No value fields are used. Note that // comments do *not* count as white space; comments separate tokens // but are not themselves tokens. eCSSToken_Whitespace, // // A comment. eCSSToken_Comment, // /*...*/ // Identifier-like tokens. mIdent is the text of the identifier. // The difference between ID and Hash is: if the text after the # // would have been a valid Ident if the # hadn't been there, the // scanner produces an ID token. Otherwise it produces a Hash token. // (This distinction is required by css3-selectors.) eCSSToken_Ident, // word eCSSToken_Function, // word( eCSSToken_AtKeyword, // @word eCSSToken_ID, // #word eCSSToken_Hash, // #0word // Numeric tokens. mNumber is the floating-point value of the // number, and mHasSign indicates whether there was an explicit sign // (+ or -) in front of the number. If mIntegerValid is true, the // number had the lexical form of an integer, and mInteger is its // integer value. Lexically integer values outside the range of a // 32-bit signed number are clamped to the maximum values; mNumber // will indicate a 'truer' value in that case. Percentage tokens // are always considered not to be integers, even if their numeric // value is integral (100% => mNumber = 1.0). For Dimension // tokens, mIdent holds the text of the unit. eCSSToken_Number, // 1 -5 +2e3 3.14159 7.297352e-3 eCSSToken_Dimension, // 24px 8.5in eCSSToken_Percentage, // 85% 1280.4% // String-like tokens. In all cases, mIdent holds the text // belonging to the string, and mSymbol holds the delimiter // character, which may be ', ", or zero (only for unquoted URLs). // Bad_String and Bad_URL tokens are emitted when the closing // delimiter or parenthesis was missing. eCSSToken_String, // 'foo bar' "foo bar" eCSSToken_Bad_String, // 'foo bar eCSSToken_URL, // url(foobar) url("foo bar") eCSSToken_Bad_URL, // url(foo // Any one-character symbol. mSymbol holds the character. eCSSToken_Symbol, // . ; { } ! * // Match operators. These are single tokens rather than pairs of // Symbol tokens because css3-selectors forbids the presence of // comments between the two characters. No value fields are used; // the token type indicates which operator. eCSSToken_Includes, // ~= eCSSToken_Dashmatch, // |= eCSSToken_Beginsmatch, // ^= eCSSToken_Endsmatch, // $= eCSSToken_Containsmatch, // *= // Unicode-range token: currently used only in @font-face. // The lexical rule for this token includes several forms that are // semantically invalid. Therefore, mIdent always holds the // complete original text of the token (so we can print it // accurately in diagnostics), and mIntegerValid is true iff the // token is semantically valid. In that case, mInteger holds the // lowest value included in the range, and mInteger2 holds the // highest value included in the range. eCSSToken_URange, // U+007e U+01?? U+2000-206F // HTML comment delimiters, ignored as a unit when they appear at // the top level of a style sheet, for compatibility with websites // written for compatibility with pre-CSS browsers. This token type // subsumes the css2.1 CDO and CDC tokens, which are always treated // the same by the parser. mIdent holds the text of the token, for // diagnostics. eCSSToken_HTMLComment, // }; // Classification of tokens used to determine if a "/**/" string must be // inserted if pasting token streams together when serializing. We include // values corresponding to eCSSToken_Dashmatch and eCSSToken_Containsmatch, // as css-syntax does not treat these as whole tokens, but we will still // need to insert a "/**/" string between a '|' delim and a '|=' dashmatch // and between a '/' delim and a '*=' containsmatch. // // https://drafts.csswg.org/css-syntax/#serialization enum nsCSSTokenSerializationType { eCSSTokenSerialization_Nothing, eCSSTokenSerialization_Whitespace, eCSSTokenSerialization_AtKeyword_or_Hash, eCSSTokenSerialization_Number, eCSSTokenSerialization_Dimension, eCSSTokenSerialization_Percentage, eCSSTokenSerialization_URange, eCSSTokenSerialization_URL_or_BadURL, eCSSTokenSerialization_Function, eCSSTokenSerialization_Ident, eCSSTokenSerialization_CDC, eCSSTokenSerialization_DashMatch, eCSSTokenSerialization_ContainsMatch, eCSSTokenSerialization_Symbol_Hash, // '#' eCSSTokenSerialization_Symbol_At, // '@' eCSSTokenSerialization_Symbol_Dot_or_Plus, // '.', '+' eCSSTokenSerialization_Symbol_Minus, // '-' eCSSTokenSerialization_Symbol_OpenParen, // '(' eCSSTokenSerialization_Symbol_Question, // '?' eCSSTokenSerialization_Symbol_Assorted, // '$', '^', '~' eCSSTokenSerialization_Symbol_Equals, // '=' eCSSTokenSerialization_Symbol_Bar, // '|' eCSSTokenSerialization_Symbol_Slash, // '/' eCSSTokenSerialization_Symbol_Asterisk, // '*' eCSSTokenSerialization_Other // anything else }; // A single token returned from the scanner. mType is always // meaningful; comments above describe which other fields are // meaningful for which token types. struct nsCSSToken { nsAutoString mIdent; float mNumber; int32_t mInteger; int32_t mInteger2; nsCSSTokenType mType; char16_t mSymbol; bool mIntegerValid; bool mHasSign; nsCSSToken() : mNumber(0), mInteger(0), mInteger2(0), mType(eCSSToken_Whitespace), mSymbol('\0'), mIntegerValid(false), mHasSign(false) {} bool IsSymbol(char16_t aSymbol) const { return mType == eCSSToken_Symbol && mSymbol == aSymbol; } void AppendToString(nsString& aBuffer) const; }; // Represents an nsCSSScanner's saved position in the input buffer. class nsCSSScannerPosition { friend class nsCSSScanner; public: nsCSSScannerPosition() : mInitialized(false) { } uint32_t LineNumber() { MOZ_ASSERT(mInitialized); return mLineNumber; } uint32_t LineOffset() { MOZ_ASSERT(mInitialized); return mLineOffset; } private: uint32_t mOffset; uint32_t mLineNumber; uint32_t mLineOffset; uint32_t mTokenLineNumber; uint32_t mTokenLineOffset; uint32_t mTokenOffset; bool mInitialized; }; enum nsCSSScannerExclude { // Return all tokens, including whitespace and comments. eCSSScannerExclude_None, // Include whitespace but exclude comments. eCSSScannerExclude_Comments, // Exclude whitespace and comments. eCSSScannerExclude_WhitespaceAndComments }; // nsCSSScanner tokenizes an input stream using the CSS2.1 forward // compatible tokenization rules. Used internally by nsCSSParser; // not available for use by other code. class nsCSSScanner { public: // |aLineNumber == 1| is the beginning of a file, use |aLineNumber == 0| // when the line number is unknown. The scanner does not take // ownership of |aBuffer|, so the caller must be sure to keep it // alive for the lifetime of the scanner. nsCSSScanner(const nsAString& aBuffer, uint32_t aLineNumber); ~nsCSSScanner(); // Reset or check whether a BAD_URL or BAD_STRING token has been seen. void ClearSeenBadToken() { mSeenBadToken = false; } bool SeenBadToken() const { return mSeenBadToken; } // Reset or check whether a "var(" FUNCTION token has been seen. void ClearSeenVariableReference() { mSeenVariableReference = false; } bool SeenVariableReference() const { return mSeenVariableReference; } // Get the 1-based line number of the last character of // the most recently processed token. uint32_t GetLineNumber() const { return mTokenLineNumber; } // Get the 0-based column number of the first character of // the most recently processed token. uint32_t GetColumnNumber() const { return mTokenOffset - mTokenLineOffset; } uint32_t GetTokenOffset() const { return mTokenOffset; } uint32_t GetTokenEndOffset() const { return mOffset; } const nsAString& GetSourceMapURL() const { return mSourceMapURL; } const nsAString& GetSourceURL() const { return mSourceURL; } // Get the text of the line containing the first character of // the most recently processed token. nsDependentSubstring GetCurrentLine() const; // Get the next token. Return false on EOF. aTokenResult is filled // in with the data for the token. aSkip controls whether // whitespace and/or comment tokens are ever returned. bool Next(nsCSSToken& aTokenResult, nsCSSScannerExclude aSkip); // Get the body of an URL token (everything after the 'url('). // This is exposed for use by nsCSSParser::ParseMozDocumentRule, // which, for historical reasons, must make additional function // tokens behave like url(). Please do not add new uses to the // parser. void NextURL(nsCSSToken& aTokenResult); // This is exposed for use by nsCSSParser::ParsePseudoClassWithNthPairArg, // because "2n-1" is a single DIMENSION token, and "n-1" is a single // IDENT token, but the :nth() selector syntax wants to interpret // them the same as "2n -1" and "n -1" respectively. Please do not // add new uses to the parser. // // Note: this function may not be used to back up over a line boundary. void Backup(uint32_t n); // Starts recording the input stream from the current position. void StartRecording(); // Abandons recording of the input stream. void StopRecording(); // Stops recording of the input stream and appends the recorded // input to aBuffer. void StopRecording(nsString& aBuffer); // Returns the length of the current recording. uint32_t RecordingLength() const; #ifdef DEBUG bool IsRecording() const; #endif // Stores the current scanner offset into the specified object. void SavePosition(nsCSSScannerPosition& aState); // Resets the scanner offset to a position saved by SavePosition. void RestoreSavedPosition(const nsCSSScannerPosition& aState); enum EOFCharacters { eEOFCharacters_None = 0x0000, // to handle \ inside strings eEOFCharacters_DropBackslash = 0x0001, // to handle \ outside strings eEOFCharacters_ReplacementChar = 0x0002, // to close comments eEOFCharacters_Asterisk = 0x0004, eEOFCharacters_Slash = 0x0008, // to close double-quoted strings eEOFCharacters_DoubleQuote = 0x0010, // to close single-quoted strings eEOFCharacters_SingleQuote = 0x0020, // to close URLs eEOFCharacters_CloseParen = 0x0040, }; // Appends any characters to the specified string the input stream to make the // last token not rely on special EOF handling behavior. // // If eEOFCharacters_DropBackslash is in aEOFCharacters, it is ignored. static void AppendImpliedEOFCharacters(EOFCharacters aEOFCharacters, nsAString& aString); EOFCharacters GetEOFCharacters() const { #ifdef DEBUG AssertEOFCharactersValid(mEOFCharacters); #endif return mEOFCharacters; } #ifdef DEBUG static void AssertEOFCharactersValid(uint32_t c); #endif protected: int32_t Peek(uint32_t n = 0); void Advance(uint32_t n = 1); void AdvanceLine(); void SkipWhitespace(); bool CheckCommentDirective(const nsAString& aDirective); void SkipComment(); bool GatherEscape(nsString& aOutput, bool aInString); bool GatherText(uint8_t aClass, nsString& aIdent); bool ScanIdent(nsCSSToken& aResult); bool ScanAtKeyword(nsCSSToken& aResult); bool ScanHash(nsCSSToken& aResult); bool ScanNumber(nsCSSToken& aResult); bool ScanString(nsCSSToken& aResult); bool ScanURange(nsCSSToken& aResult); void SetEOFCharacters(uint32_t aEOFCharacters); void AddEOFCharacters(uint32_t aEOFCharacters); const char16_t *mBuffer; uint32_t mOffset; uint32_t mCount; uint32_t mLineNumber; uint32_t mLineOffset; uint32_t mTokenLineNumber; uint32_t mTokenLineOffset; uint32_t mTokenOffset; uint32_t mRecordStartOffset; EOFCharacters mEOFCharacters; bool mRecording; bool mSeenBadToken; bool mSeenVariableReference; nsString mSourceMapURL; nsString mSourceURL; }; // Token for the grid-template-areas micro-syntax // http://dev.w3.org/csswg/css-grid/#propdef-grid-template-areas struct MOZ_STACK_CLASS nsCSSGridTemplateAreaToken { nsAutoString mName; // Empty for a null cell, non-empty for a named cell bool isTrash; // True for a trash token, mName is ignored in this case. }; // Scanner for the grid-template-areas micro-syntax class nsCSSGridTemplateAreaScanner { public: explicit nsCSSGridTemplateAreaScanner(const nsAString& aBuffer); // Get the next token. Return false on EOF. // aTokenResult is filled in with the data for the token. bool Next(nsCSSGridTemplateAreaToken& aTokenResult); private: const char16_t *mBuffer; uint32_t mOffset; uint32_t mCount; }; #endif /* nsCSSScanner_h___ */