Bug 1152033 - Expose CSS lexer to js. r=heycam, r=bz

--HG--
extra : rebase_source : 1eaabcb6629c185f2e18f27b8c09d9a11611869a
This commit is contained in:
Tom Tromey 2015-05-04 10:28:00 -04:00
Родитель fb09be63fd
Коммит 8e26488934
14 изменённых файлов: 507 добавлений и 14 удалений

Просмотреть файл

@ -310,6 +310,10 @@ DOMInterfaces = {
'nativeType': 'nsDOMCSSDeclaration'
},
'CSSLexer': {
'wrapperCache': False
},
'CSSPrimitiveValue': {
'nativeType': 'nsROCSSPrimitiveValue',
},

132
dom/webidl/CSSLexer.webidl Normal file
Просмотреть файл

@ -0,0 +1,132 @@
/* -*- Mode: IDL; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
/* This Source Code Form is subject to the terms of the Mozilla Public
* License, v. 2.0. If a copy of the MPL was not distributed with this
* file, You can obtain one at http://mozilla.org/MPL/2.0/. */
// The possible values for CSSToken.tokenType.
//
// NOTE: the order of these values is significant.  CSSLexer.cpp
// statically asserts that each value is numerically identical to the
// corresponding nsCSSTokenType enumerator (and converts between the two
// with a plain cast), so the two enumerations have to be kept in step.
enum CSSTokenType {
// Whitespace.
"whitespace",
// A CSS comment.
"comment",
// An identifier. |text| holds the identifier text.
"ident",
// A function token. |text| holds the function name. Note that the
// function token includes (i.e., consumes) the "(" -- but this is
// not included in |text|.
"function",
// "@word". |text| holds "word", without the "@".
"at",
// "#word". |text| holds "word", without the "#".
"id",
// "#word". ID is used when "word" would have been a valid IDENT
// token without the "#"; otherwise, HASH is used.
"hash",
// A number.
"number",
// A dimensioned number.
"dimension",
// A percentage.
"percentage",
// A string.
"string",
// A "bad string", i.e. a string that was not properly terminated.
// For example, a string that is cut short by an unescaped newline is
// returned as a bad string (test_csslexer.js covers this case).  A
// string that is merely unterminated at EOF is currently returned as
// an ordinary STRING token instead.
"bad_string",
// A URL. |text| holds the URL.
"url",
// A "bad URL", i.e. a "url(" token whose contents are malformed.  For
// example, "url(http://example.com @" yields a bad URL (followed by a
// symbol token for the "@"), whereas a URL that is merely unterminated
// at EOF is returned as an ordinary URL token; see test_csslexer.js.
// |text| holds the URL.
"bad_url",
// A "symbol" is any one-character symbol. This corresponds to the
// DELIM token in the CSS specification.
"symbol",
// The "~=" token.
"includes",
// The "|=" token.
"dashmatch",
// The "^=" token.
"beginsmatch",
// The "$=" token.
"endsmatch",
// The "*=" token.
"containsmatch",
// A unicode-range token. This is currently not fully represented
// by CSSToken.
"urange",
// HTML comment delimiters, either "<!--" or "-->". Note that each
// is emitted as a separate token, and the intervening text is lexed
// as normal; whereas ordinary CSS comments are lexed as a unit.
"htmlcomment"
};
// A single token as returned by CSSLexer.nextToken().  Only |tokenType|,
// |startOffset| and |endOffset| are always present; the remaining members
// are filled in only for the token types noted on each member.
dictionary CSSToken {
// The token type.
CSSTokenType tokenType = "whitespace";
// Offset of the first character of the token.
unsigned long startOffset = 0;
// Offset of the character after the final character of the token.
// This is chosen so that the offsets can be passed to |substring|
// to yield the exact contents of the token.
unsigned long endOffset = 0;
// If the token is a number, percentage, or dimension, this holds
// the value. This is not present for other token types.
double number;
// If the token is a number, percentage, or dimension, this is true
// iff the number had an explicit sign. This is not present for
// other token types.
boolean hasSign;
// If the token is a number, percentage, or dimension, this is true
// iff the number was specified as an integer. This is not present
// for other token types.
boolean isInteger;
// Text associated with the token. This is not present for all
// token types. In particular it is:
//
//   Token type   Meaning
//   ===============================
//   ident        The identifier.
//   function     The function name. Note that the "(" is part
//                of the token but is not present in |text|.
//   at           The word.
//   id           The word.
//   hash         The word.
//   dimension    The dimension.
//   string       The string contents after escape processing.
//   bad_string   Ditto.
//   url          The URL after escape processing.
//   bad_url      Ditto.
//   symbol       The symbol text.
DOMString text;
};
/**
 * CSSLexer is an interface to the CSS lexer. It tokenizes an
 * input stream and returns CSS tokens.
 *
 * @see inIDOMUtils.getCSSLexer to create an instance of the lexer.
 */
[ChromeOnly]
interface CSSLexer
{
/**
 * The line number of the most recently returned token. Line
 * numbers are 0-based.
 *
 * Once nextToken() has returned null, this reports the position at
 * which the end of the input was found (test_csslexer.js relies on
 * this to check the location of EOF).
 */
readonly attribute unsigned long lineNumber;
/**
 * The column number of the most recently returned token. Column
 * numbers are 0-based.
 *
 * As with lineNumber, after nextToken() has returned null this
 * reports the position of the end of the input.
 */
readonly attribute unsigned long columnNumber;
/**
 * Return the next token, or null at EOF.
 *
 * Nothing is filtered out: whitespace and comments are returned as
 * tokens of their own ("whitespace" and "comment").
 */
CSSToken? nextToken();
};

Просмотреть файл

@ -88,6 +88,7 @@ WEBIDL_FILES = [
'Crypto.webidl',
'CSPReport.webidl',
'CSS.webidl',
'CSSLexer.webidl',
'CSSPrimitiveValue.webidl',
'CSSRuleList.webidl',
'CSSStyleDeclaration.webidl',

Просмотреть файл

@ -37,6 +37,7 @@
#include "nsRuleWalker.h"
#include "nsRuleProcessorData.h"
#include "nsCSSRuleProcessor.h"
#include "mozilla/dom/CSSLexer.h"
#include "mozilla/dom/InspectorUtilsBinding.h"
#include "mozilla/dom/ToJSValue.h"
#include "nsCSSParser.h"
@ -289,6 +290,19 @@ inDOMUtils::GetRuleColumn(nsIDOMCSSRule* aRule, uint32_t* _retval)
return NS_OK;
}
// Implements inIDOMUtils.getCSSLexer: builds a CSSLexer over |aText| and
// stores its JS wrapper, created in the current global of |aCx|, in
// |aResult|.  Returns NS_ERROR_FAILURE if the wrapper cannot be created.
NS_IMETHODIMP
inDOMUtils::GetCSSLexer(const nsAString& aText, JSContext* aCx,
                        JS::MutableHandleValue aResult)
{
  // The lexer object is wrapped relative to whatever global is current.
  JS::Rooted<JSObject*> global(aCx, JS::CurrentGlobalOrNull(aCx));
  MOZ_ASSERT(global);

  // NOTE(review): the wrapping helper is handed the nsAutoPtr itself, so it
  // presumably takes over ownership of the lexer on success -- confirm.
  nsAutoPtr<CSSLexer> lexer(new CSSLexer(aText));
  const bool wrapped =
    WrapNewBindingNonWrapperCachedObject(aCx, global, lexer, aResult);
  return wrapped ? NS_OK : NS_ERROR_FAILURE;
}
NS_IMETHODIMP
inDOMUtils::GetSelectorCount(nsIDOMCSSStyleRule* aRule, uint32_t *aCount)
{

Просмотреть файл

@ -17,7 +17,7 @@ interface nsIDOMFontFaceList;
interface nsIDOMRange;
interface nsIDOMCSSStyleSheet;
[scriptable, uuid(1f5b7f08-fa80-49e9-b881-888f081240da)]
[scriptable, uuid(60b4cbf7-2a08-4419-8937-6ef495417824)]
interface inIDOMUtils : nsISupports
{
// CSS utilities
@ -28,6 +28,9 @@ interface inIDOMUtils : nsISupports
unsigned long getRuleLine(in nsIDOMCSSRule aRule);
unsigned long getRuleColumn(in nsIDOMCSSRule aRule);
[implicit_jscontext]
jsval getCSSLexer(in DOMString aText);
// Utilities for working with selectors. We don't have a JS OM representation
// of a single selector or a selector list yet, but given a rule we can index
// into the selector list.

142
layout/style/CSSLexer.cpp Normal file
Просмотреть файл

@ -0,0 +1,142 @@
/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
/* This Source Code Form is subject to the terms of the Mozilla Public
* License, v. 2.0. If a copy of the MPL was not distributed with this
* file, You can obtain one at http://mozilla.org/MPL/2.0/. */
#include "mozilla/dom/CSSLexer.h"
#include "js/Value.h"
#include "mozilla/dom/CSSLexerBinding.h"
#include "mozilla/dom/ToJSValue.h"
namespace mozilla {
namespace dom {
// NextToken() converts an nsCSSTokenType into a CSSTokenType with a plain
// static_cast, which is only valid while the two enumerations assign the
// same numeric value to corresponding members.  Verify that at compile
// time, one pair per token type.
#define CHECK_TOKEN_TYPE(aScannerType, aBindingType)                        \
  static_assert(static_cast<int>(aScannerType) ==                          \
                  static_cast<int>(aBindingType),                          \
                "nsCSSToken and CSSTokenType should have identical values")

CHECK_TOKEN_TYPE(eCSSToken_Whitespace, CSSTokenType::Whitespace);
CHECK_TOKEN_TYPE(eCSSToken_Comment, CSSTokenType::Comment);
CHECK_TOKEN_TYPE(eCSSToken_Ident, CSSTokenType::Ident);
CHECK_TOKEN_TYPE(eCSSToken_Function, CSSTokenType::Function);
CHECK_TOKEN_TYPE(eCSSToken_AtKeyword, CSSTokenType::At);
CHECK_TOKEN_TYPE(eCSSToken_ID, CSSTokenType::Id);
CHECK_TOKEN_TYPE(eCSSToken_Hash, CSSTokenType::Hash);
CHECK_TOKEN_TYPE(eCSSToken_Number, CSSTokenType::Number);
CHECK_TOKEN_TYPE(eCSSToken_Dimension, CSSTokenType::Dimension);
CHECK_TOKEN_TYPE(eCSSToken_Percentage, CSSTokenType::Percentage);
CHECK_TOKEN_TYPE(eCSSToken_String, CSSTokenType::String);
CHECK_TOKEN_TYPE(eCSSToken_Bad_String, CSSTokenType::Bad_string);
CHECK_TOKEN_TYPE(eCSSToken_URL, CSSTokenType::Url);
CHECK_TOKEN_TYPE(eCSSToken_Bad_URL, CSSTokenType::Bad_url);
CHECK_TOKEN_TYPE(eCSSToken_Symbol, CSSTokenType::Symbol);
CHECK_TOKEN_TYPE(eCSSToken_Includes, CSSTokenType::Includes);
CHECK_TOKEN_TYPE(eCSSToken_Dashmatch, CSSTokenType::Dashmatch);
CHECK_TOKEN_TYPE(eCSSToken_Beginsmatch, CSSTokenType::Beginsmatch);
CHECK_TOKEN_TYPE(eCSSToken_Endsmatch, CSSTokenType::Endsmatch);
CHECK_TOKEN_TYPE(eCSSToken_Containsmatch, CSSTokenType::Containsmatch);
CHECK_TOKEN_TYPE(eCSSToken_URange, CSSTokenType::Urange);
CHECK_TOKEN_TYPE(eCSSToken_HTMLComment, CSSTokenType::Htmlcomment);

#undef CHECK_TOKEN_TYPE
// Construct a lexer over |aText|.
//
// The text is stored in mInput and the scanner is created over mInput
// rather than over |aText|: nsCSSScanner does not take ownership of its
// buffer, so the lexer keeps the string it scans as a member of its own.
// The second scanner argument is the starting line number; 1 denotes the
// beginning of a file.
CSSLexer::CSSLexer(const nsAString& aText)
: mInput(aText)
, mScanner(mInput, 1)
{
}
// Destructor.  The body is intentionally empty; no explicit cleanup is
// performed here.
CSSLexer::~CSSLexer()
{
}
// Create the JS reflector for this lexer by forwarding to the generated
// CSSLexerBinding::Wrap.  The reflector is stored in |aReflector| and the
// return value is whatever CSSLexerBinding::Wrap returns.
bool
CSSLexer::WrapObject(JSContext* aCx, JS::Handle<JSObject*> aGivenProto,
JS::MutableHandle<JSObject*> aReflector)
{
return CSSLexerBinding::Wrap(aCx, this, aGivenProto, aReflector);
}
// Implements CSSLexer.lineNumber: the 0-based line number of the most
// recently returned token.
uint32_t
CSSLexer::LineNumber()
{
  // nsCSSScanner counts lines starting from 1 (the lexer constructs it with
  // a starting line of 1), while the WebIDL attribute is documented as
  // 0-based, so shift the scanner's answer down by one.
  const uint32_t oneBasedLine = mScanner.GetLineNumber();
  return oneBasedLine - 1;
}
// Implements CSSLexer.columnNumber.  Unlike LineNumber(), the scanner's
// value is passed through unchanged: nsCSSScanner::GetColumnNumber()
// computes the token's offset relative to the start of its line, which
// is already the 0-based value the WebIDL attribute documents.
uint32_t
CSSLexer::ColumnNumber()
{
return mScanner.GetColumnNumber();
}
// Implements CSSLexer.nextToken().
//
// Scans one token and, if there is one, converts it into the WebIDL
// CSSToken dictionary stored in |aResult|.  When the scanner reports EOF,
// |aResult| is not given a value (the WebIDL documents the result as null
// at EOF).
void
CSSLexer::NextToken(Nullable<CSSToken>& aResult)
{
  nsCSSToken scanned;
  // eCSSScannerExclude_None: whitespace and comments are returned as
  // tokens rather than skipped.
  const bool haveToken = mScanner.Next(scanned, eCSSScannerExclude_None);
  if (!haveToken) {
    return;
  }

  CSSToken& out = aResult.SetValue();

  // The cast relies on the two enumerations having identical values, which
  // is checked with static_asserts near the top of this file.
  out.mTokenType = static_cast<CSSTokenType>(scanned.mType);
  out.mStartOffset = mScanner.GetTokenOffset();
  out.mEndOffset = mScanner.GetTokenEndOffset();

  switch (scanned.mType) {
    // Token types whose only payload is the text held in mIdent.  For
    // strings and URLs the delimiter is not emitted, as it is readily
    // extracted from the source string when needed.
    case eCSSToken_Ident:
    case eCSSToken_Function:
    case eCSSToken_AtKeyword:
    case eCSSToken_ID:
    case eCSSToken_Hash:
    case eCSSToken_String:
    case eCSSToken_Bad_String:
    case eCSSToken_URL:
    case eCSSToken_Bad_URL:
      out.mText.Construct(scanned.mIdent);
      break;

    // A one-character symbol: the text is that single character.
    case eCSSToken_Symbol:
      out.mText.Construct(nsString(&scanned.mSymbol, 1));
      break;

    // A dimension carries both text (taken from mIdent) and the numeric
    // members shared with plain numbers and percentages.
    case eCSSToken_Dimension:
      out.mText.Construct(scanned.mIdent);
      /* FALLTHROUGH */
    case eCSSToken_Number:
    case eCSSToken_Percentage:
      out.mNumber.Construct(scanned.mNumber);
      out.mHasSign.Construct(scanned.mHasSign);
      out.mIsInteger.Construct(scanned.mIntegerValid);
      break;

    // Token types that carry no payload beyond their type and offsets.
    // Comment text in particular is easily extracted from the source
    // string, and is rarely useful.
    case eCSSToken_Whitespace:
    case eCSSToken_Includes:
    case eCSSToken_Dashmatch:
    case eCSSToken_Beginsmatch:
    case eCSSToken_Endsmatch:
    case eCSSToken_Containsmatch:
    case eCSSToken_URange:
    case eCSSToken_Comment:
    case eCSSToken_HTMLComment:
      break;
  }
}
} // namespace dom
} // namespace mozilla

37
layout/style/CSSLexer.h Normal file
Просмотреть файл

@ -0,0 +1,37 @@
/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
/* This Source Code Form is subject to the terms of the Mozilla Public
* License, v. 2.0. If a copy of the MPL was not distributed with this
* file, You can obtain one at http://mozilla.org/MPL/2.0/. */
#ifndef CSSLexer_h___
#define CSSLexer_h___
#include "mozilla/UniquePtr.h"
#include "nsCSSScanner.h"
#include "mozilla/dom/CSSLexerBinding.h"
namespace mozilla {
namespace dom {
// C++ implementation of the ChromeOnly CSSLexer WebIDL interface: a thin
// wrapper that drives an nsCSSScanner over a string and reports each
// token as a CSSToken dictionary.
class CSSLexer : public NonRefcountedDOMObject
{
public:
// Construct a lexer over the given text.
explicit CSSLexer(const nsAString&);
~CSSLexer();
// Create the JS reflector for this object via the generated binding.
bool WrapObject(JSContext* aCx, JS::Handle<JSObject*> aGivenProto,
JS::MutableHandle<JSObject*> aReflector);
// 0-based line number of the most recently returned token.
uint32_t LineNumber();
// 0-based column number of the most recently returned token.
uint32_t ColumnNumber();
// Scan the next token into |aResult|; |aResult| is left without a value
// at EOF.
void NextToken(Nullable<CSSToken>& aResult);
private:
// The text being lexed.  This must be declared before mScanner: the
// constructor initializes mScanner from mInput, members are initialized
// in declaration order, and the scanner does not own the buffer it is
// given.
nsString mInput;
// The scanner that tokenizes mInput.
nsCSSScanner mScanner;
};
} // namespace dom
} // namespace mozilla
#endif /* CSSLexer_h___ */

Просмотреть файл

@ -89,6 +89,7 @@ EXPORTS.mozilla += [
EXPORTS.mozilla.dom += [
'CSS.h',
'CSSLexer.h',
'CSSRuleList.h',
'CSSValue.h',
'FontFace.h',
@ -113,6 +114,7 @@ UNIFIED_SOURCES += [
'AnimationCommon.cpp',
'CounterStyleManager.cpp',
'CSS.cpp',
'CSSLexer.cpp',
'CSSRuleList.cpp',
'CSSStyleSheet.cpp',
'CSSVariableDeclarations.cpp',

Просмотреть файл

@ -2734,7 +2734,9 @@ CSSParserImpl::GetToken(bool aSkipWS)
return true;
}
}
return mScanner->Next(mToken, aSkipWS);
return mScanner->Next(mToken, aSkipWS ?
eCSSScannerExclude_WhitespaceAndComments :
eCSSScannerExclude_Comments);
}
void

Просмотреть файл

@ -552,7 +552,8 @@ nsCSSScanner::SkipComment()
for (;;) {
int32_t ch = Peek();
if (ch < 0) {
mReporter->ReportUnexpectedEOF("PECommentEOF");
if (mReporter)
mReporter->ReportUnexpectedEOF("PECommentEOF");
SetEOFCharacters(eEOFCharacters_Asterisk | eEOFCharacters_Slash);
return;
}
@ -560,7 +561,8 @@ nsCSSScanner::SkipComment()
Advance();
ch = Peek();
if (ch < 0) {
mReporter->ReportUnexpectedEOF("PECommentEOF");
if (mReporter)
mReporter->ReportUnexpectedEOF("PECommentEOF");
SetEOFCharacters(eEOFCharacters_Slash);
return;
}
@ -985,7 +987,8 @@ nsCSSScanner::ScanString(nsCSSToken& aToken)
mSeenBadToken = true;
aToken.mType = eCSSToken_Bad_String;
mReporter->ReportUnexpected("SEUnterminatedString", aToken);
if (mReporter)
mReporter->ReportUnexpected("SEUnterminatedString", aToken);
break;
}
return true;
@ -1192,15 +1195,15 @@ nsCSSScanner::NextURL(nsCSSToken& aToken)
/**
* Primary scanner entry point. Consume one token and fill in
* |aToken| accordingly. Will skip over any number of comments first,
* and will also skip over rather than return whitespace tokens if
* |aSkipWS| is true.
* and will also skip over rather than return whitespace and comment
* tokens, depending on the value of |aSkip|.
*
* Returns true if it successfully consumed a token, false if EOF has
* been reached. Will always advance the current read position by at
* least one character unless called when already at EOF.
*/
bool
nsCSSScanner::Next(nsCSSToken& aToken, bool aSkipWS)
nsCSSScanner::Next(nsCSSToken& aToken, nsCSSScannerExclude aSkip)
{
int32_t ch;
@ -1218,15 +1221,18 @@ nsCSSScanner::Next(nsCSSToken& aToken, bool aSkipWS)
ch = Peek();
if (IsWhitespace(ch)) {
SkipWhitespace();
if (!aSkipWS) {
if (aSkip != eCSSScannerExclude_WhitespaceAndComments) {
aToken.mType = eCSSToken_Whitespace;
return true;
}
continue; // start again at the beginning
}
if (ch == '/' && !IsSVGMode() && Peek(1) == '*') {
// FIXME: Editor wants comments to be preserved (bug 60290).
SkipComment();
if (aSkip == eCSSScannerExclude_None) {
aToken.mType = eCSSToken_Comment;
return true;
}
continue; // start again at the beginning
}
break;

Просмотреть файл

@ -27,6 +27,8 @@ enum nsCSSTokenType {
// comments do *not* count as white space; comments separate tokens
// but are not themselves tokens.
eCSSToken_Whitespace, //
// A comment.
eCSSToken_Comment, // /*...*/
// Identifier-like tokens. mIdent is the text of the identifier.
// The difference between ID and Hash is: if the text after the #
@ -182,13 +184,24 @@ private:
bool mInitialized;
};
// Passed to nsCSSScanner::Next to control whether whitespace and/or
// comment tokens are returned to the caller or skipped over.
enum nsCSSScannerExclude {
// Return all tokens, including whitespace and comments.
eCSSScannerExclude_None,
// Include whitespace but exclude comments.
eCSSScannerExclude_Comments,
// Exclude whitespace and comments.
eCSSScannerExclude_WhitespaceAndComments
};
// nsCSSScanner tokenizes an input stream using the CSS2.1 forward
// compatible tokenization rules. Used internally by nsCSSParser;
// not available for use by other code.
class nsCSSScanner {
public:
// |aLineNumber == 1| is the beginning of a file, use |aLineNumber == 0|
// when the line number is unknown.
// when the line number is unknown. The scanner does not take
// ownership of |aBuffer|, so the caller must be sure to keep it
// alive for the lifetime of the scanner.
nsCSSScanner(const nsAString& aBuffer, uint32_t aLineNumber);
~nsCSSScanner();
@ -220,14 +233,20 @@ class nsCSSScanner {
uint32_t GetColumnNumber() const
{ return mTokenOffset - mTokenLineOffset; }
uint32_t GetTokenOffset() const
{ return mTokenOffset; }
uint32_t GetTokenEndOffset() const
{ return mOffset; }
// Get the text of the line containing the first character of
// the most recently processed token.
nsDependentSubstring GetCurrentLine() const;
// Get the next token. Return false on EOF. aTokenResult is filled
// in with the data for the token. If aSkipWS is true, skip over
// eCSSToken_Whitespace tokens rather than returning them.
bool Next(nsCSSToken& aTokenResult, bool aSkipWS);
// in with the data for the token. aSkip controls whether
// whitespace and/or comment tokens are ever returned.
bool Next(nsCSSToken& aTokenResult, nsCSSScannerExclude aSkip);
// Get the body of an URL token (everything after the 'url(').
// This is exposed for use by nsCSSParser::ParseMozDocumentRule,

Просмотреть файл

@ -15,6 +15,7 @@ MOCHITEST_MANIFESTS += [
'css-visited/mochitest.ini',
'mochitest.ini',
]
XPCSHELL_TESTS_MANIFESTS += ['xpcshell.ini']
BROWSER_CHROME_MANIFESTS += ['browser.ini']
MOCHITEST_CHROME_MANIFESTS += ['chrome/chrome.ini']

Просмотреть файл

@ -0,0 +1,125 @@
/* This Source Code Form is subject to the terms of the Mozilla Public
* License, v. 2.0. If a copy of the MPL was not distributed with this
* file, You can obtain one at http://mozilla.org/MPL/2.0/.
*/
// Lex |cssText| and check that the resulting tokens match |tokenTypes|.
//
// Each expected entry is either "tokenType" or "tokenType:text" (the
// latter when the token has non-empty text).  Along the way, verify that
// every token is non-empty, that tokens are contiguous, and that
// concatenating the source ranges of all tokens reproduces |cssText|.
function test_lexer(domutils, cssText, tokenTypes) {
  let lexer = domutils.getCSSLexer(cssText);
  let rebuilt = '';
  let expectedStart = 0;
  let count = 0;
  let token;

  while ((token = lexer.nextToken())) {
    let description = token.text
                      ? token.tokenType + ":" + token.text
                      : token.tokenType;
    equal(description, tokenTypes[count]);

    // Every token covers at least one character and starts exactly where
    // the previous one ended.
    ok(token.endOffset > token.startOffset);
    equal(token.startOffset, expectedStart);
    expectedStart = token.endOffset;

    rebuilt += cssText.substring(token.startOffset, token.endOffset);
    ++count;
  }

  // Ensure that we saw the correct number of tokens.
  equal(count, tokenTypes.length);
  // Ensure that the reported offsets cover all the text.
  equal(rebuilt, cssText);
}
// Test cases for test_lexer.  Each entry is a pair [cssText, expected],
// where |expected| lists, in order, the tokens the lexer should produce
// for |cssText|.  An expected token is written as "tokenType", or as
// "tokenType:text" when the token carries non-empty text.
let LEX_TESTS = [
["simple", ["ident:simple"]],
["simple: { hi; }",
["ident:simple", "symbol::",
"whitespace", "symbol:{",
"whitespace", "ident:hi",
"symbol:;", "whitespace",
"symbol:}"]],
["/* whatever */", ["comment"]],
["'string'", ["string:string"]],
['"string"', ["string:string"]],
["rgb(1,2,3)", ["function:rgb", "number",
"symbol:,", "number",
"symbol:,", "number",
"symbol:)"]],
["@media", ["at:media"]],
["#hibob", ["id:hibob"]],
["#123", ["hash:123"]],
["23px", ["dimension:px"]],
["23%", ["percentage"]],
["url(http://example.com)", ["url:http://example.com"]],
["url('http://example.com')", ["url:http://example.com"]],
["url( 'http://example.com' )",
["url:http://example.com"]],
// In CSS Level 3, this is an ordinary URL, not a BAD_URL.
["url(http://example.com", ["url:http://example.com"]],
// See bug 1153981 to understand why this gets a SYMBOL token.
["url(http://example.com @", ["bad_url:http://example.com", "symbol:@"]],
["quo\\ting", ["ident:quoting"]],
["'bad string\n", ["bad_string:bad string", "whitespace"]],
["~=", ["includes"]],
["|=", ["dashmatch"]],
["^=", ["beginsmatch"]],
["$=", ["endsmatch"]],
["*=", ["containsmatch"]],
// URANGE may be on the way out, and it isn't used by devutils, so
// let's skip it.
["<!-- html comment -->", ["htmlcomment", "whitespace", "ident:html",
"whitespace", "ident:comment", "whitespace",
"htmlcomment"]],
// earlier versions of CSS had "bad comment" tokens, but in level 3,
// unterminated comments are just comments.
["/* bad comment", ["comment"]]
];
// Lex |cssText| and check the line and column the lexer reports after
// each call to nextToken().
//
// Each entry of |locations| has the form "tokenType:line:column".  The
// final entry describes the position reported once nextToken() has
// returned null, and therefore has an empty token type (":line:column").
function test_lexer_linecol(domutils, cssText, locations) {
  let lexer = domutils.getCSSLexer(cssText);
  let count = 0;
  let token;

  // A do/while loop is used so that the iteration in which nextToken()
  // returns null is still checked; that is how the location of the EOF
  // gets tested.
  do {
    token = lexer.nextToken();
    let position = ":" + lexer.lineNumber + ":" + lexer.columnNumber;
    let description = token ? token.tokenType + position : position;
    equal(description, locations[count]);
    ++count;
  } while (token);

  // Ensure that we saw the correct number of tokens.
  equal(count, locations.length);
}
// Test cases for test_lexer_linecol.  Each entry is a pair
// [cssText, expected], where |expected| lists "tokenType:line:column" for
// each token in order, followed by a final ":line:column" entry giving
// the position reported at EOF.  Lines and columns are 0-based.
let LINECOL_TESTS = [
["simple", ["ident:0:0", ":0:6"]],
["\n stuff", ["whitespace:0:0", "ident:1:4", ":1:9"]],
['"string with \\\nnewline" \r\n', ["string:0:0", "whitespace:1:8",
":2:0"]]
];
// xpcshell entry point: fetch the inIDOMUtils service and run every
// token-stream test case followed by every line/column test case.
function run_test()
{
  let domutils = Components.classes["@mozilla.org/inspector/dom-utils;1"]
                           .getService(Components.interfaces.inIDOMUtils);

  for (let [cssText, expectedTokens] of LEX_TESTS) {
    test_lexer(domutils, cssText, expectedTokens);
  }

  for (let [cssText, expectedLocations] of LINECOL_TESTS) {
    test_lexer_linecol(domutils, cssText, expectedLocations);
  }
}

Просмотреть файл

@ -0,0 +1,5 @@
[DEFAULT]
head =
tail =
[test_csslexer.js]