Bug 1152033 - Expose CSS lexer to js. r=heycam, r=bz

--HG-- extra : rebase_source : 1eaabcb6629c185f2e18f27b8c09d9a11611869a
2015-05-04 10:28:00 -04:00 · 2015-05-04 10:28:00 -04:00 · 8e26488934
--- a/dom/bindings/Bindings.conf
+++ b/dom/bindings/Bindings.conf
@ -310,6 +310,10 @@ DOMInterfaces = {
    'nativeType': 'nsDOMCSSDeclaration'
 },

+'CSSLexer': {
+    'wrapperCache': False
+},
+
 'CSSPrimitiveValue': {
    'nativeType': 'nsROCSSPrimitiveValue',
 },
--- a/dom/webidl/CSSLexer.webidl
+++ b/dom/webidl/CSSLexer.webidl
@ -0,0 +1,132 @@
+/* -*- Mode: IDL; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+// The possible values for CSSToken.tokenType.
+enum CSSTokenType {
+  // Whitespace.
+  "whitespace",
+  // A CSS comment, including one left unterminated at EOF.
+  "comment",
+  // An identifier.  |text| holds the identifier text.
+  "ident",
+  // A function token.  |text| holds the function name.  Note that the
+  // function token includes (i.e., consumes) the "(" -- but this is
+  // not included in |text|.
+  "function",
+  // "@word".  |text| holds "word", without the "@".
+  "at",
+  // "#word".  |text| holds "word", without the "#".
+  "id",
+  // "#word".  ID is used when "word" would have been a valid IDENT
+  // token without the "#"; otherwise, HASH is used.
+  "hash",
+  // A number.
+  "number",
+  // A dimensioned number.  |text| holds the unit.
+  "dimension",
+  // A percentage.
+  "percentage",
+  // A string.  |text| holds the contents, without the quotes.
+  "string",
+  // A "bad string".  This is returned when a string is cut short by
+  // an unescaped newline.  (A string that is unterminated at EOF is
+  // currently returned as an ordinary STRING token instead.)
+  "bad_string",
+  // A URL.  |text| holds the URL.
+  "url",
+  // A "bad URL": a malformed url(); one merely unterminated at EOF
+  // is an ordinary URL token.  |text| holds the URL text scanned.
+  "bad_url",
+  // A "symbol" is any one-character symbol.  This corresponds to the
+  // DELIM token in the CSS specification.
+  "symbol",
+  // The "~=" token.
+  "includes",
+  // The "|=" token.
+  "dashmatch",
+  // The "^=" token.
+  "beginsmatch",
+  // The "$=" token.
+  "endsmatch",
+  // The "*=" token.
+  "containsmatch",
+  // A unicode-range token.  This is currently not fully represented
+  // by CSSToken.
+  "urange",
+  // HTML comment delimiters, either "<!--" or "-->".  Note that each
+  // is emitted as a separate token, and the intervening text is lexed
+  // as normal; whereas ordinary CSS comments are lexed as a unit.
+  "htmlcomment"
+};
+
+dictionary CSSToken {
+  // The token type.
+  CSSTokenType tokenType = "whitespace";
+
+  // Offset of the first character of the token in the input text.
+  unsigned long startOffset = 0;
+  // Offset of the character after the final character of the token.
+  // This is chosen so that the offsets can be passed to |substring|
+  // to yield the exact contents of the token.
+  unsigned long endOffset = 0;
+
+  // If the token is a number, percentage, or dimension, this holds
+  // the value.  This is not present for other token types.
+  double number;
+  // If the token is a number, percentage, or dimension, this is true
+  // iff the number had an explicit sign.  This is not present for
+  // other token types.
+  boolean hasSign;
+  // If the token is a number, percentage, or dimension, this is true
+  // iff the number was specified as an integer.  This is not present
+  // for other token types.
+  boolean isInteger;
+
+  // Text associated with the token.  This is not present for all
+  // token types.  In particular it is:
+  //
+  // Token type    Meaning
+  // ===============================
+  //    ident      The identifier.
+  //    function   The function name.  Note that the "(" is part
+  //               of the token but is not present in |text|.
+  //    at         The word.
+  //    id         The word.
+  //    hash       The word.
+  //    dimension  The unit (e.g. "px"), not the numeric value.
+  //    string     The string contents after escape processing.
+  //    bad_string Ditto.
+  //    url        The URL after escape processing.
+  //    bad_url    Ditto.
+  //    symbol     The symbol text.
+  DOMString text;
+};
+
+/**
+ * CSSLexer is an interface to the CSS lexer.  It tokenizes an
+ * input stream and returns CSS tokens.
+ *
+ * @see inIDOMUtils.getCSSLexer to create an instance of the lexer.
+ */
+[ChromeOnly]
+interface CSSLexer
+{
+  /**
+   * The 0-based line number of the most recently returned token, or
+   * of EOF once nextToken has returned null.
+   */
+  readonly attribute unsigned long lineNumber;
+
+  /**
+   * The 0-based column number of the most recently returned token,
+   * or of EOF once nextToken has returned null.
+   */
+  readonly attribute unsigned long columnNumber;
+
+  /**
+   * Return the next token (whitespace and comments included); null at EOF.
+   */
+  CSSToken? nextToken();
+};
--- a/dom/webidl/moz.build
+++ b/dom/webidl/moz.build
@ -88,6 +88,7 @@ WEBIDL_FILES = [
    'Crypto.webidl',
    'CSPReport.webidl',
    'CSS.webidl',
+    'CSSLexer.webidl',
    'CSSPrimitiveValue.webidl',
    'CSSRuleList.webidl',
    'CSSStyleDeclaration.webidl',
--- a/layout/inspector/inDOMUtils.cpp
+++ b/layout/inspector/inDOMUtils.cpp
@ -37,6 +37,7 @@
 #include "nsRuleWalker.h"
 #include "nsRuleProcessorData.h"
 #include "nsCSSRuleProcessor.h"
+#include "mozilla/dom/CSSLexer.h"
 #include "mozilla/dom/InspectorUtilsBinding.h"
 #include "mozilla/dom/ToJSValue.h"
 #include "nsCSSParser.h"
@ -289,6 +290,19 @@ inDOMUtils::GetRuleColumn(nsIDOMCSSRule* aRule, uint32_t* _retval)
  return NS_OK;
 }

+NS_IMETHODIMP
+inDOMUtils::GetCSSLexer(const nsAString& aText, JSContext* aCx,
+                        JS::MutableHandleValue aResult)
+{
+  MOZ_ASSERT(JS::CurrentGlobalOrNull(aCx));
+  JS::Rooted<JSObject*> scope(aCx, JS::CurrentGlobalOrNull(aCx));
+  nsAutoPtr<CSSLexer> lexer(new CSSLexer(aText));
+  if (!WrapNewBindingNonWrapperCachedObject(aCx, scope, lexer, aResult)) {
+    return NS_ERROR_FAILURE;
+  }
+  return NS_OK;
+}
+
 NS_IMETHODIMP
 inDOMUtils::GetSelectorCount(nsIDOMCSSStyleRule* aRule, uint32_t *aCount)
 {
--- a/layout/inspector/inIDOMUtils.idl
+++ b/layout/inspector/inIDOMUtils.idl
@ -17,7 +17,7 @@ interface nsIDOMFontFaceList;
 interface nsIDOMRange;
 interface nsIDOMCSSStyleSheet;

-[scriptable, uuid(1f5b7f08-fa80-49e9-b881-888f081240da)]
+[scriptable, uuid(60b4cbf7-2a08-4419-8937-6ef495417824)]
 interface inIDOMUtils : nsISupports
 {
  // CSS utilities
@ -28,6 +28,9 @@ interface inIDOMUtils : nsISupports
  unsigned long getRuleLine(in nsIDOMCSSRule aRule);
  unsigned long getRuleColumn(in nsIDOMCSSRule aRule);

+  [implicit_jscontext]
+  jsval getCSSLexer(in DOMString aText);
+
  // Utilities for working with selectors.  We don't have a JS OM representation
  // of a single selector or a selector list yet, but given a rule we can index
  // into the selector list.
--- a/layout/style/CSSLexer.cpp
+++ b/layout/style/CSSLexer.cpp
@ -0,0 +1,142 @@
+/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include "mozilla/dom/CSSLexer.h"
+#include "js/Value.h"
+#include "mozilla/dom/CSSLexerBinding.h"
+#include "mozilla/dom/ToJSValue.h"
+
+namespace mozilla {
+namespace dom {
+
+// Ensure that constants are consistent.
+
+#define CHECK(X, Y) \
+  static_assert(static_cast<int>(X) == static_cast<int>(Y),       \
+                "nsCSSToken and CSSTokenType should have identical values")
+
+CHECK(eCSSToken_Whitespace, CSSTokenType::Whitespace);
+CHECK(eCSSToken_Comment, CSSTokenType::Comment);
+CHECK(eCSSToken_Ident, CSSTokenType::Ident);
+CHECK(eCSSToken_Function, CSSTokenType::Function);
+CHECK(eCSSToken_AtKeyword, CSSTokenType::At);
+CHECK(eCSSToken_ID, CSSTokenType::Id);
+CHECK(eCSSToken_Hash, CSSTokenType::Hash);
+CHECK(eCSSToken_Number, CSSTokenType::Number);
+CHECK(eCSSToken_Dimension, CSSTokenType::Dimension);
+CHECK(eCSSToken_Percentage, CSSTokenType::Percentage);
+CHECK(eCSSToken_String, CSSTokenType::String);
+CHECK(eCSSToken_Bad_String, CSSTokenType::Bad_string);
+CHECK(eCSSToken_URL, CSSTokenType::Url);
+CHECK(eCSSToken_Bad_URL, CSSTokenType::Bad_url);
+CHECK(eCSSToken_Symbol, CSSTokenType::Symbol);
+CHECK(eCSSToken_Includes, CSSTokenType::Includes);
+CHECK(eCSSToken_Dashmatch, CSSTokenType::Dashmatch);
+CHECK(eCSSToken_Beginsmatch, CSSTokenType::Beginsmatch);
+CHECK(eCSSToken_Endsmatch, CSSTokenType::Endsmatch);
+CHECK(eCSSToken_Containsmatch, CSSTokenType::Containsmatch);
+CHECK(eCSSToken_URange, CSSTokenType::Urange);
+CHECK(eCSSToken_HTMLComment, CSSTokenType::Htmlcomment);
+
+#undef CHECK
+
+CSSLexer::CSSLexer(const nsAString& aText)
+  : mInput(aText)
+  , mScanner(mInput, 1)
+{
+}
+
+CSSLexer::~CSSLexer()
+{
+}
+
+bool
+CSSLexer::WrapObject(JSContext* aCx, JS::Handle<JSObject*> aGivenProto,
+                     JS::MutableHandle<JSObject*> aReflector)
+{
+  return CSSLexerBinding::Wrap(aCx, this, aGivenProto, aReflector);
+}
+
+uint32_t
+CSSLexer::LineNumber()
+{
+  // The scanner uses 1-based line numbers, but our callers expect
+  // 0-based.
+  return mScanner.GetLineNumber() - 1;
+}
+
+uint32_t
+CSSLexer::ColumnNumber()
+{
+  return mScanner.GetColumnNumber();
+}
+
+void
+CSSLexer::NextToken(Nullable<CSSToken>& aResult)
+{
+  nsCSSToken token;
+  if (!mScanner.Next(token, eCSSScannerExclude_None)) {
+    return;  // EOF: aResult is left unset, which callers see as null.
+  }
+
+  CSSToken& resultToken(aResult.SetValue());
+
+  resultToken.mTokenType = static_cast<CSSTokenType>(token.mType);
+  resultToken.mStartOffset = mScanner.GetTokenOffset();
+  resultToken.mEndOffset = mScanner.GetTokenEndOffset();
+
+  switch (token.mType) {
+    case eCSSToken_Whitespace:
+      break;
+
+    case eCSSToken_Ident:
+    case eCSSToken_Function:
+    case eCSSToken_AtKeyword:
+    case eCSSToken_ID:
+    case eCSSToken_Hash:
+      resultToken.mText.Construct(token.mIdent);
+      break;
+
+    case eCSSToken_Dimension:
+      resultToken.mText.Construct(token.mIdent);  // The unit.
+      /* FALLTHROUGH */
+    case eCSSToken_Number:
+    case eCSSToken_Percentage:
+      resultToken.mNumber.Construct(token.mNumber);
+      resultToken.mHasSign.Construct(token.mHasSign);
+      resultToken.mIsInteger.Construct(token.mIntegerValid);
+      break;
+
+    case eCSSToken_String:
+    case eCSSToken_Bad_String:
+    case eCSSToken_URL:
+    case eCSSToken_Bad_URL:
+      resultToken.mText.Construct(token.mIdent);
+      /* Don't bother emitting the delimiter, as it is readily extracted
+         from the source string when needed.  */
+      break;
+
+    case eCSSToken_Symbol:
+      resultToken.mText.Construct(nsString(&token.mSymbol, 1));
+      break;
+
+    case eCSSToken_Includes:
+    case eCSSToken_Dashmatch:
+    case eCSSToken_Beginsmatch:
+    case eCSSToken_Endsmatch:
+    case eCSSToken_Containsmatch:
+    case eCSSToken_URange:
+      break;  // Only the type and offsets are reported for these.
+
+    case eCSSToken_Comment:
+    case eCSSToken_HTMLComment:
+      /* The comment text is easily extracted from the source string,
+         and is rarely useful.  */
+      break;
+  }
+}
+
+} // namespace dom
+} // namespace mozilla
--- a/layout/style/CSSLexer.h
+++ b/layout/style/CSSLexer.h
@ -0,0 +1,37 @@
+/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#ifndef CSSLexer_h___
+#define CSSLexer_h___
+
+#include "mozilla/UniquePtr.h"
+#include "nsCSSScanner.h"
+#include "mozilla/dom/CSSLexerBinding.h"
+
+namespace mozilla {
+namespace dom {
+
+class CSSLexer : public NonRefcountedDOMObject
+{
+public:
+  explicit CSSLexer(const nsAString&);
+  ~CSSLexer();
+
+  bool WrapObject(JSContext* aCx, JS::Handle<JSObject*> aGivenProto,
+                  JS::MutableHandle<JSObject*> aReflector);
+
+  uint32_t LineNumber();    // 0-based line of the most recent token.
+  uint32_t ColumnNumber();  // 0-based column of the most recent token.
+  void NextToken(Nullable<CSSToken>& aResult);  // aResult left unset at EOF.
+
+private:
+  nsString mInput;        // Holds the text; must be declared before mScanner,
+  nsCSSScanner mScanner;  // which scans mInput without taking ownership of it.
+};
+
+} // namespace dom
+} // namespace mozilla
+
+#endif /* CSSLexer_h___ */
--- a/layout/style/moz.build
+++ b/layout/style/moz.build
@ -89,6 +89,7 @@ EXPORTS.mozilla += [

 EXPORTS.mozilla.dom += [
    'CSS.h',
+    'CSSLexer.h',
    'CSSRuleList.h',
    'CSSValue.h',
    'FontFace.h',
@ -113,6 +114,7 @@ UNIFIED_SOURCES += [
    'AnimationCommon.cpp',
    'CounterStyleManager.cpp',
    'CSS.cpp',
+    'CSSLexer.cpp',
    'CSSRuleList.cpp',
    'CSSStyleSheet.cpp',
    'CSSVariableDeclarations.cpp',
--- a/layout/style/nsCSSParser.cpp
+++ b/layout/style/nsCSSParser.cpp
@ -2734,7 +2734,9 @@ CSSParserImpl::GetToken(bool aSkipWS)
      return true;
    }
  }
-  return mScanner->Next(mToken, aSkipWS);
+  return mScanner->Next(mToken, aSkipWS ?
+                        eCSSScannerExclude_WhitespaceAndComments :
+                        eCSSScannerExclude_Comments);
 }

 void
--- a/layout/style/nsCSSScanner.cpp
+++ b/layout/style/nsCSSScanner.cpp
@ -552,7 +552,8 @@ nsCSSScanner::SkipComment()
  for (;;) {
    int32_t ch = Peek();
    if (ch < 0) {
-      mReporter->ReportUnexpectedEOF("PECommentEOF");
+      if (mReporter)
+        mReporter->ReportUnexpectedEOF("PECommentEOF");
      SetEOFCharacters(eEOFCharacters_Asterisk | eEOFCharacters_Slash);
      return;
    }
@ -560,7 +561,8 @@ nsCSSScanner::SkipComment()
      Advance();
      ch = Peek();
      if (ch < 0) {
-        mReporter->ReportUnexpectedEOF("PECommentEOF");
+        if (mReporter)
+          mReporter->ReportUnexpectedEOF("PECommentEOF");
        SetEOFCharacters(eEOFCharacters_Slash);
        return;
      }
@ -985,7 +987,8 @@ nsCSSScanner::ScanString(nsCSSToken& aToken)

    mSeenBadToken = true;
    aToken.mType = eCSSToken_Bad_String;
-    mReporter->ReportUnexpected("SEUnterminatedString", aToken);
+    if (mReporter)
+      mReporter->ReportUnexpected("SEUnterminatedString", aToken);
    break;
  }
  return true;
@ -1192,15 +1195,15 @@ nsCSSScanner::NextURL(nsCSSToken& aToken)
 /**
 * Primary scanner entry point.  Consume one token and fill in
 * |aToken| accordingly.  Will skip over any number of comments first,
- * and will also skip over rather than return whitespace tokens if
- * |aSkipWS| is true.
+ * and will also skip over rather than return whitespace and comment
+ * tokens, depending on the value of |aSkip|.
 *
 * Returns true if it successfully consumed a token, false if EOF has
 * been reached.  Will always advance the current read position by at
 * least one character unless called when already at EOF.
 */
 bool
-nsCSSScanner::Next(nsCSSToken& aToken, bool aSkipWS)
+nsCSSScanner::Next(nsCSSToken& aToken, nsCSSScannerExclude aSkip)
 {
  int32_t ch;

@ -1218,15 +1221,18 @@ nsCSSScanner::Next(nsCSSToken& aToken, bool aSkipWS)
    ch = Peek();
    if (IsWhitespace(ch)) {
      SkipWhitespace();
-      if (!aSkipWS) {
+      if (aSkip != eCSSScannerExclude_WhitespaceAndComments) {
        aToken.mType = eCSSToken_Whitespace;
        return true;
      }
      continue; // start again at the beginning
    }
    if (ch == '/' && !IsSVGMode() && Peek(1) == '*') {
-      // FIXME: Editor wants comments to be preserved (bug 60290).
      SkipComment();
+      if (aSkip == eCSSScannerExclude_None) {
+        aToken.mType = eCSSToken_Comment;
+        return true;
+      }
      continue; // start again at the beginning
    }
    break;
--- a/layout/style/nsCSSScanner.h
+++ b/layout/style/nsCSSScanner.h
@ -27,6 +27,8 @@ enum nsCSSTokenType {
  // comments do *not* count as white space; comments separate tokens
  // but are not themselves tokens.
  eCSSToken_Whitespace,     //
+  // A comment.
+  eCSSToken_Comment,        // /*...*/

  // Identifier-like tokens.  mIdent is the text of the identifier.
  // The difference between ID and Hash is: if the text after the #
@ -182,13 +184,24 @@ private:
  bool mInitialized;
 };

+enum nsCSSScannerExclude {
+  // Return all tokens, including whitespace and comments.
+  eCSSScannerExclude_None,
+  // Include whitespace but exclude comments.
+  eCSSScannerExclude_Comments,
+  // Exclude whitespace and comments.
+  eCSSScannerExclude_WhitespaceAndComments
+};
+
 // nsCSSScanner tokenizes an input stream using the CSS2.1 forward
 // compatible tokenization rules.  Used internally by nsCSSParser;
 // not available for use by other code.
 class nsCSSScanner {
  public:
  // |aLineNumber == 1| is the beginning of a file, use |aLineNumber == 0|
-  // when the line number is unknown.
+  // when the line number is unknown.  The scanner does not take
+  // ownership of |aBuffer|, so the caller must be sure to keep it
+  // alive for the lifetime of the scanner.
  nsCSSScanner(const nsAString& aBuffer, uint32_t aLineNumber);
  ~nsCSSScanner();

@ -220,14 +233,20 @@ class nsCSSScanner {
  uint32_t GetColumnNumber() const
  { return mTokenOffset - mTokenLineOffset; }

+  uint32_t GetTokenOffset() const
+  { return mTokenOffset; }
+
+  uint32_t GetTokenEndOffset() const
+  { return mOffset; }
+
  // Get the text of the line containing the first character of
  // the most recently processed token.
  nsDependentSubstring GetCurrentLine() const;

  // Get the next token.  Return false on EOF.  aTokenResult is filled
-  // in with the data for the token.  If aSkipWS is true, skip over
-  // eCSSToken_Whitespace tokens rather than returning them.
-  bool Next(nsCSSToken& aTokenResult, bool aSkipWS);
+  // in with the data for the token.  aSkip controls whether
+  // whitespace and/or comment tokens are ever returned.
+  bool Next(nsCSSToken& aTokenResult, nsCSSScannerExclude aSkip);

  // Get the body of an URL token (everything after the 'url(').
  // This is exposed for use by nsCSSParser::ParseMozDocumentRule,
--- a/layout/style/test/moz.build
+++ b/layout/style/test/moz.build
@ -15,6 +15,7 @@ MOCHITEST_MANIFESTS += [
    'css-visited/mochitest.ini',
    'mochitest.ini',
 ]
+XPCSHELL_TESTS_MANIFESTS += ['xpcshell.ini']
 BROWSER_CHROME_MANIFESTS += ['browser.ini']
 MOCHITEST_CHROME_MANIFESTS += ['chrome/chrome.ini']

--- a/layout/style/test/test_csslexer.js
+++ b/layout/style/test/test_csslexer.js
@ -0,0 +1,125 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/.
+ */
+
+function test_lexer(domutils, cssText, tokenTypes) {
+  let lexer = domutils.getCSSLexer(cssText);
+  let reconstructed = '';
+  let lastTokenEnd = 0;
+  let i = 0;
+  while (true) {
+    let token = lexer.nextToken();
+    if (!token) {
+      break;
+    }
+    let combined = token.tokenType;
+    if (token.text)
+      combined += ":" + token.text;
+    equal(combined, tokenTypes[i]);
+    ok(token.endOffset > token.startOffset);
+    equal(token.startOffset, lastTokenEnd);
+    lastTokenEnd = token.endOffset;
+    reconstructed += cssText.substring(token.startOffset, token.endOffset);
+    ++i;
+  }
+  // Ensure that we saw the correct number of tokens.
+  equal(i, tokenTypes.length);
+  // Ensure that the reported offsets cover all the text.
+  equal(reconstructed, cssText);
+}
+
+let LEX_TESTS = [
+  ["simple", ["ident:simple"]],
+  ["simple: { hi; }",
+             ["ident:simple", "symbol::",
+              "whitespace", "symbol:{",
+              "whitespace", "ident:hi",
+              "symbol:;", "whitespace",
+              "symbol:}"]],
+  ["/* whatever */", ["comment"]],
+  ["'string'", ["string:string"]],
+  ['"string"', ["string:string"]],
+  ["rgb(1,2,3)", ["function:rgb", "number",
+                                      "symbol:,", "number",
+                                      "symbol:,", "number",
+                                      "symbol:)"]],
+  ["@media", ["at:media"]],
+  ["#hibob", ["id:hibob"]],
+  ["#123", ["hash:123"]],
+  ["23px", ["dimension:px"]],
+  ["23%", ["percentage"]],
+  ["url(http://example.com)", ["url:http://example.com"]],
+  ["url('http://example.com')", ["url:http://example.com"]],
+  ["url(  'http://example.com'  )",
+             ["url:http://example.com"]],
+  // In CSS Level 3, this is an ordinary URL, not a BAD_URL.
+  ["url(http://example.com", ["url:http://example.com"]],
+  // See bug 1153981 to understand why this gets a SYMBOL token.
+  ["url(http://example.com @", ["bad_url:http://example.com", "symbol:@"]],
+  ["quo\\ting", ["ident:quoting"]],
+  ["'bad string\n", ["bad_string:bad string", "whitespace"]],
+  ["~=", ["includes"]],
+  ["|=", ["dashmatch"]],
+  ["^=", ["beginsmatch"]],
+  ["$=", ["endsmatch"]],
+  ["*=", ["containsmatch"]],
+
+  // URANGE may be on the way out, and it isn't used by devutils, so
+  // let's skip it.
+
+  ["<!-- html comment -->", ["htmlcomment", "whitespace", "ident:html",
+                             "whitespace", "ident:comment", "whitespace",
+                             "htmlcomment"]],
+
+  // earlier versions of CSS had "bad comment" tokens, but in level 3,
+  // unterminated comments are just comments.
+  ["/* bad comment", ["comment"]]
+];
+
+function test_lexer_linecol(domutils, cssText, locations) {
+  let lexer = domutils.getCSSLexer(cssText);
+  let i = 0;
+  while (true) {
+    let token = lexer.nextToken();
+    let startLine = lexer.lineNumber;
+    let startColumn = lexer.columnNumber;
+
+    // We do this in a bit of a funny way so that we can also test the
+    // location of the EOF.
+    let combined = ":" + startLine + ":" + startColumn;
+    if (token)
+      combined = token.tokenType + combined;
+
+    equal(combined, locations[i]);
+    ++i;
+
+    if (!token) {
+      break;
+    }
+  }
+  // Ensure that we saw the correct number of tokens.
+  equal(i, locations.length);
+}
+
+let LINECOL_TESTS = [
+  ["simple", ["ident:0:0", ":0:6"]],
+  ["\n    stuff", ["whitespace:0:0", "ident:1:4", ":1:9"]],
+  ['"string with \\\nnewline"    \r\n', ["string:0:0", "whitespace:1:8",
+                                         ":2:0"]]
+];
+
+function run_test()
+{
+  let domutils = Components.classes["@mozilla.org/inspector/dom-utils;1"]
+                           .getService(Components.interfaces.inIDOMUtils);
+
+  let text, result;
+  for ([text, result] of LEX_TESTS) {
+    test_lexer(domutils, text, result);
+  }
+
+  for ([text, result] of LINECOL_TESTS) {
+    test_lexer_linecol(domutils, text, result);
+  }
+}
--- a/layout/style/test/xpcshell.ini
+++ b/layout/style/test/xpcshell.ini
@ -0,0 +1,5 @@
+[DEFAULT]
+head =
+tail =
+
+[test_csslexer.js]