Rewrite parsing of CSS url() function to make it more like the tokenization that's described in the spec. (bug 476856, though originally written for bug 337287) r=bzbarsky

2009-08-05 17:45:49 -07:00 · 2009-08-05 17:45:49 -07:00 · 19e0871ec3
--- a/layout/reftests/css-namespace/reftest.list
+++ b/layout/reftests/css-namespace/reftest.list
@ -10,7 +10,7 @@
 == syntax-010.xml ref-lime-3.xml
 == syntax-011.xml ref-lime-6.xml
 == syntax-012.xml ref-lime-3.xml
-fails == syntax-013.xml ref-lime-5.xml # bug 476856
+== syntax-013.xml ref-lime-5.xml
 == syntax-014.xml ref-lime-3.xml
 == syntax-015.xml ref-lime-1.xml
 == scope-001.xml ref-lime-1.xml
--- a/layout/style/nsCSSParser.cpp
+++ b/layout/style/nsCSSParser.cpp
@ -260,9 +260,17 @@ protected:
 #endif

  PRBool GetToken(PRBool aSkipWS);
-  PRBool GetURLToken();
  void UngetToken();

+  // get the part in parentheses of the url() function, which is really a
+  // part of a token in the CSS grammar, but we're using a combination
+  // of the parser and the scanner to do it to handle the backtracking
+  // required by the error handling of the tokenization (since if we
+  // fail to scan the full token, we should fall back to tokenizing as
+  // FUNCTION ... ')').
+  // Note that this function WILL WRITE TO aURL IN SOME FAILURE CASES.
+  PRBool GetURLInParens(nsString& aURL);
+
  void AssertInitialState() {
    NS_PRECONDITION(!mHTMLMediaMode, "Bad initial state");
    NS_PRECONDITION(!mUnresolvablePrefixException, "Bad initial state");
@ -1284,21 +1292,33 @@ CSSParserImpl::GetToken(PRBool aSkipWS)
 }

 PRBool
-CSSParserImpl::GetURLToken()
+CSSParserImpl::GetURLInParens(nsString& aURL)
 {
-  for (;;) {
-    // XXXldb This pushback code doesn't make sense.
-    if (! mHavePushBack) {
-      if (! mScanner.NextURL(mToken)) {
-        break;
-      }
-    }
-    mHavePushBack = PR_FALSE;
-    if (eCSSToken_WhiteSpace != mToken.mType) {
-      return PR_TRUE;
+  if (!ExpectSymbol('(', PR_FALSE))
+    return PR_FALSE;
+
+  NS_ASSERTION(!mHavePushBack,
+               "ExpectSymbol returning success shouldn't leave pushback");
+  do {
+    if (! mScanner.NextURL(mToken)) {
+      return PR_FALSE;
    }
+  } while (eCSSToken_WhiteSpace == mToken.mType);
+
+  aURL = mToken.mIdent;
+
+  if ((eCSSToken_String != mToken.mType && eCSSToken_URL != mToken.mType) ||
+      !ExpectSymbol(')', PR_TRUE)) {
+    // in the failure case, we have to match parentheses, as if this
+    // weren't treated as a URL token by the tokenization
+
+    // XXX We really need to push aURL back into the buffer before this
+    // SkipUntil!
+    SkipUntil(')');
+    return PR_FALSE;
  }
-  return PR_FALSE;
+
+  return PR_TRUE;
 }

 void
@ -1505,14 +1525,8 @@ CSSParserImpl::GatherURL(nsString& aURL)
  }
  else if (eCSSToken_Function == mToken.mType &&
           mToken.mIdent.LowerCaseEqualsLiteral("url") &&
-           ExpectSymbol('(', PR_FALSE) &&
-           GetURLToken() &&
-           (eCSSToken_String == mToken.mType ||
-            eCSSToken_URL == mToken.mType)) {
-    aURL = mToken.mIdent;
-    if (ExpectSymbol(')', PR_TRUE)) {
-      return PR_TRUE;
-    }
+           GetURLInParens(aURL)) {
+    return PR_TRUE;
  }
  return PR_FALSE;
 }
@ -1978,23 +1992,17 @@ CSSParserImpl::ParseMozDocumentRule(RuleAppendFunc aAppendFunc, void* aData)
      cur->func = nsCSSDocumentRule::eDomain;
    }

-    if (!ExpectSymbol('(', PR_FALSE) ||
-        !GetURLToken() ||
-        (eCSSToken_String != mToken.mType &&
-         eCSSToken_URL != mToken.mType)) {
+    nsAutoString url;
+    if (!GetURLInParens(url)) {
      REPORT_UNEXPECTED_TOKEN(PEMozDocRuleNotURI);
      delete urls;
      return PR_FALSE;
    }
-    if (!ExpectSymbol(')', PR_TRUE)) {
-      delete urls;
-      return PR_FALSE;
-    }

    // We could try to make the URL (as long as it's not domain())
    // canonical and absolute with NS_NewURI and GetSpec, but I'm
    // inclined to think we shouldn't.
-    CopyUTF16toUTF8(mToken.mIdent, cur->url);
+    CopyUTF16toUTF8(url, cur->url);
  } while (ExpectSymbol(',', PR_TRUE));

  nsRefPtr<nsCSSDocumentRule> rule(new nsCSSDocumentRule());
@ -2038,18 +2046,10 @@ CSSParserImpl::ParseNameSpaceRule(RuleAppendFunc aAppendFunc, void* aData)
  }
  else if ((eCSSToken_Function == mToken.mType) &&
           (mToken.mIdent.LowerCaseEqualsLiteral("url"))) {
-    if (ExpectSymbol('(', PR_FALSE)) {
-      if (GetURLToken()) {
-        if ((eCSSToken_String == mToken.mType) || (eCSSToken_URL == mToken.mType)) {
-          url = mToken.mIdent;
-          if (ExpectSymbol(')', PR_TRUE)) {
-            if (ExpectSymbol(';', PR_TRUE)) {
-              ProcessNameSpace(prefix, url, aAppendFunc, aData);
-              return PR_TRUE;
-            }
-          }
-        }
-      }
+    if (GetURLInParens(url) &&
+        ExpectSymbol(';', PR_TRUE)) {
+      ProcessNameSpace(prefix, url, aAppendFunc, aData);
+      return PR_TRUE;
    }
  }
  REPORT_UNEXPECTED_TOKEN(PEAtNSUnexpected);
@ -4738,17 +4738,8 @@ CSSParserImpl::ParseURL(nsCSSValue& aValue)
    return PR_FALSE;
  }

-  if (!ExpectSymbol('(', PR_FALSE))
-    return PR_FALSE;
-  if (!GetURLToken())
-    return PR_FALSE;
-
-  nsCSSToken* tk = &mToken;
-  if (eCSSToken_String != tk->mType && eCSSToken_URL != tk->mType)
-    return PR_FALSE;
-
-  nsString url = tk->mIdent;
-  if (!ExpectSymbol(')', PR_TRUE))
+  nsString url;
+  if (!GetURLInParens(url))
    return PR_FALSE;

  // Translate url into an absolute url if the url is relative to the
--- a/layout/style/nsCSSScanner.cpp
+++ b/layout/style/nsCSSScanner.cpp
@ -846,55 +846,55 @@ nsCSSScanner::NextURL(nsCSSToken& aToken)
  // apply very well. To simplify the parser and relax some of the
  // requirements on the scanner we parse url's here. If we find a
  // malformed URL then we emit a token of type "InvalidURL" so that
-  // the CSS1 parser can ignore the invalid input. We attempt to eat
-  // the right amount of input data when an invalid URL is presented.
+  // the CSS1 parser can ignore the invalid input.  The parser must
+  // treat an InvalidURL token like a Function token, and process
+  // tokens until a matching parenthesis.

  aToken.mType = eCSSToken_InvalidURL;
  nsString& ident = aToken.mIdent;
  ident.SetLength(0);

-  if (ch == ')') {
-    Pushback(ch);
-    // empty url spec; just get out of here
-    aToken.mType = eCSSToken_URL;
-  } else {
-    // start of a non-quoted url
-    Pushback(ch);
-    PRBool ok = PR_TRUE;
-    for (;;) {
-      ch = Read();
-      if (ch < 0) break;
-      if (ch == CSS_ESCAPE) {
-        ParseAndAppendEscape(ident);
-      } else if ((ch == '"') || (ch == '\'') || (ch == '(')) {
-        // This is an invalid URL spec
-        ok = PR_FALSE;
-      } else if (IsWhitespace(ch)) {
-        // Whitespace is allowed at the end of the URL
+  Pushback(ch);
+
+  // start of a non-quoted url (which may be empty)
+  PRBool ok = PR_TRUE;
+  for (;;) {
+    ch = Read();
+    if (ch < 0) break;
+    if (ch == CSS_ESCAPE) {
+      ParseAndAppendEscape(ident);
+    } else if ((ch == '"') || (ch == '\'') || (ch == '(')) {
+      // This is an invalid URL spec
+      ok = PR_FALSE;
+      Pushback(ch); // push it back so the parser can match tokens and
+                    // then the closing parenthesis
+      break;
+    } else if (IsWhitespace(ch)) {
+      // Whitespace is allowed at the end of the URL
        EatWhiteSpace();
        if (LookAhead(')')) {
-          Pushback(')');  // leave the closing symbol
-          // done!
-          break;
-        }
-        // Whitespace is followed by something other than a
-        // ")". This is an invalid url spec.
-        ok = PR_FALSE;
-      } else if (ch == ')') {
-        Pushback(ch);
-        // All done
+        Pushback(')');  // leave the closing symbol
+        // done!
        break;
-      } else {
-        // A regular url character.
-        ident.Append(PRUnichar(ch));
      }
+      // Whitespace is followed by something other than a
+      // ")". This is an invalid url spec.
+      ok = PR_FALSE;
+      break;
+    } else if (ch == ')') {
+      Pushback(ch);
+      // All done
+      break;
+    } else {
+      // A regular url character.
+      ident.Append(PRUnichar(ch));
    }
+  }

-    // If the result of the above scanning is ok then change the token
-    // type to a useful one.
-    if (ok) {
-      aToken.mType = eCSSToken_URL;
-    }
+  // If the result of the above scanning is ok then change the token
+  // type to a useful one.
+  if (ok) {
+    aToken.mType = eCSSToken_URL;
  }
  return PR_TRUE;
 }