Bug 829816 - Treat \0 and U+0000 in CSS style sheets as U+FFFD. r=dbaron

2013-06-26 15:17:14 -07:00 · 2013-06-26 15:17:14 -07:00 · cb2184b31c
--- a/layout/reftests/bugs/228856-1-ref.html
+++ b/layout/reftests/bugs/228856-1-ref.html
@ -1,22 +1,22 @@
 <!doctype html>
 <html><head>
-<!-- This is a test for behavior made up by Mozilla in the absence of
+<!-- This tests behavior specified in CSS Syntax Level 3,
-     specification.  Future CSS specifications may define the behavior
+     as of the Editor's Draft 19 June 2013:
-     differently.  -->
+     http://dev.w3.org/csswg/css-syntax/ -->
 <title>\0 in CSS</title>
 <style>
-p#a:before { content: "0x" }
+p#a:before { content: "\FFFDx" }
-p#b:before { content: "00x" }
+p#b:before { content: "\FFFDx" }
-p#c:before { content: "000x" }
+p#c:before { content: "\FFFDx" }
-p#d:before { content: "0000x" }
+p#d:before { content: "\FFFDx" }
-p#e:before { content: "00000x" }
+p#e:before { content: "\FFFDx" }
-p#f:before { content: "000000x" }
+p#f:before { content: "\FFFDx" }
-p#g:before { content: "0 x" }
+p#g:before { content: "\FFFD x" }
-p#h:before { content: "00 x" }
+p#h:before { content: "\FFFD x" }
-p#i:before { content: "000 x" }
+p#i:before { content: "\FFFD x" }
-p#j:before { content: "0000 x" }
+p#j:before { content: "\FFFD x" }
-p#k:before { content: "00000 x" }
+p#k:before { content: "\FFFD x" }
-p#l:before { content: "000000 x" }
+p#l:before { content: "\FFFD x" }
 </style>
 </head><body>
 <p id="a">(a)</p>
--- a/layout/reftests/bugs/228856-1.html
+++ b/layout/reftests/bugs/228856-1.html
@ -1,8 +1,8 @@
 <!doctype html>
 <html><head>
-<!-- This is a test for behavior made up by Mozilla in the absence of
+<!-- This tests behavior specified in CSS Syntax Level 3,
-     specification.  Future CSS specifications may define the behavior
+     as of the Editor's Draft 19 June 2013:
-     differently.  -->
+     http://dev.w3.org/csswg/css-syntax/ -->
 <title>\0 in CSS</title>
 <style>
 p#a:before { content: "\0x" }
--- a/layout/reftests/bugs/228856-2-ref.html
+++ b/layout/reftests/bugs/228856-2-ref.html
--- a/layout/reftests/bugs/228856-2-style-1.css
+++ b/layout/reftests/bugs/228856-2-style-1.css
--- a/layout/reftests/bugs/228856-2.html
+++ b/layout/reftests/bugs/228856-2.html
--- a/layout/style/nsCSSScanner.cpp
+++ b/layout/style/nsCSSScanner.cpp
@ -138,21 +138,24 @@ IsVertSpace(int32_t ch) {
 }
 /**
- * True if 'ch' is a character that can appear in the middle of an
+ * True if 'ch' is a character that can appear in the middle of an identifier.
- * identifier.
+ * This includes U+0000 since it is handled as U+FFFD, but for purposes of
 * GatherText it should not be included in IsOpenCharClass.
 */
 static inline bool
 IsIdentChar(int32_t ch) {
-  return IsOpenCharClass(ch, IS_IDCHAR);
+  return IsOpenCharClass(ch, IS_IDCHAR) || ch == 0;
 }
 /**
 * True if 'ch' is a character that by itself begins an identifier.
 * This includes U+0000 since it is handled as U+FFFD, but for purposes of
 * GatherText it should not be included in IsOpenCharClass.
 * (This is a subset of IsIdentChar.)
 */
 static inline bool
 IsIdentStart(int32_t ch) {
-  return IsOpenCharClass(ch, IS_IDSTART);
+  return IsOpenCharClass(ch, IS_IDSTART) || ch == 0;
 }
 /**
@ -539,7 +542,7 @@ nsCSSScanner::GatherEscape(nsString& aOutput, bool aInString)
    // character.
    Advance();
    if (!aInString) {
-      aOutput.Append(0xFFFD);
+      aOutput.Append(UCS2_REPLACEMENT_CHAR);
    }
    return true;
  }
@ -561,7 +564,11 @@ nsCSSScanner::GatherEscape(nsString& aOutput, bool aInString)
    // return, or form feed) can be escaped with a backslash to remove
    // its special meaning." -- CSS2.1 section 4.1.3
    Advance(2);
-    aOutput.Append(ch);
+    if (ch == 0) {
      aOutput.Append(UCS2_REPLACEMENT_CHAR);
    } else {
      aOutput.Append(ch);
    }
    return true;
  }
@ -584,22 +591,21 @@ nsCSSScanner::GatherEscape(nsString& aOutput, bool aInString)
    ch = Peek();
  } while (i < 6 && IsHexDigit(ch));
-  // Silently deleting \0 opens a content-filtration loophole (see
+  // "Interpret the hex digits as a hexadecimal number. If this number is zero,
-  // bug 228856), so what we do instead is pretend the "cancels the
+  // or is greater than the maximum allowed codepoint, return U+FFFD
-  // meaning of special characters" rule applied.
+  // REPLACEMENT CHARACTER" -- CSS Syntax Level 3
  if (MOZ_UNLIKELY(val == 0)) {
-    do {
+    aOutput.Append(UCS2_REPLACEMENT_CHAR);
      aOutput.Append('0');
    } while (--i);
  } else {
    AppendUCS4ToUTF16(ENSURE_VALID_CHAR(val), aOutput);
-    // Consume exactly one whitespace character after a nonzero
+  }
-    // hexadecimal escape sequence.
+
-    if (IsVertSpace(ch)) {
+  // Consume exactly one whitespace character after a
-      AdvanceLine();
+  // hexadecimal escape sequence.
-    } else if (IsHorzSpace(ch)) {
+  if (IsVertSpace(ch)) {
-      Advance();
+    AdvanceLine();
-    }
+  } else if (IsHorzSpace(ch)) {
    Advance();
  }
  return true;
 }
@ -644,6 +650,11 @@ nsCSSScanner::GatherText(uint8_t aClass, nsString& aText)
    int32_t ch = Peek();
    MOZ_ASSERT(!IsOpenCharClass(ch, aClass),
               "should not have exited the inner loop");
    if (ch == 0) {
      Advance();
      aText.Append(UCS2_REPLACEMENT_CHAR);
      continue;
    }
    if (ch != '\\') {
      break;
--- a/layout/style/test/Makefile.in
+++ b/layout/style/test/Makefile.in
@ -84,6 +84,8 @@ MOCHITEST_FILES =	test_acid3_test46.html \
 		test_bug716226.html \
 		test_bug765590.html \
 		test_bug798567.html \
 		test_bug829816.html \
 		file_bug829816.css \
 		test_cascade.html \
 		test_ch_ex_no_infloops.html \
 		test_compute_data_with_start_struct.html \
--- a/layout/style/test/file_bug829816.css
+++ b/layout/style/test/file_bug829816.css
--- a/layout/style/test/test_bug829816.html
+++ b/layout/style/test/test_bug829816.html
@ -0,0 +1,56 @@
 <!DOCTYPE HTML>
 <html>
 <!--
 https://bugzilla.mozilla.org/show_bug.cgi?id=829816
 -->
 <head>
  <meta charset="utf-8">
  <title>Test for Bug 829816</title>
  <script type="application/javascript" src="/tests/SimpleTest/SimpleTest.js"></script>
  <link rel="stylesheet" type="text/css" href="/tests/SimpleTest/test.css"/>
  <style type="text/css">
    b { content: "\0";      counter-reset: \0      }
    b { content: "\00";     counter-reset: \00     }
    b { content: "\000";    counter-reset: \000    }
    b { content: "\0000";   counter-reset: \0000   }
    b { content: "\00000";  counter-reset: \00000  }
    b { content: "\000000"; counter-reset: \000000 }
  </style>
  <!-- U+0000 characters in <style> would be replaced by the HTML parser -->
  <link rel="stylesheet" type="text/css" href="file_bug829816.css"/>
  <script type="application/javascript">
  /** Test for Bug 829816 **/
  var ss = document.styleSheets[1];
  for (var i = 0; i < 6; i++) {
    is(ss.cssRules[i].style.content, "\"\uFFFD\"",
        "\\0 in strings should be converted to U+FFFD");
    is(ss.cssRules[i].style.counterReset, "\uFFFD",
        "\\0 in identifiers should be converted to U+FFFD");
  }
  is(document.styleSheets[2].cssRules[0].style.content, "\"\uFFFD\"",
      "U+0000 in strings should be converted to U+FFFD");
  is(document.styleSheets[2].cssRules[0].style.counterReset, "\uFFFD",
      "U+0000 in identifiers should be converted to U+FFFD");
  is(document.styleSheets[2].cssRules[1].style.content, "\"\uFFFD\"",
      "U+0000 in strings should be converted to U+FFFD");
  is(document.styleSheets[2].cssRules[1].style.counterReset, "\uFFFD",
      "U+0000 in identifiers should be converted to U+FFFD");
  </script>
 </head>
 <body>
 <a target="_blank" href="https://bugzilla.mozilla.org/show_bug.cgi?id=829816">Mozilla Bug 829816</a>
 <p id="display"></p>
 <div id="content" style="display: none">
 </div>
 <pre id="test">
 </pre>
 </body>
 </html>
--- a/layout/style/test/test_parser_diagnostics_unprintables.html
+++ b/layout/style/test/test_parser_diagnostics_unprintables.html
@ -100,8 +100,9 @@ const substitutions = [
  // U+000A LINE FEED, U+000C FORM FEED, and U+000D CARRIAGE RETURN
  // cannot be put into a CSS token as escaped literal characters, so
  // we do them with hex escapes instead.
-  { t: "\\\x00\\\x01\\\x02\\\x03",       i: "\\0 \\1 \\2 \\3 ",
+  // The parser replaces U+0000 with U+FFFD.
-                                         s: "\\0 \\1 \\2 \\3 " },
+  { t: "\\\x00\\\x01\\\x02\\\x03",       i: "\uFFFD\\1 \\2 \\3 ",
                                         s: "\uFFFD\\1 \\2 \\3 " },
  { t: "\\\x04\\\x05\\\x06\\\x07",       i: "\\4 \\5 \\6 \\7 ",
                                         s: "\\4 \\5 \\6 \\7 " },
  { t: "\\\x08\\\x09\\000A\\\x0B",       i: "\\8 \\9 \\A \\B ",