Bug 443976: Parsing support for the unicode-range: descriptor in @font-face. r=dbaron

2009-08-20 14:52:47 -07:00 · 2009-08-20 14:52:47 -07:00 · 36c0b9d09f
--- a/layout/style/nsCSSParser.cpp
+++ b/layout/style/nsCSSParser.cpp
@ -7980,8 +7980,53 @@ CSSParserImpl::ParseFontSrcFormat(nsTArray<nsCSSValue> & values)
 PRBool
 CSSParserImpl::ParseFontRanges(nsCSSValue& aValue)
 {
-  // not currently implemented (bug 443976)
-  return PR_FALSE;
+  nsTArray<PRUint32> ranges; // parsed "range [, range]*" list, flattened: low, high, ...
+  for (;;) {                 // one iteration per comma-separated URange token
+    if (!GetToken(PR_TRUE))
+      break;
+
+    if (mToken.mType != eCSSToken_URange) {
+      UngetToken();
+      break;
+    }
+
+    // A malformed range token (mIntegerValid false) is a parsing error,
+    // causing the entire descriptor to be rejected.
+    if (!mToken.mIntegerValid)
+      return PR_FALSE;
+
+    PRUint32 low = mToken.mInteger;
+    PRUint32 high = mToken.mInteger2;
+
+    // A range that descends, or a range that is entirely outside the
+    // current range of Unicode (U+0-10FFFF) is dropped, but does not
+    // invalidate the descriptor.  A range that straddles the high end
+    // is clipped to U+10FFFF.
+    if (low <= 0x10FFFF && low <= high) {
+      if (high > 0x10FFFF)
+        high = 0x10FFFF;
+
+      ranges.AppendElement(low);
+      ranges.AppendElement(high);
+    }
+    if (!ExpectSymbol(',', PR_TRUE)) // NOTE(review): after a trailing comma the loop exits via
+      break;                         // the non-URange branch above; it is not rejected here
+  }
+
+  if (ranges.Length() == 0) // nothing usable: no range token seen, or all were dropped
+    return PR_FALSE;
+
+  nsRefPtr<nsCSSValue::Array> srcVals
+    = nsCSSValue::Array::Create(ranges.Length());
+  if (!srcVals) {
+    mScanner.SetLowLevelError(NS_ERROR_OUT_OF_MEMORY);
+    return PR_FALSE;
+  }
+
+  for (PRUint32 i = 0; i < ranges.Length(); i++) // store each bound as an integer item
+    srcVals->Item(i).SetIntValue(ranges[i], eCSSUnit_Integer);
+  aValue.SetArrayValue(srcVals, eCSSUnit_Array);
+  return PR_TRUE;
 }

 PRBool
--- a/layout/style/nsCSSRules.cpp
+++ b/layout/style/nsCSSRules.cpp
@ -71,6 +71,7 @@
 #include "nsDOMError.h"
 #include "nsStyleUtil.h"
 #include "nsCSSDeclaration.h"
+#include "nsPrintfCString.h"

 #define IMPL_STYLE_RULE_INHERIT(_class, super) \
 NS_IMETHODIMP _class::GetStyleSheet(nsIStyleSheet*& aSheet) const { return super::GetStyleSheet(aSheet); }  \
@ -1534,6 +1535,52 @@ AppendSerializedFontSrc(const nsCSSValue& src, nsAString & aResult NS_OUTPARAM)
  aResult.Truncate(aResult.Length() - 2); // remove the last comma-space
 }

+// Append aCode to aBuf as uppercase hex, zero-padded to at least four digits.
+static void
+AppendSerializedUnicodePoint(PRUint32 aCode, nsACString &aBuf NS_OUTPARAM)
+{
+  aBuf.Append(nsPrintfCString("%04X", aCode));
+}
+
+// Serialize a unicode-range: descriptor value into aResult, replacing its
+// previous contents.  The value is an array of integers, interpreted as a
+// sequence of pairs in source order: min max min max ...  (Possibly it
+// should be sorted and overlaps consolidated, but right now we don't.)
+static void
+AppendSerializedUnicodeRange(nsCSSValue const & aValue,
+                             nsAString & aResult NS_OUTPARAM)
+{
+  NS_PRECONDITION(aValue.GetUnit() == eCSSUnit_Null ||
+                  aValue.GetUnit() == eCSSUnit_Array,
+                  "improper value unit for unicode-range:");
+  aResult.Truncate();
+  if (aValue.GetUnit() != eCSSUnit_Array) // unset descriptor: leave aResult empty
+    return;
+
+  nsCSSValue::Array const & sources = *aValue.GetArrayValue();
+  nsCAutoString buf; // output is built here, then widened into aResult
+
+  NS_ABORT_IF_FALSE(sources.Count() % 2 == 0,
+                    "odd number of entries in a unicode-range: array");
+
+  for (PRUint32 i = 0; i < sources.Count(); i += 2) {
+    PRUint32 min = sources[i].GetIntValue();
+    PRUint32 max = sources[i+1].GetIntValue();
+
+    // Always emit U+MIN or U+MIN-MAX; the U+XX?? notation is not replicated.
+    buf.AppendLiteral("U+");
+    AppendSerializedUnicodePoint(min, buf);
+
+    if (min != max) { // a single code point is written without the "-max" part
+      buf.Append('-');
+      AppendSerializedUnicodePoint(max, buf);
+    }
+    buf.AppendLiteral(", ");
+  }
+  buf.Truncate(buf.Length() - 2); // drop last ", "; assumes >= 1 pair -- TODO confirm no empty array gets here
+  CopyASCIItoUTF16(buf, aResult);
+}
+
 // Mapping from nsCSSFontDesc codes to nsCSSFontFaceStyleDecl fields.
 // Keep this in sync with enum nsCSSFontDesc in nsCSSProperty.h.
 nsCSSValue nsCSSFontFaceStyleDecl::* const
@ -1606,7 +1653,7 @@ nsCSSFontFaceStyleDecl::GetPropertyValue(nsCSSFontDesc aFontDescID,
    return NS_OK;

  case eCSSFontDesc_UnicodeRange:
-    // these are not implemented, so always return an empty string
+    AppendSerializedUnicodeRange(val, aResult);
    return NS_OK;

  case eCSSFontDesc_UNKNOWN:
--- a/layout/style/nsCSSScanner.cpp
+++ b/layout/style/nsCSSScanner.cpp
@ -143,12 +143,31 @@ IsIdent(PRInt32 ch) {
  return ch >= 0 && (ch >= 256 || (gLexTable[ch] & IS_IDENT) != 0);
 }

+static inline PRUint32
+DecimalDigitValue(PRInt32 ch)
+{
+  return ch - '0'; // offset of ch from '0'; no check here that ch is a decimal digit
+}
+
+static inline PRUint32
+HexDigitValue(PRInt32 ch)
+{
+  if (IsDigit(ch)) {
+    return DecimalDigitValue(ch);
+  } else {
+    // Note: ch & 0x7 keeps just the low three bits, which are 1..6 for
+    // both 'A'-'F' and 'a'-'f'; adding 9 turns that into 10..15.  Nothing
+    // here checks that ch is a hex letter; callers test IsHexDigit first.
+    return (ch & 0x7) + 9;
+  }
+}
+
 nsCSSToken::nsCSSToken()
 {
  mType = eCSSToken_Symbol;
 }

-void 
+void
 nsCSSToken::AppendToString(nsString& aBuffer)
 {
  switch (mType) {
@ -160,6 +179,7 @@ nsCSSToken::AppendToString(nsString& aBuffer)
    case eCSSToken_URL:
    case eCSSToken_InvalidURL:
    case eCSSToken_HTMLComment:
+    case eCSSToken_URange:
      aBuffer.Append(mIdent);
      break;
    case eCSSToken_Number:
@ -694,6 +714,10 @@ nsCSSScanner::Next(nsCSSToken& aToken)
      return PR_FALSE;
    }

+    // UNICODE-RANGE
+    if ((ch == 'u' || ch == 'U') && Peek() == '+')
+      return ParseURange(ch, aToken);
+
    // IDENT
    if (StartsIdent(ch, Peek()))
      return ParseIdent(ch, aToken);
@ -921,14 +945,7 @@ nsCSSScanner::ParseAndAppendEscape(nsString& aOutput)
        Pushback(ch);
        break;
      } else if (IsHexDigit(ch)) {
-        if (IsDigit(ch)) {
-          rv = rv * 16 + (ch - '0');
-        } else {
-          // Note: c&7 just keeps the low three bits which causes
-          // upper and lower case alphabetics to both yield their
-          // "relative to 10" value for computing the hex value.
-          rv = rv * 16 + ((ch & 0x7) + 9);
-        }
+        rv = rv * 16 + HexDigitValue(ch);
      } else {
        NS_ASSERTION(IsWhitespace(ch), "bad control flow");
        // single space ends escape
@ -1069,8 +1086,6 @@ nsCSSScanner::ParseAtKeyword(PRInt32 aChar, nsCSSToken& aToken)
  return GatherIdent(0, aToken.mIdent);
 }

-#define CHAR_TO_DIGIT(_c) ((_c) - '0')
-
 PRBool
 nsCSSScanner::ParseNumber(PRInt32 c, nsCSSToken& aToken)
 {
@ -1109,7 +1124,7 @@ nsCSSScanner::ParseNumber(PRInt32 c, nsCSSToken& aToken)
    // Parse the integer part of the mantisssa
    NS_ASSERTION(IsDigit(c), "Why did we get called?");
    do {
-      intPart = 10*intPart + CHAR_TO_DIGIT(c);
+      intPart = 10*intPart + DecimalDigitValue(c);
      c = Read();
      // The IsDigit check will do the right thing even if Read() returns < 0
    } while (IsDigit(c));
@ -1124,7 +1139,7 @@ nsCSSScanner::ParseNumber(PRInt32 c, nsCSSToken& aToken)
    // Power of ten by which we need to divide our next digit
    float divisor = 10;
    do {
-      fracPart += CHAR_TO_DIGIT(c) / divisor;
+      fracPart += DecimalDigitValue(c) / divisor;
      divisor *= 10;
      c = Read();
      // The IsDigit check will do the right thing even if Read() returns < 0
@ -1149,7 +1164,7 @@ nsCSSScanner::ParseNumber(PRInt32 c, nsCSSToken& aToken)
      c = Read();
      NS_ASSERTION(IsDigit(c), "Peek() must have lied");
      do {
-        exponent = 10*exponent + CHAR_TO_DIGIT(c);
+        exponent = 10*exponent + DecimalDigitValue(c);
        c = Read();
        // The IsDigit check will do the right thing even if Read() returns < 0
      } while (IsDigit(c));
@ -1276,3 +1291,95 @@ nsCSSScanner::ParseString(PRInt32 aStop, nsCSSToken& aToken)
  }
  return PR_TRUE;
 }
+
+// UNICODE-RANGE tokens match the regular expression (either letter case)
+//
+//     u\+[0-9a-f?]{1,6}(-[0-9a-f]{1,6})?
+//
+// However, some such tokens are "invalid".  There are three valid forms:
+//
+//     u+[0-9a-f]{x}              1 <= x <= 6
+//     u+[0-9a-f]{x}\?{y}         1 <= x+y <= 6  (each ? is 0 in low, F in high)
+//     u+[0-9a-f]{x}-[0-9a-f]{y}  1 <= x <= 6, 1 <= y <= 6
+//
+// All such tokens get their text in mIdent and decoded bounds in mInteger
+// (low) / mInteger2 (high); mIntegerValid is true only for the valid forms.
+
+PRBool
+nsCSSScanner::ParseURange(PRInt32 aChar, nsCSSToken& aResult)
+{
+  PRInt32 intro2 = Read(); // consumes the '+' the caller only peeked at
+  PRInt32 ch = Peek();
+
+  // We should only ever be called if these things are true.
+  NS_ASSERTION(aChar == 'u' || aChar == 'U',
+               "unicode-range called with improper introducer (U)");
+  NS_ASSERTION(intro2 == '+',
+               "unicode-range called with improper introducer (+)");
+
+  // If the character immediately after the '+' is not a hex digit or
+  // '?', this is not really a unicode-range token; push everything
+  // back and scan the U as an ident.
+  if (!IsHexDigit(ch) && ch != '?') {
+    Pushback(intro2);
+    Pushback(aChar); // NOTE(review): aChar is also passed to ParseIdent below -- confirm it is not scanned twice
+    return ParseIdent(aChar, aResult);
+  }
+
+  aResult.mIdent.Truncate();
+  aResult.mIdent.Append(aChar);
+  aResult.mIdent.Append(intro2);
+
+  PRBool valid = PR_TRUE;
+  PRBool haveQues = PR_FALSE; // set once a '?' has been seen
+  PRUint32 low = 0;
+  PRUint32 high = 0;
+  int i = 0; // characters read in the current run; at most six are accepted
+
+  for (;;) {
+    ch = Read();
+    i++;
+    if (i == 7 || !(IsHexDigit(ch) || ch == '?')) { // seventh character, or not part of the run
+      break;
+    }
+
+    aResult.mIdent.Append(ch);
+    if (IsHexDigit(ch)) {
+      if (haveQues) {
+        valid = PR_FALSE; // all question marks should be at the end
+      }
+      low = low*16 + HexDigitValue(ch);
+      high = high*16 + HexDigitValue(ch);
+    } else {
+      haveQues = PR_TRUE;
+      low = low*16 + 0x0;   // '?' contributes the smallest digit to the low bound
+      high = high*16 + 0xF; // and the largest digit to the high bound
+    }
+  }
+
+  if (ch == '-' && IsHexDigit(Peek())) { // explicit upper bound; '-' is taken only if a hex digit follows
+    if (haveQues) {
+      valid = PR_FALSE; // '?' may not be combined with an explicit upper bound
+    }
+
+    aResult.mIdent.Append(ch);
+    high = 0; // discard the high value accumulated during the first run
+    i = 0;
+    for (;;) {
+      ch = Read();
+      i++;
+      if (i == 7 || !IsHexDigit(ch)) { // same six-digit limit as the first run
+        break;
+      }
+      aResult.mIdent.Append(ch);
+      high = high*16 + HexDigitValue(ch);
+    }
+  }
+  Pushback(ch); // return the terminating character; NOTE(review): ch may be < 0 at end of input -- confirm Pushback copes
+
+  aResult.mInteger = low;
+  aResult.mInteger2 = high;
+  aResult.mIntegerValid = valid;
+  aResult.mType = eCSSToken_URange;
+  return PR_TRUE;
+}
--- a/layout/style/nsCSSScanner.h
+++ b/layout/style/nsCSSScanner.h
@ -98,6 +98,11 @@ enum nsCSSTokenType {
  eCSSToken_Endsmatch,      // "$="
  eCSSToken_Containsmatch,  // "*="

+  eCSSToken_URange,         // Low in mInteger, high in mInteger2;
+                            // mIntegerValid is true if the token is a
+                            // valid range; mIdent preserves the textual
+                            // form of the token for error reporting
+
  // A special token indicating that there was an error in tokenization.
  // It's always an unterminated string.
  eCSSToken_Error           // mSymbol + mIdent
@ -107,9 +112,10 @@ struct nsCSSToken {
  nsAutoString    mIdent NS_OKONHEAP;
  float           mNumber;
  PRInt32         mInteger;
+  PRInt32         mInteger2;
  nsCSSTokenType  mType;
  PRUnichar       mSymbol;
-  PRPackedBool    mIntegerValid; // for number and dimension
+  PRPackedBool    mIntegerValid; // for number, dimension, urange
  PRPackedBool    mHasSign; // for number, percentage, and dimension

  nsCSSToken();
@ -219,6 +225,7 @@ protected:
  PRBool ParseNumber(PRInt32 aChar, nsCSSToken& aResult);
  PRBool ParseRef(PRInt32 aChar, nsCSSToken& aResult);
  PRBool ParseString(PRInt32 aChar, nsCSSToken& aResult);
+  PRBool ParseURange(PRInt32 aChar, nsCSSToken& aResult);
  PRBool SkipCComment();

  PRBool GatherIdent(PRInt32 aChar, nsString& aIdent);
--- a/layout/style/test/descriptor_database.js
+++ b/layout/style/test/descriptor_database.js
@ -89,7 +89,7 @@ var gCSSFontFaceDescriptors = {
 	},
 	"unicode-range": {
 		domProp: null,
-		values: [ "U+0-10FFFF", "U+3-7B3", "U+3??", "U+6A", "U+3????", "U+???", "U+302-302", "U+0-7,A-C", "U+100-17F,200-17F", "U+3??, U+500-513 ,U+612 , U+4????", "U+1FFF,U+200-27F" ],
-		invalid_values: [ "U+1????-2????" ]
+		values: [ "U+0-10FFFF", "U+3-7B3", "U+3??", "U+6A", "U+3????", "U+???", "U+302-302", "U+0-7,U+A-C", "U+100-17F,U+200-17F", "U+3??, U+500-513 ,U+612 , U+4????", "U+1FFF,U+200-27F" ],
+		invalid_values: [ "U+1????-2????", "U+0-7,A-C", "U+100-17F,200-17F" ]
 	}
 }
--- a/layout/style/test/test_descriptor_storage.html
+++ b/layout/style/test/test_descriptor_storage.html
@ -40,9 +40,6 @@ function fake_set_property(descriptor, value) {

 function xfail_parse(descriptor, value) {
  switch (descriptor) {
-    case "unicode-range":
-      // not yet implemented
-      return true;
    case "src":
      // not clear whether this is an error or not, so mark todo for now
      return value == "local(serif)";
--- a/layout/style/test/test_font_face_parser.html
+++ b/layout/style/test/test_font_face_parser.html
@ -38,7 +38,7 @@
      noncanonical: true },

    // Correct but unusual font-family.
-    { rule: _("font-family: Hoefler Text;"), 
+    { rule: _("font-family: Hoefler Text;"),
      d: {"font-family" : "\"Hoefler Text\""},
      noncanonical: true },

@ -164,8 +164,91 @@
      d: { "src" : "url(\"/fonts/Mouse\")" },
      noncanonical: true },

-    // unicode-range is not implemented (bug 443976).
-    // tests for that omitted for now.
+    // Correct unicode-range:
+    { rule: _("unicode-range: U+00A5;"), d: { "unicode-range" : "U+00A5" } },
+    { rule: _("unicode-range: U+A5;"),
+      d: { "unicode-range" : "U+00A5" }, noncanonical: true },
+    { rule: _("unicode-range: U+00a5;"),
+      d: { "unicode-range" : "U+00A5" }, noncanonical: true },
+    { rule: _("unicode-range: u+00a5;"),
+      d: { "unicode-range" : "U+00A5" }, noncanonical: true },
+    { rule: _("unicode-range: U+0000-00FF;"),
+      d: { "unicode-range" : "U+0000-00FF" } },
+    { rule: _("unicode-range: U+00??;"),
+      d: { "unicode-range" : "U+0000-00FF" }, noncanonical: true },
+    { rule: _("unicode-range: U+?"),
+      d: { "unicode-range" : "U+0000-000F" }, noncanonical: true },
+    { rule: _("unicode-range: U+??????"),
+      d: { "unicode-range" : "U+0000-10FFFF" }, noncanonical: true },
+    { rule: _("unicode-range: U+590-5ff;"),
+      d: { "unicode-range" : "U+0590-05FF" }, noncanonical: true },
+    { rule: _("unicode-range: U+A0000-12FFFF"),
+      d: { "unicode-range" : "U+A0000-10FFFF" }, noncanonical: true },
+
+    { rule: _("unicode-range: U+A5, U+4E00-9FFF, U+30??, U+FF00-FF9F;"),
+      d: { "unicode-range" : "U+00A5, U+4E00-9FFF, U+3000-30FF, U+FF00-FF9F" },
+      noncanonical: true },
+
+    { rule: _("unicode-range: U+104??;"),
+      d: { "unicode-range" : "U+10400-104FF" }, noncanonical: true },
+    { rule: _("unicode-range: U+320??, U+321??, U+322??, U+323??, U+324??, U+325??;"),
+      d: { "unicode-range" : "U+32000-320FF, U+32100-321FF, U+32200-322FF, U+32300-323FF, U+32400-324FF, U+32500-325FF" },
+      noncanonical: true },
+    { rule: _("unicode-range: U+100000-10ABCD;"),
+      d: { "unicode-range" : "U+100000-10ABCD" } },
+    { rule: _("unicode-range: U+0121 , U+1023"),
+      d: { "unicode-range" : "U+0121, U+1023" }, noncanonical: true },
+    { rule: _("unicode-range: U+0121/**/, U+1023"),
+      d: { "unicode-range" : "U+0121, U+1023" }, noncanonical: true },
+
+    // Incorrect unicode-range:
+    { rule: _("unicode-range:"), d: {} },
+    { rule: _("unicode-range: U+"), d: {} },
+    { rule: _("unicode-range: U+8FFFFFFF"), d: {} },
+    { rule: _("unicode-range: U+8FFF-7000"), d: {} },
+    { rule: _("unicode-range: U+8F??-9000"), d: {} },
+    { rule: _("unicode-range: U+9000-9???"), d: {} },
+    { rule: _("unicode-range: U+??00"), d: {} },
+    { rule: _("unicode-range: U+12345678?"), d: {} },
+    { rule: _("unicode-range: U+1????????"), d: {} },
+    { rule: _("unicode-range: twelve"), d: {} },
+    { rule: _("unicode-range: 1000"), d: {} },
+    { rule: _("unicode-range: 13??"), d: {} },
+    { rule: _("unicode-range: 1300-1377"), d: {} },
+    { rule: _("unicode-range: U-1000"), d: {} },
+    { rule: _("unicode-range: U+nnnn"), d: {} },
+    { rule: _("unicode-range: U+0121 U+1023"), d: {} },
+    { rule: _("unicode-range: U+ 0121"), d: {} },
+    { rule: _("unicode-range: U +0121"), d: {} },
+    { rule: _("unicode-range: U+0121-"), d: {} },
+    { rule: _("unicode-range: U+0121- 1023"), d: {} },
+    { rule: _("unicode-range: U+0121 -1023"), d: {} },
+    { rule: _("unicode-range: U+012 ?"), d: {} },
+    { rule: _("unicode-range: U+01 2?"), d: {} },
+
+    // Thorough test of seven-digit rejection: all these are syntax errors
+    { rule: _("unicode-range: U+1034560, U+A5"), d: {} },
+    { rule: _("unicode-range: U+1034569, U+A5"), d: {} },
+    { rule: _("unicode-range: U+103456a, U+A5"), d: {} },
+    { rule: _("unicode-range: U+103456f, U+A5"), d: {} },
+    { rule: _("unicode-range: U+103456?, U+A5"), d: {} },
+    { rule: _("unicode-range: U+103456-1034560, U+A5"), d: {} },
+    { rule: _("unicode-range: U+103456-1034569, U+A5"), d: {} },
+    { rule: _("unicode-range: U+103456-103456a, U+A5"), d: {} },
+    { rule: _("unicode-range: U+103456-103456f, U+A5"), d: {} },
+
+    // Syntactically invalid unicode-range tokens invalidate the
+    // entire descriptor
+    { rule: _("unicode-range: U+1, U+2, U+X"), d: {} },
+    { rule: _("unicode-range: U+A5, U+0?F"), d: {} },
+    { rule: _("unicode-range: U+A5, U+0F?-E00"), d: {} },
+
+    // Descending ranges and ranges outside 0-10FFFF are ignored
+    // but do not invalidate the descriptor
+    { rule: _("unicode-range: U+A5, U+90-30"),
+      d: { "unicode-range" : "U+00A5" }, noncanonical: true },
+    { rule: _("unicode-range: U+A5, U+220043"),
+      d: { "unicode-range" : "U+00A5" }, noncanonical: true },
  ];

  var display = document.getElementById("display");