зеркало из https://github.com/mozilla/gecko-dev.git
Bug 1322992
- Implement locale-specific casing behavior for Lithuanian, and add more WPT tests for it. r=m_kato
Differential Revision: https://phabricator.services.mozilla.com/D32129 --HG-- rename : testing/web-platform/tests/css/css-text/text-transform/reference/text-transform-upperlower-039-ref.html => testing/web-platform/tests/css/css-text/text-transform/reference/text-transform-upperlower-044-ref.html rename : testing/web-platform/tests/css/css-text/text-transform/text-transform-upperlower-039.html => testing/web-platform/tests/css/css-text/text-transform/text-transform-upperlower-044.html extra : moz-landing-system : lando
This commit is contained in:
Родитель
2f31fab6d9
Коммит
5fe29c880d
|
@ -219,11 +219,12 @@ gfxTextRunFactory::Parameters GetParametersForInner(
|
|||
// exhibit the behavior in question; multiple lang tags may map to the
|
||||
// same setting here, if the behavior is shared by other languages.
|
||||
enum LanguageSpecificCasingBehavior {
|
||||
eLSCB_None, // default non-lang-specific behavior
|
||||
eLSCB_Dutch, // treat "ij" digraph as a unit for capitalization
|
||||
eLSCB_Greek, // strip accent when uppercasing Greek vowels
|
||||
eLSCB_Irish, // keep prefix letters as lowercase when uppercasing Irish
|
||||
eLSCB_Turkish // preserve dotted/dotless-i distinction in uppercase
|
||||
eLSCB_None, // default non-lang-specific behavior
|
||||
eLSCB_Dutch, // treat "ij" digraph as a unit for capitalization
|
||||
eLSCB_Greek, // strip accent when uppercasing Greek vowels
|
||||
eLSCB_Irish, // keep prefix letters as lowercase when uppercasing Irish
|
||||
eLSCB_Turkish, // preserve dotted/dotless-i distinction in uppercase
|
||||
eLSCB_Lithuanian // retain dot on lowercase i/j when an accent is present
|
||||
};
|
||||
|
||||
static LanguageSpecificCasingBehavior GetCasingFor(const nsAtom* aLang) {
|
||||
|
@ -244,6 +245,9 @@ static LanguageSpecificCasingBehavior GetCasingFor(const nsAtom* aLang) {
|
|||
if (aLang == nsGkAtoms::ga) {
|
||||
return eLSCB_Irish;
|
||||
}
|
||||
if (aLang == nsGkAtoms::lt_) {
|
||||
return eLSCB_Lithuanian;
|
||||
}
|
||||
|
||||
// Is there a region subtag we should ignore?
|
||||
nsAtomString langStr(const_cast<nsAtom*>(aLang));
|
||||
|
@ -277,6 +281,8 @@ bool nsCaseTransformTextRunFactory::TransformString(
|
|||
bool prevIsLetter = false;
|
||||
bool ntPrefix = false; // true immediately after a word-initial 'n' or 't'
|
||||
// when doing Irish lowercasing
|
||||
bool seenSoftDotted = false; // true immediately after an I or J that is
|
||||
// case-converted in Lithuanian mode (I/J/U+012E being lowercased, or i/j/U+012F being upper- or titlecased)
|
||||
uint32_t sigmaIndex = uint32_t(-1);
|
||||
nsUGenCategory cat;
|
||||
|
||||
|
@ -353,6 +359,60 @@ bool nsCaseTransformTextRunFactory::TransformString(
|
|||
}
|
||||
}
|
||||
|
||||
if (languageSpecificCasing == eLSCB_Lithuanian) {
|
||||
// clang-format off
|
||||
/* From SpecialCasing.txt:
|
||||
* # Introduce an explicit dot above when lowercasing capital I's and J's
|
||||
* # whenever there are more accents above.
|
||||
* # (of the accents used in Lithuanian: grave, acute, tilde above, and ogonek)
|
||||
*
|
||||
* 0049; 0069 0307; 0049; 0049; lt More_Above; # LATIN CAPITAL LETTER I
|
||||
* 004A; 006A 0307; 004A; 004A; lt More_Above; # LATIN CAPITAL LETTER J
|
||||
* 012E; 012F 0307; 012E; 012E; lt More_Above; # LATIN CAPITAL LETTER I WITH OGONEK
|
||||
* 00CC; 0069 0307 0300; 00CC; 00CC; lt; # LATIN CAPITAL LETTER I WITH GRAVE
|
||||
* 00CD; 0069 0307 0301; 00CD; 00CD; lt; # LATIN CAPITAL LETTER I WITH ACUTE
|
||||
* 0128; 0069 0307 0303; 0128; 0128; lt; # LATIN CAPITAL LETTER I WITH TILDE
|
||||
*/
|
||||
// clang-format on
|
||||
if (ch == 'I' || ch == 'J' || ch == 0x012E) {
|
||||
ch = ToLowerCase(ch);
|
||||
prevIsLetter = true;
|
||||
seenSoftDotted = true;
|
||||
sigmaIndex = uint32_t(-1);
|
||||
break;
|
||||
}
|
||||
if (ch == 0x00CC) {
|
||||
aConvertedString.Append('i');
|
||||
aConvertedString.Append(0x0307);
|
||||
extraChars += 2;
|
||||
ch = 0x0300;
|
||||
prevIsLetter = true;
|
||||
seenSoftDotted = false;
|
||||
sigmaIndex = uint32_t(-1);
|
||||
break;
|
||||
}
|
||||
if (ch == 0x00CD) {
|
||||
aConvertedString.Append('i');
|
||||
aConvertedString.Append(0x0307);
|
||||
extraChars += 2;
|
||||
ch = 0x0301;
|
||||
prevIsLetter = true;
|
||||
seenSoftDotted = false;
|
||||
sigmaIndex = uint32_t(-1);
|
||||
break;
|
||||
}
|
||||
if (ch == 0x0128) {
|
||||
aConvertedString.Append('i');
|
||||
aConvertedString.Append(0x0307);
|
||||
extraChars += 2;
|
||||
ch = 0x0303;
|
||||
prevIsLetter = true;
|
||||
seenSoftDotted = false;
|
||||
sigmaIndex = uint32_t(-1);
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
cat = mozilla::unicode::GetGenCategory(ch);
|
||||
|
||||
if (languageSpecificCasing == eLSCB_Irish &&
|
||||
|
@ -371,6 +431,15 @@ bool nsCaseTransformTextRunFactory::TransformString(
|
|||
ntPrefix = false;
|
||||
}
|
||||
|
||||
if (seenSoftDotted && cat == nsUGenCategory::kMark) {
|
||||
// The seenSoftDotted flag will only be set in Lithuanian mode.
|
||||
if (ch == 0x0300 || ch == 0x0301 || ch == 0x0303) {
|
||||
aConvertedString.Append(0x0307);
|
||||
++extraChars;
|
||||
}
|
||||
}
|
||||
seenSoftDotted = false;
|
||||
|
||||
// Special lowercasing behavior for Greek Sigma: note that this is
|
||||
// listed as context-sensitive in Unicode's SpecialCasing.txt, but is
|
||||
// *not* a language-specific mapping; it applies regardless of the
|
||||
|
@ -463,6 +532,26 @@ bool nsCaseTransformTextRunFactory::TransformString(
|
|||
break;
|
||||
}
|
||||
|
||||
if (languageSpecificCasing == eLSCB_Lithuanian) {
|
||||
/*
|
||||
* # Remove DOT ABOVE after "i" with upper or titlecase
|
||||
*
|
||||
* 0307; 0307; ; ; lt After_Soft_Dotted; # COMBINING DOT ABOVE
|
||||
*/
|
||||
if (ch == 'i' || ch == 'j' || ch == 0x012F) {
|
||||
seenSoftDotted = true;
|
||||
ch = ToTitleCase(ch);
|
||||
break;
|
||||
}
|
||||
if (seenSoftDotted) {
|
||||
seenSoftDotted = false;
|
||||
if (ch == 0x0307) {
|
||||
ch = uint32_t(-1);
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (languageSpecificCasing == eLSCB_Irish) {
|
||||
bool mark;
|
||||
uint8_t action;
|
||||
|
@ -565,6 +654,25 @@ bool nsCaseTransformTextRunFactory::TransformString(
|
|||
capitalizeDutchIJ = true;
|
||||
break;
|
||||
}
|
||||
if (languageSpecificCasing == eLSCB_Lithuanian) {
|
||||
/*
|
||||
* # Remove DOT ABOVE after "i" with upper or titlecase
|
||||
*
|
||||
* 0307; 0307; ; ; lt After_Soft_Dotted; # COMBINING DOT ABOVE
|
||||
*/
|
||||
if (ch == 'i' || ch == 'j' || ch == 0x012F) {
|
||||
seenSoftDotted = true;
|
||||
ch = ToTitleCase(ch);
|
||||
break;
|
||||
}
|
||||
if (seenSoftDotted) {
|
||||
seenSoftDotted = false;
|
||||
if (ch == 0x0307) {
|
||||
ch = uint32_t(-1);
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
mcm = mozilla::unicode::SpecialTitle(ch);
|
||||
if (mcm) {
|
||||
|
|
|
@ -1,2 +0,0 @@
|
|||
[text-transform-upperlower-039.html]
|
||||
expected: FAIL
|
|
@ -17,7 +17,10 @@
|
|||
</head>
|
||||
<body>
|
||||
<p class="instructions">Test passes if both characters in each pair match. If you are missing a font glyph for a character, ignore that pair, but report which characters were ignored.</p>
|
||||
<div class="test" lang="lt"><span>i̇̀ i̇̀</span> <span>i̇́ i̇́</span> <span>i̇̃ i̇̃</span></div>
|
||||
<div class="test" lang="lt">
|
||||
<span>i̇̀ i̇̀</span> <span>i̇́ i̇́</span> <span>i̇̃ i̇̃</span>
|
||||
<span>i̇̀ i̇̀</span> <span>j̇́ j̇́</span> <span>į̇̃ į̇̃</span>
|
||||
</div>
|
||||
<!--Notes:
|
||||
The language of the test box is set to Lithuanian (lt)
|
||||
-->
|
||||
|
|
|
@ -0,0 +1,33 @@
|
|||
<!DOCTYPE html>
|
||||
<html lang="en">
|
||||
<head>
|
||||
<meta charset="utf-8"/>
|
||||
<title>CSS3 Text, text transform: Lithuanian, uppercase</title>
|
||||
<link rel='author' title='Jonathan Kew' href='mailto:jkew@mozilla.com'>
|
||||
<style type='text/css'>
|
||||
@font-face {
|
||||
font-family: 'webfont';
|
||||
src: url('/fonts/DoulosSIL-R.woff') format('woff');
|
||||
font-weight: normal;
|
||||
font-style: normal;
|
||||
}
|
||||
.test, .ref { font-size: 200%; line-height: 2.5em; font-family: webfont, serif; }
|
||||
.test span, .ref span { margin-right: 1em; white-space: nowrap; }
|
||||
</style>
|
||||
</head>
|
||||
<body>
|
||||
<p class="instructions">Test passes if both characters in each pair match. If you are missing a font glyph for a character, ignore that pair, but report which characters were ignored.</p>
|
||||
<div class="test" lang="lt">
|
||||
<span>Ì Ì</span>
|
||||
<span>Í Í</span>
|
||||
<span>Ĩ Ĩ</span>
|
||||
<span>I I</span>
|
||||
<span>J J</span>
|
||||
<span>Į Į</span>
|
||||
<span>Ẋ Ẋ</span>
|
||||
</div>
|
||||
<!--Notes:
|
||||
The language of the test box is set to Lithuanian (lt)
|
||||
-->
|
||||
</body>
|
||||
</html>
|
|
@ -22,7 +22,10 @@
|
|||
</head>
|
||||
<body>
|
||||
<p class="instructions">Test passes if both characters in each pair match. If you are missing a font glyph for a character, ignore that pair, but report which characters were ignored.</p>
|
||||
<div class="test" lang="lt"><span>Ì i̇̀</span> <span>Í i̇́</span> <span>Ĩ i̇̃</span></div>
|
||||
<div class="test" lang="lt">
|
||||
<span>Ì i̇̀</span> <span>Í i̇́</span> <span>Ĩ i̇̃</span>
|
||||
<span>Ì i̇̀</span> <span>J́ j̇́</span> <span>Į̃ į̇̃</span>
|
||||
</div>
|
||||
<!--Notes:
|
||||
The language of the test box is set to Lithuanian (lt)
|
||||
-->
|
||||
|
|
|
@ -0,0 +1,38 @@
|
|||
<!DOCTYPE html>
|
||||
<html lang="en">
|
||||
<head>
|
||||
<meta charset="utf-8"/>
|
||||
<title>CSS3 Text, text transform: Lithuanian, uppercase</title>
|
||||
<meta name="assert" content="text-transform: uppercase will uppercase Lithuanian as described in Unicode's SpecialCasing.txt .">
|
||||
<link rel='author' title='Jonathan Kew' href='mailto:jkew@mozilla.com'>
|
||||
<link rel='help' href='https://drafts.csswg.org/css-text-3/#text-transform'>
|
||||
<link rel="match" href="reference/text-transform-upperlower-044-ref.html">
|
||||
<style type='text/css'>
|
||||
@font-face {
|
||||
font-family: 'webfont';
|
||||
src: url('/fonts/DoulosSIL-R.woff') format('woff');
|
||||
font-weight: normal;
|
||||
font-style: normal;
|
||||
}
|
||||
.test, .ref { font-size: 200%; line-height: 2.5em; font-family: webfont, serif; }
|
||||
.test span, .ref span { margin-right: 1em; white-space: nowrap; }
|
||||
/* the CSS above is not part of the test */
|
||||
.test { text-transform: uppercase; }
|
||||
</style>
|
||||
</head>
|
||||
<body>
|
||||
<p class="instructions">Test passes if both characters in each pair match. If you are missing a font glyph for a character, ignore that pair, but report which characters were ignored.</p>
|
||||
<div class="test" lang="lt">
|
||||
<span>i̇̀ Ì</span>
|
||||
<span>i̇́ Í</span>
|
||||
<span>i̇̃ Ĩ</span>
|
||||
<span>i̇ I</span>
|
||||
<span>j̇ J</span>
|
||||
<span>į̇ Į</span>
|
||||
<span>ẋ Ẋ</span> <!-- check that dot isn't deleted in other contexts -->
|
||||
</div>
|
||||
<!--Notes:
|
||||
The language of the test box is set to Lithuanian (lt)
|
||||
-->
|
||||
</body>
|
||||
</html>
|
|
@ -2118,6 +2118,7 @@ STATIC_ATOMS = [
|
|||
Atom("crh", "crh"),
|
||||
# Atom("el", "el"), # "el" is present above
|
||||
Atom("ga", "ga"),
|
||||
# Atom("lt", "lt"), # "lt" is present above (atom name "lt_")
|
||||
Atom("nl", "nl"),
|
||||
|
||||
# mathematical language, used for MathML
|
||||
|
|
Загрузка…
Ссылка в новой задаче