bug 1018805 - implement custom lowercasing behavior for Irish. r=smontagu

This commit is contained in:
Jonathan Kew 2014-06-02 12:48:01 +01:00
Родитель e551329cb7
Коммит fc175df9f2
3 изменённых файлов: 54 добавлений и 25 удалений

Просмотреть файл

@ -208,33 +208,36 @@ const uint8_t IrishCasing::sUcClasses[26] = {
kClass_letter
};
// Map a character to the class code used by the Irish casing tables.
//
// ASCII letters are resolved through the sLcClasses / sUcClasses lookup
// tables. Any other character whose general category is "letter" is
// classified as kClass_vowel or kClass_Vowel when it matches one of the
// lowercase or uppercase *_ACUTE constants respectively, and as
// kClass_letter otherwise. The three hyphen characters map to kClass_hyph,
// and everything else to kClass_other.
uint8_t
IrishCasing::GetClass(uint32_t aCh)
{
  using mozilla::unicode::GetGenCategory;

  // Fast path: plain ASCII letters index straight into the class tables.
  if (aCh >= 'a' && aCh <= 'z') {
    return sLcClasses[aCh - 'a'];
  }
  if (aCh >= 'A' && aCh <= 'Z') {
    return sUcClasses[aCh - 'A'];
  }

  // Non-ASCII letters: only the acute-accented constants get a vowel class.
  if (GetGenCategory(aCh) == nsIUGenCategory::kLetter) {
    const bool matchesLowerAcute =
      aCh == a_ACUTE || aCh == e_ACUTE || aCh == i_ACUTE ||
      aCh == o_ACUTE || aCh == u_ACUTE;
    if (matchesLowerAcute) {
      return kClass_vowel;
    }

    const bool matchesUpperAcute =
      aCh == A_ACUTE || aCh == E_ACUTE || aCh == I_ACUTE ||
      aCh == O_ACUTE || aCh == U_ACUTE;
    if (matchesUpperAcute) {
      return kClass_Vowel;
    }

    return kClass_letter;
  }

  // Non-letters: hyphens have their own class, the rest is "other".
  const bool matchesHyphen =
    aCh == '-' || aCh == HYPHEN || aCh == NO_BREAK_HYPHEN;
  if (matchesHyphen) {
    return kClass_hyph;
  }
  return kClass_other;
}
uint32_t
IrishCasing::UpperCase(uint32_t aCh, State& aState,
bool& aMarkPos, uint8_t& aAction)
{
using mozilla::unicode::GetGenCategory;
uint8_t cls;
if (aCh >= 'a' && aCh <= 'z') {
cls = sLcClasses[aCh - 'a'];
} else if (aCh >= 'A' && aCh <= 'Z') {
cls = sUcClasses[aCh - 'A'];
} else if (GetGenCategory(aCh) == nsIUGenCategory::kLetter) {
if (aCh == a_ACUTE || aCh == e_ACUTE || aCh == i_ACUTE ||
aCh == o_ACUTE || aCh == u_ACUTE) {
cls = kClass_vowel;
} else if (aCh == A_ACUTE || aCh == E_ACUTE || aCh == I_ACUTE ||
aCh == O_ACUTE || aCh == U_ACUTE) {
cls = kClass_Vowel;
} else {
cls = kClass_letter;
}
} else if (aCh == '-' || aCh == HYPHEN || aCh == NO_BREAK_HYPHEN) {
cls = kClass_hyph;
} else {
cls = kClass_other;
}
uint8_t cls = GetClass(aCh);
uint8_t stateEntry = sUppercaseStateTable[cls][aState];
aMarkPos = !!(stateEntry & kMarkPositionFlag);
aAction = (stateEntry & kActionMask) >> kActionShift;

Просмотреть файл

@ -101,6 +101,14 @@ public:
// Process one character for Irish uppercasing. The definition classifies
// aCh, looks up the sUppercaseStateTable entry for that class and aState,
// and reports the entry's mark-position flag through aMarkPos and its
// action bits through aAction.
// NOTE(review): how aState is updated and what the return value holds is
// not described here -- confirm against the full definition.
static uint32_t UpperCase(uint32_t aCh, State& aState,
bool& aMarkPos, uint8_t& aAction);
// True when GetClass() places aCh in the kClass_Vowel class.
static bool IsUpperVowel(uint32_t aCh)
{
  const uint8_t charClass = GetClass(aCh);
  return charClass == kClass_Vowel;
}
private:
// Returns the class code for aCh (a kClass_* value such as the
// kClass_Vowel that IsUpperVowel tests for).
static uint8_t GetClass(uint32_t aCh);
};
} // namespace mozilla

Просмотреть файл

@ -276,6 +276,8 @@ nsCaseTransformTextRunFactory::TransformString(
bool capitalizeDutchIJ = false;
bool prevIsLetter = false;
bool ntPrefix = false; // true immediately after a word-initial 'n' or 't'
// when doing Irish lowercasing
uint32_t sigmaIndex = uint32_t(-1);
nsIUGenCategory::nsUGenCategory cat;
@ -331,6 +333,24 @@ nsCaseTransformTextRunFactory::TransformString(
}
}
cat = mozilla::unicode::GetGenCategory(ch);
if (languageSpecificCasing == eLSCB_Irish &&
cat == nsIUGenCategory::kLetter) {
// See bug 1018805 for Irish lowercasing requirements
if (!prevIsLetter && (ch == 'n' || ch == 't')) {
ntPrefix = true;
} else {
if (ntPrefix && mozilla::IrishCasing::IsUpperVowel(ch)) {
aConvertedString.Append('-');
++extraChars;
}
ntPrefix = false;
}
} else {
ntPrefix = false;
}
// Special lowercasing behavior for Greek Sigma: note that this is listed
// as context-sensitive in Unicode's SpecialCasing.txt, but is *not* a
// language-specific mapping; it applies regardless of the language of
@ -349,8 +369,6 @@ nsCaseTransformTextRunFactory::TransformString(
// position in the converted string; if we then encounter another letter,
// that FINAL SIGMA is replaced with a standard SMALL SIGMA.
cat = mozilla::unicode::GetGenCategory(ch);
// If sigmaIndex is not -1, it marks where we have provisionally mapped
// a CAPITAL SIGMA to FINAL SIGMA; if we now find another letter, we
// need to change it to SMALL SIGMA.