зеркало из https://github.com/mozilla/gecko-dev.git
Bug 346200 r=bryner Spellchecker doesn't like curly quotes and other non-ASCII punctuation
This commit is contained in:
Родитель
7fb1d4ec68
Коммит
860cc1d9cc
|
@ -50,6 +50,13 @@
|
|||
#include "nsIDOMNode.h"
|
||||
#include "nsIDOMHTMLBRElement.h"
|
||||
|
||||
// some character categories we care about from GetCat()
|
||||
#define CHAR_CAT_NUMBER 2
|
||||
#define CHAR_CAT_SPACE 3
|
||||
#define CHAR_CAT_CONTROL 4
|
||||
#define CHAR_CAT_WORD 5
|
||||
#define CHAR_CAT_PUNCTUATION1 6
|
||||
#define CHAR_CAT_PUNCTUATION2 7
|
||||
|
||||
// IsIgnorableCharacter
|
||||
//
|
||||
|
@ -62,6 +69,17 @@ inline PRBool IsIgnorableCharacter(PRUnichar ch)
|
|||
ch == 0x1806); // MONGOLIAN TODO SOFT HYPHEN
|
||||
}
|
||||
|
||||
// IsConditionalPunctuation
|
||||
//
|
||||
// Some characters (like apostrophes) require characters on each side to be
|
||||
// part of a word, and are otherwise punctuation.
|
||||
|
||||
inline PRBool IsConditionalPunctuation(PRUnichar ch)
|
||||
{
|
||||
return (ch == '\'' ||
|
||||
ch == 0x2019); // RIGHT SINGLE QUOTATION MARK
|
||||
}
|
||||
|
||||
// mozInlineSpellWordUtil::Init
|
||||
|
||||
nsresult
|
||||
|
@ -832,33 +850,18 @@ WordSplitState::ClassifyCharacter(PRInt32 aIndex, PRBool aRecurse) const
|
|||
// this will classify the character, we want to treat "ignorable" characters
|
||||
// such as soft hyphens as word characters.
|
||||
PRInt32 charCategory = GetCat(mDOMWordText[aIndex]);
|
||||
if (charCategory == 5 || IsIgnorableCharacter(mDOMWordText[aIndex]))
|
||||
if (charCategory == CHAR_CAT_WORD ||
|
||||
IsIgnorableCharacter(mDOMWordText[aIndex]))
|
||||
return CHAR_CLASS_WORD;
|
||||
|
||||
if (mDOMWordText[aIndex] > 255) {
|
||||
// Non-ASCII character so we can't use is* libc functions.
|
||||
// XXX this function needs work to handle general Unicode characters!!!
|
||||
return CHAR_CLASS_WORD;
|
||||
}
|
||||
|
||||
// all other whitespace chars, control chars, and most punctuation (with
|
||||
// several exceptions) are word separators. These exceptions are for
|
||||
// characters that can be considered part of a word if it is surrounded by
|
||||
// word characters
|
||||
if ((isspace(mDOMWordText[aIndex]) || mDOMWordText[aIndex] < ' ' ||
|
||||
ispunct(mDOMWordText[aIndex]))
|
||||
&& mDOMWordText[aIndex] != '\'')
|
||||
return CHAR_CLASS_SEPARATOR;
|
||||
|
||||
// For punctuation excluded from the above:
|
||||
// If punctuation is surrounded immediately on both sides by word characters
|
||||
// it also counts as a word character.
|
||||
if (ispunct(mDOMWordText[aIndex])) {
|
||||
// If conditional punctuation is surrounded immediately on both sides by word
|
||||
// characters it also counts as a word character.
|
||||
if (IsConditionalPunctuation(mDOMWordText[aIndex])) {
|
||||
if (!aRecurse) {
|
||||
// not allowed to look around, this punctuation counts like a separator
|
||||
return CHAR_CLASS_SEPARATOR;
|
||||
}
|
||||
|
||||
|
||||
// check the left-hand character
|
||||
if (aIndex == 0)
|
||||
return CHAR_CLASS_SEPARATOR;
|
||||
|
@ -875,6 +878,13 @@ WordSplitState::ClassifyCharacter(PRInt32 aIndex, PRBool aRecurse) const
|
|||
return CHAR_CLASS_WORD;
|
||||
}
|
||||
|
||||
// all other punctuation
|
||||
if (charCategory == CHAR_CAT_SPACE ||
|
||||
charCategory == CHAR_CAT_CONTROL ||
|
||||
charCategory == CHAR_CAT_PUNCTUATION1 ||
|
||||
charCategory == CHAR_CAT_PUNCTUATION2)
|
||||
return CHAR_CLASS_SEPARATOR;
|
||||
|
||||
// any other character counts as a word
|
||||
return CHAR_CLASS_WORD;
|
||||
}
|
||||
|
|
Загрузка…
Ссылка в новой задаче