Bug 346200 r=bryner Spellchecker doesn't like curly quotes and other non-ASCII punctuation

This commit is contained in:
brettw%gmail.com 2006-07-28 17:57:53 +00:00
Родитель 7fb1d4ec68
Коммит 860cc1d9cc
1 изменённых файлов: 31 добавлений и 21 удалений

Просмотреть файл

@ -50,6 +50,13 @@
#include "nsIDOMNode.h"
#include "nsIDOMHTMLBRElement.h"
// some character categories we care about from GetCat()
#define CHAR_CAT_NUMBER 2
#define CHAR_CAT_SPACE 3
#define CHAR_CAT_CONTROL 4
#define CHAR_CAT_WORD 5
#define CHAR_CAT_PUNCTUATION1 6
#define CHAR_CAT_PUNCTUATION2 7
// IsIgnorableCharacter
//
@ -62,6 +69,17 @@ inline PRBool IsIgnorableCharacter(PRUnichar ch)
ch == 0x1806); // MONGOLIAN TODO SOFT HYPHEN
}
// IsConditionalPunctuation
//
// Some characters (like apostrophes) require characters on each side to be
// part of a word, and are otherwise punctuation.
inline PRBool IsConditionalPunctuation(PRUnichar ch)
{
return (ch == '\'' ||
ch == 0x2019); // RIGHT SINGLE QUOTATION MARK
}
// mozInlineSpellWordUtil::Init
nsresult
@ -832,33 +850,18 @@ WordSplitState::ClassifyCharacter(PRInt32 aIndex, PRBool aRecurse) const
// this will classify the character, we want to treat "ignorable" characters
// such as soft hyphens as word characters.
PRInt32 charCategory = GetCat(mDOMWordText[aIndex]);
if (charCategory == 5 || IsIgnorableCharacter(mDOMWordText[aIndex]))
if (charCategory == CHAR_CAT_WORD ||
IsIgnorableCharacter(mDOMWordText[aIndex]))
return CHAR_CLASS_WORD;
if (mDOMWordText[aIndex] > 255) {
// Non-ASCII character so we can't use is* libc functions.
// XXX this function needs work to handle general Unicode characters!!!
return CHAR_CLASS_WORD;
}
// all other whitespace chars, control chars, and most punctuation (with
// several exceptions) are word separators. These exceptions are for
// characters that can be considered part of a word if it is surrounded by
// word characters
if ((isspace(mDOMWordText[aIndex]) || mDOMWordText[aIndex] < ' ' ||
ispunct(mDOMWordText[aIndex]))
&& mDOMWordText[aIndex] != '\'')
return CHAR_CLASS_SEPARATOR;
// For punctuation excluded from the above:
// If punctuation is surrounded immediately on both sides by word characters
// it also counts as a word character.
if (ispunct(mDOMWordText[aIndex])) {
// If conditional punctuation is surrounded immediately on both sides by word
// characters it also counts as a word character.
if (IsConditionalPunctuation(mDOMWordText[aIndex])) {
if (!aRecurse) {
// not allowed to look around, this punctuation counts like a separator
return CHAR_CLASS_SEPARATOR;
}
// check the left-hand character
if (aIndex == 0)
return CHAR_CLASS_SEPARATOR;
@ -875,6 +878,13 @@ WordSplitState::ClassifyCharacter(PRInt32 aIndex, PRBool aRecurse) const
return CHAR_CLASS_WORD;
}
// all other punctuation
if (charCategory == CHAR_CAT_SPACE ||
charCategory == CHAR_CAT_CONTROL ||
charCategory == CHAR_CAT_PUNCTUATION1 ||
charCategory == CHAR_CAT_PUNCTUATION2)
return CHAR_CLASS_SEPARATOR;
// any other character counts as a word
return CHAR_CLASS_WORD;
}