Bug 393356. Spellchecker should not use private copy(ies) of the Unicode category tables. r=smontagu,a=damon

This commit is contained in:
roc+%cs.cmu.edu 2007-08-29 02:03:32 +00:00
Родитель 64495184cf
Коммит dc4ea8c74d
12 изменённых файлов: 66 добавлений и 82 удалений

Просмотреть файл

@ -41,7 +41,6 @@
#include "nsIServiceManager.h" #include "nsIServiceManager.h"
#include "nsUnicharUtilCIID.h" #include "nsUnicharUtilCIID.h"
#include "nsCRT.h" #include "nsCRT.h"
#include "cattable.h"
NS_IMPL_ISUPPORTS1(mozEnglishWordUtils, mozISpellI18NUtil) NS_IMPL_ISUPPORTS1(mozEnglishWordUtils, mozISpellI18NUtil)
@ -51,6 +50,8 @@ mozEnglishWordUtils::mozEnglishWordUtils()
nsresult rv; nsresult rv;
mURLDetector = do_CreateInstance(MOZ_TXTTOHTMLCONV_CONTRACTID, &rv); mURLDetector = do_CreateInstance(MOZ_TXTTOHTMLCONV_CONTRACTID, &rv);
mCaseConv = do_GetService(NS_UNICHARUTIL_CONTRACTID);
mCategories = do_GetService(NS_UNICHARCATEGORY_CONTRACTID);
} }
mozEnglishWordUtils::~mozEnglishWordUtils() mozEnglishWordUtils::~mozEnglishWordUtils()
@ -78,12 +79,6 @@ NS_IMETHODIMP mozEnglishWordUtils::GetRootForm(const PRUnichar *aWord, PRUint32
*count = 0; *count = 0;
if (!mCaseConv) {
mCaseConv = do_GetService(NS_UNICHARUTIL_CONTRACTID);
if (!mCaseConv)
return NS_ERROR_FAILURE;
}
mozEnglishWordUtils::myspCapitalization ct = captype(word); mozEnglishWordUtils::myspCapitalization ct = captype(word);
switch (ct) switch (ct)
{ {
@ -159,10 +154,10 @@ NS_IMETHODIMP mozEnglishWordUtils::GetRootForm(const PRUnichar *aWord, PRUint32
} }
// This needs vast improvement // This needs vast improvement
static PRBool ucIsAlpha(PRUnichar c) PRBool mozEnglishWordUtils::ucIsAlpha(PRUnichar aChar)
{ {
// XXX we have to fix callers to handle the full Unicode range // XXX we have to fix callers to handle the full Unicode range
return (5 == GetCat(PRUint32(c))); return nsIUGenCategory::kLetter == mCategories->Get(PRUint32(aChar));
} }
/* void FindNextWord (in wstring word, in PRUint32 length, in PRUint32 offset, out PRUint32 begin, out PRUint32 end); */ /* void FindNextWord (in wstring word, in PRUint32 length, in PRUint32 offset, out PRUint32 begin, out PRUint32 end); */

Просмотреть файл

@ -44,6 +44,7 @@
#include "nsIUnicodeDecoder.h" #include "nsIUnicodeDecoder.h"
#include "nsString.h" #include "nsString.h"
#include "nsICaseConversion.h" #include "nsICaseConversion.h"
#include "nsIUGenCategory.h"
#include "mozITXTToHTMLConv.h" #include "mozITXTToHTMLConv.h"
@ -62,10 +63,12 @@ public:
protected: protected:
mozEnglishWordUtils::myspCapitalization captype(const nsString &word); mozEnglishWordUtils::myspCapitalization captype(const nsString &word);
PRBool ucIsAlpha(PRUnichar aChar);
nsString mLanguage; nsString mLanguage;
nsString mCharset; nsString mCharset;
nsCOMPtr<nsICaseConversion> mCaseConv; nsCOMPtr<nsICaseConversion> mCaseConv;
nsCOMPtr<nsIUGenCategory> mCategories;
nsCOMPtr<mozITXTToHTMLConv> mURLDetector; // used to detect urls so the spell checker can skip them. nsCOMPtr<mozITXTToHTMLConv> mURLDetector; // used to detect urls so the spell checker can skip them.
}; };

Просмотреть файл

@ -36,7 +36,6 @@
* *
* ***** END LICENSE BLOCK ***** */ * ***** END LICENSE BLOCK ***** */
#include "cattable.h"
#include "mozInlineSpellWordUtil.h" #include "mozInlineSpellWordUtil.h"
#include "nsDebug.h" #include "nsDebug.h"
#include "nsIAtom.h" #include "nsIAtom.h"
@ -49,14 +48,8 @@
#include "nsIEditor.h" #include "nsIEditor.h"
#include "nsIDOMNode.h" #include "nsIDOMNode.h"
#include "nsIDOMHTMLBRElement.h" #include "nsIDOMHTMLBRElement.h"
#include "nsUnicharUtilCIID.h"
// some character categories we care about from GetCat() #include "nsServiceManagerUtils.h"
#define CHAR_CAT_NUMBER 2
#define CHAR_CAT_SPACE 3
#define CHAR_CAT_CONTROL 4
#define CHAR_CAT_WORD 5
#define CHAR_CAT_PUNCTUATION1 6
#define CHAR_CAT_PUNCTUATION2 7
// IsIgnorableCharacter // IsIgnorableCharacter
// //
@ -87,6 +80,10 @@ mozInlineSpellWordUtil::Init(nsWeakPtr aWeakEditor)
{ {
nsresult rv; nsresult rv;
mCategories = do_GetService(NS_UNICHARCATEGORY_CONTRACTID, &rv);
if (NS_FAILED(rv))
return rv;
// getting the editor can fail commonly because the editor was detached, so // getting the editor can fail commonly because the editor was detached, so
// don't assert // don't assert
nsCOMPtr<nsIEditor> editor = do_QueryReferent(aWeakEditor, &rv); nsCOMPtr<nsIEditor> editor = do_QueryReferent(aWeakEditor, &rv);
@ -841,8 +838,9 @@ WordSplitState::ClassifyCharacter(PRInt32 aIndex, PRBool aRecurse) const
// this will classify the character, we want to treat "ignorable" characters // this will classify the character, we want to treat "ignorable" characters
// such as soft hyphens as word characters. // such as soft hyphens as word characters.
PRInt32 charCategory = GetCat(mDOMWordText[aIndex]); nsIUGenCategory::nsUGenCategory
if (charCategory == CHAR_CAT_WORD || charCategory = mWordUtil->GetCategories()->Get(PRUint32(mDOMWordText[aIndex]));
if (charCategory == nsIUGenCategory::kLetter ||
IsIgnorableCharacter(mDOMWordText[aIndex])) IsIgnorableCharacter(mDOMWordText[aIndex]))
return CHAR_CLASS_WORD; return CHAR_CLASS_WORD;
@ -871,10 +869,10 @@ WordSplitState::ClassifyCharacter(PRInt32 aIndex, PRBool aRecurse) const
} }
// all other punctuation // all other punctuation
if (charCategory == CHAR_CAT_SPACE || if (charCategory == nsIUGenCategory::kSeparator ||
charCategory == CHAR_CAT_CONTROL || charCategory == nsIUGenCategory::kOther ||
charCategory == CHAR_CAT_PUNCTUATION1 || charCategory == nsIUGenCategory::kPunctuation ||
charCategory == CHAR_CAT_PUNCTUATION2) charCategory == nsIUGenCategory::kSymbol)
return CHAR_CLASS_SEPARATOR; return CHAR_CLASS_SEPARATOR;
// any other character counts as a word // any other character counts as a word

Просмотреть файл

@ -42,6 +42,7 @@
#include "nsIDocument.h" #include "nsIDocument.h"
#include "nsString.h" #include "nsString.h"
#include "nsTArray.h" #include "nsTArray.h"
#include "nsIUGenCategory.h"
//#define DEBUG_SPELLCHECK //#define DEBUG_SPELLCHECK
@ -118,13 +119,15 @@ public:
nsIDOMDocumentRange* GetDocumentRange() const { return mDOMDocumentRange; } nsIDOMDocumentRange* GetDocumentRange() const { return mDOMDocumentRange; }
nsIDocument* GetDocument() const { return mDocument; } nsIDocument* GetDocument() const { return mDocument; }
nsIDOMNode* GetRootNode() { return mRootNode; } nsIDOMNode* GetRootNode() { return mRootNode; }
nsIUGenCategory* GetCategories() { return mCategories; }
private: private:
// cached stuff for the editor, set by Init // cached stuff for the editor, set by Init
nsCOMPtr<nsIDOMDocumentRange> mDOMDocumentRange; nsCOMPtr<nsIDOMDocumentRange> mDOMDocumentRange;
nsCOMPtr<nsIDocument> mDocument; nsCOMPtr<nsIDocument> mDocument;
nsCOMPtr<nsIDOMViewCSS> mCSSView; nsCOMPtr<nsIDOMViewCSS> mCSSView;
nsCOMPtr<nsIUGenCategory> mCategories;
// range to check, see SetRange // range to check, see SetRange
nsIDOMNode* mRootNode; nsIDOMNode* mRootNode;

Просмотреть файл

@ -69,6 +69,8 @@ static nsModuleComponentInfo components[] =
// unicharutil // unicharutil
{ "Unichar Utility", NS_UNICHARUTIL_CID, { "Unichar Utility", NS_UNICHARUTIL_CID,
NS_UNICHARUTIL_CONTRACTID, nsCaseConversionImp2Constructor}, NS_UNICHARUTIL_CONTRACTID, nsCaseConversionImp2Constructor},
{ "Unichar Category Table", NS_UNICHARCATEGORY_CID,
NS_UNICHARCATEGORY_CONTRACTID, nsCategoryImpConstructor},
{ "Unicode To Entity Converter", NS_ENTITYCONVERTER_CID, { "Unicode To Entity Converter", NS_ENTITYCONVERTER_CID,
NS_ENTITYCONVERTER_CONTRACTID, nsEntityConverterConstructor }, NS_ENTITYCONVERTER_CONTRACTID, nsEntityConverterConstructor },
{ "Unicode To Charset Converter", NS_SAVEASCHARSET_CID, { "Unicode To Charset Converter", NS_SAVEASCHARSET_CID,

Просмотреть файл

@ -48,7 +48,7 @@ EXPORTS = \
nsICaseConversion.h \ nsICaseConversion.h \
nsIOrderIdFormater.h \ nsIOrderIdFormater.h \
nsITextTransform.h \ nsITextTransform.h \
nsIUGenDetailCategory.h \ nsIUGenCategory.h \
nsUnicharUtilCIID.h \ nsUnicharUtilCIID.h \
nsHankakuToZenkakuCID.h \ nsHankakuToZenkakuCID.h \
$(NULL) $(NULL)

Просмотреть файл

@ -41,10 +41,10 @@
#include "nsISupports.h" #include "nsISupports.h"
#include "nscore.h" #include "nscore.h"
// {E86B3371-BF89-11d2-B3AF-00805F8A6670} // {671fea05-fcee-4b1c-82a3-6eb03eda8ddc}
#define NS_IUGENCATEGORY_IID \ #define NS_IUGENCATEGORY_IID \
{ 0xe86b3371, 0xbf89, 0x11d2, \ { 0x671fea05, 0xfcee, 0x4b1c, \
{ 0xb3, 0xaf, 0x0, 0x80, 0x5f, 0x8a, 0x66, 0x70 } } { 0x82, 0xa3, 0x6e, 0xb0, 0x3e, 0xda, 0x8d, 0xdc } }
class nsIUGenCategory : public nsISupports { class nsIUGenCategory : public nsISupports {
@ -54,31 +54,24 @@ public:
NS_DECLARE_STATIC_IID_ACCESSOR(NS_IUGENCATEGORY_IID) NS_DECLARE_STATIC_IID_ACCESSOR(NS_IUGENCATEGORY_IID)
/** /**
* Read ftp://ftp.unicode.org/Public/UNIDATA/ReadMe-Latest.txt * Read http://www.unicode.org/Public/UNIDATA/UCD.html#General_Category_Values
* section GENERAL CATEGORY * for the detailed definition of the following categories
* for the detail defintation of the following categories
*/ */
typedef enum { typedef enum {
kUGenCategory_Mark = 1, // Mn, Mc, and Me kUndefined = 0,
kUGenCategory_Number = 2, // Nd, Nl, and No kMark = 1, // Mn, Mc, and Me
kUGenCategory_Separator = 3, // Zs, Zl, and Zp kNumber = 2, // Nd, Nl, and No
kUGenCategory_Other = 4, // Cc, Cf, Cs, Co, and Cn kSeparator = 3, // Zs, Zl, and Zp
kUGenCategory_Letter = 5, // Lu, Ll, Lt, Lm, and Lo kOther = 4, // Cc, Cf, Cs, Co, and Cn
kUGenCategory_Punctuation = 6, // Pc, Pd, Ps, Pe, Pi, Pf, and Po kLetter = 5, // Lu, Ll, Lt, Lm, and Lo
kUGenCategory_Symbol = 7 // Sm, Sc, Sk, and So kPunctuation = 6, // Pc, Pd, Ps, Pe, Pi, Pf, and Po
kSymbol = 7 // Sm, Sc, Sk, and So
} nsUGenCategory; } nsUGenCategory;
/** /**
* Give a Unichar, return a nsUGenCategory * Give a Unichar, return a nsUGenCategory
*/ */
NS_IMETHOD Get( PRUnichar aChar, nsUGenCategory* oResult) = 0 ; virtual nsUGenCategory Get(PRUint32 aChar) = 0;
/**
* Give a Unichar, and a nsUGenCategory,
* return PR_TRUE if the Unichar is in that category,
* return PR_FALSE, otherwise
*/
NS_IMETHOD Is( PRUnichar aChar, nsUGenCategory aCategory, PRBool* oResult) = 0;
}; };
NS_DEFINE_STATIC_IID_ACCESSOR(nsIUGenCategory, NS_IUGENCATEGORY_IID) NS_DEFINE_STATIC_IID_ACCESSOR(nsIUGenCategory, NS_IUGENCATEGORY_IID)

Просмотреть файл

@ -46,4 +46,11 @@
{ 0xb3, 0xae, 0x0, 0x80, 0x5f, 0x8a, 0x66, 0x70 } } { 0xb3, 0xae, 0x0, 0x80, 0x5f, 0x8a, 0x66, 0x70 } }
#define NS_UNICHARUTIL_CONTRACTID "@mozilla.org/intl/unicharutil;1" #define NS_UNICHARUTIL_CONTRACTID "@mozilla.org/intl/unicharutil;1"
#define NS_UNICHARCATEGORY_CID \
{ 0x748a1132, 0x671a, 0x409a, \
{ 0x8d, 0x1d, 0xf1, 0xcd, 0xf6, 0xb3, 0xa6, 0xb4 } }
#define NS_UNICHARCATEGORY_CONTRACTID "@mozilla.org/intl/unicharcategory;1"
#endif #endif

Просмотреть файл

@ -57,6 +57,7 @@ EXPORTS = cattable.h
CPPSRCS = \ CPPSRCS = \
nsCaseConversionImp2.cpp \ nsCaseConversionImp2.cpp \
nsCategoryImp.cpp \
nsHankakuToZenkaku.cpp \ nsHankakuToZenkaku.cpp \
nsEntityConverter.cpp \ nsEntityConverter.cpp \
nsSaveAsCharset.cpp \ nsSaveAsCharset.cpp \

Просмотреть файл

@ -42,36 +42,26 @@
#include "nsCategoryImp.h" #include "nsCategoryImp.h"
#include "cattable.h" #include "cattable.h"
NS_IMPL_ISUPPORTS1(nsCategoryImp, nsIUGenCategory) static nsCategoryImp gCategoryImp;
NS_IMPL_THREADSAFE_QUERY_INTERFACE1(nsCategoryImp, nsIUGenCategory)
nsCategoryImp::nsCategoryImp() NS_IMETHODIMP_(nsrefcnt) nsCategoryImp::AddRef(void)
{ {
return nsrefcnt(1);
} }
nsCategoryImp::~nsCategoryImp() NS_IMETHODIMP_(nsrefcnt) nsCategoryImp::Release(void)
{ {
return nsrefcnt(1);
} }
nsresult nsCategoryImp::Get( PRUnichar aChar, nsUGenCategory* oResult) nsCategoryImp* nsCategoryImp::GetInstance()
{ {
PRUint8 ret = GetCat(aChar); return &gCategoryImp;
if( 0 == ret)
*oResult = kUGenCategory_Other; // treat it as Cn - Other, Not Assigned
else
*oResult = (nsUGenCategory)ret;
return NS_OK;
} }
nsresult nsCategoryImp::Is( PRUnichar aChar, nsUGenCategory aCategory, PRBool* oResult)
nsIUGenCategory::nsUGenCategory nsCategoryImp::Get(PRUint32 aChar)
{ {
nsUGenCategory cat ; return nsUGenCategory(GetCat(aChar));
PRUint8 ret = GetCat(aChar);
if( 0 == ret)
cat = kUGenCategory_Other; // treat it as Cn - Other, Not Assigned
else
cat = (nsUGenCategory)ret;
*oResult = (aCategory == cat );
return NS_OK;
} }

Просмотреть файл

@ -37,29 +37,18 @@
#ifndef nsCategoryImp_h__ #ifndef nsCategoryImp_h__
#define nsCategoryImp_h__ #define nsCategoryImp_h__
#include "nscore.h"
#include "nsISupports.h"
#include "nsIUGenCategory.h" #include "nsIUGenCategory.h"
class nsCategoryImp : public nsIUGenCategory { class nsCategoryImp : public nsIUGenCategory {
NS_DECL_ISUPPORTS NS_DECL_ISUPPORTS
public: public:
nsCategoryImp(); static nsCategoryImp* GetInstance();
virtual ~nsCategoryImp();
/** /**
* Give a Unichar, return a nsUGenCategory * Give a Unichar, return a nsUGenCategory
*/ */
NS_IMETHOD Get( PRUnichar aChar, nsUGenCategory* oResult); virtual nsUGenCategory Get(PRUint32 aChar);
/**
* Give a Unichar, and a nsUGenCategory,
* return PR_TRUE if the Unichar is in that category,
* return PR_FALSE, otherwise
*/
NS_IMETHOD Is( PRUnichar aChar, nsUGenCategory aCategory, PRBool* oResult);
}; };
#endif /* nsCategoryImp_h__ */ #endif /* nsCategoryImp_h__ */

Просмотреть файл

@ -40,6 +40,7 @@
#include "nsUnicharUtilCIID.h" #include "nsUnicharUtilCIID.h"
#include "nsCaseConversionImp2.h" #include "nsCaseConversionImp2.h"
#include "nsCategoryImp.h"
#include "nsHankakuToZenkakuCID.h" #include "nsHankakuToZenkakuCID.h"
#include "nsTextTransformFactory.h" #include "nsTextTransformFactory.h"
#include "nsICaseConversion.h" #include "nsICaseConversion.h"
@ -82,6 +83,8 @@ UNICHARUTIL_MAKE_CTOR(HankakuToZenkaku)
NS_GENERIC_FACTORY_SINGLETON_CONSTRUCTOR(nsCaseConversionImp2, NS_GENERIC_FACTORY_SINGLETON_CONSTRUCTOR(nsCaseConversionImp2,
nsCaseConversionImp2::GetInstance) nsCaseConversionImp2::GetInstance)
NS_GENERIC_FACTORY_SINGLETON_CONSTRUCTOR(nsCategoryImp,
nsCategoryImp::GetInstance)
NS_GENERIC_FACTORY_CONSTRUCTOR(nsEntityConverter) NS_GENERIC_FACTORY_CONSTRUCTOR(nsEntityConverter)
NS_GENERIC_FACTORY_CONSTRUCTOR(nsSaveAsCharset) NS_GENERIC_FACTORY_CONSTRUCTOR(nsSaveAsCharset)
NS_GENERIC_FACTORY_CONSTRUCTOR(nsUnicodeNormalizer) NS_GENERIC_FACTORY_CONSTRUCTOR(nsUnicodeNormalizer)