Bug 393356. Spellchecker should not use private copy(ies) of the Unicode category tables. r=smontagu,a=damon

2007-08-29 02:03:32 +00:00 · 2007-08-29 02:03:32 +00:00 · dc4ea8c74d
--- a/extensions/spellcheck/src/mozEnglishWordUtils.cpp
+++ b/extensions/spellcheck/src/mozEnglishWordUtils.cpp
@ -41,7 +41,6 @@
 #include "nsIServiceManager.h"
 #include "nsUnicharUtilCIID.h"
 #include "nsCRT.h"
 #include "cattable.h"
 NS_IMPL_ISUPPORTS1(mozEnglishWordUtils, mozISpellI18NUtil)
@ -51,6 +50,8 @@ mozEnglishWordUtils::mozEnglishWordUtils()
  nsresult rv;
  mURLDetector = do_CreateInstance(MOZ_TXTTOHTMLCONV_CONTRACTID, &rv);
  mCaseConv = do_GetService(NS_UNICHARUTIL_CONTRACTID);
  mCategories = do_GetService(NS_UNICHARCATEGORY_CONTRACTID);
 }
 mozEnglishWordUtils::~mozEnglishWordUtils()
@ -78,12 +79,6 @@ NS_IMETHODIMP mozEnglishWordUtils::GetRootForm(const PRUnichar *aWord, PRUint32
  *count = 0;
  if (!mCaseConv) {
    mCaseConv = do_GetService(NS_UNICHARUTIL_CONTRACTID);
    if (!mCaseConv)
      return NS_ERROR_FAILURE;
  }
  mozEnglishWordUtils::myspCapitalization ct = captype(word);
  switch (ct)
    {
@ -159,10 +154,10 @@ NS_IMETHODIMP mozEnglishWordUtils::GetRootForm(const PRUnichar *aWord, PRUint32
 }
 // This needs vast improvement
-static PRBool ucIsAlpha(PRUnichar c)
+PRBool mozEnglishWordUtils::ucIsAlpha(PRUnichar aChar)
 {
  // XXX we have to fix callers to handle the full Unicode range
-  return (5 == GetCat(PRUint32(c)));
+  return nsIUGenCategory::kLetter == mCategories->Get(PRUint32(aChar));
 }
 /* void FindNextWord (in wstring word, in PRUint32 length, in PRUint32 offset, out PRUint32 begin, out PRUint32 end); */
--- a/extensions/spellcheck/src/mozEnglishWordUtils.h
+++ b/extensions/spellcheck/src/mozEnglishWordUtils.h
@ -44,6 +44,7 @@
 #include "nsIUnicodeDecoder.h"
 #include "nsString.h"
 #include "nsICaseConversion.h"
 #include "nsIUGenCategory.h"
 #include "mozITXTToHTMLConv.h" 
@ -62,10 +63,12 @@ public:
 protected:
  mozEnglishWordUtils::myspCapitalization captype(const nsString &word);
  PRBool ucIsAlpha(PRUnichar aChar);
  nsString mLanguage;
  nsString mCharset;
  nsCOMPtr<nsICaseConversion> mCaseConv;
  nsCOMPtr<nsIUGenCategory>   mCategories;
  nsCOMPtr<mozITXTToHTMLConv> mURLDetector; // used to detect urls so the spell checker can skip them.
 };
--- a/extensions/spellcheck/src/mozInlineSpellWordUtil.cpp
+++ b/extensions/spellcheck/src/mozInlineSpellWordUtil.cpp
@ -36,7 +36,6 @@
 *
 * ***** END LICENSE BLOCK ***** */
 #include "cattable.h"
 #include "mozInlineSpellWordUtil.h"
 #include "nsDebug.h"
 #include "nsIAtom.h"
@ -49,14 +48,8 @@
 #include "nsIEditor.h"
 #include "nsIDOMNode.h"
 #include "nsIDOMHTMLBRElement.h"
-
+#include "nsUnicharUtilCIID.h"
-// some character categories we care about from GetCat()
+#include "nsServiceManagerUtils.h"
 #define CHAR_CAT_NUMBER 2
 #define CHAR_CAT_SPACE 3
 #define CHAR_CAT_CONTROL 4
 #define CHAR_CAT_WORD 5
 #define CHAR_CAT_PUNCTUATION1 6
 #define CHAR_CAT_PUNCTUATION2 7
 // IsIgnorableCharacter
 //
@ -87,6 +80,10 @@ mozInlineSpellWordUtil::Init(nsWeakPtr aWeakEditor)
 {
  nsresult rv;
  mCategories = do_GetService(NS_UNICHARCATEGORY_CONTRACTID, &rv);
  if (NS_FAILED(rv))
    return rv;
  // getting the editor can fail commonly because the editor was detached, so
  // don't assert
  nsCOMPtr<nsIEditor> editor = do_QueryReferent(aWeakEditor, &rv);
@ -841,8 +838,9 @@ WordSplitState::ClassifyCharacter(PRInt32 aIndex, PRBool aRecurse) const
  // this will classify the character, we want to treat "ignorable" characters
  // such as soft hyphens as word characters.
-  PRInt32 charCategory = GetCat(mDOMWordText[aIndex]);
+  nsIUGenCategory::nsUGenCategory
-  if (charCategory == CHAR_CAT_WORD ||
+    charCategory = mWordUtil->GetCategories()->Get(PRUint32(mDOMWordText[aIndex]));
  if (charCategory == nsIUGenCategory::kLetter ||
      IsIgnorableCharacter(mDOMWordText[aIndex]))
    return CHAR_CLASS_WORD;
@ -871,10 +869,10 @@ WordSplitState::ClassifyCharacter(PRInt32 aIndex, PRBool aRecurse) const
  }
  // all other punctuation
-  if (charCategory == CHAR_CAT_SPACE ||
+  if (charCategory == nsIUGenCategory::kSeparator ||
-      charCategory == CHAR_CAT_CONTROL ||
+      charCategory == nsIUGenCategory::kOther ||
-      charCategory == CHAR_CAT_PUNCTUATION1 ||
+      charCategory == nsIUGenCategory::kPunctuation ||
-      charCategory == CHAR_CAT_PUNCTUATION2)
+      charCategory == nsIUGenCategory::kSymbol)
    return CHAR_CLASS_SEPARATOR;
  // any other character counts as a word
--- a/extensions/spellcheck/src/mozInlineSpellWordUtil.h
+++ b/extensions/spellcheck/src/mozInlineSpellWordUtil.h
@ -42,6 +42,7 @@
 #include "nsIDocument.h"
 #include "nsString.h"
 #include "nsTArray.h"
 #include "nsIUGenCategory.h"
 //#define DEBUG_SPELLCHECK
@ -118,13 +119,15 @@ public:
  nsIDOMDocumentRange* GetDocumentRange() const { return mDOMDocumentRange; }
  nsIDocument* GetDocument() const { return mDocument; }
  nsIDOMNode* GetRootNode() { return mRootNode; }
-
+  nsIUGenCategory* GetCategories() { return mCategories; }
 private:
  // cached stuff for the editor, set by Init
  nsCOMPtr<nsIDOMDocumentRange> mDOMDocumentRange;
  nsCOMPtr<nsIDocument>         mDocument;
  nsCOMPtr<nsIDOMViewCSS>       mCSSView;
  nsCOMPtr<nsIUGenCategory>     mCategories;
  // range to check, see SetRange
  nsIDOMNode* mRootNode;
--- a/intl/build/nsI18nModule.cpp
+++ b/intl/build/nsI18nModule.cpp
@ -69,6 +69,8 @@ static nsModuleComponentInfo components[] =
 // unicharutil
  { "Unichar Utility", NS_UNICHARUTIL_CID, 
      NS_UNICHARUTIL_CONTRACTID, nsCaseConversionImp2Constructor},
  { "Unichar Category Table", NS_UNICHARCATEGORY_CID, 
      NS_UNICHARCATEGORY_CONTRACTID, nsCategoryImpConstructor},
  { "Unicode To Entity Converter", NS_ENTITYCONVERTER_CID, 
      NS_ENTITYCONVERTER_CONTRACTID, nsEntityConverterConstructor },
  { "Unicode To Charset Converter", NS_SAVEASCHARSET_CID, 
--- a/intl/unicharutil/public/Makefile.in
+++ b/intl/unicharutil/public/Makefile.in
@ -48,7 +48,7 @@ EXPORTS		= \
 		nsICaseConversion.h \
 		nsIOrderIdFormater.h \
 		nsITextTransform.h \
-		nsIUGenDetailCategory.h \
+		nsIUGenCategory.h \
 		nsUnicharUtilCIID.h \
 		nsHankakuToZenkakuCID.h \
 		$(NULL)
--- a/intl/unicharutil/public/nsIUGenCategory.h
+++ b/intl/unicharutil/public/nsIUGenCategory.h
@ -41,10 +41,10 @@
 #include "nsISupports.h"
 #include "nscore.h"
-// {E86B3371-BF89-11d2-B3AF-00805F8A6670}
+// {671fea05-fcee-4b1c-82a3-6eb03eda8ddc}
 #define NS_IUGENCATEGORY_IID \
-{ 0xe86b3371, 0xbf89, 0x11d2, \
+{ 0x671fea05, 0xfcee, 0x4b1c, \
-    { 0xb3, 0xaf, 0x0, 0x80, 0x5f, 0x8a, 0x66, 0x70 } }
+    { 0x82, 0xa3, 0x6e, 0xb0, 0x3e, 0xda, 0x8d, 0xdc } }
 class nsIUGenCategory : public nsISupports {
@ -54,31 +54,24 @@ public:
  NS_DECLARE_STATIC_IID_ACCESSOR(NS_IUGENCATEGORY_IID)
   /**
-    *  Read ftp://ftp.unicode.org/Public/UNIDATA/ReadMe-Latest.txt
+    *  Read http://www.unicode.org/Public/UNIDATA/UCD.html#General_Category_Values
-    *  section GENERAL CATEGORY
+    *  for the detailed definition of the following categories
    *  for the detail defintation of the following categories
    */
   typedef enum {
-     kUGenCategory_Mark         = 1, // Mn, Mc, and Me
+     kUndefined    = 0,
-     kUGenCategory_Number       = 2, // Nd, Nl, and No 
+     kMark         = 1, // Mn, Mc, and Me
-     kUGenCategory_Separator    = 3, // Zs, Zl, and Zp
+     kNumber       = 2, // Nd, Nl, and No 
-     kUGenCategory_Other        = 4, // Cc, Cf, Cs, Co, and Cn
+     kSeparator    = 3, // Zs, Zl, and Zp
-     kUGenCategory_Letter       = 5, // Lu, Ll, Lt, Lm, and Lo
+     kOther        = 4, // Cc, Cf, Cs, Co, and Cn
-     kUGenCategory_Punctuation  = 6, // Pc, Pd, Ps, Pe, Pi, Pf, and Po
+     kLetter       = 5, // Lu, Ll, Lt, Lm, and Lo
-     kUGenCategory_Symbol       = 7  // Sm, Sc, Sk, and So
+     kPunctuation  = 6, // Pc, Pd, Ps, Pe, Pi, Pf, and Po
     kSymbol       = 7  // Sm, Sc, Sk, and So
   } nsUGenCategory;
   /**
    * Given a character, return its nsUGenCategory
    */
-   NS_IMETHOD Get( PRUnichar aChar, nsUGenCategory* oResult) = 0 ;
+   virtual nsUGenCategory Get(PRUint32 aChar) = 0;
   /**
    * Given a Unichar and a nsUGenCategory,
    * return PR_TRUE if the Unichar is in that category,
    * and PR_FALSE otherwise.
    */
   NS_IMETHOD Is( PRUnichar aChar, nsUGenCategory aCategory, PRBool* oResult) = 0;
 };
 NS_DEFINE_STATIC_IID_ACCESSOR(nsIUGenCategory, NS_IUGENCATEGORY_IID)
--- a/intl/unicharutil/public/nsUnicharUtilCIID.h
+++ b/intl/unicharutil/public/nsUnicharUtilCIID.h
@ -46,4 +46,11 @@
  { 0xb3, 0xae, 0x0, 0x80, 0x5f, 0x8a, 0x66, 0x70 } }
 #define NS_UNICHARUTIL_CONTRACTID "@mozilla.org/intl/unicharutil;1"
 #define NS_UNICHARCATEGORY_CID \
 { 0x748a1132, 0x671a, 0x409a, \
  { 0x8d, 0x1d, 0xf1, 0xcd, 0xf6, 0xb3, 0xa6, 0xb4 } }
 #define NS_UNICHARCATEGORY_CONTRACTID "@mozilla.org/intl/unicharcategory;1"
 #endif
--- a/intl/unicharutil/src/Makefile.in
+++ b/intl/unicharutil/src/Makefile.in
@ -57,6 +57,7 @@ EXPORTS = cattable.h
 CPPSRCS		= \
 		nsCaseConversionImp2.cpp \
 		nsCategoryImp.cpp \
 		nsHankakuToZenkaku.cpp \
 		nsEntityConverter.cpp \
 		nsSaveAsCharset.cpp \
--- a/intl/unicharutil/src/nsCategoryImp.cpp
+++ b/intl/unicharutil/src/nsCategoryImp.cpp
@ -42,36 +42,26 @@
 #include "nsCategoryImp.h"
 #include "cattable.h"
-NS_IMPL_ISUPPORTS1(nsCategoryImp, nsIUGenCategory)
+static nsCategoryImp gCategoryImp;
 NS_IMPL_THREADSAFE_QUERY_INTERFACE1(nsCategoryImp, nsIUGenCategory)
-nsCategoryImp::nsCategoryImp()
+NS_IMETHODIMP_(nsrefcnt) nsCategoryImp::AddRef(void)
 {
  return nsrefcnt(1);
 }
-nsCategoryImp::~nsCategoryImp()
+NS_IMETHODIMP_(nsrefcnt) nsCategoryImp::Release(void)
 {
  return nsrefcnt(1);
 }
-nsresult nsCategoryImp::Get( PRUnichar aChar, nsUGenCategory* oResult)
+nsCategoryImp* nsCategoryImp::GetInstance()
 {
-   PRUint8 ret = GetCat(aChar);
+  return &gCategoryImp;
   if( 0 == ret)
      *oResult = kUGenCategory_Other; // treat it as Cn - Other, Not Assigned
   else 
      *oResult = (nsUGenCategory)ret;
   return NS_OK;
 }
 nsresult nsCategoryImp::Is( PRUnichar aChar, nsUGenCategory aCategory, PRBool* oResult)
 nsIUGenCategory::nsUGenCategory nsCategoryImp::Get(PRUint32 aChar)
 {
-   nsUGenCategory cat ;
+  return nsUGenCategory(GetCat(aChar));
   PRUint8 ret = GetCat(aChar);
   if( 0 == ret)
      cat = kUGenCategory_Other; // treat it as Cn - Other, Not Assigned
   else 
      cat = (nsUGenCategory)ret;
   *oResult = (aCategory == cat );
   return NS_OK;
 }
--- a/intl/unicharutil/src/nsCategoryImp.h
+++ b/intl/unicharutil/src/nsCategoryImp.h
@ -37,29 +37,18 @@
 #ifndef nsCategoryImp_h__
 #define nsCategoryImp_h__
 #include "nscore.h"
 #include "nsISupports.h"
 #include "nsIUGenCategory.h"
 class nsCategoryImp : public nsIUGenCategory {
   NS_DECL_ISUPPORTS
-public: 
+public:
-   nsCategoryImp();
+   static nsCategoryImp* GetInstance();
-   virtual ~nsCategoryImp();
+    
   /**
    * Given a character, return its nsUGenCategory
    */
-   NS_IMETHOD Get( PRUnichar aChar, nsUGenCategory* oResult);
+   virtual nsUGenCategory Get(PRUint32 aChar);
   /**
    * Given a Unichar and a nsUGenCategory,
    * return PR_TRUE if the Unichar is in that category,
    * and PR_FALSE otherwise.
    */
   NS_IMETHOD Is( PRUnichar aChar, nsUGenCategory aCategory, PRBool* oResult);
 };
 #endif  /* nsCategoryImp_h__ */
--- a/intl/unicharutil/src/nsUcharUtilConstructors.h
+++ b/intl/unicharutil/src/nsUcharUtilConstructors.h
@ -40,6 +40,7 @@
 #include "nsUnicharUtilCIID.h"
 #include "nsCaseConversionImp2.h"
 #include "nsCategoryImp.h"
 #include "nsHankakuToZenkakuCID.h"
 #include "nsTextTransformFactory.h"
 #include "nsICaseConversion.h"
@ -82,6 +83,8 @@ UNICHARUTIL_MAKE_CTOR(HankakuToZenkaku)
 NS_GENERIC_FACTORY_SINGLETON_CONSTRUCTOR(nsCaseConversionImp2,
                                         nsCaseConversionImp2::GetInstance)
 NS_GENERIC_FACTORY_SINGLETON_CONSTRUCTOR(nsCategoryImp,
                                         nsCategoryImp::GetInstance)
 NS_GENERIC_FACTORY_CONSTRUCTOR(nsEntityConverter)
 NS_GENERIC_FACTORY_CONSTRUCTOR(nsSaveAsCharset)
 NS_GENERIC_FACTORY_CONSTRUCTOR(nsUnicodeNormalizer)