зеркало из https://github.com/mozilla/gecko-dev.git
Add support for REP tables in spellchecker, to improve suggestions by knowing about common misspellings
bug 227214, patch by mscott and me, r=mscott, sr=bienvenu
This commit is contained in:
Родитель
071d8d7188
Коммит
ebf5bcc2b5
|
@ -62,6 +62,7 @@
|
|||
#include "nsNetUtil.h"
|
||||
#include "nsICharsetConverterManager.h"
|
||||
#include "nsUnicharUtilCIID.h"
|
||||
#include "nsUnicharUtils.h"
|
||||
|
||||
static NS_DEFINE_CID(kCharsetConverterManagerCID, NS_ICHARSETCONVERTERMANAGER_CID);
|
||||
static NS_DEFINE_CID(kUnicharUtilCID, NS_UNICHARUTIL_CID);
|
||||
|
@ -69,7 +70,8 @@ static NS_DEFINE_CID(kUnicharUtilCID, NS_UNICHARUTIL_CID);
|
|||
static PRInt32 SplitString(nsACString &in,nsCString out[],PRInt32 size);
|
||||
static void doubleReverseHack(nsACString &s);
|
||||
|
||||
// Construct an affix manager with no replacement (REP) table loaded.
// Both the table pointer and its length are initialized so that
// getReplaceTable()/getReplaceTableLength() return a consistent
// "empty" state (nsnull / 0) until parse_file() reads a REP section.
myspAffixMgr::myspAffixMgr() :
  mReplaceTable(nsnull),
  mReplaceTableLength(0)
{
}
|
||||
|
||||
|
@ -77,6 +79,7 @@ myspAffixMgr::myspAffixMgr()
|
|||
// Tear down the affix manager: drop the personal dictionary reference
// and free the replacement table array.
myspAffixMgr::~myspAffixMgr()
{
  // Release our reference to the personal dictionary.
  mPersonalDictionary = nsnull;

  // The table is allocated with new[]; delete[] on a null pointer is a no-op,
  // so this is safe even when no table was ever allocated.
  delete[] mReplaceTable;
}
|
||||
|
||||
nsresult myspAffixMgr::GetPersonalDictionary(mozIPersonalDictionary * *aPersonalDictionary)
|
||||
|
@ -160,7 +163,6 @@ nsresult myspAffixMgr::parse_file(nsIInputStream *strm)
|
|||
PRInt32 numents;
|
||||
nsLineBuffer *lineBuffer;
|
||||
nsresult rv = NS_InitLineBuffer(&lineBuffer);
|
||||
nsCAutoString line;
|
||||
PRBool moreData=PR_TRUE;
|
||||
PRInt32 pos;
|
||||
nsCString cmds[5];
|
||||
|
@ -169,6 +171,9 @@ nsresult myspAffixMgr::parse_file(nsIInputStream *strm)
|
|||
prefixes.clear();
|
||||
suffixes.clear();
|
||||
|
||||
nsCOMPtr<nsICharsetConverterManager> ccm = do_GetService(kCharsetConverterManagerCID, &rv);
|
||||
NS_ENSURE_SUCCESS(rv, rv);
|
||||
|
||||
numents = 0; // number of affentry structures to parse
|
||||
char flag='\0'; // affix char identifier
|
||||
{
|
||||
|
@ -179,6 +184,7 @@ nsresult myspAffixMgr::parse_file(nsIInputStream *strm)
|
|||
// read in each line ignoring any that do not
|
||||
// start with PFX or SFX
|
||||
|
||||
nsCAutoString line;
|
||||
while (moreData) {
|
||||
NS_ReadLine(strm,lineBuffer,line,&moreData);
|
||||
/* parse in the try string */
|
||||
|
@ -191,11 +197,56 @@ nsresult myspAffixMgr::parse_file(nsIInputStream *strm)
|
|||
|
||||
/* parse in the name of the character set used by the .dict and .aff */
|
||||
if (Substring(line,0,3).Equals("SET")) {
|
||||
|
||||
pos = line.FindChar(' ');
|
||||
if(pos != -1){
|
||||
mEncoding.Assign(Substring(line,pos+1,line.Length()-pos-1));
|
||||
mEncoding.CompressWhitespace(PR_TRUE,PR_TRUE);
|
||||
|
||||
rv = ccm->GetUnicodeDecoder(mEncoding.get(), getter_AddRefs(mDecoder));
|
||||
NS_ENSURE_SUCCESS(rv, rv);
|
||||
rv = ccm->GetUnicodeEncoder(mEncoding.get(), getter_AddRefs(mEncoder));
|
||||
if (mEncoder && NS_SUCCEEDED(rv)) {
|
||||
mEncoder->SetOutputErrorBehavior(mEncoder->kOnError_Signal, nsnull, '?');
|
||||
}
|
||||
NS_ENSURE_SUCCESS(rv, rv);
|
||||
}
|
||||
}
|
||||
|
||||
/* parse in the typical fault correcting table */
|
||||
if (Substring(line,0,3).Equals("REP")) {
|
||||
PRInt32 numFields = SplitString(line, cmds, 3);
|
||||
|
||||
if (numFields == 2)
|
||||
numents = atoi(cmds[1].get());
|
||||
|
||||
mReplaceTable = new mozReplaceTable[numents];
|
||||
mReplaceTableLength = numents;
|
||||
|
||||
PRInt32 i = 0;
|
||||
nsAutoString pattern, replacement;
|
||||
|
||||
for (j = 0; (j < numents) && moreData; j++) {
|
||||
NS_ReadLine(strm,lineBuffer,line,&moreData);
|
||||
|
||||
numFields = SplitString(line, cmds, 3);
|
||||
|
||||
if(!cmds[0].Equals("REP")) { //consistency check
|
||||
NS_WARNING("REP line from .aff file is inconsitent");
|
||||
continue;
|
||||
}
|
||||
|
||||
rv = DecodeString(cmds[1], pattern);
|
||||
NS_ENSURE_SUCCESS(rv, rv);
|
||||
rv = DecodeString(cmds[2], replacement);
|
||||
NS_ENSURE_SUCCESS(rv, rv);
|
||||
// Make sure the replacements are lower case.
|
||||
// We don't want to convert them for every lookup.
|
||||
ToLowerCase(pattern);
|
||||
ToLowerCase(replacement);
|
||||
mReplaceTable[i].pattern = pattern.get();
|
||||
mReplaceTable[i].replacement = replacement.get();
|
||||
|
||||
i++;
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -207,7 +258,7 @@ nsresult myspAffixMgr::parse_file(nsIInputStream *strm)
|
|||
numents = 0;
|
||||
ff=0;
|
||||
// split line into pieces
|
||||
PRInt32 numFields=SplitString(line,cmds,5);
|
||||
PRInt32 numFields=SplitString(line, cmds, 5);
|
||||
if(numFields > 1)flag=cmds[1].First();
|
||||
if((numFields > 2)&&(cmds[2].First()=='Y'))ff=XPRODUCT;
|
||||
if(numFields >3)numents = atoi(cmds[3].get());
|
||||
|
@ -219,7 +270,7 @@ nsresult myspAffixMgr::parse_file(nsIInputStream *strm)
|
|||
nsString tempStr;
|
||||
|
||||
if((numFields < 5)||(cmds[1].First()!=flag)){ //consistency check
|
||||
//complain loudly
|
||||
NS_WARNING("PFX/SFX line from .aff file is inconsitent");
|
||||
continue;
|
||||
}
|
||||
if(cmds[3].Equals("0")){
|
||||
|
@ -239,7 +290,7 @@ nsresult myspAffixMgr::parse_file(nsIInputStream *strm)
|
|||
else{ // cmds[2] != 0
|
||||
newMod.mAppend.Assign( cmds[2]);
|
||||
if((cmds[2].Length()>cmds[4].Length())||!cmds[2].Equals(Substring(cmds[4],0,cmds[2].Length()))){
|
||||
//complain loudly
|
||||
NS_WARNING("PFX/SFX line from .aff file is inconsitent");
|
||||
continue;
|
||||
}
|
||||
cmds[3].Append(Substring(cmds[4],cmds[2].Length(),cmds[4].Length()-cmds[2].Length()));
|
||||
|
@ -262,7 +313,7 @@ nsresult myspAffixMgr::parse_file(nsIInputStream *strm)
|
|||
newMod.mAppend.Assign( cmds[2]);
|
||||
if((cmds[2].Length()>cmds[4].Length())||
|
||||
!cmds[2].Equals(Substring(cmds[4],cmds[4].Length()-cmds[2].Length(),cmds[2].Length()))){
|
||||
//complain loudly
|
||||
NS_WARNING("PFX/SFX line from .aff file is inconsitent");
|
||||
continue;
|
||||
}
|
||||
suffixTest=Substring(cmds[4],0,cmds[4].Length()-cmds[2].Length());
|
||||
|
@ -276,17 +327,7 @@ nsresult myspAffixMgr::parse_file(nsIInputStream *strm)
|
|||
}
|
||||
}
|
||||
|
||||
// We do this here, instead of where we set the charset,
|
||||
// to prevent all kind of leakage in case it fails.
|
||||
nsCOMPtr<nsICharsetConverterManager> ccm = do_GetService(kCharsetConverterManagerCID, &rv);
|
||||
NS_ENSURE_SUCCESS(rv, rv);
|
||||
rv = ccm->GetUnicodeDecoder(mEncoding.get(), getter_AddRefs(mDecoder));
|
||||
NS_ENSURE_SUCCESS(rv, rv);
|
||||
rv = ccm->GetUnicodeEncoder(mEncoding.get(), getter_AddRefs(mEncoder));
|
||||
if (mEncoder && NS_SUCCEEDED(rv)) {
|
||||
mEncoder->SetOutputErrorBehavior(mEncoder->kOnError_Signal, nsnull, '?');
|
||||
}
|
||||
return rv;
|
||||
return NS_OK;
|
||||
}
|
||||
|
||||
|
||||
|
@ -356,6 +397,16 @@ void myspAffixMgr::get_try_string(nsAString &aTryString)
|
|||
}
|
||||
}
|
||||
|
||||
// Return the replacement (REP) table array held by this manager.
// The pointer is nsnull when no table has been allocated. The array is
// freed by the destructor, so callers must not delete it.
mozReplaceTable *
myspAffixMgr::getReplaceTable()
{
  return this->mReplaceTable;
}
|
||||
|
||||
// Return the number of entries recorded for the replacement (REP) table,
// i.e. the size of the array returned by getReplaceTable().
PRUint32
myspAffixMgr::getReplaceTableLength()
{
  return this->mReplaceTableLength;
}
|
||||
|
||||
PRBool
|
||||
myspAffixMgr::prefixCheck(const nsAFlatCString &word)
|
||||
{
|
||||
|
@ -465,6 +516,27 @@ PRBool myspAffixMgr::check(const nsAFlatString &word)
|
|||
return good;
|
||||
}
|
||||
|
||||
// Convert aSource to UTF-16 using mDecoder and store the result in aDest.
//
// When no decoder is available, aDest is set to the empty string and
// NS_OK is returned. Otherwise the result of the decoder's Convert() call
// is returned; aDest receives whatever Convert() produced, even when
// Convert() reports a failure.
nsresult
myspAffixMgr::DecodeString(const nsAFlatCString &aSource, nsAString &aDest)
{
  if (!mDecoder) {
    aDest.Assign(NS_LITERAL_STRING(""));
    return NS_OK;
  }

  // Ask the decoder for an upper bound on the output size.
  PRInt32 srcLength = aSource.Length();
  PRInt32 destLength;
  nsresult rv = mDecoder->GetMaxLength(aSource.get(), srcLength, &destLength);
  NS_ENSURE_SUCCESS(rv, rv);

  // One extra slot for the terminating null character.
  PRUnichar *buffer =
    static_cast<PRUnichar *>(malloc(sizeof(PRUnichar) * (destLength + 1)));
  if (!buffer)
    return NS_ERROR_OUT_OF_MEMORY;

  // Convert() updates destLength to the number of characters written.
  rv = mDecoder->Convert(aSource.get(), &srcLength, buffer, &destLength);
  buffer[destLength] = 0;

  aDest.Assign(buffer);
  free(buffer);

  return rv;
}
|
||||
|
||||
|
||||
static PRInt32
|
||||
SplitString(nsACString &in,nsCString out[],PRInt32 size)
|
||||
|
|
|
@ -79,6 +79,10 @@
|
|||
|
||||
#define XPRODUCT 1
|
||||
|
||||
struct mozReplaceTable {
|
||||
nsString pattern;
|
||||
nsString replacement;
|
||||
};
|
||||
|
||||
class myspPrefix;
|
||||
class myspSuffix;
|
||||
|
@ -91,6 +95,8 @@ public:
|
|||
~myspAffixMgr();
|
||||
nsresult GetPersonalDictionary(mozIPersonalDictionary * *aPersonalDictionary);
|
||||
nsresult SetPersonalDictionary(mozIPersonalDictionary * aPersonalDictionary);
|
||||
mozReplaceTable *getReplaceTable();
|
||||
PRUint32 getReplaceTableLength();
|
||||
PRBool check(const nsAFlatString &word);
|
||||
void get_try_string(nsAString &aTryString);
|
||||
nsresult Load(const nsString &aDictionary);
|
||||
|
@ -101,7 +107,9 @@ protected:
|
|||
PRBool suffixCheck(const nsAFlatCString &word,PRBool cross=PR_FALSE,char crossID=' ');
|
||||
|
||||
nsresult LoadDictionary(nsIInputStream *strm);
|
||||
nsresult parse_file(nsIInputStream *strm);
|
||||
nsresult parse_file(nsIInputStream *strm);
|
||||
|
||||
nsresult DecodeString(const nsAFlatCString &aSource, nsAString &aDest);
|
||||
|
||||
mozAffixState prefixes;
|
||||
mozAffixState suffixes;
|
||||
|
@ -110,6 +118,8 @@ protected:
|
|||
nsCString mEncoding;
|
||||
nsString mLanguage;
|
||||
mozCStr2CStrHashtable mHashTable;
|
||||
mozReplaceTable *mReplaceTable;
|
||||
PRUint32 mReplaceTableLength;
|
||||
nsCOMPtr<mozIPersonalDictionary> mPersonalDictionary;
|
||||
nsCOMPtr<nsIUnicodeEncoder> mEncoder;
|
||||
nsCOMPtr<nsIUnicodeDecoder> mDecoder;
|
||||
|
|
|
@ -57,6 +57,7 @@
|
|||
#include "plstr.h"
|
||||
#include "nsReadableUtils.h"
|
||||
#include "nsMemory.h"
|
||||
#include "nsUnicharUtils.h"
|
||||
|
||||
myspSuggestMgr::myspSuggestMgr()
|
||||
{
|
||||
|
@ -104,9 +105,15 @@ nsresult myspSuggestMgr::suggest(PRUnichar ***slst,const nsAFlatString &word, PR
|
|||
nsug=*num;
|
||||
}
|
||||
|
||||
// perhaps we made a typical spelling error.
|
||||
res = replacechars(wlst, word, &nsug);
|
||||
|
||||
// did we forget to add a char
|
||||
res = forgotchar(wlst, word, &nsug);
|
||||
|
||||
if ((nsug < maxSug) && NS_SUCCEEDED(res)){
|
||||
res = forgotchar(wlst, word, &nsug);
|
||||
}
|
||||
|
||||
// did we swap the order of chars by mistake
|
||||
if ((nsug < maxSug) && NS_SUCCEEDED(res)){
|
||||
res = swapchar(wlst, word, &nsug);
|
||||
|
@ -141,6 +148,60 @@ nsresult myspSuggestMgr::suggest(PRUnichar ***slst,const nsAFlatString &word, PR
|
|||
}
|
||||
|
||||
|
||||
// suggestions for a typical spelling error that
|
||||
// differs by more than 1 letter from the right spelling
|
||||
nsresult myspSuggestMgr::replacechars(PRUnichar ** wlst,const nsAFlatString &word, PRUint32 *ns)
|
||||
{
|
||||
nsAutoString candidate;
|
||||
PRBool cwrd;
|
||||
PRUint32 i,k;
|
||||
PRUint32 startOffset, findOffset;
|
||||
|
||||
if (word.Length() < 2 || !pAMgr)
|
||||
return NS_OK;
|
||||
|
||||
PRUint32 replaceTableLength = pAMgr->getReplaceTableLength();
|
||||
struct mozReplaceTable *replaceTable = pAMgr->getReplaceTable();
|
||||
|
||||
if (replaceTable == nsnull)
|
||||
return NS_OK;
|
||||
|
||||
for (i = 0; i < replaceTableLength; i++) {
|
||||
startOffset = 0;
|
||||
|
||||
candidate.Assign(word);
|
||||
ToLowerCase(candidate);
|
||||
|
||||
while ((findOffset = candidate.Find(replaceTable[i].pattern, startOffset)) != -1) {
|
||||
candidate.Assign(word);
|
||||
ToLowerCase(candidate);
|
||||
candidate.Replace(findOffset, replaceTable[i].pattern.Length(), replaceTable[i].replacement);
|
||||
|
||||
cwrd = PR_TRUE;
|
||||
for (k = 0; k < *ns; k++) {
|
||||
if (candidate.Equals(wlst[k])){
|
||||
cwrd = PR_FALSE;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if (cwrd && pAMgr->check(candidate)) {
|
||||
if (*ns < maxSug) {
|
||||
wlst[*ns] = ToNewUnicode(candidate);
|
||||
if (!wlst[*ns])
|
||||
return NS_ERROR_OUT_OF_MEMORY;
|
||||
(*ns)++;
|
||||
} else {
|
||||
return NS_OK;
|
||||
}
|
||||
}
|
||||
|
||||
startOffset = findOffset + replaceTable[i].pattern.Length();
|
||||
}
|
||||
}
|
||||
|
||||
return NS_OK;
|
||||
}
|
||||
|
||||
// error is wrong char in place of correct one
|
||||
nsresult myspSuggestMgr::badchar(PRUnichar ** wlst,const nsAFlatString &word, PRUint32 *ns)
|
||||
|
|
|
@ -78,6 +78,7 @@ public:
|
|||
nsresult suggest(PRUnichar ***slst, const nsAFlatString &word, PRUint32 *num);
|
||||
|
||||
protected:
|
||||
nsresult replacechars(PRUnichar **wlst,const nsAFlatString &word, PRUint32 *num);
|
||||
nsresult forgotchar(PRUnichar **wlst,const nsAFlatString &word, PRUint32 *num);
|
||||
nsresult swapchar(PRUnichar **wlst,const nsAFlatString &word, PRUint32 *num);
|
||||
nsresult extrachar(PRUnichar **wlst,const nsAFlatString &word, PRUint32 *num);
|
||||
|
|
Загрузка…
Ссылка в новой задаче