зеркало из https://github.com/mozilla/gecko-dev.git
Bug #240600 --> Spell checker improvements. Resynch our forked spell checker with the latest myspell engine
used by OpenOffice.org. Patch originally by Michiel van Leeuwen (mvl@exedo.nl), adapted by me. sr=me
This commit is contained in:
Родитель
f6f7bae800
Коммит
ef704532cc
|
@ -1,5 +1,5 @@
|
|||
SET ISO8859-1
|
||||
TRY esianrtolcdugmphbyfvkwzESIANRTOLCDUGMPHBYFVKWZ
|
||||
TRY esianrtolcdugmphbyfvkwzESIANRTOLCDUGMPHBYFVKWZ'
|
||||
|
||||
PFX A Y 1
|
||||
PFX A 0 re .
|
||||
|
|
|
@ -1,4 +1,4 @@
|
|||
62074
|
||||
62076
|
||||
a
|
||||
A
|
||||
AA
|
||||
|
@ -3663,7 +3663,7 @@ atrophy/DSGM
|
|||
atropine/SM
|
||||
Atropos/M
|
||||
Ats
|
||||
attach/BLGZMDR
|
||||
attach/BLGZMDRS
|
||||
attached/UA
|
||||
attacher/M
|
||||
attaché/S
|
||||
|
@ -37631,7 +37631,6 @@ NCC
|
|||
NCO
|
||||
NCR
|
||||
ND
|
||||
nd/A
|
||||
N'Djamena
|
||||
Ndjamena/M
|
||||
Nd/M
|
||||
|
@ -38835,6 +38834,7 @@ Novelia/M
|
|||
novelist/SM
|
||||
novelization/S
|
||||
novelize/GDS
|
||||
Novell/SM
|
||||
novella/SM
|
||||
novel/SM
|
||||
novelty/MS
|
||||
|
@ -46505,6 +46505,7 @@ Renato/M
|
|||
renaturation
|
||||
Renaud/M
|
||||
Renault/MS
|
||||
rend
|
||||
renderer/M
|
||||
render/GJRD
|
||||
rendering/M
|
||||
|
@ -61587,6 +61588,7 @@ xiii
|
|||
xi/M
|
||||
Ximenes/M
|
||||
Ximenez/M
|
||||
Ximian/SM
|
||||
Xingu/M
|
||||
xis
|
||||
xiv
|
||||
|
|
|
@ -53,37 +53,29 @@ IS_COMPONENT = 1
|
|||
|
||||
REQUIRES = xpcom \
|
||||
string \
|
||||
editor \
|
||||
content \
|
||||
layout \
|
||||
dom \
|
||||
necko \
|
||||
widget \
|
||||
gfx \
|
||||
txtsvc \
|
||||
uconv \
|
||||
unicharutil \
|
||||
spellchecker \
|
||||
nspr \
|
||||
intl \
|
||||
$(NULL)
|
||||
|
||||
CPPSRCS = \
|
||||
mozCStr2CStrHashtable.cpp \
|
||||
mozAffixMod.cpp \
|
||||
myspAffixmgr.cpp \
|
||||
mozMySpell.cpp \
|
||||
myspSuggestmgr.cpp \
|
||||
mozMySpellFactory.cpp \
|
||||
$(NULL)
|
||||
CPPSRCS = affentry.cpp \
|
||||
affixmgr.cpp \
|
||||
hashmgr.cpp \
|
||||
suggestmgr.cpp \
|
||||
csutil.cpp \
|
||||
myspell.cpp \
|
||||
mozMySpell.cpp \
|
||||
mozMySpellFactory.cpp \
|
||||
$(NULL)
|
||||
|
||||
EXTRA_DSO_LDOPTS = \
|
||||
-L$(DIST)/bin \
|
||||
-L$(DIST)/lib \
|
||||
$(XPCOM_LIBS) \
|
||||
$(NSPR_LIBS) \
|
||||
$(MOZ_UNICHARUTIL_LIBS) \
|
||||
$(MOZ_UNICHARUTIL_LIBS) \
|
||||
$(NULL)
|
||||
|
||||
include $(topsrcdir)/config/rules.mk
|
||||
|
||||
|
|
|
@ -0,0 +1,101 @@
|
|||
MySpell is a simple spell checker that uses affix
|
||||
compression and is modelled after the spell checker
|
||||
ispell.
|
||||
|
||||
MySpell was written to explore how affix compression
|
||||
can be implemented.
|
||||
|
||||
The Main features of MySpell are:
|
||||
|
||||
1. written in C++ to make it easier to interface with
|
||||
Pspell, OpenOffice, AbiWord, etc
|
||||
|
||||
2. it is stateless, uses no static variables and
|
||||
should be completely reentrant with almost no
|
||||
ifdefs
|
||||
|
||||
3. it tries to be as compatible with ispell to
|
||||
the extent it can. It can read slightly modified
|
||||
versions of munched ispell dictionaries (and it
|
||||
comes with a munched english wordlist borrowed from
|
||||
Kevin Atkinson's excellent Aspell).
|
||||
|
||||
4. it uses a heavily modified aff file format that
|
||||
can be derived from ispell aff files but uses
|
||||
the iso-8859-X character sets only
|
||||
|
||||
5. it is simple with *lots* of comments that
|
||||
describe how the affixes are stored
|
||||
and tested for (based on the approach used by
|
||||
ispell).
|
||||
|
||||
6. like ispell it has a BSD license (and no
|
||||
advertising clause)
|
||||
|
||||
But ... it has *no* support for adding words
|
||||
to a personal dictionary, *no* support for converting
|
||||
between various text encodings, and *no* command line
|
||||
interface (it is purely meant to be a library).
|
||||
|
||||
It can not (in any way) replace all of the functionality
|
||||
of ispell or aspell/pspell. It is meant as a learning
|
||||
tool for understanding affix compression and for
|
||||
being used by front ends like OpenOffice, Abiword, etc.
|
||||
|
||||
MySpell has been tested under Linux and Solaris
|
||||
and has the world's simplest Makefile and no
|
||||
configure support.
|
||||
|
||||
It does come with a simple example program that
|
||||
spell checks some words and returns suggestions.
|
||||
|
||||
To build a static library and an example
|
||||
program under Linux simply type:
|
||||
|
||||
tar -zxvf myspell.tar.gz
|
||||
cd myspell
|
||||
make
|
||||
|
||||
To run the example program:
|
||||
./example ./en_US.aff ./en_US.dic checkme.lst
|
||||
|
||||
Please play around with it and let me know
|
||||
what you think.
|
||||
|
||||
|
||||
Developer Credits:
|
||||
|
||||
Special credit and thanks go to ispell's creator Geoff Kuenning.
|
||||
Ispell affix compression code was used as the basis for the
|
||||
affix code used in MySpell. Specifically Geoff's use of a
|
||||
conds[] array that makes it easy to check if the conditions
|
||||
required for a particular affix are present was very
|
||||
ingenious! Kudos to Geoff. Very nicely done.
|
||||
BTW: ispell is available under a BSD style license
|
||||
from Geoff Kuenning's ispell website:
|
||||
http://www.cs.ucla.edu/ficus-members/geoff/ispell.html
|
||||
|
||||
|
||||
The Original MySpell code was written by Kevin Hendricks
|
||||
and released under a BSD license. An almost complete rewrite
|
||||
of MySpell for use by the Mozilla project has been developed by
|
||||
David Einstein (Deinst@world.std.com). David and I are now
|
||||
working on parallel development tracks to help our respective
|
||||
projects (Mozilla and OpenOffice.org) and we will maintain full
|
||||
affix file and dictionary file compatibility and work on merging our
|
||||
versions of MySpell back into a single tree. David has been
|
||||
a significant help in improving MySpell.
|
||||
|
||||
|
||||
Special thanks also go to La'szlo' Ne'meth <nemethl@gyorsposta.hu>
|
||||
who is the author of the Hungarian dictionary and who
|
||||
developed and contributed the code to support compound words in
|
||||
MySpell and fixed numerous problems with the encoding case conversion
|
||||
tables.
|
||||
|
||||
|
||||
Thanks,
|
||||
|
||||
Kevin Hendricks
|
||||
kevin.hendricks@sympatico.ca
|
||||
|
|
@ -0,0 +1,21 @@
|
|||
There is experimental support for languages that need to allow
|
||||
compound words. To enable compound word support, you need to
|
||||
add the following lines to your affix (.aff) file.
|
||||
|
||||
COMPOUNDFLAG x
|
||||
|
||||
COMPOUNDMIN #
|
||||
|
||||
where 'x' is replaced by a specific affix character flag that has
|
||||
been added to the dictionary (*.dic) file for words that can
|
||||
run together to make a new word. All subwords of the compound word
|
||||
must have this affix flag for the compound word to be correct.
|
||||
|
||||
and where '#' is replaced by the length of the shortest subword of
|
||||
a compound word. If the "COMPOUNDMIN" line is not found COMPOUNDMIN
|
||||
will default to 3
|
||||
|
||||
|
||||
This support is still under rapid revisions and will change in the
|
||||
future. Use only at your own risk.
|
||||
|
|
@ -0,0 +1,13 @@
|
|||
This is a straight copy of the openoffice myspell component
|
||||
|
||||
Changes made for mozilla:
|
||||
|
||||
* Renamed *.cxx to .cpp, for the build system
|
||||
* Replaced the makefile with a mozilla type makefile
|
||||
* Added a wrapper, implementing mozISpellCheckingEngine and calling myspell
|
||||
This wrapper does the conversion from unicode to the charset used
|
||||
by myspell for the current dictionary.
|
||||
* Rewrote get_current_cs to create tables when needed, to reduce size of
|
||||
the resulting library.
|
||||
* Commented out std namespace declarations from .cpp files (using namespace std;)
|
||||
* Removed #include <unistd.h> for the build system
|
|
@ -0,0 +1,18 @@
|
|||
Build instructions for munch and unmunch utilities
|
||||
---------------------------------------------------
|
||||
|
||||
Under Linux:
|
||||
|
||||
gcc -O2 -omunch -I. munch.c
|
||||
|
||||
gcc -O2 -ounmunch -I. unmunch.c
|
||||
|
||||
|
||||
To see the correct syntax, run
|
||||
|
||||
./munch
|
||||
|
||||
and
|
||||
|
||||
./unmunch
|
||||
|
|
@ -0,0 +1,390 @@
|
|||
#include "license.readme"
|
||||
|
||||
#include <cctype>
|
||||
#include <cstring>
|
||||
#include <cstdlib>
|
||||
#include <cstdio>
|
||||
|
||||
#include "affentry.hxx"
|
||||
|
||||
// using namespace std;
|
||||
|
||||
extern char * mystrdup(const char * s);
|
||||
extern char * myrevstrdup(const char * s);
|
||||
|
||||
// Build a prefix entry from a parsed affix record.
//
//   pmgr - affix manager this entry reports to; it is used by check() for
//          dictionary lookups and cross-product suffix checks.
//   dp   - parsed affix record. The strip/appnd pointers are taken over as-is
//          (not copied); the destructor releases them with free().
PfxEntry::PfxEntry(AffixMgr* pmgr, affentry* dp)
{
  // register affix manager
  pmyMgr = pmgr;

  // set up its initial values
  achar    = dp->achar;     // char flag
  strip    = dp->strip;     // string to strip
  appnd    = dp->appnd;     // string to append
  stripl   = dp->stripl;    // length of strip string
  appndl   = dp->appndl;    // length of append string
  numconds = dp->numconds;  // number of conditions to match
  xpflg    = dp->xpflg;     // cross product flag

  // then copy over all of the conditions
  memcpy(&conds[0], &dp->conds[0], SETSIZE * sizeof(conds[0]));

  // start out unlinked; every link (including flgnxt, which used to be left
  // uninitialized) is NULL until a setter is called
  next   = NULL;
  nextne = NULL;
  nexteq = NULL;
  flgnxt = NULL;
}
|
||||
|
||||
|
||||
// Release the strip/append strings owned by this entry and clear its fields.
PfxEntry::~PfxEntry()
{
  if (appnd != NULL) {
    free(appnd);
    appnd = NULL;
  }
  if (strip != NULL) {
    free(strip);
    strip = NULL;
  }
  achar  = '\0';
  pmyMgr = NULL;
}
|
||||
|
||||
|
||||
|
||||
// add prefix to this word assuming conditions hold
|
||||
char * PfxEntry::add(const char * word, int len)
|
||||
{
|
||||
int cond;
|
||||
char tword[MAXWORDLEN+1];
|
||||
|
||||
/* make sure all conditions match */
|
||||
if ((len > stripl) && (len >= numconds)) {
|
||||
unsigned char * cp = (unsigned char *) word;
|
||||
for (cond = 0; cond < numconds; cond++) {
|
||||
if ((conds[*cp++] & (1 << cond)) == 0)
|
||||
break;
|
||||
}
|
||||
if (cond >= numconds) {
|
||||
/* we have a match so add prefix */
|
||||
int tlen = 0;
|
||||
if (appndl) {
|
||||
strcpy(tword,appnd);
|
||||
tlen += appndl;
|
||||
}
|
||||
char * pp = tword + tlen;
|
||||
strcpy(pp, (word + stripl));
|
||||
return mystrdup(tword);
|
||||
}
|
||||
}
|
||||
return NULL;
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
// check if this prefix entry matches
|
||||
struct hentry * PfxEntry::check(const char * word, int len)
|
||||
{
|
||||
int cond; // condition number being examined
|
||||
int tmpl; // length of tmpword
|
||||
struct hentry * he; // hash entry of root word or NULL
|
||||
unsigned char * cp;
|
||||
char tmpword[MAXWORDLEN+1];
|
||||
|
||||
|
||||
// on entry prefix is 0 length or already matches the beginning of the word.
|
||||
// So if the remaining root word has positive length
|
||||
// and if there are enough chars in root word and added back strip chars
|
||||
// to meet the number of characters conditions, then test it
|
||||
|
||||
tmpl = len - appndl;
|
||||
|
||||
if ((tmpl > 0) && (tmpl + stripl >= numconds)) {
|
||||
|
||||
// generate new root word by removing prefix and adding
|
||||
// back any characters that would have been stripped
|
||||
|
||||
if (stripl) strcpy (tmpword, strip);
|
||||
strcpy ((tmpword + stripl), (word + appndl));
|
||||
|
||||
// now make sure all of the conditions on characters
|
||||
// are met. Please see the appendix at the end of
|
||||
// this file for more info on exactly what is being
|
||||
// tested
|
||||
|
||||
cp = (unsigned char *)tmpword;
|
||||
for (cond = 0; cond < numconds; cond++) {
|
||||
if ((conds[*cp++] & (1 << cond)) == 0) break;
|
||||
}
|
||||
|
||||
// if all conditions are met then check if resulting
|
||||
// root word in the dictionary
|
||||
|
||||
if (cond >= numconds) {
|
||||
tmpl += stripl;
|
||||
if ((he = pmyMgr->lookup(tmpword)) != NULL) {
|
||||
if (TESTAFF(he->astr, achar, he->alen)) return he;
|
||||
}
|
||||
|
||||
// prefix matched but no root word was found
|
||||
// if XPRODUCT is allowed, try again but now
|
||||
// ross checked combined with a suffix
|
||||
|
||||
if (xpflg & XPRODUCT) {
|
||||
he = pmyMgr->suffix_check(tmpword, tmpl, XPRODUCT, (AffEntry *)this);
|
||||
if (he) return he;
|
||||
}
|
||||
}
|
||||
}
|
||||
return NULL;
|
||||
}
|
||||
|
||||
|
||||
|
||||
// Build a suffix entry from a parsed affix record.
//
//   pmgr - affix manager this entry reports to; check() uses it for
//          dictionary lookups.
//   dp   - parsed affix record. The strip/appnd pointers are taken over as-is
//          (not copied); the destructor releases them with free().
//
// A reversed copy of the append string is kept in rappnd (returned by
// getKey()).
SfxEntry::SfxEntry(AffixMgr * pmgr, affentry* dp)
{
  // register affix manager
  pmyMgr = pmgr;

  // set up its initial values
  achar    = dp->achar;     // char flag
  strip    = dp->strip;     // string to strip
  appnd    = dp->appnd;     // string to append
  stripl   = dp->stripl;    // length of strip string
  appndl   = dp->appndl;    // length of append string
  numconds = dp->numconds;  // number of conditions to match
  xpflg    = dp->xpflg;     // cross product flag

  // then copy over all of the conditions
  memcpy(&conds[0], &dp->conds[0], SETSIZE * sizeof(conds[0]));

  rappnd = myrevstrdup(appnd);

  // start out unlinked, matching the prefix entry constructor; previously
  // these pointers were left uninitialized
  next   = NULL;
  nextne = NULL;
  nexteq = NULL;
  flgnxt = NULL;
}
|
||||
|
||||
|
||||
// Release the append, reversed-append and strip strings owned by this entry
// and clear its fields.
SfxEntry::~SfxEntry()
{
  if (appnd != NULL) {
    free(appnd);
    appnd = NULL;
  }
  if (rappnd != NULL) {
    free(rappnd);
  }
  if (strip != NULL) {
    free(strip);
    strip = NULL;
  }
  achar  = '\0';
  pmyMgr = NULL;
}
|
||||
|
||||
|
||||
|
||||
// add suffix to this word assuming conditions hold
|
||||
char * SfxEntry::add(const char * word, int len)
|
||||
{
|
||||
int cond;
|
||||
char tword[MAXWORDLEN+1];
|
||||
|
||||
/* make sure all conditions match */
|
||||
if ((len > stripl) && (len >= numconds)) {
|
||||
unsigned char * cp = (unsigned char *) (word + len);
|
||||
for (cond = numconds; --cond >=0; ) {
|
||||
if ((conds[*--cp] & (1 << cond)) == 0)
|
||||
break;
|
||||
}
|
||||
if (cond < 0) {
|
||||
/* we have a match so add suffix */
|
||||
strcpy(tword,word);
|
||||
int tlen = len;
|
||||
if (stripl) {
|
||||
tlen -= stripl;
|
||||
}
|
||||
char * pp = (tword + tlen);
|
||||
if (appndl) {
|
||||
strcpy(pp,appnd);
|
||||
tlen += appndl;
|
||||
} else *pp = '\0';
|
||||
return mystrdup(tword);
|
||||
}
|
||||
}
|
||||
return NULL;
|
||||
}
|
||||
|
||||
|
||||
|
||||
// see if this suffix is present in the word
|
||||
struct hentry * SfxEntry::check(const char * word, int len, int optflags, AffEntry* ppfx)
|
||||
{
|
||||
int tmpl; // length of tmpword
|
||||
int cond; // condition beng examined
|
||||
struct hentry * he; // hash entry pointer
|
||||
unsigned char * cp;
|
||||
char tmpword[MAXWORDLEN+1];
|
||||
PfxEntry* ep = (PfxEntry *) ppfx;
|
||||
|
||||
|
||||
// if this suffix is being cross checked with a prefix
|
||||
// but it does not support cross products skip it
|
||||
|
||||
if ((optflags & XPRODUCT) != 0 && (xpflg & XPRODUCT) == 0)
|
||||
return NULL;
|
||||
|
||||
// upon entry suffix is 0 length or already matches the end of the word.
|
||||
// So if the remaining root word has positive length
|
||||
// and if there are enough chars in root word and added back strip chars
|
||||
// to meet the number of characters conditions, then test it
|
||||
|
||||
tmpl = len - appndl;
|
||||
|
||||
if ((tmpl > 0) && (tmpl + stripl >= numconds)) {
|
||||
|
||||
// generate new root word by removing suffix and adding
|
||||
// back any characters that would have been stripped or
|
||||
// or null terminating the shorter string
|
||||
|
||||
strcpy (tmpword, word);
|
||||
cp = (unsigned char *)(tmpword + tmpl);
|
||||
if (stripl) {
|
||||
strcpy ((char *)cp, strip);
|
||||
tmpl += stripl;
|
||||
cp = (unsigned char *)(tmpword + tmpl);
|
||||
} else *cp = '\0';
|
||||
|
||||
// now make sure all of the conditions on characters
|
||||
// are met. Please see the appendix at the end of
|
||||
// this file for more info on exactly what is being
|
||||
// tested
|
||||
|
||||
for (cond = numconds; --cond >= 0; ) {
|
||||
if ((conds[*--cp] & (1 << cond)) == 0) break;
|
||||
}
|
||||
|
||||
// if all conditions are met then check if resulting
|
||||
// root word in the dictionary
|
||||
|
||||
if (cond < 0) {
|
||||
if ((he = pmyMgr->lookup(tmpword)) != NULL) {
|
||||
if (TESTAFF(he->astr, achar , he->alen) &&
|
||||
((optflags & XPRODUCT) == 0 ||
|
||||
TESTAFF(he->astr, ep->getFlag(), he->alen))) return he;
|
||||
}
|
||||
}
|
||||
}
|
||||
return NULL;
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
#if 0
|
||||
|
||||
Appendix: Understanding Affix Code
|
||||
|
||||
|
||||
An affix is either a prefix or a suffix attached to root words to make
|
||||
other words.
|
||||
|
||||
Basically a Prefix or a Suffix is set of AffEntry objects
|
||||
which store information about the prefix or suffix along
|
||||
with supporting routines to check if a word has a particular
|
||||
prefix or suffix or a combination.
|
||||
|
||||
The structure affentry is defined as follows:
|
||||
|
||||
struct affentry
|
||||
{
|
||||
unsigned char achar; // char used to represent the affix
|
||||
char * strip; // string to strip before adding affix
|
||||
char * appnd; // the affix string to add
|
||||
short stripl; // length of the strip string
|
||||
short appndl; // length of the affix string
|
||||
short numconds; // the number of conditions that must be met
|
||||
short xpflg; // flag: XPRODUCT- combine both prefix and suffix
|
||||
char conds[SETSIZE]; // array which encodes the conditions to be met
|
||||
};
|
||||
|
||||
|
||||
Here is a suffix borrowed from the en_US.aff file. This file
|
||||
is whitespace delimited.
|
||||
|
||||
SFX D Y 4
|
||||
SFX D 0 e d
|
||||
SFX D y ied [^aeiou]y
|
||||
SFX D 0 ed [^ey]
|
||||
SFX D 0 ed [aeiou]y
|
||||
|
||||
This information can be interpreted as follows:
|
||||
|
||||
The first line has 4 fields
|
||||
|
||||
Field
|
||||
-----
|
||||
1 SFX - indicates this is a suffix
|
||||
2 D - is the name of the character flag which represents this suffix
|
||||
3 Y - indicates it can be combined with prefixes (cross product)
|
||||
4 4 - indicates that sequence of 4 affentry structures are needed to
|
||||
properly store the affix information
|
||||
|
||||
The remaining lines describe the unique information for the 4 SfxEntry
|
||||
objects that make up this affix. Each line can be interpreted
|
||||
as follows: (note fields 1 and 2 are as a check against line 1 info)
|
||||
|
||||
Field
|
||||
-----
|
||||
1 SFX - indicates this is a suffix
|
||||
2 D - is the name of the character flag for this affix
|
||||
3 y - the string of chars to strip off before adding affix
|
||||
(a 0 here indicates the NULL string)
|
||||
4 ied - the string of affix characters to add
|
||||
5 [^aeiou]y - the conditions which must be met before the affix
|
||||
can be applied
|
||||
|
||||
Field 5 is interesting. Since this is a suffix, field 5 tells us that
|
||||
there are 2 conditions that must be met. The first condition is that
|
||||
the next to the last character in the word must *NOT* be any of the
|
||||
following "a", "e", "i", "o" or "u". The second condition is that
|
||||
the last character of the word must end in "y".
|
||||
|
||||
So how can we encode this information concisely and be able to
|
||||
test for both conditions in a fast manner? The answer is found
|
||||
by studying the wonderful ispell code of Geoff Kuenning, et.al.
|
||||
(now available under a normal BSD license).
|
||||
|
||||
If we set up a conds array of 256 bytes indexed (0 to 255) and access it
|
||||
using a character (cast to an unsigned char) of a string, we have 8 bits
|
||||
of information we can store about that character. Specifically we
|
||||
could use each bit to say if that character is allowed in any of the
|
||||
last (or first for prefixes) 8 characters of the word.
|
||||
|
||||
Basically, each character at one end of the word (up to the number
|
||||
of conditions) is used to index into the conds array and the resulting
|
||||
value found there says whether that character is valid for a
|
||||
specific character position in the word.
|
||||
|
||||
For prefixes, it does this by setting bit 0 if that char is valid
|
||||
in the first position, bit 1 if valid in the second position, and so on.
|
||||
|
||||
If a bit is not set, then that char is not valid for that position in the
|
||||
word.
|
||||
|
||||
If working with suffixes bit 0 is used for the character closest
|
||||
to the front, bit 1 for the next character towards the end, ...,
|
||||
with bit numconds-1 representing the last char at the end of the string.
|
||||
|
||||
Note: since entries in the conds[] are 8 bits, only 8 conditions
|
||||
(read that only 8 character positions) can be examined at one
|
||||
end of a word (the beginning for prefixes and the end for suffixes).
|
||||
|
||||
So to make this clearer, lets encode the conds array values for the
|
||||
first two affentries for the suffix D described earlier.
|
||||
|
||||
|
||||
For the first affentry:
|
||||
numconds = 1 (only examine the last character)
|
||||
|
||||
conds['e'] = (1 << 0) (the word must end in an E)
|
||||
all others are all 0
|
||||
|
||||
For the second affentry:
|
||||
numconds = 2 (only examine the last two characters)
|
||||
|
||||
conds[X] = conds[X] | (1 << 0) (aeiou are not allowed)
|
||||
where X is all characters *but* a, e, i, o, or u
|
||||
|
||||
|
||||
conds['y'] = (1 << 1) (the last char must be a y)
|
||||
all other bits for all other entries in the conds array are zero
|
||||
|
||||
|
||||
#endif
|
||||
|
|
@ -0,0 +1,86 @@
|
|||
#ifndef _AFFIX_HXX_
|
||||
#define _AFFIX_HXX_
|
||||
|
||||
#include "atypes.hxx"
|
||||
#include "baseaffix.hxx"
|
||||
#include "affixmgr.hxx"
|
||||
|
||||
|
||||
/* A Prefix Entry */
|
||||
|
||||
class PfxEntry : public AffEntry
|
||||
{
|
||||
AffixMgr* pmyMgr;
|
||||
|
||||
PfxEntry * next;
|
||||
PfxEntry * nexteq;
|
||||
PfxEntry * nextne;
|
||||
PfxEntry * flgnxt;
|
||||
|
||||
public:
|
||||
|
||||
PfxEntry(AffixMgr* pmgr, affentry* dp );
|
||||
~PfxEntry();
|
||||
|
||||
struct hentry * check(const char * word, int len);
|
||||
|
||||
inline bool allowCross() { return ((xpflg & XPRODUCT) != 0); }
|
||||
inline unsigned char getFlag() { return achar; }
|
||||
inline const char * getKey() { return appnd; }
|
||||
char * add(const char * word, int len);
|
||||
|
||||
inline PfxEntry * getNext() { return next; }
|
||||
inline PfxEntry * getNextNE() { return nextne; }
|
||||
inline PfxEntry * getNextEQ() { return nexteq; }
|
||||
inline PfxEntry * getFlgNxt() { return flgnxt; }
|
||||
|
||||
inline void setNext(PfxEntry * ptr) { next = ptr; }
|
||||
inline void setNextNE(PfxEntry * ptr) { nextne = ptr; }
|
||||
inline void setNextEQ(PfxEntry * ptr) { nexteq = ptr; }
|
||||
inline void setFlgNxt(PfxEntry * ptr) { flgnxt = ptr; }
|
||||
};
|
||||
|
||||
|
||||
|
||||
|
||||
/* A Suffix Entry */
|
||||
|
||||
class SfxEntry : public AffEntry
|
||||
{
|
||||
AffixMgr* pmyMgr;
|
||||
char * rappnd;
|
||||
|
||||
SfxEntry * next;
|
||||
SfxEntry * nexteq;
|
||||
SfxEntry * nextne;
|
||||
SfxEntry * flgnxt;
|
||||
|
||||
public:
|
||||
|
||||
SfxEntry(AffixMgr* pmgr, affentry* dp );
|
||||
~SfxEntry();
|
||||
|
||||
struct hentry * check(const char * word, int len, int optflags,
|
||||
AffEntry* ppfx);
|
||||
|
||||
inline bool allowCross() { return ((xpflg & XPRODUCT) != 0); }
|
||||
inline unsigned char getFlag() { return achar; }
|
||||
inline const char * getKey() { return rappnd; }
|
||||
char * add(const char * word, int len);
|
||||
|
||||
inline SfxEntry * getNext() { return next; }
|
||||
inline SfxEntry * getNextNE() { return nextne; }
|
||||
inline SfxEntry * getNextEQ() { return nexteq; }
|
||||
inline SfxEntry * getFlgNxt() { return flgnxt; }
|
||||
|
||||
inline void setNext(SfxEntry * ptr) { next = ptr; }
|
||||
inline void setNextNE(SfxEntry * ptr) { nextne = ptr; }
|
||||
inline void setNextEQ(SfxEntry * ptr) { nexteq = ptr; }
|
||||
inline void setFlgNxt(SfxEntry * ptr) { flgnxt = ptr; }
|
||||
|
||||
};
|
||||
|
||||
|
||||
#endif
|
||||
|
||||
|
Разница между файлами не показана из-за своего большого размера
Загрузить разницу
|
@ -0,0 +1,70 @@
|
|||
#ifndef _AFFIXMGR_HXX_
|
||||
#define _AFFIXMGR_HXX_
|
||||
|
||||
#include "atypes.hxx"
|
||||
#include "baseaffix.hxx"
|
||||
#include "hashmgr.hxx"
|
||||
#include <cstdio>
|
||||
|
||||
class AffixMgr
|
||||
{
|
||||
|
||||
AffEntry * pStart[SETSIZE];
|
||||
AffEntry * sStart[SETSIZE];
|
||||
AffEntry * pFlag[SETSIZE];
|
||||
AffEntry * sFlag[SETSIZE];
|
||||
HashMgr * pHMgr;
|
||||
char * trystring;
|
||||
char * encoding;
|
||||
char * compound;
|
||||
int cpdmin;
|
||||
int numrep;
|
||||
replentry * reptable;
|
||||
int nummap;
|
||||
mapentry * maptable;
|
||||
bool nosplitsugs;
|
||||
|
||||
|
||||
public:
|
||||
|
||||
AffixMgr(const char * affpath, HashMgr * ptr);
|
||||
~AffixMgr();
|
||||
struct hentry * affix_check(const char * word, int len);
|
||||
struct hentry * prefix_check(const char * word, int len);
|
||||
struct hentry * suffix_check(const char * word, int len, int sfxopts, AffEntry* ppfx);
|
||||
int expand_rootword(struct guessword * wlst, int maxn,
|
||||
const char * ts, int wl, const char * ap, int al);
|
||||
struct hentry * compound_check(const char * word, int len, char compound_flag);
|
||||
struct hentry * lookup(const char * word);
|
||||
int get_numrep();
|
||||
struct replentry * get_reptable();
|
||||
int get_nummap();
|
||||
struct mapentry * get_maptable();
|
||||
char * get_encoding();
|
||||
char * get_try_string();
|
||||
char * get_compound();
|
||||
bool get_nosplitsugs();
|
||||
|
||||
private:
|
||||
int parse_file(const char * affpath);
|
||||
int parse_try(char * line);
|
||||
int parse_set(char * line);
|
||||
int parse_cpdflag(char * line);
|
||||
int parse_cpdmin(char * line);
|
||||
int parse_reptable(char * line, FILE * af);
|
||||
int parse_maptable(char * line, FILE * af);
|
||||
int parse_affix(char * line, const char at, FILE * af);
|
||||
|
||||
void encodeit(struct affentry * ptr, char * cs);
|
||||
int build_pfxtree(AffEntry* pfxptr);
|
||||
int build_sfxtree(AffEntry* sfxptr);
|
||||
AffEntry* process_sfx_in_order(AffEntry* ptr, AffEntry* nptr);
|
||||
AffEntry* process_pfx_in_order(AffEntry* ptr, AffEntry* nptr);
|
||||
int process_pfx_tree_to_list();
|
||||
int process_sfx_tree_to_list();
|
||||
int process_pfx_order();
|
||||
int process_sfx_order();
|
||||
};
|
||||
|
||||
#endif
|
||||
|
|
@ -0,0 +1,45 @@
|
|||
#ifndef _ATYPES_HXX_
|
||||
#define _ATYPES_HXX_
|
||||
|
||||
#define SETSIZE 256
|
||||
#define MAXAFFIXES 256
|
||||
#define MAXWORDLEN 100
|
||||
#define XPRODUCT (1 << 0)
|
||||
|
||||
#define MAXLNLEN 1024
|
||||
|
||||
#define TESTAFF( a , b , c ) memchr((void *)(a), (int)(b), (size_t)(c) )
|
||||
|
||||
struct affentry
|
||||
{
|
||||
char * strip;
|
||||
char * appnd;
|
||||
short stripl;
|
||||
short appndl;
|
||||
short numconds;
|
||||
short xpflg;
|
||||
char achar;
|
||||
char conds[SETSIZE];
|
||||
};
|
||||
|
||||
// One pattern/replacement pair.
struct replentry
{
  char * pattern;      // text to look for
  char * replacement;  // text to use in its place
};
|
||||
|
||||
// One map table entry: a character set and a length.
struct mapentry
{
  char * set;  // the characters in this entry
  int    len;  // NOTE(review): presumably the number of chars in set -- confirm
};
|
||||
|
||||
// A candidate word paired with a boolean flag.
struct guessword
{
  char * word;   // the candidate word
  bool   allow;  // NOTE(review): meaning is defined by the code that fills this in
};
|
||||
|
||||
#endif
|
||||
|
||||
|
||||
|
||||
|
||||
|
|
@ -0,0 +1,17 @@
|
|||
#ifndef _BASEAFF_HXX_
|
||||
#define _BASEAFF_HXX_
|
||||
|
||||
class AffEntry
|
||||
{
|
||||
protected:
|
||||
char * appnd;
|
||||
char * strip;
|
||||
short appndl;
|
||||
short stripl;
|
||||
short numconds;
|
||||
short xpflg;
|
||||
char achar;
|
||||
char conds[SETSIZE];
|
||||
};
|
||||
|
||||
#endif
|
|
@ -0,0 +1,293 @@
|
|||
#include <cstdlib>
|
||||
#include <cstring>
|
||||
#include <cstdio>
|
||||
#include "csutil.hxx"
|
||||
|
||||
#include "nsCOMPtr.h"
|
||||
#include "nsServiceManagerUtils.h"
|
||||
#include "nsIUnicodeEncoder.h"
|
||||
#include "nsIUnicodeDecoder.h"
|
||||
#include "nsICaseConversion.h"
|
||||
#include "nsICharsetConverterManager.h"
|
||||
#include "nsUnicharUtilCIID.h"
|
||||
#include "nsUnicharUtils.h"
|
||||
|
||||
static NS_DEFINE_CID(kCharsetConverterManagerCID, NS_ICHARSETCONVERTERMANAGER_CID);
|
||||
static NS_DEFINE_CID(kUnicharUtilCID, NS_UNICHARUTIL_CID);
|
||||
|
||||
// using namespace std;
|
||||
|
||||
// Split a string into tokens based on a single char delimiter.
// Acts like strsep() but only uses a delim char and not a delim string,
// and it does not modify the input: each token is returned as a newly
// malloc'ed, null terminated copy that the caller must free().
//
//   stringp - in/out cursor; on success it is advanced past the token and
//             its delimiter (or to the terminating null when no delimiter
//             remains)
//   delim   - the delimiter character
//
// Returns the next token (possibly the empty string when the cursor sits on
// a delimiter), or NULL when the cursor is NULL, the remaining string is
// empty, or memory could not be allocated (the cursor is then left as is).
char * mystrsep(char ** stringp, const char delim)
{
  char * mp = *stringp;
  if (mp == NULL) return NULL;

  size_t n = strlen(mp);
  if (n == 0) return NULL;

  // the token ends at the first delimiter, or at the end of the string
  char * dp = (char *) memchr(mp, (int)((unsigned char) delim), n);
  size_t nc = (dp != NULL) ? (size_t)(dp - mp) : n;

  char * rv = (char *) malloc(nc + 1);
  if (rv == NULL) return NULL;
  memcpy(rv, mp, nc);
  rv[nc] = '\0';

  *stringp = (dp != NULL) ? (dp + 1) : (mp + n);
  return rv;
}
|
||||
|
||||
|
||||
// ANSI replacement for strdup(): returns a malloc'ed copy of s (including
// its terminating null), or NULL when s is NULL or the allocation fails.
// The caller releases the copy with free().
char * mystrdup(const char * s)
{
  if (s == NULL) return NULL;

  const int bytes = (int) strlen(s) + 1;   // characters plus terminator
  char * copy = (char *) malloc(bytes * sizeof(char));
  if (copy != NULL) {
    memcpy(copy, s, bytes * sizeof(char));
  }
  return copy;
}
|
||||
|
||||
|
||||
// Remove cross-platform text line end characters from the end of s, in
// place: a trailing "\n", "\r" or "\r\n" is cut off.
//
// The '\r' in front of the last character is only removed when the last
// character itself was a line terminator; previously a string such as
// "x\ry" was truncated to "x" even though it has no line end at all.
void mychomp(char * s)
{
  size_t k = strlen(s);
  if (k == 0) return;

  char last = s[k-1];
  if ((last != '\r') && (last != '\n')) return;   // no line end present

  s[k-1] = '\0';
  if ((k > 1) && (s[k-2] == '\r')) s[k-2] = '\0';
}
|
||||
|
||||
|
||||
// Does an ANSI strdup of the reverse of a string: returns a malloc'ed,
// null terminated copy of s with its characters in reverse order, or NULL
// when s is NULL or the allocation fails. The caller releases it with free().
//
// The copy is index based so that no pointer before the start of s is ever
// formed; the earlier pointer walk computed s - 1 for an empty string.
char * myrevstrdup(const char * s)
{
  if (s == NULL) return NULL;

  size_t sl = strlen(s);
  char * d = (char *) malloc((sl + 1) * sizeof(char));
  if (d == NULL) return NULL;

  for (size_t i = 0; i < sl; i++) {
    d[i] = s[sl - 1 - i];
  }
  d[sl] = '\0';
  return d;
}
|
||||
|
||||
#if 0
|
||||
// return 1 if s1 is a leading subset of s2
|
||||
int isSubset(const char * s1, const char * s2)
|
||||
{
|
||||
int l1 = strlen(s1);
|
||||
int l2 = strlen(s2);
|
||||
if (l1 > l2) return 0;
|
||||
if (strncmp(s2,s1,l1) == 0) return 1;
|
||||
return 0;
|
||||
}
|
||||
#endif
|
||||
|
||||
|
||||
// Return 1 if s1 is a leading subset (i.e. a prefix) of s2, 0 otherwise.
// An empty s1 is a leading subset of every string.
int isSubset(const char * s1, const char * s2)
{
  for (; *s1 != '\0'; ++s1, ++s2) {
    // a mismatch (including s2 ending first) means s1 is not a prefix
    if (*s1 != *s2) return 0;
  }
  return 1;
}
|
||||
|
||||
|
||||
// Return 1 if s1 (stored reversed) is a leading subset of the end of s2.
//
//   s1        - reversed string to match
//   end_of_s2 - pointer to the LAST character of s2; it is walked backwards
//   len       - number of characters of s2 that may be examined
//
// Returns 1 only when all of s1 was matched within len characters.
int isRevSubset(const char * s1, const char * end_of_s2, int len)
{
  for (int left = len; left > 0; --left) {
    if ((*s1 == '\0') || (*s1 != *end_of_s2)) break;
    ++s1;
    --end_of_s2;
  }
  return (*s1 == '\0') ? 1 : 0;
}
|
||||
|
||||
|
||||
#if 0
|
||||
// Not needed in mozilla
|
||||
// convert null terminated string to all caps using encoding
|
||||
void enmkallcap(char * d, const char * p, const char * encoding)
|
||||
{
|
||||
struct cs_info * csconv = get_current_cs(encoding);
|
||||
while (*p != '\0') {
|
||||
*d++ = csconv[((unsigned char) *p)].cupper;
|
||||
p++;
|
||||
}
|
||||
*d = '\0';
|
||||
}
|
||||
|
||||
|
||||
// convert null terminated string to all little using encoding
|
||||
void enmkallsmall(char * d, const char * p, const char * encoding)
|
||||
{
|
||||
struct cs_info * csconv = get_current_cs(encoding);
|
||||
while (*p != '\0') {
|
||||
*d++ = csconv[((unsigned char) *p)].clower;
|
||||
p++;
|
||||
}
|
||||
*d = '\0';
|
||||
}
|
||||
|
||||
|
||||
// convert null terminated string to have intial capital using encoding
|
||||
void enmkinitcap(char * d, const char * p, const char * encoding)
|
||||
{
|
||||
struct cs_info * csconv = get_current_cs(encoding);
|
||||
memcpy(d,p,(strlen(p)+1));
|
||||
if (*p != '\0') *d= csconv[((unsigned char)*p)].cupper;
|
||||
}
|
||||
#endif
|
||||
|
||||
// convert null terminated string to all caps
|
||||
void mkallcap(char * p, const struct cs_info * csconv)
|
||||
{
|
||||
while (*p != '\0') {
|
||||
*p = csconv[((unsigned char) *p)].cupper;
|
||||
p++;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
// convert null terminated string to all little
|
||||
void mkallsmall(char * p, const struct cs_info * csconv)
|
||||
{
|
||||
while (*p != '\0') {
|
||||
*p = csconv[((unsigned char) *p)].clower;
|
||||
p++;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
// convert null terminated string to have intial capital
|
||||
void mkinitcap(char * p, const struct cs_info * csconv)
|
||||
{
|
||||
if (*p != '\0') *p = csconv[((unsigned char)*p)].cupper;
|
||||
}
|
||||
|
||||
|
||||
// XXX This function was rewritten for mozilla. Instead of storing the
|
||||
// conversion tables static in this file, create them when needed
|
||||
// with help the mozilla backend.
|
||||
struct cs_info * get_current_cs(const char * es) {
|
||||
struct cs_info *ccs;
|
||||
|
||||
nsCOMPtr<nsIUnicodeEncoder> encoder;
|
||||
nsCOMPtr<nsIUnicodeDecoder> decoder;
|
||||
nsCOMPtr<nsICaseConversion> caseConv;
|
||||
|
||||
nsresult rv;
|
||||
nsCOMPtr<nsICharsetConverterManager> ccm = do_GetService(kCharsetConverterManagerCID, &rv);
|
||||
if (NS_FAILED(rv))
|
||||
return nsnull;
|
||||
|
||||
rv = ccm->GetUnicodeEncoder(es, getter_AddRefs(encoder));
|
||||
if (encoder && NS_SUCCEEDED(rv))
|
||||
encoder->SetOutputErrorBehavior(encoder->kOnError_Replace, nsnull, '?');
|
||||
if (NS_FAILED(rv))
|
||||
return nsnull;
|
||||
rv = ccm->GetUnicodeDecoder(es, getter_AddRefs(decoder));
|
||||
|
||||
caseConv = do_GetService(kUnicharUtilCID, &rv);
|
||||
if (NS_FAILED(rv))
|
||||
return nsnull;
|
||||
|
||||
ccs = (struct cs_info *) malloc(0xff * sizeof(cs_info));
|
||||
|
||||
PRInt32 charLength = 256;
|
||||
PRInt32 uniLength = 512;
|
||||
char *source = (char *)malloc(charLength * sizeof(char));
|
||||
PRUnichar *uni = (PRUnichar *)malloc(uniLength * sizeof(PRUnichar));
|
||||
char *lower = (char *)malloc(charLength * sizeof(char));
|
||||
char *upper = (char *)malloc(charLength * sizeof(char));
|
||||
|
||||
// Create a long string of all chars.
|
||||
unsigned int i;
|
||||
for (i = 0x00; i <= 0xff ; ++i) {
|
||||
source[i] = i;
|
||||
}
|
||||
|
||||
// Convert this long string to unicode
|
||||
rv = decoder->Convert(source, &charLength, uni, &uniLength);
|
||||
|
||||
// Do case conversion stuff, and convert back.
|
||||
caseConv->ToUpper(uni, uni, uniLength);
|
||||
encoder->Convert(uni, &uniLength, upper, &charLength);
|
||||
|
||||
uniLength = 512;
|
||||
charLength = 256;
|
||||
rv = decoder->Convert(source, &charLength, uni, &uniLength);
|
||||
caseConv->ToLower(uni, uni, uniLength);
|
||||
encoder->Convert(uni, &uniLength, lower, &charLength);
|
||||
|
||||
// Store
|
||||
for (i = 0x00; i <= 0xff ; ++i) {
|
||||
ccs[i].cupper = upper[i];
|
||||
ccs[i].clower = lower[i];
|
||||
|
||||
if (ccs[i].clower != (unsigned char)i)
|
||||
ccs[i].ccase = true;
|
||||
else
|
||||
ccs[i].ccase = false;
|
||||
|
||||
}
|
||||
|
||||
free(source);
|
||||
free(uni);
|
||||
free(lower);
|
||||
free(upper);
|
||||
|
||||
return ccs;
|
||||
};
|
||||
|
||||
|
||||
// Default character encoding for each known two letter language code.
struct lang_map lang2enc[] = {
  { "ca", "ISO8859-1" },
  { "cs", "ISO8859-2" },
  { "da", "ISO8859-1" },
  { "de", "ISO8859-1" },
  { "el", "ISO8859-7" },
  { "en", "ISO8859-1" },
  { "es", "ISO8859-1" },
  { "fr", "ISO8859-1" },
  { "hr", "ISO8859-2" },
  { "hu", "ISO8859-2" },
  { "it", "ISO8859-1" },
  { "la", "ISO8859-1" },
  { "lv", "ISO8859-13" },
  { "nl", "ISO8859-1" },
  { "pl", "ISO8859-2" },
  { "pt", "ISO8859-1" },
  { "sv", "ISO8859-1" },
  { "ru", "KOI8-R" },
  { "bg", "microsoft-cp1251" },
};
|
||||
|
||||
|
||||
const char * get_default_enc(const char * lang) {
|
||||
int n = sizeof(lang2enc) / sizeof(lang2enc[0]);
|
||||
for (int i = 0; i < n; i++) {
|
||||
if (strcmp(lang,lang2enc[i].lang) == 0) {
|
||||
return lang2enc[i].def_enc;
|
||||
}
|
||||
}
|
||||
return NULL;
|
||||
};
|
|
@ -0,0 +1,73 @@
|
|||
#ifndef __CSUTILHXX__
|
||||
#define __CSUTILHXX__
|
||||
|
||||
|
||||
// First some base level utility routines
|
||||
|
||||
// remove end of line char(s)
|
||||
void mychomp(char * s);
|
||||
|
||||
// duplicate string
|
||||
char * mystrdup(const char * s);
|
||||
|
||||
// duplicate reverse of string
|
||||
char * myrevstrdup(const char * s);
|
||||
|
||||
// parse into tokens with char delimiter
|
||||
char * mystrsep(char ** sptr, const char delim);
|
||||
|
||||
// is one string a leading subset of another
|
||||
int isSubset(const char * s1, const char * s2);
|
||||
|
||||
// is one reverse string a leading subset of the end of another
|
||||
int isRevSubset(const char * s1, const char * end_of_s2, int s2_len);
|
||||
|
||||
|
||||
// character encoding information
//
// One entry of a case table; the tables are indexed by unsigned byte value.
struct cs_info {
  unsigned char ccase;   // non-zero when clower differs from the byte itself
  unsigned char clower;  // lower case form of the byte
  unsigned char cupper;  // upper case form of the byte
};
|
||||
|
||||
|
||||
// Associates an encoding name with a case table.
struct enc_entry {
  const char * enc_name;      // name of the encoding
  struct cs_info * cs_table;  // case table for that encoding
};
|
||||
|
||||
// language to encoding default map
//
// One row of the table: a language code and the name of its default encoding.
struct lang_map {
  const char * lang;     // language code, e.g. "en"
  const char * def_enc;  // default encoding name, e.g. "ISO8859-1"
};
|
||||
|
||||
struct cs_info * get_current_cs(const char * es);
|
||||
|
||||
const char * get_default_enc(const char * lang);
|
||||
|
||||
#if 0
|
||||
// Not needed in mozilla
|
||||
// convert null terminated string to all caps using encoding
|
||||
void enmkallcap(char * d, const char * p, const char * encoding);
|
||||
|
||||
// convert null terminated string to all little using encoding
|
||||
void enmkallsmall(char * d, const char * p, const char * encoding);
|
||||
|
||||
// convert null terminated string to have initial capital using encoding
|
||||
void enmkinitcap(char * d, const char * p, const char * encoding);
|
||||
#endif
|
||||
|
||||
// convert null terminated string to all caps
|
||||
void mkallcap(char * p, const struct cs_info * csconv);
|
||||
|
||||
// convert null terminated string to all little
|
||||
void mkallsmall(char * p, const struct cs_info * csconv);
|
||||
|
||||
// convert null terminated string to have initial capital
|
||||
void mkinitcap(char * p, const struct cs_info * csconv);
|
||||
|
||||
|
||||
#endif
|
|
@ -0,0 +1,207 @@
|
|||
#include "license.readme"
|
||||
|
||||
#include <cstdlib>
|
||||
#include <cstring>
|
||||
#include <cstdio>
|
||||
|
||||
#include "hashmgr.hxx"
|
||||
|
||||
extern void mychomp(char * s);
|
||||
extern char * mystrdup(const char *);
|
||||
|
||||
// using namespace std;
|
||||
|
||||
|
||||
// build a hash table from a munched word list
|
||||
|
||||
HashMgr::HashMgr(const char * tpath)
|
||||
{
|
||||
tablesize = 0;
|
||||
tableptr = NULL;
|
||||
int ec = load_tables(tpath);
|
||||
if (ec) {
|
||||
/* error condition - what should we do here */
|
||||
fprintf(stderr,"Hash Manager Error : %d\n",ec);
|
||||
fflush(stderr);
|
||||
if (tableptr) {
|
||||
free(tableptr);
|
||||
}
|
||||
tablesize = 0;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
// Release the table: for every column free the strings held by the column
// head (which lives inside the table array), then free each chained entry
// together with its strings, and finally the array itself.
HashMgr::~HashMgr()
{
  if (tableptr != NULL) {
    for (int col = 0; col < tablesize; ++col) {
      struct hentry * head = &tableptr[col];
      if (head->word) free(head->word);
      if (head->astr) free(head->astr);

      struct hentry * node = head->next;
      while (node != NULL) {
        struct hentry * following = node->next;
        if (node->word) free(node->word);
        if (node->astr) free(node->astr);
        free(node);
        node = following;
      }
    }
    free(tableptr);
  }
  tablesize = 0;
}
|
||||
|
||||
|
||||
|
||||
// lookup a root word in the hashtable
|
||||
|
||||
struct hentry * HashMgr::lookup(const char *word) const
|
||||
{
|
||||
struct hentry * dp;
|
||||
if (tableptr) {
|
||||
dp = &tableptr[hash(word)];
|
||||
if (dp->word == NULL) return NULL;
|
||||
for ( ; dp != NULL; dp = dp->next) {
|
||||
if (strcmp(word,dp->word) == 0) return dp;
|
||||
}
|
||||
}
|
||||
return NULL;
|
||||
}
|
||||
|
||||
|
||||
|
||||
// add a word to the hash table (private)
|
||||
|
||||
int HashMgr::add_word(const char * word, int wl, const char * aff, int al)
|
||||
{
|
||||
int i = hash(word);
|
||||
struct hentry * dp = &tableptr[i];
|
||||
struct hentry* hp;
|
||||
if (dp->word == NULL) {
|
||||
dp->wlen = wl;
|
||||
dp->alen = al;
|
||||
dp->word = mystrdup(word);
|
||||
dp->astr = mystrdup(aff);
|
||||
dp->next = NULL;
|
||||
if ((wl) && (dp->word == NULL)) return 1;
|
||||
if ((al) && (dp->astr == NULL)) return 1;
|
||||
} else {
|
||||
hp = (struct hentry *) malloc (sizeof(struct hentry));
|
||||
if (hp == NULL) return 1;
|
||||
hp->wlen = wl;
|
||||
hp->alen = al;
|
||||
hp->word = mystrdup(word);
|
||||
hp->astr = mystrdup(aff);
|
||||
hp->next = NULL;
|
||||
while (dp->next != NULL) dp=dp->next;
|
||||
dp->next = hp;
|
||||
if ((wl) && (hp->word == NULL)) return 1;
|
||||
if ((al) && (hp->astr == NULL)) return 1;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
|
||||
// walk the hash table entry by entry - null at end
|
||||
struct hentry * HashMgr::walk_hashtable(int &col, struct hentry * hp) const
|
||||
{
|
||||
//reset to start
|
||||
if ((col < 0) || (hp == NULL)) {
|
||||
col = -1;
|
||||
hp = NULL;
|
||||
}
|
||||
|
||||
if (hp && hp->next != NULL) {
|
||||
hp = hp->next;
|
||||
} else {
|
||||
col++;
|
||||
hp = (col < tablesize) ? &tableptr[col] : NULL;
|
||||
// search for next non-blank column entry
|
||||
while (hp && (hp->word == NULL)) {
|
||||
col ++;
|
||||
hp = (col < tablesize) ? &tableptr[col] : NULL;
|
||||
}
|
||||
if (col < tablesize) return hp;
|
||||
hp = NULL;
|
||||
col = -1;
|
||||
}
|
||||
return hp;
|
||||
}
|
||||
|
||||
|
||||
|
||||
// load a munched word list and build a hash table on the fly
|
||||
|
||||
int HashMgr::load_tables(const char * tpath)
|
||||
{
|
||||
int wl, al;
|
||||
char * ap;
|
||||
|
||||
// raw dictionary - munched file
|
||||
FILE * rawdict = fopen(tpath, "r");
|
||||
if (rawdict == NULL) return 1;
|
||||
|
||||
// first read the first line of file to get hash table size */
|
||||
char ts[MAXDELEN];
|
||||
if (! fgets(ts, MAXDELEN-1,rawdict)) return 2;
|
||||
mychomp(ts);
|
||||
tablesize = atoi(ts);
|
||||
if (!tablesize) return 4;
|
||||
tablesize = tablesize + 5;
|
||||
if ((tablesize %2) == 0) tablesize++;
|
||||
|
||||
// allocate the hash table
|
||||
tableptr = (struct hentry *) calloc(tablesize, sizeof(struct hentry));
|
||||
if (! tableptr) return 3;
|
||||
|
||||
// loop through all words on much list and add to hash
|
||||
// table and create word and affix strings
|
||||
|
||||
while (fgets(ts,MAXDELEN-1,rawdict)) {
|
||||
mychomp(ts);
|
||||
// split each line into word and affix char strings
|
||||
ap = strchr(ts,'/');
|
||||
if (ap) {
|
||||
*ap = '\0';
|
||||
ap++;
|
||||
al = strlen(ap);
|
||||
} else {
|
||||
al = 0;
|
||||
ap = NULL;
|
||||
}
|
||||
|
||||
wl = strlen(ts);
|
||||
|
||||
// add the word and its index
|
||||
if (add_word(ts,wl,ap,al))
|
||||
return 5;;
|
||||
|
||||
}
|
||||
|
||||
fclose(rawdict);
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
// the hash function is a simple load and rotate
|
||||
// algorithm borrowed
|
||||
|
||||
int HashMgr::hash(const char * word) const
|
||||
{
|
||||
long hv = 0;
|
||||
for (int i=0; i < 4 && *word != 0; i++)
|
||||
hv = (hv << 8) | (*word++);
|
||||
while (*word != 0) {
|
||||
ROTATE(hv,ROTATE_LEN);
|
||||
hv ^= (*word++);
|
||||
}
|
||||
return (unsigned long) hv % tablesize;
|
||||
}
|
||||
|
|
@ -0,0 +1,27 @@
|
|||
#ifndef _HASHMGR_HXX_
|
||||
#define _HASHMGR_HXX_
|
||||
|
||||
#include "htypes.hxx"
|
||||
|
||||
// Holds the dictionary hash table that is built from a munched word list.
// Copying is disabled: the copy constructor and assignment operator are
// declared private and not implemented.
class HashMgr
{
  int             tablesize;  // number of columns in tableptr
  struct hentry * tableptr;   // array of column heads, NULL when nothing is loaded

public:
  HashMgr(const char * tpath);
  ~HashMgr();

  struct hentry * lookup(const char *) const;
  int hash(const char *) const;
  struct hentry * walk_hashtable(int & col, struct hentry * hp) const;

private:
  HashMgr( const HashMgr & ); // not implemented
  HashMgr &operator=( const HashMgr & ); // not implemented
  int load_tables(const char * tpath);
  int add_word(const char * word, int wl, const char * ap, int al);

};
|
||||
|
||||
#endif
|
|
@ -0,0 +1,20 @@
|
|||
#ifndef _HTYPES_HXX_
|
||||
#define _HTYPES_HXX_
|
||||
|
||||
#define MAXDELEN 256
|
||||
|
||||
#define ROTATE_LEN 5
|
||||
|
||||
// Updates v in place: v shifted left by q bits, OR'd with the q bits of the
// old value that start at bit position 32-q (written into the low bits).
// q is parenthesized everywhere so that an expression argument such as
// ROTATE(x, a + b) computes 32 - (a + b) rather than 32 - a + b.
// The expansion keeps its trailing semicolon for existing call sites.
#define ROTATE(v,q) \
   (v) = ((v) << (q)) | (((v) >> (32 - (q))) & ((1 << (q))-1));
|
||||
|
||||
// One dictionary word. Column heads of the hash table are hentry objects and
// further words of the same column are chained through next.
struct hentry
{
  short wlen;            // length of word
  short alen;            // length of astr
  char * word;           // the word itself (NULL in an unused column head)
  char * astr;           // affix flag string of the word, may be NULL
  struct hentry * next;  // next entry in the same column, NULL at the end
};
|
||||
|
||||
#endif
|
|
@ -0,0 +1,61 @@
|
|||
/*
|
||||
* Copyright 2002 Kevin B. Hendricks, Stratford, Ontario, Canada
|
||||
* And Contributors. All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
*
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
*
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
*
|
||||
* 3. All modifications to the source code must be clearly marked as
|
||||
* such. Binary redistributions based on modified source code
|
||||
* must be clearly marked as modified versions in the documentation
|
||||
* and/or other materials provided with the distribution.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY KEVIN B. HENDRICKS AND CONTRIBUTORS
|
||||
* ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
|
||||
* FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL
|
||||
* KEVIN B. HENDRICKS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
|
||||
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
|
||||
* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
|
||||
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
||||
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
||||
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
||||
* SUCH DAMAGE.
|
||||
*
|
||||
*
|
||||
* NOTE: A special thanks and credit goes to Geoff Kuenning
|
||||
* the creator of ispell. MySpell's affix algorithms were
|
||||
* based on those of ispell which should be noted is
|
||||
* copyright Geoff Kuenning et.al. and now available
|
||||
* under a BSD style license. For more information on ispell
|
||||
* and affix compression in general, please see:
|
||||
* http://www.cs.ucla.edu/ficus-members/geoff/ispell.html
|
||||
* (the home page for ispell)
|
||||
*
|
||||
* An almost complete rewrite of MySpell for use by
|
||||
* the Mozilla project has been developed by David Einstein
|
||||
* (Deinst@world.std.com). David and I are now
|
||||
* working on parallel development tracks to help
|
||||
* our respective projects (Mozilla and OpenOffice.org
|
||||
* and we will maintain full affix file and dictionary
|
||||
* file compatibility and work on merging our versions
|
||||
* of MySpell back into a single tree. David has been
|
||||
* a significant help in improving MySpell.
|
||||
*
|
||||
* Special thanks also go to La'szlo' Ne'meth
|
||||
* <nemethl@gyorsposta.hu> who is the author of the
|
||||
* Hungarian dictionary and who developed and contributed
|
||||
* the code to support compound words in MySpell
|
||||
* and fixed numerous problems with the encoding
|
||||
* case conversion tables.
|
||||
*
|
||||
*/
|
|
@ -1,271 +0,0 @@
|
|||
/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
|
||||
/* ***** BEGIN LICENSE BLOCK *****
|
||||
* Version: MPL 1.1/GPL 2.0/LGPL 2.1
|
||||
*
|
||||
* The contents of this file are subject to the Mozilla Public License Version
|
||||
* 1.1 (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
* http://www.mozilla.org/MPL/
|
||||
*
|
||||
* Software distributed under the License is distributed on an "AS IS" basis,
|
||||
* WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
|
||||
* for the specific language governing rights and limitations under the
|
||||
* License.
|
||||
*
|
||||
* The Original Code is Mozilla Spellchecker Component.
|
||||
*
|
||||
* The Initial Developer of the Original Code is
|
||||
* David Einstein.
|
||||
* Portions created by the Initial Developer are Copyright (C) 2001
|
||||
* the Initial Developer. All Rights Reserved.
|
||||
*
|
||||
* Contributor(s): David Einstein <Deinst@world.std.com>
|
||||
* Kevin Hendricks <kevin.hendricks@sympatico.ca>
|
||||
*
|
||||
* Alternatively, the contents of this file may be used under the terms of
|
||||
* either the GNU General Public License Version 2 or later (the "GPL"), or
|
||||
* the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
|
||||
* in which case the provisions of the GPL or the LGPL are applicable instead
|
||||
* of those above. If you wish to allow use of your version of this file only
|
||||
* under the terms of either the GPL or the LGPL, and not to allow others to
|
||||
* use your version of this file under the terms of the MPL, indicate your
|
||||
* decision by deleting the provisions above and replace them with the notice
|
||||
* and other provisions required by the GPL or the LGPL. If you do not delete
|
||||
* the provisions above, a recipient may use your version of this file under
|
||||
* the terms of any one of the MPL, the GPL or the LGPL.
|
||||
*
|
||||
* This spellchecker is based on the MySpell spellchecker made for Open Office
|
||||
* by Kevin Hendricks. Although the algorithms and code, have changed
|
||||
* slightly, the architecture is still the same. The Mozilla implementation
|
||||
* is designed to be compatible with the Open Office dictionaries.
|
||||
* Please do not make changes to the affix or dictionary file formats
|
||||
* without attempting to coordinate with Kevin. For more information
|
||||
* on the original MySpell see
|
||||
* http://whiteboard.openoffice.org/source/browse/whiteboard/lingucomponent/source/spellcheck/myspell/
|
||||
*
|
||||
* A special thanks and credit goes to Geoff Kuenning
|
||||
* the creator of ispell. MySpell's affix algorithms were
|
||||
* based on those of ispell which should be noted is
|
||||
* copyright Geoff Kuenning et.al. and now available
|
||||
* under a BSD style license. For more information on ispell
|
||||
* and affix compression in general, please see:
|
||||
* http://www.cs.ucla.edu/ficus-members/geoff/ispell.html
|
||||
* (the home page for ispell)
|
||||
*
|
||||
* ***** END LICENSE BLOCK ***** */
|
||||
|
||||
/* based on MySpell (c) 2001 by Kevin Hendicks */
|
||||
|
||||
#include "mozAffixMod.h"
|
||||
|
||||
// A new state has no transitions, no default successor and no mods.
mozAffixState::mozAffixState()
  : mTrans(nsnull),
    mDefault(nsnull),
    mMods(nsnull)
{
}
|
||||
|
||||
// Destroying a state releases everything reachable from it via clear().
mozAffixState::~mozAffixState()
{
  this->clear();
}
|
||||
|
||||
void
|
||||
mozAffixState::clear()
|
||||
{
|
||||
// clean out any mods
|
||||
mozAffixMod * nextmod=mMods;
|
||||
while(nextmod != nsnull){
|
||||
mozAffixMod *temp=nextmod->next;
|
||||
delete nextmod;
|
||||
nextmod = temp;
|
||||
}
|
||||
mMods=nsnull;
|
||||
|
||||
//clean out transitions
|
||||
mozAffixStateTrans * nexttrans=mTrans;
|
||||
while(nexttrans != nsnull){
|
||||
mozAffixStateTrans *temp=nexttrans->nextTrans;
|
||||
delete nexttrans->nextState;
|
||||
delete nexttrans;
|
||||
nexttrans=temp;
|
||||
}
|
||||
mTrans=nsnull;
|
||||
|
||||
if(mDefault != nsnull){
|
||||
delete mDefault;
|
||||
}
|
||||
mDefault=nsnull;
|
||||
}
|
||||
|
||||
// Follow the transition labelled c. When no transition carries that character
// the default successor is returned, which may be null.
mozAffixState *
mozAffixState::nextState(char c)
{
  for (mozAffixStateTrans *trans = mTrans; trans != nsnull; trans = trans->nextTrans) {
    if (trans->mRule == c) return trans->nextState;
  }
  return mDefault;
}
|
||||
|
||||
void
|
||||
mozAffixState::addMod(const char *affix, mozAffixMod *mod)
|
||||
{
|
||||
mozAffixStateTrans * nexttrans=mTrans;
|
||||
// figure out what kind of character we have and act accordingly
|
||||
if(*affix == '['){
|
||||
char *endblock=(char *)affix+1;
|
||||
char *startblock=(char *)affix+1;
|
||||
while((*endblock != ']')&&(*endblock != '\0')) endblock++;
|
||||
if(*startblock == '^'){
|
||||
char *currblock = startblock+1;
|
||||
//OK, let us start with the complicated case.
|
||||
//Here we sacrifice efficiency for simplicity. we are only running this at startup,
|
||||
// and the lists are not going to be large anyway. First we modify all of the states
|
||||
// not in the block, then we add unmodified clones for the states in the block that do
|
||||
// not occur in the list already.
|
||||
|
||||
//first loop -- go through current states modifying if not found;
|
||||
while(nexttrans != nsnull){
|
||||
currblock=startblock+1;
|
||||
PRBool found=PR_FALSE;
|
||||
while(currblock < endblock){
|
||||
if(*currblock == nexttrans->mRule){
|
||||
found = PR_TRUE;
|
||||
break;
|
||||
}
|
||||
currblock++;
|
||||
}
|
||||
if(!found){
|
||||
nexttrans->nextState->addMod(endblock+1,mod);
|
||||
}
|
||||
nexttrans=nexttrans->nextTrans;
|
||||
}
|
||||
|
||||
//second loop add new states if necessary (if they don't already exist)
|
||||
currblock = startblock+1;
|
||||
while(currblock < endblock){
|
||||
//just add each block one at a time
|
||||
PRBool found=PR_FALSE;
|
||||
nexttrans=mTrans;
|
||||
while(nexttrans!=nsnull){
|
||||
if(nexttrans->mRule == *currblock){
|
||||
found = PR_TRUE;
|
||||
break;
|
||||
}
|
||||
nexttrans=nexttrans->nextTrans;
|
||||
}
|
||||
if(!found){
|
||||
mozAffixState *newState=clone(mDefault);
|
||||
mozAffixStateTrans *newTrans=new mozAffixStateTrans;
|
||||
newTrans->mRule=*currblock;
|
||||
newTrans->nextState=newState;
|
||||
newTrans->nextTrans=mTrans;
|
||||
mTrans=newTrans;
|
||||
}
|
||||
currblock++;
|
||||
}
|
||||
if(mDefault==nsnull) mDefault=new mozAffixState;
|
||||
mDefault->addMod(endblock+1,mod);
|
||||
}
|
||||
else{ // a block of included characters
|
||||
while(startblock < endblock){
|
||||
//just add each block one at a time
|
||||
PRBool found=PR_FALSE;
|
||||
nexttrans=mTrans;
|
||||
while(nexttrans!=nsnull){
|
||||
if(nexttrans->mRule == *startblock){
|
||||
nexttrans->nextState->addMod(endblock+1,mod);
|
||||
found = PR_TRUE;
|
||||
break;
|
||||
}
|
||||
nexttrans=nexttrans->nextTrans;
|
||||
}
|
||||
if(!found){
|
||||
mozAffixState *newState=clone(mDefault);
|
||||
mozAffixStateTrans *newTrans=new mozAffixStateTrans;
|
||||
newTrans->mRule=*startblock;
|
||||
newTrans->nextState=newState;
|
||||
newTrans->nextTrans=mTrans;
|
||||
mTrans=newTrans;
|
||||
newState->addMod(endblock+1,mod);
|
||||
}
|
||||
startblock++;
|
||||
}
|
||||
}
|
||||
}
|
||||
else if(*affix == '\0'){
|
||||
// all we've got to do is insert the mod;
|
||||
mozAffixMod * temp= new mozAffixMod;
|
||||
temp->mID=mod->mID;
|
||||
temp->flags= mod->flags;
|
||||
temp->mAppend.Assign( mod->mAppend);
|
||||
temp->mTruncateLength = mod->mTruncateLength;
|
||||
temp->next= mMods;
|
||||
mMods=temp;
|
||||
}
|
||||
else{
|
||||
// If the single character is a "." fill everything.
|
||||
if (*affix == '.'){
|
||||
while((nexttrans!=nsnull)){
|
||||
nexttrans->nextState->addMod(affix+1,mod);
|
||||
nexttrans=nexttrans->nextTrans;
|
||||
}
|
||||
if(mDefault==nsnull) mDefault=new mozAffixState;
|
||||
mDefault->addMod(affix+1,mod);
|
||||
}
|
||||
else {
|
||||
PRBool found=PR_FALSE;
|
||||
while((nexttrans!=nsnull)){
|
||||
if(nexttrans->mRule == *affix){
|
||||
nexttrans->nextState->addMod(affix+1,mod);
|
||||
found = PR_TRUE;
|
||||
break;
|
||||
}
|
||||
nexttrans=nexttrans->nextTrans;
|
||||
}
|
||||
if(!found){
|
||||
mozAffixState *newState=clone(mDefault);
|
||||
mozAffixStateTrans *newTrans=new mozAffixStateTrans;
|
||||
newTrans->mRule=*affix;
|
||||
newTrans->nextState=newState;
|
||||
newTrans->nextTrans=mTrans;
|
||||
mTrans=newTrans;
|
||||
newState->addMod(affix+1,mod);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Returns a newly allocated deep copy of old: its default successor, its
// transitions (with copies of their target states) and its mods. A null old
// yields an empty state. The caller owns the result.
//
// All copies are stored in the new state. They used to be stored in the
// object clone() was called on, which left the returned state empty and
// altered the caller's own transitions, mods and default successor.
//
// Transitions and mods are pushed at the front of the new lists, so they end
// up in the reverse order of the lists they were copied from.
mozAffixState *
mozAffixState::clone(mozAffixState * old)
{
  mozAffixState *newState = new mozAffixState;
  if(old != nsnull){
    if(old->mDefault != nsnull){
      newState->mDefault=clone(old->mDefault);
    }

    mozAffixStateTrans *nexttrans=old->mTrans;
    while(nexttrans != nsnull){
      mozAffixStateTrans *temp = new mozAffixStateTrans;
      temp->mRule = nexttrans->mRule;
      temp->nextState = clone(nexttrans->nextState);
      temp->nextTrans=newState->mTrans;
      newState->mTrans=temp;
      nexttrans = nexttrans->nextTrans;
    }

    mozAffixMod *nextMod=old->mMods;
    while(nextMod!=nsnull){
      mozAffixMod * temp= new mozAffixMod;
      temp->mID=nextMod->mID;
      temp->flags = nextMod->flags;
      temp->mAppend.Assign( nextMod->mAppend);
      temp->mTruncateLength = nextMod->mTruncateLength;
      temp->next=newState->mMods;
      newState->mMods=temp;
      nextMod = nextMod->next;
    }
  }
  return newState;
}
|
|
@ -1,99 +0,0 @@
|
|||
/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
|
||||
/* ***** BEGIN LICENSE BLOCK *****
|
||||
* Version: MPL 1.1/GPL 2.0/LGPL 2.1
|
||||
*
|
||||
* The contents of this file are subject to the Mozilla Public License Version
|
||||
* 1.1 (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
* http://www.mozilla.org/MPL/
|
||||
*
|
||||
* Software distributed under the License is distributed on an "AS IS" basis,
|
||||
* WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
|
||||
* for the specific language governing rights and limitations under the
|
||||
* License.
|
||||
*
|
||||
* The Original Code is Mozilla Spellchecker Component.
|
||||
*
|
||||
* The Initial Developer of the Original Code is
|
||||
* David Einstein.
|
||||
* Portions created by the Initial Developer are Copyright (C) 2001
|
||||
* the Initial Developer. All Rights Reserved.
|
||||
*
|
||||
* Contributor(s): David Einstein <Deinst@world.std.com>
|
||||
* Kevin Hendricks <kevin.hendricks@sympatico.ca>
|
||||
*
|
||||
* Alternatively, the contents of this file may be used under the terms of
|
||||
* either the GNU General Public License Version 2 or later (the "GPL"), or
|
||||
* the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
|
||||
* in which case the provisions of the GPL or the LGPL are applicable instead
|
||||
* of those above. If you wish to allow use of your version of this file only
|
||||
* under the terms of either the GPL or the LGPL, and not to allow others to
|
||||
* use your version of this file under the terms of the MPL, indicate your
|
||||
* decision by deleting the provisions above and replace them with the notice
|
||||
* and other provisions required by the GPL or the LGPL. If you do not delete
|
||||
* the provisions above, a recipient may use your version of this file under
|
||||
* the terms of any one of the MPL, the GPL or the LGPL.
|
||||
*
|
||||
* This spellchecker is based on the MySpell spellchecker made for Open Office
|
||||
* by Kevin Hendricks. Although the algorithms and code, have changed
|
||||
* slightly, the architecture is still the same. The Mozilla implementation
|
||||
* is designed to be compatible with the Open Office dictionaries.
|
||||
* Please do not make changes to the affix or dictionary file formats
|
||||
* without attempting to coordinate with Kevin. For more information
|
||||
* on the original MySpell see
|
||||
* http://whiteboard.openoffice.org/source/browse/whiteboard/lingucomponent/source/spellcheck/myspell/
|
||||
*
|
||||
* A special thanks and credit goes to Geoff Kuenning
|
||||
* the creator of ispell. MySpell's affix algorithms were
|
||||
* based on those of ispell which should be noted is
|
||||
* copyright Geoff Kuenning et.al. and now available
|
||||
* under a BSD style license. For more information on ispell
|
||||
* and affix compression in general, please see:
|
||||
* http://www.cs.ucla.edu/ficus-members/geoff/ispell.html
|
||||
* (the home page for ispell)
|
||||
*
|
||||
* ***** END LICENSE BLOCK ***** */
|
||||
#ifndef mozAffixMod_h__
|
||||
#define mozAffixMod_h__
|
||||
|
||||
#include "nsString.h"
|
||||
|
||||
struct mozAffixMod
|
||||
{
|
||||
char mID; //character identifier.
|
||||
nsCString mAppend; //Standard ending to append
|
||||
PRUint16 flags; //flags
|
||||
PRUint16 mTruncateLength; //length of special ending to remove
|
||||
mozAffixMod* next;
|
||||
};
|
||||
class mozAffixState;
|
||||
|
||||
struct mozAffixStateTrans
|
||||
{
|
||||
char mRule;
|
||||
mozAffixState *nextState;
|
||||
mozAffixStateTrans *nextTrans;
|
||||
};
|
||||
|
||||
class mozAffixState
|
||||
{
|
||||
public:
|
||||
mozAffixState();
|
||||
~mozAffixState();
|
||||
|
||||
// we are splitting this into two seperate states
|
||||
mozAffixState *nextState(char c);
|
||||
void addMod(const char* affix, mozAffixMod *mod);
|
||||
mozAffixMod *getMod(){return mMods;}
|
||||
void clear();
|
||||
protected:
|
||||
mozAffixState * clone(mozAffixState* old);
|
||||
|
||||
private:
|
||||
mozAffixStateTrans *mTrans;
|
||||
mozAffixState *mDefault;
|
||||
//a list of next states
|
||||
mozAffixMod *mMods;
|
||||
};
|
||||
|
||||
#endif // mozAffixMod_h__
|
|
@ -1,101 +0,0 @@
|
|||
/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
|
||||
/* ***** BEGIN LICENSE BLOCK *****
|
||||
* Version: MPL 1.1/GPL 2.0/LGPL 2.1
|
||||
*
|
||||
* The contents of this file are subject to the Mozilla Public License Version
|
||||
* 1.1 (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
* http://www.mozilla.org/MPL/
|
||||
*
|
||||
* Software distributed under the License is distributed on an "AS IS" basis,
|
||||
* WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
|
||||
* for the specific language governing rights and limitations under the
|
||||
* License.
|
||||
*
|
||||
* The Original Code is Mozilla Spellchecker Component.
|
||||
*
|
||||
* The Initial Developer of the Original Code is
|
||||
* David Einstein.
|
||||
* Portions created by the Initial Developer are Copyright (C) 2001
|
||||
* the Initial Developer. All Rights Reserved.
|
||||
*
|
||||
* Contributor(s): David Einstein <Deinst@world.std.com>
|
||||
*
|
||||
* Alternatively, the contents of this file may be used under the terms of
|
||||
* either the GNU General Public License Version 2 or later (the "GPL"), or
|
||||
* the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
|
||||
* in which case the provisions of the GPL or the LGPL are applicable instead
|
||||
* of those above. If you wish to allow use of your version of this file only
|
||||
* under the terms of either the GPL or the LGPL, and not to allow others to
|
||||
* use your version of this file under the terms of the MPL, indicate your
|
||||
* decision by deleting the provisions above and replace them with the notice
|
||||
* and other provisions required by the GPL or the LGPL. If you do not delete
|
||||
* the provisions above, a recipient may use your version of this file under
|
||||
* the terms of any one of the MPL, the GPL or the LGPL.
|
||||
*
|
||||
* ***** END LICENSE BLOCK ***** */
|
||||
|
||||
#include "mozCStr2CStrHashtable.h"
|
||||
#include "nsCRT.h"
|
||||
#include "nsMemory.h"
|
||||
|
||||
static void* PR_CALLBACK
|
||||
CloneCString(nsHashKey *aKey, void *aData, void* closure)
|
||||
{
|
||||
return nsCRT::strdup((const char*)aData);
|
||||
}
|
||||
|
||||
/**
 * Value-destroy callback; it is passed as the third constructor argument of
 * mozCStr2CStrHashtable::mHashtable.
 *
 * @param aKey    unused
 * @param aData   the stored value to release
 * @param closure unused
 * @return always PR_TRUE
 */
static PRBool PR_CALLBACK
DeleteCString(nsHashKey *aKey, void *aData, void* closure)
{
    char* storedValue = (char*)aData;
    nsMemory::Free(storedValue);
    return PR_TRUE;
}
|
||||
|
||||
/**
 * Builds an empty table. The underlying hashtable is given CloneCString and
 * DeleteCString as its value callbacks, both with a null closure.
 * (The trailing 16 is presumably the initial size -- confirm against
 * nsHashtable.h.)
 */
mozCStr2CStrHashtable::mozCStr2CStrHashtable()
    : mHashtable(CloneCString, nsnull,
                 DeleteCString, nsnull,
                 16)
{
}
|
||||
|
||||
/**
 * Nothing to do explicitly here; mHashtable is a by-value member and is
 * destroyed together with this object.
 */
mozCStr2CStrHashtable::~mozCStr2CStrHashtable()
{
}
|
||||
|
||||
/**
 * Stores a private copy of aData under key, replacing (and releasing) any
 * value previously stored under the same key.
 *
 * @param key   NUL-terminated key string; must not be null
 * @param aData NUL-terminated value string; must not be null
 * @return NS_OK on success, NS_ERROR_NULL_POINTER if either argument is null,
 *         NS_ERROR_OUT_OF_MEMORY if the copy could not be allocated
 */
nsresult
mozCStr2CStrHashtable::Put(const char *key, const char* aData)
{
    if (!key || !aData)
        return NS_ERROR_NULL_POINTER;

    // Stored values are released with nsMemory::Free (see DeleteCString and
    // Remove), so allocate the copy with the matching nsMemory allocator
    // rather than libc strdup().
    char* value = (char*)nsMemory::Clone(aData, strlen(aData) + 1);
    if (value == nsnull)
        return NS_ERROR_OUT_OF_MEMORY;

    nsCStringKey k(key);
    // The hashtable hands back whatever value was stored under this key
    // before; it is ours to release.
    char* oldValue = (char*)mHashtable.Put(&k, value);
    if (oldValue)
        nsMemory::Free(oldValue);
    return NS_OK;
}
|
||||
|
||||
char*
|
||||
mozCStr2CStrHashtable::Get(const char *key)
|
||||
{
|
||||
nsCStringKey k(key);
|
||||
const char* value = (const char*)mHashtable.Get(&k);
|
||||
if (value == nsnull)
|
||||
return nsnull;
|
||||
return strdup(value);
|
||||
}
|
||||
|
||||
/**
 * Removes the entry for key, releasing its stored value if there was one.
 *
 * @param key NUL-terminated key string
 * @return always NS_OK, whether or not the key was present
 */
nsresult
mozCStr2CStrHashtable::Remove(const char *key)
{
    nsCStringKey doomedKey(key);
    char* removed = (char*)mHashtable.Remove(&doomedKey);
    if (removed != nsnull) {
        nsMemory::Free(removed);
    }
    return NS_OK;
}
|
||||
|
||||
void
|
||||
mozCStr2CStrHashtable::Reset()
|
||||
{
|
||||
mHashtable.Reset();
|
||||
}
|
|
@ -1,57 +0,0 @@
|
|||
/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
|
||||
/* ***** BEGIN LICENSE BLOCK *****
|
||||
* Version: MPL 1.1/GPL 2.0/LGPL 2.1
|
||||
*
|
||||
* The contents of this file are subject to the Mozilla Public License Version
|
||||
* 1.1 (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
* http://www.mozilla.org/MPL/
|
||||
*
|
||||
* Software distributed under the License is distributed on an "AS IS" basis,
|
||||
* WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
|
||||
* for the specific language governing rights and limitations under the
|
||||
* License.
|
||||
*
|
||||
* The Original Code is Mozilla Spellchecker Component.
|
||||
*
|
||||
* The Initial Developer of the Original Code is
|
||||
* David Einstein.
|
||||
* Portions created by the Initial Developer are Copyright (C) 2001
|
||||
* the Initial Developer. All Rights Reserved.
|
||||
*
|
||||
* Contributor(s): David Einstein <Deinst@world.std.com>
|
||||
*
|
||||
* Alternatively, the contents of this file may be used under the terms of
|
||||
* either the GNU General Public License Version 2 or later (the "GPL"), or
|
||||
* the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
|
||||
* in which case the provisions of the GPL or the LGPL are applicable instead
|
||||
* of those above. If you wish to allow use of your version of this file only
|
||||
* under the terms of either the GPL or the LGPL, and not to allow others to
|
||||
* use your version of this file under the terms of the MPL, indicate your
|
||||
* decision by deleting the provisions above and replace them with the notice
|
||||
* and other provisions required by the GPL or the LGPL. If you do not delete
|
||||
* the provisions above, a recipient may use your version of this file under
|
||||
* the terms of any one of the MPL, the GPL or the LGPL.
|
||||
*
|
||||
* ***** END LICENSE BLOCK ***** */
|
||||
|
||||
#ifndef mozCStr2CStrHashtable_h__
|
||||
#define mozCStr2CStrHashtable_h__
|
||||
|
||||
#include "nsHashtable.h"
|
||||
|
||||
class mozCStr2CStrHashtable
|
||||
{
|
||||
public:
|
||||
mozCStr2CStrHashtable();
|
||||
virtual ~mozCStr2CStrHashtable();
|
||||
|
||||
nsresult Put(const char *key, const char* aData);
|
||||
char* Get(const char *key);
|
||||
nsresult Remove(const char *key);
|
||||
void Reset();
|
||||
protected:
|
||||
nsObjectHashtable mHashtable;
|
||||
};
|
||||
|
||||
#endif
|
|
@ -21,6 +21,7 @@
|
|||
*
|
||||
* Contributor(s): David Einstein <Deinst@world.std.com>
|
||||
* Kevin Hendricks <kevin.hendricks@sympatico.ca>
|
||||
* Michiel van Leeuwen <mvl@exedo.nl>
|
||||
*
|
||||
* Alternatively, the contents of this file may be used under the terms of
|
||||
* either the GNU General Public License Version 2 or later (the "GPL"), or
|
||||
|
@ -58,18 +59,22 @@
|
|||
|
||||
#include "mozMySpell.h"
|
||||
#include "nsReadableUtils.h"
|
||||
#include "nsIFile.h"
|
||||
#include "nsXPIDLString.h"
|
||||
#include "nsISimpleEnumerator.h"
|
||||
#include "nsDirectoryService.h"
|
||||
#include "nsDirectoryServiceDefs.h"
|
||||
#include "mozISpellI18NManager.h"
|
||||
#include "nsICharsetConverterManager.h"
|
||||
#include "nsUnicharUtilCIID.h"
|
||||
#include "nsUnicharUtils.h"
|
||||
#include "nsCRT.h"
|
||||
#include <stdlib.h>
|
||||
|
||||
const PRInt32 kFirstDirSize=8;
|
||||
static NS_DEFINE_CID(kCharsetConverterManagerCID, NS_ICHARSETCONVERTERMANAGER_CID);
|
||||
static NS_DEFINE_CID(kUnicharUtilCID, NS_UNICHARUTIL_CID);
|
||||
|
||||
NS_IMPL_ISUPPORTS1(mozMySpell, mozISpellCheckingEngine)
|
||||
|
||||
const PRInt32 kFirstDirSize=8;
|
||||
|
||||
|
||||
mozMySpell::mozMySpell()
|
||||
{
|
||||
|
@ -77,176 +82,188 @@ mozMySpell::mozMySpell()
|
|||
|
||||
mozMySpell::~mozMySpell()
|
||||
{
|
||||
mPersonalDictionary = nsnull;
|
||||
}
|
||||
|
||||
/* attribute wstring dictionary; */
|
||||
NS_IMETHODIMP mozMySpell::GetDictionary(PRUnichar * *aDictionary)
|
||||
NS_IMETHODIMP mozMySpell::GetDictionary(PRUnichar **aDictionary)
|
||||
{
|
||||
nsresult res=NS_OK;
|
||||
NS_PRECONDITION(aDictionary != nsnull, "null ptr");
|
||||
if(!aDictionary){
|
||||
res = NS_ERROR_NULL_POINTER;
|
||||
}
|
||||
else{
|
||||
*aDictionary = ToNewUnicode(mDictionary);
|
||||
if(!aDictionary) res = NS_ERROR_OUT_OF_MEMORY;
|
||||
}
|
||||
|
||||
return res;
|
||||
NS_ENSURE_ARG_POINTER(aDictionary);
|
||||
|
||||
*aDictionary = ToNewUnicode(mDictionary);
|
||||
return *aDictionary ? NS_OK : NS_ERROR_OUT_OF_MEMORY;
|
||||
}
|
||||
|
||||
/* set the Dictionary.
|
||||
* This also Loads the dictionary and initializes the converter using the dictionaries converter
|
||||
*/
|
||||
NS_IMETHODIMP mozMySpell::SetDictionary(const PRUnichar * aDictionary)
|
||||
NS_IMETHODIMP mozMySpell::SetDictionary(const PRUnichar *aDictionary)
|
||||
{
|
||||
if(!aDictionary)
|
||||
return NS_ERROR_NULL_POINTER;
|
||||
NS_ENSURE_ARG_POINTER(aDictionary);
|
||||
|
||||
nsresult res=NS_OK;
|
||||
nsresult rv = NS_OK;
|
||||
|
||||
if (!mDictionary.Equals(aDictionary)&&!(*aDictionary == 0)){
|
||||
if (*aDictionary && !mDictionary.Equals(aDictionary)) {
|
||||
mDictionary = aDictionary;
|
||||
res=mAMgr.Load(mDictionary);
|
||||
if(NS_FAILED(res)){
|
||||
NS_WARNING("Dictionary load failed");
|
||||
return res;
|
||||
|
||||
nsAutoString affFileName, dictFileName;
|
||||
|
||||
// XXX This isn't really good. nsIFile->Path isn't XP-safe etc.
|
||||
// see nsIFile.idl
|
||||
// A better way would be to QI to nsILocalFile, and get a file handle
|
||||
// from there. Only problem is that myspell wants a path
|
||||
|
||||
nsCOMPtr<nsIFile> file;
|
||||
nsresult rv = NS_GetSpecialDirectory(NS_XPCOM_COMPONENT_DIR, getter_AddRefs(file));
|
||||
NS_ENSURE_SUCCESS(rv, rv);
|
||||
if (!file)
|
||||
return NS_ERROR_FAILURE;
|
||||
rv = file->Append(NS_LITERAL_STRING("myspell"));
|
||||
NS_ENSURE_SUCCESS(rv, rv);
|
||||
rv = file->Append(mDictionary + NS_LITERAL_STRING(".aff"));
|
||||
NS_ENSURE_SUCCESS(rv, rv);
|
||||
file->GetPath(affFileName);
|
||||
|
||||
rv = NS_GetSpecialDirectory(NS_XPCOM_COMPONENT_DIR, getter_AddRefs(file));
|
||||
NS_ENSURE_SUCCESS(rv, rv);
|
||||
if (!file)
|
||||
return NS_ERROR_FAILURE;
|
||||
rv = file->Append(NS_LITERAL_STRING("myspell"));
|
||||
NS_ENSURE_SUCCESS(rv, rv);
|
||||
rv = file->Append(mDictionary + NS_LITERAL_STRING(".dic"));
|
||||
NS_ENSURE_SUCCESS(rv, rv);
|
||||
file->GetPath(dictFileName);
|
||||
|
||||
mMySpell = new MySpell(NS_ConvertUTF16toUTF8(affFileName).get(), NS_ConvertUTF16toUTF8(dictFileName).get());
|
||||
if (!mMySpell)
|
||||
return NS_ERROR_FAILURE;
|
||||
|
||||
nsCOMPtr<nsICharsetConverterManager> ccm = do_GetService(kCharsetConverterManagerCID, &rv);
|
||||
NS_ENSURE_SUCCESS(rv, rv);
|
||||
|
||||
rv = ccm->GetUnicodeDecoder(mMySpell->get_dic_encoding(), getter_AddRefs(mDecoder));
|
||||
NS_ENSURE_SUCCESS(rv, rv);
|
||||
rv = ccm->GetUnicodeEncoder(mMySpell->get_dic_encoding(), getter_AddRefs(mEncoder));
|
||||
if (mEncoder && NS_SUCCEEDED(rv)) {
|
||||
mEncoder->SetOutputErrorBehavior(mEncoder->kOnError_Signal, nsnull, '?');
|
||||
}
|
||||
nsAutoString tryString;
|
||||
mAMgr.get_try_string(tryString);
|
||||
mSMgr.setup(tryString, 64, &mAMgr);
|
||||
nsString language;
|
||||
|
||||
NS_ENSURE_SUCCESS(rv, rv);
|
||||
|
||||
PRInt32 pos = mDictionary.FindChar('-');
|
||||
if(pos == -1){
|
||||
language.AssignLiteral("en");
|
||||
}
|
||||
else{
|
||||
language = Substring(mDictionary,0,pos);
|
||||
}
|
||||
nsCOMPtr<mozISpellI18NManager> serv(do_GetService("@mozilla.org/spellchecker/i18nmanager;1", &res));
|
||||
if(serv && NS_SUCCEEDED(res)){
|
||||
res = serv->GetUtil(language.get(),getter_AddRefs(mConverter));
|
||||
}
|
||||
if (pos == -1)
|
||||
mLanguage.Assign(NS_LITERAL_STRING("en"));
|
||||
else
|
||||
mLanguage = Substring(mDictionary, 0, pos);
|
||||
}
|
||||
return res;
|
||||
|
||||
return rv;
|
||||
}
|
||||
|
||||
/* readonly attribute wstring language; */
|
||||
NS_IMETHODIMP mozMySpell::GetLanguage(PRUnichar * *aLanguage)
|
||||
NS_IMETHODIMP mozMySpell::GetLanguage(PRUnichar **aLanguage)
|
||||
{
|
||||
nsresult res=NS_OK;
|
||||
NS_PRECONDITION(aLanguage != nsnull, "null ptr");
|
||||
if(!aLanguage){
|
||||
res = NS_ERROR_NULL_POINTER;
|
||||
}
|
||||
else{
|
||||
nsString language;
|
||||
PRInt32 pos = mDictionary.FindChar('-');
|
||||
if(pos == -1){
|
||||
language.AssignLiteral("en");
|
||||
}
|
||||
else{
|
||||
language = Substring(mDictionary,0,pos);
|
||||
}
|
||||
*aLanguage = ToNewUnicode(language);
|
||||
if(!aLanguage) res = NS_ERROR_OUT_OF_MEMORY;
|
||||
}
|
||||
return res;
|
||||
NS_ENSURE_ARG_POINTER(aLanguage);
|
||||
|
||||
*aLanguage = ToNewUnicode(mLanguage);
|
||||
return *aLanguage ? NS_OK : NS_ERROR_OUT_OF_MEMORY;
|
||||
}
|
||||
|
||||
/* readonly attribute boolean providesPersonalDictionary; */
|
||||
NS_IMETHODIMP mozMySpell::GetProvidesPersonalDictionary(PRBool *aProvidesPersonalDictionary)
|
||||
{
|
||||
if(!aProvidesPersonalDictionary) return NS_ERROR_NULL_POINTER;
|
||||
*aProvidesPersonalDictionary=PR_FALSE;
|
||||
NS_ENSURE_ARG_POINTER(aProvidesPersonalDictionary);
|
||||
|
||||
*aProvidesPersonalDictionary = PR_FALSE;
|
||||
return NS_OK;
|
||||
}
|
||||
|
||||
/* readonly attribute boolean providesWordUtils; */
|
||||
NS_IMETHODIMP mozMySpell::GetProvidesWordUtils(PRBool *aProvidesWordUtils)
|
||||
{
|
||||
if(!aProvidesWordUtils) return NS_ERROR_NULL_POINTER;
|
||||
*aProvidesWordUtils=PR_FALSE;
|
||||
NS_ENSURE_ARG_POINTER(aProvidesWordUtils);
|
||||
|
||||
*aProvidesWordUtils = PR_FALSE;
|
||||
return NS_OK;
|
||||
}
|
||||
|
||||
/* readonly attribute wstring name; */
|
||||
NS_IMETHODIMP mozMySpell::GetName(PRUnichar * *aName)
|
||||
{
|
||||
return NS_ERROR_NOT_IMPLEMENTED;
|
||||
return NS_ERROR_NOT_IMPLEMENTED;
|
||||
}
|
||||
|
||||
/* readonly attribute wstring copyright; */
|
||||
NS_IMETHODIMP mozMySpell::GetCopyright(PRUnichar * *aCopyright)
|
||||
{
|
||||
return NS_ERROR_NOT_IMPLEMENTED;
|
||||
return NS_ERROR_NOT_IMPLEMENTED;
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
/* attribute mozIPersonalDictionary personalDictionary; */
|
||||
NS_IMETHODIMP mozMySpell::GetPersonalDictionary(mozIPersonalDictionary * *aPersonalDictionary)
|
||||
{
|
||||
return mAMgr.GetPersonalDictionary(aPersonalDictionary);
|
||||
*aPersonalDictionary = mPersonalDictionary;
|
||||
NS_IF_ADDREF(*aPersonalDictionary);
|
||||
return NS_OK;
|
||||
}
|
||||
|
||||
NS_IMETHODIMP mozMySpell::SetPersonalDictionary(mozIPersonalDictionary * aPersonalDictionary)
|
||||
{
|
||||
return mAMgr.SetPersonalDictionary(aPersonalDictionary);
|
||||
mPersonalDictionary = aPersonalDictionary;
|
||||
return NS_OK;
|
||||
}
|
||||
|
||||
/* void GetDictionaryList ([array, size_is (count)] out wstring dictionaries, out PRUint32 count); */
|
||||
NS_IMETHODIMP mozMySpell::GetDictionaryList(PRUnichar ***dictionaries, PRUint32 *count)
|
||||
NS_IMETHODIMP mozMySpell::GetDictionaryList(PRUnichar ***aDictionaries, PRUint32 *aCount)
|
||||
{
|
||||
nsresult res;
|
||||
if(!dictionaries || !count){
|
||||
if (!aDictionaries || !aCount)
|
||||
return NS_ERROR_NULL_POINTER;
|
||||
}
|
||||
|
||||
nsCOMPtr<nsIFile> aFile;
|
||||
*aDictionaries = 0;
|
||||
*aCount = 0;
|
||||
PRInt32 tempCount=0, arraySize = kFirstDirSize;
|
||||
PRUnichar **newPtr;
|
||||
|
||||
nsCOMPtr<nsIFile> file;
|
||||
nsresult rv = NS_GetSpecialDirectory(NS_XPCOM_COMPONENT_DIR, getter_AddRefs(file));
|
||||
NS_ENSURE_SUCCESS(rv, rv);
|
||||
if (!file)
|
||||
return NS_ERROR_FAILURE;
|
||||
|
||||
rv = file->Append(NS_LITERAL_STRING("myspell"));
|
||||
NS_ENSURE_SUCCESS(rv, rv);
|
||||
|
||||
nsCOMPtr<nsISimpleEnumerator> dirEntries;
|
||||
PRBool hasMore = PR_FALSE;
|
||||
PRInt32 tempCount=0, i, arraySize = kFirstDirSize;;
|
||||
PRUnichar **newPtr, **tmpPtr;
|
||||
rv = file->GetDirectoryEntries(getter_AddRefs(dirEntries));
|
||||
NS_ENSURE_SUCCESS(rv, rv);
|
||||
if (!dirEntries)
|
||||
return NS_ERROR_FAILURE;
|
||||
|
||||
res=NS_OK;
|
||||
*dictionaries = 0;
|
||||
*count=0;
|
||||
|
||||
res = NS_GetSpecialDirectory(NS_XPCOM_COMPONENT_DIR, getter_AddRefs(aFile));
|
||||
if (NS_FAILED(res)) return res;
|
||||
if(!aFile)return NS_ERROR_FAILURE;
|
||||
res = aFile->Append(NS_LITERAL_STRING("myspell"));
|
||||
if (NS_FAILED(res)) return res;
|
||||
res = aFile->GetDirectoryEntries(getter_AddRefs(dirEntries));
|
||||
if (NS_FAILED(res)) return res;
|
||||
if(!dirEntries)return NS_ERROR_FAILURE;
|
||||
|
||||
tmpPtr = (PRUnichar **)nsMemory::Alloc(sizeof(PRUnichar *)*kFirstDirSize);
|
||||
PRUnichar **tmpPtr = (PRUnichar **)nsMemory::Alloc(sizeof(PRUnichar *) * kFirstDirSize);
|
||||
if (!tmpPtr)
|
||||
return NS_ERROR_OUT_OF_MEMORY;
|
||||
|
||||
|
||||
PRBool hasMore = PR_FALSE;
|
||||
while (NS_SUCCEEDED(dirEntries->HasMoreElements(&hasMore)) && hasMore) {
|
||||
nsCOMPtr<nsISupports> nextItem;
|
||||
nsCOMPtr<nsIFile> theFile;
|
||||
|
||||
dirEntries->GetNext(getter_AddRefs(nextItem));
|
||||
theFile = do_QueryInterface(nextItem);
|
||||
nsCOMPtr<nsIFile> theFile = do_QueryInterface(nextItem);
|
||||
|
||||
if(theFile){
|
||||
nsString fileName;
|
||||
if (theFile) {
|
||||
nsAutoString fileName;
|
||||
theFile->GetLeafName(fileName);
|
||||
PRInt32 dotLocation = fileName.FindChar('.');
|
||||
if((dotLocation != -1) && Substring(fileName,dotLocation,4).EqualsLiteral(".dic")){
|
||||
if(tempCount >= arraySize){
|
||||
if ((dotLocation != -1) &&
|
||||
Substring(fileName,dotLocation,4).EqualsLiteral(".dic")) {
|
||||
if (tempCount >= arraySize) {
|
||||
arraySize = 2 * tempCount;
|
||||
newPtr = (PRUnichar **)nsMemory::Alloc(sizeof(PRUnichar *) * arraySize);
|
||||
if (! newPtr){
|
||||
if (!newPtr){
|
||||
NS_FREE_XPCOM_ALLOCATED_POINTER_ARRAY(tempCount, tmpPtr);
|
||||
return NS_ERROR_OUT_OF_MEMORY;
|
||||
}
|
||||
for(i=0;i<tempCount;i++){
|
||||
for (PRInt32 i = 0; i < tempCount; ++i){
|
||||
newPtr[i] = tmpPtr[i];
|
||||
}
|
||||
nsMemory::Free(tmpPtr);
|
||||
|
@ -256,9 +273,10 @@ NS_IMETHODIMP mozMySpell::GetDictionaryList(PRUnichar ***dictionaries, PRUint32
|
|||
}
|
||||
}
|
||||
}
|
||||
*dictionaries=tmpPtr;
|
||||
*count=tempCount;
|
||||
return res;
|
||||
|
||||
*aDictionaries = tmpPtr;
|
||||
*aCount = tempCount;
|
||||
return NS_OK;
|
||||
}
|
||||
|
||||
/* boolean Check (in wstring word); */
|
||||
|
@ -266,20 +284,32 @@ NS_IMETHODIMP mozMySpell::Check(const PRUnichar *aWord, PRBool *aResult)
|
|||
{
|
||||
NS_ENSURE_ARG_POINTER(aWord);
|
||||
NS_ENSURE_ARG_POINTER(aResult);
|
||||
NS_ENSURE_ARG_POINTER(mConverter);
|
||||
|
||||
PRUnichar **tmpPtr;
|
||||
PRUint32 count,i;
|
||||
*aResult = PR_FALSE;
|
||||
if (!mMySpell)
|
||||
return NS_ERROR_FAILURE;
|
||||
|
||||
nsresult rv = mConverter->GetRootForm(aWord, mozISpellI18NUtil::kCheck, &tmpPtr, &count);
|
||||
NS_ENSURE_SUCCESS(rv, rv);
|
||||
for(i=0 ; i<count ; i++){
|
||||
*aResult = mAMgr.check(nsDependentString(tmpPtr[i]));
|
||||
if (*aResult) break;
|
||||
PRInt32 inLength = nsCRT::strlen(aWord);
|
||||
PRInt32 outLength;
|
||||
nsresult rv = mEncoder->GetMaxLength(aWord, inLength, &outLength);
|
||||
// NS_ERROR_UENC_NOMAPPING is a NS_SUCCESS, no error.
|
||||
if (NS_FAILED(rv) || rv == NS_ERROR_UENC_NOMAPPING) {
|
||||
// not a word in the current charset, so likely not
|
||||
// a word in the current language
|
||||
return PR_FALSE;
|
||||
}
|
||||
NS_FREE_XPCOM_ALLOCATED_POINTER_ARRAY(count, tmpPtr);
|
||||
return rv;
|
||||
|
||||
char *charsetWord = (char *) nsMemory::Alloc(sizeof(char) * (outLength+1));
|
||||
rv = mEncoder->Convert(aWord, &inLength, charsetWord, &outLength);
|
||||
charsetWord[outLength] = '\0';
|
||||
|
||||
*aResult = mMySpell->spell(charsetWord);
|
||||
|
||||
if (!*aResult) {
|
||||
rv = mPersonalDictionary->Check(aWord, mLanguage.get(), aResult);
|
||||
NS_ENSURE_SUCCESS(rv, rv);
|
||||
}
|
||||
|
||||
return NS_OK;
|
||||
}
|
||||
|
||||
/* void Suggest (in wstring word, [array, size_is (count)] out wstring suggestions, out PRUint32 count); */
|
||||
|
@ -287,26 +317,40 @@ NS_IMETHODIMP mozMySpell::Suggest(const PRUnichar *aWord, PRUnichar ***aSuggesti
|
|||
{
|
||||
NS_ENSURE_ARG_POINTER(aSuggestions);
|
||||
NS_ENSURE_ARG_POINTER(aSuggestionCount);
|
||||
NS_ENSURE_ARG_POINTER(mConverter);
|
||||
|
||||
*aSuggestions = 0;
|
||||
*aSuggestionCount=0;
|
||||
PRUnichar **tmpPtr;
|
||||
nsAutoString word(aWord);
|
||||
PRUnichar **slst = nsnull;
|
||||
PRUint32 count;
|
||||
PRUint32 ccount=0;
|
||||
if (!mMySpell)
|
||||
return NS_ERROR_FAILURE;
|
||||
|
||||
nsresult rv = mConverter->GetRootForm(aWord, mozISpellI18NUtil::kSuggest, &tmpPtr, &count);
|
||||
NS_ENSURE_SUCCESS(rv, rv);
|
||||
*aSuggestionCount = 0;
|
||||
|
||||
for (PRUint32 i = 0; (i < count) && NS_SUCCEEDED(rv) ; i++){
|
||||
rv = mSMgr.suggest(&slst, nsDependentString(tmpPtr[i]), &ccount);
|
||||
char ** wlst;
|
||||
*aSuggestionCount = mMySpell->suggest(&wlst, NS_LossyConvertUCS2toASCII(aWord).get());
|
||||
|
||||
if (*aSuggestionCount) {
|
||||
PRUnichar **tmpPtr = (PRUnichar **)nsMemory::Alloc(*aSuggestionCount * sizeof(PRUnichar *));
|
||||
if (!tmpPtr)
|
||||
return NS_ERROR_OUT_OF_MEMORY;
|
||||
|
||||
for (PRUint32 i = 0; i < *aSuggestionCount; ++i) {
|
||||
// Convert the suggestion to utf16
|
||||
PRInt32 inLength = PL_strlen(wlst[i]);
|
||||
PRInt32 outLength;
|
||||
nsresult rv = mDecoder->GetMaxLength(wlst[i], inLength, &outLength);
|
||||
NS_ENSURE_SUCCESS(rv, rv);
|
||||
PRUnichar *dest = (PRUnichar *)malloc(sizeof(PRUnichar) * (outLength + 1));
|
||||
if (!dest)
|
||||
return NS_ERROR_OUT_OF_MEMORY;
|
||||
rv = mDecoder->Convert(wlst[i], &inLength, dest, &outLength);
|
||||
NS_ENSURE_SUCCESS(rv, rv);
|
||||
dest[outLength] = 0;
|
||||
free(wlst[i]);
|
||||
|
||||
// XXX ewwww.
|
||||
tmpPtr[i] = ToNewUnicode(nsDependentString(dest));
|
||||
free(dest);
|
||||
}
|
||||
*aSuggestions = tmpPtr;
|
||||
}
|
||||
NS_FREE_XPCOM_ALLOCATED_POINTER_ARRAY(count, tmpPtr);
|
||||
if (ccount)
|
||||
rv=mConverter->FromRootForm(aWord, (const PRUnichar **)slst, ccount, aSuggestions, aSuggestionCount);
|
||||
NS_FREE_XPCOM_ALLOCATED_POINTER_ARRAY(ccount, slst);
|
||||
return rv;
|
||||
|
||||
return NS_OK;
|
||||
}
|
||||
|
|
|
@ -56,12 +56,13 @@
|
|||
#ifndef mozMySpell_h__
|
||||
#define mozMySpell_h__
|
||||
|
||||
#include "myspell.hxx"
|
||||
#include "mozISpellCheckingEngine.h"
|
||||
#include "nsString.h"
|
||||
#include "myspAffixmgr.h"
|
||||
#include "myspSuggestmgr.h"
|
||||
#include "mozIPersonalDictionary.h"
|
||||
#include "mozISpellI18NUtil.h"
|
||||
#include "nsString.h"
|
||||
#include "nsCOMPtr.h"
|
||||
#include "nsIUnicodeEncoder.h"
|
||||
#include "nsIUnicodeDecoder.h"
|
||||
|
||||
#define MOZ_MYSPELL_CONTRACTID "@mozilla.org/spellchecker/myspell;1"
|
||||
#define MOZ_MYSPELL_CID \
|
||||
|
@ -69,9 +70,6 @@
|
|||
0xD1EE1205, 0x3F96, 0x4a0f, \
|
||||
{ 0xAB, 0xFE, 0x09, 0xE8, 0xC5, 0x4C, 0x9E, 0x9A} }
|
||||
|
||||
|
||||
|
||||
|
||||
class mozMySpell : public mozISpellCheckingEngine
|
||||
{
|
||||
public:
|
||||
|
@ -83,10 +81,12 @@ public:
|
|||
|
||||
protected:
|
||||
|
||||
nsCOMPtr<mozISpellI18NUtil> mConverter;
|
||||
nsString mDictionary;
|
||||
myspAffixMgr mAMgr;
|
||||
myspSuggestMgr mSMgr;
|
||||
nsCOMPtr<mozIPersonalDictionary> mPersonalDictionary;
|
||||
nsCOMPtr<nsIUnicodeEncoder> mEncoder;
|
||||
nsCOMPtr<nsIUnicodeDecoder> mDecoder;
|
||||
nsString mDictionary;
|
||||
nsString mLanguage;
|
||||
MySpell *mMySpell;
|
||||
};
|
||||
|
||||
#endif
|
||||
|
|
|
@ -1,603 +0,0 @@
|
|||
/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
|
||||
/* ***** BEGIN LICENSE BLOCK *****
|
||||
* Version: MPL 1.1/GPL 2.0/LGPL 2.1
|
||||
*
|
||||
* The contents of this file are subject to the Mozilla Public License Version
|
||||
* 1.1 (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
* http://www.mozilla.org/MPL/
|
||||
*
|
||||
* Software distributed under the License is distributed on an "AS IS" basis,
|
||||
* WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
|
||||
* for the specific language governing rights and limitations under the
|
||||
* License.
|
||||
*
|
||||
* The Original Code is Mozilla Spellchecker Component.
|
||||
*
|
||||
* The Initial Developer of the Original Code is
|
||||
* David Einstein.
|
||||
* Portions created by the Initial Developer are Copyright (C) 2001
|
||||
* the Initial Developer. All Rights Reserved.
|
||||
*
|
||||
* Contributor(s): David Einstein <Deinst@world.std.com>
|
||||
* Kevin Hendricks <kevin.hendricks@sympatico.ca>
|
||||
*
|
||||
* Alternatively, the contents of this file may be used under the terms of
|
||||
* either the GNU General Public License Version 2 or later (the "GPL"), or
|
||||
* the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
|
||||
* in which case the provisions of the GPL or the LGPL are applicable instead
|
||||
* of those above. If you wish to allow use of your version of this file only
|
||||
* under the terms of either the GPL or the LGPL, and not to allow others to
|
||||
* use your version of this file under the terms of the MPL, indicate your
|
||||
* decision by deleting the provisions above and replace them with the notice
|
||||
* and other provisions required by the GPL or the LGPL. If you do not delete
|
||||
* the provisions above, a recipient may use your version of this file under
|
||||
* the terms of any one of the MPL, the GPL or the LGPL.
|
||||
*
|
||||
* This spellchecker is based on the MySpell spellchecker made for Open Office
|
||||
* by Kevin Hendricks. Although the algorithms and code, have changed
|
||||
* slightly, the architecture is still the same. The Mozilla implementation
|
||||
* is designed to be compatible with the Open Office dictionaries.
|
||||
* Please do not make changes to the affix or dictionary file formats
|
||||
* without attempting to coordinate with Kevin. For more information
|
||||
* on the original MySpell see
|
||||
* http://whiteboard.openoffice.org/source/browse/whiteboard/lingucomponent/source/spellcheck/myspell/
|
||||
*
|
||||
* A special thanks and credit goes to Geoff Kuenning
|
||||
* the creator of ispell. MySpell's affix algorithms were
|
||||
* based on those of ispell which should be noted is
|
||||
* copyright Geoff Kuenning et.al. and now available
|
||||
* under a BSD style license. For more information on ispell
|
||||
* and affix compression in general, please see:
|
||||
* http://www.cs.ucla.edu/ficus-members/geoff/ispell.html
|
||||
* (the home page for ispell)
|
||||
*
|
||||
* ***** END LICENSE BLOCK ***** */
|
||||
#include "myspAffixmgr.h"
|
||||
#include "nsIFile.h"
|
||||
#include "nsReadLine.h"
|
||||
#include "nsReadableUtils.h"
|
||||
#include "nsDirectoryServiceDefs.h"
|
||||
#include "plstr.h"
|
||||
#include "nsNetUtil.h"
|
||||
#include "nsICharsetConverterManager.h"
|
||||
#include "nsUnicharUtilCIID.h"
|
||||
#include "nsUnicharUtils.h"
|
||||
|
||||
static NS_DEFINE_CID(kCharsetConverterManagerCID, NS_ICHARSETCONVERTERMANAGER_CID);
|
||||
static NS_DEFINE_CID(kUnicharUtilCID, NS_UNICHARUTIL_CID);
|
||||
|
||||
static PRInt32 SplitString(nsACString &in,nsCString out[],PRInt32 size);
|
||||
static void doubleReverseHack(nsACString &s);
|
||||
|
||||
/**
 * Creates an affix manager with no replacement table loaded.
 */
myspAffixMgr::myspAffixMgr() :
  mReplaceTable(nsnull)
{
  // parse_file() only assigns the table length when the .aff file contains a
  // REP section, so give it a defined value for files that have none.
  mReplaceTableLength = 0;
}
|
||||
|
||||
|
||||
/**
 * Drops the personal-dictionary reference and frees the replacement table
 * allocated with new[] in parse_file().
 */
myspAffixMgr::~myspAffixMgr()
{
  mPersonalDictionary = nsnull;

  // delete[] on a null pointer is a no-op, so no guard is needed when no
  // REP table was ever loaded.
  delete[] mReplaceTable;
}
|
||||
|
||||
/**
 * Hands out the currently set personal dictionary (possibly null).
 *
 * @param aPersonalDictionary out: receives an AddRef'ed pointer, or null when
 *                            no personal dictionary is set
 * @return always NS_OK
 */
nsresult myspAffixMgr::GetPersonalDictionary(mozIPersonalDictionary * *aPersonalDictionary)
{
  // Assign and, if non-null, take a reference on behalf of the caller.
  NS_IF_ADDREF(*aPersonalDictionary = mPersonalDictionary);
  return NS_OK;
}
|
||||
|
||||
/**
 * Replaces the personal dictionary held by this affix manager.
 *
 * @param aPersonalDictionary the new dictionary; stored as-is (null is not
 *                            rejected here)
 * @return always NS_OK
 */
nsresult myspAffixMgr::SetPersonalDictionary(mozIPersonalDictionary * aPersonalDictionary)
{
  mPersonalDictionary = aPersonalDictionary;
  return NS_OK;
}
|
||||
|
||||
/**
 * Loads the affix (.aff) and word-list (.dic) files for a dictionary from the
 * "myspell" subdirectory of the XPCOM component directory.
 *
 * @param aDictionary dictionary base name; ".aff" and ".dic" are appended to
 *                    it to form the two file names
 * @return NS_OK on success; NS_ERROR_FAILURE if the directory or either file
 *         is missing or a stream could not be created; otherwise the failure
 *         code of the underlying file, parse or load call
 */
nsresult
myspAffixMgr::Load(const nsString& aDictionary)
{
  nsresult res=NS_OK;
  nsCOMPtr<nsIFile> dicFile;
  nsCOMPtr<nsIFile> affFile;
  PRBool fileExists;

  //get the directory
  res = NS_GetSpecialDirectory(NS_XPCOM_COMPONENT_DIR, getter_AddRefs(dicFile));
  if(NS_FAILED(res)) return res;
  if(!dicFile)return NS_ERROR_FAILURE;
  res = dicFile->Append(NS_LITERAL_STRING("myspell"));
  if(NS_FAILED(res)) return res;
  res = dicFile->Exists(&fileExists);
  if(NS_FAILED(res)) return res;
  if(!fileExists) return NS_ERROR_FAILURE;

  // Both files live in the same directory: clone it for the affix file.
  res = dicFile->Clone(getter_AddRefs(affFile));
  if(NS_FAILED(res)) return res;
  // Check the clone itself (the old code re-tested dicFile here by mistake).
  if(!affFile)return NS_ERROR_FAILURE;

  //get the affix file
  nsString affName=aDictionary;
  affName.AppendLiteral(".aff");
  res=affFile->Append(affName);
  if(NS_FAILED(res)) return res;
  res = affFile->Exists(&fileExists);
  if(NS_FAILED(res)) return res;
  if(!fileExists) return NS_ERROR_FAILURE;

  //get the dictionary file
  nsString dicName=aDictionary;
  dicName.AppendLiteral(".dic");
  res=dicFile->Append(dicName);
  if(NS_FAILED(res)) return res;
  res = dicFile->Exists(&fileExists);
  if(NS_FAILED(res)) return res;
  if(!fileExists) return NS_ERROR_FAILURE;

  // load the affixFile
  nsCOMPtr<nsIInputStream> affStream;
  res = NS_NewLocalFileInputStream(getter_AddRefs(affStream), affFile);
  if(NS_FAILED(res)) return res;
  if(!affStream)return NS_ERROR_FAILURE;
  res = parse_file(affStream);
  // Do not go on to load the word list on top of a failed affix parse; the
  // old code overwrote this result without ever looking at it.
  if(NS_FAILED(res)) return res;

  // The language is the part of the dictionary name before the first '-'.
  PRInt32 pos=aDictionary.FindChar('-');
  if(pos<1) pos = 2;  // FIXME should be min of 2 and aDictionary.Length()
  mLanguage = Substring(aDictionary,0,pos);

  // load the dictionary
  nsCOMPtr<nsIInputStream> dicStream;
  res = NS_NewLocalFileInputStream(getter_AddRefs(dicStream), dicFile);
  if(NS_FAILED(res)) return res;
  if(!dicStream)return NS_ERROR_FAILURE;
  res = LoadDictionary(dicStream);

  return res;
}
|
||||
|
||||
|
||||
// read in aff file and build up prefix and suffix data structures
|
||||
nsresult myspAffixMgr::parse_file(nsIInputStream *strm)
|
||||
{
|
||||
PRInt32 j;
|
||||
PRInt32 numents;
|
||||
nsLineBuffer *lineBuffer;
|
||||
nsresult rv = NS_InitLineBuffer(&lineBuffer);
|
||||
PRBool moreData=PR_TRUE;
|
||||
PRInt32 pos;
|
||||
nsCString cmds[5];
|
||||
mozAffixMod newMod;
|
||||
|
||||
prefixes.clear();
|
||||
suffixes.clear();
|
||||
|
||||
nsCOMPtr<nsICharsetConverterManager> ccm = do_GetService(kCharsetConverterManagerCID, &rv);
|
||||
NS_ENSURE_SUCCESS(rv, rv);
|
||||
|
||||
numents = 0; // number of affentry structures to parse
|
||||
char flag='\0'; // affix char identifier
|
||||
{
|
||||
PRInt16 ff=0;
|
||||
char ft;
|
||||
|
||||
|
||||
// read in each line ignoring any that do not
|
||||
// start with PFX or SFX
|
||||
|
||||
nsCAutoString line;
|
||||
while (moreData) {
|
||||
NS_ReadLine(strm,lineBuffer,line,&moreData);
|
||||
/* parse in the try string */
|
||||
if (Substring(line,0,3).Equals("TRY")) {
|
||||
pos = line.FindChar(' ');
|
||||
if(pos != -1){
|
||||
trystring = Substring(line,pos+1,line.Length()-pos-1);
|
||||
}
|
||||
}
|
||||
|
||||
/* parse in the name of the character set used by the .dict and .aff */
|
||||
if (Substring(line,0,3).Equals("SET")) {
|
||||
pos = line.FindChar(' ');
|
||||
if(pos != -1){
|
||||
mEncoding.Assign(Substring(line,pos+1,line.Length()-pos-1));
|
||||
mEncoding.CompressWhitespace(PR_TRUE,PR_TRUE);
|
||||
|
||||
rv = ccm->GetUnicodeDecoder(mEncoding.get(), getter_AddRefs(mDecoder));
|
||||
NS_ENSURE_SUCCESS(rv, rv);
|
||||
rv = ccm->GetUnicodeEncoder(mEncoding.get(), getter_AddRefs(mEncoder));
|
||||
if (mEncoder && NS_SUCCEEDED(rv)) {
|
||||
mEncoder->SetOutputErrorBehavior(mEncoder->kOnError_Signal, nsnull, '?');
|
||||
}
|
||||
NS_ENSURE_SUCCESS(rv, rv);
|
||||
}
|
||||
}
|
||||
|
||||
/* parse in the typical fault correcting table */
|
||||
if (Substring(line,0,3).Equals("REP")) {
|
||||
PRInt32 numFields = SplitString(line, cmds, 3);
|
||||
|
||||
if (numFields == 2)
|
||||
numents = atoi(cmds[1].get());
|
||||
|
||||
mReplaceTable = new mozReplaceTable[numents];
|
||||
mReplaceTableLength = numents;
|
||||
|
||||
PRInt32 i = 0;
|
||||
nsAutoString pattern, replacement;
|
||||
|
||||
for (j = 0; (j < numents) && moreData; j++) {
|
||||
NS_ReadLine(strm,lineBuffer,line,&moreData);
|
||||
|
||||
numFields = SplitString(line, cmds, 3);
|
||||
|
||||
if(!cmds[0].Equals("REP")) { //consistency check
|
||||
NS_WARNING("REP line from .aff file is inconsitent");
|
||||
continue;
|
||||
}
|
||||
|
||||
rv = DecodeString(cmds[1], pattern);
|
||||
NS_ENSURE_SUCCESS(rv, rv);
|
||||
rv = DecodeString(cmds[2], replacement);
|
||||
NS_ENSURE_SUCCESS(rv, rv);
|
||||
// Make sure the replacements are lower case.
|
||||
// We don't want to convert them for every lookup.
|
||||
ToLowerCase(pattern);
|
||||
ToLowerCase(replacement);
|
||||
mReplaceTable[i].pattern = pattern.get();
|
||||
mReplaceTable[i].replacement = replacement.get();
|
||||
|
||||
i++;
|
||||
}
|
||||
}
|
||||
|
||||
// get the type of this affix: P - prefix, S - suffix
|
||||
ft = ' ';
|
||||
if (Substring(line,0,3).Equals("PFX")) ft = 'P';
|
||||
if (Substring(line,0,3).Equals("SFX")) ft = 'S';
|
||||
if (ft != ' ') {
|
||||
numents = 0;
|
||||
ff=0;
|
||||
// split line into pieces
|
||||
PRInt32 numFields=SplitString(line, cmds, 5);
|
||||
if(numFields > 1)flag=cmds[1].First();
|
||||
if((numFields > 2)&&(cmds[2].First()=='Y'))ff=XPRODUCT;
|
||||
if(numFields >3)numents = atoi(cmds[3].get());
|
||||
|
||||
// now parse numents affentries for this affix
|
||||
for (j=0; (j < numents)&&moreData; j++) {
|
||||
NS_ReadLine(strm,lineBuffer,line,&moreData);
|
||||
PRInt32 numFields=SplitString(line,cmds,5);
|
||||
nsString tempStr;
|
||||
|
||||
if((numFields < 5)||(cmds[1].First()!=flag)){ //consistency check
|
||||
NS_WARNING("PFX/SFX line from .aff file is inconsitent");
|
||||
continue;
|
||||
}
|
||||
if(cmds[3].Equals("0")){
|
||||
cmds[3].Truncate();
|
||||
}
|
||||
newMod.flags = ff;
|
||||
newMod.mID = flag;
|
||||
newMod.mTruncateLength=cmds[3].Length();
|
||||
|
||||
if(ft == 'P'){
|
||||
if(cmds[2].Equals("0")){
|
||||
newMod.mAppend.Assign("");
|
||||
if(!cmds[4].Equals(".")){
|
||||
cmds[3].Append(cmds[4]);
|
||||
}
|
||||
}
|
||||
else{ // cmds[2] != 0
|
||||
newMod.mAppend.Assign( cmds[2]);
|
||||
if((cmds[2].Length()>cmds[4].Length())||!cmds[2].Equals(Substring(cmds[4],0,cmds[2].Length()))){
|
||||
NS_WARNING("PFX/SFX line from .aff file is inconsitent");
|
||||
continue;
|
||||
}
|
||||
cmds[3].Append(Substring(cmds[4],cmds[2].Length(),cmds[4].Length()-cmds[2].Length()));
|
||||
}
|
||||
prefixes.addMod(cmds[3].get(),&newMod);
|
||||
}
|
||||
else{ // suffix
|
||||
nsCString suffixTest;
|
||||
if(cmds[2].Equals("0")){
|
||||
newMod.mAppend.Assign("");
|
||||
if(!cmds[4].Equals(".")){
|
||||
suffixTest.Assign(cmds[4]);
|
||||
suffixTest.Append(cmds[3]);
|
||||
}
|
||||
else{
|
||||
suffixTest.Assign( cmds[3]);
|
||||
}
|
||||
}
|
||||
else{ // cmds[2] != 0
|
||||
newMod.mAppend.Assign( cmds[2]);
|
||||
if((cmds[2].Length()>cmds[4].Length())||
|
||||
!cmds[2].Equals(Substring(cmds[4],cmds[4].Length()-cmds[2].Length(),cmds[2].Length()))){
|
||||
NS_WARNING("PFX/SFX line from .aff file is inconsitent");
|
||||
continue;
|
||||
}
|
||||
suffixTest=Substring(cmds[4],0,cmds[4].Length()-cmds[2].Length());
|
||||
suffixTest.Append(cmds[3]);
|
||||
}
|
||||
if(suffixTest.Length() != 0)doubleReverseHack(suffixTest);
|
||||
suffixes.addMod(suffixTest.get(),&newMod);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return NS_OK;
|
||||
}
|
||||
|
||||
|
||||
/**
 * Read a word list from |strm| and store every entry in mHashTable.
 *
 * The first line of the stream is read and discarded (the original comment
 * says it carries the hash table size; this loader does not use it).  Each
 * following line is either "word" or "word/FLAGS": the word becomes the key
 * and the flag characters (an empty string when there is no '/') the value.
 * Empty lines are skipped so that "" never becomes a dictionary word.
 *
 * @param strm  input stream positioned at the start of the word list
 * @return NS_OK on success, otherwise the failure code from creating the
 *         line buffer or from reading the stream
 */
nsresult
myspAffixMgr::LoadDictionary(nsIInputStream *strm)
{
  nsLineBuffer *lineBuffer = nsnull;
  nsresult res = NS_InitLineBuffer(&lineBuffer);
  NS_ENSURE_SUCCESS(res, res);

  nsCAutoString line;
  // Initialised so a failed first read cannot leave the loop condition
  // reading an indeterminate value.
  PRBool moreData = PR_FALSE;

  mHashTable.Reset();

  // Header line: consumed, not interpreted.
  res = NS_ReadLine(strm, lineBuffer, line, &moreData);

  // Walk the remaining lines and add each word with its affix flags.
  while (NS_SUCCEEDED(res) && moreData) {
    res = NS_ReadLine(strm, lineBuffer, line, &moreData);
    if (NS_FAILED(res))
      break;

    if (line.IsEmpty())
      continue;

    const PRInt32 slash = line.FindChar('/');
    if (slash == -1) {
      // No flags on this line: the whole line is the word.
      mHashTable.Put(line.get(), "");
    }
    else {
      // Everything before '/' is the word, everything after it the flags.
      // |line| is null-terminated, so the flags can be passed in place.
      nsCAutoString entry(Substring(line, 0, slash));
      mHashTable.Put(entry.get(), line.get() + slash + 1);
    }
  }

  // NOTE(review): assumes NS_InitLineBuffer allocates with PR_NEW (as
  // nsReadLine.h does), so PR_Free is the matching release -- confirm.
  PR_Free(lineBuffer);

  return res;
}
|
||||
|
||||
// return the preferred try string for suggestions
|
||||
void myspAffixMgr::get_try_string(nsAString &aTryString)
|
||||
{
|
||||
PRInt32 outLength;
|
||||
PRInt32 inLength = trystring.Length();
|
||||
nsresult rv = mDecoder->GetMaxLength(trystring.get(), inLength, &outLength);
|
||||
|
||||
if (NS_SUCCEEDED(rv)) {
|
||||
PRUnichar *tmpPtr = (PRUnichar *) malloc(sizeof(PRUnichar) * (outLength + 1));
|
||||
if (tmpPtr) {
|
||||
rv = mDecoder->Convert(trystring.get(), &inLength, tmpPtr, &outLength);
|
||||
if (NS_SUCCEEDED(rv)) {
|
||||
tmpPtr[outLength] = 0;
|
||||
aTryString = tmpPtr;
|
||||
}
|
||||
free(tmpPtr);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Accessor for the replacement table (array of pattern/replacement pairs).
// The returned pointer is the member itself; no copy is made.
mozReplaceTable *
myspAffixMgr::getReplaceTable()
{
  return this->mReplaceTable;
}
|
||||
|
||||
// Number of entries in the array returned by getReplaceTable().
PRUint32
myspAffixMgr::getReplaceTableLength()
{
  return this->mReplaceTableLength;
}
|
||||
|
||||
/**
 * Check whether |word| can be derived from a dictionary entry by removing a
 * known prefix.
 *
 * The prefix state machine is walked one character at a time from the front
 * of the word.  At every state, each attached modification builds a
 * candidate base word (mAppend followed by the word minus its first
 * mTruncateLength characters) and looks it up; the hit counts only if the
 * entry's flags contain the modification's ID.  While characters remain, a
 * modification flagged XPRODUCT also tries the candidate against the
 * suffixes.  Once the whole word has been consumed the cross-product test
 * is skipped.
 *
 * @param word  word in the dictionary's charset
 * @return PR_TRUE if some prefix rule validates the word
 */
PRBool
myspAffixMgr::prefixCheck(const nsAFlatCString &word)
{
  nsACString::const_iterator cursor, wordEnd;
  word.BeginReading(cursor);
  word.EndReading(wordEnd);

  const PRUint32 wordLength = word.Length();
  nsCString candidate;
  mozAffixState *state = &prefixes;

  while (state != nsnull) {
    const PRBool consumedAll = (cursor == wordEnd);

    for (mozAffixMod *mod = state->getMod(); mod != nsnull; mod = mod->next) {
      candidate.Assign(mod->mAppend);
      candidate.Append(Substring(word, mod->mTruncateLength,
                                 wordLength - mod->mTruncateLength));

      const char *entryFlags = mHashTable.Get(candidate.get());
      if (entryFlags != nsnull && PL_strchr(entryFlags, mod->mID))
        return PR_TRUE;

      // No need to check the cross product once the end of the word has
      // been reached.
      if (!consumedAll && ((mod->flags) & XPRODUCT) &&
          suffixCheck(candidate, PR_TRUE, mod->mID))
        return PR_TRUE;
    }

    if (consumedAll)
      break;

    state = state->nextState(*cursor);
    cursor++;
  }

  return PR_FALSE;
}
|
||||
|
||||
/**
 * Check whether |word| can be derived from a dictionary entry by removing a
 * known suffix.
 *
 * The suffix state machine is walked from the last character of the word
 * towards the first.  At every state, each attached modification builds a
 * candidate base word (the word minus its last mTruncateLength characters,
 * followed by mAppend) and looks it up.
 *
 * @param word     word in the dictionary's charset
 * @param cross    when true, the dictionary entry must also carry |crossID|
 *                 (used by prefixCheck for prefix/suffix cross products)
 * @param crossID  flag character required when |cross| is true
 * @return PR_TRUE if some suffix rule validates the word
 */
PRBool
myspAffixMgr::suffixCheck(const nsAFlatCString &word,PRBool cross,char crossID)
{
  nsACString::const_iterator cursor, wordStart;
  word.EndReading(cursor);
  word.BeginReading(wordStart);

  const PRUint32 wordLength = word.Length();
  nsCString candidate;
  mozAffixState *state = &suffixes;

  // The state reached after the first character has been read is examined
  // too: a match is still possible there (en-US "her", Mozdev bug 895).
  while (state != nsnull) {
    for (mozAffixMod *mod = state->getMod(); mod != nsnull; mod = mod->next) {
      candidate = Substring(word, 0, wordLength - mod->mTruncateLength);
      candidate.Append(mod->mAppend);

      const char *entryFlags = mHashTable.Get(candidate.get());
      if (entryFlags == nsnull || !PL_strchr(entryFlags, mod->mID))
        continue;
      if (!cross || PL_strchr(entryFlags, crossID))
        return PR_TRUE;
    }

    if (cursor == wordStart)
      break;

    cursor--;
    state = state->nextState(*cursor);
  }

  return PR_FALSE;
}
|
||||
|
||||
/**
 * Decide whether |word| is correctly spelled.
 *
 * The word is encoded into the dictionary's charset and accepted if it is
 * in the word list or can be derived from it through a prefix or suffix
 * rule.  Otherwise the personal dictionary, if one is set, has the final
 * say.
 *
 * A word that cannot be fully represented in the dictionary's charset is
 * never looked up in the word list (a partially converted word could match
 * by accident); only the personal dictionary is consulted for it.
 *
 * @param word  UTF-16 word to check
 * @return PR_TRUE if the word is considered correct
 */
PRBool myspAffixMgr::check(const nsAFlatString &word)
{
  if (mEncoder) {
    PRInt32 inLength = word.Length();
    PRInt32 outLength = 0;
    nsresult rv = mEncoder->GetMaxLength(word.get(), inLength, &outLength);
    // NS_ERROR_UENC_NOMAPPING is a success code, so test for it explicitly.
    if (NS_FAILED(rv) || rv == NS_ERROR_UENC_NOMAPPING) {
      // not a word in the current charset, so likely not
      // a word in the current language
      return PR_FALSE;
    }

    char *charsetWord = (char *) nsMemory::Alloc(sizeof(char) * (outLength + 1));
    if (!charsetWord)
      return PR_FALSE;

    PRBool found = PR_FALSE;
    rv = mEncoder->Convert(word.get(), &inLength, charsetWord, &outLength);
    if (NS_SUCCEEDED(rv) && rv != NS_ERROR_UENC_NOMAPPING) {
      charsetWord[outLength] = '\0';
      const nsDependentCString encoded(charsetWord);

      found = (mHashTable.Get(charsetWord) != nsnull) ||
              prefixCheck(encoded) ||
              suffixCheck(encoded);
    }

    // The buffer is only needed for the lookups above; release it on every
    // path.
    nsMemory::Free(charsetWord);

    if (found)
      return PR_TRUE;
  }

  PRBool good = PR_FALSE;
  if (mPersonalDictionary) {
    nsresult rv = mPersonalDictionary->Check(word.get(), mLanguage.get(), &good);
    if (NS_FAILED(rv))
      return PR_FALSE;
  }
  return good;
}
|
||||
|
||||
/**
 * Convert |aSource| from the dictionary's charset to UTF-16 using mDecoder.
 *
 * @param aSource  bytes in the dictionary's charset
 * @param aDest    receives the decoded text; truncated to empty when no
 *                 decoder is available
 * @return NS_OK when there is no decoder, NS_ERROR_OUT_OF_MEMORY if the
 *         temporary buffer cannot be allocated, otherwise the result of the
 *         decoder calls
 */
nsresult
myspAffixMgr::DecodeString(const nsAFlatCString &aSource, nsAString &aDest)
{
  if (mDecoder) {
    PRInt32 sourceLength = aSource.Length();
    PRInt32 decodedLength;
    nsresult rv = mDecoder->GetMaxLength(aSource.get(), sourceLength, &decodedLength);
    NS_ENSURE_SUCCESS(rv, rv);

    // One extra element for the terminator added after conversion.
    PRUnichar *buffer = (PRUnichar *)malloc(sizeof(PRUnichar) * (decodedLength + 1));
    if (!buffer)
      return NS_ERROR_OUT_OF_MEMORY;

    rv = mDecoder->Convert(aSource.get(), &sourceLength, buffer, &decodedLength);
    buffer[decodedLength] = 0;
    aDest.Assign(buffer);
    free(buffer);
    return rv;
  }

  aDest.Truncate();
  return NS_OK;
}
|
||||
|
||||
|
||||
/**
 * Split |in| into space-separated fields.
 *
 * Runs of ' ' separate fields; empty fields are never produced.  At most
 * |size| fields are stored, and the remainder of the line is ignored once
 * that many have been found.
 *
 * @param in    line to split
 * @param out   array with room for at least |size| strings
 * @param size  maximum number of fields to store
 * @return the number of fields written to |out|
 */
static PRInt32
SplitString(nsACString &in,nsCString out[],PRInt32 size)
{
  nsACString::const_iterator cursor, lineEnd;
  in.BeginReading(cursor);
  in.EndReading(lineEnd);

  PRInt32 count = 0;
  while ((count < size) && (cursor != lineEnd)) {
    if (*cursor == ' ') {
      // separator: step over it and look again
      cursor++;
      continue;
    }

    // cursor is on the first character of a field; find where it stops
    nsACString::const_iterator fieldStart = cursor;
    do {
      cursor++;
    } while ((cursor != lineEnd) && (*cursor != ' '));

    out[count++] = Substring(fieldStart, cursor);
  }
  return count;
}
|
||||
|
||||
/*
|
||||
reverse the suffix search string so that we put it into the tree in reverse.
|
||||
we need to reverse the blocks so that the ^ in negated blocks occurs first.
|
||||
*/
|
||||
static void doubleReverseHack(nsACString &s)
|
||||
{
|
||||
nsACString::iterator start,end,curr;
|
||||
char temp;
|
||||
|
||||
s.BeginWriting(start);
|
||||
s.EndWriting(end);
|
||||
curr=start;
|
||||
while(start!=end){
|
||||
if(*start=='['){
|
||||
curr=start;
|
||||
while((curr!=end)&&(*curr != ']')) curr++;
|
||||
while(start != curr){
|
||||
temp=*curr;
|
||||
*curr=*start;
|
||||
*start=temp;
|
||||
start++;
|
||||
if(start==curr)break;
|
||||
curr--;
|
||||
}
|
||||
while((start != end)&&(*start != '[')) start++;
|
||||
if(*start != '[')start++;
|
||||
}
|
||||
start++;
|
||||
}
|
||||
s.BeginWriting(start);
|
||||
end--;
|
||||
while(start != end){
|
||||
temp = *start;
|
||||
*start = *end;
|
||||
*end=temp;
|
||||
start++;
|
||||
if(start == end)break;
|
||||
end--;
|
||||
}
|
||||
}
|
|
@ -1,128 +0,0 @@
|
|||
/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
|
||||
/* ***** BEGIN LICENSE BLOCK *****
|
||||
* Version: MPL 1.1/GPL 2.0/LGPL 2.1
|
||||
*
|
||||
* The contents of this file are subject to the Mozilla Public License Version
|
||||
* 1.1 (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
* http://www.mozilla.org/MPL/
|
||||
*
|
||||
* Software distributed under the License is distributed on an "AS IS" basis,
|
||||
* WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
|
||||
* for the specific language governing rights and limitations under the
|
||||
* License.
|
||||
*
|
||||
* The Original Code is Mozilla Spellchecker Component.
|
||||
*
|
||||
* The Initial Developer of the Original Code is
|
||||
* David Einstein.
|
||||
* Portions created by the Initial Developer are Copyright (C) 2001
|
||||
* the Initial Developer. All Rights Reserved.
|
||||
*
|
||||
* Contributor(s): David Einstein <Deinst@world.std.com>
|
||||
* Kevin Hendricks <kevin.hendricks@sympatico.ca>
|
||||
*
|
||||
* Alternatively, the contents of this file may be used under the terms of
|
||||
* either the GNU General Public License Version 2 or later (the "GPL"), or
|
||||
* the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
|
||||
* in which case the provisions of the GPL or the LGPL are applicable instead
|
||||
* of those above. If you wish to allow use of your version of this file only
|
||||
* under the terms of either the GPL or the LGPL, and not to allow others to
|
||||
* use your version of this file under the terms of the MPL, indicate your
|
||||
* decision by deleting the provisions above and replace them with the notice
|
||||
* and other provisions required by the GPL or the LGPL. If you do not delete
|
||||
* the provisions above, a recipient may use your version of this file under
|
||||
* the terms of any one of the MPL, the GPL or the LGPL.
|
||||
*
|
||||
* This spellchecker is based on the MySpell spellchecker made for Open Office
|
||||
* by Kevin Hendricks. Although the algorithms and code, have changed
|
||||
* slightly, the architecture is still the same. The Mozilla implementation
|
||||
* is designed to be compatible with the Open Office dictionaries.
|
||||
* Please do not make changes to the affix or dictionary file formats
|
||||
* without attempting to coordinate with Kevin. For more information
|
||||
* on the original MySpell see
|
||||
* http://whiteboard.openoffice.org/source/browse/whiteboard/lingucomponent/source/spellcheck/myspell/
|
||||
*
|
||||
* A special thanks and credit goes to Geoff Kuenning
|
||||
* the creator of ispell. MySpell's affix algorithms were
|
||||
* based on those of ispell which should be noted is
|
||||
* copyright Geoff Kuenning et.al. and now available
|
||||
* under a BSD style license. For more information on ispell
|
||||
* and affix compression in general, please see:
|
||||
* http://www.cs.ucla.edu/ficus-members/geoff/ispell.html
|
||||
* (the home page for ispell)
|
||||
*
|
||||
* ***** END LICENSE BLOCK ***** */
|
||||
#ifndef _AFFIXMGR_HXX_
|
||||
#define _AFFIXMGR_HXX_
|
||||
#include "nsString.h"
|
||||
#include "mozIPersonalDictionary.h"
|
||||
#include "mozCStr2CStrHashtable.h"
|
||||
#include "mozAffixMod.h"
|
||||
#include "nsNetUtil.h"
|
||||
#include "nsIUnicodeEncoder.h"
|
||||
#include "nsIUnicodeDecoder.h"
|
||||
|
||||
/* Modifications for mozilla Copyright 2001 David Einstein Deinst@world.std.com */
|
||||
|
||||
|
||||
|
||||
// shamelessly stolen from nsDirectoryService.cpp
|
||||
// Probably should move to nsDirectoryService.h?
|
||||
#if defined(XP_MAC)
|
||||
#define COMPONENT_DIRECTORY "Components"
|
||||
#else
|
||||
#define COMPONENT_DIRECTORY "components"
|
||||
#endif
|
||||
|
||||
#define MAXAFFIXES 256
|
||||
|
||||
#define XPRODUCT 1
|
||||
|
||||
struct mozReplaceTable {
|
||||
nsString pattern;
|
||||
nsString replacement;
|
||||
};
|
||||
|
||||
class myspPrefix;
|
||||
class myspSuffix;
|
||||
|
||||
class myspAffixMgr
|
||||
{
|
||||
public:
|
||||
|
||||
myspAffixMgr();
|
||||
~myspAffixMgr();
|
||||
nsresult GetPersonalDictionary(mozIPersonalDictionary * *aPersonalDictionary);
|
||||
nsresult SetPersonalDictionary(mozIPersonalDictionary * aPersonalDictionary);
|
||||
mozReplaceTable *getReplaceTable();
|
||||
PRUint32 getReplaceTableLength();
|
||||
PRBool check(const nsAFlatString &word);
|
||||
void get_try_string(nsAString &aTryString);
|
||||
nsresult Load(const nsString &aDictionary);
|
||||
|
||||
protected:
|
||||
|
||||
PRBool prefixCheck(const nsAFlatCString &word);
|
||||
PRBool suffixCheck(const nsAFlatCString &word,PRBool cross=PR_FALSE,char crossID=' ');
|
||||
|
||||
nsresult LoadDictionary(nsIInputStream *strm);
|
||||
nsresult parse_file(nsIInputStream *strm);
|
||||
|
||||
nsresult DecodeString(const nsAFlatCString &aSource, nsAString &aDest);
|
||||
|
||||
mozAffixState prefixes;
|
||||
mozAffixState suffixes;
|
||||
|
||||
nsCString trystring;
|
||||
nsCString mEncoding;
|
||||
nsString mLanguage;
|
||||
mozCStr2CStrHashtable mHashTable;
|
||||
mozReplaceTable *mReplaceTable;
|
||||
PRUint32 mReplaceTableLength;
|
||||
nsCOMPtr<mozIPersonalDictionary> mPersonalDictionary;
|
||||
nsCOMPtr<nsIUnicodeEncoder> mEncoder;
|
||||
nsCOMPtr<nsIUnicodeDecoder> mDecoder;
|
||||
};
|
||||
|
||||
#endif
|
|
@ -1,405 +0,0 @@
|
|||
/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
|
||||
/* ***** BEGIN LICENSE BLOCK *****
|
||||
* Version: MPL 1.1/GPL 2.0/LGPL 2.1
|
||||
*
|
||||
* The contents of this file are subject to the Mozilla Public License Version
|
||||
* 1.1 (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
* http://www.mozilla.org/MPL/
|
||||
*
|
||||
* Software distributed under the License is distributed on an "AS IS" basis,
|
||||
* WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
|
||||
* for the specific language governing rights and limitations under the
|
||||
* License.
|
||||
*
|
||||
* The Original Code is Mozilla Spellchecker Component.
|
||||
*
|
||||
* The Initial Developer of the Original Code is
|
||||
* David Einstein.
|
||||
* Portions created by the Initial Developer are Copyright (C) 2001
|
||||
* the Initial Developer. All Rights Reserved.
|
||||
*
|
||||
* Contributor(s): David Einstein <Deinst@world.std.com>
|
||||
* Kevin Hendricks <kevin.hendricks@sympatico.ca>
|
||||
*
|
||||
* Alternatively, the contents of this file may be used under the terms of
|
||||
* either the GNU General Public License Version 2 or later (the "GPL"), or
|
||||
* the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
|
||||
* in which case the provisions of the GPL or the LGPL are applicable instead
|
||||
* of those above. If you wish to allow use of your version of this file only
|
||||
* under the terms of either the GPL or the LGPL, and not to allow others to
|
||||
* use your version of this file under the terms of the MPL, indicate your
|
||||
* decision by deleting the provisions above and replace them with the notice
|
||||
* and other provisions required by the GPL or the LGPL. If you do not delete
|
||||
* the provisions above, a recipient may use your version of this file under
|
||||
* the terms of any one of the MPL, the GPL or the LGPL.
|
||||
*
|
||||
* This spellchecker is based on the MySpell spellchecker made for Open Office
|
||||
* by Kevin Hendricks. Although the algorithms and code, have changed
|
||||
* slightly, the architecture is still the same. The Mozilla implementation
|
||||
* is designed to be compatible with the Open Office dictionaries.
|
||||
* Please do not make changes to the affix or dictionary file formats
|
||||
* without attempting to coordinate with Kevin. For more information
|
||||
* on the original MySpell see
|
||||
* http://whiteboard.openoffice.org/source/browse/whiteboard/lingucomponent/source/spellcheck/myspell/
|
||||
*
|
||||
* A special thanks and credit goes to Geoff Kuenning
|
||||
* the creator of ispell. MySpell's affix algorithms were
|
||||
* based on those of ispell which should be noted is
|
||||
* copyright Geoff Kuenning et.al. and now available
|
||||
* under a BSD style license. For more information on ispell
|
||||
* and affix compression in general, please see:
|
||||
* http://www.cs.ucla.edu/ficus-members/geoff/ispell.html
|
||||
* (the home page for ispell)
|
||||
*
|
||||
* ***** END LICENSE BLOCK ***** */
|
||||
#include "myspSuggestmgr.h"
|
||||
#include "plstr.h"
|
||||
#include "nsReadableUtils.h"
|
||||
#include "nsMemory.h"
|
||||
#include "nsUnicharUtils.h"
|
||||
|
||||
// Start with no affix manager and no room for suggestions, so that the
// object is in a well-defined state until setup() is called.
myspSuggestMgr::myspSuggestMgr()
  : pAMgr(nsnull),
    maxSug(0)
{
}
|
||||
|
||||
|
||||
// The affix manager is not owned by this object, so nothing is released
// here; the members are only reset.
myspSuggestMgr::~myspSuggestMgr()
{
  maxSug = 0;
  pAMgr = nsnull;
}
|
||||
|
||||
void
|
||||
myspSuggestMgr::setup(const nsAFlatString &tryme, int maxn, myspAffixMgr *aptr)
|
||||
{
|
||||
// register affix manager and check in string of chars to
|
||||
// try when building candidate suggestions
|
||||
pAMgr = aptr;
|
||||
ctry = tryme;
|
||||
maxSug = maxn;
|
||||
}
|
||||
|
||||
|
||||
// generate suggestions for a mispelled word
|
||||
// pass in address of array of char * pointers
|
||||
|
||||
nsresult myspSuggestMgr::suggest(PRUnichar ***slst,const nsAFlatString &word, PRUint32 *num)
|
||||
{
|
||||
NS_ENSURE_ARG_POINTER(num);
|
||||
NS_ENSURE_ARG_POINTER(slst);
|
||||
|
||||
nsresult res;
|
||||
PRUint32 nsug;
|
||||
PRUint32 i;
|
||||
PRUnichar **wlst;
|
||||
if(!(*slst)){
|
||||
nsug=0;
|
||||
wlst=(PRUnichar **)nsMemory::Alloc(sizeof(PRUnichar *) * maxSug);
|
||||
if(!wlst)
|
||||
return NS_ERROR_OUT_OF_MEMORY;
|
||||
memset(wlst, nsnull, sizeof(PRUnichar*) * maxSug);
|
||||
}
|
||||
else{
|
||||
wlst=*slst;
|
||||
nsug=*num;
|
||||
}
|
||||
|
||||
// perhaps we made a typical spelling error.
|
||||
res = replacechars(wlst, word, &nsug);
|
||||
|
||||
// did we forget to add a char
|
||||
if ((nsug < maxSug) && NS_SUCCEEDED(res)){
|
||||
res = forgotchar(wlst, word, &nsug);
|
||||
}
|
||||
|
||||
// did we swap the order of chars by mistake
|
||||
if ((nsug < maxSug) && NS_SUCCEEDED(res)){
|
||||
res = swapchar(wlst, word, &nsug);
|
||||
}
|
||||
|
||||
// did we add a char that should not be there
|
||||
if ((nsug < maxSug) && NS_SUCCEEDED(res)){
|
||||
res = extrachar(wlst, word, &nsug);
|
||||
}
|
||||
|
||||
// did we just hit the wrong key in place of a good char
|
||||
if ((nsug < maxSug) && NS_SUCCEEDED(res)){
|
||||
res = badchar(wlst, word, &nsug);
|
||||
}
|
||||
|
||||
// perhaps we forgot to hit space and two words ran together
|
||||
if ((nsug < maxSug) && NS_SUCCEEDED(res)){
|
||||
res = twowords(wlst, word, &nsug);
|
||||
}
|
||||
if(NS_FAILED(res)){
|
||||
for (i=0;i<maxSug; i++)
|
||||
if (wlst[i] != NULL) nsMemory::Free(wlst[i]);
|
||||
nsMemory::Free(wlst);
|
||||
*slst = 0;
|
||||
*num=0;
|
||||
}
|
||||
else{
|
||||
*slst=wlst;
|
||||
*num=nsug;
|
||||
}
|
||||
return res;
|
||||
}
|
||||
|
||||
|
||||
// suggestions for a typical spelling error that
|
||||
// differs by more than 1 letter from the right spelling
|
||||
nsresult myspSuggestMgr::replacechars(PRUnichar ** wlst,const nsAFlatString &word, PRUint32 *ns)
|
||||
{
|
||||
nsAutoString candidate;
|
||||
PRBool cwrd;
|
||||
PRUint32 i,k;
|
||||
PRUint32 startOffset, findOffset;
|
||||
|
||||
if (word.Length() < 2 || !pAMgr)
|
||||
return NS_OK;
|
||||
|
||||
PRUint32 replaceTableLength = pAMgr->getReplaceTableLength();
|
||||
struct mozReplaceTable *replaceTable = pAMgr->getReplaceTable();
|
||||
|
||||
if (replaceTable == nsnull)
|
||||
return NS_OK;
|
||||
|
||||
for (i = 0; i < replaceTableLength; i++) {
|
||||
startOffset = 0;
|
||||
|
||||
candidate.Assign(word);
|
||||
ToLowerCase(candidate);
|
||||
|
||||
while ((findOffset = candidate.Find(replaceTable[i].pattern, startOffset)) != -1) {
|
||||
candidate.Assign(word);
|
||||
ToLowerCase(candidate);
|
||||
candidate.Replace(findOffset, replaceTable[i].pattern.Length(), replaceTable[i].replacement);
|
||||
|
||||
cwrd = PR_TRUE;
|
||||
for (k = 0; k < *ns; k++) {
|
||||
if (candidate.Equals(wlst[k])){
|
||||
cwrd = PR_FALSE;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if (cwrd && pAMgr->check(candidate)) {
|
||||
if (*ns < maxSug) {
|
||||
wlst[*ns] = ToNewUnicode(candidate);
|
||||
if (!wlst[*ns])
|
||||
return NS_ERROR_OUT_OF_MEMORY;
|
||||
(*ns)++;
|
||||
} else {
|
||||
return NS_OK;
|
||||
}
|
||||
}
|
||||
|
||||
startOffset = findOffset + replaceTable[i].pattern.Length();
|
||||
}
|
||||
}
|
||||
|
||||
return NS_OK;
|
||||
}
|
||||
|
||||
// error is wrong char in place of correct one
|
||||
nsresult myspSuggestMgr::badchar(PRUnichar ** wlst,const nsAFlatString &word, PRUint32 *ns)
|
||||
{
|
||||
PRUnichar tmpc;
|
||||
nsAutoString candidate;
|
||||
PRBool cwrd;
|
||||
PRUint32 i,j,k;
|
||||
PRUint32 wl = word.Length();
|
||||
candidate.Assign(word);
|
||||
nsASingleFragmentString::char_iterator candIt;
|
||||
for (i=0,candidate.BeginWriting(candIt); i < wl; i++,candIt++) {
|
||||
tmpc = *candIt;
|
||||
for (j=0; j < ctry.Length(); j++) {
|
||||
if (ctry[j] == tmpc) continue;
|
||||
*candIt = ctry[j];
|
||||
cwrd = PR_TRUE;
|
||||
for(k=0;k < *ns;k++){
|
||||
if (candidate.Equals(wlst[k]) ){
|
||||
cwrd = PR_FALSE;
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (cwrd && pAMgr->check(candidate)) {
|
||||
if (*ns < maxSug) {
|
||||
wlst[*ns] = ToNewUnicode(candidate);
|
||||
if(!wlst[*ns])
|
||||
return NS_ERROR_OUT_OF_MEMORY;
|
||||
(*ns)++;
|
||||
} else return NS_OK;
|
||||
}
|
||||
*candIt = tmpc;
|
||||
}
|
||||
}
|
||||
return NS_OK;
|
||||
}
|
||||
|
||||
|
||||
// error is word has an extra letter it does not need
|
||||
nsresult myspSuggestMgr::extrachar(PRUnichar ** wlst,const nsAFlatString &word, PRUint32 *ns)
|
||||
{
|
||||
PRBool cwrd;
|
||||
nsString stCand;
|
||||
nsAutoString candidate;
|
||||
PRUint32 k;
|
||||
PRUint32 wl = word.Length();
|
||||
if (wl < 2) return 0;
|
||||
|
||||
// try omitting one char of word at a time
|
||||
candidate.Assign(Substring(word,1,wl-1));
|
||||
nsASingleFragmentString::char_iterator r;
|
||||
nsASingleFragmentString::const_char_iterator p,end;
|
||||
word.EndReading(end);
|
||||
|
||||
for (word.BeginReading(p),candidate.BeginWriting(r); p != end; ) {
|
||||
cwrd = PR_TRUE;
|
||||
for(k=0;k < *ns;k++){
|
||||
if (candidate.Equals(wlst[k])){
|
||||
cwrd = PR_FALSE;
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (cwrd && pAMgr->check(candidate)) {
|
||||
if (*ns < maxSug) {
|
||||
wlst[*ns] = ToNewUnicode(candidate);
|
||||
if(!wlst[*ns])
|
||||
return NS_ERROR_OUT_OF_MEMORY;
|
||||
(*ns)++;
|
||||
} else return NS_OK;
|
||||
}
|
||||
*r++ = *p++;
|
||||
}
|
||||
return NS_OK;
|
||||
}
|
||||
|
||||
|
||||
// error is mising a letter it needs
|
||||
nsresult myspSuggestMgr::forgotchar(PRUnichar **wlst,const nsAFlatString &word, PRUint32 *ns)
|
||||
{
|
||||
PRBool cwrd;
|
||||
nsString stCand;
|
||||
nsAutoString candidate;
|
||||
PRUint32 i,k;
|
||||
candidate = NS_LITERAL_STRING(" ") + word;
|
||||
nsASingleFragmentString::char_iterator q;
|
||||
nsASingleFragmentString::const_char_iterator p,end;
|
||||
word.EndReading(end);
|
||||
|
||||
// try inserting a tryme character before every letter
|
||||
for (word.BeginReading(p), candidate.BeginWriting(q); p != end; ) {
|
||||
for ( i = 0; i < ctry.Length(); i++) {
|
||||
*q = ctry[i];
|
||||
cwrd = PR_TRUE;
|
||||
for(k=0;k < *ns;k++){
|
||||
if (candidate.Equals(wlst[k]) ){
|
||||
cwrd = PR_FALSE;
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (cwrd && pAMgr->check(candidate)) {
|
||||
if (*ns < maxSug) {
|
||||
wlst[*ns] = ToNewUnicode(candidate);
|
||||
if(!wlst[*ns])
|
||||
return NS_ERROR_OUT_OF_MEMORY;
|
||||
(*ns)++;
|
||||
} else return NS_OK;
|
||||
}
|
||||
}
|
||||
*q++ = *p++;
|
||||
}
|
||||
|
||||
// now try adding one to end */
|
||||
for ( i = 0; i < ctry.Length(); i++) {
|
||||
*q = ctry[i];
|
||||
cwrd = PR_TRUE;
|
||||
for(k=0;k < *ns;k++){
|
||||
if (candidate.Equals(wlst[k])){
|
||||
cwrd = PR_FALSE;
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (cwrd && pAMgr->check(candidate)) {
|
||||
if (*ns < maxSug) {
|
||||
wlst[*ns] = ToNewUnicode(candidate);
|
||||
if(!wlst[*ns])
|
||||
return NS_ERROR_OUT_OF_MEMORY;
|
||||
(*ns)++;
|
||||
} else return NS_OK;
|
||||
}
|
||||
}
|
||||
return NS_OK;
|
||||
}
|
||||
|
||||
|
||||
/* error is should have been two words */
|
||||
nsresult myspSuggestMgr::twowords(PRUnichar ** wlst,const nsAFlatString &word, PRUint32 *ns)
|
||||
{
|
||||
nsAutoString candidate;
|
||||
PRUint32 pos;
|
||||
PRUint32 wl=word.Length();
|
||||
if (wl < 3) return NS_OK;
|
||||
candidate.Assign(word);
|
||||
nsAutoString temp;
|
||||
|
||||
// split the string into two pieces after every char
|
||||
// if both pieces are good words make them a suggestion
|
||||
for (pos = 1; pos < wl; pos++) {
|
||||
temp.Assign(Substring(candidate,0,pos));
|
||||
if (pAMgr->check(temp)) {
|
||||
temp.Assign(Substring(candidate,pos,wl-pos));
|
||||
if (pAMgr->check(temp)) {
|
||||
if (*ns < maxSug) {
|
||||
candidate.Insert(PRUnichar(' '),pos);
|
||||
wlst[*ns] = ToNewUnicode(candidate);
|
||||
if(!wlst[*ns])
|
||||
return NS_ERROR_OUT_OF_MEMORY;
|
||||
(*ns)++;
|
||||
} else return NS_OK;
|
||||
}
|
||||
}
|
||||
}
|
||||
return NS_OK;
|
||||
}
|
||||
|
||||
|
||||
// error is adjacent letter were swapped
|
||||
nsresult myspSuggestMgr::swapchar(PRUnichar **wlst,const nsAFlatString &word, PRUint32 *ns)
|
||||
{
|
||||
nsAutoString candidate;
|
||||
PRUnichar tmpc;
|
||||
PRBool cwrd;
|
||||
PRUint32 k;
|
||||
candidate.Assign(word);
|
||||
nsASingleFragmentString::char_iterator p,q,end;
|
||||
candidate.EndWriting(end);
|
||||
|
||||
for (candidate.BeginWriting(p),q=p, q++; q != end; p++,q++) {
|
||||
tmpc = *p;
|
||||
*p = *q;
|
||||
*q = tmpc;
|
||||
cwrd = PR_TRUE;
|
||||
for(k=0;k < *ns;k++){
|
||||
if (candidate.Equals(wlst[k])){
|
||||
cwrd = PR_FALSE;
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (cwrd && pAMgr->check(candidate)) {
|
||||
if (*ns < maxSug) {
|
||||
wlst[*ns] = ToNewUnicode(candidate);
|
||||
if(!wlst[*ns])
|
||||
return NS_ERROR_OUT_OF_MEMORY;
|
||||
(*ns)++;
|
||||
} else return NS_OK;
|
||||
}
|
||||
tmpc = *p;
|
||||
*p = *q;
|
||||
*q = tmpc;
|
||||
}
|
||||
return NS_OK;
|
||||
}
|
||||
|
|
@ -1,90 +0,0 @@
|
|||
/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
|
||||
/* ***** BEGIN LICENSE BLOCK *****
|
||||
* Version: MPL 1.1/GPL 2.0/LGPL 2.1
|
||||
*
|
||||
* The contents of this file are subject to the Mozilla Public License Version
|
||||
* 1.1 (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
* http://www.mozilla.org/MPL/
|
||||
*
|
||||
* Software distributed under the License is distributed on an "AS IS" basis,
|
||||
* WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
|
||||
* for the specific language governing rights and limitations under the
|
||||
* License.
|
||||
*
|
||||
* The Original Code is Mozilla Spellchecker Component.
|
||||
*
|
||||
* The Initial Developer of the Original Code is
|
||||
* David Einstein.
|
||||
* Portions created by the Initial Developer are Copyright (C) 2001
|
||||
* the Initial Developer. All Rights Reserved.
|
||||
*
|
||||
* Contributor(s): David Einstein <Deinst@world.std.com>
|
||||
* Kevin Hendricks <kevin.hendricks@sympatico.ca>
|
||||
*
|
||||
* Alternatively, the contents of this file may be used under the terms of
|
||||
* either the GNU General Public License Version 2 or later (the "GPL"), or
|
||||
* the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
|
||||
* in which case the provisions of the GPL or the LGPL are applicable instead
|
||||
* of those above. If you wish to allow use of your version of this file only
|
||||
* under the terms of either the GPL or the LGPL, and not to allow others to
|
||||
* use your version of this file under the terms of the MPL, indicate your
|
||||
* decision by deleting the provisions above and replace them with the notice
|
||||
* and other provisions required by the GPL or the LGPL. If you do not delete
|
||||
* the provisions above, a recipient may use your version of this file under
|
||||
* the terms of any one of the MPL, the GPL or the LGPL.
|
||||
*
|
||||
* This spellchecker is based on the MySpell spellchecker made for Open Office
|
||||
* by Kevin Hendricks. Although the algorithms and code, have changed
|
||||
* slightly, the architecture is still the same. The Mozilla implementation
|
||||
* is designed to be compatible with the Open Office dictionaries.
|
||||
* Please do not make changes to the affix or dictionary file formats
|
||||
* without attempting to coordinate with Kevin. For more information
|
||||
* on the original MySpell see
|
||||
* http://whiteboard.openoffice.org/source/browse/whiteboard/lingucomponent/source/spellcheck/myspell/
|
||||
*
|
||||
* A special thanks and credit goes to Geoff Kuenning
|
||||
* the creator of ispell. MySpell's affix algorithms were
|
||||
* based on those of ispell which should be noted is
|
||||
* copyright Geoff Kuenning et.al. and now available
|
||||
* under a BSD style license. For more information on ispell
|
||||
* and affix compression in general, please see:
|
||||
* http://www.cs.ucla.edu/ficus-members/geoff/ispell.html
|
||||
* (the home page for ispell)
|
||||
*
|
||||
* ***** END LICENSE BLOCK ***** */
|
||||
#ifndef _SUGGESTMGR_HXX_
|
||||
#define _SUGGESTMGR_HXX_
|
||||
|
||||
#include "nsString.h"
|
||||
#include "nsVoidArray.h"
|
||||
#include "myspAffixmgr.h"
|
||||
#include "nsString.h"
|
||||
|
||||
/* Modifications for mozilla Copyright 2001 David Einstein Deinst@world.std.com */
|
||||
|
||||
|
||||
class myspSuggestMgr
|
||||
{
|
||||
nsString ctry;
|
||||
myspAffixMgr* pAMgr;
|
||||
PRUint32 maxSug;
|
||||
|
||||
public:
|
||||
myspSuggestMgr();
|
||||
~myspSuggestMgr();
|
||||
|
||||
void setup(const nsAFlatString &tryme, int maxn, myspAffixMgr *aptr);
|
||||
nsresult suggest(PRUnichar ***slst, const nsAFlatString &word, PRUint32 *num);
|
||||
|
||||
protected:
|
||||
nsresult replacechars(PRUnichar **wlst,const nsAFlatString &word, PRUint32 *num);
|
||||
nsresult forgotchar(PRUnichar **wlst,const nsAFlatString &word, PRUint32 *num);
|
||||
nsresult swapchar(PRUnichar **wlst,const nsAFlatString &word, PRUint32 *num);
|
||||
nsresult extrachar(PRUnichar **wlst,const nsAFlatString &word, PRUint32 *num);
|
||||
nsresult badchar(PRUnichar **wlst,const nsAFlatString &word, PRUint32 *num);
|
||||
nsresult twowords(PRUnichar **wlst,const nsAFlatString &word, PRUint32 *num);
|
||||
};
|
||||
|
||||
#endif
|
||||
|
|
@ -0,0 +1,304 @@
|
|||
#include "license.readme"
|
||||
#include <cstring>
|
||||
#include <cstdlib>
|
||||
#include <cstdio>
|
||||
|
||||
#include "myspell.hxx"
|
||||
|
||||
// using namespace std;
|
||||
|
||||
|
||||
// Build a spell checker from an affix file and a dictionary file.
//   affpath - forwarded to the AffixMgr constructor
//   dpath   - forwarded to the HashMgr constructor
MySpell::MySpell(const char * affpath, const char * dpath)
{
  encoding = NULL;
  csconv = NULL;

  // The hash manager has to exist first: the affix manager is
  // constructed with a pointer to it so it can use its lookup methods.
  pHMgr = new HashMgr(dpath);
  pAMgr = new AffixMgr(affpath, pHMgr);

  // The affix manager supplies both the preferred try string and the
  // dictionary encoding; csconv is the table looked up for that encoding.
  char * tryChars = pAMgr->get_try_string();
  encoding = pAMgr->get_encoding();
  csconv = get_current_cs(encoding);

  // The suggestion manager comes last; it is handed the try string,
  // which is released here right after construction.
  maxSug = 25;
  pSMgr = new SuggestMgr(tryChars, maxSug, pAMgr);
  free(tryChars);  // free(NULL) is a no-op, so no guard is needed
}
|
||||
|
||||
|
||||
// Tear down the three managers (suggestion, affix, hash - in that order)
// and release the encoding string obtained in the constructor.
MySpell::~MySpell()
{
  // delete and free() both accept null pointers, so no guards are needed
  delete pSMgr;
  delete pAMgr;
  delete pHMgr;
  pSMgr = NULL;
  pAMgr = NULL;
  pHMgr = NULL;

  // csconv is only cleared here, never freed by this class
  csconv = NULL;

  free(encoding);
  encoding = NULL;
}
|
||||
|
||||
|
||||
// make a copy of src at destination while removing all leading
|
||||
// blanks and removing any trailing periods after recording
|
||||
// their presence with the abbreviation flag
|
||||
// also since already going through character by character,
|
||||
// set the capitalization type
|
||||
// return the length of the "cleaned" word
|
||||
|
||||
int MySpell::cleanword(char * dest, const char * src, int * pcaptype, int * pabbrev)
|
||||
{
|
||||
|
||||
// with the new breakiterator code this should not be needed anymore
|
||||
const char * special_chars = "._#$%&()* +,-/:;<=>[]\\^`{|}~\t \x0a\x0d\x01\'\"";
|
||||
|
||||
unsigned char * p = (unsigned char *) dest;
|
||||
const unsigned char * q = (const unsigned char * ) src;
|
||||
|
||||
// first skip over any leading special characters
|
||||
while ((*q != '\0') && (strchr(special_chars,(int)(*q)))) q++;
|
||||
|
||||
// now strip off any trailing special characters
|
||||
// if a period comes after a normal char record its presence
|
||||
*pabbrev = 0;
|
||||
int nl = strlen((const char *)q);
|
||||
while ((nl > 0) && (strchr(special_chars,(int)(*(q+nl-1))))) {
|
||||
nl--;
|
||||
}
|
||||
if ( *(q+nl) == '.' ) *pabbrev = 1;
|
||||
|
||||
// if no characters are left it can't be an abbreviation and can't be capitalized
|
||||
if (nl <= 0) {
|
||||
*pcaptype = NOCAP;
|
||||
*pabbrev = 0;
|
||||
*p = '\0';
|
||||
return 0;
|
||||
}
|
||||
|
||||
// now determine the capitalization type of the first nl letters
|
||||
int ncap = 0;
|
||||
int nneutral = 0;
|
||||
int nc = 0;
|
||||
while (nl > 0) {
|
||||
nc++;
|
||||
if (csconv[(*q)].ccase) ncap++;
|
||||
if (csconv[(*q)].cupper == csconv[(*q)].clower) nneutral++;
|
||||
*p++ = *q++;
|
||||
nl--;
|
||||
}
|
||||
// remember to terminate the destination string
|
||||
*p = '\0';
|
||||
|
||||
// now finally set the captype
|
||||
if (ncap == 0) {
|
||||
*pcaptype = NOCAP;
|
||||
} else if ((ncap == 1) && csconv[(unsigned char)(*dest)].ccase) {
|
||||
*pcaptype = INITCAP;
|
||||
} else if ((ncap == nc) || ((ncap + nneutral) == nc)){
|
||||
*pcaptype = ALLCAP;
|
||||
} else {
|
||||
*pcaptype = HUHCAP;
|
||||
}
|
||||
return nc;
|
||||
}
|
||||
|
||||
|
||||
int MySpell::spell(const char * word)
|
||||
{
|
||||
char * rv=NULL;
|
||||
char cw[MAXWORDLEN+1];
|
||||
char wspace[MAXWORDLEN+1];
|
||||
|
||||
int wl = strlen(word);
|
||||
if (wl > (MAXWORDLEN - 1)) return 0;
|
||||
int captype = 0;
|
||||
int abbv = 0;
|
||||
wl = cleanword(cw, word, &captype, &abbv);
|
||||
if (wl == 0) return 1;
|
||||
|
||||
switch(captype) {
|
||||
case HUHCAP:
|
||||
case NOCAP: {
|
||||
rv = check(cw);
|
||||
if ((abbv) && !(rv)) {
|
||||
memcpy(wspace,cw,wl);
|
||||
*(wspace+wl) = '.';
|
||||
*(wspace+wl+1) = '\0';
|
||||
rv = check(wspace);
|
||||
}
|
||||
break;
|
||||
}
|
||||
|
||||
case ALLCAP: {
|
||||
memcpy(wspace,cw,(wl+1));
|
||||
mkallsmall(wspace, csconv);
|
||||
rv = check(wspace);
|
||||
if (!rv) {
|
||||
mkinitcap(wspace, csconv);
|
||||
rv = check(wspace);
|
||||
}
|
||||
if (!rv) rv = check(cw);
|
||||
if ((abbv) && !(rv)) {
|
||||
memcpy(wspace,cw,wl);
|
||||
*(wspace+wl) = '.';
|
||||
*(wspace+wl+1) = '\0';
|
||||
rv = check(wspace);
|
||||
}
|
||||
break;
|
||||
}
|
||||
case INITCAP: {
|
||||
memcpy(wspace,cw,(wl+1));
|
||||
mkallsmall(wspace, csconv);
|
||||
rv = check(wspace);
|
||||
if (!rv) rv = check(cw);
|
||||
if ((abbv) && !(rv)) {
|
||||
memcpy(wspace,cw,wl);
|
||||
*(wspace+wl) = '.';
|
||||
*(wspace+wl+1) = '\0';
|
||||
rv = check(wspace);
|
||||
}
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (rv) return 1;
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
char * MySpell::check(const char * word)
|
||||
{
|
||||
struct hentry * he = NULL;
|
||||
if (pHMgr)
|
||||
he = pHMgr->lookup (word);
|
||||
|
||||
if ((he == NULL) && (pAMgr)) {
|
||||
// try stripping off affixes */
|
||||
he = pAMgr->affix_check(word, strlen(word));
|
||||
|
||||
// try check compound word
|
||||
if ((he == NULL) && (pAMgr->get_compound())) {
|
||||
he = pAMgr->compound_check(word, strlen(word), (pAMgr->get_compound())[0]);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
if (he) return he->word;
|
||||
return NULL;
|
||||
}
|
||||
|
||||
|
||||
|
||||
int MySpell::suggest(char*** slst, const char * word)
|
||||
{
|
||||
char cw[MAXWORDLEN+1];
|
||||
char wspace[MAXWORDLEN+1];
|
||||
if (! pSMgr) return 0;
|
||||
int wl = strlen(word);
|
||||
if (wl > (MAXWORDLEN-1)) return 0;
|
||||
int captype = 0;
|
||||
int abbv = 0;
|
||||
wl = cleanword(cw, word, &captype, &abbv);
|
||||
if (wl == 0) return 0;
|
||||
|
||||
int ns = 0;
|
||||
char ** wlst = (char **) calloc(maxSug, sizeof(char *));
|
||||
if (wlst == NULL) return 0;
|
||||
|
||||
switch(captype) {
|
||||
case NOCAP: {
|
||||
ns = pSMgr->suggest(wlst, ns, cw);
|
||||
break;
|
||||
}
|
||||
|
||||
case INITCAP: {
|
||||
|
||||
ns = pSMgr->suggest(wlst,ns,cw);
|
||||
if (ns != -1) {
|
||||
memcpy(wspace,cw,(wl+1));
|
||||
mkallsmall(wspace, csconv);
|
||||
if (ns) {
|
||||
ns = pSMgr->suggest(wlst, ns, wspace);
|
||||
} else {
|
||||
int ns2 = pSMgr->suggest(wlst, ns, wspace);
|
||||
for (int j=ns; j < ns2; j++)
|
||||
mkinitcap(wlst[j], csconv);
|
||||
ns = ns2;
|
||||
}
|
||||
}
|
||||
break;
|
||||
}
|
||||
|
||||
case HUHCAP: {
|
||||
ns = pSMgr->suggest(wlst, ns, cw);
|
||||
if (ns != -1) {
|
||||
memcpy(wspace,cw,(wl+1));
|
||||
mkallsmall(wspace, csconv);
|
||||
ns = pSMgr->suggest(wlst, ns, wspace);
|
||||
}
|
||||
break;
|
||||
}
|
||||
|
||||
case ALLCAP: {
|
||||
memcpy(wspace,cw,(wl+1));
|
||||
mkallsmall(wspace, csconv);
|
||||
ns = pSMgr->suggest(wlst, ns, wspace);
|
||||
if (ns > 0) {
|
||||
for (int j=0; j < ns; j++)
|
||||
mkallcap(wlst[j], csconv);
|
||||
}
|
||||
if (ns != -1)
|
||||
ns = pSMgr->suggest(wlst, ns , cw);
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (ns > 0) {
|
||||
*slst = wlst;
|
||||
return ns;
|
||||
}
|
||||
// try ngram approach since found nothing
|
||||
if (ns == 0) {
|
||||
ns = pSMgr->ngsuggest(wlst, cw, pHMgr);
|
||||
if (ns) {
|
||||
switch(captype) {
|
||||
case NOCAP: break;
|
||||
case HUHCAP: break;
|
||||
case INITCAP: {
|
||||
for (int j=0; j < ns; j++)
|
||||
mkinitcap(wlst[j], csconv);
|
||||
}
|
||||
break;
|
||||
|
||||
case ALLCAP: {
|
||||
for (int j=0; j < ns; j++)
|
||||
mkallcap(wlst[j], csconv);
|
||||
}
|
||||
break;
|
||||
}
|
||||
*slst = wlst;
|
||||
return ns;
|
||||
}
|
||||
}
|
||||
if (ns < 0) {
|
||||
// we ran out of memory - we should free up as much as possible
|
||||
for (int i=0;i<maxSug; i++)
|
||||
if (wlst[i] != NULL) free(wlst[i]);
|
||||
}
|
||||
if (wlst) free(wlst);
|
||||
*slst = NULL;
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
char * MySpell::get_dic_encoding()
|
||||
{
|
||||
return encoding;
|
||||
}
|
||||
|
|
@ -0,0 +1,36 @@
|
|||
#ifndef _MYSPELLMGR_HXX_
|
||||
#define _MYSPELLMGR_HXX_
|
||||
|
||||
#include "hashmgr.hxx"
|
||||
#include "affixmgr.hxx"
|
||||
#include "suggestmgr.hxx"
|
||||
#include "csutil.hxx"
|
||||
|
||||
#define NOCAP 0
|
||||
#define INITCAP 1
|
||||
#define ALLCAP 2
|
||||
#define HUHCAP 3
|
||||
|
||||
class MySpell
|
||||
{
|
||||
AffixMgr* pAMgr;
|
||||
HashMgr* pHMgr;
|
||||
SuggestMgr* pSMgr;
|
||||
char * encoding;
|
||||
struct cs_info * csconv;
|
||||
int maxSug;
|
||||
|
||||
public:
|
||||
MySpell(const char * affpath, const char * dpath);
|
||||
~MySpell();
|
||||
|
||||
int suggest(char*** slst, const char * word);
|
||||
int spell(const char *);
|
||||
char * get_dic_encoding();
|
||||
|
||||
private:
|
||||
int cleanword(char *, const char *, int *, int *);
|
||||
char * check(const char *);
|
||||
};
|
||||
|
||||
#endif
|
|
@ -1,45 +0,0 @@
|
|||
/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
|
||||
/* ***** BEGIN LICENSE BLOCK *****
|
||||
* Version: MPL 1.1/GPL 2.0/LGPL 2.1
|
||||
*
|
||||
* The contents of this file are subject to the Mozilla Public License Version
|
||||
* 1.1 (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
* http://www.mozilla.org/MPL/
|
||||
*
|
||||
* Software distributed under the License is distributed on an "AS IS" basis,
|
||||
* WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
|
||||
* for the specific language governing rights and limitations under the
|
||||
* License.
|
||||
*
|
||||
* The Original Code is Mozilla Spellchecker Component.
|
||||
*
|
||||
* The Initial Developer of the Original Code is
|
||||
* David Einstein.
|
||||
* Portions created by the Initial Developer are Copyright (C) 2001
|
||||
* the Initial Developer. All Rights Reserved.
|
||||
*
|
||||
* Contributor(s): David Einstein <Deinst@world.std.com>
|
||||
*
|
||||
* Alternatively, the contents of this file may be used under the terms of
|
||||
* either the GNU General Public License Version 2 or later (the "GPL"), or
|
||||
* the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
|
||||
* in which case the provisions of the GPL or the LGPL are applicable instead
|
||||
* of those above. If you wish to allow use of your version of this file only
|
||||
* under the terms of either the GPL or the LGPL, and not to allow others to
|
||||
* use your version of this file under the terms of the MPL, indicate your
|
||||
* decision by deleting the provisions above and replace them with the notice
|
||||
* and other provisions required by the GPL or the LGPL. If you do not delete
|
||||
* the provisions above, a recipient may use your version of this file under
|
||||
* the terms of any one of the MPL, the GPL or the LGPL.
|
||||
*
|
||||
* ***** END LICENSE BLOCK ***** */
|
||||
|
||||
#ifndef nsReadCLine_h__
|
||||
#define nsReadCLine_h__
|
||||
|
||||
//This is shameless and obvious theft from nsReadLine.h
|
||||
|
||||
#include "nsReadLine.h"
|
||||
#include "nsString.h"
|
||||
#endif // nsReadCLine_h__
|
|
@ -0,0 +1,551 @@
|
|||
#include "license.readme"
|
||||
#include <cstdlib>
|
||||
#include <cctype>
|
||||
#include <cstring>
|
||||
#include <cstdio>
|
||||
|
||||
#include "suggestmgr.hxx"
|
||||
|
||||
// using namespace std;
|
||||
|
||||
extern char * mystrdup(const char *);
|
||||
|
||||
|
||||
// Set up a suggestion manager.
//   tryme - characters to try when building candidate suggestions;
//           a private copy is made with mystrdup()
//   maxn  - maximum number of suggestions to produce
//   aptr  - affix manager used to validate candidates (may be NULL)
SuggestMgr::SuggestMgr(const char * tryme, int maxn,
                       AffixMgr * aptr)
{
  // register affix manager and check in string of chars to
  // try when building candidate suggestions
  pAMgr = aptr;
  ctry = mystrdup(tryme);
  ctryl = 0;
  if (ctry)
    ctryl = strlen(ctry);
  maxSug = maxn;

  // Honour the affix manager's setting for split-word suggestions.
  // The value returned by get_nosplitsugs() used to be thrown away,
  // which left nosplitsugs permanently false.
  nosplitsugs = false;
  if (pAMgr) nosplitsugs = pAMgr->get_nosplitsugs();
}
|
||||
|
||||
|
||||
// Release the private copy of the try string and reset every field.
// The affix manager is only forgotten here, not deleted.
SuggestMgr::~SuggestMgr()
{
  pAMgr = NULL;

  free(ctry);  // free(NULL) is a no-op
  ctry = NULL;
  ctryl = 0;

  maxSug = 0;
}
|
||||
|
||||
|
||||
|
||||
// generate suggestions for a mispelled word
|
||||
// pass in address of array of char * pointers
|
||||
|
||||
int SuggestMgr::suggest(char** wlst, int ns, const char * word)
|
||||
{
|
||||
|
||||
int nsug = ns;
|
||||
|
||||
// did we swap the order of chars by mistake
|
||||
if ((nsug < maxSug) && (nsug > -1))
|
||||
nsug = swapchar(wlst, word, nsug);
|
||||
|
||||
// perhaps we made chose the wrong char from a related set
|
||||
if ((nsug < maxSug) && (nsug > -1))
|
||||
nsug = mapchars(wlst, word, nsug);
|
||||
|
||||
// perhaps we made a typical fault of spelling
|
||||
if ((nsug < maxSug) && (nsug > -1))
|
||||
nsug = replchars(wlst, word, nsug);
|
||||
|
||||
// did we forget to add a char
|
||||
if ((nsug < maxSug) && (nsug > -1))
|
||||
nsug = forgotchar(wlst, word, nsug);
|
||||
|
||||
// did we add a char that should not be there
|
||||
if ((nsug < maxSug) && (nsug > -1))
|
||||
nsug = extrachar(wlst, word, nsug);
|
||||
|
||||
// did we just hit the wrong key in place of a good char
|
||||
if ((nsug < maxSug) && (nsug > -1))
|
||||
nsug = badchar(wlst, word, nsug);
|
||||
|
||||
// perhaps we forgot to hit space and two words ran together
|
||||
if (!nosplitsugs) {
|
||||
if ((nsug < maxSug) && (nsug > -1))
|
||||
nsug = twowords(wlst, word, nsug);
|
||||
}
|
||||
return nsug;
|
||||
}
|
||||
|
||||
|
||||
|
||||
// suggestions for when chose the wrong char out of a related set
|
||||
int SuggestMgr::mapchars(char** wlst, const char * word, int ns)
|
||||
{
|
||||
int wl = strlen(word);
|
||||
if (wl < 2 || ! pAMgr) return ns;
|
||||
|
||||
int nummap = pAMgr->get_nummap();
|
||||
struct mapentry* maptable = pAMgr->get_maptable();
|
||||
if (maptable==NULL) return ns;
|
||||
ns = map_related(word, 0, wlst, ns, maptable, nummap);
|
||||
return ns;
|
||||
}
|
||||
|
||||
|
||||
int SuggestMgr::map_related(const char * word, int i, char** wlst, int ns, const mapentry* maptable, int nummap)
|
||||
{
|
||||
char c = *(word + i);
|
||||
if (c == 0) {
|
||||
int cwrd = 1;
|
||||
for (int m=0; m < ns; m++)
|
||||
if (strcmp(word,wlst[m]) == 0) cwrd = 0;
|
||||
if ((cwrd) && check(word,strlen(word))) {
|
||||
if (ns < maxSug) {
|
||||
wlst[ns] = mystrdup(word);
|
||||
// fprintf(stderr,"map_related %d adding %s\n",ns, wlst[ns]); fflush(stderr);
|
||||
if (wlst[ns] == NULL) return -1;
|
||||
ns++;
|
||||
}
|
||||
}
|
||||
return ns;
|
||||
}
|
||||
int in_map = 0;
|
||||
for (int j = 0; j < nummap; j++) {
|
||||
if (strchr(maptable[j].set,c) != 0) {
|
||||
in_map = 1;
|
||||
char * newword = strdup(word);
|
||||
for (int k = 0; k < maptable[j].len; k++) {
|
||||
*(newword + i) = *(maptable[j].set + k);
|
||||
ns = map_related(newword, (i+1), wlst, ns, maptable, nummap);
|
||||
}
|
||||
free(newword);
|
||||
}
|
||||
}
|
||||
if (!in_map) {
|
||||
i++;
|
||||
ns = map_related(word, i, wlst, ns, maptable, nummap);
|
||||
}
|
||||
return ns;
|
||||
}
|
||||
|
||||
|
||||
|
||||
// suggestions for a typical fault of spelling, that
|
||||
// differs with more, than 1 letter from the right form.
|
||||
int SuggestMgr::replchars(char** wlst, const char * word, int ns)
|
||||
{
|
||||
char candidate[MAXSWL];
|
||||
const char * r;
|
||||
int lenr, lenp;
|
||||
int cwrd;
|
||||
|
||||
int wl = strlen(word);
|
||||
if (wl < 2 || ! pAMgr) return ns;
|
||||
|
||||
int numrep = pAMgr->get_numrep();
|
||||
struct replentry* reptable = pAMgr->get_reptable();
|
||||
if (reptable==NULL) return ns;
|
||||
|
||||
for (int i=0; i < numrep; i++ ) {
|
||||
r = word;
|
||||
lenr = strlen(reptable[i].replacement);
|
||||
lenp = strlen(reptable[i].pattern);
|
||||
// search every occurence of the pattern in the word
|
||||
while ((r=strstr(r, reptable[i].pattern)) != NULL) {
|
||||
strcpy(candidate, word);
|
||||
if (r-word + lenr + strlen(r+lenp) >= MAXSWL) break;
|
||||
strcpy(candidate+(r-word),reptable[i].replacement);
|
||||
strcpy(candidate+(r-word)+lenr, r+lenp);
|
||||
cwrd = 1;
|
||||
for (int k=0; k < ns; k++)
|
||||
if (strcmp(candidate,wlst[k]) == 0) cwrd = 0;
|
||||
if ((cwrd) && check(candidate,strlen(candidate))) {
|
||||
if (ns < maxSug) {
|
||||
wlst[ns] = mystrdup(candidate);
|
||||
// fprintf(stderr,"replchars %d adding %s\n",ns,wlst[ns]); fflush(stderr);
|
||||
if (wlst[ns] == NULL) return -1;
|
||||
ns++;
|
||||
} else return ns;
|
||||
}
|
||||
r++; // search for the next letter
|
||||
}
|
||||
}
|
||||
return ns;
|
||||
}
|
||||
|
||||
|
||||
// error is wrong char in place of correct one
|
||||
int SuggestMgr::badchar(char ** wlst, const char * word, int ns)
|
||||
{
|
||||
char tmpc;
|
||||
char candidate[MAXSWL];
|
||||
|
||||
int wl = strlen(word);
|
||||
int cwrd;
|
||||
strcpy (candidate, word);
|
||||
|
||||
// swap out each char one by one and try all the tryme
|
||||
// chars in its place to see if that makes a good word
|
||||
for (int i=0; i < wl; i++) {
|
||||
tmpc = candidate[i];
|
||||
for (int j=0; j < ctryl; j++) {
|
||||
if (ctry[j] == tmpc) continue;
|
||||
candidate[i] = ctry[j];
|
||||
cwrd = 1;
|
||||
for (int k=0; k < ns; k++)
|
||||
if (strcmp(candidate,wlst[k]) == 0) cwrd = 0;
|
||||
if ((cwrd) && check(candidate,wl)) {
|
||||
if (ns < maxSug) {
|
||||
wlst[ns] = mystrdup(candidate);
|
||||
// fprintf(stderr,"bad_char %d adding %s\n",ns, wlst[ns]); fflush(stderr);
|
||||
if (wlst[ns] == NULL) return -1;
|
||||
ns++;
|
||||
} else return ns;
|
||||
}
|
||||
candidate[i] = tmpc;
|
||||
}
|
||||
}
|
||||
return ns;
|
||||
}
|
||||
|
||||
|
||||
// error is word has an extra letter it does not need
|
||||
int SuggestMgr::extrachar(char** wlst, const char * word, int ns)
|
||||
{
|
||||
char candidate[MAXSWL];
|
||||
const char * p;
|
||||
char * r;
|
||||
int cwrd;
|
||||
|
||||
int wl = strlen(word);
|
||||
if (wl < 2) return ns;
|
||||
|
||||
// try omitting one char of word at a time
|
||||
strcpy (candidate, word + 1);
|
||||
for (p = word, r = candidate; *p != 0; ) {
|
||||
cwrd = 1;
|
||||
for (int k=0; k < ns; k++)
|
||||
if (strcmp(candidate,wlst[k]) == 0) cwrd = 0;
|
||||
if ((cwrd) && check(candidate,wl-1)) {
|
||||
if (ns < maxSug) {
|
||||
wlst[ns] = mystrdup(candidate);
|
||||
// fprintf(stderr,"extra_char %d adding %s\n",ns,wlst[ns]); fflush(stderr);
|
||||
if (wlst[ns] == NULL) return -1;
|
||||
ns++;
|
||||
} else return ns;
|
||||
}
|
||||
*r++ = *p++;
|
||||
}
|
||||
return ns;
|
||||
}
|
||||
|
||||
|
||||
// error is mising a letter it needs
|
||||
int SuggestMgr::forgotchar(char ** wlst, const char * word, int ns)
|
||||
{
|
||||
char candidate[MAXSWL];
|
||||
const char * p;
|
||||
char * q;
|
||||
int cwrd;
|
||||
|
||||
int wl = strlen(word);
|
||||
|
||||
// try inserting a tryme character before every letter
|
||||
strcpy(candidate + 1, word);
|
||||
for (p = word, q = candidate; *p != 0; ) {
|
||||
for (int i = 0; i < ctryl; i++) {
|
||||
*q = ctry[i];
|
||||
cwrd = 1;
|
||||
for (int k=0; k < ns; k++)
|
||||
if (strcmp(candidate,wlst[k]) == 0) cwrd = 0;
|
||||
if ((cwrd) && check(candidate,wl+1)) {
|
||||
if (ns < maxSug) {
|
||||
wlst[ns] = mystrdup(candidate);
|
||||
// fprintf(stderr,"forgotchar %d adding %s\n",ns,wlst[ns]); fflush(stderr);
|
||||
if (wlst[ns] == NULL) return -1;
|
||||
ns++;
|
||||
} else return ns;
|
||||
}
|
||||
}
|
||||
*q++ = *p++;
|
||||
}
|
||||
|
||||
// now try adding one to end */
|
||||
for (int i = 0; i < ctryl; i++) {
|
||||
*q = ctry[i];
|
||||
cwrd = 1;
|
||||
for (int k=0; k < ns; k++)
|
||||
if (strcmp(candidate,wlst[k]) == 0) cwrd = 0;
|
||||
if ((cwrd) && check(candidate,wl+1)) {
|
||||
if (ns < maxSug) {
|
||||
wlst[ns] = mystrdup(candidate);
|
||||
// fprintf(stderr,"forgot_char %d adding %s\n",ns,wlst[ns]); fflush(stderr);
|
||||
if (wlst[ns] == NULL) return -1;
|
||||
ns++;
|
||||
} else return ns;
|
||||
}
|
||||
}
|
||||
return ns;
|
||||
}
|
||||
|
||||
|
||||
/* error is should have been two words */
|
||||
int SuggestMgr::twowords(char ** wlst, const char * word, int ns)
|
||||
{
|
||||
char candidate[MAXSWL];
|
||||
char * p;
|
||||
|
||||
int wl=strlen(word);
|
||||
if (wl < 3) return ns;
|
||||
strcpy(candidate + 1, word);
|
||||
|
||||
// split the string into two pieces after every char
|
||||
// if both pieces are good words make them a suggestion
|
||||
for (p = candidate + 1; p[1] != '\0'; p++) {
|
||||
p[-1] = *p;
|
||||
*p = '\0';
|
||||
if (check(candidate,strlen(candidate))) {
|
||||
if (check((p+1),strlen(p+1))) {
|
||||
*p = ' ';
|
||||
if (ns < maxSug) {
|
||||
wlst[ns] = mystrdup(candidate);
|
||||
// fprintf(stderr,"two_words %d adding %s\n",ns,wlst[ns]); fflush(stderr);
|
||||
if (wlst[ns] == NULL) return -1;
|
||||
ns++;
|
||||
} else return ns;
|
||||
}
|
||||
}
|
||||
}
|
||||
return ns;
|
||||
}
|
||||
|
||||
|
||||
// error is adjacent letter were swapped
|
||||
int SuggestMgr::swapchar(char ** wlst, const char * word, int ns)
|
||||
{
|
||||
char candidate[MAXSWL];
|
||||
char * p;
|
||||
char tmpc;
|
||||
int cwrd;
|
||||
|
||||
int wl = strlen(word);
|
||||
|
||||
// try swapping adjacent chars one by one
|
||||
strcpy(candidate, word);
|
||||
for (p = candidate; p[1] != 0; p++) {
|
||||
tmpc = *p;
|
||||
*p = p[1];
|
||||
p[1] = tmpc;
|
||||
cwrd = 1;
|
||||
for (int k=0; k < ns; k++)
|
||||
if (strcmp(candidate,wlst[k]) == 0) cwrd = 0;
|
||||
if ((cwrd) && check(candidate,wl)) {
|
||||
if (ns < maxSug) {
|
||||
wlst[ns] = mystrdup(candidate);
|
||||
// fprintf(stderr,"swap_char %d adding %s\n",ns,wlst[ns]); fflush(stderr);
|
||||
if (wlst[ns] == NULL) return -1;
|
||||
ns++;
|
||||
} else return ns;
|
||||
}
|
||||
tmpc = *p;
|
||||
*p = p[1];
|
||||
p[1] = tmpc;
|
||||
}
|
||||
return ns;
|
||||
}
|
||||
|
||||
|
||||
// generate a set of suggestions for very poorly spelled words
|
||||
int SuggestMgr::ngsuggest(char** wlst, char * word, HashMgr* pHMgr)
|
||||
{
|
||||
|
||||
int i, j;
|
||||
int lval;
|
||||
int sc;
|
||||
int lp;
|
||||
|
||||
if (! pHMgr) return 0;
|
||||
|
||||
// exhaustively search through all root words
|
||||
// keeping track of the MAX_ROOTS most similar root words
|
||||
struct hentry * roots[MAX_ROOTS];
|
||||
int scores[MAX_ROOTS];
|
||||
for (i = 0; i < MAX_ROOTS; i++) {
|
||||
roots[i] = NULL;
|
||||
scores[i] = -100 * i;
|
||||
}
|
||||
lp = MAX_ROOTS - 1;
|
||||
|
||||
int n = strlen(word);
|
||||
|
||||
struct hentry* hp = NULL;
|
||||
int col = -1;
|
||||
while ((hp = pHMgr->walk_hashtable(col, hp))) {
|
||||
sc = ngram(3, word, hp->word, NGRAM_LONGER_WORSE);
|
||||
if (sc > scores[lp]) {
|
||||
scores[lp] = sc;
|
||||
roots[lp] = hp;
|
||||
int lval = sc;
|
||||
for (j=0; j < MAX_ROOTS; j++)
|
||||
if (scores[j] < lval) {
|
||||
lp = j;
|
||||
lval = scores[j];
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// find minimum threshhold for a passable suggestion
|
||||
// mangle original word three differnt ways
|
||||
// and score them to generate a minimum acceptable score
|
||||
int thresh = 0;
|
||||
char * mw = NULL;
|
||||
for (int sp = 1; sp < 4; sp++) {
|
||||
mw = strdup(word);
|
||||
for (int k=sp; k < n; k+=4) *(mw + k) = '*';
|
||||
thresh = thresh + ngram(n, word, mw, NGRAM_ANY_MISMATCH);
|
||||
free(mw);
|
||||
}
|
||||
mw = NULL;
|
||||
thresh = thresh / 3;
|
||||
thresh--;
|
||||
|
||||
// now expand affixes on each of these root words and
|
||||
// and use length adjusted ngram scores to select
|
||||
// possible suggestions
|
||||
char * guess[MAX_GUESS];
|
||||
int gscore[MAX_GUESS];
|
||||
for(i=0;i<MAX_GUESS;i++) {
|
||||
guess[i] = NULL;
|
||||
gscore[i] = -100 * i;
|
||||
}
|
||||
|
||||
lp = MAX_GUESS - 1;
|
||||
|
||||
struct guessword * glst;
|
||||
glst = (struct guessword *) calloc(MAX_WORDS,sizeof(struct guessword));
|
||||
if (! glst) return 0;
|
||||
|
||||
for (i = 0; i < MAX_ROOTS; i++) {
|
||||
|
||||
if (roots[i]) {
|
||||
struct hentry * rp = roots[i];
|
||||
int nw = pAMgr->expand_rootword(glst, MAX_WORDS, rp->word, rp->wlen,
|
||||
rp->astr, rp->alen);
|
||||
for (int k = 0; k < nw; k++) {
|
||||
sc = ngram(n, word, glst[k].word, NGRAM_ANY_MISMATCH);
|
||||
if (sc > thresh)
|
||||
{
|
||||
if (sc > gscore[lp])
|
||||
{
|
||||
if (guess[lp]) free(guess[lp]);
|
||||
gscore[lp] = sc;
|
||||
guess[lp] = glst[k].word;
|
||||
glst[k].word = NULL;
|
||||
lval = sc;
|
||||
for (j=0; j < MAX_GUESS; j++)
|
||||
{
|
||||
if (gscore[j] < lval)
|
||||
{
|
||||
lp = j;
|
||||
lval = gscore[j];
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
free (glst[k].word);
|
||||
glst[k].word = NULL;
|
||||
glst[k].allow = 0;
|
||||
}
|
||||
}
|
||||
}
|
||||
if (glst) free(glst);
|
||||
|
||||
// now we are done generating guesses
|
||||
// sort in order of decreasing score and copy over
|
||||
|
||||
bubblesort(&guess[0], &gscore[0], MAX_GUESS);
|
||||
int ns = 0;
|
||||
for (i=0; i < MAX_GUESS; i++) {
|
||||
if (guess[i]) {
|
||||
int unique = 1;
|
||||
for (j=i+1; j < MAX_GUESS; j++)
|
||||
if (guess[j])
|
||||
if (!strcmp(guess[i], guess[j])) unique = 0;
|
||||
if (unique) {
|
||||
wlst[ns++] = guess[i];
|
||||
} else {
|
||||
free(guess[i]);
|
||||
}
|
||||
}
|
||||
}
|
||||
return ns;
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
// see if a candidate suggestion is spelled correctly
|
||||
// needs to check both root words and words with affixes
|
||||
int SuggestMgr::check(const char * word, int len)
|
||||
{
|
||||
struct hentry * rv=NULL;
|
||||
if (pAMgr) {
|
||||
rv = pAMgr->lookup(word);
|
||||
if (rv == NULL) rv = pAMgr->affix_check(word,len);
|
||||
}
|
||||
if (rv) return 1;
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
|
||||
// generate an n-gram score comparing s1 and s2
|
||||
int SuggestMgr::ngram(int n, char * s1, const char * s2, int uselen)
|
||||
{
|
||||
int nscore = 0;
|
||||
int l1 = strlen(s1);
|
||||
int l2 = strlen(s2);
|
||||
int ns;
|
||||
for (int j=1;j<=n;j++) {
|
||||
ns = 0;
|
||||
for (int i=0;i<=(l1-j);i++) {
|
||||
char c = *(s1 + i + j);
|
||||
*(s1 + i + j) = '\0';
|
||||
if (strstr(s2,(s1+i))) ns++;
|
||||
*(s1 + i + j ) = c;
|
||||
}
|
||||
nscore = nscore + ns;
|
||||
if (ns < 2) break;
|
||||
}
|
||||
ns = 0;
|
||||
if (uselen == NGRAM_LONGER_WORSE) ns = (l2-l1)-2;
|
||||
if (uselen == NGRAM_ANY_MISMATCH) ns = abs(l2-l1)-2;
|
||||
return (nscore - ((ns > 0) ? ns : 0));
|
||||
}
|
||||
|
||||
|
||||
// sort in decreasing order of score
|
||||
void SuggestMgr::bubblesort(char** rword, int* rsc, int n )
|
||||
{
|
||||
int m = 1;
|
||||
while (m < n) {
|
||||
int j = m;
|
||||
while (j > 0) {
|
||||
if (rsc[j-1] < rsc[j]) {
|
||||
int sctmp = rsc[j-1];
|
||||
char * wdtmp = rword[j-1];
|
||||
rsc[j-1] = rsc[j];
|
||||
rword[j-1] = rword[j];
|
||||
rsc[j] = sctmp;
|
||||
rword[j] = wdtmp;
|
||||
j--;
|
||||
} else break;
|
||||
}
|
||||
m++;
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
|
@ -0,0 +1,48 @@
|
|||
#ifndef _SUGGESTMGR_HXX_
|
||||
#define _SUGGESTMGR_HXX_
|
||||
|
||||
// size of the candidate buffers used while building suggestions
#define MAXSWL 100
// limits used by SuggestMgr::ngsuggest(): root words kept, expanded
// words per root, and guesses kept
#define MAX_ROOTS 10
#define MAX_WORDS 500
#define MAX_GUESS 10

// length handling modes for SuggestMgr::ngram()
#define NGRAM_IGNORE_LENGTH 0
#define NGRAM_LONGER_WORSE  1
#define NGRAM_ANY_MISMATCH  2

#include "atypes.hxx"
#include "affixmgr.hxx"
#include "hashmgr.hxx"

// Builds suggestion lists for misspelled words.
class SuggestMgr
{
private:
  char *    ctry;         // private copy of the try characters
  int       ctryl;        // length of ctry
  AffixMgr* pAMgr;        // used to validate candidates; not deleted here
  int       maxSug;       // maximum number of suggestions
  bool      nosplitsugs;  // when true, twowords() is not used

public:
  SuggestMgr(const char * tryme, int maxn, AffixMgr *aptr);
  ~SuggestMgr();

  // appends suggestions for word to wlst (ns are already present);
  // returns the new count or -1 on allocation failure
  int suggest(char** wlst, int ns, const char * word);

  // returns 1 when word is accepted by the affix manager, 0 otherwise
  int check(const char *, int);

  // n-gram based suggestions for very poorly spelled words
  int ngsuggest(char ** wlst, char * word, HashMgr* pHMgr);

private:
  // one helper per kind of mistake; each takes (list, word, count) and
  // returns the new count or -1 on allocation failure
  int replchars(char**, const char *, int);
  int mapchars(char**, const char *, int);
  int map_related(const char *, int, char ** wlst, int, const mapentry*, int);
  int forgotchar(char **, const char *, int);
  int swapchar(char **, const char *, int);
  int extrachar(char **, const char *, int);
  int badchar(char **, const char *, int);
  int twowords(char **, const char *, int);

  // scoring and ordering helpers for ngsuggest()
  int ngram(int n, char * s1, const char * s2, int uselen);
  void bubblesort( char ** rwd, int * rsc, int n);
};
|
||||
|
||||
#endif
|
||||
|
|
@ -138,7 +138,9 @@ mozSpellChecker::CheckWord(const nsAString &aWord, PRBool *aIsMisspelled, nsStri
|
|||
for(i=0;i<count;i++){
|
||||
aSuggestions->AppendString(nsDependentString(words[i]));
|
||||
}
|
||||
NS_FREE_XPCOM_ALLOCATED_POINTER_ARRAY(count, words);
|
||||
|
||||
if (count)
|
||||
NS_FREE_XPCOM_ALLOCATED_POINTER_ARRAY(count, words);
|
||||
}
|
||||
if(aIsMisspelled){
|
||||
*aIsMisspelled = PR_TRUE;
|
||||
|
|
Загрузка…
Ссылка в новой задаче