Bug #391659 --> remove myspell, it is no longer part of the build.

This commit is contained in:
scott@scott-macgregor.org 2007-08-10 10:19:24 -07:00
Родитель 0c44f011cf
Коммит 071a1fea23
26 изменённых файлов: 0 добавлений и 4763 удалений

Просмотреть файл

@ -1,48 +0,0 @@
# ***** BEGIN LICENSE BLOCK *****
# Version: MPL 1.1/GPL 2.0/LGPL 2.1
#
# The contents of this file are subject to the Mozilla Public License Version
# 1.1 (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
# http://www.mozilla.org/MPL/
#
# Software distributed under the License is distributed on an "AS IS" basis,
# WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
# for the specific language governing rights and limitations under the
# License.
#
# The Original Code is Mozilla Spellchecker Component.
#
# The Initial Developer of the Original Code is
# David Einstein.
# Portions created by the Initial Developer are Copyright (C) 2001
# the Initial Developer. All Rights Reserved.
#
# Contributor(s): David Einstein <Deinst@world.std.com>
#
# Alternatively, the contents of this file may be used under the terms of
# either the GNU General Public License Version 2 or later (the "GPL"), or
# the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
# in which case the provisions of the GPL or the LGPL are applicable instead
# of those above. If you wish to allow use of your version of this file only
# under the terms of either the GPL or the LGPL, and not to allow others to
# use your version of this file under the terms of the MPL, indicate your
# decision by deleting the provisions above and replace them with the notice
# and other provisions required by the GPL or the LGPL. If you do not delete
# the provisions above, a recipient may use your version of this file under
# the terms of any one of the MPL, the GPL or the LGPL.
#
# ***** END LICENSE BLOCK *****
# Relative path from this directory to the top of the tree; used below to
# locate config/autoconf.mk.
DEPTH = ../../..
# The @...@ tokens are template placeholders -- presumably substituted when the
# real Makefile is generated from this file (confirm against the build docs).
topsrcdir = @top_srcdir@
srcdir = @srcdir@
VPATH = @srcdir@
include $(DEPTH)/config/autoconf.mk
MODULE = myspell
# src is the only subdirectory listed for this module.
DIRS = src
include $(topsrcdir)/config/rules.mk

Просмотреть файл

@ -1,70 +0,0 @@
# ***** BEGIN LICENSE BLOCK *****
# Version: MPL 1.1/GPL 2.0/LGPL 2.1
#
# The contents of this file are subject to the Mozilla Public License Version
# 1.1 (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
# http://www.mozilla.org/MPL/
#
# Software distributed under the License is distributed on an "AS IS" basis,
# WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
# for the specific language governing rights and limitations under the
# License.
#
# The Original Code is Mozilla Spellchecker Component.
#
# The Initial Developer of the Original Code is
# David Einstein.
# Portions created by the Initial Developer are Copyright (C) 2001
# the Initial Developer. All Rights Reserved.
#
# Contributor(s): David Einstein <Deinst@world.std.com>
#
# Alternatively, the contents of this file may be used under the terms of
# either the GNU General Public License Version 2 or later (the "GPL"), or
# the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
# in which case the provisions of the GPL or the LGPL are applicable instead
# of those above. If you wish to allow use of your version of this file only
# under the terms of either the GPL or the LGPL, and not to allow others to
# use your version of this file under the terms of the MPL, indicate your
# decision by deleting the provisions above and replace them with the notice
# and other provisions required by the GPL or the LGPL. If you do not delete
# the provisions above, a recipient may use your version of this file under
# the terms of any one of the MPL, the GPL or the LGPL.
#
# ***** END LICENSE BLOCK *****
# Relative path from this directory to the top of the tree; used below to
# locate config/autoconf.mk.
DEPTH = ../../../..
# The @...@ tokens are template placeholders -- presumably substituted when the
# real Makefile is generated from this file (confirm against the build docs).
topsrcdir = @top_srcdir@
srcdir = @srcdir@
VPATH = @srcdir@
include $(DEPTH)/config/autoconf.mk
MODULE = myspell
# Library settings consumed by config/rules.mk (included at the end); their
# exact effect is defined there, not in this file.
LIBRARY_NAME = myspell_s
FORCE_STATIC_LIB = 1
LIBXUL_LIBRARY = 1
# Modules this library depends on; the list is terminated with $(NULL).
REQUIRES = xpcom \
string \
uconv \
unicharutil \
spellchecker \
xulapp \
$(NULL)
# C++ sources compiled into the library.
CPPSRCS = affentry.cpp \
affixmgr.cpp \
hashmgr.cpp \
suggestmgr.cpp \
csutil.cpp \
myspell.cpp \
mozMySpell.cpp \
$(NULL)
# The directory provider is only added when MOZ_XUL_APP is defined.
ifdef MOZ_XUL_APP
CPPSRCS += mozMySpellDirProvider.cpp
endif
include $(topsrcdir)/config/rules.mk

Просмотреть файл

@ -1,101 +0,0 @@
MySpell is a simple spell checker that uses affix
compression and is modelled after the spell checker
ispell.
MySpell was written to explore how affix compression
can be implemented.
The Main features of MySpell are:
1. written in C++ to make it easier to interface with
Pspell, OpenOffice, AbiWord, etc
2. it is stateless, uses no static variables and
should be completely reentrant with almost no
ifdefs
3. it tries to be as compatible with ispell to
the extent it can. It can read slightly modified
versions of munched ispell dictionaries (and it
comes with a munched English wordlist borrowed from
Kevin Atkinson's excellent Aspell).
4. it uses a heavily modified aff file format that
can be derived from ispell aff files but uses
the iso-8859-X character sets only
5. it is simple with *lots* of comments that
describe how the affixes are stored
and tested for (based on the approach used by
ispell).
6. like ispell it has a BSD license (and no
advertising clause)
But ... it has *no* support for adding words
to a personal dictionary, *no* support for converting
between various text encodings, and *no* command line
interface (it is purely meant to be a library).
It can not (in any way) replace all of the functionality
of ispell or aspell/pspell. It is meant as a learning
tool for understanding affix compression and for
being used by front ends like OpenOffice, Abiword, etc.
MySpell has been tested under Linux and Solaris
and has the world's simplest Makefile and no
configure support.
It does come with a simple example program that
spell checks some words and returns suggestions.
To build a static library and an example
program under Linux simply type:
tar -zxvf myspell.tar.gz
cd myspell
make
To run the example program:
./example ./en_US.aff ./en_US.dic checkme.lst
Please play around with it and let me know
what you think.
Developer Credits:
Special credit and thanks go to ispell's creator Geoff Kuenning.
Ispell affix compression code was used as the basis for the
affix code used in MySpell. Specifically Geoff's use of a
conds[] array that makes it easy to check if the conditions
required for a particular affix are present was very
ingenious! Kudos to Geoff. Very nicely done.
BTW: ispell is available under a BSD style license
from Geoff Kuenning's ispell website:
http://www.cs.ucla.edu/ficus-members/geoff/ispell.html
The Original MySpell code was written by Kevin Hendricks
and released under a BSD license. An almost complete rewrite
of MySpell for use by the Mozilla project has been developed by
David Einstein (Deinst@world.std.com). David and I are now
working on parallel development tracks to help our respective
projects (Mozilla and OpenOffice.org) and we will maintain full
affix file and dictionary file compatibility and work on merging our
versions of MySpell back into a single tree. David has been
a significant help in improving MySpell.
Special thanks also go to La'szlo' Ne'meth <nemethl@gyorsposta.hu>
who is the author of the Hungarian dictionary and who
developed and contributed the code to support compound words in
MySpell and fixed numerous problems with the encoding case conversion
tables.
Thanks,
Kevin Hendricks
kevin.hendricks@sympatico.ca

Просмотреть файл

@ -1,21 +0,0 @@
There is experimental support for languages that need to allow
compound words. To enable compound word support, you need to
add the following lines to your affix (.aff) file.
COMPOUNDFLAG x
COMPOUNDMIN #
where 'x' is replaced by a specific affix character flag that has
been added to the dictionary (*.dic) file for words that can
run together to make a new word. All subwords of the compound word
must have this affix flag for the compound word to be correct.
and where '#' is replaced by the length of the shortest subword of
a compound word. If the "COMPOUNDMIN" line is not found COMPOUNDMIN
will default to 3
This support is still under rapid revisions and will change in the
future. Use only at your own risk.

Просмотреть файл

@ -1,80 +0,0 @@
This is a straight copy of the openoffice myspell component
Changes made for mozilla:
* Renamed *.cxx to .cpp, for the build system
* Replaced the makefile with a mozilla type makefile
* Added a wrapper, implementing mozISpellCheckingEngine and calling myspell
This wrapper does the conversion from unicode to the charset used
by myspell for the current dictionary.
* Rewrote get_current_cs to create tables when needed, to reduce size of
the resulting library.
* Commented out std namespace declarations from .cpp files (using namespace std;)
* Removed #include <unistd.h> for the build system
* Some specific changes are needed for SunONE studio compiler, please refer to https://bugzilla.mozilla.org/show_bug.cgi?id=278672
* The files cctype.h,cstring.h,cstdio.h etc are not supported by all compilers.
So the following files need to be changed.
Refer to bug 280901 (http://bugzilla.mozilla.org/show_bug.cgi?id=280901)
- mozilla/extensions/spellcheck/myspell/src/affentry.cpp
Use:
#include <ctype.h>
#include <string.h>
#include <stdlib.h>
#include <stdio.h>
instead of
#include <cctype>
#include <cstring>
#include <cstdlib>
#include <cstdio>
- mozilla/extensions/spellcheck/myspell/src/affixmgr.cpp
Use:
#include <stdlib.h>
#include <string.h>
#include <stdio.h>
instead of:
#include <cstdlib>
#include <cstring>
#include <cstdio>
- mozilla/extensions/spellcheck/myspell/src/affixmgr.hxx
Use:
#include <stdio.h>
instead of:
#include <cstdio>
- mozilla/extensions/spellcheck/myspell/src/csutil.cpp
Use:
#include <stdlib.h>
#include <string.h>
#include <stdio.h>
instead of:
#include <cstdlib>
#include <cstring>
#include <cstdio>
- mozilla/extensions/spellcheck/myspell/src/hashmgr.cpp
Use:
#include <stdlib.h>
#include <string.h>
#include <stdio.h>
instead of:
#include <cstdlib>
#include <cstring>
#include <cstdio>
- mozilla/extensions/spellcheck/myspell/src/myspell.cpp
Use:
#include <string.h>
#include <stdlib.h>
#include <stdio.h>
instead of:
#include <cstring>
#include <cstdlib>
#include <cstdio>
- mozilla/extensions/spellcheck/myspell/src/suggestmgr.cpp
Use:
#include <stdlib.h>
#include <ctype.h>
#include <string.h>
#include <stdio.h>
instead of:
#include <cstdlib>
#include <cctype>
#include <cstring>
#include <cstdio>

Просмотреть файл

@ -1,18 +0,0 @@
Build instructions for munch and unmunch utilities
---------------------------------------------------
Under Linux:
gcc -O2 -omunch -I. munch.c
gcc -O2 -ounmunch -I. unmunch.c
To see the correct syntax, run
./munch
and
./unmunch

Просмотреть файл

@ -1,390 +0,0 @@
#include "license.readme"
#include <ctype.h>
#include <string.h>
#include <stdlib.h>
#include <stdio.h>
#include "affentry.hxx"
// using namespace std;
extern char * mystrdup(const char * s);
extern char * myrevstrdup(const char * s);
// Construct a prefix entry from a parsed affix description.
//
// pmgr - affix manager this entry belongs to; check() uses it for dictionary
//        lookups and suffix cross checks.
// dp   - parsed description to copy from. The strip/appnd pointers are copied,
//        not duplicated; since the destructor free()s them, this object takes
//        over ownership of those strings.
PfxEntry::PfxEntry(AffixMgr* pmgr, affentry* dp)
{
    // register affix manager
    pmyMgr = pmgr;

    // set up its initial values
    achar = dp->achar;         // char flag
    strip = dp->strip;         // string to strip
    appnd = dp->appnd;         // string to append
    stripl = dp->stripl;       // length of strip string
    appndl = dp->appndl;       // length of append string
    numconds = dp->numconds;   // number of conditions to match
    xpflg = dp->xpflg;         // cross product flag

    // then copy over all of the conditions
    memcpy(&conds[0], &dp->conds[0], SETSIZE * sizeof(conds[0]));

    // start with every link cleared; flgnxt was previously left
    // uninitialized even though getFlgNxt() exposes it
    next = NULL;
    nextne = NULL;
    nexteq = NULL;
    flgnxt = NULL;
}
// Release the strip/append strings owned by this entry and clear its fields.
PfxEntry::~PfxEntry()
{
    // free() accepts a null pointer, so no explicit guard is required
    free(appnd);
    appnd = NULL;

    free(strip);
    strip = NULL;

    pmyMgr = NULL;
    achar = '\0';
}
// Apply this prefix to a root word.
//
// word - root word (NUL terminated)
// len  - length of word
//
// Returns a malloc'ed copy of the prefixed word (caller frees), or NULL when
// the word is too short, a character condition fails, or the result would not
// fit in the MAXWORDLEN scratch buffer.
char * PfxEntry::add(const char * word, int len)
{
    // the word must be longer than the strip string and long enough for
    // every condition to be tested
    if ((len <= stripl) || (len < numconds)) return NULL;

    // the result is appnd followed by word minus its first stripl chars;
    // refuse to build it if it would overflow tword (previously unchecked)
    if ((size_t) appndl + strlen(word + stripl) > (size_t) MAXWORDLEN) return NULL;

    // make sure all conditions match: bit `cond` of conds[c] says whether
    // character c is allowed at position `cond` from the start of the word
    const unsigned char * cp = (const unsigned char *) word;
    for (int cond = 0; cond < numconds; cond++) {
        if ((conds[*cp++] & (1 << cond)) == 0) return NULL;
    }

    // we have a match so add prefix
    char tword[MAXWORDLEN+1];
    int tlen = 0;
    if (appndl) {
        strcpy(tword, appnd);
        tlen += appndl;
    }
    strcpy(tword + tlen, word + stripl);
    return mystrdup(tword);
}
// Check whether `word` can be explained as <this prefix> + <dictionary root>.
//
// word - candidate word; on entry the prefix is of zero length or already
//        matches the beginning of the word
// len  - length of word
//
// Returns the hash entry of the root word when the root is in the dictionary
// and carries this prefix's flag, or the result of a suffix cross check when
// this prefix allows cross products; otherwise NULL.
struct hentry * PfxEntry::check(const char * word, int len)
{
    // the remaining root word must have positive length, and root plus
    // restored strip chars must be long enough to test every condition
    int tmpl = len - appndl;
    if ((tmpl <= 0) || (tmpl + stripl < numconds)) return NULL;

    // the rebuilt root must fit in tmpword (previously unchecked)
    if ((size_t) stripl + strlen(word + appndl) > (size_t) MAXWORDLEN) return NULL;

    // generate new root word by removing prefix and adding
    // back any characters that would have been stripped
    char tmpword[MAXWORDLEN+1];
    if (stripl) strcpy(tmpword, strip);
    strcpy(tmpword + stripl, word + appndl);

    // now make sure all of the conditions on characters are met.
    // Please see the appendix at the end of this file for more
    // info on exactly what is being tested
    const unsigned char * cp = (const unsigned char *) tmpword;
    for (int cond = 0; cond < numconds; cond++) {
        if ((conds[*cp++] & (1 << cond)) == 0) return NULL;
    }

    // all conditions are met, so check if the resulting
    // root word is in the dictionary
    tmpl += stripl;
    struct hentry * he = pmyMgr->lookup(tmpword);
    if (he != NULL) {
        if (TESTAFF(he->astr, achar, he->alen)) return he;
    }

    // prefix matched but no root word was found;
    // if XPRODUCT is allowed, try again but now
    // cross checked combined with a suffix
    if (xpflg & XPRODUCT) {
        he = pmyMgr->suffix_check(tmpword, tmpl, XPRODUCT, (AffEntry *)this);
        if (he) return he;
    }
    return NULL;
}
// Construct a suffix entry from a parsed affix description.
//
// pmgr - affix manager this entry belongs to; check() uses it for dictionary
//        lookups.
// dp   - parsed description to copy from. The strip/appnd pointers are copied,
//        not duplicated; since the destructor free()s them, this object takes
//        over ownership of those strings.
SfxEntry::SfxEntry(AffixMgr * pmgr, affentry* dp)
{
    // register affix manager
    pmyMgr = pmgr;

    // set up its initial values
    achar = dp->achar;         // char flag
    strip = dp->strip;         // string to strip
    appnd = dp->appnd;         // string to append
    stripl = dp->stripl;       // length of strip string
    appndl = dp->appndl;       // length of append string
    numconds = dp->numconds;   // number of conditions to match
    xpflg = dp->xpflg;         // cross product flag

    // then copy over all of the conditions
    memcpy(&conds[0], &dp->conds[0], SETSIZE * sizeof(conds[0]));

    // reversed copy of the append string; this is what getKey() returns
    rappnd = myrevstrdup(appnd);

    // start with every link cleared; these were previously left
    // uninitialized even though the getNext*() accessors expose them
    next = NULL;
    nexteq = NULL;
    nextne = NULL;
    flgnxt = NULL;
}
// Release the append, reversed-append and strip strings owned by this entry
// and clear its fields.
SfxEntry::~SfxEntry()
{
    // free() accepts a null pointer, so no explicit guards are required
    free(appnd);
    free(rappnd);
    free(strip);

    appnd = NULL;
    strip = NULL;
    pmyMgr = NULL;
    achar = '\0';
}
// Apply this suffix to a root word.
//
// word - root word (NUL terminated)
// len  - length of word
//
// Returns a malloc'ed copy of the suffixed word (caller frees), or NULL when
// the word is too short, a character condition fails, or the result would not
// fit in the MAXWORDLEN scratch buffer.
char * SfxEntry::add(const char * word, int len)
{
    // the word must be longer than the strip string and long enough for
    // every condition to be tested
    if ((len <= stripl) || (len < numconds)) return NULL;

    // both the copied word and the final result (word minus strip plus
    // appnd) must fit in tword (previously unchecked)
    if ((strlen(word) > (size_t) MAXWORDLEN) ||
        ((len - stripl) + appndl > MAXWORDLEN)) return NULL;

    // make sure all conditions match, walking backwards from the end of the
    // word: bit numconds-1 of conds[c] applies to the last character
    const unsigned char * cp = (const unsigned char *) (word + len);
    for (int cond = numconds; --cond >= 0; ) {
        if ((conds[*--cp] & (1 << cond)) == 0) return NULL;
    }

    // we have a match so add suffix: drop the strip chars from the end of
    // the copy, then append the affix (or just terminate)
    char tword[MAXWORDLEN+1];
    strcpy(tword, word);
    char * pp = tword + (len - stripl);
    if (appndl) {
        strcpy(pp, appnd);
    } else {
        *pp = '\0';
    }
    return mystrdup(tword);
}
// see if this suffix is present in the word
struct hentry * SfxEntry::check(const char * word, int len, int optflags, AffEntry* ppfx)
{
int tmpl; // length of tmpword
int cond; // condition beng examined
struct hentry * he; // hash entry pointer
unsigned char * cp;
char tmpword[MAXWORDLEN+1];
PfxEntry* ep = (PfxEntry *) ppfx;
// if this suffix is being cross checked with a prefix
// but it does not support cross products skip it
if ((optflags & XPRODUCT) != 0 && (xpflg & XPRODUCT) == 0)
return NULL;
// upon entry suffix is 0 length or already matches the end of the word.
// So if the remaining root word has positive length
// and if there are enough chars in root word and added back strip chars
// to meet the number of characters conditions, then test it
tmpl = len - appndl;
if ((tmpl > 0) && (tmpl + stripl >= numconds)) {
// generate new root word by removing suffix and adding
// back any characters that would have been stripped or
// or null terminating the shorter string
strcpy (tmpword, word);
cp = (unsigned char *)(tmpword + tmpl);
if (stripl) {
strcpy ((char *)cp, strip);
tmpl += stripl;
cp = (unsigned char *)(tmpword + tmpl);
} else *cp = '\0';
// now make sure all of the conditions on characters
// are met. Please see the appendix at the end of
// this file for more info on exactly what is being
// tested
for (cond = numconds; --cond >= 0; ) {
if ((conds[*--cp] & (1 << cond)) == 0) break;
}
// if all conditions are met then check if resulting
// root word in the dictionary
if (cond < 0) {
if ((he = pmyMgr->lookup(tmpword)) != NULL) {
if (TESTAFF(he->astr, achar , he->alen) &&
((optflags & XPRODUCT) == 0 ||
TESTAFF(he->astr, ep->getFlag(), he->alen))) return he;
}
}
}
return NULL;
}
#if 0
Appendix: Understanding Affix Code
An affix is either a prefix or a suffix attached to root words to make
other words.
Basically a Prefix or a Suffix is set of AffEntry objects
which store information about the prefix or suffix along
with supporting routines to check if a word has a particular
prefix or suffix or a combination.
The structure affentry is defined as follows:
struct affentry
{
char * strip; // string to strip before adding affix
char * appnd; // the affix string to add
short stripl; // length of the strip string
short appndl; // length of the affix string
short numconds; // the number of conditions that must be met
short xpflg; // flag: XPRODUCT- combine both prefix and suffix
char achar; // char used to represent the affix
char conds[SETSIZE]; // array which encodes the conditions to be met
};
Here is a suffix borrowed from the en_US.aff file. This file
is whitespace delimited.
SFX D Y 4
SFX D 0 d e
SFX D y ied [^aeiou]y
SFX D 0 ed [^ey]
SFX D 0 ed [aeiou]y
This information can be interpreted as follows:
In the first line has 4 fields
Field
-----
1 SFX - indicates this is a suffix
2 D - is the name of the character flag which represents this suffix
3 Y - indicates it can be combined with prefixes (cross product)
4 4 - indicates that sequence of 4 affentry structures are needed to
properly store the affix information
The remaining lines describe the unique information for the 4 SfxEntry
objects that make up this affix. Each line can be interpreted
as follows: (note fields 1 and 2 are as a check against line 1 info)
Field
-----
1 SFX - indicates this is a suffix
2 D - is the name of the character flag for this affix
3 y - the string of chars to strip off before adding affix
(a 0 here indicates the NULL string)
4 ied - the string of affix characters to add
5 [^aeiou]y - the conditions which must be met before the affix
can be applied
Field 5 is interesting. Since this is a suffix, field 5 tells us that
there are 2 conditions that must be met. The first condition is that
the next to the last character in the word must *NOT* be any of the
following "a", "e", "i", "o" or "u". The second condition is that
the last character of the word must end in "y".
So how can we encode this information concisely and be able to
test for both conditions in a fast manner? The answer is found
by studying the wonderful ispell code of Geoff Kuenning, et.al.
(now available under a normal BSD license).
If we set up a conds array of 256 bytes indexed (0 to 255) and access it
using a character (cast to an unsigned char) of a string, we have 8 bits
of information we can store about that character. Specifically we
could use each bit to say if that character is allowed in any of the
last (or first for prefixes) 8 characters of the word.
Basically, each character at one end of the word (up to the number
of conditions) is used to index into the conds array and the resulting
value found there says whether that character is valid for a
specific character position in the word.
For prefixes, it does this by setting bit 0 if that char is valid
in the first position, bit 1 if valid in the second position, and so on.
If a bit is not set, then that char is not valid for that position in the
word.
If working with suffixes bit 0 is used for the character closest
to the front, bit 1 for the next character towards the end, ...,
with bit numconds-1 representing the last char at the end of the string.
Note: since entries in the conds[] are 8 bits, only 8 conditions
(read that only 8 character positions) can be examined at one
end of a word (the beginning for prefixes and the end for suffixes).
So to make this clearer, lets encode the conds array values for the
first two affentries for the suffix D described earlier.
For the first affentry:
numconds = 1 (only examine the last character)
conds['e'] = (1 << 0) (the word must end in an 'e')
all others are all 0
For the second affentry:
numconds = 2 (only examine the last two characters)
conds[X] = conds[X] | (1 << 0) (aeiou are not allowed)
where X is all characters *but* a, e, i, o, or u
conds['y'] = (1 << 1) (the last char must be a y)
all other bits for all other entries in the conds array are zero
#endif

Просмотреть файл

@ -1,86 +0,0 @@
#ifndef _AFFIX_HXX_
#define _AFFIX_HXX_
#include "atypes.hxx"
#include "baseaffix.hxx"
#include "affixmgr.hxx"
/* A Prefix Entry
 *
 * One prefix rule: the strip/append strings, flag and condition table are
 * inherited from AffEntry. check() tests whether a word carries this prefix
 * on a dictionary root; add() applies the prefix to a root word.
 */
class PfxEntry : public AffEntry
{
// affix manager given to the constructor; check() calls its lookup() and
// suffix_check()
AffixMgr* pmyMgr;
// links to other prefix entries, reachable only through the get/set
// accessors below. How entries are chained is decided by the code that calls
// the setters (presumably AffixMgr -- confirm in affixmgr.cpp).
PfxEntry * next;
PfxEntry * nexteq;
PfxEntry * nextne;
PfxEntry * flgnxt;
public:
// copies the fields of *dp; the strip/appnd strings are later free()d by the
// destructor
PfxEntry(AffixMgr* pmgr, affentry* dp );
~PfxEntry();
// returns the hash entry of the root word if word is <prefix><root> for a
// root carrying this flag (or passes a suffix cross check), else NULL
struct hentry * check(const char * word, int len);
// true when the XPRODUCT bit is set in xpflg
inline bool allowCross() { return ((xpflg & XPRODUCT) != 0); }
// the affix flag character
inline unsigned char getFlag() { return achar; }
// for prefixes the key is the append string itself
inline const char * getKey() { return appnd; }
// returns a malloc'ed prefixed copy of word, or NULL if the rule does not apply
char * add(const char * word, int len);
// plain accessors for the link pointers
inline PfxEntry * getNext() { return next; }
inline PfxEntry * getNextNE() { return nextne; }
inline PfxEntry * getNextEQ() { return nexteq; }
inline PfxEntry * getFlgNxt() { return flgnxt; }
inline void setNext(PfxEntry * ptr) { next = ptr; }
inline void setNextNE(PfxEntry * ptr) { nextne = ptr; }
inline void setNextEQ(PfxEntry * ptr) { nexteq = ptr; }
inline void setFlgNxt(PfxEntry * ptr) { flgnxt = ptr; }
};
/* A Suffix Entry
 *
 * One suffix rule: the strip/append strings, flag and condition table are
 * inherited from AffEntry. check() tests whether a word carries this suffix
 * on a dictionary root; add() applies the suffix to a root word.
 */
class SfxEntry : public AffEntry
{
// affix manager given to the constructor; check() calls its lookup()
AffixMgr* pmyMgr;
// reversed copy of the append string, created with myrevstrdup() in the
// constructor and freed in the destructor
char * rappnd;
// links to other suffix entries, reachable only through the get/set
// accessors below. How entries are chained is decided by the code that calls
// the setters (presumably AffixMgr -- confirm in affixmgr.cpp).
SfxEntry * next;
SfxEntry * nexteq;
SfxEntry * nextne;
SfxEntry * flgnxt;
public:
// copies the fields of *dp; the strip/appnd strings are later free()d by the
// destructor
SfxEntry(AffixMgr* pmgr, affentry* dp );
~SfxEntry();
// returns the hash entry of the root word if word is <root><suffix> for a
// root carrying this flag, else NULL. When optflags contains XPRODUCT the
// root must also carry the flag of the prefix entry passed as ppfx.
struct hentry * check(const char * word, int len, int optflags,
AffEntry* ppfx);
// true when the XPRODUCT bit is set in xpflg
inline bool allowCross() { return ((xpflg & XPRODUCT) != 0); }
// the affix flag character
inline unsigned char getFlag() { return achar; }
// for suffixes the key is the reversed append string
inline const char * getKey() { return rappnd; }
// returns a malloc'ed suffixed copy of word, or NULL if the rule does not apply
char * add(const char * word, int len);
// plain accessors for the link pointers
inline SfxEntry * getNext() { return next; }
inline SfxEntry * getNextNE() { return nextne; }
inline SfxEntry * getNextEQ() { return nexteq; }
inline SfxEntry * getFlgNxt() { return flgnxt; }
inline void setNext(SfxEntry * ptr) { next = ptr; }
inline void setNextNE(SfxEntry * ptr) { nextne = ptr; }
inline void setNextEQ(SfxEntry * ptr) { nexteq = ptr; }
inline void setFlgNxt(SfxEntry * ptr) { flgnxt = ptr; }
};
#endif

Разница между файлами не показана из-за своего большого размера Загрузить разницу

Просмотреть файл

@ -1,74 +0,0 @@
#ifndef _AFFIXMGR_HXX_
#define _AFFIXMGR_HXX_
#ifdef __SUNPRO_CC // for SunONE Studio compiler
using namespace std;
#endif
#include "atypes.hxx"
#include "baseaffix.hxx"
#include "hashmgr.hxx"
#include <stdio.h>
// Affix manager: owns the affix data read from an affix (.aff) file and
// offers affix-aware dictionary checks on top of a HashMgr.
// NOTE(review): only this declaration is visible here; the notes on
// individual members below are inferred from names and signatures and should
// be confirmed against affixmgr.cpp.
class AffixMgr
{
// four tables of SETSIZE affix-entry pointers; presumably pStart/sStart are
// indexed by the first key character and pFlag/sFlag by the affix flag
// character -- TODO confirm
AffEntry * pStart[SETSIZE];
AffEntry * sStart[SETSIZE];
AffEntry * pFlag[SETSIZE];
AffEntry * sFlag[SETSIZE];
// hash manager passed to the constructor
HashMgr * pHMgr;
// values exposed through get_try_string(), get_encoding(), get_compound()
char * trystring;
char * encoding;
char * compound;
// presumably the COMPOUNDMIN setting (see parse_cpdmin) -- TODO confirm
int cpdmin;
// replacement table and its entry count (see get_reptable / get_numrep)
int numrep;
replentry * reptable;
// map table and its entry count (see get_maptable / get_nummap)
int nummap;
mapentry * maptable;
bool nosplitsugs;
public:
AffixMgr(const char * affpath, HashMgr * ptr);
~AffixMgr();
// affix-aware checks; each returns a dictionary hash entry or NULL
struct hentry * affix_check(const char * word, int len);
struct hentry * prefix_check(const char * word, int len);
// PfxEntry::check() calls this with sfxopts = XPRODUCT and itself as ppfx
struct hentry * suffix_check(const char * word, int len, int sfxopts, AffEntry* ppfx);
int expand_rootword(struct guessword * wlst, int maxn,
const char * ts, int wl, const char * ap, int al);
struct hentry * compound_check(const char * word, int len, char compound_flag);
// dictionary lookup of an exact word; used by PfxEntry/SfxEntry::check()
struct hentry * lookup(const char * word);
// simple accessors for the parsed settings
int get_numrep();
struct replentry * get_reptable();
int get_nummap();
struct mapentry * get_maptable();
char * get_encoding();
char * get_try_string();
char * get_compound();
bool get_nosplitsugs();
private:
// affix file parsing helpers
int parse_file(const char * affpath);
int parse_try(char * line);
int parse_set(char * line);
int parse_cpdflag(char * line);
int parse_cpdmin(char * line);
int parse_reptable(char * line, FILE * af);
int parse_maptable(char * line, FILE * af);
int parse_affix(char * line, const char at, FILE * af);
void encodeit(struct affentry * ptr, char * cs);
// helpers that organise the parsed prefix/suffix entries
int build_pfxtree(AffEntry* pfxptr);
int build_sfxtree(AffEntry* sfxptr);
AffEntry* process_sfx_in_order(AffEntry* ptr, AffEntry* nptr);
AffEntry* process_pfx_in_order(AffEntry* ptr, AffEntry* nptr);
int process_pfx_tree_to_list();
int process_sfx_tree_to_list();
int process_pfx_order();
int process_sfx_order();
};
#endif

Просмотреть файл

@ -1,45 +0,0 @@
#ifndef _ATYPES_HXX_
#define _ATYPES_HXX_
// number of entries in the per-character tables (conds[] in affix entries,
// the pStart/sStart/pFlag/sFlag tables in AffixMgr): one per unsigned char value
#define SETSIZE 256
// NOTE(review): not referenced in the code visible here -- presumably an upper
// bound on affix entries; confirm against its users
#define MAXAFFIXES 256
// capacity (excluding the terminating NUL) of the fixed scratch buffers used
// when building candidate words, e.g. char tword[MAXWORDLEN+1]
#define MAXWORDLEN 100
// bit tested in xpflg / optflags: the affix may be combined with an affix of
// the other kind (cross product)
#define XPRODUCT (1 << 0)
// NOTE(review): not referenced in the code visible here -- presumably the
// maximum input line length; confirm against its users
#define MAXLNLEN 1024
// evaluates to a non-null pointer when byte b occurs among the first c bytes
// at a (memchr semantics); used to test whether a flag string contains a flag
#define TESTAFF( a , b , c ) memchr((void *)(a), (int)(b), (size_t)(c) )
// Parsed description of one affix rule; PfxEntry and SfxEntry copy their
// fields from it in their constructors.
struct affentry
{
char * strip; // string to strip before adding the affix
char * appnd; // the affix string to add
short stripl; // length of the strip string
short appndl; // length of the affix string
short numconds; // number of character conditions that must be met
short xpflg; // flags; XPRODUCT bit = may combine prefix and suffix
char achar; // flag character that represents the affix
char conds[SETSIZE]; // per-character bit masks encoding the conditions
};
// One entry of the replacement table exposed by AffixMgr::get_reptable().
// NOTE(review): presumably "replace pattern with replacement" when generating
// suggestions -- confirm against the code that uses the table.
struct replentry {
char * pattern;
char * replacement;
};
// One entry of the map table exposed by AffixMgr::get_maptable().
// NOTE(review): presumably `set` is a group of related characters and `len`
// its length -- confirm against the code that uses the table.
struct mapentry {
char * set;
int len;
};
// Element of the word list filled in by AffixMgr::expand_rootword().
// NOTE(review): the meaning of `allow` is not visible here -- confirm against
// expand_rootword() and its callers.
struct guessword {
char * word;
bool allow;
};
#endif

Просмотреть файл

@ -1,17 +0,0 @@
#ifndef _BASEAFF_HXX_
#define _BASEAFF_HXX_
// Common data shared by prefix (PfxEntry) and suffix (SfxEntry) entries.
// The derived-class constructors fill these fields from a struct affentry.
class AffEntry
{
protected:
char * appnd; // the affix string to add
char * strip; // string to strip before adding the affix
short appndl; // length of the affix string
short stripl; // length of the strip string
short numconds; // number of character conditions that must be met
short xpflg; // flags; XPRODUCT bit = cross product allowed
char achar; // flag character that represents the affix
char conds[SETSIZE]; // per-character bit masks encoding the conditions
};
#endif

Просмотреть файл

@ -1,292 +0,0 @@
#include <stdlib.h>
#include <string.h>
#include <stdio.h>
#include "csutil.hxx"
#include "nsCOMPtr.h"
#include "nsServiceManagerUtils.h"
#include "nsIUnicodeEncoder.h"
#include "nsIUnicodeDecoder.h"
#include "nsICaseConversion.h"
#include "nsICharsetConverterManager.h"
#include "nsUnicharUtilCIID.h"
#include "nsUnicharUtils.h"
#ifdef __SUNPRO_CC // for SunONE Studio compiler
using namespace std;
#endif
// Split off the next token of *stringp at a single delimiter character.
// Acts like strsep() but takes a delimiter char instead of a delimiter string
// and returns a freshly malloc'ed copy of the token instead of modifying the
// input.
//
// stringp - in/out cursor into the string being tokenized; advanced past the
//           token and its delimiter (or to the terminating NUL when no
//           delimiter is left)
// delim   - delimiter character
//
// Returns the token (caller frees it), or NULL when the input is NULL or
// empty or when memory cannot be allocated (the cursor is left untouched in
// those cases).
char * mystrsep(char ** stringp, const char delim)
{
    if (!stringp || !*stringp) return NULL;

    char * mp = *stringp;
    size_t n = strlen(mp);
    if (n == 0) return NULL;

    // token length: up to the delimiter if present, else the whole remainder
    const char * dp = (const char *) memchr(mp, (int)((unsigned char)delim), n);
    size_t nc = dp ? (size_t)(dp - mp) : n;

    char * rv = (char *) malloc(nc + 1);
    if (!rv) return NULL;
    memcpy(rv, mp, nc);
    rv[nc] = '\0';

    // skip the delimiter too when one was found
    *stringp = mp + nc + (dp ? 1 : 0);
    return rv;
}
// ANSI replacement for strdup(): returns a malloc'ed copy of s (caller frees),
// or NULL when s is NULL or the allocation fails.
char * mystrdup(const char * s)
{
    if (!s) return NULL;

    int length = strlen(s);
    size_t bytes = (length + 1) * sizeof(char);   // includes the terminator
    char * copy = (char *) malloc(bytes);
    if (copy) memcpy(copy, s, bytes);
    return copy;
}
// Remove a trailing line end from s in place. A final '\n' or '\r' is
// removed, and if the character before it is '\r' (e.g. a "\r\n" pair) that
// one is removed as well. A '\r' that is merely second-to-last in a line
// without a line end is left alone (it used to truncate the string).
void mychomp(char * s)
{
    size_t k = strlen(s);
    if ((k > 0) && ((s[k-1] == '\r') || (s[k-1] == '\n'))) {
        s[--k] = '\0';
        // only look for a preceding '\r' once a line end was really removed
        if ((k > 0) && (s[k-1] == '\r')) s[k-1] = '\0';
    }
}
// Returns a malloc'ed copy of s with its characters in reverse order (caller
// frees), or NULL when s is NULL or the allocation fails.
char * myrevstrdup(const char * s)
{
    if (!s) return NULL;

    size_t sl = strlen(s);
    char * d = (char *) malloc((sl + 1) * sizeof(char));
    if (d) {
        // index from the back instead of walking a pointer down past the
        // start of s, which is undefined behaviour (always so for "")
        for (size_t i = 0; i < sl; i++) d[i] = s[sl - 1 - i];
        d[sl] = '\0';
    }
    return d;
}
#if 0
// Disabled strlen()/strncmp() based variant of isSubset(); the active
// definition follows this block.
// return 1 if s1 is a leading subset of s2
int isSubset(const char * s1, const char * s2)
{
int l1 = strlen(s1);
int l2 = strlen(s2);
if (l1 > l2) return 0;
if (strncmp(s2,s1,l1) == 0) return 1;
return 0;
}
#endif
// Returns 1 when s1 is a prefix of s2 (every character of s1 matches the
// character at the same position in s2), otherwise 0.
int isSubset(const char * s1, const char * s2)
{
    for (;;) {
        if (*s1 == '\0') return 1;   // ran out of s1: everything matched
        if (*s1 != *s2) return 0;    // mismatch (also covers s2 ending first)
        ++s1;
        ++s2;
    }
}
// Returns 1 when s1, read forwards, matches s2 read backwards starting at
// end_of_s2 (the last character of s2); at most len characters of s2 are
// examined. Returns 0 otherwise.
int isRevSubset(const char * s1, const char * end_of_s2, int len)
{
    const char * cursor = end_of_s2;
    for (int remaining = len; remaining > 0; --remaining) {
        if (*s1 == '\0' || *s1 != *cursor) break;
        ++s1;
        --cursor;
    }
    // success only if all of s1 was consumed
    return (*s1 == '\0') ? 1 : 0;
}
#if 0
// Not needed in mozilla
// Disabled variants of the case helpers that look the conversion table up
// from an encoding name on every call and write the result to a separate
// destination buffer d.
// convert null terminated string to all caps using encoding
void enmkallcap(char * d, const char * p, const char * encoding)
{
struct cs_info * csconv = get_current_cs(encoding);
while (*p != '\0') {
*d++ = csconv[((unsigned char) *p)].cupper;
p++;
}
*d = '\0';
}
// convert null terminated string to all lower case using encoding
void enmkallsmall(char * d, const char * p, const char * encoding)
{
struct cs_info * csconv = get_current_cs(encoding);
while (*p != '\0') {
*d++ = csconv[((unsigned char) *p)].clower;
p++;
}
*d = '\0';
}
// convert null terminated string to have initial capital using encoding
void enmkinitcap(char * d, const char * p, const char * encoding)
{
struct cs_info * csconv = get_current_cs(encoding);
memcpy(d,p,(strlen(p)+1));
if (*p != '\0') *d= csconv[((unsigned char)*p)].cupper;
}
#endif
// Upper-case a NUL terminated string in place, mapping each byte through the
// cupper field of the 256-entry conversion table csconv.
void mkallcap(char * p, const struct cs_info * csconv)
{
    for (char * cur = p; *cur != '\0'; ++cur) {
        const unsigned char idx = (unsigned char) *cur;
        *cur = csconv[idx].cupper;
    }
}
// Lower-case a NUL terminated string in place, mapping each byte through the
// clower field of the 256-entry conversion table csconv.
void mkallsmall(char * p, const struct cs_info * csconv)
{
    for (char * cur = p; *cur != '\0'; ++cur) {
        const unsigned char idx = (unsigned char) *cur;
        *cur = csconv[idx].clower;
    }
}
// Upper-case only the first character of a NUL terminated string in place,
// using the cupper field of the conversion table csconv. An empty string is
// left untouched.
void mkinitcap(char * p, const struct cs_info * csconv)
{
    if (*p == '\0') return;
    *p = csconv[(unsigned char) *p].cupper;
}
// XXX This function was rewritten for mozilla. Instead of storing the
// conversion tables static in this file, create them when needed
// with help the mozilla backend.
struct cs_info * get_current_cs(const char * es) {
struct cs_info *ccs;
nsCOMPtr<nsIUnicodeEncoder> encoder;
nsCOMPtr<nsIUnicodeDecoder> decoder;
nsCOMPtr<nsICaseConversion> caseConv;
nsresult rv;
nsCOMPtr<nsICharsetConverterManager> ccm = do_GetService(NS_CHARSETCONVERTERMANAGER_CONTRACTID, &rv);
if (NS_FAILED(rv))
return nsnull;
rv = ccm->GetUnicodeEncoder(es, getter_AddRefs(encoder));
if (encoder && NS_SUCCEEDED(rv))
encoder->SetOutputErrorBehavior(encoder->kOnError_Replace, nsnull, '?');
if (NS_FAILED(rv))
return nsnull;
rv = ccm->GetUnicodeDecoder(es, getter_AddRefs(decoder));
caseConv = do_GetService(NS_UNICHARUTIL_CONTRACTID, &rv);
if (NS_FAILED(rv))
return nsnull;
ccs = (struct cs_info *) malloc(256 * sizeof(cs_info));
PRInt32 charLength = 256;
PRInt32 uniLength = 512;
char *source = (char *)malloc(charLength * sizeof(char));
PRUnichar *uni = (PRUnichar *)malloc(uniLength * sizeof(PRUnichar));
char *lower = (char *)malloc(charLength * sizeof(char));
char *upper = (char *)malloc(charLength * sizeof(char));
// Create a long string of all chars.
unsigned int i;
for (i = 0x00; i <= 0xff ; ++i) {
source[i] = i;
}
// Convert this long string to unicode
rv = decoder->Convert(source, &charLength, uni, &uniLength);
// Do case conversion stuff, and convert back.
caseConv->ToUpper(uni, uni, uniLength);
encoder->Convert(uni, &uniLength, upper, &charLength);
uniLength = 512;
charLength = 256;
rv = decoder->Convert(source, &charLength, uni, &uniLength);
caseConv->ToLower(uni, uni, uniLength);
encoder->Convert(uni, &uniLength, lower, &charLength);
// Store
for (i = 0x00; i <= 0xff ; ++i) {
ccs[i].cupper = upper[i];
ccs[i].clower = lower[i];
if (ccs[i].clower != (unsigned char)i)
ccs[i].ccase = true;
else
ccs[i].ccase = false;
}
free(source);
free(uni);
free(lower);
free(upper);
return ccs;
}
// Default character encoding for each known two-letter language code.
// get_default_enc() searches this table linearly with strcmp().
struct lang_map lang2enc[] = {
{"ca","ISO8859-1"},
{"cs","ISO8859-2"},
{"da","ISO8859-1"},
{"de","ISO8859-1"},
{"el","ISO8859-7"},
{"en","ISO8859-1"},
{"es","ISO8859-1"},
{"fr","ISO8859-1"},
{"hr","ISO8859-2"},
{"hu","ISO8859-2"},
{"it","ISO8859-1"},
{"la","ISO8859-1"},
{"lv","ISO8859-13"},
{"nl","ISO8859-1"},
{"pl","ISO8859-2"},
{"pt","ISO8859-1"},
{"sv","ISO8859-1"},
{"ru","KOI8-R"},
{"bg","microsoft-cp1251"},
};
// Look up the encoding name registered for a language code in lang2enc.
//   lang    - language code to search for (exact, case-sensitive match)
//   returns - the matching def_enc string (owned by the table), or NULL when
//             the language is not listed or lang itself is NULL
const char * get_default_enc(const char * lang) {
  // strcmp() on a null pointer is undefined behaviour; treat a missing
  // language the same way as an unknown one.
  if (lang == NULL)
    return NULL;

  const int n = sizeof(lang2enc) / sizeof(lang2enc[0]);
  for (int i = 0; i < n; i++) {
    if (strcmp(lang, lang2enc[i].lang) == 0)
      return lang2enc[i].def_enc;
  }
  return NULL;
}

Просмотреть файл

@ -1,73 +0,0 @@
#ifndef __CSUTILHXX__
#define __CSUTILHXX__
// Base-level string utility routines (declarations only; the definitions
// live in the matching implementation file).

// remove the end-of-line character(s) from s, in place
void mychomp(char * s);
// duplicate a string
char * mystrdup(const char * s);
// duplicate the reverse of a string
char * myrevstrdup(const char * s);
// split *sptr into tokens separated by the single character delim
char * mystrsep(char ** sptr, const char delim);
// is one string a leading subset of another
int isSubset(const char * s1, const char * s2);
// is one reversed string a leading subset of the end of another
int isRevSubset(const char * s1, const char * end_of_s2, int s2_len);
// Character encoding information: case data for a single 8-bit character.
// The case table is an array of 256 of these, indexed by character value
// (presumably the one returned by get_current_cs() -- confirm).
struct cs_info {
unsigned char ccase;   // nonzero when the lower-case form differs from the character itself
unsigned char clower;  // lower-case form of the character
unsigned char cupper;  // upper-case form of the character
};
// Pairs an encoding name with a case table.
// NOTE(review): no user of this struct is visible in this part of the
// source; it looks like a leftover from a static encoding table -- confirm.
struct enc_entry {
const char * enc_name;       // encoding name
struct cs_info * cs_table;   // case table for that encoding
};
// One row of the language -> default encoding map (see lang2enc).
struct lang_map {
const char * lang;      // language code, e.g. "en"
const char * def_enc;   // name of the default encoding for that language
};
// Case table for the encoding named es
// (presumably 256 entries, one per byte value -- confirm against the definition).
struct cs_info * get_current_cs(const char * es);
// Default encoding name for a language code, or NULL when none is listed.
const char * get_default_enc(const char * lang);
#if 0
// Not needed in mozilla
// convert null terminated string to all caps using encoding
void enmkallcap(char * d, const char * p, const char * encoding);
// convert null terminated string to all lower case using encoding
void enmkallsmall(char * d, const char * p, const char * encoding);
// convert null terminated string to have an initial capital using encoding
void enmkinitcap(char * d, const char * p, const char * encoding);
#endif
// convert null terminated string to all caps
void mkallcap(char * p, const struct cs_info * csconv);
// convert null terminated string to all lower case
void mkallsmall(char * p, const struct cs_info * csconv);
// convert null terminated string to have an initial capital
void mkinitcap(char * p, const struct cs_info * csconv);
#endif

Просмотреть файл

@ -1,224 +0,0 @@
#include "license.readme"
#include <stdlib.h>
#include <string.h>
#include <stdio.h>
#include "hashmgr.hxx"
extern void mychomp(char * s);
extern char * mystrdup(const char *);
#ifdef __SUNPRO_CC // for SunONE Studio compiler
using namespace std;
#endif
// build a hash table from a munched word list
HashMgr::HashMgr(const char * tpath)
{
tablesize = 0;
tableptr = NULL;
int ec = load_tables(tpath);
if (ec) {
/* error condition - what should we do here */
fprintf(stderr,"Hash Manager Error : %d\n",ec);
fflush(stderr);
if (tableptr) {
free(tableptr);
tableptr = 0;
}
tablesize = 0;
}
}
// Tear the table down: for each column free the strings owned by the head
// entry, then every chained node with its strings, and finally the array.
HashMgr::~HashMgr()
{
  if (tableptr != NULL) {
    for (int col = 0; col < tablesize; ++col) {
      // The head entry lives inside the table array itself, so it is not
      // freed individually; only the strings it owns are.
      struct hentry * head = &tableptr[col];
      if (head->word) free(head->word);
      if (head->astr) free(head->astr);

      // Chained entries are separate heap nodes.
      struct hentry * node = head->next;
      while (node != NULL) {
        struct hentry * following = node->next;
        if (node->word) free(node->word);
        if (node->astr) free(node->astr);
        free(node);
        node = following;
      }
    }
    free(tableptr);
  }
  tablesize = 0;
}
// Find a root word in the hash table.
//   word    - null terminated word to search for
//   returns - the matching entry, or NULL when the word is absent or no
//             table is loaded
struct hentry * HashMgr::lookup(const char *word) const
{
  if (!tableptr)
    return NULL;

  struct hentry * entry = &tableptr[hash(word)];
  // A column whose head has no word is empty.
  if (entry->word == NULL)
    return NULL;

  while (entry != NULL) {
    if (strcmp(word, entry->word) == 0)
      return entry;
    entry = entry->next;
  }
  return NULL;
}
// Insert a word and its affix string into the hash table (private).
//   word, wl - the word and its length
//   aff, al  - the affix flag string and its length
//   returns  - 0 on success, 1 when an allocation failed
// An empty column stores the entry in the table slot itself; otherwise a
// new node is allocated and appended to the end of the column's chain.
int HashMgr::add_word(const char * word, int wl, const char * aff, int al)
{
  struct hentry * slot = &tableptr[hash(word)];
  const bool slotInUse = (slot->word != NULL);

  struct hentry * entry = slot;
  if (slotInUse) {
    entry = (struct hentry *) malloc (sizeof(struct hentry));
    if (entry == NULL) return 1;
  }

  entry->wlen = wl;
  entry->alen = al;
  entry->word = mystrdup(word);
  entry->astr = mystrdup(aff);
  entry->next = NULL;

  if (slotInUse) {
    // link the new node behind the current tail of the chain
    struct hentry * tail = slot;
    while (tail->next != NULL) tail = tail->next;
    tail->next = entry;
  }

  // A NULL copy only counts as an error when there was something to copy.
  if (wl && (entry->word == NULL)) return 1;
  if (al && (entry->astr == NULL)) return 1;
  return 0;
}
// Step through the hash table one entry at a time.
//   col     - in/out column cursor; pass a negative value to start over
//   hp      - entry returned by the previous call, or NULL to start over
//   returns - the next entry, or NULL once the table is exhausted (col is
//             then reset to -1)
struct hentry * HashMgr::walk_hashtable(int &col, struct hentry * hp) const
{
  // restart the walk when either cursor says so
  if ((col < 0) || (hp == NULL)) {
    col = -1;
    hp = NULL;
  }

  // still inside a chain: just follow it
  if (hp != NULL && hp->next != NULL)
    return hp->next;

  // otherwise move on to the next column whose head holds a word
  for (++col; col < tablesize; ++col) {
    struct hentry * head = &tableptr[col];
    if (head->word != NULL)
      return head;
  }

  // ran off the end of the table
  col = -1;
  return NULL;
}
// load a munched word list and build a hash table on the fly
int HashMgr::load_tables(const char * tpath)
{
int wl, al;
char * ap;
// raw dictionary - munched file
FILE * rawdict = fopen(tpath, "r");
if (rawdict == NULL) {
return 1;
}
// first read the first line of file to get hash table size */
char ts[MAXDELEN];
if (!fgets(ts, MAXDELEN-1,rawdict)) {
fclose(rawdict);
return 2;
}
mychomp(ts);
tablesize = atoi(ts);
if (!tablesize) {
fclose(rawdict);
return 4;
}
tablesize = tablesize + 5;
if ((tablesize %2) == 0) tablesize++;
// allocate the hash table
tableptr = (struct hentry *) calloc(tablesize, sizeof(struct hentry));
if (!tableptr) {
fclose(rawdict);
return 3;
}
// loop through all words on much list and add to hash
// table and create word and affix strings
while (fgets(ts,MAXDELEN-1,rawdict)) {
mychomp(ts);
// split each line into word and affix char strings
ap = strchr(ts,'/');
if (ap) {
*ap = '\0';
ap++;
al = strlen(ap);
} else {
al = 0;
ap = NULL;
}
wl = strlen(ts);
// add the word and its index
if (add_word(ts,wl,ap,al)) {
fclose(rawdict);
return 5;
}
}
fclose(rawdict);
return 0;
}
// Hash a word to a column index in [0, tablesize).
// The first (up to) four characters are packed into the accumulator one
// byte at a time; every further character is mixed in with a ROTATE by
// ROTATE_LEN bits followed by an exclusive-or.
int HashMgr::hash(const char * word) const
{
  const char * cursor = word;
  long acc = 0;

  int packed = 0;
  while (packed < 4 && *cursor != 0) {
    acc = (acc << 8) | (*cursor);
    ++cursor;
    ++packed;
  }

  for (; *cursor != 0; ++cursor) {
    ROTATE(acc,ROTATE_LEN);
    acc ^= (*cursor);
  }

  return (unsigned long) acc % tablesize;
}

Просмотреть файл

@ -1,27 +0,0 @@
#ifndef _HASHMGR_HXX_
#define _HASHMGR_HXX_
#include "htypes.hxx"
// Hash table of dictionary root words, loaded from a munched word list.
// Each column head is stored inside the table array; collisions are
// chained through hentry::next.
class HashMgr
{
// number of columns in the table (0 when nothing is loaded)
int tablesize;
// array of tablesize column heads, or NULL when nothing is loaded
struct hentry * tableptr;
public:
// load the dictionary at tpath; on failure the manager is left empty
HashMgr(const char * tpath);
~HashMgr();
// find the entry for a word, or NULL
struct hentry * lookup(const char *) const;
// column index for a word
int hash(const char *) const;
// iterate over all entries; start with col < 0 or hp == NULL
struct hentry * walk_hashtable(int & col, struct hentry * hp) const;
private:
// copying is disabled
HashMgr( const HashMgr & ); // not implemented
HashMgr &operator=( const HashMgr & ); // not implemented
// read the file and fill the table; returns 0 on success
int load_tables(const char * tpath);
// insert one word with its affix string; returns 0 on success
int add_word(const char * word, int wl, const char * ap, int al);
};
#endif

Просмотреть файл

@ -1,20 +0,0 @@
#ifndef _HTYPES_HXX_
#define _HTYPES_HXX_
// Size of the buffer HashMgr uses to read one dictionary line.
#define MAXDELEN 256
// Number of bits the hash value is rotated by for each extra character.
#define ROTATE_LEN 5
// Rotate v left by q bits in place, wrapping in the q bits found at bit
// position (32 - q), i.e. the macro is written for a 32-bit quantity.
// Every use of q is parenthesised so an expression argument such as
// ROTATE(v, a + b) shifts by the intended amount.
// NOTE: the expansion already ends in a semicolon (kept for existing
// callers), so do not use it as the sole body of an if/else.
#define ROTATE(v,q) \
  (v) = ((v) << (q)) | (((v) >> (32 - (q))) & ((1 << (q))-1));
// One dictionary entry. Column heads are stored directly in the hash
// table array; colliding entries are chained through next.
struct hentry
{
short wlen;             // length of word, as passed to HashMgr::add_word
short alen;             // length of astr, as passed to HashMgr::add_word
char * word;            // heap copy of the root word (NULL in an empty column head)
char * astr;            // heap copy of the affix flag string, may be NULL
struct hentry * next;   // next entry in the same column, or NULL
};
#endif

Просмотреть файл

@ -1,61 +0,0 @@
/*
* Copyright 2002 Kevin B. Hendricks, Stratford, Ontario, Canada
* And Contributors. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
*
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* 3. All modifications to the source code must be clearly marked as
* such. Binary redistributions based on modified source code
* must be clearly marked as modified versions in the documentation
* and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY KEVIN B. HENDRICKS AND CONTRIBUTORS
* ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
* FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL
* KEVIN B. HENDRICKS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
*
* NOTE: A special thanks and credit goes to Geoff Kuenning
* the creator of ispell. MySpell's affix algorithms were
* based on those of ispell which should be noted is
* copyright Geoff Kuenning et.al. and now available
* under a BSD style license. For more information on ispell
* and affix compression in general, please see:
* http://www.cs.ucla.edu/ficus-members/geoff/ispell.html
* (the home page for ispell)
*
* An almost complete rewrite of MySpell for use by
* the Mozilla project has been developed by David Einstein
* (Deinst@world.std.com). David and I are now
* working on parallel development tracks to help
* our respective projects (Mozilla and OpenOffice.org
* and we will maintain full affix file and dictionary
* file compatibility and work on merging our versions
* of MySpell back into a single tree. David has been
* a significant help in improving MySpell.
*
* Special thanks also go to La'szlo' Ne'meth
* <nemethl@gyorsposta.hu> who is the author of the
* Hungarian dictionary and who developed and contributed
* the code to support compound words in MySpell
* and fixed numerous problems with the encoding
* case conversion tables.
*
*/

Просмотреть файл

@ -1,487 +0,0 @@
/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
/* ***** BEGIN LICENSE BLOCK *****
* Version: MPL 1.1/GPL 2.0/LGPL 2.1
*
* The contents of this file are subject to the Mozilla Public License Version
* 1.1 (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
* http://www.mozilla.org/MPL/
*
* Software distributed under the License is distributed on an "AS IS" basis,
* WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
* for the specific language governing rights and limitations under the
* License.
*
* The Original Code is Mozilla Spellchecker Component.
*
* The Initial Developer of the Original Code is
* David Einstein.
* Portions created by the Initial Developer are Copyright (C) 2001
* the Initial Developer. All Rights Reserved.
*
* Contributor(s): David Einstein <Deinst@world.std.com>
* Kevin Hendricks <kevin.hendricks@sympatico.ca>
* Michiel van Leeuwen <mvl@exedo.nl>
*
* Alternatively, the contents of this file may be used under the terms of
* either the GNU General Public License Version 2 or later (the "GPL"), or
* the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
* in which case the provisions of the GPL or the LGPL are applicable instead
* of those above. If you wish to allow use of your version of this file only
* under the terms of either the GPL or the LGPL, and not to allow others to
* use your version of this file under the terms of the MPL, indicate your
* decision by deleting the provisions above and replace them with the notice
* and other provisions required by the GPL or the LGPL. If you do not delete
* the provisions above, a recipient may use your version of this file under
* the terms of any one of the MPL, the GPL or the LGPL.
*
* This spellchecker is based on the MySpell spellchecker made for Open Office
* by Kevin Hendricks. Although the algorithms and code, have changed
* slightly, the architecture is still the same. The Mozilla implementation
* is designed to be compatible with the Open Office dictionaries.
* Please do not make changes to the affix or dictionary file formats
* without attempting to coordinate with Kevin. For more information
* on the original MySpell see
* http://whiteboard.openoffice.org/source/browse/whiteboard/lingucomponent/source/spellcheck/myspell/
*
* A special thanks and credit goes to Geoff Kuenning
* the creator of ispell. MySpell's affix algorithms were
* based on those of ispell which should be noted is
* copyright Geoff Kuenning et.al. and now available
* under a BSD style license. For more information on ispell
* and affix compression in general, please see:
* http://www.cs.ucla.edu/ficus-members/geoff/ispell.html
* (the home page for ispell)
*
* ***** END LICENSE BLOCK ***** */
/* based on MySpell (c) 2001 by Kevin Hendicks */
#include "mozMySpell.h"
#include "nsReadableUtils.h"
#include "nsXPIDLString.h"
#include "nsIObserverService.h"
#include "nsISimpleEnumerator.h"
#include "nsIDirectoryEnumerator.h"
#include "nsDirectoryServiceUtils.h"
#include "nsDirectoryServiceDefs.h"
#include "mozISpellI18NManager.h"
#include "nsICharsetConverterManager.h"
#include "nsUnicharUtilCIID.h"
#include "nsUnicharUtils.h"
#include "nsCRT.h"
#include <stdlib.h>
// nsISupports boilerplate (AddRef/Release/QueryInterface) for mozMySpell,
// exposing the three interfaces listed below.
NS_IMPL_ISUPPORTS3(mozMySpell,
mozISpellCheckingEngine,
nsIObserver,
nsISupportsWeakReference)
// Second-stage initialisation: set up the dictionary map, scan for the
// installed dictionaries and ask to be told about profile changes so the
// list can be rebuilt. Only a failure to create the map is reported.
nsresult
mozMySpell::Init()
{
  if (!mDictionaries.Init())
    return NS_ERROR_OUT_OF_MEMORY;

  LoadDictionaryList();

  // Registration is best effort: without the observer service the list is
  // simply never refreshed.
  nsCOMPtr<nsIObserverService> observerService =
    do_GetService("@mozilla.org/observer-service;1");
  if (observerService)
    observerService->AddObserver(this, "profile-do-change", PR_TRUE);

  return NS_OK;
}
// Drop the personal dictionary reference and destroy the MySpell engine
// (mMySpell may be null when no dictionary was ever selected).
mozMySpell::~mozMySpell()
{
mPersonalDictionary = nsnull;
delete mMySpell;
}
/* attribute wstring dictionary; */
// Hand back a newly allocated copy of the current dictionary name.
// Fails with NS_ERROR_NOT_INITIALIZED while no dictionary is selected.
NS_IMETHODIMP mozMySpell::GetDictionary(PRUnichar **aDictionary)
{
  NS_ENSURE_ARG_POINTER(aDictionary);

  if (mDictionary.IsEmpty())
    return NS_ERROR_NOT_INITIALIZED;

  PRUnichar *nameCopy = ToNewUnicode(mDictionary);
  *aDictionary = nameCopy;
  if (!nameCopy)
    return NS_ERROR_OUT_OF_MEMORY;
  return NS_OK;
}
/* Set the dictionary.
* This also loads the dictionary and initializes the charset converters
* from the encoding the dictionary declares.
*
* aDictionary must be a name known to mDictionaries, otherwise
* NS_ERROR_FILE_NOT_FOUND is returned. Selecting the dictionary that is
* already current is a no-op.
*
* NOTE(review): mDictionary is updated and the old engine deleted before
* the converters are obtained. If a later step fails, the object keeps the
* new name with stale or missing converters, and a retry with the same
* name returns NS_OK early -- confirm whether callers can hit this.
*/
NS_IMETHODIMP mozMySpell::SetDictionary(const PRUnichar *aDictionary)
{
NS_ENSURE_ARG_POINTER(aDictionary);
if (mDictionary.Equals(aDictionary))
return NS_OK;
// the map stores the .aff file of every known dictionary
nsIFile* affFile = mDictionaries.GetWeak(aDictionary);
if (!affFile)
return NS_ERROR_FILE_NOT_FOUND;
nsCAutoString dictFileName, affFileName;
// XXX This isn't really good. nsIFile->NativePath isn't safe for all
// character sets on Windows.
// A better way would be to QI to nsILocalFile, and get a filehandle
// from there. Only problem is that myspell wants a path
nsresult rv = affFile->GetNativePath(affFileName);
NS_ENSURE_SUCCESS(rv, rv);
// derive the .dic path by swapping the extension of the .aff path
dictFileName = affFileName;
PRInt32 dotPos = dictFileName.RFindChar('.');
if (dotPos == -1)
return NS_ERROR_FAILURE;
dictFileName.SetLength(dotPos);
dictFileName.AppendLiteral(".dic");
// SetDictionary can be called multiple times, so we might have a
// valid mMySpell instance which needs to be cleaned up.
delete mMySpell;
mDictionary = aDictionary;
mMySpell = new MySpell(affFileName.get(),
dictFileName.get());
if (!mMySpell)
return NS_ERROR_OUT_OF_MEMORY;
// converters between UTF-16 and the dictionary's own encoding
nsCOMPtr<nsICharsetConverterManager> ccm =
do_GetService(NS_CHARSETCONVERTERMANAGER_CONTRACTID, &rv);
NS_ENSURE_SUCCESS(rv, rv);
rv = ccm->GetUnicodeDecoder(mMySpell->get_dic_encoding(),
getter_AddRefs(mDecoder));
NS_ENSURE_SUCCESS(rv, rv);
rv = ccm->GetUnicodeEncoder(mMySpell->get_dic_encoding(),
getter_AddRefs(mEncoder));
NS_ENSURE_SUCCESS(rv, rv);
if (mEncoder)
mEncoder->SetOutputErrorBehavior(mEncoder->kOnError_Signal, nsnull, '?');
// the language is the dictionary name up to the first '-' (or, failing
// that, the first '_'); the whole name when neither is present
PRInt32 pos = mDictionary.FindChar('-');
if (pos == -1)
pos = mDictionary.FindChar('_');
if (pos == -1)
mLanguage.Assign(mDictionary);
else
mLanguage = Substring(mDictionary, 0, pos);
return NS_OK;
}
/* readonly attribute wstring language; */
// Hand back a newly allocated copy of the language derived from the
// current dictionary name. Fails with NS_ERROR_NOT_INITIALIZED while no
// dictionary is selected.
NS_IMETHODIMP mozMySpell::GetLanguage(PRUnichar **aLanguage)
{
  NS_ENSURE_ARG_POINTER(aLanguage);

  if (mDictionary.IsEmpty())
    return NS_ERROR_NOT_INITIALIZED;

  PRUnichar *languageCopy = ToNewUnicode(mLanguage);
  *aLanguage = languageCopy;
  if (!languageCopy)
    return NS_ERROR_OUT_OF_MEMORY;
  return NS_OK;
}
/* readonly attribute boolean providesPersonalDictionary; */
// Always reports PR_FALSE.
NS_IMETHODIMP mozMySpell::GetProvidesPersonalDictionary(PRBool *aProvidesPersonalDictionary)
{
NS_ENSURE_ARG_POINTER(aProvidesPersonalDictionary);
*aProvidesPersonalDictionary = PR_FALSE;
return NS_OK;
}
/* readonly attribute boolean providesWordUtils; */
// Always reports PR_FALSE.
NS_IMETHODIMP mozMySpell::GetProvidesWordUtils(PRBool *aProvidesWordUtils)
{
NS_ENSURE_ARG_POINTER(aProvidesWordUtils);
*aProvidesWordUtils = PR_FALSE;
return NS_OK;
}
/* readonly attribute wstring name; */
// Not implemented by this engine; aName is left untouched.
NS_IMETHODIMP mozMySpell::GetName(PRUnichar * *aName)
{
return NS_ERROR_NOT_IMPLEMENTED;
}
/* readonly attribute wstring copyright; */
// Not implemented by this engine; aCopyright is left untouched.
NS_IMETHODIMP mozMySpell::GetCopyright(PRUnichar * *aCopyright)
{
return NS_ERROR_NOT_IMPLEMENTED;
}
/* attribute mozIPersonalDictionary personalDictionary; */
// Return an owning reference to the personal dictionary consulted by
// Check(), or null when none has been set.
NS_IMETHODIMP mozMySpell::GetPersonalDictionary(mozIPersonalDictionary * *aPersonalDictionary)
{
  // Validate the out-pointer like every other getter in this class rather
  // than writing through a null pointer.
  NS_ENSURE_ARG_POINTER(aPersonalDictionary);

  *aPersonalDictionary = mPersonalDictionary;
  NS_IF_ADDREF(*aPersonalDictionary);
  return NS_OK;
}
// Replace the personal dictionary that Check() falls back to; passing
// null removes it.
NS_IMETHODIMP mozMySpell::SetPersonalDictionary(mozIPersonalDictionary * aPersonalDictionary)
{
mPersonalDictionary = aPersonalDictionary;
return NS_OK;
}
// Closure handed to AppendNewString while enumerating mDictionaries.
struct AppendNewStruct
{
PRUnichar **dics;   // output array being filled with copies of the names
PRUint32 count;     // number of entries of dics filled so far
PRBool failed;      // set to PR_TRUE when a name could not be copied
};
// Hashtable enumeration callback: append a copy of the dictionary name
// aString to the AppendNewStruct passed as aClosure. Stops the enumeration
// and flags the closure as failed when the copy cannot be allocated.
static PLDHashOperator
AppendNewString(const PRUnichar *aString, nsIFile* aFile, void* aClosure)
{
  AppendNewStruct *state = (AppendNewStruct*) aClosure;

  PRUnichar *nameCopy = NS_strdup(aString);
  state->dics[state->count] = nameCopy;

  if (nameCopy) {
    ++state->count;
    return PL_DHASH_NEXT;
  }

  state->failed = PR_TRUE;
  return PL_DHASH_STOP;
}
/* void GetDictionaryList ([array, size_is (count)] out wstring dictionaries, out PRUint32 count); */
// Return the names of all known dictionaries as a newly allocated array of
// newly allocated strings; the caller owns both. On failure nothing is
// handed out and everything allocated so far is released.
NS_IMETHODIMP mozMySpell::GetDictionaryList(PRUnichar ***aDictionaries,
                                            PRUint32 *aCount)
{
  if (!aDictionaries || !aCount)
    return NS_ERROR_NULL_POINTER;

  const PRUint32 capacity = mDictionaries.Count();

  AppendNewStruct ans = {
    (PRUnichar**) NS_Alloc(sizeof(PRUnichar*) * capacity),
    0,
    PR_FALSE
  };

  // Without this check AppendNewString would write through a null array
  // when the allocation fails. A null result for a zero-sized request is
  // fine: the enumeration below then has nothing to store.
  if (!ans.dics && capacity)
    return NS_ERROR_OUT_OF_MEMORY;

  // ans is filled in by the callback during enumeration
  mDictionaries.EnumerateRead(AppendNewString, &ans);

  if (ans.failed) {
    // release the names copied before the failure, then the array
    while (ans.count) {
      --ans.count;
      NS_Free(ans.dics[ans.count]);
    }
    NS_Free(ans.dics);
    return NS_ERROR_OUT_OF_MEMORY;
  }

  *aDictionaries = ans.dics;
  *aCount = ans.count;
  return NS_OK;
}
void
mozMySpell::LoadDictionaryList()
{
mDictionaries.Clear();
nsresult rv;
nsCOMPtr<nsIProperties> dirSvc =
do_GetService(NS_DIRECTORY_SERVICE_CONTRACTID);
if (!dirSvc)
return;
nsCOMPtr<nsIFile> dictDir;
rv = dirSvc->Get(DICTIONARY_SEARCH_DIRECTORY,
NS_GET_IID(nsIFile), getter_AddRefs(dictDir));
if (NS_FAILED(rv)) {
// default to appdir/dictionaries
rv = dirSvc->Get(NS_XPCOM_CURRENT_PROCESS_DIR,
NS_GET_IID(nsIFile), getter_AddRefs(dictDir));
if (NS_FAILED(rv))
return;
dictDir->AppendNative(NS_LITERAL_CSTRING("dictionaries"));
}
LoadDictionariesFromDir(dictDir);
nsCOMPtr<nsISimpleEnumerator> dictDirs;
rv = dirSvc->Get(DICTIONARY_SEARCH_DIRECTORY_LIST,
NS_GET_IID(nsISimpleEnumerator), getter_AddRefs(dictDirs));
if (NS_FAILED(rv))
return;
PRBool hasMore;
while (NS_SUCCEEDED(dictDirs->HasMoreElements(&hasMore)) && hasMore) {
nsCOMPtr<nsISupports> elem;
dictDirs->GetNext(getter_AddRefs(elem));
dictDir = do_QueryInterface(elem);
if (dictDir)
LoadDictionariesFromDir(dictDir);
}
}
// Register every dictionary found in aDir: each "<name>.dic" file that has
// a sibling "<name>.aff" is stored in mDictionaries under <name>, mapped
// to the .aff file. Anything that is missing or unreadable is skipped.
void
mozMySpell::LoadDictionariesFromDir(nsIFile* aDir)
{
  PRBool present = PR_FALSE;
  if (NS_FAILED(aDir->Exists(&present)) || !present)
    return;

  PRBool isDir = PR_FALSE;
  if (NS_FAILED(aDir->IsDirectory(&isDir)) || !isDir)
    return;

  nsCOMPtr<nsISimpleEnumerator> entries;
  if (NS_FAILED(aDir->GetDirectoryEntries(getter_AddRefs(entries))))
    return;

  nsCOMPtr<nsIDirectoryEnumerator> files(do_QueryInterface(entries));
  if (!files)
    return;

  nsCOMPtr<nsIFile> file;
  while (NS_SUCCEEDED(files->GetNextFile(getter_AddRefs(file))) && file) {
    nsAutoString leafName;
    file->GetLeafName(leafName);
    if (!StringEndsWith(leafName, NS_LITERAL_STRING(".dic")))
      continue;

    // dictionary name = leaf name without the 4 characters of ".dic"
    nsAutoString dict(Substring(leafName, 0, leafName.Length() - 4));

    // re-point the file at the .aff sibling and make sure it exists
    nsAutoString affName(dict);
    affName.AppendLiteral(".aff");
    file->SetLeafName(affName);

    PRBool hasAff = PR_FALSE;
    if (NS_FAILED(file->Exists(&hasAff)) || !hasAff)
      continue;

#ifdef DEBUG_bsmedberg
    printf("Adding dictionary: %s\n", NS_ConvertUTF16toUTF8(dict).get());
#endif

    mDictionaries.Put(dict.get(), file);
  }
}
// Convert a UTF-16 word to the encoding of the current dictionary.
//   aStr    - null terminated UTF-16 input
//   aDst    - receives a newly allocated, null terminated string the caller
//             must free; always null when a failure code is returned
//   returns - the result of the encoder, NS_ERROR_NULL_POINTER when no
//             encoder is set up, or NS_ERROR_OUT_OF_MEMORY
nsresult mozMySpell::ConvertCharset(const PRUnichar* aStr, char ** aDst)
{
  NS_ENSURE_ARG_POINTER(aDst);
  *aDst = nsnull;
  NS_ENSURE_TRUE(mEncoder, NS_ERROR_NULL_POINTER);

  PRInt32 outLength;
  PRInt32 inLength = nsCRT::strlen(aStr);
  nsresult rv = mEncoder->GetMaxLength(aStr, inLength, &outLength);
  NS_ENSURE_SUCCESS(rv, rv);

  *aDst = (char *) nsMemory::Alloc(sizeof(char) * (outLength+1));
  NS_ENSURE_TRUE(*aDst, NS_ERROR_OUT_OF_MEMORY);

  rv = mEncoder->Convert(aStr, &inLength, *aDst, &outLength);
  if (NS_FAILED(rv)) {
    // Never hand back a buffer that was not terminated: callers adopt
    // whatever pointer is left in *aDst, even when the call failed.
    nsMemory::Free(*aDst);
    *aDst = nsnull;
    return rv;
  }

  (*aDst)[outLength] = '\0';
  return rv;
}
/* boolean Check (in wstring word); */
// Spell-check one word. The word is accepted when the MySpell engine knows
// it or, failing that, when the personal dictionary (if any) accepts it
// for the current language. Requires a dictionary to have been selected.
NS_IMETHODIMP mozMySpell::Check(const PRUnichar *aWord, PRBool *aResult)
{
  NS_ENSURE_ARG_POINTER(aWord);
  NS_ENSURE_ARG_POINTER(aResult);
  NS_ENSURE_TRUE(mMySpell, NS_ERROR_FAILURE);

  // the engine works in the dictionary's own encoding
  nsXPIDLCString charsetWord;
  nsresult rv = ConvertCharset(aWord, getter_Copies(charsetWord));
  NS_ENSURE_SUCCESS(rv, rv);

  *aResult = mMySpell->spell(charsetWord);
  if (*aResult || !mPersonalDictionary)
    return rv;

  // unknown to the engine: let the personal dictionary decide
  return mPersonalDictionary->Check(aWord, mLanguage.get(), aResult);
}
/* void Suggest (in wstring word, [array, size_is (count)] out wstring suggestions, out PRUint32 count); */
// Ask the engine for replacement suggestions for aWord.
//   aSuggestions     - receives a newly allocated array of newly allocated
//                      UTF-16 strings (null when there are none)
//   aSuggestionCount - receives the number of entries in that array
// On failure both out-parameters are reset (null / 0) and everything that
// was allocated is released.
NS_IMETHODIMP mozMySpell::Suggest(const PRUnichar *aWord, PRUnichar ***aSuggestions, PRUint32 *aSuggestionCount)
{
  NS_ENSURE_ARG_POINTER(aSuggestions);
  NS_ENSURE_ARG_POINTER(aSuggestionCount);
  NS_ENSURE_TRUE(mMySpell, NS_ERROR_FAILURE);

  nsresult rv;
  *aSuggestions = nsnull;
  *aSuggestionCount = 0;

  nsXPIDLCString charsetWord;
  rv = ConvertCharset(aWord, getter_Copies(charsetWord));
  NS_ENSURE_SUCCESS(rv, rv);

  // Start from null in case suggest() reports 0 results without touching
  // its out-parameter; the cleanup at the end frees this pointer.
  char ** wlst = nsnull;
  *aSuggestionCount = mMySpell->suggest(&wlst, charsetWord);

  if (*aSuggestionCount) {
    *aSuggestions = (PRUnichar **)nsMemory::Alloc(*aSuggestionCount * sizeof(PRUnichar *));
    if (*aSuggestions) {
      PRUint32 index = 0;
      for (index = 0; index < *aSuggestionCount && NS_SUCCEEDED(rv); ++index) {
        // Clear the slot first: if anything below fails before the slot is
        // filled, the error cleanup must not free an uninitialised pointer.
        (*aSuggestions)[index] = nsnull;

        // Convert the suggestion to utf16
        PRInt32 inLength = nsCRT::strlen(wlst[index]);
        PRInt32 outLength;
        rv = mDecoder->GetMaxLength(wlst[index], inLength, &outLength);
        if (NS_SUCCEEDED(rv))
        {
          (*aSuggestions)[index] = (PRUnichar *) nsMemory::Alloc(sizeof(PRUnichar) * (outLength+1));
          if ((*aSuggestions)[index])
          {
            rv = mDecoder->Convert(wlst[index], &inLength, (*aSuggestions)[index], &outLength);
            if (NS_SUCCEEDED(rv))
              (*aSuggestions)[index][outLength] = 0;
          }
          else
            rv = NS_ERROR_OUT_OF_MEMORY;
        }
      }

      if (NS_FAILED(rv)) {
        // free the PRUnichar strings up to the point at which the error
        // occurred; index already counts the failing slot, which is either
        // null or a valid allocation
        NS_FREE_XPCOM_ALLOCATED_POINTER_ARRAY(index, *aSuggestions);
        *aSuggestions = nsnull;
      }
    }
    else // if (*aSuggestions)
      rv = NS_ERROR_OUT_OF_MEMORY;
  }

  // the engine's own list is no longer needed, whatever the outcome
  NS_FREE_XPCOM_ALLOCATED_POINTER_ARRAY(*aSuggestionCount, wlst);

  // do not report a count for an array that was not handed out
  if (NS_FAILED(rv))
    *aSuggestionCount = 0;
  return rv;
}
// nsIObserver callback. The only topic registered in Init() is
// "profile-do-change"; it triggers a rescan of the dictionary directories.
NS_IMETHODIMP
mozMySpell::Observe(nsISupports* aSubj, const char *aTopic,
const PRUnichar *aData)
{
NS_ASSERTION(!strcmp(aTopic, "profile-do-change"),
"Unexpected observer topic");
LoadDictionaryList();
return NS_OK;
}

Просмотреть файл

@ -1,110 +0,0 @@
/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
/* ***** BEGIN LICENSE BLOCK *****
* Version: MPL 1.1/GPL 2.0/LGPL 2.1
*
* The contents of this file are subject to the Mozilla Public License Version
* 1.1 (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
* http://www.mozilla.org/MPL/
*
* Software distributed under the License is distributed on an "AS IS" basis,
* WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
* for the specific language governing rights and limitations under the
* License.
*
* The Original Code is Mozilla Spellchecker Component.
*
* The Initial Developer of the Original Code is
* David Einstein.
* Portions created by the Initial Developer are Copyright (C) 2001
* the Initial Developer. All Rights Reserved.
*
* Contributor(s): David Einstein <Deinst@world.std.com>
* Kevin Hendricks <kevin.hendricks@sympatico.ca>
*
* Alternatively, the contents of this file may be used under the terms of
* either the GNU General Public License Version 2 or later (the "GPL"), or
* the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
* in which case the provisions of the GPL or the LGPL are applicable instead
* of those above. If you wish to allow use of your version of this file only
* under the terms of either the GPL or the LGPL, and not to allow others to
* use your version of this file under the terms of the MPL, indicate your
* decision by deleting the provisions above and replace them with the notice
* and other provisions required by the GPL or the LGPL. If you do not delete
* the provisions above, a recipient may use your version of this file under
* the terms of any one of the MPL, the GPL or the LGPL.
*
* This spellchecker is based on the MySpell spellchecker made for Open Office
* by Kevin Hendricks. Although the algorithms and code, have changed
* slightly, the architecture is still the same. The Mozilla implementation
* is designed to be compatible with the Open Office dictionaries.
* Please do not make changes to the affix or dictionary file formats
* without attempting to coordinate with Kevin. For more information
* on the original MySpell see
* http://whiteboard.openoffice.org/source/browse/whiteboard/lingucomponent/source/spellcheck/myspell/
*
* A special thanks and credit goes to Geoff Kuenning
* the creator of ispell. MySpell's affix algorithms were
* based on those of ispell which should be noted is
* copyright Geoff Kuenning et.al. and now available
* under a BSD style license. For more information on ispell
* and affix compression in general, please see:
* http://www.cs.ucla.edu/ficus-members/geoff/ispell.html
* (the home page for ispell)
*
* ***** END LICENSE BLOCK ***** */
#ifndef mozMySpell_h__
#define mozMySpell_h__
#include "myspell.hxx"
#include "mozISpellCheckingEngine.h"
#include "mozIPersonalDictionary.h"
#include "nsString.h"
#include "nsCOMPtr.h"
#include "nsIObserver.h"
#include "nsIUnicodeEncoder.h"
#include "nsIUnicodeDecoder.h"
#include "nsInterfaceHashtable.h"
#include "nsWeakReference.h"
// XPCOM contract ID and class ID under which the MySpell engine is known.
#define MOZ_MYSPELL_CONTRACTID "@mozilla.org/spellchecker/myspell;1"
#define MOZ_MYSPELL_CID \
{ /* D1EE1205-3F96-4a0f-ABFE-09E8C54C9E9A} */ \
0xD1EE1205, 0x3F96, 0x4a0f, \
{ 0xAB, 0xFE, 0x09, 0xE8, 0xC5, 0x4C, 0x9E, 0x9A} }
// Spell checking engine backed by MySpell. Keeps a map of the installed
// dictionaries, the currently selected one, and the charset converters
// between UTF-16 and that dictionary's encoding.
class mozMySpell : public mozISpellCheckingEngine,
public nsIObserver,
public nsSupportsWeakReference
{
public:
NS_DECL_ISUPPORTS
NS_DECL_MOZISPELLCHECKINGENGINE
NS_DECL_NSIOBSERVER
mozMySpell() : mMySpell(nsnull) { }
virtual ~mozMySpell();
// second-stage initialisation; must succeed before the object is used
nsresult Init();
// rebuild mDictionaries from all dictionary directories
void LoadDictionaryList();
// add the dictionaries found in one directory to mDictionaries
void LoadDictionariesFromDir(nsIFile* aDir);
// helper method for converting a word to the charset of the dictionary
nsresult ConvertCharset(const PRUnichar* aStr, char ** aDst);
protected:
// optional fallback consulted by Check()
nsCOMPtr<mozIPersonalDictionary> mPersonalDictionary;
// UTF-16 -> dictionary encoding
nsCOMPtr<nsIUnicodeEncoder> mEncoder;
// dictionary encoding -> UTF-16
nsCOMPtr<nsIUnicodeDecoder> mDecoder;
// Hashtable matches dictionary name to .aff file
nsInterfaceHashtable<nsUnicharPtrHashKey, nsIFile> mDictionaries;
// name of the selected dictionary (empty while none is selected)
nsString mDictionary;
// language part of mDictionary
nsString mLanguage;
// engine instance for the selected dictionary, or null
MySpell *mMySpell;
};
#endif

Просмотреть файл

@ -1,180 +0,0 @@
/* ***** BEGIN LICENSE BLOCK *****
* Version: MPL 1.1/GPL 2.0/LGPL 2.1
*
* The contents of this file are subject to the Mozilla Public License Version
* 1.1 (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
* http://www.mozilla.org/MPL/
*
* Software distributed under the License is distributed on an "AS IS" basis,
* WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
* for the specific language governing rights and limitations under the
* License.
*
* The Original Code is Mozilla Firefox.
*
* The Initial Developer of the Original Code is
* the Mozilla Foundation <http://www.mozilla.org>.
* Portions created by the Initial Developer are Copyright (C) 2006
* the Initial Developer. All Rights Reserved.
*
* Contributor(s):
* Benjamin Smedberg <benjamin@smedbergs.us> (Original Code)
*
* Alternatively, the contents of this file may be used under the terms of
* either the GNU General Public License Version 2 or later (the "GPL"), or
* the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
* in which case the provisions of the GPL or the LGPL are applicable instead
* of those above. If you wish to allow use of your version of this file only
* under the terms of either the GPL or the LGPL, and not to allow others to
* use your version of this file under the terms of the MPL, indicate your
* decision by deleting the provisions above and replace them with the notice
* and other provisions required by the GPL or the LGPL. If you do not delete
* the provisions above, a recipient may use your version of this file under
* the terms of any one of the MPL, the GPL or the LGPL.
*
* ***** END LICENSE BLOCK ***** */
#include "mozMySpellDirProvider.h"
#include "nsXULAppAPI.h"
#include "nsString.h"
#include "mozISpellCheckingEngine.h"
#include "nsICategoryManager.h"
// nsISupports boilerplate (AddRef/Release/QueryInterface) for the
// directory provider, exposing both directory-service provider interfaces.
NS_IMPL_ISUPPORTS2(mozMySpellDirProvider,
nsIDirectoryServiceProvider,
nsIDirectoryServiceProvider2)
// This provider serves no single files: every key is rejected. Directory
// lists are served by GetFiles().
NS_IMETHODIMP
mozMySpellDirProvider::GetFile(const char *aKey, PRBool *aPersist,
nsIFile* *aResult)
{
return NS_ERROR_FAILURE;
}
// Serve the DICTIONARY_SEARCH_DIRECTORY_LIST key: an enumerator over the
// "dictionaries" subdirectory of every extension directory. Any other key
// is rejected with NS_ERROR_FAILURE.
NS_IMETHODIMP
mozMySpellDirProvider::GetFiles(const char *aKey,
                                nsISimpleEnumerator* *aResult)
{
  if (strcmp(aKey, DICTIONARY_SEARCH_DIRECTORY_LIST) != 0)
    return NS_ERROR_FAILURE;

  nsCOMPtr<nsIProperties> directoryService =
    do_GetService(NS_DIRECTORY_SERVICE_CONTRACTID);
  if (!directoryService)
    return NS_ERROR_FAILURE;

  // the base list: all extension directories
  nsCOMPtr<nsISimpleEnumerator> extensionDirs;
  nsresult rv = directoryService->Get(XRE_EXTENSIONS_DIR_LIST,
                                      NS_GET_IID(nsISimpleEnumerator),
                                      getter_AddRefs(extensionDirs));
  if (NS_FAILED(rv))
    return rv;

  // wrap it so that each entry gets "dictionaries" appended
  nsCOMPtr<nsISimpleEnumerator> wrapped = new AppendingEnumerator(extensionDirs);
  if (!wrapped)
    return NS_ERROR_OUT_OF_MEMORY;

  // transfer the reference to the caller
  *aResult = nsnull;
  wrapped.swap(*aResult);
  return NS_SUCCESS_AGGREGATE_RESULT;
}
// nsISupports boilerplate for the enumerator returned by GetFiles().
NS_IMPL_ISUPPORTS1(mozMySpellDirProvider::AppendingEnumerator,
nsISimpleEnumerator)
// There is another element exactly when a next directory has been
// pre-fetched into mNext.
NS_IMETHODIMP
mozMySpellDirProvider::AppendingEnumerator::HasMoreElements(PRBool *aResult)
{
  if (mNext)
    *aResult = PR_TRUE;
  else
    *aResult = PR_FALSE;
  return NS_OK;
}
// Return the pre-fetched directory (if the caller wants it) and pre-fetch
// the following one: the next entry of the base enumerator whose
// "dictionaries" subdirectory exists.
//   aResult - receives an owning reference to the current element; may be
//             null when the call is only meant to advance (the constructor
//             uses this to prime mNext)
//   returns - NS_OK, or NS_ERROR_FAILURE when an element was requested but
//             the enumeration is already exhausted
NS_IMETHODIMP
mozMySpellDirProvider::AppendingEnumerator::GetNext(nsISupports* *aResult)
{
  if (aResult) {
    // NS_ADDREF must not be applied to null. mNext is only null here when
    // the base enumerator has nothing left, so report that instead of
    // crashing.
    if (!mNext) {
      *aResult = nsnull;
      return NS_ERROR_FAILURE;
    }
    NS_ADDREF(*aResult = mNext);
  }

  mNext = nsnull;

  nsresult rv;

  // Ignore all errors
  PRBool more;
  while (NS_SUCCEEDED(mBase->HasMoreElements(&more)) && more) {
    nsCOMPtr<nsISupports> nextbasesupp;
    mBase->GetNext(getter_AddRefs(nextbasesupp));

    nsCOMPtr<nsIFile> nextbase(do_QueryInterface(nextbasesupp));
    if (!nextbase)
      continue;

    // work on a copy so the base enumerator's file is left untouched
    nextbase->Clone(getter_AddRefs(mNext));
    if (!mNext)
      continue;

    mNext->AppendNative(NS_LITERAL_CSTRING("dictionaries"));

    PRBool exists;
    rv = mNext->Exists(&exists);
    if (NS_SUCCEEDED(rv) && exists)
      break;

    mNext = nsnull;
  }

  return NS_OK;
}
// Wrap aBase, the enumerator whose entries get "dictionaries" appended.
mozMySpellDirProvider::AppendingEnumerator::AppendingEnumerator
(nsISimpleEnumerator* aBase) :
mBase(aBase)
{
// Initialize mNext to begin: calling GetNext with a null result only
// pre-fetches the first usable directory.
GetNext(nsnull);
}
// Component registration hook: add this provider to the directory
// provider category under the entry "spellcheck-directory-provider".
NS_METHOD
mozMySpellDirProvider::Register(nsIComponentManager* aCompMgr,
                                nsIFile* aPath, const char *aLoaderStr,
                                const char *aType,
                                const nsModuleComponentInfo *aInfo)
{
  nsCOMPtr<nsICategoryManager> categoryManager =
    do_GetService(NS_CATEGORYMANAGER_CONTRACTID);
  if (!categoryManager)
    return NS_ERROR_FAILURE;

  return categoryManager->AddCategoryEntry(XPCOM_DIRECTORY_PROVIDER_CATEGORY,
                                           "spellcheck-directory-provider",
                                           kContractID, PR_TRUE, PR_TRUE, nsnull);
}
// Component unregistration hook: remove the
// "spellcheck-directory-provider" entry added by Register().
NS_METHOD
mozMySpellDirProvider::Unregister(nsIComponentManager* aCompMgr,
                                  nsIFile* aPath,
                                  const char *aLoaderStr,
                                  const nsModuleComponentInfo *aInfo)
{
  nsCOMPtr<nsICategoryManager> categoryManager =
    do_GetService(NS_CATEGORYMANAGER_CONTRACTID);
  if (!categoryManager)
    return NS_ERROR_FAILURE;

  return categoryManager->DeleteCategoryEntry(XPCOM_DIRECTORY_PROVIDER_CATEGORY,
                                              "spellcheck-directory-provider",
                                              PR_TRUE);
}
// Contract ID string that Register() stores as the value of the
// "spellcheck-directory-provider" category entry.
char const *const
mozMySpellDirProvider::kContractID = "@mozilla.org/spellcheck/dir-provider;1";

Просмотреть файл

@ -1,83 +0,0 @@
/* ***** BEGIN LICENSE BLOCK *****
* Version: MPL 1.1/GPL 2.0/LGPL 2.1
*
* The contents of this file are subject to the Mozilla Public License Version
* 1.1 (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
* http://www.mozilla.org/MPL/
*
* Software distributed under the License is distributed on an "AS IS" basis,
* WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
* for the specific language governing rights and limitations under the
* License.
*
* The Original Code is Mozilla Firefox.
*
* The Initial Developer of the Original Code is
* the Mozilla Foundation <http://www.mozilla.org>.
* Portions created by the Initial Developer are Copyright (C) 2006
* the Initial Developer. All Rights Reserved.
*
* Contributor(s):
* Benjamin Smedberg <benjamin@smedbergs.us> (Original Code)
*
* Alternatively, the contents of this file may be used under the terms of
* either the GNU General Public License Version 2 or later (the "GPL"), or
* the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
* in which case the provisions of the GPL or the LGPL are applicable instead
* of those above. If you wish to allow use of your version of this file only
* under the terms of either the GPL or the LGPL, and not to allow others to
* use your version of this file under the terms of the MPL, indicate your
* decision by deleting the provisions above and replace them with the notice
* and other provisions required by the GPL or the LGPL. If you do not delete
* the provisions above, a recipient may use your version of this file under
* the terms of any one of the MPL, the GPL or the LGPL.
*
* ***** END LICENSE BLOCK ***** */
#ifndef mozMySpellDirProvider_h__
#define mozMySpellDirProvider_h__
#include "nsIDirectoryService.h"
#include "nsIGenericFactory.h"
#include "nsISimpleEnumerator.h"
// Directory service provider for the spellchecker. It implements
// nsIDirectoryServiceProvider2 and is hooked into XPCOM through the
// directory-provider category by Register()/Unregister().
class mozMySpellDirProvider :
public nsIDirectoryServiceProvider2
{
public:
NS_DECL_ISUPPORTS
NS_DECL_NSIDIRECTORYSERVICEPROVIDER
NS_DECL_NSIDIRECTORYSERVICEPROVIDER2
// Adds the "spellcheck-directory-provider" category entry pointing at
// kContractID. The parameters are not used by the implementation.
static NS_METHOD Register(nsIComponentManager* aCompMgr,
nsIFile* aPath, const char *aLoaderStr,
const char *aType,
const nsModuleComponentInfo *aInfo);
// Removes the category entry added by Register().
static NS_METHOD Unregister(nsIComponentManager* aCompMgr,
nsIFile* aPath, const char *aLoaderStr,
const nsModuleComponentInfo *aInfo);
// Contract ID under which this provider is registered.
static char const *const kContractID;
private:
// Enumerator adaptor: for each nsIFile produced by the base enumerator it
// yields a clone with "dictionaries" appended, skipping entries whose
// resulting path does not exist.
class AppendingEnumerator : public nsISimpleEnumerator
{
public:
NS_DECL_ISUPPORTS
NS_DECL_NSISIMPLEENUMERATOR
// Wraps aBase and pre-fetches the first usable directory.
AppendingEnumerator(nsISimpleEnumerator* aBase);
private:
// Source of base directories.
nsCOMPtr<nsISimpleEnumerator> mBase;
// Directory that the next GetNext() call will return; null when exhausted.
nsCOMPtr<nsIFile> mNext;
};
};
// 128-bit identifier {64d6174c-1496-4ffd-87f2-da2670f88934} in nsID
// initializer form; presumably the XPCOM class ID of mozMySpellDirProvider
// (its use is not visible in this header).
#define MYSPELLDIRPROVIDER_CID \
{ 0x64d6174c, 0x1496, 0x4ffd, \
{ 0x87, 0xf2, 0xda, 0x26, 0x70, 0xf8, 0x89, 0x34 } }
#endif // mozMySpellDirProvider_h__

Просмотреть файл

@ -1,302 +0,0 @@
#include "license.readme"
#include <string.h>
#include <stdlib.h>
#include <stdio.h>
#include "myspell.hxx"
// using namespace std;
// Builds a spell checker from an affix file (affpath) and a dictionary
// file (dpath). The three managers created here are owned by the object and
// released in the destructor.
MySpell::MySpell(const char * affpath, const char * dpath)
  : encoding(NULL),
    csconv(NULL)
{
  // The hash manager comes first because the affix manager is handed a
  // pointer to it (it needs the hash manager lookup methods).
  pHMgr = new HashMgr(dpath);
  pAMgr = new AffixMgr(affpath, pHMgr);

  // Ask the affix manager for the preferred "try" characters and the
  // dictionary encoding, then derive the character-case table from the
  // encoding.
  char * tryChars = pAMgr->get_try_string();
  encoding = pAMgr->get_encoding();
  csconv = get_current_cs(encoding);

  // Finally the suggestion manager, capped at 25 suggestions.
  maxSug = 25;
  pSMgr = new SuggestMgr(tryChars, maxSug, pAMgr);

  // The try string is released here once the suggestion manager has been
  // constructed; free(NULL) is a no-op.
  free(tryChars);
}
// Releases the managers in reverse order of construction, then the
// case-conversion table and the encoding name.
MySpell::~MySpell()
{
  delete pSMgr;
  delete pAMgr;
  delete pHMgr;

  free(csconv);
  csconv = NULL;

  // free(NULL) is a no-op, so no explicit null test is required.
  free(encoding);
}
// Copies src into dest with leading and trailing "special" characters
// stripped.
//
//   *pabbrev  is set to 1 when the character right after the kept part is a
//             period, otherwise 0.
//   *pcaptype receives NOCAP, INITCAP, ALLCAP or HUHCAP according to the
//             case flags of the kept characters in csconv.
//
// Returns the length of the cleaned word (0 when nothing is left).
// dest is not bounds-checked here; callers limit the input length.
int MySpell::cleanword(char * dest, const char * src, int * pcaptype, int * pabbrev)
{
  // with the new breakiterator code this should not be needed anymore
  const char * special_chars = "._#$%&()* +,-/:;<=>[]\\^`{|}~\t \x0a\x0d\x01\'\"";

  unsigned char * out = (unsigned char *) dest;
  const unsigned char * in = (const unsigned char *) src;

  // Step over leading special characters.
  while ((*in != '\0') && (strchr(special_chars, (int)(*in)) != NULL))
    ++in;

  // Shrink the length past trailing special characters.
  *pabbrev = 0;
  int len = strlen((const char *) in);
  while ((len > 0) && (strchr(special_chars, (int)(in[len - 1])) != NULL))
    --len;

  // A period directly after the kept part marks a possible abbreviation.
  if (in[len] == '.')
    *pabbrev = 1;

  // Nothing left: cannot be an abbreviation and cannot be capitalized.
  if (len <= 0) {
    *pcaptype = NOCAP;
    *pabbrev = 0;
    *out = '\0';
    return 0;
  }

  // Copy the kept characters while counting capitals and case-neutral
  // characters (those whose upper and lower forms are equal).
  int ncap = 0;
  int nneutral = 0;
  for (int k = 0; k < len; k++) {
    const unsigned char ch = in[k];
    if (csconv[ch].ccase) ncap++;
    if (csconv[ch].cupper == csconv[ch].clower) nneutral++;
    out[k] = ch;
  }
  out[len] = '\0';

  const int nc = len;

  // Classify the capitalization pattern.
  if (ncap == 0) {
    *pcaptype = NOCAP;
  } else if ((ncap == 1) && csconv[(unsigned char)(*dest)].ccase) {
    *pcaptype = INITCAP;
  } else if ((ncap == nc) || ((ncap + nneutral) == nc)) {
    *pcaptype = ALLCAP;
  } else {
    *pcaptype = HUHCAP;
  }

  return nc;
}
// Checks one word. Returns 1 if the word is accepted, 0 otherwise.
// Words longer than MAXWORDLEN-1 are rejected; a word that is empty after
// cleaning is accepted.
int MySpell::spell(const char * word)
{
  char cw[MAXWORDLEN+1];
  char wspace[MAXWORDLEN+1];

  int wl = strlen(word);
  if (wl > (MAXWORDLEN - 1)) return 0;

  int captype = 0;
  int abbv = 0;
  wl = cleanword(cw, word, &captype, &abbv);
  if (wl == 0) return 1;

  char * hit = NULL;

  switch (captype) {
    case HUHCAP:
    case NOCAP:
      // Mixed or no capitals: only the word as written is tried.
      hit = check(cw);
      break;

    case ALLCAP:
      // Try all-lowercase, then initial-capital, then as written.
      memcpy(wspace, cw, (wl + 1));
      mkallsmall(wspace, csconv);
      hit = check(wspace);
      if (!hit) {
        mkinitcap(wspace, csconv);
        hit = check(wspace);
      }
      if (!hit) hit = check(cw);
      break;

    case INITCAP:
      // Try all-lowercase, then as written.
      memcpy(wspace, cw, (wl + 1));
      mkallsmall(wspace, csconv);
      hit = check(wspace);
      if (!hit) hit = check(cw);
      break;

    default:
      // Unknown capitalization type: nothing is checked.
      return 0;
  }

  // Every known capitalization type ends with the same fallback: if the
  // word was followed by a period, retry with the period attached.
  if ((abbv) && !(hit)) {
    memcpy(wspace, cw, wl);
    wspace[wl] = '.';
    wspace[wl + 1] = '\0';
    hit = check(wspace);
  }

  return hit ? 1 : 0;
}
// Looks a word up: first as a root in the hash manager, then with affixes
// stripped, then as a compound (only when the affix manager reports a
// compound flag). Returns the matching entry's word, or NULL if none.
char * MySpell::check(const char * word)
{
  struct hentry * entry = NULL;

  if (pHMgr)
    entry = pHMgr->lookup(word);

  if (!entry && pAMgr) {
    // try stripping off affixes
    entry = pAMgr->affix_check(word, strlen(word));

    // try check compound word
    if (!entry && pAMgr->get_compound())
      entry = pAMgr->compound_check(word, strlen(word), (pAMgr->get_compound())[0]);
  }

  if (!entry)
    return NULL;
  return entry->word;
}
// Produces spelling suggestions for word.
//
// On success *slst receives a calloc'ed array (maxSug slots) of suggestion
// strings and the number of suggestions is returned. When nothing is found,
// or memory runs out while collecting, *slst is set to NULL and 0 is
// returned. The early exits (no suggestion manager, word too long, empty
// after cleaning, calloc failure) return 0 without touching *slst.
int MySpell::suggest(char*** slst, const char * word)
{
  char cw[MAXWORDLEN+1];
  char wspace[MAXWORDLEN+1];

  if (! pSMgr) return 0;

  int wl = strlen(word);
  if (wl > (MAXWORDLEN-1)) return 0;

  int captype = 0;
  int abbv = 0;
  wl = cleanword(cw, word, &captype, &abbv);
  if (wl == 0) return 0;

  int ns = 0;
  char ** wlst = (char **) calloc(maxSug, sizeof(char *));
  if (wlst == NULL) return 0;

  if (captype == NOCAP) {
    ns = pSMgr->suggest(wlst, ns, cw);

  } else if (captype == INITCAP) {
    ns = pSMgr->suggest(wlst, ns, cw);
    if (ns != -1) {
      memcpy(wspace, cw, (wl + 1));
      mkallsmall(wspace, csconv);

      // If the capitalized form produced nothing, the lowercase results
      // are re-capitalized; otherwise they are appended unchanged.
      const int before = ns;
      ns = pSMgr->suggest(wlst, before, wspace);
      if (before == 0) {
        for (int j = before; j < ns; j++)
          mkinitcap(wlst[j], csconv);
      }
    }

  } else if (captype == HUHCAP) {
    ns = pSMgr->suggest(wlst, ns, cw);
    if (ns != -1) {
      memcpy(wspace, cw, (wl + 1));
      mkallsmall(wspace, csconv);
      ns = pSMgr->suggest(wlst, ns, wspace);
    }

  } else if (captype == ALLCAP) {
    // Suggest on the lowercase form, upper-case those results, then add
    // suggestions for the word as written.
    memcpy(wspace, cw, (wl + 1));
    mkallsmall(wspace, csconv);
    ns = pSMgr->suggest(wlst, ns, wspace);
    for (int j = 0; j < ns; j++)
      mkallcap(wlst[j], csconv);
    if (ns != -1)
      ns = pSMgr->suggest(wlst, ns, cw);
  }

  if (ns > 0) {
    *slst = wlst;
    return ns;
  }

  // try ngram approach since found nothing
  if (ns == 0) {
    ns = pSMgr->ngsuggest(wlst, cw, pHMgr);
    if (ns) {
      // Re-apply the capitalization of the input to the n-gram guesses.
      if (captype == INITCAP) {
        for (int j = 0; j < ns; j++)
          mkinitcap(wlst[j], csconv);
      } else if (captype == ALLCAP) {
        for (int j = 0; j < ns; j++)
          mkallcap(wlst[j], csconv);
      }
      *slst = wlst;
      return ns;
    }
  }

  if (ns < 0) {
    // we ran out of memory - we should free up as much as possible
    for (int i = 0; i < maxSug; i++) {
      if (wlst[i] != NULL) free(wlst[i]);
    }
  }

  if (wlst) free(wlst);
  *slst = NULL;
  return 0;
}
// Returns the dictionary encoding name obtained from the affix manager in
// the constructor. The pointer is the member itself (released by the
// destructor), not a copy.
char * MySpell::get_dic_encoding()
{
  char * const name = encoding;
  return name;
}

Просмотреть файл

@ -1,36 +0,0 @@
#ifndef _MYSPELLMGR_HXX_
#define _MYSPELLMGR_HXX_
#include "hashmgr.hxx"
#include "affixmgr.hxx"
#include "suggestmgr.hxx"
#include "csutil.hxx"
// Capitalization classes assigned by MySpell::cleanword():
//   NOCAP   - no capital letters
//   INITCAP - exactly one capital and it is the first character
//   ALLCAP  - every character is a capital or case-neutral
//   HUHCAP  - any other mix
#define NOCAP 0
#define INITCAP 1
#define ALLCAP 2
#define HUHCAP 3
// Front end of the spell checker: owns the hash, affix and suggestion
// managers and exposes spell() / suggest() on single words.
class MySpell
{
// Affix rules; also supplies the try string and the encoding name.
AffixMgr* pAMgr;
// Dictionary root words.
HashMgr* pHMgr;
// Suggestion generator.
SuggestMgr* pSMgr;
// Dictionary encoding name from the affix manager; freed in the destructor.
char * encoding;
// Per-character case table for `encoding`; freed in the destructor.
struct cs_info * csconv;
// Capacity of the suggestion list allocated by suggest().
int maxSug;
public:
// Loads the affix file and the dictionary file.
MySpell(const char * affpath, const char * dpath);
~MySpell();
// Fills *slst with a calloc'ed array of suggestions; returns their count
// (0 with *slst == NULL when there are none).
int suggest(char*** slst, const char * word);
// Returns 1 if the word is accepted, 0 otherwise.
int spell(const char *);
// Returns the dictionary encoding name (the member pointer, not a copy).
char * get_dic_encoding();
private:
// Strips surrounding punctuation and classifies capitalization.
int cleanword(char *, const char *, int *, int *);
// Root / affix / compound lookup; returns the matched entry's word or NULL.
char * check(const char *);
};
#endif

Просмотреть файл

@ -1,559 +0,0 @@
#include "license.readme"
#include <stdlib.h>
#include <ctype.h>
#include <string.h>
#include <stdio.h>
#include "suggestmgr.hxx"
// using namespace std;
extern char * mystrdup(const char *);
// Sets up the suggestion manager.
//
//   tryme - characters to try when building candidate suggestions; copied
//           with mystrdup
//   maxn  - maximum number of suggestions any list may hold
//   aptr  - affix manager used to validate candidates (may be NULL)
SuggestMgr::SuggestMgr(const char * tryme, int maxn,
                       AffixMgr * aptr)
{
  // register affix manager and check in string of chars to
  // try when building candidate suggestions
  pAMgr = aptr;

  ctry = mystrdup(tryme);
  ctryl = 0;
  if (ctry)
    ctryl = strlen(ctry);

  maxSug = maxn;

  // Take the "no split suggestions" setting from the affix manager. The
  // value used to be fetched and thrown away, which left the flag
  // permanently false and the twowords() step permanently enabled.
  nosplitsugs = false;
  if (pAMgr)
    nosplitsugs = (pAMgr->get_nosplitsugs() != 0);
}
// Releases the copied try string and clears the members. The affix manager
// pointer is only reset here, not deleted.
SuggestMgr::~SuggestMgr()
{
  // free(NULL) is a no-op, so no explicit null test is required.
  free(ctry);
  ctry = NULL;
  ctryl = 0;

  maxSug = 0;
  pAMgr = NULL;
}
// Generates suggestions for a misspelled word.
//
//   wlst - caller-provided array with room for maxSug char* entries
//   ns   - number of entries already present in wlst
//   word - the misspelled word
//
// Runs the individual strategies in a fixed order. A strategy only runs
// while the list is neither full nor in the error state (-1); since the
// count cannot change once that stops being true, the function returns at
// that point. Returns the new count, or -1 on allocation failure.
int SuggestMgr::suggest(char** wlst, int ns, const char * word)
{
  int count = ns;

  // did we swap the order of chars by mistake
  if ((count >= maxSug) || (count < 0)) return count;
  count = swapchar(wlst, word, count);

  // perhaps we chose the wrong char from a related set
  if ((count >= maxSug) || (count < 0)) return count;
  count = mapchars(wlst, word, count);

  // perhaps we made a typical fault of spelling
  if ((count >= maxSug) || (count < 0)) return count;
  count = replchars(wlst, word, count);

  // did we forget to add a char
  if ((count >= maxSug) || (count < 0)) return count;
  count = forgotchar(wlst, word, count);

  // did we add a char that should not be there
  if ((count >= maxSug) || (count < 0)) return count;
  count = extrachar(wlst, word, count);

  // did we just hit the wrong key in place of a good char
  if ((count >= maxSug) || (count < 0)) return count;
  count = badchar(wlst, word, count);

  // perhaps we forgot to hit space and two words ran together
  if (nosplitsugs) return count;
  if ((count >= maxSug) || (count < 0)) return count;
  return twowords(wlst, word, count);
}
// Suggestions for when the wrong character was chosen out of a related set
// (the affix manager's map table). Words shorter than two characters are
// left alone. Returns the updated suggestion count.
int SuggestMgr::mapchars(char** wlst, const char * word, int ns)
{
  const int len = strlen(word);
  if ((len < 2) || !pAMgr)
    return ns;

  int nummap = pAMgr->get_nummap();
  struct mapentry* maptable = pAMgr->get_maptable();
  if (maptable == NULL)
    return ns;

  // Start the recursive substitution at the first character.
  return map_related(word, 0, wlst, ns, maptable, nummap);
}
// Recursive helper for mapchars().
//
// Walks word from index i. For every character that belongs to a map set,
// each member of that set is substituted in turn and the walk continues
// with the next index. When the end of the word is reached the resulting
// variant is added to wlst if it is not already listed and passes check().
//
//   word     - current variant of the word
//   i        - index of the character being examined
//   wlst/ns  - suggestion list and its current length
//   maptable - map sets from the affix manager
//   nummap   - number of entries in maptable
//
// Returns the new suggestion count, or -1 on allocation failure. Once -1 has
// been produced it is propagated unchanged: carrying on with ns == -1 would
// store the next suggestion at wlst[-1].
int SuggestMgr::map_related(const char * word, int i, char** wlst, int ns, const mapentry* maptable, int nummap)
{
  // An earlier step already failed; do not touch wlst with a negative index.
  if (ns < 0) return ns;

  char c = *(word + i);
  if (c == 0) {
    // End of the word: record the variant if it is new and valid.
    int cwrd = 1;
    for (int m=0; m < ns; m++)
      if (strcmp(word,wlst[m]) == 0) cwrd = 0;
    if ((cwrd) && check(word,strlen(word))) {
      if (ns < maxSug) {
        wlst[ns] = mystrdup(word);
        if (wlst[ns] == NULL) return -1;
        ns++;
      }
    }
    return ns;
  }

  int in_map = 0;
  for (int j = 0; j < nummap; j++) {
    if (strchr(maptable[j].set,c) != 0) {
      in_map = 1;
#ifdef __SUNPRO_CC // for SunONE Studio compiler
      char * newword = mystrdup(word);
#else
      char * newword = strdup(word);
#endif
      // Report allocation failure the same way as for a suggestion string.
      if (newword == NULL) return -1;

      for (int k = 0; k < maptable[j].len; k++) {
        *(newword + i) = *(maptable[j].set + k);
        ns = map_related(newword, (i+1), wlst, ns, maptable, nummap);
        if (ns < 0) break;
      }
      free(newword);
      if (ns < 0) return ns;
    }
  }

  // Character is in no map set: keep it and move on to the next one.
  if (!in_map) {
    i++;
    ns = map_related(word, i, wlst, ns, maptable, nummap);
  }
  return ns;
}
// Suggestions for typical spelling faults that differ from the right form
// by more than one letter, driven by the affix manager's replacement table:
// every occurrence of a pattern in the word is replaced by its replacement.
// Returns the updated suggestion count, or -1 on allocation failure.
int SuggestMgr::replchars(char** wlst, const char * word, int ns)
{
  char candidate[MAXSWL];

  int wl = strlen(word);
  if ((wl < 2) || !pAMgr) return ns;

  int numrep = pAMgr->get_numrep();
  struct replentry* reptable = pAMgr->get_reptable();
  if (reptable == NULL) return ns;

  for (int i = 0; i < numrep; i++) {
    const char * pattern = reptable[i].pattern;
    int lenr = strlen(reptable[i].replacement);
    int lenp = strlen(pattern);

    // visit every occurrence of the pattern in the word
    for (const char * r = strstr(word, pattern);
         r != NULL;
         r = strstr(r + 1, pattern)) {
      strcpy(candidate, word);

      // Stop with this pattern if the rewritten word would not fit.
      if (r-word + lenr + strlen(r+lenp) >= MAXSWL) break;

      strcpy(candidate + (r - word), reptable[i].replacement);
      strcpy(candidate + (r - word) + lenr, r + lenp);

      // Skip candidates that are already in the list.
      bool isNew = true;
      for (int k = 0; k < ns; k++) {
        if (strcmp(candidate, wlst[k]) == 0) {
          isNew = false;
          break;
        }
      }

      if (isNew && check(candidate, strlen(candidate))) {
        if (ns >= maxSug) return ns;
        wlst[ns] = mystrdup(candidate);
        if (wlst[ns] == NULL) return -1;
        ns++;
      }
    }
  }
  return ns;
}
// Error model: one character was typed wrong. Each position is replaced in
// turn by every try character and the result is tested.
// Returns the updated suggestion count, or -1 on allocation failure.
int SuggestMgr::badchar(char ** wlst, const char * word, int ns)
{
  char candidate[MAXSWL];
  const int wl = strlen(word);

  strcpy(candidate, word);

  for (int pos = 0; pos < wl; pos++) {
    const char original = candidate[pos];

    for (int t = 0; t < ctryl; t++) {
      if (ctry[t] == original) continue;

      candidate[pos] = ctry[t];

      // Skip candidates that are already in the list.
      bool isNew = true;
      for (int k = 0; k < ns; k++) {
        if (strcmp(candidate, wlst[k]) == 0) {
          isNew = false;
          break;
        }
      }

      if (isNew && check(candidate, wl)) {
        if (ns >= maxSug) return ns;
        wlst[ns] = mystrdup(candidate);
        if (wlst[ns] == NULL) return -1;
        ns++;
      }

      // Put the original character back before trying the next one.
      candidate[pos] = original;
    }
  }
  return ns;
}
// Error model: the word contains one letter too many. Each character is
// omitted in turn and the shortened word is tested.
// Returns the updated suggestion count, or -1 on allocation failure.
int SuggestMgr::extrachar(char** wlst, const char * word, int ns)
{
  char candidate[MAXSWL];

  const int wl = strlen(word);
  if (wl < 2) return ns;

  // Start with the first character dropped. After each test the character
  // at `skip` is restored, which shifts the gap one position to the right.
  strcpy(candidate, word + 1);

  for (int skip = 0; word[skip] != 0; skip++) {
    // Skip candidates that are already in the list.
    bool isNew = true;
    for (int k = 0; k < ns; k++) {
      if (strcmp(candidate, wlst[k]) == 0) {
        isNew = false;
        break;
      }
    }

    if (isNew && check(candidate, wl - 1)) {
      if (ns >= maxSug) return ns;
      wlst[ns] = mystrdup(candidate);
      if (wlst[ns] == NULL) return -1;
      ns++;
    }

    candidate[skip] = word[skip];
  }
  return ns;
}
// Error model: the word is missing a letter. Every try character is
// inserted at every position (before each letter and after the last one)
// and the lengthened word is tested.
// Returns the updated suggestion count, or -1 on allocation failure.
int SuggestMgr::forgotchar(char ** wlst, const char * word, int ns)
{
  char candidate[MAXSWL];
  const int wl = strlen(word);

  // The candidate holds the word plus one inserted character plus the
  // terminator. Without this guard a word of MAXSWL-1 characters made the
  // strcpy below write one byte past the end of the buffer.
  if (wl + 2 > MAXSWL) return ns;

  // Lay the word out shifted right by one, leaving a gap at index 0. After
  // all try characters have been tested at a position, the gap is filled
  // with the original character, which moves it one position to the right.
  strcpy(candidate + 1, word);

  for (int pos = 0; pos <= wl; pos++) {
    for (int t = 0; t < ctryl; t++) {
      candidate[pos] = ctry[t];

      // Skip candidates that are already in the list.
      int cwrd = 1;
      for (int k = 0; k < ns; k++)
        if (strcmp(candidate, wlst[k]) == 0) cwrd = 0;

      if ((cwrd) && check(candidate, wl + 1)) {
        if (ns < maxSug) {
          wlst[ns] = mystrdup(candidate);
          if (wlst[ns] == NULL) return -1;
          ns++;
        } else return ns;
      }
    }

    // pos == wl is the "append at the end" position; nothing to restore.
    if (pos < wl)
      candidate[pos] = word[pos];
  }
  return ns;
}
// Error model: a space was forgotten and two words ran together. The word
// is split after every character; when both halves are valid words the
// pair, joined by a single space, becomes a suggestion.
// Returns the updated suggestion count, or -1 on allocation failure.
int SuggestMgr::twowords(char ** wlst, const char * word, int ns)
{
  char candidate[MAXSWL];
  char * p;

  int wl=strlen(word);
  if (wl < 3) return ns;

  // The candidate holds the word plus one separator plus the terminator.
  // Without this guard a word of MAXSWL-1 characters made the strcpy below
  // write one byte past the end of the buffer.
  if (wl + 2 > MAXSWL) return ns;

  // Lay the word out shifted right by one; the free slot travels through
  // the buffer and marks the split point.
  strcpy(candidate + 1, word);

  // split the string into two pieces after every char
  // if both pieces are good words make them a suggestion
  for (p = candidate + 1; p[1] != '\0'; p++) {
    p[-1] = *p;
    *p = '\0';
    if (check(candidate,strlen(candidate))) {
      if (check((p+1),strlen(p+1))) {
        // Join the halves with a space for the suggestion text.
        *p = ' ';
        if (ns < maxSug) {
          wlst[ns] = mystrdup(candidate);
          if (wlst[ns] == NULL) return -1;
          ns++;
        } else return ns;
      }
    }
  }
  return ns;
}
// Error model: two adjacent letters were swapped. Each adjacent pair is
// exchanged in turn, tested, and exchanged back.
// Returns the updated suggestion count, or -1 on allocation failure.
int SuggestMgr::swapchar(char ** wlst, const char * word, int ns)
{
  char candidate[MAXSWL];
  const int wl = strlen(word);

  strcpy(candidate, word);

  for (int i = 0; candidate[i + 1] != 0; i++) {
    // Exchange the pair at (i, i+1).
    char held = candidate[i];
    candidate[i] = candidate[i + 1];
    candidate[i + 1] = held;

    // Skip candidates that are already in the list.
    bool isNew = true;
    for (int k = 0; k < ns; k++) {
      if (strcmp(candidate, wlst[k]) == 0) {
        isNew = false;
        break;
      }
    }

    if (isNew && check(candidate, wl)) {
      if (ns >= maxSug) return ns;
      wlst[ns] = mystrdup(candidate);
      if (wlst[ns] == NULL) return -1;
      ns++;
    }

    // Undo the exchange before moving to the next pair.
    held = candidate[i];
    candidate[i] = candidate[i + 1];
    candidate[i + 1] = held;
  }
  return ns;
}
// generate a set of suggestions for very poorly spelled words
int SuggestMgr::ngsuggest(char** wlst, char * word, HashMgr* pHMgr)
{
int i, j;
int lval;
int sc;
int lp;
if (! pHMgr) return 0;
// exhaustively search through all root words
// keeping track of the MAX_ROOTS most similar root words
struct hentry * roots[MAX_ROOTS];
int scores[MAX_ROOTS];
for (i = 0; i < MAX_ROOTS; i++) {
roots[i] = NULL;
scores[i] = -100 * i;
}
lp = MAX_ROOTS - 1;
int n = strlen(word);
struct hentry* hp = NULL;
int col = -1;
while ((hp = pHMgr->walk_hashtable(col, hp))) {
sc = ngram(3, word, hp->word, NGRAM_LONGER_WORSE);
if (sc > scores[lp]) {
scores[lp] = sc;
roots[lp] = hp;
int lval = sc;
for (j=0; j < MAX_ROOTS; j++)
if (scores[j] < lval) {
lp = j;
lval = scores[j];
}
}
}
// find minimum threshhold for a passable suggestion
// mangle original word three differnt ways
// and score them to generate a minimum acceptable score
int thresh = 0;
char * mw = NULL;
for (int sp = 1; sp < 4; sp++) {
#ifdef __SUNPRO_CC // for SunONE Studio compiler
mw = mystrdup(word);
#else
mw = strdup(word);
#endif
for (int k=sp; k < n; k+=4) *(mw + k) = '*';
thresh = thresh + ngram(n, word, mw, NGRAM_ANY_MISMATCH);
free(mw);
}
mw = NULL;
thresh = thresh / 3;
thresh--;
// now expand affixes on each of these root words and
// and use length adjusted ngram scores to select
// possible suggestions
char * guess[MAX_GUESS];
int gscore[MAX_GUESS];
for(i=0;i<MAX_GUESS;i++) {
guess[i] = NULL;
gscore[i] = -100 * i;
}
lp = MAX_GUESS - 1;
struct guessword * glst;
glst = (struct guessword *) calloc(MAX_WORDS,sizeof(struct guessword));
if (! glst) return 0;
for (i = 0; i < MAX_ROOTS; i++) {
if (roots[i]) {
struct hentry * rp = roots[i];
int nw = pAMgr->expand_rootword(glst, MAX_WORDS, rp->word, rp->wlen,
rp->astr, rp->alen);
for (int k = 0; k < nw; k++) {
sc = ngram(n, word, glst[k].word, NGRAM_ANY_MISMATCH);
if (sc > thresh)
{
if (sc > gscore[lp])
{
if (guess[lp]) free(guess[lp]);
gscore[lp] = sc;
guess[lp] = glst[k].word;
glst[k].word = NULL;
lval = sc;
for (j=0; j < MAX_GUESS; j++)
{
if (gscore[j] < lval)
{
lp = j;
lval = gscore[j];
}
}
}
}
free (glst[k].word);
glst[k].word = NULL;
glst[k].allow = 0;
}
}
}
if (glst) free(glst);
// now we are done generating guesses
// sort in order of decreasing score and copy over
bubblesort(&guess[0], &gscore[0], MAX_GUESS);
int ns = 0;
for (i=0; i < MAX_GUESS; i++) {
if (guess[i]) {
int unique = 1;
for (j=i+1; j < MAX_GUESS; j++)
if (guess[j])
if (!strcmp(guess[i], guess[j])) unique = 0;
if (unique) {
wlst[ns++] = guess[i];
} else {
free(guess[i]);
}
}
}
return ns;
}
// Tests whether a candidate suggestion is spelled correctly, first as a
// root word and then with affixes. Returns 1 if it is, 0 otherwise
// (including when there is no affix manager).
int SuggestMgr::check(const char * word, int len)
{
  if (!pAMgr)
    return 0;

  struct hentry * found = pAMgr->lookup(word);
  if (found == NULL)
    found = pAMgr->affix_check(word, len);

  return found ? 1 : 0;
}
// Computes an n-gram similarity score between s1 and s2.
//
// For each width from 1 to n, counts the substrings of s1 of that width
// that occur in s2. Scoring stops after the first width with fewer than
// two matches. A length penalty chosen by uselen is then subtracted:
//   NGRAM_LONGER_WORSE - (len(s2) - len(s1)) - 2, if positive
//   NGRAM_ANY_MISMATCH - |len(s2) - len(s1)| - 2, if positive
//   any other value    - no penalty
//
// s1 is modified temporarily (a terminator is planted after each
// substring) and restored before returning.
int SuggestMgr::ngram(int n, char * s1, const char * s2, int uselen)
{
  const int l1 = strlen(s1);
  const int l2 = strlen(s2);

  int total = 0;
  for (int width = 1; width <= n; width++) {
    int hits = 0;
    for (int start = 0; start + width <= l1; start++) {
      // Cut s1 right after the current substring, search, then restore.
      char * cut = s1 + start + width;
      const char saved = *cut;
      *cut = '\0';
      if (strstr(s2, s1 + start)) hits++;
      *cut = saved;
    }
    total = total + hits;
    if (hits < 2) break;
  }

  int penalty = 0;
  if (uselen == NGRAM_LONGER_WORSE) penalty = (l2 - l1) - 2;
  if (uselen == NGRAM_ANY_MISMATCH) penalty = abs(l2 - l1) - 2;
  if (penalty < 0) penalty = 0;

  return total - penalty;
}
// Sorts the n (word, score) pairs in place by decreasing score. Each
// element is moved towards the front past neighbours with a strictly
// smaller score, so pairs with equal scores keep their relative order.
void SuggestMgr::bubblesort(char** rword, int* rsc, int n )
{
  for (int m = 1; m < n; m++) {
    for (int j = m; (j > 0) && (rsc[j-1] < rsc[j]); j--) {
      // Swap the pair at j-1 with the pair at j.
      const int scoreTmp = rsc[j];
      char * const wordTmp = rword[j];
      rsc[j] = rsc[j-1];
      rword[j] = rword[j-1];
      rsc[j-1] = scoreTmp;
      rword[j-1] = wordTmp;
    }
  }
}

Просмотреть файл

@ -1,48 +0,0 @@
#ifndef _SUGGESTMGR_HXX_
#define _SUGGESTMGR_HXX_
// Size in bytes of the candidate-word buffers used by the suggestion routines.
#define MAXSWL 100
// Number of most-similar root words kept by ngsuggest().
#define MAX_ROOTS 10
// Capacity of the affix-expansion list ngsuggest() passes to expand_rootword().
#define MAX_WORDS 500
// Number of best expanded guesses kept by ngsuggest().
#define MAX_GUESS 10
// Length-handling modes for ngram(): no length penalty, penalize only a
// longer second string, or penalize any difference in length.
#define NGRAM_IGNORE_LENGTH 0
#define NGRAM_LONGER_WORSE 1
#define NGRAM_ANY_MISMATCH 2
#include "atypes.hxx"
#include "affixmgr.hxx"
#include "hashmgr.hxx"
// Generates spelling suggestions by applying a series of simple error
// models to a word and validating each candidate through the affix manager.
class SuggestMgr
{
// Copy of the "try" characters used to build candidates (may be NULL).
char * ctry;
// Number of characters in ctry.
int ctryl;
// Affix manager used to validate candidates; not owned.
AffixMgr* pAMgr;
// Maximum number of entries a suggestion list may hold.
int maxSug;
// When true, suggest() skips the two-word split step.
bool nosplitsugs;
public:
SuggestMgr(const char * tryme, int maxn, AffixMgr *aptr);
~SuggestMgr();
// Appends suggestions for word to wlst (which already holds ns entries);
// returns the new count or -1 on allocation failure.
int suggest(char** wlst, int ns, const char * word);
// Returns 1 if the word (of the given length) is accepted, else 0.
int check(const char *, int);
// N-gram based fallback; fills wlst from scratch and returns the count.
int ngsuggest(char ** wlst, char * word, HashMgr* pHMgr);
private:
// Each of the following takes (wlst, word, ns) and returns the new count:
// replacement-table substitutions
int replchars(char**, const char *, int);
// related-character (map table) substitutions
int mapchars(char**, const char *, int);
// recursive worker for mapchars()
int map_related(const char *, int, char ** wlst, int, const mapentry*, int);
// a letter is missing
int forgotchar(char **, const char *, int);
// two adjacent letters are swapped
int swapchar(char **, const char *, int);
// there is one letter too many
int extrachar(char **, const char *, int);
// one letter is wrong
int badchar(char **, const char *, int);
// a space between two words is missing
int twowords(char **, const char *, int);
// n-gram similarity score of s1 against s2; s1 is modified temporarily
int ngram(int n, char * s1, const char * s2, int uselen);
// sorts (word, score) pairs by decreasing score
void bubblesort( char ** rwd, int * rsc, int n);
};
#endif