Bug #319778 --> upgrade our version of hunspell to 1.1.8

NPOTB

sr=mscott
patch by ryanvm & nemeth


git-svn-id: svn://10.0.0.236/trunk@230247 18797224-902f-48f8-a5cc-f745e15eee43
This commit is contained in:
scott%scott-macgregor.org 2007-07-18 22:22:34 +00:00
parent 25e31f9657
commit d41a59d2ab
9 changed files with 122 additions and 66 deletions

View File

@ -34,7 +34,7 @@
*
******* END LICENSE BLOCK *******
Hunspell Version: 1.1.6
Hunspell Version: 1.1.8
Hunspell Author: László Németh
MySpell Author: Kevin Hendricks & David Einstein

View File

@ -775,14 +775,15 @@ struct hentry * SfxEntry::get_next_homonym(struct hentry * he, int optflags, Aff
const FLAG cclass, const FLAG needflag)
{
PfxEntry* ep = (PfxEntry *) ppfx;
FLAG eFlag = ep ? ep->getFlag() : FLAG_NULL;
while (he->next_homonym) {
he = he->next_homonym;
if ((TESTAFF(he->astr, aflag, he->alen) || (ep && ep->getCont() && TESTAFF(ep->getCont(), aflag, ep->getContLen()))) &&
((optflags & aeXPRODUCT) == 0 ||
TESTAFF(he->astr, ep->getFlag(), he->alen) ||
TESTAFF(he->astr, eFlag, he->alen) ||
// handle conditional suffix
((contclass) && TESTAFF(contclass, ep->getFlag(), contclasslen))
((contclass) && TESTAFF(contclass, eFlag, contclasslen))
) &&
// handle cont. class
((!cclass) ||

View File

@ -2993,7 +2993,7 @@ int AffixMgr::get_checksharps()
// Accessor for `ignorechars`; returns NULL when it is unset.
char * AffixMgr::get_ignore()
{
if (!ignorechars) return NULL;
// NOTE(review): two return statements follow back to back, so the second
// one is unreachable as written. This looks like the removed/added line
// pair of a diff whose -/+ markers were lost: the first returns the result
// of mystrdup() (presumably a heap copy the caller would own), the second
// returns the member pointer itself -- confirm which one is current before
// relying on who owns the returned string.
return mystrdup(ignorechars);
return ignorechars;
}
// return the preferred ignore string for suggestions
@ -3936,7 +3936,7 @@ int AffixMgr::parse_affix(char * line, const char at, FILE * af, char * dupflag
return 0;
}
int AffixMgr::redundant_condition(char ft, char * strip, int stripl, const char * cond, char * line) {
int AffixMgr::redundant_condition(char ft, char * strip, int stripl, const char * cond, char * WARNVAR) {
int condl = strlen(cond);
int i;
int j;
@ -3949,7 +3949,7 @@ int AffixMgr::redundant_condition(char ft, char * strip, int stripl, const char
for (i = 0, j = 0; (i < stripl) && (j < condl); i++, j++) {
if (cond[j] != '[') {
if (cond[j] != strip[i]) {
HUNSPELL_WARNING(stderr, "warning: incompatible stripping characters and condition:\n%s\n", line);
HUNSPELL_WARNING(stderr, "warning: incompatible stripping characters and condition:\n%s\n", warnvar);
}
} else {
neg = (cond[j+1] == '^') ? 1 : 0;
@ -3959,11 +3959,11 @@ int AffixMgr::redundant_condition(char ft, char * strip, int stripl, const char
if (strip[i] == cond[j]) in = 1;
} while ((j < (condl - 1)) && (cond[j] != ']'));
if (j == (condl - 1) && (cond[j] != ']')) {
HUNSPELL_WARNING(stderr, "error: missing ] in condition:\n%s\n", line);
HUNSPELL_WARNING(stderr, "error: missing ] in condition:\n%s\n", warnvar);
return 0;
}
if ((!neg && !in) || (neg && in)) {
HUNSPELL_WARNING(stderr, "warning: incompatible stripping characters and condition:\n%s\n", line);
HUNSPELL_WARNING(stderr, "warning: incompatible stripping characters and condition:\n%s\n", warnvar);
return 0;
}
}
@ -3977,7 +3977,7 @@ int AffixMgr::redundant_condition(char ft, char * strip, int stripl, const char
for (i = stripl - 1, j = condl - 1; (i >= 0) && (j >= 0); i--, j--) {
if (cond[j] != ']') {
if (cond[j] != strip[i]) {
HUNSPELL_WARNING(stderr, "warning: incompatible stripping characters and condition:\n%s\n", line);
HUNSPELL_WARNING(stderr, "warning: incompatible stripping characters and condition:\n%s\n", warnvar);
}
} else {
in = 0;
@ -3986,12 +3986,12 @@ int AffixMgr::redundant_condition(char ft, char * strip, int stripl, const char
if (strip[i] == cond[j]) in = 1;
} while ((j > 0) && (cond[j] != '['));
if ((j == 0) && (cond[j] != '[')) {
HUNSPELL_WARNING(stderr, "error: missing ] in condition:\n%s\n", line);
HUNSPELL_WARNING(stderr, "error: missing ] in condition:\n%s\n", warnvar);
return 0;
}
neg = (cond[j+1] == '^') ? 1 : 0;
if ((!neg && !in) || (neg && in)) {
HUNSPELL_WARNING(stderr, "warning: incompatible stripping characters and condition:\n%s\n", line);
HUNSPELL_WARNING(stderr, "warning: incompatible stripping characters and condition:\n%s\n", warnvar);
return 0;
}
}

View File

@ -60,8 +60,10 @@
// Define the warning macros only when HUNSPELL_WARNING has not already been
// defined.
#ifndef HUNSPELL_WARNING
#ifdef HUNSPELL_WARNING_ON
// Warnings enabled: HUNSPELL_WARNING is fprintf, and WARNVAR expands to the
// identifier `warnvar`.
#define HUNSPELL_WARNING fprintf
#define WARNVAR warnvar
#else
// Warnings disabled: WARNVAR expands to nothing. Where WARNVAR is written in
// parameter-name position, that parameter is therefore left unnamed.
// NOTE(review): HUNSPELL_WARNING is defined twice in a row below, first as an
// empty object-like macro and then as a variadic function-like macro that
// expands to an empty block. This looks like the removed/added line pair of
// a diff whose -/+ markers were lost -- confirm which definition is current.
#define HUNSPELL_WARNING
#define HUNSPELL_WARNING(a,b,...) {}
#define WARNVAR
#endif
#endif

View File

@ -101,8 +101,8 @@ using namespace std;
#endif
#endif
struct unicode_info2 * utf_tbl = NULL;
int utf_tbl_count = 0; // utf_tbl can be used by multiple Hunspell instances
static struct unicode_info2 * utf_tbl = NULL;
static int utf_tbl_count = 0; // utf_tbl can be used by multiple Hunspell instances
/* only UTF-16 (BMP) implementation */
char * u16_u8(char * dest, int size, const w_char * src, int srclen) {
@ -158,7 +158,7 @@ int u8_u16(w_char * dest, int size, const char * src) {
w_char * u2 = dest;
w_char * u2_max = u2 + size;
while (*u8 && (u2 < u2_max)) {
while ((u2 < u2_max) && *u8) {
switch ((*u8) & 0xf0) {
case 0x00:
case 0x10:
@ -289,15 +289,19 @@ int flag_bsearch(unsigned short flags[], unsigned short flag, int length) {
*stringp = dp+1;
int nc = (int)((unsigned long)dp - (unsigned long)mp);
rv = (char *) malloc(nc+1);
memcpy(rv,mp,nc);
*(rv+nc) = '\0';
return rv;
if (rv) {
memcpy(rv,mp,nc);
*(rv+nc) = '\0';
return rv;
}
} else {
rv = (char *) malloc(n+1);
memcpy(rv, mp, n);
*(rv+n) = '\0';
*stringp = mp + n;
return rv;
rv = (char *) malloc(n+1);
if (rv) {
memcpy(rv, mp, n);
*(rv+n) = '\0';
*stringp = mp + n;
return rv;
}
}
}
return NULL;
@ -5186,7 +5190,7 @@ int initialize_utf_tbl() {
#endif
void free_utf_tbl() {
if (utf_tbl_count > 0) utf_tbl--;
if (utf_tbl_count > 0) utf_tbl_count--;
if (utf_tbl && (utf_tbl_count == 0)) {
free(utf_tbl);
utf_tbl = NULL;
@ -5344,14 +5348,14 @@ void remove_ignored_chars(char * word, char * ignored_chars)
*word = '\0';
}
int parse_string(char * line, char ** out, const char * name)
int parse_string(char * line, char ** out, const char * WARNVAR)
{
char * tp = line;
char * piece;
int i = 0;
int np = 0;
if (*out) {
HUNSPELL_WARNING(stderr, "error: duplicate %s line\n", name);
HUNSPELL_WARNING(stderr, "error: duplicate %s line\n", warnvar);
return 1;
}
piece = mystrsep(&tp, 0);
@ -5372,7 +5376,7 @@ int parse_string(char * line, char ** out, const char * name)
piece = mystrsep(&tp, 0);
}
if (np != 2) {
HUNSPELL_WARNING(stderr, "error: missing %s information\n", name);
HUNSPELL_WARNING(stderr, "error: missing %s information\n", warnvar);
return 1;
}
return 0;

View File

@ -160,6 +160,12 @@ HashMgr::~HashMgr()
aliasm = NULL;
}
#ifndef OPENOFFICEORG
#ifndef MOZILLA_CLIENT
if (utf8) free_utf_tbl();
#endif
#endif
if (enc) free(enc);
if (lang) free(lang);
@ -185,7 +191,11 @@ struct hentry * HashMgr::lookup(const char *word) const
// add a word to the hash table (private)
int HashMgr::add_word(const char * word, int wl, unsigned short * aff,
int al, const char * desc, bool onlyupcase)
int al, const char *
#ifdef HUNSPELL_EXPERIMENTAL
desc
#endif
, bool onlyupcase)
{
char * st = mystrdup(word);
bool upcasehomonym = false;
@ -222,7 +232,11 @@ int HashMgr::add_word(const char * word, int wl, unsigned short * aff,
#endif
} else {
struct hentry* hp = (struct hentry *) malloc (sizeof(struct hentry));
if (!hp) return 1;
if (!hp)
{
if (st) free(st);
return 1;
}
hp->wlen = (short) wl;
hp->alen = (short) al;
hp->word = st;
@ -234,7 +248,13 @@ int HashMgr::add_word(const char * word, int wl, unsigned short * aff,
hp->description = (desc) ? get_aliasm(atoi(desc)) : mystrdup(desc);
} else {
hp->description = mystrdup(desc);
if (desc && !hp->description) return 1;
if (desc && !hp->description)
{
free(hp->word);
free(hp->astr);
free(hp);
return 1;
}
if (dp->description && complexprefixes) {
if (utf8) reverseword_utf(hp->description); else reverseword(hp->description);
}
@ -247,6 +267,8 @@ int HashMgr::add_word(const char * word, int wl, unsigned short * aff,
if ((dp->astr) && TESTAFF(dp->astr, ONLYUPCASEFLAG, dp->alen)) {
free(dp->astr);
dp->astr = hp->astr;
dp->alen = hp->alen;
dp->alen = hp->alen;
free(hp->word);
free(hp);
return 0;
@ -308,9 +330,12 @@ int HashMgr::put_word_pattern(const char * word, int wl, const char * pattern)
struct hentry * dp = lookup(pattern);
if (!dp || !dp->astr) return 1;
flags = (unsigned short *) malloc (dp->alen * sizeof(short));
memcpy((void *) flags, (void *) dp->astr, dp->alen * sizeof(short));
add_word(word, wl, flags, dp->alen, NULL, false);
return 0;
if (flags) {
memcpy((void *) flags, (void *) dp->astr, dp->alen * sizeof(short));
add_word(word, wl, flags, dp->alen, NULL, false);
return 0;
}
return 1;
}
// walk the hash table entry by entry - null at end
@ -440,15 +465,16 @@ int HashMgr::load_tables(const char * tpath)
return 5;
}
// add decapizatalized forms to handle following cases
// OpenOffice.org -> OPENOFFICE.ORG
// CIA's -> CIA'S
// add inner capitalized forms to handle the following allcap forms:
// Mixed caps: OpenOffice.org -> OPENOFFICE.ORG
// Allcaps with suffixes: CIA's -> CIA'S
captype = utf8 ? get_captype_utf8(ts, wl, langnum) : get_captype(ts, wl, csconv);
if (((captype == HUHCAP) || (captype == HUHINITCAP) ||
((captype == ALLCAP) && (flags != NULL))) &&
!((flags != NULL) && TESTAFF(flags, forbiddenword, al))) {
unsigned short * flags2 = (unsigned short *) malloc (sizeof(unsigned short *)* (al + 1));
memcpy(flags2, flags, al * sizeof(unsigned short *));
unsigned short * flags2 = (unsigned short *) malloc (sizeof(unsigned short) * (al+1));
if (!flags2) return 6;
if (al) memcpy(flags2, flags, al * sizeof(unsigned short));
flags2[al] = ONLYUPCASEFLAG;
if (utf8) {
char st[MAXDELEN];
@ -498,8 +524,9 @@ int HashMgr::decode_flags(unsigned short ** result, char * flags) {
case FLAG_LONG: { // two-character flags (1x2yZz -> 1x 2y Zz)
len = strlen(flags);
if (len%2 == 1) HUNSPELL_WARNING(stderr, "error: length of FLAG_LONG flagvector is odd: %s\n", flags);
len = len/2;
len /= 2;
*result = (unsigned short *) malloc(len * sizeof(short));
if (!*result) return -1;
for (int i = 0; i < len; i++) {
(*result)[i] = (((unsigned short) flags[i * 2]) << 8) + (unsigned short) flags[i * 2 + 1];
}
@ -514,6 +541,7 @@ int HashMgr::decode_flags(unsigned short ** result, char * flags) {
if (*p == ',') len++;
}
*result = (unsigned short *) malloc(len * sizeof(short));
if (!*result) return -1;
dest = *result;
for (p = flags; *p; p++) {
if (*p == ',') {
@ -531,6 +559,7 @@ int HashMgr::decode_flags(unsigned short ** result, char * flags) {
w_char w[MAXDELEN/2];
len = u8_u16(w, MAXDELEN/2, flags);
*result = (unsigned short *) malloc(len * sizeof(short));
if (!*result) return -1;
memcpy(*result, w, len * sizeof(short));
break;
}
@ -538,6 +567,7 @@ int HashMgr::decode_flags(unsigned short ** result, char * flags) {
unsigned short * dest;
len = strlen(flags);
*result = (unsigned short *) malloc(len * sizeof(short));
if (!*result) return -1;
dest = *result;
for (unsigned char * p = (unsigned char *) flags; *p; p++) {
*dest = (unsigned short) *p;

View File

@ -132,7 +132,6 @@ int Hunspell::cleanword2(char * dest, const char * src,
{
unsigned char * p = (unsigned char *) dest;
const unsigned char * q = (const unsigned char * ) src;
int firstcap = 0;
// first skip over any leading blanks
while ((*q != '\0') && (*q == ' ')) q++;
@ -156,7 +155,7 @@ int Hunspell::cleanword2(char * dest, const char * src,
*(dest + nl) = '\0';
nl = strlen(dest);
if (utf8) {
*nc = u8_u16(dest_utf, MAXWORDLEN, (const char *) q);
*nc = u8_u16(dest_utf, MAXWORDLEN, dest);
// don't check too long words
if (*nc >= MAXWORDLEN) return 0;
if (*nc == -1) { // big Unicode character (non BMP area)
@ -436,7 +435,8 @@ int Hunspell::spell(const char * word, int * info, char ** root)
rv = checkword(wspace, info, root);
if (rv) break;
}
// spec. prefix handling for Italian, etc. (SANT'ELIA -> Sant'+Elia)
// Spec. prefix handling for Catalan, French, Italian:
// prefixes separated by apostrophe (SANT'ELIA -> Sant'+Elia).
if (pAMgr && strchr(cw, '\'')) {
wl = mkallsmall2(cw, unicw, nc);
char * apostrophe = strchr(cw, '\'');
@ -498,8 +498,10 @@ int Hunspell::spell(const char * word, int * info, char ** root)
}
if (rv && is_keepcase(rv) && (captype == ALLCAP)) rv = NULL;
if (rv) break;
rv = checkword(wspace, info, root);
if (abbv && !rv) {
*(wspace+wl) = '.';
*(wspace+wl+1) = '\0';
rv = checkword(wspace, info, root);
@ -525,11 +527,6 @@ int Hunspell::spell(const char * word, int * info, char ** root)
}
}
// check ONLYUPCASE and return
// if (rv && !((captype==INITCAP) && (rv->astr) && (pAMgr) &&
// TESTAFF(rv->astr, ONLYUPCASEFLAG, rv->alen))) {
// return 1;
// }
if (rv) return 1;
// recursive breaking at break points (not good for morphological analysis)
@ -539,7 +536,8 @@ int Hunspell::spell(const char * word, int * info, char ** root)
int corr = 0;
// German words beginning with "-" are not accepted
if (langnum == LANG_de) corr = 1;
for (int j = 0; j < pAMgr->get_numbreak(); j++) {
int numbreak = pAMgr ? pAMgr->get_numbreak() : 0;
for (int j = 0; j < numbreak; j++) {
s=(char *) strstr(cw + corr, wordbreak[j]);
if (s) {
r = *s;
@ -761,14 +759,17 @@ int Hunspell::suggest(char*** slst, const char * word)
// something.The -> something. The
char * dot = strchr(cw, '.');
if (dot && (dot > cw)) {
int captype = utf8 ? get_captype_utf8(dot+1, strlen(dot+1), langnum) :
int captype_ = utf8 ? get_captype_utf8(dot+1, strlen(dot+1), langnum) :
get_captype(dot+1, strlen(dot+1), csconv);
if (captype == INITCAP) {
if (captype_ == INITCAP) {
char * st = mystrdup(cw);
st = (char *) realloc(st, wl + 1);
st[(dot - cw) + 1] = ' ';
strcpy(st + (dot - cw) + 2, dot + 1);
ns = insert_sug(slst, st, ns);
st = (char *) realloc(st, wl + 2);
if (st) {
st[(dot - cw) + 1] = ' ';
strcpy(st + (dot - cw) + 2, dot + 1);
ns = insert_sug(slst, st, ns);
free(st);
}
}
}
if (captype == HUHINITCAP) {
@ -915,7 +916,7 @@ int Hunspell::suggest(char*** slst, const char * word)
}
// remove bad capitalized and forbidden forms
if (pAMgr->get_keepcase() || pAMgr->get_forbiddenword()) {
if (pAMgr && (pAMgr->get_keepcase() || pAMgr->get_forbiddenword())) {
switch (captype) {
case INITCAP:
case ALLCAP: {
@ -1758,7 +1759,12 @@ int Hunspell::analyze(char ***out, const char *word) {
if (!word) return 0;
char * m = morph(word);
if(!m) return 0;
if (!out) return line_tok(m, out);
if (!out)
{
n = line_tok(m, out);
free(m);
return n;
}
// without memory allocation
/* BUG missing buffer size checking */

View File

@ -69,15 +69,21 @@
#define MAXSUGGESTION 15
#define MAXSHARPS 5
#ifdef W32
#define DLLTEST2_API __declspec(dllexport)
#endif
#ifndef _MYSPELLMGR_HXX_
#define _MYSPELLMGR_HXX_
#ifdef HUNSPELL_STATIC
#define DLLEXPORT
#else
#ifdef HUNSPELL_EXPORTS
#define DLLEXPORT __declspec( dllexport )
#else
#define DLLEXPORT __declspec( dllimport )
#endif
#endif
#ifdef W32
class DLLTEST2_API Hunspell
class DLLEXPORT Hunspell
#else
class Hunspell
#endif

View File

@ -109,7 +109,7 @@ SuggestMgr::SuggestMgr(const char * tryme, int maxn,
w_char t[MAXSWL];
ctryl = u8_u16(t, MAXSWL, tryme);
ctry_utf = (w_char *) malloc(ctryl * sizeof(w_char));
memcpy(ctry_utf, t, ctryl * sizeof(w_char));
if (ctry_utf) memcpy(ctry_utf, t, ctryl * sizeof(w_char));
} else {
ctry = mystrdup(tryme);
ctryl = strlen(ctry);
@ -502,7 +502,6 @@ int SuggestMgr::doubletwochars(char** wlst, const char * word, int ns, int cpdsu
// perhaps we doubled two characters (pattern aba -> ababa, for example vacation -> vacacation)
int SuggestMgr::doubletwochars_utf(char ** wlst, const w_char * word, int wl, int ns, int cpdsuggest)
{
w_char tmpc;
w_char candidate_utf[MAXSWL];
char candidate[MAXSWUTF8L];
int state=0;
@ -620,7 +619,6 @@ int SuggestMgr::forgotchar(char ** wlst, const char * word, int ns, int cpdsugge
char candidate[MAXSWUTF8L];
const char * p;
char * q;
int cwrd;
time_t timelimit = time(NULL);
int timer = MINTIMER;
int wl = strlen(word);
@ -821,6 +819,7 @@ int SuggestMgr::longswapchar_utf(char ** wlst, const w_char * word, int wl, int
tmpc = *p;
*p = *q;
*q = tmpc;
u16_u8(candidate, MAXSWUTF8L, candidate_utf, wl);
ns = testsug(wlst, candidate, strlen(candidate), ns, cpdsuggest, NULL, NULL);
if (ns == -1) return -1;
*q = *p;
@ -1468,6 +1467,7 @@ char * SuggestMgr::suggest_morph_for_spelling_error(const char * word)
{
char * p = NULL;
char ** wlst = (char **) calloc(maxSug, sizeof(char *));
if (!**wlst) return NULL;
// we will use only the first suggestion
for (int i = 0; i < maxSug - 1; i++) wlst[i] = "";
int ns = suggest(&wlst, word, maxSug - 1);
@ -1476,7 +1476,7 @@ char * SuggestMgr::suggest_morph_for_spelling_error(const char * word)
free(wlst[maxSug - 1]);
}
if (wlst) free(wlst);
return p;
return p;
}
#endif // END OF HUNSPELL_EXPERIMENTAL CODE
@ -1669,6 +1669,12 @@ void SuggestMgr::lcs(const char * s, const char * s2, int * l1, int * l2, char *
}
c = (char *) malloc((m + 1) * (n + 1));
b = (char *) malloc((m + 1) * (n + 1));
if (!c || !b) {
if (c) free(c);
if (b) free(b);
*result = NULL;
return;
}
for (i = 1; i <= m; i++) c[i*(n+1)] = 0;
for (j = 0; j <= n; j++) c[j] = 0;
for (i = 1; i <= m; i++) {
@ -1700,6 +1706,7 @@ int SuggestMgr::lcslen(const char * s, const char* s2) {
char * result;
int len = 0;
lcs(s, s2, &m, &n, &result);
if (!result) return 0;
i = m;
j = n;
while ((i != 0) && (j != 0)) {
@ -1711,6 +1718,6 @@ int SuggestMgr::lcslen(const char * s, const char* s2) {
i--;
} else j--;
}
if (result) free(result);
free(result);
return len;
}