Bug 355178 - Don't break words on hyphens before passing them to hunspell, as it can handle word breaking on hyphens on its own; r=smaug

--HG--
extra : rebase_source : 298f0832ac6f0b12497e9c883bbebea8e168284c
This commit is contained in:
Ehsan Akhgari 2011-04-14 17:16:40 -04:00
Родитель cedadc8bc5
Коммит 7b6af5362f
4 изменённых файлов: 67 добавлений и 32 удалений

Просмотреть файл

@ -9463,101 +9463,103 @@
< schrod's
---
> schrod/SM
42883,42885c48694
41998a42010
> scot-free
42883,42885c48695
< shit's
< shit/S!
< shite/S!
---
> shit/MS!
42887,42888c48696,48697
42887,42888c48697,48698
< shithead/S!
< shitload/!
---
> shithead/MS!
> shitload/MS!
42891c48700
42891c48701
< shitty/RT!
---
> shitty/TR!
42976a48786
42976a48787
> should've
43008c48818
43008c48819
< showtime
---
> showtime/MS
43724,43726c49534
43724,43726c49535
< smoulder's
< smouldered
< smoulders
---
> smoulder/GSMD
44062c49870
44062c49871
< sonofabitch
---
> sonofabitch/!
44371a50180
44371a50181
> spick/S!
44383c50192
44383c50193
< spik/S
---
> spik/S!
46106a51916
46106a51917
> syllabi
46160c51970
46160c51971
< synch/GMD
---
> synch/GMDS
46167d51976
46167d51977
< synchs
46203,46204c52012,52013
46203,46204c52013,52014
< sysadmin/S
< sysop/S
---
> sysadmin/MS
> sysop/MS
46752a52562
46752a52563
> terabit/MS
46753a52564,52565
46753a52565,52566
> terahertz/M
> terapixel/MS
46817a52630
46817a52631
> testcase/MS
46831a52645
46831a52646
> testsuite/MS
46925a52740
46925a52741
> theremin/MS
47755a53571
47755a53572
> transfect/DSMG
47774a53591,53592
47774a53592,53593
> transgenderism
> transgene/MS
47951c53769
47951c53770
< triage/M
---
> triage/MG
48869a54688
48869a54689
> unlikeable
49211c55030
49211c55031
< vagina/M
---
> vagina/MS
49368,49369c55187
49368,49369c55188
< velour's
< velours's
---
> velour/MS
49478a55297
49478a55298
> vertices
50148a55968
50148a55969
> weaponize/DSG
50260,50261d56079
50260,50261d56080
< werwolf/M
< werwolves
50728c56546
50728c56547
< women
---
> women/M
50794c56612
50794c56613
< wop/S!
---
> wop/MS!

Просмотреть файл

@ -1,4 +1,4 @@
57434
57435
0/nm
0th/pt
1/n1
@ -48141,6 +48141,7 @@ scorn/MDRSZG
scorner/M
scornful/Y
scorpion/MS
scot-free
scotch/MDSG
scotchs
scoundrel/MS

Просмотреть файл

@ -888,23 +888,55 @@ WordSplitState::ClassifyCharacter(PRInt32 aIndex, PRBool aRecurse) const
return CHAR_CLASS_SEPARATOR;
if (ClassifyCharacter(aIndex - 1, false) != CHAR_CLASS_WORD)
return CHAR_CLASS_SEPARATOR;
// If the previous character is a word-char, make sure that it's not a
// special dot character.
if (mDOMWordText[aIndex - 1] == '.')
return CHAR_CLASS_SEPARATOR;
// now we know left char is a word-char, check the right-hand character
if (aIndex == PRInt32(mDOMWordText.Length()) - 1)
return CHAR_CLASS_SEPARATOR;
if (ClassifyCharacter(aIndex + 1, false) != CHAR_CLASS_WORD)
return CHAR_CLASS_SEPARATOR;
// If the next character is a word-char, make sure that it's not a
// special dot character.
if (mDOMWordText[aIndex + 1] == '.')
return CHAR_CLASS_SEPARATOR;
// char on either side is a word, this counts as a word
return CHAR_CLASS_WORD;
}
// The dot character, if appearing at the end of a word, should
// be considered part of that word. Example: "etc.", or
// abbreviations
if (aIndex > 0 &&
mDOMWordText[aIndex] == '.' &&
mDOMWordText[aIndex - 1] != '.' &&
ClassifyCharacter(aIndex - 1, false) != CHAR_CLASS_WORD) {
return CHAR_CLASS_WORD;
}
// all other punctuation
if (charCategory == nsIUGenCategory::kSeparator ||
charCategory == nsIUGenCategory::kOther ||
charCategory == nsIUGenCategory::kPunctuation ||
charCategory == nsIUGenCategory::kSymbol)
charCategory == nsIUGenCategory::kSymbol) {
// Don't break on hyphens, as hunspell handles them on its own.
if (aIndex > 0 &&
mDOMWordText[aIndex] == '-' &&
mDOMWordText[aIndex - 1] != '-' &&
ClassifyCharacter(aIndex - 1, false) == CHAR_CLASS_WORD) {
// A hyphen is only meaningful as a separator inside a word
// if both the previous and next characters are word characters.
if (aIndex == PRInt32(mDOMWordText.Length()) - 1)
return CHAR_CLASS_SEPARATOR;
if (mDOMWordText[aIndex + 1] != '.' &&
ClassifyCharacter(aIndex + 1, false) == CHAR_CLASS_WORD)
return CHAR_CLASS_WORD;
}
return CHAR_CLASS_SEPARATOR;
}
// any other character counts as a word
return CHAR_CLASS_WORD;

Просмотреть файл

@ -47,7 +47,7 @@ needs-focus == input-text-notheme-onfocus-reframe.html input-text-notheme-onfocu
needs-focus == caret_after_reframe.html caret_after_reframe-ref.html
== nobogusnode-1.html nobogusnode-ref.html
== nobogusnode-2.html nobogusnode-ref.html
fails-if(!Android) == spellcheck-hyphen-valid.html spellcheck-hyphen-valid-ref.html
fails-if(Android) == spellcheck-hyphen-valid.html spellcheck-hyphen-valid-ref.html
fails-if(Android) != spellcheck-hyphen-invalid.html spellcheck-hyphen-invalid-ref.html
== spellcheck-slash-valid.html spellcheck-slash-valid-ref.html
== spellcheck-period-valid.html spellcheck-period-valid-ref.html