Remove unexpected space inserted between Chinese and Japanese characters as a result of unfolding lines. These scripts don't use space as a word boundary. b=135323 r=shanjian sr=waterson

This commit is contained in:
taka%netscape.com 2002-07-01 20:43:16 +00:00
Родитель 74a68876ce
Коммит 6e215cd94b
6 изменённых файлов: 87 добавлений и 9 удалений

Просмотреть файл

@ -1627,7 +1627,16 @@ nsPlainTextSerializer::Write(const nsAString& aString)
}
else {
// There's still whitespace left in the string
if (nextpos != 0 && (nextpos + 1) < totLen) {
offsetIntoBuffer = str.get() + nextpos;
// skip '\n' if it is between CJ chars
if (offsetIntoBuffer[0] == '\n' && IS_CJ_CHAR(offsetIntoBuffer[-1]) && IS_CJ_CHAR(offsetIntoBuffer[1])) {
offsetIntoBuffer = str.get() + bol;
AddToLine(offsetIntoBuffer, nextpos-bol);
bol = nextpos + 1;
continue;
}
}
// If we're already in whitespace and not preformatted, just skip it:
if (mInWhitespace && (nextpos == bol) && !mPreFormatted &&
!(mFlags & nsIDocumentEncoder::OutputPreformatted)) {

Просмотреть файл

@ -85,5 +85,12 @@ inline PRBool IsLowerCase(PRUnichar c) {
/*
 * SURROGATE_TO_UCS4(h, l): combine two 16-bit surrogate values into a single
 * 32-bit value.  `h` is rebased against 0xd800 and shifted left by 10 bits,
 * `l` is rebased against 0xdc00, and 0x10000 is added to the sum.
 * No range checking is done on either argument; each is expanded once.
 */
#define SURROGATE_TO_UCS4(h, l)                           \
  ((PRUint32)0x10000 +                                    \
   (((PRUint32)(h) - (PRUint32)0xd800) << 10) +           \
   ((PRUint32)(l) - (PRUint32)0xdc00))
/*
 * IS_CJ_CHAR(u): non-zero when the character code `u` lies in one of the four
 * ranges listed in the macro body, which are the ranges treated here as
 * Chinese/Japanese text.
 *
 * Two Hangul ranges fall in the gaps and are therefore NOT matched:
 *   0x3131 .. 0x318e   Hangul Compatibility Jamo
 *   0xac00 .. 0xd7a3   Hangul Syllables
 *
 * `u` is expanded several times, so pass an expression without side effects.
 */
#define IS_CJ_CHAR(u)                          \
  (((u) >= 0x2e80u && (u) <= 0x312fu) ||       \
   ((u) >= 0x3190u && (u) <= 0xabffu) ||       \
   ((u) >= 0xf900u && (u) <= 0xfaffu) ||       \
   ((u) >= 0xff00u && (u) <= 0xffffu))
#endif /* nsUnicharUtils_h__ */

Просмотреть файл

@ -1618,9 +1618,19 @@ nsTextFrame::PrepareUnicodeText(nsTextTransformer& aTX,
PRInt32 i;
if (nsnull != indexp) {
// Point mapping indicies at each content index in the word
i = contentLen;
while (--i >= 0) {
*indexp++ = strInx++;
if (1 == wordLen && contentLen == 2 && IS_CJ_CHAR(*bp)) {
// If all of these conditions are met, we have a '\n' between CJK chars,
// and this '\n' should be removed.
i = contentLen;
while (--i >= 0) {
*indexp++ = strInx;
}
strInx++;
} else {
i = contentLen;
while (--i >= 0) {
*indexp++ = strInx++;
}
}
}
// Nonbreaking spaces count as spaces, not letters

Просмотреть файл

@ -46,6 +46,7 @@
#include "nsIWordBreaker.h"
#include "nsIServiceManager.h"
#include "nsUnicharUtilCIID.h"
#include "nsUnicharUtils.h"
#include "nsICaseConversion.h"
#include "prenv.h"
#include "nsIPref.h"
@ -824,6 +825,7 @@ nsTextTransformer::GetNextWord(PRBool aInWord,
PRBool isWhitespace = PR_FALSE;
PRUnichar* result = nsnull;
PRBool prevBufferPos;
PRBool skippedWhitespace = PR_FALSE;
// Initialize OUT parameter
*aWasTransformed = PR_FALSE;
@ -855,6 +857,22 @@ nsTextTransformer::GetNextWord(PRBool aInWord,
case eNormal:
if (XP_IS_SPACE(firstChar)) {
offset = ScanNormalWhiteSpace_F();
// if this is just a '\n', and characters before and after it are CJK chars,
// we will skip this one.
if (firstChar == '\n' &&
offset - mOffset == 1 &&
mOffset > 0 &&
offset < fragLen)
{
PRUnichar lastChar = frag->CharAt(mOffset - 1);
PRUnichar nextChar = frag->CharAt(offset);
if (IS_CJ_CHAR(lastChar) && IS_CJ_CHAR(nextChar)) {
skippedWhitespace = PR_TRUE;
--mBufferPos;
mOffset = offset;
continue; }
}
if (firstChar != ' ') {
*aWasTransformed = PR_TRUE;
}
@ -1023,9 +1041,12 @@ nsTextTransformer::GetNextWord(PRBool aInWord,
break;
}
*aIsWhiteSpaceResult = isWhitespace;
*aWordLenResult = wordLen;
*aContentLenResult = offset - mOffset;
*aIsWhiteSpaceResult = isWhitespace;
// we need to adjust the length if a '\n' has been skipped between CJK chars
*aContentLenResult += (skippedWhitespace ? 1 : 0);
// If the word length doesn't match the content length then we transformed
// the text

Просмотреть файл

@ -1618,9 +1618,19 @@ nsTextFrame::PrepareUnicodeText(nsTextTransformer& aTX,
PRInt32 i;
if (nsnull != indexp) {
// Point mapping indicies at each content index in the word
i = contentLen;
while (--i >= 0) {
*indexp++ = strInx++;
if (1 == wordLen && contentLen == 2 && IS_CJ_CHAR(*bp)) {
// If all of these conditions are met, we have a '\n' between CJK chars,
// and this '\n' should be removed.
i = contentLen;
while (--i >= 0) {
*indexp++ = strInx;
}
strInx++;
} else {
i = contentLen;
while (--i >= 0) {
*indexp++ = strInx++;
}
}
}
// Nonbreaking spaces count as spaces, not letters

Просмотреть файл

@ -46,6 +46,7 @@
#include "nsIWordBreaker.h"
#include "nsIServiceManager.h"
#include "nsUnicharUtilCIID.h"
#include "nsUnicharUtils.h"
#include "nsICaseConversion.h"
#include "prenv.h"
#include "nsIPref.h"
@ -824,6 +825,7 @@ nsTextTransformer::GetNextWord(PRBool aInWord,
PRBool isWhitespace = PR_FALSE;
PRUnichar* result = nsnull;
PRBool prevBufferPos;
PRBool skippedWhitespace = PR_FALSE;
// Initialize OUT parameter
*aWasTransformed = PR_FALSE;
@ -855,6 +857,22 @@ nsTextTransformer::GetNextWord(PRBool aInWord,
case eNormal:
if (XP_IS_SPACE(firstChar)) {
offset = ScanNormalWhiteSpace_F();
// if this is just a '\n', and characters before and after it are CJK chars,
// we will skip this one.
if (firstChar == '\n' &&
offset - mOffset == 1 &&
mOffset > 0 &&
offset < fragLen)
{
PRUnichar lastChar = frag->CharAt(mOffset - 1);
PRUnichar nextChar = frag->CharAt(offset);
if (IS_CJ_CHAR(lastChar) && IS_CJ_CHAR(nextChar)) {
skippedWhitespace = PR_TRUE;
--mBufferPos;
mOffset = offset;
continue; }
}
if (firstChar != ' ') {
*aWasTransformed = PR_TRUE;
}
@ -1023,9 +1041,12 @@ nsTextTransformer::GetNextWord(PRBool aInWord,
break;
}
*aIsWhiteSpaceResult = isWhitespace;
*aWordLenResult = wordLen;
*aContentLenResult = offset - mOffset;
*aIsWhiteSpaceResult = isWhitespace;
// we need to adjust the length if a '\n' has been skipped between CJK chars
*aContentLenResult += (skippedWhitespace ? 1 : 0);
// If the word length doesn't match the content length then we transformed
// the text