зеркало из https://github.com/mozilla/pjs.git
Remove unexpected space inserted between Chinese and Japanese characters as a result of unfolding lines. These scripts don't use space as a word boundary. b=135323 r=shanjian sr=waterson
This commit is contained in:
Родитель
74a68876ce
Коммит
6e215cd94b
|
@ -1627,7 +1627,16 @@ nsPlainTextSerializer::Write(const nsAString& aString)
|
|||
}
|
||||
else {
|
||||
// There's still whitespace left in the string
|
||||
|
||||
if (nextpos != 0 && (nextpos + 1) < totLen) {
|
||||
offsetIntoBuffer = str.get() + nextpos;
|
||||
// skip '\n' if it is between CJ chars
|
||||
if (offsetIntoBuffer[0] == '\n' && IS_CJ_CHAR(offsetIntoBuffer[-1]) && IS_CJ_CHAR(offsetIntoBuffer[1])) {
|
||||
offsetIntoBuffer = str.get() + bol;
|
||||
AddToLine(offsetIntoBuffer, nextpos-bol);
|
||||
bol = nextpos + 1;
|
||||
continue;
|
||||
}
|
||||
}
|
||||
// If we're already in whitespace and not preformatted, just skip it:
|
||||
if (mInWhitespace && (nextpos == bol) && !mPreFormatted &&
|
||||
!(mFlags & nsIDocumentEncoder::OutputPreformatted)) {
|
||||
|
|
|
@ -85,5 +85,12 @@ inline PRBool IsLowerCase(PRUnichar c) {
|
|||
#define SURROGATE_TO_UCS4(h, l) ((((PRUint32)(h)-(PRUint32)0xd800) << 10) + \
|
||||
(PRUint32)(l) - (PRUint32)(0xdc00) + 0x10000)
|
||||
|
||||
/*
 * IS_CJ_CHAR(u) evaluates to true when the value u lies inside any of the
 * four inclusive ranges tested in the macro body:
 *   0x2e80-0x312f, 0x3190-0xabff, 0xf900-0xfaff, 0xff00-0xffff.
 *
 * The two Hangul ranges noted in the comments just below sit in the gaps
 * between those ranges (0x3130-0x318f and 0xac00-0xf8ff), so they are NOT
 * matched by this macro.
 *
 * The argument u appears eight times in the expansion, so pass an
 * expression that has no side effects.
 */
/* (0x3131u <= (u) && (u) <= 0x318eu) => Hangul Compatibility Jamo */
|
||||
/* (0xac00u <= (u) && (u) <= 0xd7a3u) => Hangul Syllables */
|
||||
#define IS_CJ_CHAR(u) \
|
||||
((0x2e80u <= (u) && (u) <= 0x312fu) || \
|
||||
(0x3190u <= (u) && (u) <= 0xabffu) || \
|
||||
(0xf900u <= (u) && (u) <= 0xfaffu) || \
|
||||
(0xff00u <= (u) && (u) <= 0xffffu) )
|
||||
|
||||
#endif /* nsUnicharUtils_h__ */
|
||||
|
|
|
@ -1618,9 +1618,19 @@ nsTextFrame::PrepareUnicodeText(nsTextTransformer& aTX,
|
|||
PRInt32 i;
|
||||
if (nsnull != indexp) {
|
||||
// Point mapping indices at each content index in the word
|
||||
i = contentLen;
|
||||
while (--i >= 0) {
|
||||
*indexp++ = strInx++;
|
||||
if (1 == wordLen && contentLen == 2 && IS_CJ_CHAR(*bp)) {
|
||||
// if all these conditions are met, we have a '\n' between CJK chars,
|
||||
// and this '\n' should be removed.
|
||||
i = contentLen;
|
||||
while (--i >= 0) {
|
||||
*indexp++ = strInx;
|
||||
}
|
||||
strInx++;
|
||||
} else {
|
||||
i = contentLen;
|
||||
while (--i >= 0) {
|
||||
*indexp++ = strInx++;
|
||||
}
|
||||
}
|
||||
}
|
||||
// Nonbreaking spaces count as spaces, not letters
|
||||
|
|
|
@ -46,6 +46,7 @@
|
|||
#include "nsIWordBreaker.h"
|
||||
#include "nsIServiceManager.h"
|
||||
#include "nsUnicharUtilCIID.h"
|
||||
#include "nsUnicharUtils.h"
|
||||
#include "nsICaseConversion.h"
|
||||
#include "prenv.h"
|
||||
#include "nsIPref.h"
|
||||
|
@ -824,6 +825,7 @@ nsTextTransformer::GetNextWord(PRBool aInWord,
|
|||
PRBool isWhitespace = PR_FALSE;
|
||||
PRUnichar* result = nsnull;
|
||||
PRBool prevBufferPos;
|
||||
PRBool skippedWhitespace = PR_FALSE;
|
||||
|
||||
// Initialize OUT parameter
|
||||
*aWasTransformed = PR_FALSE;
|
||||
|
@ -855,6 +857,22 @@ nsTextTransformer::GetNextWord(PRBool aInWord,
|
|||
case eNormal:
|
||||
if (XP_IS_SPACE(firstChar)) {
|
||||
offset = ScanNormalWhiteSpace_F();
|
||||
|
||||
// if this is just a '\n', and characters before and after it are CJK chars,
|
||||
// we will skip this one.
|
||||
if (firstChar == '\n' &&
|
||||
offset - mOffset == 1 &&
|
||||
mOffset > 0 &&
|
||||
offset < fragLen)
|
||||
{
|
||||
PRUnichar lastChar = frag->CharAt(mOffset - 1);
|
||||
PRUnichar nextChar = frag->CharAt(offset);
|
||||
if (IS_CJ_CHAR(lastChar) && IS_CJ_CHAR(nextChar)) {
|
||||
skippedWhitespace = PR_TRUE;
|
||||
--mBufferPos;
|
||||
mOffset = offset;
|
||||
continue; }
|
||||
}
|
||||
if (firstChar != ' ') {
|
||||
*aWasTransformed = PR_TRUE;
|
||||
}
|
||||
|
@ -1023,9 +1041,12 @@ nsTextTransformer::GetNextWord(PRBool aInWord,
|
|||
break;
|
||||
}
|
||||
|
||||
*aIsWhiteSpaceResult = isWhitespace;
|
||||
*aWordLenResult = wordLen;
|
||||
*aContentLenResult = offset - mOffset;
|
||||
*aIsWhiteSpaceResult = isWhitespace;
|
||||
|
||||
// we need to adjust the length if a '\n' has been skipped between CJK chars
|
||||
*aContentLenResult += (skippedWhitespace ? 1 : 0);
|
||||
|
||||
// If the word length doesn't match the content length then we transformed
|
||||
// the text
|
||||
|
|
|
@ -1618,9 +1618,19 @@ nsTextFrame::PrepareUnicodeText(nsTextTransformer& aTX,
|
|||
PRInt32 i;
|
||||
if (nsnull != indexp) {
|
||||
// Point mapping indices at each content index in the word
|
||||
i = contentLen;
|
||||
while (--i >= 0) {
|
||||
*indexp++ = strInx++;
|
||||
if (1 == wordLen && contentLen == 2 && IS_CJ_CHAR(*bp)) {
|
||||
// if all these conditions are met, we have a '\n' between CJK chars,
|
||||
// and this '\n' should be removed.
|
||||
i = contentLen;
|
||||
while (--i >= 0) {
|
||||
*indexp++ = strInx;
|
||||
}
|
||||
strInx++;
|
||||
} else {
|
||||
i = contentLen;
|
||||
while (--i >= 0) {
|
||||
*indexp++ = strInx++;
|
||||
}
|
||||
}
|
||||
}
|
||||
// Nonbreaking spaces count as spaces, not letters
|
||||
|
|
|
@ -46,6 +46,7 @@
|
|||
#include "nsIWordBreaker.h"
|
||||
#include "nsIServiceManager.h"
|
||||
#include "nsUnicharUtilCIID.h"
|
||||
#include "nsUnicharUtils.h"
|
||||
#include "nsICaseConversion.h"
|
||||
#include "prenv.h"
|
||||
#include "nsIPref.h"
|
||||
|
@ -824,6 +825,7 @@ nsTextTransformer::GetNextWord(PRBool aInWord,
|
|||
PRBool isWhitespace = PR_FALSE;
|
||||
PRUnichar* result = nsnull;
|
||||
PRBool prevBufferPos;
|
||||
PRBool skippedWhitespace = PR_FALSE;
|
||||
|
||||
// Initialize OUT parameter
|
||||
*aWasTransformed = PR_FALSE;
|
||||
|
@ -855,6 +857,22 @@ nsTextTransformer::GetNextWord(PRBool aInWord,
|
|||
case eNormal:
|
||||
if (XP_IS_SPACE(firstChar)) {
|
||||
offset = ScanNormalWhiteSpace_F();
|
||||
|
||||
// if this is just a '\n', and characters before and after it are CJK chars,
|
||||
// we will skip this one.
|
||||
if (firstChar == '\n' &&
|
||||
offset - mOffset == 1 &&
|
||||
mOffset > 0 &&
|
||||
offset < fragLen)
|
||||
{
|
||||
PRUnichar lastChar = frag->CharAt(mOffset - 1);
|
||||
PRUnichar nextChar = frag->CharAt(offset);
|
||||
if (IS_CJ_CHAR(lastChar) && IS_CJ_CHAR(nextChar)) {
|
||||
skippedWhitespace = PR_TRUE;
|
||||
--mBufferPos;
|
||||
mOffset = offset;
|
||||
continue; }
|
||||
}
|
||||
if (firstChar != ' ') {
|
||||
*aWasTransformed = PR_TRUE;
|
||||
}
|
||||
|
@ -1023,9 +1041,12 @@ nsTextTransformer::GetNextWord(PRBool aInWord,
|
|||
break;
|
||||
}
|
||||
|
||||
*aIsWhiteSpaceResult = isWhitespace;
|
||||
*aWordLenResult = wordLen;
|
||||
*aContentLenResult = offset - mOffset;
|
||||
*aIsWhiteSpaceResult = isWhitespace;
|
||||
|
||||
// we need to adjust the length if a '\n' has been skipped between CJK chars
|
||||
*aContentLenResult += (skippedWhitespace ? 1 : 0);
|
||||
|
||||
// If the word length doesn't match the content length then we transformed
|
||||
// the text
|
||||
|
|
Загрузка…
Ссылка в новой задаче