Remove unexpected space inserted between Chinese and Japanese characters as a result of unfolding lines. These scripts don't use space as a word boundary. b=135323 r=shanjian sr=waterson

2002-07-01 20:43:16 +00:00 · 2002-07-01 20:43:16 +00:00 · 6e215cd94b
--- a/content/base/src/nsPlainTextSerializer.cpp
+++ b/content/base/src/nsPlainTextSerializer.cpp
@ -1627,7 +1627,16 @@ nsPlainTextSerializer::Write(const nsAString& aString)
    } 
    else {
      // There's still whitespace left in the string
-
+      if (nextpos != 0 && (nextpos + 1) < totLen) {
+        offsetIntoBuffer = str.get() + nextpos;
+        // skip '\n' if it is between CJ chars
+        if (offsetIntoBuffer[0] == '\n' && IS_CJ_CHAR(offsetIntoBuffer[-1]) && IS_CJ_CHAR(offsetIntoBuffer[1])) {
+          offsetIntoBuffer = str.get() + bol;
+          AddToLine(offsetIntoBuffer, nextpos-bol);
+          bol = nextpos + 1;
+          continue;
+        }
+      }
      // If we're already in whitespace and not preformatted, just skip it:
      if (mInWhitespace && (nextpos == bol) && !mPreFormatted &&
          !(mFlags & nsIDocumentEncoder::OutputPreformatted)) {
--- a/intl/unicharutil/util/nsUnicharUtils.h
+++ b/intl/unicharutil/util/nsUnicharUtils.h
@ -85,5 +85,12 @@ inline PRBool IsLowerCase(PRUnichar c) {
 #define SURROGATE_TO_UCS4(h, l)  ((((PRUint32)(h)-(PRUint32)0xd800) << 10) +  \
                                    (PRUint32)(l) - (PRUint32)(0xdc00) + 0x10000)

+/* Range test; (u) may be evaluated several times. The ranges leave out     */
+/* Hangul Compatibility Jamo (0x3131-0x318e) and Syllables (0xac00-0xd7a3). */
+#define IS_CJ_CHAR(u) \
+         ((0x2e80u <= (u) && (u) <= 0x312fu) || \
+          (0x3190u <= (u) && (u) <= 0xabffu) || \
+          (0xf900u <= (u) && (u) <= 0xfaffu) || \
+          (0xff00u <= (u) && (u) <= 0xffffu) )

 #endif  /* nsUnicharUtils_h__ */
--- a/layout/generic/nsTextFrame.cpp
+++ b/layout/generic/nsTextFrame.cpp
@ -1618,9 +1618,19 @@ nsTextFrame::PrepareUnicodeText(nsTextTransformer& aTX,
      PRInt32 i;
      if (nsnull != indexp) {
        // Point mapping indicies at each content index in the word
-        i = contentLen;
-        while (--i >= 0) {
-          *indexp++ = strInx++;
+        if (1 == wordLen && contentLen == 2 && IS_CJ_CHAR(*bp)) {
+          // If all of these conditions are met, we have a '\n' between CJ
+          // (Chinese/Japanese) chars, and this '\n' should be removed.
+          i = contentLen;
+          while (--i >= 0) {
+            *indexp++ = strInx;
+          }
+          strInx++;
+        } else {
+          i = contentLen;
+          while (--i >= 0) {
+            *indexp++ = strInx++;
+          }
        }
      }
      // Nonbreaking spaces count as spaces, not letters
--- a/layout/generic/nsTextTransformer.cpp
+++ b/layout/generic/nsTextTransformer.cpp
@ -46,6 +46,7 @@
 #include "nsIWordBreaker.h"
 #include "nsIServiceManager.h"
 #include "nsUnicharUtilCIID.h"
+#include "nsUnicharUtils.h"
 #include "nsICaseConversion.h"
 #include "prenv.h"
 #include "nsIPref.h"
@ -824,6 +825,7 @@ nsTextTransformer::GetNextWord(PRBool aInWord,
  PRBool isWhitespace = PR_FALSE;
  PRUnichar* result = nsnull;
  PRBool prevBufferPos;
+  PRBool skippedWhitespace = PR_FALSE;

  // Initialize OUT parameter
  *aWasTransformed = PR_FALSE;
@ -855,6 +857,22 @@ nsTextTransformer::GetNextWord(PRBool aInWord,
      case eNormal:
        if (XP_IS_SPACE(firstChar)) {
          offset = ScanNormalWhiteSpace_F();
+
+          // If this is just a single '\n', and the characters before and after
+          // it are CJ (Chinese/Japanese) chars, we skip it.
+          if (firstChar == '\n' && 
+              offset - mOffset == 1 && 
+              mOffset > 0 &&
+              offset < fragLen) 
+          {
+            PRUnichar lastChar = frag->CharAt(mOffset - 1);
+            PRUnichar nextChar = frag->CharAt(offset);
+            if (IS_CJ_CHAR(lastChar) && IS_CJ_CHAR(nextChar)) {
+              skippedWhitespace = PR_TRUE;
+              --mBufferPos;
+              mOffset = offset;
+              continue;            }
+          }
          if (firstChar != ' ') {
            *aWasTransformed = PR_TRUE;
          }
@ -1023,9 +1041,12 @@ nsTextTransformer::GetNextWord(PRBool aInWord,
    break;
  }

+  *aIsWhiteSpaceResult = isWhitespace;
  *aWordLenResult = wordLen;
  *aContentLenResult = offset - mOffset;
-  *aIsWhiteSpaceResult = isWhitespace;
+
+  // We need to adjust the length if a '\n' has been skipped between CJ chars
+  *aContentLenResult += (skippedWhitespace ? 1 : 0);

  // If the word length doesn't match the content length then we transformed
  // the text
--- a/layout/html/base/src/nsTextFrame.cpp
+++ b/layout/html/base/src/nsTextFrame.cpp
@ -1618,9 +1618,19 @@ nsTextFrame::PrepareUnicodeText(nsTextTransformer& aTX,
      PRInt32 i;
      if (nsnull != indexp) {
        // Point mapping indicies at each content index in the word
-        i = contentLen;
-        while (--i >= 0) {
-          *indexp++ = strInx++;
+        if (1 == wordLen && contentLen == 2 && IS_CJ_CHAR(*bp)) {
+          // If all of these conditions are met, we have a '\n' between CJ
+          // (Chinese/Japanese) chars, and this '\n' should be removed.
+          i = contentLen;
+          while (--i >= 0) {
+            *indexp++ = strInx;
+          }
+          strInx++;
+        } else {
+          i = contentLen;
+          while (--i >= 0) {
+            *indexp++ = strInx++;
+          }
        }
      }
      // Nonbreaking spaces count as spaces, not letters
--- a/layout/html/base/src/nsTextTransformer.cpp
+++ b/layout/html/base/src/nsTextTransformer.cpp
@ -46,6 +46,7 @@
 #include "nsIWordBreaker.h"
 #include "nsIServiceManager.h"
 #include "nsUnicharUtilCIID.h"
+#include "nsUnicharUtils.h"
 #include "nsICaseConversion.h"
 #include "prenv.h"
 #include "nsIPref.h"
@ -824,6 +825,7 @@ nsTextTransformer::GetNextWord(PRBool aInWord,
  PRBool isWhitespace = PR_FALSE;
  PRUnichar* result = nsnull;
  PRBool prevBufferPos;
+  PRBool skippedWhitespace = PR_FALSE;

  // Initialize OUT parameter
  *aWasTransformed = PR_FALSE;
@ -855,6 +857,22 @@ nsTextTransformer::GetNextWord(PRBool aInWord,
      case eNormal:
        if (XP_IS_SPACE(firstChar)) {
          offset = ScanNormalWhiteSpace_F();
+
+          // If this is just a single '\n', and the characters before and after
+          // it are CJ (Chinese/Japanese) chars, we skip it.
+          if (firstChar == '\n' && 
+              offset - mOffset == 1 && 
+              mOffset > 0 &&
+              offset < fragLen) 
+          {
+            PRUnichar lastChar = frag->CharAt(mOffset - 1);
+            PRUnichar nextChar = frag->CharAt(offset);
+            if (IS_CJ_CHAR(lastChar) && IS_CJ_CHAR(nextChar)) {
+              skippedWhitespace = PR_TRUE;
+              --mBufferPos;
+              mOffset = offset;
+              continue;            }
+          }
          if (firstChar != ' ') {
            *aWasTransformed = PR_TRUE;
          }
@ -1023,9 +1041,12 @@ nsTextTransformer::GetNextWord(PRBool aInWord,
    break;
  }

+  *aIsWhiteSpaceResult = isWhitespace;
  *aWordLenResult = wordLen;
  *aContentLenResult = offset - mOffset;
-  *aIsWhiteSpaceResult = isWhitespace;
+
+  // We need to adjust the length if a '\n' has been skipped between CJ chars
+  *aContentLenResult += (skippedWhitespace ? 1 : 0);

  // If the word length doesn't match the content length then we transformed
  // the text