Correct handling of zero-width joiners and non-joiners. Bug 110497. r=ftang, sr=attinasi

This commit is contained in:
smontagu%netscape.com 2002-04-23 23:50:17 +00:00
Родитель 637dcb62df
Коммит e35ea80166
10 изменённых файлов: 74 добавлений и 24 удалений

Просмотреть файл

@ -2305,13 +2305,13 @@ PRBool nsBidi::IsBidiCategory(PRUnichar aChar, eBidiCategory aBidiCategory)
return (GetBidiCategory(aChar) == aBidiCategory);
}
#define LRM_CHAR 0x200e
#define ZWNJ 0x200c
/**
 * Tell whether aChar is treated as a Bidi control character when text is
 * stripped for display.
 *
 * @param aChar the character to classify
 * @return TRUE if aChar has category eBidiCat_CC, or is one of the four
 *         code points 0x200c..0x200f (ZWNJ, ZWJ, LRM, RLM); FALSE otherwise
 */
PRBool nsBidi::IsBidiControl(PRUnichar aChar)
{
  // This method is used when stripping Bidi control characters for
  // display, so it will return TRUE for LRM, RLM, ZWJ and ZWNJ as
  // well as the characters with category eBidiCat_CC.
  // Clearing the two low bits folds 0x200c..0x200f onto ZWNJ (0x200c),
  // so a single comparison covers all four characters.
  return (eBidiCat_CC == GetBidiCat(aChar) || ((aChar)&0xfffc)==ZWNJ);
}
nsCharType nsBidi::GetCharType(PRUnichar aChar)

Просмотреть файл

@ -943,6 +943,14 @@ public:
* @param aDestSize will receive the number of characters that were written to <code>aDest</code>.
*/
nsresult WriteReverse(const PRUnichar *aSrc, PRInt32 aSrcLength, PRUnichar *aDest, PRUint16 aOptions, PRInt32 *aDestSize);
/**
* Given a Unichar,
* return PR_TRUE if the Unichar is a Bidi control character (LRE, RLE, PDF, LRO, RLO, LRM, RLM)
* or, per the comment on the implementation, a zero-width joiner or non-joiner (ZWJ, ZWNJ);
* return PR_FALSE, otherwise
*/
PRBool IsBidiControl(PRUnichar aChar);
protected:
/** length of the current text */
PRInt32 mLength;
@ -1022,13 +1030,6 @@ private:
*/
PRBool IsBidiCategory(PRUnichar aChar, eBidiCategory aBidiCategory);
/**
* Given a Unichar,
* return PR_TRUE if the Unichar is a Bidi control character (LRE, RLE, PDF, LRO, RLO, LRM, RLM)
* or, per the comment on the implementation, a zero-width joiner or non-joiner (ZWJ, ZWNJ);
* return PR_FALSE, otherwise
*/
PRBool IsBidiControl(PRUnichar aChar);
/**
* Give a Unichar, return a nsCharType (compatible with ICU)
*/

Просмотреть файл

@ -950,9 +950,31 @@ nsBidiPresUtils::FormatUnicodeText(nsIPresContext* aPresContext,
}
}
}
StripBidiControlCharacters(aText, aTextLength);
return rv;
}
/**
 * Compact aText in place, dropping every character that the Bidi engine
 * reports as a Bidi control, and shrink aTextLength to the number of
 * characters kept. A null buffer or a length below 1 is left untouched.
 */
void
nsBidiPresUtils::StripBidiControlCharacters(PRUnichar* aText,
                                            PRInt32& aTextLength) const
{
  if (!aText || aTextLength <= 0) {
    return;
  }

  // writePos trails readPos by the number of characters dropped so far.
  PRInt32 writePos = 0;
  for (PRInt32 readPos = 0; readPos < aTextLength; ++readPos) {
    const PRUnichar ch = aText[readPos];
    if (mBidiEngine->IsBidiControl(ch)) {
      continue;
    }
    aText[writePos] = ch;
    ++writePos;
  }

  // Everything before writePos is the surviving text.
  aTextLength = writePos;
}
#if 0 // XXX: for the future use ???
void
RemoveDiacritics(PRUnichar* aText,

Просмотреть файл

@ -162,6 +162,8 @@ private:
PRUint8& aCharType,
PRUint8& aPrevCharType) const;
/**
 * Remove, in place, every character of aText for which the Bidi engine's
 * IsBidiControl() returns true.
 *
 * @param aText       buffer to compact; nothing is done if it is null
 * @param aTextLength in: number of characters in aText (nothing is done
 *                    if it is less than 1); out: reduced by the number
 *                    of characters removed
 */
void StripBidiControlCharacters(PRUnichar* aText,
PRInt32& aTextLength) const;
nsAutoString mBuffer;
nsVoidArray mLogicalFrames;
nsVoidArray mVisualFrames;

Просмотреть файл

@ -943,6 +943,14 @@ public:
* @param aDestSize will receive the number of characters that were written to <code>aDest</code>.
*/
nsresult WriteReverse(const PRUnichar *aSrc, PRInt32 aSrcLength, PRUnichar *aDest, PRUint16 aOptions, PRInt32 *aDestSize);
/**
* Given a Unichar,
* return PR_TRUE if the Unichar is a Bidi control character (LRE, RLE, PDF, LRO, RLO, LRM, RLM)
* or, per the comment on the implementation, a zero-width joiner or non-joiner (ZWJ, ZWNJ);
* return PR_FALSE, otherwise
*/
PRBool IsBidiControl(PRUnichar aChar);
protected:
/** length of the current text */
PRInt32 mLength;
@ -1022,13 +1030,6 @@ private:
*/
PRBool IsBidiCategory(PRUnichar aChar, eBidiCategory aBidiCategory);
/**
* Given a Unichar,
* return PR_TRUE if the Unichar is a Bidi control character (LRE, RLE, PDF, LRO, RLO, LRM, RLM)
* or, per the comment on the implementation, a zero-width joiner or non-joiner (ZWJ, ZWNJ);
* return PR_FALSE, otherwise
*/
PRBool IsBidiControl(PRUnichar aChar);
/**
* Give a Unichar, return a nsCharType (compatible with ICU)
*/

Просмотреть файл

@ -162,6 +162,8 @@ private:
PRUint8& aCharType,
PRUint8& aPrevCharType) const;
/**
 * Remove, in place, every character of aText for which the Bidi engine's
 * IsBidiControl() returns true.
 *
 * @param aText       buffer to compact; nothing is done if it is null
 * @param aTextLength in: number of characters in aText (nothing is done
 *                    if it is less than 1); out: reduced by the number
 *                    of characters removed
 */
void StripBidiControlCharacters(PRUnichar* aText,
PRInt32& aTextLength) const;
nsAutoString mBuffer;
nsVoidArray mLogicalFrames;
nsVoidArray mVisualFrames;

Просмотреть файл

@ -2305,13 +2305,13 @@ PRBool nsBidi::IsBidiCategory(PRUnichar aChar, eBidiCategory aBidiCategory)
return (GetBidiCategory(aChar) == aBidiCategory);
}
#define LRM_CHAR 0x200e
#define ZWNJ 0x200c
/**
 * Tell whether aChar is treated as a Bidi control character when text is
 * stripped for display.
 *
 * @param aChar the character to classify
 * @return TRUE if aChar has category eBidiCat_CC, or is one of the four
 *         code points 0x200c..0x200f (ZWNJ, ZWJ, LRM, RLM); FALSE otherwise
 */
PRBool nsBidi::IsBidiControl(PRUnichar aChar)
{
  // This method is used when stripping Bidi control characters for
  // display, so it will return TRUE for LRM, RLM, ZWJ and ZWNJ as
  // well as the characters with category eBidiCat_CC.
  // Clearing the two low bits folds 0x200c..0x200f onto ZWNJ (0x200c),
  // so a single comparison covers all four characters.
  return (eBidiCat_CC == GetBidiCat(aChar) || ((aChar)&0xfffc)==ZWNJ);
}
nsCharType nsBidi::GetCharType(PRUnichar aChar)

Просмотреть файл

@ -950,9 +950,31 @@ nsBidiPresUtils::FormatUnicodeText(nsIPresContext* aPresContext,
}
}
}
StripBidiControlCharacters(aText, aTextLength);
return rv;
}
/**
 * Compact aText in place, dropping every character that the Bidi engine
 * reports as a Bidi control, and shrink aTextLength to the number of
 * characters kept. A null buffer or a length below 1 is left untouched.
 */
void
nsBidiPresUtils::StripBidiControlCharacters(PRUnichar* aText,
                                            PRInt32& aTextLength) const
{
  if (!aText || aTextLength <= 0) {
    return;
  }

  // writePos trails readPos by the number of characters dropped so far.
  PRInt32 writePos = 0;
  for (PRInt32 readPos = 0; readPos < aTextLength; ++readPos) {
    const PRUnichar ch = aText[readPos];
    if (mBidiEngine->IsBidiControl(ch)) {
      continue;
    }
    aText[writePos] = ch;
    ++writePos;
  }

  // Everything before writePos is the surviving text.
  aTextLength = writePos;
}
#if 0 // XXX: for the future use ???
void
RemoveDiacritics(PRUnichar* aText,

Просмотреть файл

@ -65,7 +65,7 @@ class nsIWordBreaker;
#define CH_RLO 8238 //<!CDATA "&#8238;" -- right-to-left override, U+202E -->
// True when _ch lies in CH_LRM..CH_RLM or in CH_LRE..CH_RLO.
// The first range starts at CH_LRM, so characters below it (such as
// CH_ZWNJ) are not matched by this macro.
// Note: _ch is evaluated more than once; pass an expression without
// side effects.
#define IS_BIDI_CONTROL(_ch) \
  (((_ch) >= CH_LRM && (_ch) <= CH_RLM) \
  || ((_ch) >= CH_LRE && (_ch) <= CH_RLO))
#endif // IBMBIDI

Просмотреть файл

@ -65,7 +65,7 @@ class nsIWordBreaker;
#define CH_RLO 8238 //<!CDATA "&#8238;" -- right-to-left override, U+202E -->
// True when _ch lies in CH_LRM..CH_RLM or in CH_LRE..CH_RLO.
// The first range starts at CH_LRM, so characters below it (such as
// CH_ZWNJ) are not matched by this macro.
// Note: _ch is evaluated more than once; pass an expression without
// side effects.
#define IS_BIDI_CONTROL(_ch) \
  (((_ch) >= CH_LRM && (_ch) <= CH_RLM) \
  || ((_ch) >= CH_LRE && (_ch) <= CH_RLO))
#endif // IBMBIDI