From a6b06cd95bc0c687da17ed732ff0ded9355c4383 Mon Sep 17 00:00:00 2001 From: "shanjian%netscape.com" Date: Tue, 8 Oct 2002 02:10:52 +0000 Subject: [PATCH] #134053 utf8 conversion problem in nsString.h Change conversion to handle surrogates r=yokoyama, sr=scc --- string/obsolete/nsString2.h | 20 +++++++++++++++----- xpcom/string/obsolete/nsString2.h | 20 +++++++++++++++----- 2 files changed, 30 insertions(+), 10 deletions(-) diff --git a/string/obsolete/nsString2.h b/string/obsolete/nsString2.h index 4d732a8e9286..36879063cd24 100644 --- a/string/obsolete/nsString2.h +++ b/string/obsolete/nsString2.h @@ -581,6 +581,9 @@ class NS_COM NS_ConvertUTF8toUCS2 NS_ConvertUTF8toUCS2( PRUnichar ); }; +#define PLANE1_BASE 0x00010000 +#define UCS2_REPLACEMENT_CHAR 0xfffd + class ConvertUTF8toUCS2 { public: @@ -672,7 +675,7 @@ class ConvertUTF8toUCS2 if ( ucs4 < minUcs4 ) { // Overlong sequence - *mBuffer++ = 0xFFFD; + *mBuffer++ = UCS2_REPLACEMENT_CHAR; } else if ( ucs4 <= 0xD7FF ) { @@ -681,16 +684,23 @@ class ConvertUTF8toUCS2 else if ( /* ucs4 >= 0xD800 && */ ucs4 <= 0xDFFF ) { // Surrogates - *mBuffer++ = 0xFFFD; + *mBuffer++ = UCS2_REPLACEMENT_CHAR; } else if ( ucs4 == 0xFFFE || ucs4 == 0xFFFF ) { // Prohibited characters - *mBuffer++ = 0xFFFD; + *mBuffer++ = UCS2_REPLACEMENT_CHAR; } - else if ( ucs4 >= 0x00010000 ) + else if ( ucs4 >= PLANE1_BASE ) { - *mBuffer++ = 0xFFFD; + if ( ucs4 >= 0x00110000 ) + *mBuffer++ = UCS2_REPLACEMENT_CHAR; + else { + // surrogate, see unicode specification 3.7 for following math. + ucs4 -= PLANE1_BASE; + *mBuffer++ = (PRUnichar)(ucs4 >> 10) | 0xd800u; + *mBuffer++ = (PRUnichar)(ucs4 & 0x3ff) | 0xdc00u; + } } else { diff --git a/xpcom/string/obsolete/nsString2.h b/xpcom/string/obsolete/nsString2.h index 4d732a8e9286..36879063cd24 100644 --- a/xpcom/string/obsolete/nsString2.h +++ b/xpcom/string/obsolete/nsString2.h @@ -581,6 +581,9 @@ class NS_COM NS_ConvertUTF8toUCS2 NS_ConvertUTF8toUCS2( PRUnichar ); }; +#define PLANE1_BASE 0x00010000 +#define UCS2_REPLACEMENT_CHAR 0xfffd + class ConvertUTF8toUCS2 { public: @@ -672,7 +675,7 @@ class ConvertUTF8toUCS2 if ( ucs4 < minUcs4 ) { // Overlong sequence - *mBuffer++ = 0xFFFD; + *mBuffer++ = UCS2_REPLACEMENT_CHAR; } else if ( ucs4 <= 0xD7FF ) { @@ -681,16 +684,23 @@ class ConvertUTF8toUCS2 else if ( /* ucs4 >= 0xD800 && */ ucs4 <= 0xDFFF ) { // Surrogates - *mBuffer++ = 0xFFFD; + *mBuffer++ = UCS2_REPLACEMENT_CHAR; } else if ( ucs4 == 0xFFFE || ucs4 == 0xFFFF ) { // Prohibited characters - *mBuffer++ = 0xFFFD; + *mBuffer++ = UCS2_REPLACEMENT_CHAR; } - else if ( ucs4 >= 0x00010000 ) + else if ( ucs4 >= PLANE1_BASE ) { - *mBuffer++ = 0xFFFD; + if ( ucs4 >= 0x00110000 ) + *mBuffer++ = UCS2_REPLACEMENT_CHAR; + else { + // surrogate, see unicode specification 3.7 for following math. + ucs4 -= PLANE1_BASE; + *mBuffer++ = (PRUnichar)(ucs4 >> 10) | 0xd800u; + *mBuffer++ = (PRUnichar)(ucs4 & 0x3ff) | 0xdc00u; + } } else {