Bug 73292: Add const nsACString& constructor to NS_Convert(ASCII|UTF8)toUCS2. r=dbaron, sr=scc.

This commit is contained in:
jaggernaut%netscape.com 2001-10-17 05:17:53 +00:00
Родитель 43fc51fb89
Коммит 79e0a6c9a2
4 изменённых файлов: 466 добавлений и 250 удалений

Просмотреть файл

@ -1500,144 +1500,226 @@ nsAutoString::nsAutoString(const CBufDescriptor& aBuffer) : nsString() {
AddNullTerminator(*this);
}
NS_ConvertASCIItoUCS2::NS_ConvertASCIItoUCS2( const char* aCString, PRUint32 aLength )
// Shared initializer for the NS_ConvertASCIItoUCS2 constructors: sets the
// string up on top of the inline |mBuffer| and then appends |aLength| chars
// of |aCString|, widening them via |AppendWithConversion|.
void
NS_ConvertASCIItoUCS2::Init( const char* aCString, PRUint32 aLength )
{
// Capacity is the number of two-byte units in |mBuffer| minus one
// (presumably leaving room for the terminator -- confirm in |Initialize|).
Initialize(*this,mBuffer,(sizeof(mBuffer)>>eTwoByte)-1,0,eTwoByte,PR_FALSE);
AddNullTerminator(*this);
AppendWithConversion(aCString,aLength);
}
NS_ConvertASCIItoUCS2::NS_ConvertASCIItoUCS2( const char* aCString )
// Construct from an abstract (possibly multi-fragment) narrow string by
// widening each fragment in turn.
NS_ConvertASCIItoUCS2::NS_ConvertASCIItoUCS2( const nsACString& aCString )
{
  Initialize(*this,mBuffer,(sizeof(mBuffer)>>eTwoByte)-1,0,eTwoByte,PR_FALSE);
  AddNullTerminator(*this);

  // Reserve the final size up front so the per-fragment appends below do
  // not each have to grow the buffer.
  SetCapacity(aCString.Length());

  nsACString::const_iterator start; aCString.BeginReading(start);
  nsACString::const_iterator end;   aCString.EndReading(end);

  // The source is appended exactly once, fragment by fragment; no separate
  // whole-string append precedes this loop (that would duplicate the text).
  while (start != end)
    {
      const nsReadableFragment<char>& frag = start.fragment();
      AppendWithConversion(frag.mStart, frag.mEnd - frag.mStart);
      start.advance(start.size_forward());
    }
}
// Classifies a single UTF-8 code unit by the bit pattern of its high bits.
class UTF8traits
{
public:
  // 0xxxxxxx : single-byte (ASCII) character
  static PRBool isASCII(char c) { return leadBitsAre(c, 0x80, 0x00); }
  // 10xxxxxx : continuation byte inside a multi-byte sequence
  static PRBool isInSeq(char c) { return leadBitsAre(c, 0xC0, 0x80); }
  // 110xxxxx : lead byte of a 2-byte sequence
  static PRBool is2byte(char c) { return leadBitsAre(c, 0xE0, 0xC0); }
  // 1110xxxx : lead byte of a 3-byte sequence
  static PRBool is3byte(char c) { return leadBitsAre(c, 0xF0, 0xE0); }
  // 11110xxx : lead byte of a 4-byte sequence
  static PRBool is4byte(char c) { return leadBitsAre(c, 0xF8, 0xF0); }
  // 111110xx : lead byte of a 5-byte sequence
  static PRBool is5byte(char c) { return leadBitsAre(c, 0xFC, 0xF8); }
  // 1111110x : lead byte of a 6-byte sequence
  static PRBool is6byte(char c) { return leadBitsAre(c, 0xFE, 0xFC); }

private:
  // True when the bits of |c| selected by |aMask| equal |aPattern|.
  static PRBool leadBitsAre(char c, int aMask, int aPattern)
  {
    return (c & aMask) == aPattern;
  }
};
// Sink for |copy_string| that counts how many UTF-8 sequences (one per lead
// byte) are contained in the fragments it is fed.  The running total is
// available through |Length()|.
class CalculateUTF8Length
{
public:
  typedef nsACString::char_type value_type;

  CalculateUTF8Length() : mLength(0) { }

  size_t Length() const { return mLength; }

  // Scans |N| code units starting at |start| and returns the number of code
  // units consumed.  Scanning stops early at a byte that cannot start a
  // sequence, or at a sequence that would extend past the fragment.
  PRUint32 write( const value_type* start, PRUint32 N )
    {
        // algorithm assumes utf8 units won't
        // be spread across fragments
      const value_type* p = start;
      const value_type* end = start + N;
      while ( p < end )
        {
          PRUint32 seqLen;
          if ( UTF8traits::isASCII(*p) )
            seqLen = 1;
          else if ( UTF8traits::is2byte(*p) )
            seqLen = 2;
          else if ( UTF8traits::is3byte(*p) )
            seqLen = 3;
          else if ( UTF8traits::is4byte(*p) )
            seqLen = 4;
          else if ( UTF8traits::is5byte(*p) )
            seqLen = 5;
          else if ( UTF8traits::is6byte(*p) )
            seqLen = 6;
          else
            {
              NS_ERROR("not a UTF-8 string");
              break;
            }

          // Never step |p| beyond |end|: a lead byte that promises more
          // bytes than remain would otherwise make the loop run off the
          // end of the fragment.
          if ( PRUint32(end - p) < seqLen )
            {
              NS_ERROR("not a UTF-8 string");
              p = end; // the truncated tail is consumed but not counted
              break;
            }

          p += seqLen;
          ++mLength;
        }
      return p - start;
    }

private:
  size_t mLength;
};
// Sink for |copy_string| that decodes UTF-8 into a caller-supplied buffer of
// 16-bit units.  Each input sequence produces at most one output unit, so a
// buffer sized with |CalculateUTF8Length| is never overrun.  |Length()|
// reports how many units have been written so far.
class ConvertUTF8toUCS2
{
public:
  typedef nsACString::char_type value_type;
  typedef nsAString::char_type  buffer_type;

  ConvertUTF8toUCS2( buffer_type* aBuffer ) : mStart(aBuffer), mBuffer(aBuffer) {}

  size_t Length() const { return mBuffer - mStart; }

  // Decodes |N| code units starting at |start|; returns the number of code
  // units consumed.  Decoding stops at the first malformed sequence.
  PRUint32 write( const value_type* start, PRUint32 N )
    {
        // algorithm assumes utf8 units won't
        // be spread across fragments
      const value_type* p = start;
      const value_type* end = start + N;
      while ( p < end )
        {
          char c = *p++;

          if ( UTF8traits::isASCII(c) )
            {
              *mBuffer++ = buffer_type(c);
              continue;
            }

          PRUint32 ucs4;      // code point being assembled
          PRUint32 minUcs4;   // smallest value legal for this sequence length
          PRInt32 state = 0;  // continuation bytes still expected

          if ( UTF8traits::is2byte(c) )
            {
              ucs4 = (PRUint32(c) << 6) & 0x000007C0L;
              state = 1;
              minUcs4 = 0x00000080;
            }
          else if ( UTF8traits::is3byte(c) )
            {
              ucs4 = (PRUint32(c) << 12) & 0x0000F000L;
              state = 2;
              minUcs4 = 0x00000800;
            }
          else if ( UTF8traits::is4byte(c) )
            {
              ucs4 = (PRUint32(c) << 18) & 0x001F0000L;
              state = 3;
              minUcs4 = 0x00010000;
            }
          else if ( UTF8traits::is5byte(c) )
            {
              ucs4 = (PRUint32(c) << 24) & 0x03000000L;
              state = 4;
              minUcs4 = 0x00200000;
            }
          else if ( UTF8traits::is6byte(c) )
            {
              ucs4 = (PRUint32(c) << 30) & 0x40000000L;
              state = 5;
              minUcs4 = 0x04000000;
            }
          else
            {
              NS_ERROR("not a UTF8 string");
              break;
            }

          // The lead byte promises |state| more bytes; refuse to read them
          // if the fragment ends first instead of running past |end|.
          if ( PRInt32(end - p) < state )
            {
              NS_ERROR("not a UTF8 string");
              return N;
            }

          while ( state-- )
            {
              c = *p++;

              if ( UTF8traits::isInSeq(c) )
                {
                  PRInt32 shift = state * 6;
                  ucs4 |= (PRUint32(c) & 0x3F) << shift;
                }
              else
                {
                  NS_ERROR("not a UTF8 string");
                  return p - start;
                }
            }

          if ( ucs4 < minUcs4 )
            {
              // Overlong sequence
              *mBuffer++ = 0xFFFD;
            }
          else if ( ucs4 <= 0xD7FF )
            {
              *mBuffer++ = ucs4;
            }
          else if ( /* ucs4 >= 0xD800 && */ ucs4 <= 0xDFFF )
            {
              // Surrogates
              *mBuffer++ = 0xFFFD;
            }
          else if ( ucs4 == 0xFFFE || ucs4 == 0xFFFF )
            {
              // Prohibited characters
              *mBuffer++ = 0xFFFD;
            }
          else if ( ucs4 >= 0x00010000 )
            {
              // Outside the 16-bit range: emit a single replacement
              // character so output stays at one unit per sequence.
              *mBuffer++ = 0xFFFD;
            }
          else
            {
              if ( ucs4 != 0xFEFF ) // ignore BOM
                *mBuffer++ = ucs4;
            }
        }
      return p - start;
    }

private:
  buffer_type* mStart;   // first unit of the output buffer
  buffer_type* mBuffer;  // next unit to be written
};
void
NS_ConvertUTF8toUCS2::Init( const char* aCString, PRUint32 aLength )
NS_ConvertUTF8toUCS2::Init( const nsACString& aCString )
{
// Handle null string by just leaving us as a brand-new
// uninitialized nsAutoString.
if (! aCString)
return;
// Compute space required: do this once so we don't incur multiple
// allocations. This "optimization" is probably of dubious value...
const char* p;
PRUint32 count;
for (p = aCString, count = 0; *p && count < aLength; ++count) {
if ( 0 == (*p & 0x80) )
p += 1; // ASCII
else if ( 0xC0 == (*p & 0xE0) )
p += 2; // 2 byte UTF8
else if ( 0xE0 == (*p & 0xF0) )
p += 3; // 3 byte UTF8
else if ( 0xF0 == (*p & 0xF8) )
p += 4; // 4 byte UTF8
else if ( 0xF8 == (*p & 0xFC) )
p += 5; // 5 byte UTF8
else if ( 0xFC == (*p & 0xFE) )
p += 6;
else {
NS_ERROR("not a UTF-8 string");
return;
}
}
nsACString::const_iterator start, end;
CalculateUTF8Length calculator;
copy_string(aCString.BeginReading(start), aCString.EndReading(end), calculator);
PRUint32 count = calculator.Length();
// Grow the buffer if we need to.
if ((count * sizeof(PRUnichar)) >= sizeof(mBuffer))
SetCapacity(count + 1);
SetCapacity(count);
// |SetCapacity| normally doesn't guarantee the use we are putting it to here (see its interface comment in nsAWritableString.h),
// we can only use it since our local implementation, |nsString::SetCapacity|, is known to do what we want
// We'll write directly into the new string's buffer
PRUnichar* out = mUStr;
// All ready? Time to convert
// Convert the characters.
for (p = aCString, count = 0; *p && count < aLength; ++count) {
char c = *p++;
ConvertUTF8toUCS2 converter(mUStr);
copy_string(aCString.BeginReading(start), aCString.EndReading(end), converter);
mLength = converter.Length();
if (mCapacity)
mUStr[mLength] = '\0'; // null terminate
if( 0 == (0x80 & c)) { // ASCII
*out++ = PRUnichar(c);
continue;
}
PRUint32 ucs4;
PRUint32 minUcs4;
PRInt32 state = 0;
if ( 0xC0 == (0xE0 & c) ) { // 2 bytes UTF8
ucs4 = (PRUint32(c) << 6) & 0x000007C0L;
state = 1;
minUcs4 = 0x00000080;
}
else if ( 0xE0 == (0xF0 & c) ) { // 3 bytes UTF8
ucs4 = (PRUint32(c) << 12) & 0x0000F000L;
state = 2;
minUcs4 = 0x00000800;
}
else if ( 0xF0 == (0xF8 & c) ) { // 4 bytes UTF8
ucs4 = (PRUint32(c) << 18) & 0x001F0000L;
state = 3;
minUcs4 = 0x00010000;
}
else if ( 0xF8 == (0xFC & c) ) { // 5 bytes UTF8
ucs4 = (PRUint32(c) << 24) & 0x03000000L;
state = 4;
minUcs4 = 0x00200000;
}
else if ( 0xFC == (0xFE & c) ) { // 6 bytes UTF8
ucs4 = (PRUint32(c) << 30) & 0x40000000L;
state = 5;
minUcs4 = 0x04000000;
}
else {
NS_ERROR("not a UTF8 string");
break;
}
while (state--) {
c = *p++;
if ( 0x80 == (0xC0 & c) ) {
PRInt32 shift = state * 6;
ucs4 |= (PRUint32(c) & 0x3F) << shift;
}
else {
NS_ERROR("not a UTF8 string");
goto done; // so we minimally clean up
}
}
if (ucs4 < minUcs4) {
// Overlong sequence
*out++ = 0xFFFD;
} else if (ucs4 >= 0xD800 && ucs4 <= 0xDFFF) {
// Surrogates
*out++ = 0xFFFD;
} else if (ucs4 == 0xFFFE || ucs4 == 0xFFFF) {
// Prohibited characters
*out++ = 0xFFFD;
} else if (ucs4 >= 0x00010000) {
if (ucs4 >= 0x001F0000) {
*out++ = 0xFFFD;
}
else {
ucs4 -= 0x00010000;
*out++ = 0xD800 | (0x000003FF & (ucs4 >> 10));
*out++ = 0xDC00 | (0x000003FF & ucs4);
}
}
else {
if (0xfeff != ucs4) // ignore BOM
*out++ = ucs4;
}
}
done:
*out = '\0'; // null terminate
mLength = count;
NS_ASSERTION(count == mLength, "calculator calculated incorrect length");
}
#if 0

Просмотреть файл

@ -547,8 +547,25 @@ class NS_COM NS_ConvertASCIItoUCS2
*/
{
public:
explicit NS_ConvertASCIItoUCS2( const char* );
NS_ConvertASCIItoUCS2( const char*, PRUint32 );
explicit
NS_ConvertASCIItoUCS2( const nsACString& aCString );
// Construct from a flat narrow string: its buffer (|get()|) and |Length()|
// are handed straight to |Init|.
explicit
NS_ConvertASCIItoUCS2( const nsAFlatCString& aCString )
{
Init( aCString.get(), aCString.Length() );
}
// Construct from a C string without an explicit length.  |Init| is given
// the all-ones PRUint32 as the length (presumably meaning "up to the
// terminator" -- confirm against |AppendWithConversion|).
explicit
NS_ConvertASCIItoUCS2( const char* aCString )
{
Init( aCString, ~PRUint32(0) /* MAXINT */ );
}
// Construct from the first |aLength| chars of |aCString|; forwards to |Init|.
NS_ConvertASCIItoUCS2( const char* aCString, PRUint32 aLength )
{
Init( aCString, aLength );
}
#if 0
operator const nsDependentString() const
@ -557,6 +574,9 @@ class NS_COM NS_ConvertASCIItoUCS2
}
#endif
protected:
void Init( const char* aCString, PRUint32 aLength );
private:
// NOT TO BE IMPLEMENTED
NS_ConvertASCIItoUCS2( PRUnichar );
@ -566,19 +586,25 @@ class NS_COM NS_ConvertUTF8toUCS2
: public nsAutoString
{
public:
// Construct from an abstract narrow string holding UTF-8; forwards to |Init|.
explicit
NS_ConvertUTF8toUCS2( const nsACString& aCString )
{
Init( aCString );
}
// Construct from a null-terminated UTF-8 C string.
explicit
NS_ConvertUTF8toUCS2( const char* aCString )
  {
    // A null pointer leaves us as a freshly constructed (empty)
    // nsAutoString rather than being wrapped in a dependent string.
    // The text is converted exactly once.
    if ( aCString )
      Init( nsDependentCString( aCString ) );
  }
// Construct from the first |aLength| bytes of the UTF-8 buffer |aCString|.
NS_ConvertUTF8toUCS2( const char* aCString, PRUint32 aLength )
  {
    // A null pointer leaves us as a freshly constructed (empty)
    // nsAutoString rather than being wrapped in a dependent string.
    // The text is converted exactly once.
    if ( aCString )
      Init( nsDependentCString( aCString, aLength ) );
  }
protected:
void Init( const char* aCString, PRUint32 aLength );
void Init( const nsACString& aCString );
private:
NS_ConvertUTF8toUCS2( PRUnichar );

Просмотреть файл

@ -1500,144 +1500,226 @@ nsAutoString::nsAutoString(const CBufDescriptor& aBuffer) : nsString() {
AddNullTerminator(*this);
}
NS_ConvertASCIItoUCS2::NS_ConvertASCIItoUCS2( const char* aCString, PRUint32 aLength )
// Shared initializer for the NS_ConvertASCIItoUCS2 constructors: sets the
// string up on top of the inline |mBuffer| and then appends |aLength| chars
// of |aCString|, widening them via |AppendWithConversion|.
void
NS_ConvertASCIItoUCS2::Init( const char* aCString, PRUint32 aLength )
{
// Capacity is the number of two-byte units in |mBuffer| minus one
// (presumably leaving room for the terminator -- confirm in |Initialize|).
Initialize(*this,mBuffer,(sizeof(mBuffer)>>eTwoByte)-1,0,eTwoByte,PR_FALSE);
AddNullTerminator(*this);
AppendWithConversion(aCString,aLength);
}
NS_ConvertASCIItoUCS2::NS_ConvertASCIItoUCS2( const char* aCString )
// Construct from an abstract (possibly multi-fragment) narrow string by
// widening each fragment in turn.
NS_ConvertASCIItoUCS2::NS_ConvertASCIItoUCS2( const nsACString& aCString )
{
  Initialize(*this,mBuffer,(sizeof(mBuffer)>>eTwoByte)-1,0,eTwoByte,PR_FALSE);
  AddNullTerminator(*this);

  // Reserve the final size up front so the per-fragment appends below do
  // not each have to grow the buffer.
  SetCapacity(aCString.Length());

  nsACString::const_iterator start; aCString.BeginReading(start);
  nsACString::const_iterator end;   aCString.EndReading(end);

  // The source is appended exactly once, fragment by fragment; no separate
  // whole-string append precedes this loop (that would duplicate the text).
  while (start != end)
    {
      const nsReadableFragment<char>& frag = start.fragment();
      AppendWithConversion(frag.mStart, frag.mEnd - frag.mStart);
      start.advance(start.size_forward());
    }
}
// Classifies a single UTF-8 code unit by the bit pattern of its high bits.
class UTF8traits
{
public:
  // 0xxxxxxx : single-byte (ASCII) character
  static PRBool isASCII(char c) { return leadBitsAre(c, 0x80, 0x00); }
  // 10xxxxxx : continuation byte inside a multi-byte sequence
  static PRBool isInSeq(char c) { return leadBitsAre(c, 0xC0, 0x80); }
  // 110xxxxx : lead byte of a 2-byte sequence
  static PRBool is2byte(char c) { return leadBitsAre(c, 0xE0, 0xC0); }
  // 1110xxxx : lead byte of a 3-byte sequence
  static PRBool is3byte(char c) { return leadBitsAre(c, 0xF0, 0xE0); }
  // 11110xxx : lead byte of a 4-byte sequence
  static PRBool is4byte(char c) { return leadBitsAre(c, 0xF8, 0xF0); }
  // 111110xx : lead byte of a 5-byte sequence
  static PRBool is5byte(char c) { return leadBitsAre(c, 0xFC, 0xF8); }
  // 1111110x : lead byte of a 6-byte sequence
  static PRBool is6byte(char c) { return leadBitsAre(c, 0xFE, 0xFC); }

private:
  // True when the bits of |c| selected by |aMask| equal |aPattern|.
  static PRBool leadBitsAre(char c, int aMask, int aPattern)
  {
    return (c & aMask) == aPattern;
  }
};
// Sink for |copy_string| that counts how many UTF-8 sequences (one per lead
// byte) are contained in the fragments it is fed.  The running total is
// available through |Length()|.
class CalculateUTF8Length
{
public:
  typedef nsACString::char_type value_type;

  CalculateUTF8Length() : mLength(0) { }

  size_t Length() const { return mLength; }

  // Scans |N| code units starting at |start| and returns the number of code
  // units consumed.  Scanning stops early at a byte that cannot start a
  // sequence, or at a sequence that would extend past the fragment.
  PRUint32 write( const value_type* start, PRUint32 N )
    {
        // algorithm assumes utf8 units won't
        // be spread across fragments
      const value_type* p = start;
      const value_type* end = start + N;
      while ( p < end )
        {
          PRUint32 seqLen;
          if ( UTF8traits::isASCII(*p) )
            seqLen = 1;
          else if ( UTF8traits::is2byte(*p) )
            seqLen = 2;
          else if ( UTF8traits::is3byte(*p) )
            seqLen = 3;
          else if ( UTF8traits::is4byte(*p) )
            seqLen = 4;
          else if ( UTF8traits::is5byte(*p) )
            seqLen = 5;
          else if ( UTF8traits::is6byte(*p) )
            seqLen = 6;
          else
            {
              NS_ERROR("not a UTF-8 string");
              break;
            }

          // Never step |p| beyond |end|: a lead byte that promises more
          // bytes than remain would otherwise make the loop run off the
          // end of the fragment.
          if ( PRUint32(end - p) < seqLen )
            {
              NS_ERROR("not a UTF-8 string");
              p = end; // the truncated tail is consumed but not counted
              break;
            }

          p += seqLen;
          ++mLength;
        }
      return p - start;
    }

private:
  size_t mLength;
};
// Sink for |copy_string| that decodes UTF-8 into a caller-supplied buffer of
// 16-bit units.  Each input sequence produces at most one output unit, so a
// buffer sized with |CalculateUTF8Length| is never overrun.  |Length()|
// reports how many units have been written so far.
class ConvertUTF8toUCS2
{
public:
  typedef nsACString::char_type value_type;
  typedef nsAString::char_type  buffer_type;

  ConvertUTF8toUCS2( buffer_type* aBuffer ) : mStart(aBuffer), mBuffer(aBuffer) {}

  size_t Length() const { return mBuffer - mStart; }

  // Decodes |N| code units starting at |start|; returns the number of code
  // units consumed.  Decoding stops at the first malformed sequence.
  PRUint32 write( const value_type* start, PRUint32 N )
    {
        // algorithm assumes utf8 units won't
        // be spread across fragments
      const value_type* p = start;
      const value_type* end = start + N;
      while ( p < end )
        {
          char c = *p++;

          if ( UTF8traits::isASCII(c) )
            {
              *mBuffer++ = buffer_type(c);
              continue;
            }

          PRUint32 ucs4;      // code point being assembled
          PRUint32 minUcs4;   // smallest value legal for this sequence length
          PRInt32 state = 0;  // continuation bytes still expected

          if ( UTF8traits::is2byte(c) )
            {
              ucs4 = (PRUint32(c) << 6) & 0x000007C0L;
              state = 1;
              minUcs4 = 0x00000080;
            }
          else if ( UTF8traits::is3byte(c) )
            {
              ucs4 = (PRUint32(c) << 12) & 0x0000F000L;
              state = 2;
              minUcs4 = 0x00000800;
            }
          else if ( UTF8traits::is4byte(c) )
            {
              ucs4 = (PRUint32(c) << 18) & 0x001F0000L;
              state = 3;
              minUcs4 = 0x00010000;
            }
          else if ( UTF8traits::is5byte(c) )
            {
              ucs4 = (PRUint32(c) << 24) & 0x03000000L;
              state = 4;
              minUcs4 = 0x00200000;
            }
          else if ( UTF8traits::is6byte(c) )
            {
              ucs4 = (PRUint32(c) << 30) & 0x40000000L;
              state = 5;
              minUcs4 = 0x04000000;
            }
          else
            {
              NS_ERROR("not a UTF8 string");
              break;
            }

          // The lead byte promises |state| more bytes; refuse to read them
          // if the fragment ends first instead of running past |end|.
          if ( PRInt32(end - p) < state )
            {
              NS_ERROR("not a UTF8 string");
              return N;
            }

          while ( state-- )
            {
              c = *p++;

              if ( UTF8traits::isInSeq(c) )
                {
                  PRInt32 shift = state * 6;
                  ucs4 |= (PRUint32(c) & 0x3F) << shift;
                }
              else
                {
                  NS_ERROR("not a UTF8 string");
                  return p - start;
                }
            }

          if ( ucs4 < minUcs4 )
            {
              // Overlong sequence
              *mBuffer++ = 0xFFFD;
            }
          else if ( ucs4 <= 0xD7FF )
            {
              *mBuffer++ = ucs4;
            }
          else if ( /* ucs4 >= 0xD800 && */ ucs4 <= 0xDFFF )
            {
              // Surrogates
              *mBuffer++ = 0xFFFD;
            }
          else if ( ucs4 == 0xFFFE || ucs4 == 0xFFFF )
            {
              // Prohibited characters
              *mBuffer++ = 0xFFFD;
            }
          else if ( ucs4 >= 0x00010000 )
            {
              // Outside the 16-bit range: emit a single replacement
              // character so output stays at one unit per sequence.
              *mBuffer++ = 0xFFFD;
            }
          else
            {
              if ( ucs4 != 0xFEFF ) // ignore BOM
                *mBuffer++ = ucs4;
            }
        }
      return p - start;
    }

private:
  buffer_type* mStart;   // first unit of the output buffer
  buffer_type* mBuffer;  // next unit to be written
};
void
NS_ConvertUTF8toUCS2::Init( const char* aCString, PRUint32 aLength )
NS_ConvertUTF8toUCS2::Init( const nsACString& aCString )
{
// Handle null string by just leaving us as a brand-new
// uninitialized nsAutoString.
if (! aCString)
return;
// Compute space required: do this once so we don't incur multiple
// allocations. This "optimization" is probably of dubious value...
const char* p;
PRUint32 count;
for (p = aCString, count = 0; *p && count < aLength; ++count) {
if ( 0 == (*p & 0x80) )
p += 1; // ASCII
else if ( 0xC0 == (*p & 0xE0) )
p += 2; // 2 byte UTF8
else if ( 0xE0 == (*p & 0xF0) )
p += 3; // 3 byte UTF8
else if ( 0xF0 == (*p & 0xF8) )
p += 4; // 4 byte UTF8
else if ( 0xF8 == (*p & 0xFC) )
p += 5; // 5 byte UTF8
else if ( 0xFC == (*p & 0xFE) )
p += 6;
else {
NS_ERROR("not a UTF-8 string");
return;
}
}
nsACString::const_iterator start, end;
CalculateUTF8Length calculator;
copy_string(aCString.BeginReading(start), aCString.EndReading(end), calculator);
PRUint32 count = calculator.Length();
// Grow the buffer if we need to.
if ((count * sizeof(PRUnichar)) >= sizeof(mBuffer))
SetCapacity(count + 1);
SetCapacity(count);
// |SetCapacity| normally doesn't guarantee the use we are putting it to here (see its interface comment in nsAWritableString.h),
// we can only use it since our local implementation, |nsString::SetCapacity|, is known to do what we want
// We'll write directly into the new string's buffer
PRUnichar* out = mUStr;
// All ready? Time to convert
// Convert the characters.
for (p = aCString, count = 0; *p && count < aLength; ++count) {
char c = *p++;
ConvertUTF8toUCS2 converter(mUStr);
copy_string(aCString.BeginReading(start), aCString.EndReading(end), converter);
mLength = converter.Length();
if (mCapacity)
mUStr[mLength] = '\0'; // null terminate
if( 0 == (0x80 & c)) { // ASCII
*out++ = PRUnichar(c);
continue;
}
PRUint32 ucs4;
PRUint32 minUcs4;
PRInt32 state = 0;
if ( 0xC0 == (0xE0 & c) ) { // 2 bytes UTF8
ucs4 = (PRUint32(c) << 6) & 0x000007C0L;
state = 1;
minUcs4 = 0x00000080;
}
else if ( 0xE0 == (0xF0 & c) ) { // 3 bytes UTF8
ucs4 = (PRUint32(c) << 12) & 0x0000F000L;
state = 2;
minUcs4 = 0x00000800;
}
else if ( 0xF0 == (0xF8 & c) ) { // 4 bytes UTF8
ucs4 = (PRUint32(c) << 18) & 0x001F0000L;
state = 3;
minUcs4 = 0x00010000;
}
else if ( 0xF8 == (0xFC & c) ) { // 5 bytes UTF8
ucs4 = (PRUint32(c) << 24) & 0x03000000L;
state = 4;
minUcs4 = 0x00200000;
}
else if ( 0xFC == (0xFE & c) ) { // 6 bytes UTF8
ucs4 = (PRUint32(c) << 30) & 0x40000000L;
state = 5;
minUcs4 = 0x04000000;
}
else {
NS_ERROR("not a UTF8 string");
break;
}
while (state--) {
c = *p++;
if ( 0x80 == (0xC0 & c) ) {
PRInt32 shift = state * 6;
ucs4 |= (PRUint32(c) & 0x3F) << shift;
}
else {
NS_ERROR("not a UTF8 string");
goto done; // so we minimally clean up
}
}
if (ucs4 < minUcs4) {
// Overlong sequence
*out++ = 0xFFFD;
} else if (ucs4 >= 0xD800 && ucs4 <= 0xDFFF) {
// Surrogates
*out++ = 0xFFFD;
} else if (ucs4 == 0xFFFE || ucs4 == 0xFFFF) {
// Prohibited characters
*out++ = 0xFFFD;
} else if (ucs4 >= 0x00010000) {
if (ucs4 >= 0x001F0000) {
*out++ = 0xFFFD;
}
else {
ucs4 -= 0x00010000;
*out++ = 0xD800 | (0x000003FF & (ucs4 >> 10));
*out++ = 0xDC00 | (0x000003FF & ucs4);
}
}
else {
if (0xfeff != ucs4) // ignore BOM
*out++ = ucs4;
}
}
done:
*out = '\0'; // null terminate
mLength = count;
NS_ASSERTION(count == mLength, "calculator calculated incorrect length");
}
#if 0

Просмотреть файл

@ -547,8 +547,25 @@ class NS_COM NS_ConvertASCIItoUCS2
*/
{
public:
explicit NS_ConvertASCIItoUCS2( const char* );
NS_ConvertASCIItoUCS2( const char*, PRUint32 );
explicit
NS_ConvertASCIItoUCS2( const nsACString& aCString );
// Construct from a flat narrow string: its buffer (|get()|) and |Length()|
// are handed straight to |Init|.
explicit
NS_ConvertASCIItoUCS2( const nsAFlatCString& aCString )
{
Init( aCString.get(), aCString.Length() );
}
// Construct from a C string without an explicit length.  |Init| is given
// the all-ones PRUint32 as the length (presumably meaning "up to the
// terminator" -- confirm against |AppendWithConversion|).
explicit
NS_ConvertASCIItoUCS2( const char* aCString )
{
Init( aCString, ~PRUint32(0) /* MAXINT */ );
}
// Construct from the first |aLength| chars of |aCString|; forwards to |Init|.
NS_ConvertASCIItoUCS2( const char* aCString, PRUint32 aLength )
{
Init( aCString, aLength );
}
#if 0
operator const nsDependentString() const
@ -557,6 +574,9 @@ class NS_COM NS_ConvertASCIItoUCS2
}
#endif
protected:
void Init( const char* aCString, PRUint32 aLength );
private:
// NOT TO BE IMPLEMENTED
NS_ConvertASCIItoUCS2( PRUnichar );
@ -566,19 +586,25 @@ class NS_COM NS_ConvertUTF8toUCS2
: public nsAutoString
{
public:
// Construct from an abstract narrow string holding UTF-8; forwards to |Init|.
explicit
NS_ConvertUTF8toUCS2( const nsACString& aCString )
{
Init( aCString );
}
// Construct from a null-terminated UTF-8 C string.
explicit
NS_ConvertUTF8toUCS2( const char* aCString )
  {
    // A null pointer leaves us as a freshly constructed (empty)
    // nsAutoString rather than being wrapped in a dependent string.
    // The text is converted exactly once.
    if ( aCString )
      Init( nsDependentCString( aCString ) );
  }
// Construct from the first |aLength| bytes of the UTF-8 buffer |aCString|.
NS_ConvertUTF8toUCS2( const char* aCString, PRUint32 aLength )
  {
    // A null pointer leaves us as a freshly constructed (empty)
    // nsAutoString rather than being wrapped in a dependent string.
    // The text is converted exactly once.
    if ( aCString )
      Init( nsDependentCString( aCString, aLength ) );
  }
protected:
void Init( const char* aCString, PRUint32 aLength );
void Init( const nsACString& aCString );
private:
NS_ConvertUTF8toUCS2( PRUnichar );