Remove UTF-32 encoder and decoder. Bug 604317, r=emk

This commit is contained in:
Simon Montagu 2011-03-30 08:35:34 +02:00
Родитель 8715a63ac2
Коммит 812728ada4
27 изменённых файлов: 22 добавлений и 1436 удалений

Просмотреть файл

@ -545,7 +545,7 @@ public:
/**
* Determine whether a buffer begins with a BOM for UTF-8, UTF-16LE,
* UTF-16BE, UTF-32LE, UTF-32BE.
* UTF-16BE
*
* @param aBuffer the buffer to check
* @param aLength the length of the buffer

Просмотреть файл

@ -3534,24 +3534,6 @@ nsContentUtils::CheckForBOM(const unsigned char* aBuffer, PRUint32 aLength,
aBuffer[2] == 0xBF) {
aCharset = "UTF-8";
}
else if (aLength >= 4 &&
aBuffer[0] == 0x00 &&
aBuffer[1] == 0x00 &&
aBuffer[2] == 0xFE &&
aBuffer[3] == 0xFF) {
aCharset = "UTF-32";
if (bigEndian)
*bigEndian = PR_TRUE;
}
else if (aLength >= 4 &&
aBuffer[0] == 0xFF &&
aBuffer[1] == 0xFE &&
aBuffer[2] == 0x00 &&
aBuffer[3] == 0x00) {
aCharset = "UTF-32";
if (bigEndian)
*bigEndian = PR_FALSE;
}
else if (aLength >= 2 &&
aBuffer[0] == 0xFE && aBuffer[1] == 0xFF) {
aCharset = "UTF-16";

Просмотреть файл

@ -713,23 +713,11 @@ nsDOMFileReader::GuessCharset(const char *aFileData,
aCharset = mCharset;
} else {
// no charset detector available, check the BOM
unsigned char sniffBuf[4];
unsigned char sniffBuf[3];
PRUint32 numRead = (aDataLen >= sizeof(sniffBuf) ? sizeof(sniffBuf) : aDataLen);
memcpy(sniffBuf, aFileData, numRead);
if (numRead >= 4 &&
sniffBuf[0] == 0x00 &&
sniffBuf[1] == 0x00 &&
sniffBuf[2] == 0xfe &&
sniffBuf[3] == 0xff) {
aCharset = "UTF-32BE";
} else if (numRead >= 4 &&
sniffBuf[0] == 0xff &&
sniffBuf[1] == 0xfe &&
sniffBuf[2] == 0x00 &&
sniffBuf[3] == 0x00) {
aCharset = "UTF-32LE";
} else if (numRead >= 2 &&
if (numRead >= 2 &&
sniffBuf[0] == 0xfe &&
sniffBuf[1] == 0xff) {
aCharset = "UTF-16BE";

Просмотреть файл

@ -115,13 +115,6 @@ r.onload = getLoadHandler(testTextData,
"utf16 reading");
expectedTestCount++;
r = new FileReader();
r.onload = getLoadHandler(testTextData,
convertToUTF32(testTextData).length,
"utf32 reading");
r.readAsText(createFileWithData(convertToUTF32(testTextData)), "UTF-32");
expectedTestCount++;
// Test loading an empty file works (and doesn't crash!)
var emptyFile = createFileWithData("");
@ -351,15 +344,6 @@ function convertToUTF16(s) {
return res;
}
/**
 * Encode a string as big-endian UTF-32, returned as a binary string with
 * one character per output byte and no BOM.
 *
 * Every UTF-16 code unit of |s| becomes four bytes: two zero bytes followed
 * by the high and the low byte of the code unit. Surrogate pairs are not
 * combined into a single code point, so only BMP text round-trips.
 *
 * @param s  the text to encode
 * @return   the encoded bytes (4 * s.length characters)
 */
function convertToUTF32(s) {
  // Keep the accumulator and the scratch code unit local: assigning to
  // undeclared names creates implicit globals and throws in strict mode.
  var res = "";
  for (var i = 0; i < s.length; ++i) {
    var c = s.charCodeAt(i);
    res += "\0\0" + String.fromCharCode(c >>> 8, c & 255);
  }
  return res;
}
/**
 * Encode a string as UTF-8, returned as a binary string with one character
 * per output byte.
 *
 * encodeURIComponent() emits the UTF-8 bytes as %XX escapes; unescape()
 * then turns each escape back into a single character.
 *
 * @param s  the text to encode
 * @return   the UTF-8 bytes as a string
 */
function convertToUTF8(s) {
  var percentEncoded = encodeURIComponent(s);
  return unescape(percentEncoded);
}

Просмотреть файл

@ -710,10 +710,9 @@ nsEncodingFormSubmission::nsEncodingFormSubmission(const nsACString& aCharset,
charset.AssignLiteral("windows-1252");
}
// use UTF-8 for UTF-16* and UTF-32* (per WHATWG and existing practice of
// use UTF-8 for UTF-16* (per WHATWG and existing practice of
// MS IE/Opera).
if (StringBeginsWith(charset, NS_LITERAL_CSTRING("UTF-16")) ||
StringBeginsWith(charset, NS_LITERAL_CSTRING("UTF-32"))) {
if (StringBeginsWith(charset, NS_LITERAL_CSTRING("UTF-16"))) {
charset.AssignLiteral("UTF-8");
}

Просмотреть файл

@ -82,9 +82,6 @@ utf-8.title = Unicode (UTF-8)
utf-16.title = Unicode (UTF-16)
utf-16le.title = Unicode (UTF-16LE)
utf-16be.title = Unicode (UTF-16BE)
utf-32.title = Unicode (UTF-32)
utf-32le.title = Unicode (UTF-32LE)
utf-32be.title = Unicode (UTF-32BE)
iso-8859-5.title = Cyrillic (ISO-8859-5)
iso-ir-111.title = Cyrillic (ISO-IR-111)
windows-1251.title = Cyrillic (Windows-1251)

Просмотреть файл

@ -113,17 +113,13 @@ nsJSON::Encode(nsAString &aJSON)
static const char UTF8BOM[] = "\xEF\xBB\xBF";
static const char UTF16LEBOM[] = "\xFF\xFE";
static const char UTF16BEBOM[] = "\xFE\xFF";
static const char UTF32LEBOM[] = "\xFF\xFE\0\0";
static const char UTF32BEBOM[] = "\0\0\xFE\xFF";
static nsresult CheckCharset(const char* aCharset)
{
// Check that the charset is permissible
if (!(strcmp(aCharset, "UTF-8") == 0 ||
strcmp(aCharset, "UTF-16LE") == 0 ||
strcmp(aCharset, "UTF-16BE") == 0 ||
strcmp(aCharset, "UTF-32LE") == 0 ||
strcmp(aCharset, "UTF-32BE") == 0)) {
strcmp(aCharset, "UTF-16BE") == 0)) {
return NS_ERROR_INVALID_ARG;
}
@ -166,10 +162,6 @@ nsJSON::EncodeToStream(nsIOutputStream *aStream,
rv = aStream->Write(UTF16LEBOM, 2, &ignored);
else if (strcmp(aCharset, "UTF-16BE") == 0)
rv = aStream->Write(UTF16BEBOM, 2, &ignored);
else if (strcmp(aCharset, "UTF-32LE") == 0)
rv = aStream->Write(UTF32LEBOM, 4, &ignored);
else if (strcmp(aCharset, "UTF-32BE") == 0)
rv = aStream->Write(UTF32BEBOM, 4, &ignored);
NS_ENSURE_SUCCESS(rv, rv);
}
@ -704,15 +696,9 @@ nsJSONListener::ProcessBytes(const char* aBuffer, PRUint32 aByteLength)
// See section 3 of RFC4627 for details on why this works.
const char *buffer = mSniffBuffer.get();
if (mSniffBuffer.Length() >= 4) {
if (buffer[0] == 0x00 && buffer[1] == 0x00 &&
if (buffer[0] == 0x00 && buffer[1] != 0x00 &&
buffer[2] == 0x00 && buffer[3] != 0x00) {
charset = "UTF-32BE";
} else if (buffer[0] == 0x00 && buffer[1] != 0x00 &&
buffer[2] == 0x00 && buffer[3] != 0x00) {
charset = "UTF-16BE";
} else if (buffer[0] != 0x00 && buffer[1] == 0x00 &&
buffer[2] == 0x00 && buffer[3] == 0x00) {
charset = "UTF-32LE";
} else if (buffer[0] != 0x00 && buffer[1] == 0x00 &&
buffer[2] != 0x00 && buffer[3] == 0x00) {
charset = "UTF-16LE";

Просмотреть файл

@ -130,14 +130,10 @@ function testOutputStreams() {
var utf8File = writeToFile(pair[1], "UTF-8", false);
var utf16LEFile = writeToFile(pair[1], "UTF-16LE", false);
var utf16BEFile = writeToFile(pair[1], "UTF-16BE", false);
var utf32LEFile = writeToFile(pair[1], "UTF-32LE", false);
var utf32BEFile = writeToFile(pair[1], "UTF-32BE", false);
// all ascii with no BOMs, so this will work
do_check_eq(utf16LEFile.fileSize / 2, utf8File.fileSize);
do_check_eq(utf32LEFile.fileSize / 4, utf8File.fileSize);
do_check_eq(utf16LEFile.fileSize, utf16BEFile.fileSize);
do_check_eq(utf32LEFile.fileSize, utf32BEFile.fileSize);
}
}
@ -148,10 +144,6 @@ function testOutputStreams() {
do_check_eq(f.fileSize, 6);
var f = writeToFile({},"UTF-16BE", true);
do_check_eq(f.fileSize, 6);
var f = writeToFile({},"UTF-32LE", true);
do_check_eq(f.fileSize, 12);
var f = writeToFile({},"UTF-32BE", true);
do_check_eq(f.fileSize, 12);
outputDir.remove(true);
}

Просмотреть файл

@ -111,7 +111,7 @@ nsresult nsUniversalDetector::HandleData(const char* aBuf, PRUint32 aLen)
if (mStart)
{
mStart = PR_FALSE;
if (aLen > 3)
if (aLen > 2)
switch (aBuf[0])
{
case '\xEF':
@ -120,26 +120,12 @@ nsresult nsUniversalDetector::HandleData(const char* aBuf, PRUint32 aLen)
mDetectedCharset = "UTF-8";
break;
case '\xFE':
if (('\xFF' == aBuf[1]) && ('\x00' == aBuf[2]) && ('\x00' == aBuf[3]))
// FE FF 00 00 UCS-4, unusual octet order BOM (3412)
mDetectedCharset = "X-ISO-10646-UCS-4-3412";
else if ('\xFF' == aBuf[1])
if ('\xFF' == aBuf[1])
// FE FF UTF-16, big endian BOM
mDetectedCharset = "UTF-16";
break;
case '\x00':
if (('\x00' == aBuf[1]) && ('\xFE' == aBuf[2]) && ('\xFF' == aBuf[3]))
// 00 00 FE FF UTF-32, big-endian BOM
mDetectedCharset = "UTF-32";
else if (('\x00' == aBuf[1]) && ('\xFF' == aBuf[2]) && ('\xFE' == aBuf[3]))
// 00 00 FF FE UCS-4, unusual octet order BOM (2143)
mDetectedCharset = "X-ISO-10646-UCS-4-2143";
break;
case '\xFF':
if (('\xFE' == aBuf[1]) && ('\x00' == aBuf[2]) && ('\x00' == aBuf[3]))
// FF FE 00 00 UTF-32, little-endian BOM
mDetectedCharset = "UTF-32";
else if ('\xFE' == aBuf[1])
if ('\xFE' == aBuf[1])
// FF FE UTF-16, little endian BOM
mDetectedCharset = "UTF-16";
break;

Просмотреть файл

@ -20,7 +20,7 @@ ucvko - Korean charsets - ISO-2022-KR, EUC-KR, CP949
ucvlatin - Latin charsets and others - ISO-8859-x, CP1250-1258
CP866, 874, GEOSTD8, ARMSCII, ISO-IR-111, KOI8,
Mac charsets, T61, TIS620, TCVN, VISCII, VPS
UTF7, UTF16, UTF32.
UTF7, UTF16
ucvtw - Traditional Chinese charsets Set 1 - Big5
ucvtw2 - Traditional Chinese charsets Set 2 - EUC-TW

Просмотреть файл

@ -189,9 +189,6 @@ utf-8.LangGroup = x-unicode
utf-16.LangGroup = x-unicode
utf-16be.LangGroup = x-unicode
utf-16le.LangGroup = x-unicode
utf-32.LangGroup = x-unicode
utf-32be.LangGroup = x-unicode
utf-32le.LangGroup = x-unicode
utf-7.LangGroup = x-unicode
x-imap4-modified-utf7.LangGroup = x-unicode
viscii.LangGroup = x-western

Просмотреть файл

@ -83,9 +83,6 @@ iso-2022-cn=ISO-2022-CN
iso-2022-cn-ext=ISO-2022-CN
iso-2022-kr=ISO-2022-KR
iso-2022-jp=ISO-2022-JP
utf-32be=UTF-32BE
utf-32le=UTF-32LE
utf-32=UTF-32
utf-16be=UTF-16BE
utf-16le=UTF-16LE
utf-16=UTF-16
@ -150,9 +147,6 @@ x-viet-vps=x-viet-vps
iso-10646-ucs-2=UTF-16BE
x-iso-10646-ucs-2-be=UTF-16BE
x-iso-10646-ucs-2-le=UTF-16LE
iso-10646-ucs-4=UTF-32BE
x-iso-10646-ucs-4-be=UTF-32BE
x-iso-10646-ucs-4-le=UTF-32LE
x-user-defined=x-user-defined
x-johab=x-johab
x-windows-949=x-windows-949

Просмотреть файл

@ -128,7 +128,6 @@
#include "nsVPSToUnicode.h"
#include "nsUTF7ToUnicode.h"
#include "nsMUTF7ToUnicode.h"
#include "nsUTF32ToUnicode.h"
#include "nsUCS2BEToUnicode.h"
#include "nsT61ToUnicode.h"
#include "nsUserDefinedToUnicode.h"
@ -180,7 +179,6 @@
#include "nsUnicodeToUTF7.h"
#include "nsUnicodeToMUTF7.h"
#include "nsUnicodeToUCS2BE.h"
#include "nsUnicodeToUTF32.h"
#include "nsUnicodeToT61.h"
#include "nsUnicodeToUserDefined.h"
#include "nsUnicodeToSymbol.h"
@ -336,9 +334,6 @@ NS_UCONV_REG_UNREG("x-imap4-modified-utf7", NS_MUTF7TOUNICODE_CID, NS_UNICODETOM
NS_UCONV_REG_UNREG("UTF-16", NS_UTF16TOUNICODE_CID, NS_UNICODETOUTF16_CID)
NS_UCONV_REG_UNREG("UTF-16BE", NS_UTF16BETOUNICODE_CID, NS_UNICODETOUTF16BE_CID)
NS_UCONV_REG_UNREG("UTF-16LE", NS_UTF16LETOUNICODE_CID, NS_UNICODETOUTF16LE_CID)
NS_UCONV_REG_UNREG("UTF-32", NS_UTF32TOUNICODE_CID, NS_UNICODETOUTF32_CID)
NS_UCONV_REG_UNREG("UTF-32BE", NS_UTF32BETOUNICODE_CID, NS_UNICODETOUTF32BE_CID)
NS_UCONV_REG_UNREG("UTF-32LE", NS_UTF32LETOUNICODE_CID, NS_UNICODETOUTF32LE_CID)
NS_UCONV_REG_UNREG("T.61-8bit", NS_T61TOUNICODE_CID, NS_UNICODETOT61_CID)
NS_UCONV_REG_UNREG("x-user-defined", NS_USERDEFINEDTOUNICODE_CID, NS_UNICODETOUSERDEFINED_CID)
NS_UCONV_REG_UNREG("x-mac-arabic" , NS_MACARABICTOUNICODE_CID, NS_UNICODETOMACARABIC_CID)
@ -411,17 +406,11 @@ NS_GENERIC_FACTORY_CONSTRUCTOR(nsMUTF7ToUnicode)
NS_GENERIC_FACTORY_CONSTRUCTOR(nsUTF16ToUnicode)
NS_GENERIC_FACTORY_CONSTRUCTOR(nsUTF16BEToUnicode)
NS_GENERIC_FACTORY_CONSTRUCTOR(nsUTF16LEToUnicode)
NS_GENERIC_FACTORY_CONSTRUCTOR(nsUTF32ToUnicode)
NS_GENERIC_FACTORY_CONSTRUCTOR(nsUTF32BEToUnicode)
NS_GENERIC_FACTORY_CONSTRUCTOR(nsUTF32LEToUnicode)
NS_GENERIC_FACTORY_CONSTRUCTOR(nsUnicodeToUTF7)
NS_GENERIC_FACTORY_CONSTRUCTOR(nsUnicodeToMUTF7)
NS_GENERIC_FACTORY_CONSTRUCTOR(nsUnicodeToUTF16BE)
NS_GENERIC_FACTORY_CONSTRUCTOR(nsUnicodeToUTF16LE)
NS_GENERIC_FACTORY_CONSTRUCTOR(nsUnicodeToUTF16)
NS_GENERIC_FACTORY_CONSTRUCTOR(nsUnicodeToUTF32BE)
NS_GENERIC_FACTORY_CONSTRUCTOR(nsUnicodeToUTF32LE)
NS_GENERIC_FACTORY_CONSTRUCTOR(nsUnicodeToUTF32)
NS_GENERIC_FACTORY_CONSTRUCTOR(nsUnicodeToTSCII)
NS_GENERIC_FACTORY_CONSTRUCTOR(nsUnicodeToTamilTTF)
@ -664,9 +653,6 @@ NS_DEFINE_NAMED_CID(NS_MUTF7TOUNICODE_CID);
NS_DEFINE_NAMED_CID(NS_UTF16TOUNICODE_CID);
NS_DEFINE_NAMED_CID(NS_UTF16BETOUNICODE_CID);
NS_DEFINE_NAMED_CID(NS_UTF16LETOUNICODE_CID);
NS_DEFINE_NAMED_CID(NS_UTF32TOUNICODE_CID);
NS_DEFINE_NAMED_CID(NS_UTF32BETOUNICODE_CID);
NS_DEFINE_NAMED_CID(NS_UTF32LETOUNICODE_CID);
NS_DEFINE_NAMED_CID(NS_T61TOUNICODE_CID);
NS_DEFINE_NAMED_CID(NS_USERDEFINEDTOUNICODE_CID);
NS_DEFINE_NAMED_CID(NS_MACARABICTOUNICODE_CID);
@ -725,9 +711,6 @@ NS_DEFINE_NAMED_CID(NS_UNICODETOMUTF7_CID);
NS_DEFINE_NAMED_CID(NS_UNICODETOUTF16BE_CID);
NS_DEFINE_NAMED_CID(NS_UNICODETOUTF16LE_CID);
NS_DEFINE_NAMED_CID(NS_UNICODETOUTF16_CID);
NS_DEFINE_NAMED_CID(NS_UNICODETOUTF32BE_CID);
NS_DEFINE_NAMED_CID(NS_UNICODETOUTF32LE_CID);
NS_DEFINE_NAMED_CID(NS_UNICODETOUTF32_CID);
NS_DEFINE_NAMED_CID(NS_UNICODETOT61_CID);
NS_DEFINE_NAMED_CID(NS_UNICODETOUSERDEFINED_CID);
NS_DEFINE_NAMED_CID(NS_UNICODETOSYMBOL_CID);
@ -869,9 +852,6 @@ static const mozilla::Module::CIDEntry kUConvCIDs[] = {
{ &kNS_UTF16TOUNICODE_CID, false, NULL, nsUTF16ToUnicodeConstructor },
{ &kNS_UTF16BETOUNICODE_CID, false, NULL, nsUTF16BEToUnicodeConstructor },
{ &kNS_UTF16LETOUNICODE_CID, false, NULL, nsUTF16LEToUnicodeConstructor },
{ &kNS_UTF32TOUNICODE_CID, false, NULL, nsUTF32ToUnicodeConstructor },
{ &kNS_UTF32BETOUNICODE_CID, false, NULL, nsUTF32BEToUnicodeConstructor },
{ &kNS_UTF32LETOUNICODE_CID, false, NULL, nsUTF32LEToUnicodeConstructor },
{ &kNS_T61TOUNICODE_CID, false, NULL, nsT61ToUnicodeConstructor },
{ &kNS_USERDEFINEDTOUNICODE_CID, false, NULL, nsUserDefinedToUnicodeConstructor },
{ &kNS_MACARABICTOUNICODE_CID, false, NULL, nsMacArabicToUnicodeConstructor },
@ -930,9 +910,6 @@ static const mozilla::Module::CIDEntry kUConvCIDs[] = {
{ &kNS_UNICODETOUTF16BE_CID, false, NULL, nsUnicodeToUTF16BEConstructor },
{ &kNS_UNICODETOUTF16LE_CID, false, NULL, nsUnicodeToUTF16LEConstructor },
{ &kNS_UNICODETOUTF16_CID, false, NULL, nsUnicodeToUTF16Constructor },
{ &kNS_UNICODETOUTF32BE_CID, false, NULL, nsUnicodeToUTF32BEConstructor },
{ &kNS_UNICODETOUTF32LE_CID, false, NULL, nsUnicodeToUTF32LEConstructor },
{ &kNS_UNICODETOUTF32_CID, false, NULL, nsUnicodeToUTF32Constructor },
{ &kNS_UNICODETOT61_CID, false, NULL, nsUnicodeToT61Constructor },
{ &kNS_UNICODETOUSERDEFINED_CID, false, NULL, nsUnicodeToUserDefinedConstructor },
{ &kNS_UNICODETOSYMBOL_CID, false, NULL, nsUnicodeToSymbolConstructor },
@ -1076,9 +1053,6 @@ static const mozilla::Module::ContractIDEntry kUConvContracts[] = {
{ NS_UNICODEDECODER_CONTRACTID_BASE "UTF-16", &kNS_UTF16TOUNICODE_CID },
{ NS_UNICODEDECODER_CONTRACTID_BASE "UTF-16BE", &kNS_UTF16BETOUNICODE_CID },
{ NS_UNICODEDECODER_CONTRACTID_BASE "UTF-16LE", &kNS_UTF16LETOUNICODE_CID },
{ NS_UNICODEDECODER_CONTRACTID_BASE "UTF-32", &kNS_UTF32TOUNICODE_CID },
{ NS_UNICODEDECODER_CONTRACTID_BASE "UTF-32BE", &kNS_UTF32BETOUNICODE_CID },
{ NS_UNICODEDECODER_CONTRACTID_BASE "UTF-32LE", &kNS_UTF32LETOUNICODE_CID },
{ NS_UNICODEDECODER_CONTRACTID_BASE "T.61-8bit", &kNS_T61TOUNICODE_CID },
{ NS_UNICODEDECODER_CONTRACTID_BASE "x-user-defined", &kNS_USERDEFINEDTOUNICODE_CID },
{ NS_UNICODEDECODER_CONTRACTID_BASE "x-mac-arabic", &kNS_MACARABICTOUNICODE_CID },
@ -1137,9 +1111,6 @@ static const mozilla::Module::ContractIDEntry kUConvContracts[] = {
{ NS_UNICODEENCODER_CONTRACTID_BASE "UTF-16BE", &kNS_UNICODETOUTF16BE_CID },
{ NS_UNICODEENCODER_CONTRACTID_BASE "UTF-16LE", &kNS_UNICODETOUTF16LE_CID },
{ NS_UNICODEENCODER_CONTRACTID_BASE "UTF-16", &kNS_UNICODETOUTF16_CID },
{ NS_UNICODEENCODER_CONTRACTID_BASE "UTF-32BE", &kNS_UNICODETOUTF32BE_CID },
{ NS_UNICODEENCODER_CONTRACTID_BASE "UTF-32LE", &kNS_UNICODETOUTF32LE_CID },
{ NS_UNICODEENCODER_CONTRACTID_BASE "UTF-32", &kNS_UNICODETOUTF32_CID },
{ NS_UNICODEENCODER_CONTRACTID_BASE "T.61-8bit", &kNS_UNICODETOT61_CID },
{ NS_UNICODEENCODER_CONTRACTID_BASE "x-user-defined", &kNS_UNICODETOUSERDEFINED_CID },
{ NS_UNICODEENCODER_CONTRACTID_BASE "Adobe-Symbol-Encoding", &kNS_UNICODETOSYMBOL_CID },

Просмотреть файл

@ -1,228 +0,0 @@
/* Test case for bug 335531
*
* Uses nsIConverterInputStream to decode UTF-32 text with all combinations
* of UTF-32BE and UTF-32LE with and without BOM.
*
* Sample text is: "Все счастливые семьи похожи друг на друга, каждая несчастливая семья несчастлива по-своему."
*
* The enclosing quotation marks are included in the sample text to test that
* UTF-32LE is recognized even when there is no BOM and the UTF-32LE decoder is
* not explicitly called. This only works when the first character of the text
* is an eight-bit character.
*/
const beBOM="%00%00%FE%FF";
const leBOM="%FF%FE%00%00";
const outBOM="\uFEFF";
const sampleUTF32BE="%00%00%00%22%00%00%04%12%00%00%04%41%00%00%04%35%00%00%00%20%00%00%04%41%00%00%04%47%00%00%04%30%00%00%04%41%00%00%04%42%00%00%04%3B%00%00%04%38%00%00%04%32%00%00%04%4B%00%00%04%35%00%00%00%20%00%00%04%41%00%00%04%35%00%00%04%3C%00%00%04%4C%00%00%04%38%00%00%00%20%00%00%04%3F%00%00%04%3E%00%00%04%45%00%00%04%3E%00%00%04%36%00%00%04%38%00%00%00%20%00%00%04%34%00%00%04%40%00%00%04%43%00%00%04%33%00%00%00%20%00%00%04%3D%00%00%04%30%00%00%00%20%00%00%04%34%00%00%04%40%00%00%04%43%00%00%04%33%00%00%04%30%00%00%00%2C%00%00%00%20%00%00%04%3A%00%00%04%30%00%00%04%36%00%00%04%34%00%00%04%30%00%00%04%4F%00%00%00%20%00%00%04%3D%00%00%04%35%00%00%04%41%00%00%04%47%00%00%04%30%00%00%04%41%00%00%04%42%00%00%04%3B%00%00%04%38%00%00%04%32%00%00%04%30%00%00%04%4F%00%00%00%20%00%00%04%41%00%00%04%35%00%00%04%3C%00%00%04%4C%00%00%04%4F%00%00%00%20%00%00%04%3D%00%00%04%35%00%00%04%41%00%00%04%47%00%00%04%30%00%00%04%41%00%00%04%42%00%00%04%3B%00%00%04%38%00%00%04%32%00%00%04%30%00%00%00%20%00%00%04%3F%00%00%04%3E%00%00%00%2D%00%00%04%41%00%00%04%32%00%00%04%3E%00%00%04%35%00%00%04%3C%00%00%04%43%00%00%00%2E%00%00%00%22";
const sampleUTF32LE="%22%00%00%00%12%04%00%00%41%04%00%00%35%04%00%00%20%00%00%00%41%04%00%00%47%04%00%00%30%04%00%00%41%04%00%00%42%04%00%00%3B%04%00%00%38%04%00%00%32%04%00%00%4B%04%00%00%35%04%00%00%20%00%00%00%41%04%00%00%35%04%00%00%3C%04%00%00%4C%04%00%00%38%04%00%00%20%00%00%00%3F%04%00%00%3E%04%00%00%45%04%00%00%3E%04%00%00%36%04%00%00%38%04%00%00%20%00%00%00%34%04%00%00%40%04%00%00%43%04%00%00%33%04%00%00%20%00%00%00%3D%04%00%00%30%04%00%00%20%00%00%00%34%04%00%00%40%04%00%00%43%04%00%00%33%04%00%00%30%04%00%00%2C%00%00%00%20%00%00%00%3A%04%00%00%30%04%00%00%36%04%00%00%34%04%00%00%30%04%00%00%4F%04%00%00%20%00%00%00%3D%04%00%00%35%04%00%00%41%04%00%00%47%04%00%00%30%04%00%00%41%04%00%00%42%04%00%00%3B%04%00%00%38%04%00%00%32%04%00%00%30%04%00%00%4F%04%00%00%20%00%00%00%41%04%00%00%35%04%00%00%3C%04%00%00%4C%04%00%00%4F%04%00%00%20%00%00%00%3D%04%00%00%35%04%00%00%41%04%00%00%47%04%00%00%30%04%00%00%41%04%00%00%42%04%00%00%3B%04%00%00%38%04%00%00%32%04%00%00%30%04%00%00%20%00%00%00%3F%04%00%00%3E%04%00%00%2D%00%00%00%41%04%00%00%32%04%00%00%3E%04%00%00%35%04%00%00%3C%04%00%00%43%04%00%00%2E%00%00%00%22%00%00%00";
const expectedNoBOM = "\"\u0412\u0441\u0435 \u0441\u0447\u0430\u0441\u0442\u043B\u0438\u0432\u044B\u0435 \u0441\u0435\u043C\u044C\u0438 \u043F\u043E\u0445\u043E\u0436\u0438 \u0434\u0440\u0443\u0433 \u043D\u0430 \u0434\u0440\u0443\u0433\u0430, \u043A\u0430\u0436\u0434\u0430\u044F \u043D\u0435\u0441\u0447\u0430\u0441\u0442\u043B\u0438\u0432\u0430\u044F \u0441\u0435\u043C\u044C\u044F \u043D\u0435\u0441\u0447\u0430\u0441\u0442\u043B\u0438\u0432\u0430 \u043F\u043E-\u0441\u0432\u043E\u0435\u043C\u0443.\"";
/**
 * Build the percent-encoded payload for one decoding run.
 *
 * @param withBOM  whether a byte order mark is prepended to the sample
 * @param charset  suffix of the sample constant to use ("sample" + charset);
 *                 "UTF32BE" selects the big-endian BOM, anything else the
 *                 little-endian one
 * @return the sample text, preceded by the matching BOM when requested
 */
function makeText(withBOM, charset)
{
  var payload = eval("sample" + charset);
  if (!withBOM) {
    return payload;
  }
  var bom = (charset == "UTF32BE") ? beBOM : leBOM;
  return bom + payload;
}
/**
 * Decode one sample through nsIConverterInputStream and compare the result
 * with the expected text.
 *
 * @param withBOM       whether the input starts with a byte order mark
 * @param charset       sample selector passed to makeText()
 * @param charsetDec    charset name declared in the data: URI
 * @param decoder       charset name handed to the converter stream
 * @param bufferLength  buffer size handed to the converter stream
 */
function testCase(withBOM, charset, charsetDec, decoder, bufferLength)
{
  var payload = makeText(withBOM, charset);
  var dataURI = "data:text/plain;charset=" + charsetDec + "," + payload;

  var IOService = Components.Constructor("@mozilla.org/network/io-service;1",
                                         "nsIIOService");
  var ConverterInputStream =
      Components.Constructor("@mozilla.org/intl/converter-input-stream;1",
                             "nsIConverterInputStream",
                             "init");

  var rawStream = new IOService().newChannel(dataURI, "", null).open();
  var converter = new ConverterInputStream(rawStream,
                                           decoder,
                                           bufferLength,
                                           0xFFFD);

  if (!(converter instanceof
        Components.interfaces.nsIUnicharLineInputStream))
    throw "not line input stream";

  // Read line by line until readLine() reports end of input; the final
  // line is still appended before the loop stops.
  var decoded = "";
  for (;;) {
    var line = {};
    var more = converter.readLine(line);
    decoded += line.value;
    if (!more)
      break;
  }

  // The explicit BE / LE decoders pass the BOM through to the output.
  var keepsBOM = withBOM && (decoder == "UTF-32BE" || decoder == "UTF-32LE");
  var expected = keepsBOM ? outBOM + expectedNoBOM : expectedNoBOM;

  do_check_eq(decoded, expected);
}
// Tests conversion of one to three byte(s) from UTF-32 to Unicode
const expectedString = "\ufffd";
const charset = "UTF-32";
/**
 * Feed a short input to the scriptable converter configured for |charset|
 * and check that the result equals |expectedString|.
 *
 * A conversion that throws is treated as producing U+FFFD.
 *
 * @param inString  the raw input handed to ConvertToUnicode()
 */
function testCase2(inString) {
  var ScriptableUnicodeConverter =
      Components.Constructor("@mozilla.org/intl/scriptableunicodeconverter",
                             "nsIScriptableUnicodeConverter");

  var converter = new ScriptableUnicodeConverter();
  converter.charset = charset;

  // Start from the fallback value; it is only replaced when both
  // ConvertToUnicode() and Finish() complete without throwing.
  var decoded = "\ufffd";
  try {
    decoded = converter.ConvertToUnicode(inString) + converter.Finish();
  } catch(e) {
    // keep the fallback
  }

  do_check_eq(escape(decoded), escape(expectedString));
}
/*
* Uses nsIConverterInputStream to decode UTF-32 text with surrogate characters
*
* Sample text is: "g" in Mathematical Bold Symbols (U+1D420)
*
* The test uses buffers of 4 different lengths to test end of buffer in mid-
* UTF32 character
*/
// Single supplementary character
// expected: surrogate pair
const test0="%00%00%00%2D%00%00%00%2D%00%01%D4%20%00%00%00%2D%00%00%00%2D";
const expected0 = "--\uD835\uDC20--";
// High surrogate followed by low surrogate (invalid in UTF-32)
// expected: two replacement chars
const test1="%00%00%00%2D%00%00%00%2D%00%00%D8%35%00%00%DC%20%00%00%00%2D%00%00%00%2D";
const expected1 = "--\uFFFD\uFFFD--";
// Lone high surrogate
// expected: one replacement char
const test2="%00%00%00%2D%00%00%00%2D%00%00%D8%35%00%00%00%2D%00%00%00%2D";
const expected2 = "--\uFFFD--";
// Lone low surrogate
// expected: one replacement char
const test3="%00%00%00%2D%00%00%00%2D%00%00%DC%20%00%00%00%2D%00%00%00%2D";
const expected3 = "--\uFFFD--";
// Two high surrogates
// expected: two replacement chars
const test4="%00%00%00%2D%00%00%00%2D%00%00%D8%35%00%00%D8%35%00%00%00%2D%00%00%00%2D";
const expected4 = "--\uFFFD\uFFFD--";
// Two low surrogates
// expected: two replacement chars
const test5="%00%00%00%2D%00%00%00%2D%00%00%DC%20%00%00%DC%20%00%00%00%2D%00%00%00%2D";
const expected5 = "--\uFFFD\uFFFD--";
// Low surrogate followed by high surrogate
// expected: two replacement chars
const test6="%00%00%00%2D%00%00%00%2D%00%00%DC%20%00%00%D8%35%00%00%00%2D%00%00%00%2D";
const expected6 = "--\uFFFD\uFFFD--";
// Lone high surrogate followed by supplementary character
// expected: replacement char followed by surrogate pair
const test7="%00%00%00%2D%00%00%00%2D%00%00%D8%35%00%01%D4%20%00%00%00%2D%00%00%00%2D";
const expected7 = "--\uFFFD\uD835\uDC20--";
// Lone low surrogate followed by supplementary character
// expected: replacement char followed by surrogate pair
const test8="%00%00%00%2D%00%00%00%2D%00%00%DC%20%00%01%D4%20%00%00%00%2D%00%00%00%2D";
const expected8 = "--\uFFFD\uD835\uDC20--";
// Supplementary character followed by lone high surrogate
// expected: surrogate pair followed by replacement char
const test9="%00%00%00%2D%00%00%00%2D%00%01%D4%20%00%00%D8%35%00%00%00%2D%00%00%00%2D";
const expected9 = "--\uD835\uDC20\uFFFD--";
// Supplementary character followed by lone low surrogate
// expected: surrogate pair followed by replacement char
const test10="%00%00%00%2D%00%00%00%2D%00%01%D4%20%00%00%DC%20%00%00%00%2D%00%00%00%2D";
const expected10 = "--\uD835\uDC20\uFFFD--";
// Lone high surrogate at the end of the input
// expected: one replacement char (invalid in UTF-32)
const test11="%00%00%00%2D%00%00%00%2D%00%00%00%2D%00%00%00%2D%00%00%D8%35";
const expected11 = "----\uFFFD";
// Half code unit at the end of the input
// expected: nothing
const test12="%00%00%00%2D%00%00%00%2D%00%00%00%2D%00%00%00%2D%D8";
const expected12 = "----";
/**
 * Decode the numbered surrogate test input with the UTF-32BE decoder and
 * compare it with the matching expected string.
 *
 * @param testNumber    index N selecting the constants testN / expectedN
 * @param bufferLength  buffer size handed to the converter stream
 */
function testCase3(testNumber, bufferLength)
{
  var payload = eval("test" + testNumber);
  var dataURI = "data:text/plain;charset=UTF32BE," + payload;

  var IOService = Components.Constructor("@mozilla.org/network/io-service;1",
                                         "nsIIOService");
  var ConverterInputStream =
      Components.Constructor("@mozilla.org/intl/converter-input-stream;1",
                             "nsIConverterInputStream",
                             "init");

  var rawStream = new IOService().newChannel(dataURI, "", null).open();
  var converter = new ConverterInputStream(rawStream,
                                           "UTF-32BE",
                                           bufferLength,
                                           0xFFFD);

  if (!(converter instanceof
        Components.interfaces.nsIUnicharLineInputStream))
    throw "not line input stream";

  // Read line by line until readLine() reports end of input; the final
  // line is still appended before the loop stops.
  var decoded = "";
  for (;;) {
    var line = {};
    var more = converter.readLine(line);
    decoded += line.value;
    if (!more)
      break;
  }

  // escape the strings before comparing for better readability
  var expected = eval("expected" + testNumber);
  do_check_eq(escape(decoded), escape(expected));
}
/**
 * Test entry point: runs every BOM / byte order / decoder combination at
 * two buffer lengths, then the short-input checks, then the surrogate
 * tests at buffer lengths 4 through 7.
 */
function run_test()
{
  // [sample charset, decoder] pairs, in the order they are exercised.
  var combos = [
    ["UTF32LE", "UTF-32"],
    ["UTF32BE", "UTF-32"],
    ["UTF32LE", "UTF-32LE"],
    ["UTF32BE", "UTF-32BE"]
  ];
  var bufferLengths = [64, 65];
  var bomChoices = [true, false];

  for (var b = 0; b < bufferLengths.length; ++b) {
    for (var w = 0; w < bomChoices.length; ++w) {
      for (var k = 0; k < combos.length; ++k) {
        testCase(bomChoices[w], combos[k][0], "UTF-32", combos[k][1],
                 bufferLengths[b]);
      }
    }
  }

  // Inputs of one to three bytes.
  var shortInputs = ["A", "AB", "ABC"];
  for (var s = 0; s < shortInputs.length; ++s) {
    testCase2(shortInputs[s]);
  }

  for (var testNumber = 0; testNumber <= 12; ++testNumber) {
    for (var len = 4; len < 8; ++len) {
      testCase3(testNumber, len);
    }
  }
}

Просмотреть файл

@ -18,7 +18,6 @@ function run_test() {
// exclude known non-ASCII compatible charsets
if (charset.substr(0, "UTF-16".length) == "UTF-16" ||
charset.substr(0, "UTF-32".length) == "UTF-32" ||
charset == "x-imap4-modified-utf7") {
dump("skipping " + counter + " " + charset + "\n");
continue;

Просмотреть файл

@ -103,7 +103,6 @@ CPPSRCS = \
nsUTF7ToUnicode.cpp \
nsMUTF7ToUnicode.cpp \
nsUCS2BEToUnicode.cpp \
nsUTF32ToUnicode.cpp \
nsT61ToUnicode.cpp \
nsUserDefinedToUnicode.cpp \
nsUnicodeToAscii.cpp \
@ -160,7 +159,6 @@ CPPSRCS = \
nsUnicodeToUTF7.cpp \
nsUnicodeToMUTF7.cpp \
nsUnicodeToUCS2BE.cpp \
nsUnicodeToUTF32.cpp \
nsUnicodeToT61.cpp \
nsUnicodeToUserDefined.cpp \
nsUnicodeToSymbol.cpp \

Просмотреть файл

@ -482,11 +482,6 @@
#define NS_UNICODETOUTF16BE_CID \
{ 0xba6151ad, 0x1dfa, 0x11d3, {0xb3, 0xbf, 0x0, 0x80, 0x5f, 0x8a, 0x66, 0x70}}
// Class ID for our UnicodeToUTF32BE charset converter
// {BA6151AE-1DFA-11d3-B3BF-00805F8A6670}
#define NS_UNICODETOUTF32BE_CID \
{ 0xba6151ae, 0x1dfa, 0x11d3, {0xb3, 0xbf, 0x0, 0x80, 0x5f, 0x8a, 0x66, 0x70}}
// Class ID for our UnicodeToT61 charset converter
// {BA6151AF-1DFA-11d3-B3BF-00805F8A6670}
#define NS_UNICODETOT61_CID \
@ -512,11 +507,6 @@
#define NS_UTF16BETOUNICODE_CID \
{ 0xba6151b2, 0x1dfa, 0x11d3, {0xb3, 0xbf, 0x0, 0x80, 0x5f, 0x8a, 0x66, 0x70}}
// Class ID for our UTF32BEToUnicode charset converter
// {BA6151B3-1DFA-11d3-B3BF-00805F8A6670}
#define NS_UTF32BETOUNICODE_CID \
{ 0xba6151b3, 0x1dfa, 0x11d3, {0xb3, 0xbf, 0x0, 0x80, 0x5f, 0x8a, 0x66, 0x70}}
// Class ID for our T61ToUnicode charset converter
// {BA6151B4-1DFA-11d3-B3BF-00805F8A6670}
#define NS_T61TOUNICODE_CID \
@ -527,31 +517,16 @@
#define NS_UNICODETOUTF16LE_CID \
{ 0xba6151b5, 0x1dfa, 0x11d3, {0xb3, 0xbf, 0x0, 0x80, 0x5f, 0x8a, 0x66, 0x70}}
// Class ID for our UnicodeToUTF32LE charset converter
// {BA6151B6-1DFA-11d3-B3BF-00805F8A6670}
#define NS_UNICODETOUTF32LE_CID \
{ 0xba6151b6, 0x1dfa, 0x11d3, {0xb3, 0xbf, 0x0, 0x80, 0x5f, 0x8a, 0x66, 0x70}}
// Class ID for our UTF16ToUnicode charset converter
// {d673255d-1184-400a-b0b5-ee9d1295bd85}
#define NS_UTF16TOUNICODE_CID \
{ 0xd673255d, 0x1184, 0x400a, {0xb0, 0xb5, 0xee,0x9d, 0x12, 0x95, 0xbd, 0x85}}
// Class ID for our UTF32ToUnicode charset converter
// {30DCD313-73E1-447d-8339-37744952154E}
#define NS_UTF32TOUNICODE_CID \
{ 0x30dcd313, 0x73e1, 0x447d, {0x83, 0x39, 0x37, 0x74, 0x49, 0x52, 0x15, 0x4e}}
// Class ID for our UTF16LEToUnicode charset converter
// {BA6151B7-1DFA-11d3-B3BF-00805F8A6670}
#define NS_UTF16LETOUNICODE_CID \
{ 0xba6151b7, 0x1dfa, 0x11d3, {0xb3, 0xbf, 0x0, 0x80, 0x5f, 0x8a, 0x66, 0x70}}
// Class ID for our UTF32LEToUnicode charset converter
// {BA6151B8-1DFA-11d3-B3BF-00805F8A6670}
#define NS_UTF32LETOUNICODE_CID \
{ 0xba6151b8, 0x1dfa, 0x11d3, {0xb3, 0xbf, 0x0, 0x80, 0x5f, 0x8a, 0x66, 0x70}}
// Class ID for our ISOIR111ToUnicode charset converter
#define NS_ISOIR111TOUNICODE_CID \
{ 0x9416bfb1, 0x1f93, 0x11d3, {0xb3, 0xbf, 0x0, 0x80, 0x5f, 0x8a, 0x66, 0x70}}
@ -592,10 +567,6 @@
#define NS_UNICODETOUTF16_CID \
{ 0x49b38f12, 0x6193, 0x11d3, {0xb3, 0xc5, 0x0, 0x80, 0x5f, 0x8a, 0x66, 0x70}}
// {49B38F14-6193-11d3-B3C5-00805F8A6670}
#define NS_UNICODETOUTF32_CID \
{ 0x49b38f14, 0x6193, 0x11d3, {0xb3, 0xc5, 0x0, 0x80, 0x5f, 0x8a, 0x66, 0x70}}
// {6803CAC4-1E3B-11d5-A145-005004832142}
#define NS_MACDEVANAGARITOUNICODE_CID \
{ 0x6803cac4, 0x1e3b, 0x11d5, { 0xa1, 0x45, 0x0, 0x50, 0x4, 0x83, 0x21, 0x42 } }

Просмотреть файл

@ -1,309 +0,0 @@
/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
/* vim:expandtab:shiftwidth=2:tabstop=2:
*/
/* ***** BEGIN LICENSE BLOCK *****
* Version: MPL 1.1/GPL 2.0/LGPL 2.1
*
* The contents of this file are subject to the Mozilla Public License Version
* 1.1 (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
* http://www.mozilla.org/MPL/
*
* Software distributed under the License is distributed on an "AS IS" basis,
* WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
* for the specific language governing rights and limitations under the
* License.
*
* The Original Code is Mozilla Communicator client code.
*
* The Initial Developer of the Original Code is
* Netscape Communications Corporation.
* Portions created by the Initial Developer are Copyright (C) 1998
* the Initial Developer. All Rights Reserved.
*
* Contributor(s):
* Jungshik Shin <jshin@mailaps.org>
*
* Alternatively, the contents of this file may be used under the terms of
* either of the GNU General Public License Version 2 or later (the "GPL"),
* or the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
* in which case the provisions of the GPL or the LGPL are applicable instead
* of those above. If you wish to allow use of your version of this file only
* under the terms of either the GPL or the LGPL, and not to allow others to
* use your version of this file under the terms of the MPL, indicate your
* decision by deleting the provisions above and replace them with the notice
* and other provisions required by the GPL or the LGPL. If you do not delete
* the provisions above, a recipient may use your version of this file under
* the terms of any one of the MPL, the GPL or the LGPL.
*
* ***** END LICENSE BLOCK ***** */
#include "nsUCSupport.h"
#include "nsUTF32ToUnicode.h"
#include "nsCharTraits.h"
#include <string.h>
//----------------------------------------------------------------------
// static functions and macro definition common to nsUTF32(BE|LE)ToUnicode
// Assemble a 32-bit value from the four bytes starting at |s|, which may be
// a (possibly signed) char pointer with no particular alignment.
//
// The bytes are combined one at a time instead of loading through a
// PRUint32 pointer: a casted load requires 4-byte alignment on some targets
// and reads a char buffer through an incompatible type. Each byte is first
// narrowed to PRUint8 (dropping any sign extension) and then widened to
// PRUint32 so the shifts are done in unsigned arithmetic. The result does
// not depend on host endianness, so no per-platform variants are needed.

// Little-endian input: the first byte is the least significant.
#define LE_STRING_TO_UCS4(s)                                                  \
  (PRUint32(PRUint8(*(s)))                | (PRUint32(PRUint8(*((s) + 1))) << 8) | \
   (PRUint32(PRUint8(*((s) + 2))) << 16)  | (PRUint32(PRUint8(*((s) + 3))) << 24))

// Big-endian input: the first byte is the most significant.
#define BE_STRING_TO_UCS4(s)                                                  \
  (PRUint32(PRUint8(*((s) + 3)))          | (PRUint32(PRUint8(*((s) + 2))) << 8) | \
   (PRUint32(PRUint8(*((s) + 1))) << 16)  | (PRUint32(PRUint8(*(s))) << 24))
/**
 * Conversion loop shared by the big- and little-endian UTF-32 decoders:
 * turns UTF-32 input bytes into UTF-16 code units.
 *
 * @param aSrc         input bytes
 * @param aSrcLength   in: number of input bytes available;
 *                     out: number of input bytes consumed (left untouched
 *                     when all input was merely buffered, see below)
 * @param aDest        output buffer of UTF-16 code units
 * @param aDestLength  in: capacity of aDest in code units (must be > 0);
 *                     out: number of code units written
 * @param aState       in/out: number of bytes (0-3) still missing to
 *                     complete the UTF-32 unit held in aBuffer
 * @param aBuffer      4-byte scratch area holding an incomplete UTF-32
 *                     unit between calls
 * @param aIsLE        true to read the input as little-endian, false as
 *                     big-endian
 *
 * @return NS_OK when all input was converted,
 *         NS_OK_UDEC_MOREINPUT when the input ended inside a UTF-32 unit,
 *         NS_OK_UDEC_MOREOUTPUT when input remains but the output buffer
 *         cannot take the next character,
 *         NS_ERROR_INVALID_ARG when *aState >= 4 or *aDestLength <= 0.
 *
 * Surrogate code points and values of 0x110000 and above are written as
 * UCS2_REPLACEMENT_CHAR; values from 0x10000 up to 0x10FFFF are written as
 * a surrogate pair.
 */
static nsresult ConvertCommon(const char * aSrc,
                              PRInt32 * aSrcLength,
                              PRUnichar * aDest,
                              PRInt32 * aDestLength,
                              PRUint16 * aState,
                              PRUint8 * aBuffer,
                              PRBool aIsLE)
{
  NS_ENSURE_TRUE(*aState < 4, NS_ERROR_INVALID_ARG);
  NS_ENSURE_TRUE(*aDestLength > 0, NS_ERROR_INVALID_ARG);

  const char *src = aSrc;
  const char *srcEnd = aSrc + *aSrcLength;

  PRUnichar *dest = aDest;
  PRUnichar *destEnd = aDest + *aDestLength;

  // The new input is too short to even complete the buffered unit: append
  // all of it to aBuffer and ask for more.
  if (*aState > *aSrcLength)
  {
    memcpy(aBuffer + 4 - *aState, src, *aSrcLength);
    *aDestLength = 0;
    *aState -= *aSrcLength;
    return NS_OK_UDEC_MOREINPUT;
  }

  PRUint32 ucs4;

  // prev. run left a partial UTF-32 seq.
  if (*aState > 0)
  {
    // Complete the buffered unit with the first *aState bytes of the input.
    memcpy(aBuffer + 4 - *aState, src, *aState);
    ucs4 = aIsLE ? LE_STRING_TO_UCS4(aBuffer) : BE_STRING_TO_UCS4(aBuffer);
    if (ucs4 < 0x10000L) // BMP
    {
      *dest++= IS_SURROGATE(ucs4) ? UCS2_REPLACEMENT_CHAR : PRUnichar(ucs4);
    }
    else if (ucs4 < 0x110000L) // plane 1 through plane 16
    {
      if (destEnd - dest < 2)
      {
        // No room for a surrogate pair: report nothing consumed and leave
        // *aState as it was so the same bytes are merged again next time.
        *aSrcLength = 0;
        *aDestLength = 0;
        return NS_OK_UDEC_MOREOUTPUT;
      }
      *dest++= H_SURROGATE(ucs4);
      *dest++= L_SURROGATE(ucs4);
    }
    // Codepoints in plane 17 and higher (> 0x10ffff)
    // are not representable in UTF-16 we use for the internal
    // character representation. This is not a problem
    // because Unicode/ISO 10646 will never assign characters
    // in plane 17 and higher. Therefore, we convert them
    // to Unicode replacement character (0xfffd).
    else
      *dest++ = UCS2_REPLACEMENT_CHAR;
    src += *aState;
    *aState = 0;
  }

  nsresult rv = NS_OK; // conversion result

  for ( ; src < srcEnd && dest < destEnd; src += 4)
  {
    if (srcEnd - src < 4)
    {
      // fill up aBuffer until src buffer gets exhausted.
      memcpy(aBuffer, src, srcEnd - src);
      *aState = 4 - (srcEnd - src); // set add. char to read in next run
      src = srcEnd;
      rv = NS_OK_UDEC_MOREINPUT;
      break;
    }

    ucs4 = aIsLE ? LE_STRING_TO_UCS4(src) : BE_STRING_TO_UCS4(src);
    if (ucs4 < 0x10000L) // BMP
    {
      *dest++= IS_SURROGATE(ucs4) ? UCS2_REPLACEMENT_CHAR : PRUnichar(ucs4);
    }
    else if (ucs4 < 0x110000L) // plane 1 through plane 16
    {
      // Leave this unit unconsumed when the pair does not fit.
      if (destEnd - dest < 2)
        break;
      // ((ucs4 - 0x10000) >> 10) + 0xd800;
      *dest++= H_SURROGATE(ucs4);
      *dest++= L_SURROGATE(ucs4);
    }
    else // plane 17 and higher
      *dest++ = UCS2_REPLACEMENT_CHAR;
  }

  // Output not finished, output buffer too short. With rv still NS_OK the
  // loop can only have stopped with input left over because the output
  // buffer is full or because a surrogate pair did not fit in the last
  // free slot; in the latter case dest is still below destEnd, so the test
  // is on the remaining input alone.
  if ((NS_OK == rv) && (src < srcEnd))
    rv = NS_OK_UDEC_MOREOUTPUT;

  *aSrcLength = src - aSrc;
  *aDestLength = dest - aDest;

  return rv;
}
//----------------------------------------------------------------------
// Class nsUTF32ToUnicodeBase [implementation]
// Constructor: start out in the same state a Reset() leaves behind.
nsUTF32ToUnicodeBase::nsUTF32ToUnicodeBase()
  : nsBasicDecoderSupport()
{
  // While the base sub-object is being constructed, an unqualified Reset()
  // resolves to this class's version anyway; spell that out.
  nsUTF32ToUnicodeBase::Reset();
}
//----------------------------------------------------------------------
// Subclassing of nsDecoderSupport class [implementation]
/**
 * Report an upper bound on the number of PRUnichars Convert() may write
 * for aSrcLength input bytes.
 *
 * Every 4 input bytes produce at most 2 PRUnichars (a surrogate pair for
 * a non-BMP character), hence aSrcLength / 2. On top of that, up to 3
 * bytes of an incomplete sequence may already be buffered from an earlier
 * call, so even a single new byte can complete a non-BMP character; the
 * extra 2 covers that case. It also keeps the result positive, which
 * matters because ConvertCommon rejects a destination length of 0.
 *
 * @param aSrc        unused
 * @param aSrcLength  number of input bytes about to be converted
 * @param aDestLength out: maximum number of PRUnichars needed
 */
NS_IMETHODIMP nsUTF32ToUnicodeBase::GetMaxLength(const char * aSrc,
                                                 PRInt32 aSrcLength,
                                                 PRInt32 * aDestLength)
{
  *aDestLength = aSrcLength / 2 + 2;
  return NS_OK;
}
//----------------------------------------------------------------------
// Subclassing of nsBasicDecoderSupport class [implementation]
// Forget any partially received UTF-32 sequence.
NS_IMETHODIMP nsUTF32ToUnicodeBase::Reset()
{
  // Clear the staging buffer for an incomplete sequence ...
  memset(mBufferInc, 0, sizeof(mBufferInc));
  // ... and record that no additional bytes are awaited to complete one.
  mState = 0;
  return NS_OK;
}
//----------------------------------------------------------------------
// Class nsUTF32BEToUnicode [implementation]
//----------------------------------------------------------------------
// Subclassing of nsUTF32ToUnicodeBase class [implementation]
// Decode big-endian UTF-32: forwards to ConvertCommon with this decoder's
// streaming state and the little-endian flag cleared.
NS_IMETHODIMP nsUTF32BEToUnicode::Convert(const char * aSrc,
                                          PRInt32 * aSrcLength,
                                          PRUnichar * aDest,
                                          PRInt32 * aDestLength)
{
  const PRBool isLittleEndian = PR_FALSE;
  const nsresult result = ConvertCommon(aSrc, aSrcLength,
                                        aDest, aDestLength,
                                        &mState, mBufferInc,
                                        isLittleEndian);
  return result;
}
//----------------------------------------------------------------------
// Class nsUTF32LEToUnicode [implementation]
//----------------------------------------------------------------------
// Subclassing of nsUTF32ToUnicodeBase class [implementation]
// Decode little-endian UTF-32: forwards to ConvertCommon with this
// decoder's streaming state and the little-endian flag set.
NS_IMETHODIMP nsUTF32LEToUnicode::Convert(const char * aSrc,
                                          PRInt32 * aSrcLength,
                                          PRUnichar * aDest,
                                          PRInt32 * aDestLength)
{
  const PRBool isLittleEndian = PR_TRUE;
  const nsresult result = ConvertCommon(aSrc, aSrcLength,
                                        aDest, aDestLength,
                                        &mState, mBufferInc,
                                        isLittleEndian);
  return result;
}
//----------------------------------------------------------------------
// Class nsUTF32ToUnicode [implementation]
//----------------------------------------------------------------------
// Subclassing of nsUTF32ToUnicodeBase class [implementation]
// Return to the "nothing seen yet" state: byte order unknown, no BOM found.
NS_IMETHODIMP nsUTF32ToUnicode::Reset()
{
  const nsresult baseResult = nsUTF32ToUnicodeBase::Reset();

  mFoundBOM = PR_FALSE;
  mEndian = kUnknown;
  // 4 is the marker Convert() uses to recognise its very first call; it
  // must be set after the base Reset(), which zeroes mState.
  mState = 4;

  return baseResult;
}
/**
 * Decode UTF-32 whose byte order is not known in advance.
 *
 * On the first call (mState == 4) the leading four bytes select the byte
 * order:
 *   FF FE 00 00            little-endian BOM (skipped, not decoded)
 *   00 00 FE FF            big-endian BOM (skipped, not decoded)
 *   hh hh 00 00, hh != 0   no BOM; guessed little-endian, on the assumption
 *                          that the first character is in [U+0001, U+FFFF]
 *   anything else          no BOM; assumed big-endian (bug 246194)
 * Later calls reuse the byte order chosen then.
 *
 * @return NS_ERROR_ILLEGAL_INPUT if the first call supplies fewer than 4
 *         bytes; NS_OK_UDEC_NOBOMFOUND in place of NS_OK when no BOM has
 *         been seen; otherwise whatever ConvertCommon returns.
 */
NS_IMETHODIMP nsUTF32ToUnicode::Convert(const char * aSrc,
                                        PRInt32 * aSrcLength,
                                        PRUnichar * aDest,
                                        PRInt32 * aDestLength)
{
  // Number of BOM bytes skipped by *this* call (0 or 4).
  PRInt32 bomBytes = 0;

  if (4 == mState)  // Called for the first time.
  {
    if (*aSrcLength < 4)
      return NS_ERROR_ILLEGAL_INPUT;

    const PRUint8 b0 = PRUint8(aSrc[0]);
    const PRUint8 b1 = PRUint8(aSrc[1]);
    const PRUint8 b2 = PRUint8(aSrc[2]);
    const PRUint8 b3 = PRUint8(aSrc[3]);

    // Whatever the outcome, sniffing is over after this call.
    mState = 0;

    if (0xFF == b0 && 0xFE == b1 && 0 == b2 && 0 == b3) {
      mEndian = kLittleEndian;
      bomBytes = 4;
    }
    else if (0 == b0 && 0 == b1 && 0xFE == b2 && 0xFF == b3) {
      mEndian = kBigEndian;
      bomBytes = 4;
    }
    else if ((b0 || b1) && !b2 && !b3) {  // 0xhh 0xhh 0x00 0x00 (hh != 00)
      mEndian = kLittleEndian;
    }
    else {
      // Neither a BOM nor a plausible little-endian pattern. Assume BE
      // (following the Unicode standard); this also covers the plausible
      // big-endian pattern 0x00 0x00 0xhh 0xhh.
      mEndian = kBigEndian;
    }

    if (bomBytes) {
      // Hide the BOM from ConvertCommon so it is not emitted as a character.
      mFoundBOM = PR_TRUE;
      aSrc += bomBytes;
      *aSrcLength -= bomBytes;
    }
  }

  const nsresult rv = ConvertCommon(aSrc, aSrcLength, aDest, aDestLength,
                                    &mState, mBufferInc,
                                    mEndian == kLittleEndian);

  // The skipped BOM still counts as consumed input.
  *aSrcLength += bomBytes;

  // If no BOM has been found and we would return NS_OK, signal the missing
  // BOM instead. Otherwise pass through |rv| from |ConvertCommon|.
  if (rv == NS_OK && !mFoundBOM)
    return NS_OK_UDEC_NOBOMFOUND;
  return rv;
}
// XXX: Bytes of an incomplete trailing sequence left in mBufferInc are never
// flushed; it is undecided whether a Finish() step should handle them.

Просмотреть файл

@ -1,164 +0,0 @@
/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
/* vim:expandtab:shiftwidth=2:tabstop=2:
*/
/* ***** BEGIN LICENSE BLOCK *****
* Version: MPL 1.1/GPL 2.0/LGPL 2.1
*
* The contents of this file are subject to the Mozilla Public License Version
* 1.1 (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
* http://www.mozilla.org/MPL/
*
* Software distributed under the License is distributed on an "AS IS" basis,
* WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
* for the specific language governing rights and limitations under the
* License.
*
* The Original Code is Mozilla Communicator client code.
*
* The Initial Developer of the Original Code is
* Netscape Communications Corporation.
* Portions created by the Initial Developer are Copyright (C) 1998
* the Initial Developer. All Rights Reserved.
*
* Contributor(s):
* Jungshik Shin <jshin@mailaps.org>
*
* Alternatively, the contents of this file may be used under the terms of
* either of the GNU General Public License Version 2 or later (the "GPL"),
* or the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
* in which case the provisions of the GPL or the LGPL are applicable instead
* of those above. If you wish to allow use of your version of this file only
* under the terms of either the GPL or the LGPL, and not to allow others to
* use your version of this file under the terms of the MPL, indicate your
* decision by deleting the provisions above and replace them with the notice
* and other provisions required by the GPL or the LGPL. If you do not delete
* the provisions above, a recipient may use your version of this file under
* the terms of any one of the MPL, the GPL or the LGPL.
*
* ***** END LICENSE BLOCK ***** */
#ifndef nsUTF32ToUnicode_h___
#define nsUTF32ToUnicode_h___
//----------------------------------------------------------------------
// Class nsUTF32ToUnicodeBase [declaration]
/**
* A character set converter from UTF-32 family to Unicode.
* The base class for UTF-32BE/UTF-32LE/UTF-32 to Unicode converters.
* It holds the state that lets a 4-byte sequence be split across
* successive Convert() calls; the concrete subclasses supply Convert().
* @created 08/Dec/2002
* @author Jungshik Shin
*/
class nsUTF32ToUnicodeBase : public nsBasicDecoderSupport
{
protected:
/**
* Class constructor. Accessible only by child classes.
*/
nsUTF32ToUnicodeBase();
// The number of additional bytes needed to complete an incomplete
// UTF-32 4-byte sequence (0 when no sequence is pending).
PRUint16 mState;
// Buffer holding the bytes of an incomplete UTF-32 sequence.
PRUint8 mBufferInc[4];
//--------------------------------------------------------------------
// Subclassing of nsBasicDecoderSupport class [declaration]
// Reports the output size to reserve for aSrcLength input bytes.
NS_IMETHOD GetMaxLength(const char * aSrc, PRInt32 aSrcLength,
PRInt32 * aDestLength);
// Discards any pending incomplete sequence.
NS_IMETHOD Reset();
};
//----------------------------------------------------------------------
// Class nsUTF32BEToUnicode [declaration]
/**
* A character set converter from UTF-32BE to Unicode.
* A subclass of nsUTF32ToUnicodeBase.
* @created 08/Dec/2002
* @author Jungshik Shin
*/
class nsUTF32BEToUnicode : public nsUTF32ToUnicodeBase
{
public:
//--------------------------------------------------------------------
// Subclassing of nsBasicDecoderSupport class [declaration]
// Decodes aSrc as big-endian UTF-32 into aDest.
NS_IMETHOD Convert(const char * aSrc, PRInt32 * aSrcLength,
PRUnichar * aDest, PRInt32 * aDestLength);
};
//----------------------------------------------------------------------
// Class nsUTF32LEToUnicode [declaration]
/**
* A character set converter from UTF-32LE to Unicode.
* A subclass of nsUTF32ToUnicodeBase.
* @created 08/Dec/2002
* @author Jungshik Shin
*/
class nsUTF32LEToUnicode : public nsUTF32ToUnicodeBase
{
public:
//--------------------------------------------------------------------
// Subclassing of nsBasicDecoderSupport class [declaration]
// Decodes aSrc as little-endian UTF-32 into aDest.
NS_IMETHOD Convert(const char * aSrc, PRInt32 * aSrcLength,
PRUnichar * aDest, PRInt32 * aDestLength);
};
//----------------------------------------------------------------------
// Class nsUTF32ToUnicode [declaration]
/**
* A character set converter from UTF-32 to Unicode.
* A subclass of nsUTF32ToUnicodeBase.
* Unlike the BE/LE variants, the byte order is not fixed: it is tracked
* in mEndian and starts out as kUnknown.
* @created 08/Dec/2002
* @author Jungshik Shin
*/
class nsUTF32ToUnicode : public nsUTF32ToUnicodeBase
{
public:
/**
* Class constructor. Puts the decoder into its initial state via Reset().
*/
nsUTF32ToUnicode() { Reset(); }
//--------------------------------------------------------------------
// Subclassing of nsBasicDecoderSupport class [declaration]
NS_IMETHOD Convert(const char * aSrc, PRInt32 * aSrcLength,
PRUnichar * aDest, PRInt32 * aDestLength);
//--------------------------------------------------------------------
// Subclassing of nsUTF32ToUnicodeBase class [declaration]
NS_IMETHOD Reset();
private:
// Byte order of the input.
enum Endian {kUnknown, kBigEndian, kLittleEndian};
Endian mEndian;
// Whether a byte order mark has been seen in the input.
PRBool mFoundBOM;
};
#endif /* nsUTF32ToUnicode_h___ */

Просмотреть файл

@ -1,269 +0,0 @@
/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
/* vim:expandtab:shiftwidth=2:tabstop=2:
*/
/* ***** BEGIN LICENSE BLOCK *****
* Version: MPL 1.1/GPL 2.0/LGPL 2.1
*
* The contents of this file are subject to the Mozilla Public License Version
* 1.1 (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
* http://www.mozilla.org/MPL/
*
* Software distributed under the License is distributed on an "AS IS" basis,
* WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
* for the specific language governing rights and limitations under the
* License.
*
* The Original Code is Mozilla Communicator client code.
*
* The Initial Developer of the Original Code is
* Netscape Communications Corporation.
* Portions created by the Initial Developer are Copyright (C) 1998
* the Initial Developer. All Rights Reserved.
*
* Contributor(s):
* Jungshik Shin <jshin@mailaps.org>
*
* Alternatively, the contents of this file may be used under the terms of
* either of the GNU General Public License Version 2 or later (the "GPL"),
* or the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
* in which case the provisions of the GPL or the LGPL are applicable instead
* of those above. If you wish to allow use of your version of this file only
* under the terms of either the GPL or the LGPL, and not to allow others to
* use your version of this file under the terms of the MPL, indicate your
* decision by deleting the provisions above and replace them with the notice
* and other provisions required by the GPL or the LGPL. If you do not delete
* the provisions above, a recipient may use your version of this file under
* the terms of any one of the MPL, the GPL or the LGPL.
*
* ***** END LICENSE BLOCK ***** */
#include <string.h>
#include "nsUCSupport.h"
#include "nsUnicodeToUTF32.h"
// Store the 32-bit value |u| into the four bytes starting at |s|.
//
// UCS4_TO_LE_STRING writes the least significant byte first,
// UCS4_TO_BE_STRING writes the most significant byte first.
//
// Both macros store byte by byte on every platform: |s| points into an
// arbitrary char buffer, so writing through it as a PRUint32* would be an
// unaligned, type-punned store. |s| is parenthesized so that pointer
// expressions such as |buf + n| work as arguments. |u| and |s| are each
// evaluated four times and must not have side effects.
#define UCS4_TO_LE_STRING(u, s)                   \
  PR_BEGIN_MACRO                                  \
    (s)[0] = PRUint8((u) & 0xffL);                \
    (s)[1] = PRUint8(((u) >> 8) & 0xffL);         \
    (s)[2] = PRUint8(((u) >> 16) & 0xffL);        \
    (s)[3] = PRUint8(((u) >> 24) & 0xffL);        \
  PR_END_MACRO

#define UCS4_TO_BE_STRING(u, s)                   \
  PR_BEGIN_MACRO                                  \
    (s)[0] = PRUint8(((u) >> 24) & 0xffL);        \
    (s)[1] = PRUint8(((u) >> 16) & 0xffL);        \
    (s)[2] = PRUint8(((u) >> 8) & 0xffL);         \
    (s)[3] = PRUint8((u) & 0xffL);                \
  PR_END_MACRO
//----------------------------------------------------------------------
// Static functions common to nsUnicodeToUTF32LE and nsUnicodeToUTF32BE
/**
 * Encode UTF-16 code units as UTF-32, 4 bytes per code point. Shared by
 * the UTF-32LE and UTF-32BE encoders.
 *
 * @param aSrc           input UTF-16 code units
 * @param aSrcLength     in: number of code units available;
 *                       out: number consumed (written only on
 *                       NS_OK_UENC_MOREOUTPUT; otherwise all were consumed)
 * @param aDest          output buffer
 * @param aDestLength    in: capacity of aDest in bytes;
 *                       out: number of bytes written
 * @param aHighSurrogate in/out: high surrogate left over from the end of
 *                       the previous call, or 0 if none
 * @param aBOM           in/out: BOM still to be emitted ahead of the
 *                       data, or 0; cleared once it has been written
 * @param aIsLE          PR_TRUE for little-endian output, PR_FALSE for
 *                       big-endian
 *
 * @return NS_OK                  all input was encoded
 *         NS_OK_UENC_MOREINPUT   input ended with a high surrogate, which
 *                                is kept in *aHighSurrogate
 *         NS_OK_UENC_MOREOUTPUT  aDest is too small
 *
 * Unpaired surrogates are written out as-is, as their own code point.
 */
static nsresult ConvertCommon(const PRUnichar * aSrc,
                              PRInt32 * aSrcLength,
                              char * aDest,
                              PRInt32 * aDestLength,
                              PRUnichar * aHighSurrogate,
                              PRUnichar * aBOM,
                              PRBool aIsLE)
{
  const PRUnichar * src = aSrc;
  const PRUnichar * srcEnd = aSrc + *aSrcLength;
  char * dest = aDest;
  const char * destEnd = aDest + *aDestLength;
  PRUint32 ucs4;

  // Handle BOM if necessary
  if (0 != *aBOM)
  {
    if (destEnd - dest < 4) {
      *aSrcLength = *aDestLength = 0;
      return NS_OK_UENC_MOREOUTPUT;
    }
    // Write the BOM in the same byte order as the data that follows it,
    // without assuming |dest| is suitably aligned for a 32-bit store.
    ucs4 = PRUint32(*aBOM);
    if (aIsLE)
      UCS4_TO_LE_STRING(ucs4, dest);
    else
      UCS4_TO_BE_STRING(ucs4, dest);
    *aBOM = 0;
    dest += 4;
  }

  // left-over high surrogate code point from the prev. run.
  if (*aHighSurrogate)
  {
    if (! *aSrcLength)
    {
      *aDestLength = dest - aDest;
      return NS_OK_UENC_MOREINPUT;
    }
    // Measure the space that is actually left, not the original capacity.
    if (destEnd - dest < 4)
    {
      *aSrcLength = 0;
      *aDestLength = dest - aDest;
      return NS_OK_UENC_MOREOUTPUT;
    }
    if ((*src & 0xfc00) != 0xdc00)
    {
      // Not a low surrogate codepoint, so the stored high surrogate is
      // unpaired. Emit it alone and leave *src for the main loop below;
      // advancing here would silently drop that character.
      ucs4 = PRUint32(*aHighSurrogate);
    }
    else
    {
      ucs4 = (((*aHighSurrogate & 0x3ffL) << 10) | (*src & 0x3ffL)) + 0x10000;
      ++src;
    }

    if (aIsLE)
      UCS4_TO_LE_STRING(ucs4, dest);
    else
      UCS4_TO_BE_STRING(ucs4, dest);
    dest += 4;
    *aHighSurrogate = 0;
  }

  while (src < srcEnd) {
    // regular codepoint or an unpaired low surrogate
    if ((src[0] & 0xfc00) != 0xd800)
    {
      if (destEnd - dest < 4)
        goto error_more_output;
      ucs4 = PRUint32(src[0]);
    }
    else  // high surrogate
    {
      if ((src+1) >= srcEnd) {
        //we need another surrogate to complete this unicode char
        *aHighSurrogate = src[0];
        *aDestLength = dest - aDest;
        return NS_OK_UENC_MOREINPUT;
      }
      //handle surrogate
      if (destEnd - dest < 4)
        goto error_more_output;
      if ((src[1] & 0xfc00) != 0xdc00)  // unpaired
        ucs4 = PRUint32(src[0]);
      else
      {  // convert surrogate pair to UCS4
        ucs4 = (((src[0] & 0x3ffL) << 10) | (src[1] & 0x3ffL)) + 0x10000;
        *aHighSurrogate = 0;
        ++src;
      }
    }
    if (aIsLE)
      UCS4_TO_LE_STRING(ucs4, dest);
    else
      UCS4_TO_BE_STRING(ucs4, dest);
    dest += 4;
    ++src;
  }

  *aDestLength = dest - aDest;
  return NS_OK;

error_more_output:
  *aSrcLength = src - aSrc;
  *aDestLength = dest - aDest;
  return NS_OK_UENC_MOREOUTPUT;
}
/**
 * Flush a high surrogate still pending at the end of the input.
 *
 * @param aDest          output buffer
 * @param aDestLength    in: capacity of aDest in bytes;
 *                       out: bytes written (0 or 4)
 * @param aHighSurrogate in/out: pending high surrogate, cleared once written
 * @param aIsLE          PR_TRUE for little-endian output
 *
 * @return NS_OK, or NS_OK_UENC_MOREOUTPUT if a surrogate is pending and
 *         aDest has room for fewer than 4 bytes.
 */
static nsresult FinishCommon(char * aDest,
                             PRInt32 * aDestLength,
                             PRUnichar * aHighSurrogate,
                             PRBool aIsLE)
{
  // Nothing pending: nothing to write.
  if (!*aHighSurrogate) {
    *aDestLength = 0;
    return NS_OK;
  }

  // A pending surrogate needs one full 4-byte unit.
  if (*aDestLength < 4) {
    *aDestLength = 0;
    return NS_OK_UENC_MOREOUTPUT;
  }

  // The unpaired surrogate is written out as its own code point.
  char * out = aDest;
  PRUint32 pending = PRUint32(*aHighSurrogate);
  if (aIsLE)
    UCS4_TO_LE_STRING(pending, out);
  else
    UCS4_TO_BE_STRING(pending, out);

  *aDestLength = 4;
  *aHighSurrogate = 0;
  return NS_OK;
}
//----------------------------------------------------------------------
// Class nsUnicodeToUTF32Base [implementation]
// NOTE(review): presumably supplies the nsISupports methods declared by
// NS_DECL_ISUPPORTS in the class, exposing nsIUnicodeEncoder - confirm
// against the XPCOM macro definition.
NS_IMPL_ISUPPORTS1(nsUnicodeToUTF32Base, nsIUnicodeEncoder)
//----------------------------------------------------------------------
// Subclassing of nsIUnicodeEncoder class [implementation]
/**
 * Report an upper bound on the number of bytes Convert() may write for
 * aSrcLength UTF-16 code units.
 *
 * Each code unit becomes at most one 4-byte UTF-32 unit. In addition:
 *  - a BOM that has not been emitted yet (mBOM != 0) takes 4 more bytes;
 *  - a high surrogate carried over from the previous call
 *    (mHighSurrogate != 0) may have to be written as a unit of its own.
 *
 * @param aSrc        unused
 * @param aSrcLength  number of UTF-16 code units about to be converted
 * @param aDestLength out: maximum number of bytes needed
 */
NS_IMETHODIMP nsUnicodeToUTF32Base::GetMaxLength(const PRUnichar * aSrc,
                                                 PRInt32 aSrcLength,
                                                 PRInt32 * aDestLength)
{
  PRInt32 maxBytes = aSrcLength * 4;
  if (mBOM)
    maxBytes += 4;
  if (mHighSurrogate)
    maxBytes += 4;
  *aDestLength = maxBytes;
  return NS_OK;
}
//----------------------------------------------------------------------
// Class nsUnicodeToUTF32BE [implementation]
//----------------------------------------------------------------------
// Subclassing of nsUnicodeToUTF32 class [implementation]
// Encode as big-endian UTF-32: forwards to ConvertCommon with this
// encoder's pending-surrogate and BOM state.
NS_IMETHODIMP nsUnicodeToUTF32BE::Convert(const PRUnichar * aSrc,
                                          PRInt32 * aSrcLength,
                                          char * aDest,
                                          PRInt32 * aDestLength)
{
  const PRBool isLittleEndian = PR_FALSE;
  const nsresult result = ConvertCommon(aSrc, aSrcLength,
                                        aDest, aDestLength,
                                        &mHighSurrogate, &mBOM,
                                        isLittleEndian);
  return result;
}
// Flush a pending high surrogate, if any, as big-endian UTF-32.
NS_IMETHODIMP nsUnicodeToUTF32BE::Finish(char * aDest,
                                         PRInt32 * aDestLength)
{
  const PRBool isLittleEndian = PR_FALSE;
  const nsresult result = FinishCommon(aDest, aDestLength,
                                       &mHighSurrogate, isLittleEndian);
  return result;
}
//----------------------------------------------------------------------
// Class nsUnicodeToUTF32LE [implementation]
//----------------------------------------------------------------------
// Subclassing of nsUnicodeToUTF32 class [implementation]
// Encode as little-endian UTF-32: forwards to ConvertCommon with this
// encoder's pending-surrogate and BOM state.
NS_IMETHODIMP nsUnicodeToUTF32LE::Convert(const PRUnichar * aSrc,
                                          PRInt32 * aSrcLength,
                                          char * aDest,
                                          PRInt32 * aDestLength)
{
  const PRBool isLittleEndian = PR_TRUE;
  const nsresult result = ConvertCommon(aSrc, aSrcLength,
                                        aDest, aDestLength,
                                        &mHighSurrogate, &mBOM,
                                        isLittleEndian);
  return result;
}
// Flush a pending high surrogate, if any, as little-endian UTF-32.
NS_IMETHODIMP nsUnicodeToUTF32LE::Finish(char * aDest,
                                         PRInt32 * aDestLength)
{
  const PRBool isLittleEndian = PR_TRUE;
  const nsresult result = FinishCommon(aDest, aDestLength,
                                       &mHighSurrogate, isLittleEndian);
  return result;
}

Просмотреть файл

@ -1,157 +0,0 @@
/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
/* vim:expandtab:shiftwidth=2:tabstop=2:
*/
/* ***** BEGIN LICENSE BLOCK *****
* Version: MPL 1.1/GPL 2.0/LGPL 2.1
*
* The contents of this file are subject to the Mozilla Public License Version
* 1.1 (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
* http://www.mozilla.org/MPL/
*
* Software distributed under the License is distributed on an "AS IS" basis,
* WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
* for the specific language governing rights and limitations under the
* License.
*
* The Original Code is Mozilla Communicator client code.
*
* The Initial Developer of the Original Code is
* Netscape Communications Corporation.
* Portions created by the Initial Developer are Copyright (C) 1998
* the Initial Developer. All Rights Reserved.
*
* Contributor(s):
* Jungshik Shin <jshin@mailaps.org>
*
* Alternatively, the contents of this file may be used under the terms of
* either of the GNU General Public License Version 2 or later (the "GPL"),
* or the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
* in which case the provisions of the GPL or the LGPL are applicable instead
* of those above. If you wish to allow use of your version of this file only
* under the terms of either the GPL or the LGPL, and not to allow others to
* use your version of this file under the terms of the MPL, indicate your
* decision by deleting the provisions above and replace them with the notice
* and other provisions required by the GPL or the LGPL. If you do not delete
* the provisions above, a recipient may use your version of this file under
* the terms of any one of the MPL, the GPL or the LGPL.
*
* ***** END LICENSE BLOCK ***** */
#ifndef nsUnicodeToUTF32_h___
#define nsUnicodeToUTF32_h___
//----------------------------------------------------------------------
// Class nsUnicodeToUTF32Base [declaration]
/**
 * A character set converter from Unicode to the UTF-32 family.
 * The base class for the Unicode to UTF-32/UTF-32BE/UTF-32LE converters.
 * It keeps the state carried between Convert() calls: a high surrogate
 * awaiting its partner and a BOM that is still to be written.
 * @created 08/Dec/2002
 * @author Jungshik Shin
 */
class nsUnicodeToUTF32Base : public nsIUnicodeEncoder
{
  NS_DECL_ISUPPORTS

protected:
  /**
   * Class constructor. Accessible only by child classes.
   * Starts with no pending surrogate and no BOM to emit.
   */
  nsUnicodeToUTF32Base() : mHighSurrogate(0), mBOM(0) {}
  virtual ~nsUnicodeToUTF32Base() {}

  // High surrogate seen at the end of the previous Convert() call,
  // or 0 when none is pending.
  PRUnichar mHighSurrogate;

  NS_IMETHOD GetMaxLength(const PRUnichar * aSrc, PRInt32 aSrcLength,
                          PRInt32 * aDestLength);

  //--------------------------------------------------------------------
  // Subclassing of nsIUnicodeEncoder class [declaration]

  // Drop any pending surrogate; this base variant never emits a BOM.
  NS_IMETHOD Reset()
  {
    mHighSurrogate = 0;
    mBOM = 0;
    return NS_OK;
  }

  // Accepted but ignored: every argument is discarded.
  NS_IMETHOD SetOutputErrorBehavior(PRInt32 aBehavior,
                                    nsIUnicharEncoder * aEncoder,
                                    PRUnichar aChar)
  {
    return NS_OK;
  }

  // BOM to write before the first encoded character, or 0 for none.
  PRUnichar mBOM;
};
//----------------------------------------------------------------------
// Class nsUnicodeToUTF32BE [declaration]
/**
* A character set converter from Unicode to UTF-32BE.
* A subclass of nsUnicodeToUTF32Base.
* @created 08/Dec/2002
* @author Jungshik Shin
*/
class nsUnicodeToUTF32BE : public nsUnicodeToUTF32Base
{
public:
//--------------------------------------------------------------------
// Subclassing of nsIUnicodeEncoder class [declaration]
// Encodes aSrc as big-endian UTF-32 into aDest.
NS_IMETHOD Convert(const PRUnichar * aSrc, PRInt32 * aSrcLength,
char * aDest, PRInt32 * aDestLength);
// Writes out a high surrogate left pending by Convert(), if any.
NS_IMETHOD Finish(char * aDest, PRInt32 * aDestLength);
};
//----------------------------------------------------------------------
// Class nsUnicodeToUTF32LE [declaration]
/**
* A character set converter from Unicode to UTF-32LE.
* A subclass of nsUnicodeToUTF32Base.
* @created 08/Dec/2002
* @author Jungshik Shin
*/
class nsUnicodeToUTF32LE : public nsUnicodeToUTF32Base
{
public:
//--------------------------------------------------------------------
// Subclassing of nsIUnicodeEncoder class [declaration]
// Encodes aSrc as little-endian UTF-32 into aDest.
NS_IMETHOD Convert(const PRUnichar * aSrc, PRInt32 * aSrcLength,
char * aDest, PRInt32 * aDestLength);
// Writes out a high surrogate left pending by Convert(), if any.
NS_IMETHOD Finish(char * aDest, PRInt32 * aDestLength);
};
//----------------------------------------------------------------------
// Class nsUnicodeToUTF32 [declaration]
/**
* A character set converter from Unicode to UTF-32.
* Derives from the LE or BE encoder according to the endianness the build
* defines (IS_LITTLE_ENDIAN / IS_BIG_ENDIAN), and arms a BOM (0xFEFF) on
* construction and on every Reset().
* @created 08/Dec/2002
* @author Jungshik Shin
*/
#ifdef IS_LITTLE_ENDIAN
class nsUnicodeToUTF32 : public nsUnicodeToUTF32LE
#elif defined(IS_BIG_ENDIAN)
class nsUnicodeToUTF32 : public nsUnicodeToUTF32BE
#else
#error "Unknown endianness"
#endif
{
public:
// Start with a BOM pending and no left-over surrogate.
nsUnicodeToUTF32() {mBOM = 0xFEFF; mHighSurrogate = 0;};
//--------------------------------------------------------------------
// Subclassing of nsUnicodeToUTF32Base class [declaration]
// Unlike the base Reset(), re-arm the BOM so the next output starts with it.
NS_IMETHOD Reset() {mBOM = 0xFEFF; mHighSurrogate = 0; return NS_OK;};
};
#endif /* nsUnicodeToUTF32_h___ */

Просмотреть файл

@ -34,10 +34,6 @@ while (decoderList.hasMore()) {
data = encodeUTF16BE(testContent);
else if (decoder == "UTF-16LE")
data = encodeUTF16LE(testContent);
else if (decoder == "UTF-32" || decoder == "UTF-32BE")
data = encodeUTF32BE(testContent);
else if (decoder == "UTF-32LE")
data = encodeUTF32LE(testContent);
else
data = encodeURI(testContent);
var dataURI = "data:text/html;charset=" + decoder + "," + data;
@ -73,30 +69,6 @@ function encodeUTF16LE(string)
return encodedString;
}
/**
 * Build the URI-escaped UTF-32BE form of a string for use in a data: URI.
 * Each character is emitted as three %00 bytes followed by
 * encodeURI(character).
 *
 * @param string  the text to encode
 * @return the percent-encoded byte sequence as a string
 */
function encodeUTF32BE(string)
{
  var encodedString = "";
  // Declare the counter locally; a bare "i" would leak into (and clobber)
  // a global shared with every other loop in the test.
  for (var i = 0; i < string.length; ++i) {
    encodedString += "%00%00%00" + encodeURI(string.charAt(i));
  }
  return encodedString;
}
/**
 * Build the URI-escaped UTF-32LE form of a string for use in a data: URI.
 * Each character is emitted as encodeURI(character) followed by three
 * %00 bytes.
 *
 * @param string  the text to encode
 * @return the percent-encoded byte sequence as a string
 */
function encodeUTF32LE(string)
{
  var encodedString = "";
  // Declare the counter locally; a bare "i" would leak into (and clobber)
  // a global shared with every other loop in the test.
  for (var i = 0; i < string.length; ++i) {
    encodedString += encodeURI(string.charAt(i)) + "%00%00%00";
  }
  return encodedString;
}
function lastTest(frame)
{
testFontSize(frame);

Просмотреть файл

@ -564,75 +564,17 @@ static nsresult GetCharsetFromData(const unsigned char* aStyleSheetData,
step = 1;
pos = 0;
}
// Check for a 4-byte encoding BOM before checking for a 2-byte one,
// since the latter can be a proper subset of the former.
else if (aStyleSheetData[0] == 0x00 &&
aStyleSheetData[1] == 0x00 &&
aStyleSheetData[2] == 0xFF &&
aStyleSheetData[3] == 0xFE) {
// 4-byte encoding BOM in 2143 order
NS_WARNING("Our unicode decoders aren't likely to deal with this one");
step = 4;
pos = 6;
aCharset = "UTF-32";
}
else if (aStyleSheetData[0] == 0xFE &&
aStyleSheetData[1] == 0xFF &&
aStyleSheetData[2] == 0x00 &&
aStyleSheetData[3] == 0x00) {
// 4-byte encoding BOM in 3412 order
NS_WARNING("Our unicode decoders aren't likely to deal with this one");
step = 4;
pos = 5;
aCharset = "UTF-32";
}
else if (nsContentUtils::CheckForBOM(aStyleSheetData,
aDataLength, aCharset, &bigEndian)) {
if (aCharset.Equals("UTF-8")) {
step = 1;
pos = 3;
}
else if (aCharset.Equals("UTF-32")) {
step = 4;
pos = bigEndian ? 7 : 4;
}
else if (aCharset.Equals("UTF-16")) {
step = 2;
pos = bigEndian ? 3 : 2;
}
}
else if (aStyleSheetData[0] == 0x00 &&
aStyleSheetData[1] == 0x00 &&
aStyleSheetData[2] == 0x00 &&
aStyleSheetData[3] == 0x40) {
// big-endian 4-byte encoding, no BOM
step = 4;
pos = 3;
}
else if (aStyleSheetData[0] == 0x40 &&
aStyleSheetData[1] == 0x00 &&
aStyleSheetData[2] == 0x00 &&
aStyleSheetData[3] == 0x00) {
// little-endian 4-byte encoding, no BOM
step = 4;
pos = 0;
}
else if (aStyleSheetData[0] == 0x00 &&
aStyleSheetData[1] == 0x00 &&
aStyleSheetData[2] == 0x40 &&
aStyleSheetData[3] == 0x00) {
// 4-byte encoding in 2143 order, no BOM
step = 4;
pos = 2;
}
else if (aStyleSheetData[0] == 0x00 &&
aStyleSheetData[1] == 0x40 &&
aStyleSheetData[2] == 0x00 &&
aStyleSheetData[3] == 0x00) {
// 4-byte encoding in 3412 order, no BOM
step = 4;
pos = 1;
}
else if (aStyleSheetData[0] == 0x00 &&
aStyleSheetData[1] == 0x40 &&
aStyleSheetData[2] == 0x00 &&

Просмотреть файл

@ -1017,7 +1017,7 @@ pref("intl.charsetmenu.browser.more2", "ISO-8859-4, ISO-8859-13, windows-12
pref("intl.charsetmenu.browser.more3", "GB2312, x-gbk, gb18030, HZ-GB-2312, ISO-2022-CN, Big5, Big5-HKSCS, x-euc-tw, EUC-JP, ISO-2022-JP, Shift_JIS, EUC-KR, x-windows-949, x-johab, ISO-2022-KR");
pref("intl.charsetmenu.browser.more4", "armscii-8, GEOSTD8, TIS-620, ISO-8859-11, windows-874, IBM857, ISO-8859-9, x-mac-turkish, windows-1254, x-viet-tcvn5712, VISCII, x-viet-vps, windows-1258, x-mac-devanagari, x-mac-gujarati, x-mac-gurmukhi");
pref("intl.charsetmenu.browser.more5", "ISO-8859-6, windows-1256, IBM864, ISO-8859-8-I, windows-1255, ISO-8859-8, IBM862");
pref("intl.charsetmenu.browser.unicode", "UTF-8, UTF-16LE, UTF-16BE, UTF-32, UTF-32LE, UTF-32BE");
pref("intl.charsetmenu.browser.unicode", "UTF-8, UTF-16LE, UTF-16BE");
pref("intl.charsetmenu.mailedit", "chrome://global/locale/intl.properties");
pref("intl.charsetmenu.browser.cache", "");
pref("intl.charsetmenu.mailview.cache", "");

Просмотреть файл

@ -94,9 +94,6 @@ nsHtml5MetaScanner::tryCharset(nsString* charset)
if (preferred.LowerCaseEqualsLiteral("utf-16") ||
preferred.LowerCaseEqualsLiteral("utf-16be") ||
preferred.LowerCaseEqualsLiteral("utf-16le") ||
preferred.LowerCaseEqualsLiteral("utf-32") ||
preferred.LowerCaseEqualsLiteral("utf-32be") ||
preferred.LowerCaseEqualsLiteral("utf-32le") ||
preferred.LowerCaseEqualsLiteral("utf-7") ||
preferred.LowerCaseEqualsLiteral("jis_x0212-1990") ||
preferred.LowerCaseEqualsLiteral("x-jis0208") ||

Просмотреть файл

@ -917,9 +917,6 @@ nsHtml5StreamParser::internalEncodingDeclaration(nsString* aEncoding)
if (preferred.LowerCaseEqualsLiteral("utf-16") ||
preferred.LowerCaseEqualsLiteral("utf-16be") ||
preferred.LowerCaseEqualsLiteral("utf-16le") ||
preferred.LowerCaseEqualsLiteral("utf-32") ||
preferred.LowerCaseEqualsLiteral("utf-32be") ||
preferred.LowerCaseEqualsLiteral("utf-32le") ||
preferred.LowerCaseEqualsLiteral("utf-7") ||
preferred.LowerCaseEqualsLiteral("jis_x0212-1990") ||
preferred.LowerCaseEqualsLiteral("x-jis0208") ||

Просмотреть файл

@ -2476,11 +2476,6 @@ nsParser::OnStartRequest(nsIRequest *request, nsISupports* aContext)
#define UTF16_BOM "UTF-16"
#define UTF16_BE "UTF-16BE"
#define UTF16_LE "UTF-16LE"
#define UCS4_BOM "UTF-32"
#define UCS4_BE "UTF-32BE"
#define UCS4_LE "UTF-32LE"
#define UCS4_2143 "X-ISO-10646-UCS-4-2143"
#define UCS4_3412 "X-ISO-10646-UCS-4-3412"
#define UTF8 "UTF-8"
static inline PRBool IsSecondMarker(unsigned char aChar)
@ -2510,32 +2505,13 @@ DetectByteOrderMark(const unsigned char* aBytes, PRInt32 aLen,
switch(aBytes[0])
{
case 0x00:
if(0x00==aBytes[1]) {
// 00 00
if((0xFE==aBytes[2]) && (0xFF==aBytes[3])) {
// 00 00 FE FF UCS-4, big-endian machine (1234 order)
oCharset.Assign(UCS4_BOM);
} else if((0x00==aBytes[2]) && (0x3C==aBytes[3])) {
// 00 00 00 3C UCS-4, big-endian machine (1234 order)
oCharset.Assign(UCS4_BE);
} else if((0xFF==aBytes[2]) && (0xFE==aBytes[3])) {
// 00 00 FF FE UCS-4, unusual octet order (2143)
oCharset.Assign(UCS4_2143);
} else if((0x3C==aBytes[2]) && (0x00==aBytes[3])) {
// 00 00 3C 00 UCS-4, unusual octet order (2143)
oCharset.Assign(UCS4_2143);
}
oCharsetSource = kCharsetFromByteOrderMark;
} else if((0x3C==aBytes[1]) && (0x00==aBytes[2])) {
if((0x3C==aBytes[1]) && (0x00==aBytes[2])) {
// 00 3C 00
if(IsSecondMarker(aBytes[3])) {
// 00 3C 00 SM UTF-16, big-endian, no Byte Order Mark
oCharset.Assign(UTF16_BE);
} else if((0x00==aBytes[3])) {
// 00 3C 00 00 UCS-4, unusual octet order (3412)
oCharset.Assign(UCS4_3412);
oCharsetSource = kCharsetFromByteOrderMark;
}
oCharsetSource = kCharsetFromByteOrderMark;
}
break;
case 0x3C:
@ -2544,11 +2520,8 @@ DetectByteOrderMark(const unsigned char* aBytes, PRInt32 aLen,
if(IsSecondMarker(aBytes[2])) {
// 3C 00 SM 00 UTF-16, little-endian, no Byte Order Mark
oCharset.Assign(UTF16_LE);
} else if((0x00==aBytes[2])) {
// 3C 00 00 00 UCS-4, little-endian machine (4321 order)
oCharset.Assign(UCS4_LE);
oCharsetSource = kCharsetFromByteOrderMark;
}
oCharsetSource = kCharsetFromByteOrderMark;
// For html, meta tag detector is invoked before this so that we have
// to deal only with XML here.
} else if( (0x3F==aBytes[1]) &&
@ -2640,26 +2613,17 @@ DetectByteOrderMark(const unsigned char* aBytes, PRInt32 aLen,
break;
case 0xFE:
if(0xFF==aBytes[1]) {
if(0x00==aBytes[2] && 0x00==aBytes[3]) {
// FE FF 00 00 UCS-4, unusual octet order (3412)
oCharset.Assign(UCS4_3412);
} else {
// FE FF UTF-16, big-endian
oCharset.Assign(UTF16_BOM);
}
// FE FF UTF-16, big-endian
oCharset.Assign(UTF16_BOM);
oCharsetSource= kCharsetFromByteOrderMark;
}
break;
case 0xFF:
if(0xFE==aBytes[1]) {
if(0x00==aBytes[2] && 0x00==aBytes[3])
// FF FE 00 00 UTF-32, little-endian
oCharset.Assign(UCS4_BOM);
else
// FF FE
// UTF-16, little-endian
oCharset.Assign(UTF16_BOM);
oCharsetSource= kCharsetFromByteOrderMark;
// FF FE
// UTF-16, little-endian
oCharset.Assign(UTF16_BOM);
oCharsetSource= kCharsetFromByteOrderMark;
}
break;
// case 0x4C: if((0x6F==aBytes[1]) && ((0xA7==aBytes[2] && (0x94==aBytes[3])) {
@ -2852,10 +2816,7 @@ ParserWriteFunc(nsIInputStream* in,
((kCharsetFromByteOrderMark == guessSource) ||
(!preferred.EqualsLiteral("UTF-16") &&
!preferred.EqualsLiteral("UTF-16BE") &&
!preferred.EqualsLiteral("UTF-16LE") &&
!preferred.EqualsLiteral("UTF-32") &&
!preferred.EqualsLiteral("UTF-32BE") &&
!preferred.EqualsLiteral("UTF-32LE")))) {
!preferred.EqualsLiteral("UTF-16LE")))) {
guess = preferred;
pws->mParser->SetDocumentCharset(guess, guessSource);
pws->mParser->SetSinkCharset(preferred);