Bug 716579 - Let a BOM override HTTP-level charset in the HTML and XML parsers. r=smaug.

This commit is contained in:
Henri Sivonen 2012-11-06 13:57:51 +02:00
Родитель d3ba646030
Коммит 4038858f3f
16 изменённых файлов: 235 добавлений и 361 удалений

Просмотреть файл

@ -24,6 +24,7 @@
#include "expat.h"
#include "nsINestedURI.h"
#include "nsCharsetSource.h"
#include "nsIWyciwygChannel.h"
using namespace mozilla;
@ -495,8 +496,8 @@ nsHtml5StreamParser::FinalizeSniffing(const uint8_t* aFromSegment, // can be nul
uint32_t aCountToSniffingLimit)
{
NS_ASSERTION(IsParserThread(), "Wrong thread!");
NS_ASSERTION(mCharsetSource < kCharsetFromMetaTag,
"Should not finalize sniffing when already confident.");
NS_ASSERTION(mCharsetSource < kCharsetFromParentForced,
"Should not finalize sniffing when using forced charset.");
if (mMode == VIEW_SOURCE_XML) {
static const XML_Memory_Handling_Suite memsuite =
{
@ -634,6 +635,11 @@ nsHtml5StreamParser::SniffStreamBytes(const uint8_t* aFromSegment,
NS_ASSERTION(IsParserThread(), "Wrong thread!");
nsresult rv = NS_OK;
uint32_t writeCount;
// mCharset and mCharsetSource potentially have come from channel or higher
// by now. If we find a BOM, SetupDecodingFromBom() will overwrite them.
// If we don't find a BOM, the previously set values of mCharset and
// mCharsetSource are not modified by the BOM sniffing here.
for (uint32_t i = 0; i < aCount && mBomState != BOM_SNIFFING_OVER; i++) {
switch (mBomState) {
case BOM_SNIFFING_NOT_STARTED:
@ -701,8 +707,36 @@ nsHtml5StreamParser::SniffStreamBytes(const uint8_t* aFromSegment,
break;
}
}
// if we get here, there either was no BOM or the BOM sniffing isn't complete yet
// if we get here, there either was no BOM or the BOM sniffing isn't complete
// yet
if (mBomState == BOM_SNIFFING_OVER &&
mCharsetSource >= kCharsetFromChannel) {
// There was no BOM and the charset came from channel or higher. mCharset
// still contains the charset from the channel or higher as set by an
// earlier call to SetDocumentCharset(), since we didn't find a BOM and
// overwrite mCharset.
nsCOMPtr<nsICharsetConverterManager> convManager =
do_GetService(NS_CHARSETCONVERTERMANAGER_CONTRACTID);
convManager->GetUnicodeDecoder(mCharset.get(),
getter_AddRefs(mUnicodeDecoder));
if (mUnicodeDecoder) {
mUnicodeDecoder->SetInputErrorBehavior(
nsIUnicodeDecoder::kOnError_Recover);
mFeedChardet = false;
mTreeBuilder->SetDocumentCharset(mCharset, mCharsetSource);
mMetaScanner = nullptr;
return WriteSniffingBufferAndCurrentSegment(aFromSegment,
aCount,
aWriteCount);
} else {
// nsHTMLDocument is supposed to make sure this does not happen. Let's
// deal with this anyway, since who knows how kCharsetFromOtherComponent
// is used.
mCharsetSource = kCharsetFromWeakDocTypeDefault;
}
}
if (!mMetaScanner && (mMode == NORMAL ||
mMode == VIEW_SOURCE_HTML ||
mMode == LOAD_AS_DATA)) {
@ -963,7 +997,13 @@ nsHtml5StreamParser::OnStartRequest(nsIRequest* aRequest, nsISupports* aContext)
mFeedChardet = false;
}
if (mCharsetSource <= kCharsetFromMetaPrescan) {
nsCOMPtr<nsIWyciwygChannel> wyciwygChannel(do_QueryInterface(mRequest));
if (wyciwygChannel) {
mReparseForbidden = true;
mFeedChardet = false;
// If we are reloading a document.open()ed doc, fall through to converter
// instantiation here and avoid BOM sniffing.
} else if (mCharsetSource < kCharsetFromParentForced) {
// we aren't ready to commit to an encoding yet
// leave converter uninstantiated for now
return NS_OK;

Просмотреть файл

@ -41,6 +41,7 @@
#include "mozilla/Mutex.h"
#include "nsParserConstants.h"
#include "nsCharsetSource.h"
#include "nsContentUtils.h"
using namespace mozilla;
@ -1250,8 +1251,7 @@ nsParser::Parse(nsIURI* aURL,
}
NS_ConvertUTF8toUTF16 theName(spec);
nsScanner* theScanner = new nsScanner(theName, false, mCharset,
mCharsetSource);
nsScanner* theScanner = new nsScanner(theName, false);
CParserContext* pc = new CParserContext(mParserContext, theScanner, aKey,
mCommand, aListener);
if (pc && theScanner) {
@ -1311,7 +1311,7 @@ nsParser::Parse(const nsAString& aSourceBuffer,
if (!pc) {
// Only make a new context if we don't have one, OR if we do, but has a
// different context key.
nsScanner* theScanner = new nsScanner(mUnusedInput, mCharset, mCharsetSource);
nsScanner* theScanner = new nsScanner(mUnusedInput);
NS_ENSURE_TRUE(theScanner, NS_ERROR_OUT_OF_MEMORY);
eAutoDetectResult theStatus = eUnknownDetect;
@ -1674,11 +1674,6 @@ nsParser::OnStartRequest(nsIRequest *request, nsISupports* aContext)
}
#define UTF16_BOM "UTF-16"
#define UTF16_BE "UTF-16BE"
#define UTF16_LE "UTF-16LE"
#define UTF8 "UTF-8"
static inline bool IsSecondMarker(unsigned char aChar)
{
switch (aChar) {
@ -1693,146 +1688,87 @@ static inline bool IsSecondMarker(unsigned char aChar)
}
// NOTE(review): two versions of this helper appear back to back in this span.
//  - DetectByteOrderMark(aBytes, aLen, oCharset, oCharsetSource) inspects the
//    first bytes for a BOM or a BOM-less UTF-16 "<?"-style prefix and
//    otherwise falls back to scanning a leading "<?xml" declaration.
//  - ExtractCharsetFromXmlDeclaration(aBytes, aLen, oCharset) only reads the
//    encoding pseudo-attribute of a leading "<?xml" declaration.
// Both empty oCharset first and return true only if oCharset ends up
// non-empty.
static bool
DetectByteOrderMark(const unsigned char* aBytes, int32_t aLen,
nsCString& oCharset, int32_t& oCharsetSource)
ExtractCharsetFromXmlDeclaration(const unsigned char* aBytes, int32_t aLen,
nsCString& oCharset)
{
// Default source; overwritten below when a BOM or declaration is recognized.
oCharsetSource= kCharsetFromAutoDetection;
oCharset.Truncate();
// See http://www.w3.org/TR/2000/REC-xml-20001006#sec-guessing
// for details
// Also, MS Win2K notepad now generate 3 bytes BOM in UTF8 as UTF8 signature
// We need to check that
// UCS2 BOM FEFF = UTF8 EF BB BF
// The switch reads aBytes[0..3] without consulting aLen; the visible caller
// only invokes this version when count >= 4.
switch(aBytes[0])
{
case 0x00:
if((0x3C==aBytes[1]) && (0x00==aBytes[2])) {
// 00 3C 00
if(IsSecondMarker(aBytes[3])) {
// 00 3C 00 SM UTF-16, big-endian, no Byte Order Mark
oCharset.Assign(UTF16_BE);
oCharsetSource = kCharsetFromByteOrderMark;
}
}
break;
case 0x3C:
if(0x00==aBytes[1] && (0x00==aBytes[3])) {
// 3C 00 XX 00
if(IsSecondMarker(aBytes[2])) {
// 3C 00 SM 00 UTF-16, little-endian, no Byte Order Mark
oCharset.Assign(UTF16_LE);
oCharsetSource = kCharsetFromByteOrderMark;
}
// For html, meta tag detector is invoked before this so that we have
// to deal only with XML here.
} else if( (0x3F==aBytes[1]) &&
(0x78==aBytes[2]) && (0x6D==aBytes[3]) &&
(0 == PL_strncmp("<?xml", (char*)aBytes, 5 ))) {
// 3C 3F 78 6D
// ASCII characters are in their normal positions, so we can safely
// deal with the XML declaration in the old C way
// The shortest string so far (strlen==5):
// <?xml
int32_t i;
bool versionFound = false, encodingFound = false;
// Start at index 6: "<?xml" occupies 0..4 and index 5 is skipped unchecked.
for (i=6; i < aLen && !encodingFound; ++i) {
// end of XML declaration?
if ((((char*)aBytes)[i] == '?') &&
((i+1) < aLen) &&
(((char*)aBytes)[i+1] == '>')) {
break;
}
// Version is required.
if (!versionFound) {
// Want to avoid string comparisons, hence looking for 'n'
// and only if found check the string leading to it. Not
// foolproof, but fast.
// The shortest string allowed before this is (strlen==13):
// <?xml version
if ((((char*)aBytes)[i] == 'n') &&
(i >= 12) &&
(0 == PL_strncmp("versio", (char*)(aBytes+i-6), 6 ))) {
// Fast forward through version
// The inner loop advances the outer index i; on break, i sits on the
// closing quote of the version value.
char q = 0;
for (++i; i < aLen; ++i) {
char qi = ((char*)aBytes)[i];
if (qi == '\'' || qi == '"') {
if (q && q == qi) {
// ending quote
versionFound = true;
break;
} else {
// Starting quote
q = qi;
}
}
}
}
} else {
// encoding must follow version
// Want to avoid string comparisons, hence looking for 'g'
// and only if found check the string leading to it. Not
// foolproof, but fast.
// The shortest allowed string before this (strlen==26):
// <?xml version="1" encoding
if ((((char*)aBytes)[i] == 'g') &&
(i >= 25) &&
(0 == PL_strncmp("encodin", (char*)(aBytes+i-7), 7 ))) {
int32_t encStart = 0;
char q = 0;
for (++i; i < aLen; ++i) {
char qi = ((char*)aBytes)[i];
if (qi == '\'' || qi == '"') {
if (q && q == qi) {
int32_t count = i - encStart;
// encoding value is invalid if it is UTF-16
// NOTE(review): PL_strcmp is not bounded by `count`; it compares up to
// a NUL terminator, and the text at encStart continues with the
// closing quote. Presumably a length-bounded compare was intended -
// confirm.
if (count > 0 &&
(0 != PL_strcmp("UTF-16", (char*)(aBytes+encStart)))) {
oCharset.Assign((char*)(aBytes+encStart),count);
oCharsetSource = kCharsetFromMetaTag;
}
encodingFound = true;
break;
} else {
encStart = i+1;
q = qi;
}
}
}
}
} // if (!versionFound)
} // for
}
break;
case 0xEF:
if((0xBB==aBytes[1]) && (0xBF==aBytes[2])) {
// EF BB BF
// Win2K UTF-8 BOM
oCharset.Assign(UTF8);
oCharsetSource= kCharsetFromByteOrderMark;
}
break;
case 0xFE:
if(0xFF==aBytes[1]) {
// FE FF UTF-16, big-endian
oCharset.Assign(UTF16_BOM);
oCharsetSource= kCharsetFromByteOrderMark;
}
break;
case 0xFF:
if(0xFE==aBytes[1]) {
// FF FE
// UTF-16, little-endian
oCharset.Assign(UTF16_BOM);
oCharsetSource= kCharsetFromByteOrderMark;
}
break;
// case 0x4C: if((0x6F==aBytes[1]) && ((0xA7==aBytes[2] && (0x94==aBytes[3])) {
// We do not care about EBCDIC here....
// }
// break;
} // switch
return !oCharset.IsEmpty();
// This code is rather pointless to have. Might as well reuse expat as
// seen in nsHtml5StreamParser. -- hsivonen
oCharset.Truncate();
// Only a buffer that literally begins with the five bytes "<?xml" is scanned.
if ((aLen >= 5) &&
('<' == aBytes[0]) &&
('?' == aBytes[1]) &&
('x' == aBytes[2]) &&
('m' == aBytes[3]) &&
('l' == aBytes[4])) {
int32_t i;
bool versionFound = false, encodingFound = false;
// Start at index 6: "<?xml" occupies 0..4 and index 5 is skipped unchecked.
for (i = 6; i < aLen && !encodingFound; ++i) {
// end of XML declaration?
if ((((char*) aBytes)[i] == '?') &&
((i + 1) < aLen) &&
(((char*) aBytes)[i + 1] == '>')) {
break;
}
// Version is required.
if (!versionFound) {
// Want to avoid string comparisons, hence looking for 'n'
// and only if found check the string leading to it. Not
// foolproof, but fast.
// The shortest string allowed before this is (strlen==13):
// <?xml version
if ((((char*) aBytes)[i] == 'n') &&
(i >= 12) &&
(0 == PL_strncmp("versio", (char*) (aBytes + i - 6), 6))) {
// Fast forward through version
// The inner loop advances the outer index i; on break, i sits on the
// closing quote of the version value.
char q = 0;
for (++i; i < aLen; ++i) {
char qi = ((char*) aBytes)[i];
if (qi == '\'' || qi == '"') {
if (q && q == qi) {
// ending quote
versionFound = true;
break;
} else {
// Starting quote
q = qi;
}
}
}
}
} else {
// encoding must follow version
// Want to avoid string comparisons, hence looking for 'g'
// and only if found check the string leading to it. Not
// foolproof, but fast.
// The shortest allowed string before this (strlen==26):
// <?xml version="1" encoding
if ((((char*) aBytes)[i] == 'g') && (i >= 25) && (0 == PL_strncmp(
"encodin", (char*) (aBytes + i - 7), 7))) {
int32_t encStart = 0;
char q = 0;
for (++i; i < aLen; ++i) {
char qi = ((char*) aBytes)[i];
if (qi == '\'' || qi == '"') {
if (q && q == qi) {
int32_t count = i - encStart;
// encoding value is invalid if it is UTF-16
// NOTE(review): PL_strcmp is not bounded by `count`; it compares up to
// a NUL terminator, and the text at encStart continues with the
// closing quote. Presumably a length-bounded compare was intended -
// confirm.
if (count > 0 && (0 != PL_strcmp("UTF-16",
(char*) (aBytes + encStart)))) {
oCharset.Assign((char*) (aBytes + encStart), count);
}
encodingFound = true;
break;
} else {
encStart = i + 1;
q = qi;
}
}
}
}
} // if (!versionFound)
} // for
}
return !oCharset.IsEmpty();
}
inline const char
@ -1843,131 +1779,6 @@ GetNextChar(nsACString::const_iterator& aStart,
return (++aStart != aEnd) ? *aStart : '\0';
}
/**
 * Looks for a META tag carrying a "charset=" value in the first bytes of a
 * buffer (at most 2048 of them).
 *
 * @param aBytes         start of the raw byte buffer to scan
 * @param aLen           number of bytes available at aBytes
 * @param aCharset       emptied on entry; receives the charset value if found
 * @param aCharsetSource always set to kCharsetFromMetaTag on entry
 * @return true if a non-empty charset value was copied into aCharset; false
 *         if the context's MIME type is not TEXT_HTML or no complete META
 *         tag with a charset value was found in the scanned range
 */
bool
nsParser::DetectMetaTag(const char* aBytes,
int32_t aLen,
nsCString& aCharset,
int32_t& aCharsetSource)
{
aCharsetSource= kCharsetFromMetaTag;
aCharset.SetLength(0);
// XXX Only look inside HTML documents for now. For XML
// documents we should be looking inside the XMLDecl.
if (!mParserContext->mMimeType.EqualsLiteral(TEXT_HTML)) {
return false;
}
// Fast and loose parsing to determine if we have a complete
// META tag in this block, looking upto 2k into it.
const nsASingleFragmentCString& str =
Substring(aBytes, aBytes + NS_MIN(aLen, 2048));
// XXXldb Should be const_char_iterator when FindInReadable supports it.
nsACString::const_iterator begin, end;
str.BeginReading(begin);
str.EndReading(end);
nsACString::const_iterator currPos(begin);
nsACString::const_iterator tokEnd;
nsACString::const_iterator tagEnd(begin);
while (currPos != end) {
if (!FindCharInReadable('<', currPos, end))
break; // no tag found in this buffer
// GetNextChar advances currPos as a side effect, so every test below
// consumes a character even when the comparison fails.
if (GetNextChar(currPos, end) == '!') {
if (GetNextChar(currPos, end) != '-' ||
GetNextChar(currPos, end) != '-') {
// If we only see a <! not followed by --, just skip to the next >.
if (!FindCharInReadable('>', currPos, end)) {
return false; // No more tags to follow.
}
// Continue searching for a meta tag following this "comment".
++currPos;
continue;
}
// Found MDO ( <!-- ). Now search for MDC ( --[*s]> )
bool foundMDC = false;
bool foundMatch = false;
while (!foundMDC) {
if (GetNextChar(currPos, end) == '-' &&
GetNextChar(currPos, end) == '-') {
foundMatch = !foundMatch; // toggle until we've matching "--"
} else if (currPos == end) {
return false; // Couldn't find --[*s]> in this buffer
} else if (foundMatch && *currPos == '>') {
foundMDC = true; // found comment end delimiter.
++currPos;
}
}
continue; // continue searching for META tag.
}
// Find the end of the tag, break if incomplete
tagEnd = currPos;
if (!FindCharInReadable('>', tagEnd, end))
break;
// If this is not a META tag, continue to next loop
// Each ++currPos steps one character; the short-circuit || stops advancing
// at the first mismatch.
if ( (*currPos != 'm' && *currPos != 'M') ||
(*(++currPos) != 'e' && *currPos != 'E') ||
(*(++currPos) != 't' && *currPos != 'T') ||
(*(++currPos) != 'a' && *currPos != 'A') ||
!nsCRT::IsAsciiSpace(*(++currPos))) {
currPos = tagEnd;
continue;
}
// If could not find "charset" in this tag, skip this tag and try next
tokEnd = tagEnd;
if (!CaseInsensitiveFindInReadable(NS_LITERAL_CSTRING("CHARSET"),
currPos, tokEnd)) {
currPos = tagEnd;
continue;
}
currPos = tokEnd;
// skip spaces before '='
while (*currPos == kSpace || *currPos == kNewLine ||
*currPos == kCR || *currPos == kTab) {
++currPos;
}
// skip '='
if (*currPos != '=') {
currPos = tagEnd;
continue;
}
++currPos;
// skip spaces after '='
while (*currPos == kSpace || *currPos == kNewLine ||
*currPos == kCR || *currPos == kTab) {
++currPos;
}
// skip open quote
// The opening quote is optional: an unquoted value is scanned the same way.
if (*currPos == '\'' || *currPos == '\"')
++currPos;
// find the end of charset string
// Stops at either quote character or at tagEnd, whichever comes first.
tokEnd = currPos;
while (*tokEnd != '\'' && *tokEnd != '\"' && tokEnd != tagEnd)
++tokEnd;
// return true if we successfully got something for charset
if (currPos != tokEnd) {
aCharset.Assign(currPos.get(), tokEnd.get() - currPos.get());
return true;
}
// Nothing specified as charset, continue next loop
currPos = tagEnd;
}
return false;
}
static NS_METHOD
NoOpParserWriteFunc(nsIInputStream* in,
void* closure,
@ -2003,7 +1814,8 @@ ParserWriteFunc(nsIInputStream* in,
{
nsresult result;
ParserWriteStruct* pws = static_cast<ParserWriteStruct*>(closure);
const char* buf = fromRawSegment;
const unsigned char* buf =
reinterpret_cast<const unsigned char*> (fromRawSegment);
uint32_t theNumRead = count;
if (!pws) {
@ -2011,47 +1823,37 @@ ParserWriteFunc(nsIInputStream* in,
}
if (pws->mNeedCharsetCheck) {
int32_t guessSource;
nsAutoCString guess;
nsAutoCString preferred;
pws->mNeedCharsetCheck = false;
if (pws->mParser->DetectMetaTag(buf, theNumRead, guess, guessSource) ||
((count >= 4) &&
DetectByteOrderMark((const unsigned char*)buf,
theNumRead, guess, guessSource))) {
result = nsCharsetAlias::GetPreferred(guess, preferred);
// Only continue if it's a recognized charset and not
// one of a designated set that we ignore.
if (NS_SUCCEEDED(result) &&
((kCharsetFromByteOrderMark == guessSource) ||
(!preferred.EqualsLiteral("UTF-16") &&
!preferred.EqualsLiteral("UTF-16BE") &&
!preferred.EqualsLiteral("UTF-16LE")))) {
guess = preferred;
pws->mParser->SetDocumentCharset(guess, guessSource);
pws->mParser->SetSinkCharset(preferred);
nsCOMPtr<nsICachingChannel> channel(do_QueryInterface(pws->mRequest));
if (channel) {
nsCOMPtr<nsISupports> cacheToken;
channel->GetCacheToken(getter_AddRefs(cacheToken));
if (cacheToken) {
nsCOMPtr<nsICacheEntryDescriptor> cacheDescriptor(do_QueryInterface(cacheToken));
if (cacheDescriptor) {
#ifdef DEBUG
nsresult rv =
#endif
cacheDescriptor->SetMetaDataElement("charset",
guess.get());
NS_ASSERTION(NS_SUCCEEDED(rv),"cannot SetMetaDataElement");
}
}
int32_t source;
nsAutoCString preferred;
nsAutoCString maybePrefer;
pws->mParser->GetDocumentCharset(preferred, source);
// This code was bogus when I found it. It expects the BOM or the XML
// declaration to be entirely in the first network buffer. -- hsivonen
if (nsContentUtils::CheckForBOM(buf, count, maybePrefer)) {
// The decoder will swallow the BOM. The UTF-16 decoder will re-sniff for
// endianness. The value of preferred is now either "UTF-8" or "UTF-16".
preferred.Assign(maybePrefer);
source = kCharsetFromByteOrderMark;
} else if (source < kCharsetFromChannel) {
nsAutoCString declCharset;
if (ExtractCharsetFromXmlDeclaration(buf, count, declCharset)) {
nsresult rv = nsCharsetAlias::GetPreferred(declCharset, maybePrefer);
if (NS_SUCCEEDED(rv)) {
preferred.Assign(maybePrefer);
source = kCharsetFromMetaTag;
}
}
}
pws->mParser->SetDocumentCharset(preferred, source);
pws->mParser->SetSinkCharset(preferred);
}
result = pws->mScanner->Append(buf, theNumRead, pws->mRequest);
result = pws->mScanner->Append(fromRawSegment, theNumRead, pws->mRequest);
if (NS_SUCCEEDED(result)) {
*writeCount = count;
}
@ -2103,8 +1905,7 @@ nsParser::OnDataAvailable(nsIRequest *request, nsISupports* aContext,
uint32_t totalRead;
ParserWriteStruct pws;
pws.mNeedCharsetCheck =
(0 == sourceOffset) && (mCharsetSource < kCharsetFromMetaTag);
pws.mNeedCharsetCheck = true;
pws.mParser = this;
pws.mScanner = theContext->mScanner;
pws.mRequest = request;

Просмотреть файл

@ -246,15 +246,6 @@ class nsParser : public nsIParser,
*/
virtual nsIStreamListener* GetStreamListener();
/**
* Detects the existence of a META tag with charset information in
* the given buffer.
*/
bool DetectMetaTag(const char* aBytes,
int32_t aLen,
nsCString& oCharset,
int32_t& oCharsetSource);
void SetSinkCharset(nsACString& aCharset);
/**

Просмотреть файл

@ -57,8 +57,7 @@ const int kBufsize=64;
* @param aMode represents the parser mode (nav, other)
* @return
*/
nsScanner::nsScanner(const nsAString& anHTMLString, const nsACString& aCharset,
int32_t aSource)
nsScanner::nsScanner(const nsAString& anHTMLString)
{
MOZ_COUNT_CTOR(nsScanner);
@ -84,13 +83,8 @@ nsScanner::nsScanner(const nsAString& anHTMLString, const nsACString& aCharset,
* Use this constructor if you want i/o to be based on strings
* the scanner receives. If you pass a null filename, you
* can still provide data to the scanner via append.
*
* @update gess 5/12/98
* @param aFilename --
* @return
*/
nsScanner::nsScanner(nsString& aFilename,bool aCreateStream,
const nsACString& aCharset, int32_t aSource)
nsScanner::nsScanner(nsString& aFilename, bool aCreateStream)
: mFilename(aFilename)
{
MOZ_COUNT_CTOR(nsScanner);
@ -115,7 +109,8 @@ nsScanner::nsScanner(nsString& aFilename,bool aCreateStream,
mCharsetSource = kCharsetUninitialized;
mHasInvalidCharacter = false;
mReplacementCharacter = PRUnichar(0x0);
SetDocumentCharset(aCharset, aSource);
// XML defaults to UTF-8 and about:blank is UTF-8, too.
SetDocumentCharset(NS_LITERAL_CSTRING("UTF-8"), kCharsetFromDocTypeDefault);
}
nsresult nsScanner::SetDocumentCharset(const nsACString& aCharset , int32_t aSource)
@ -130,6 +125,7 @@ nsresult nsScanner::SetDocumentCharset(const nsACString& aCharset , int32_t aSou
res = nsCharsetAlias::Equals(aCharset, mCharset, &same);
if(NS_SUCCEEDED(res) && same)
{
mCharsetSource = aSource;
return NS_OK; // no difference, don't change it
}
}
@ -137,16 +133,9 @@ nsresult nsScanner::SetDocumentCharset(const nsACString& aCharset , int32_t aSou
// different, need to change it
nsCString charsetName;
res = nsCharsetAlias::GetPreferred(aCharset, charsetName);
MOZ_ASSERT(NS_SUCCEEDED(res), "Should never call with a bogus aCharset.");
if(NS_FAILED(res) && (mCharsetSource == kCharsetUninitialized))
{
// failed - unknown alias , fallback to ISO-8859-1
mCharset.AssignLiteral("ISO-8859-1");
}
else
{
mCharset.Assign(charsetName);
}
mCharset.Assign(charsetName);
mCharsetSource = aSource;

Просмотреть файл

@ -42,29 +42,15 @@ class nsScanner {
public:
/**
* Use this constructor if you want i/o to be based on
* a single string you hand in during construction.
* This short cut was added for Javascript.
*
* @update ftang 3/02/99
* @param aCharset charset
* @param aCharsetSource - where the charset info came from
* @param aMode represents the parser mode (nav, other)
* @return
* Use this constructor for the XML fragment parsing case
*/
nsScanner(const nsAString& anHTMLString, const nsACString& aCharset, int32_t aSource);
nsScanner(const nsAString& anHTMLString);
/**
* Use this constructor if you want i/o to be based on
* a file (therefore a stream) or just data you provide via Append().
*
* @update ftang 3/02/99
* @param aCharset charset
* @param aCharsetSource - where the charset info came from
* @param aMode represents the parser mode (nav, other)
* @return
*/
nsScanner(nsString& aFilename,bool aCreateStream, const nsACString& aCharset, int32_t aSource);
nsScanner(nsString& aFilename, bool aCreateStream);
~nsScanner();

Просмотреть файл

@ -75,6 +75,15 @@ MOCHITEST_FILES = parser_datreader.js \
test_viewsource.html \
test_bug715112.html \
test_bug715739.html \
test_bug716579.html \
file_bug716579-8.html \
file_bug716579-8.html^headers^ \
file_bug716579-16.html \
file_bug716579-16.html^headers^ \
file_bug716579-8.xhtml \
file_bug716579-8.xhtml^headers^ \
file_bug716579-16.xhtml \
file_bug716579-16.xhtml^headers^ \
test_bug717180.html \
file_bug717180.html \
$(NULL)

Двоичные данные
parser/htmlparser/tests/mochitest/file_bug716579-16.html Normal file

Двоичный файл не отображается.

Просмотреть файл

@ -0,0 +1 @@
Content-Type: text/html; charset=windows-874

Двоичные данные
parser/htmlparser/tests/mochitest/file_bug716579-16.xhtml Normal file

Двоичный файл не отображается.

Просмотреть файл

@ -0,0 +1 @@
Content-Type: application/xhtml+xml; charset=windows-874

Просмотреть файл

@ -0,0 +1,3 @@
<script>
parent.html8 = "€";
</script>

Просмотреть файл

@ -0,0 +1 @@
Content-Type: text/html; charset=windows-874

Просмотреть файл

@ -0,0 +1,7 @@
<html xmlns="http://www.w3.org/1999/xhtml">
<body>
<script>
parent.xml8 = "€";
</script>
</body>
</html>

Просмотреть файл

@ -0,0 +1 @@
Content-Type: application/xhtml+xml; charset=windows-874

Просмотреть файл

@ -0,0 +1,44 @@
<!DOCTYPE HTML>
<html>
<!--
https://bugzilla.mozilla.org/show_bug.cgi?id=716579
-->
<head>
<meta charset="windows-1251">
<title>Test for Bug 716579</title>
<script type="application/javascript" src="/tests/SimpleTest/SimpleTest.js"></script>
<link rel="stylesheet" type="text/css" href="/tests/SimpleTest/test.css"/>
</head>
<body>
<a target="_blank" href="https://bugzilla.mozilla.org/show_bug.cgi?id=716579">Mozilla Bug 716579</a>
<p id="display"></p>
<pre id="test">
<script type="application/javascript">
/** Test for Bug 716579 **/
// Each variable starts as "FAIL" and is meant to be overwritten by one of the
// framed documents below through `parent.<name>` (for example,
// file_bug716579-8.html assigns parent.html8). The onload handler then checks
// that every variable holds the euro sign, U+20AC.
// NOTE(review): the frames are served with a windows-874 charset header;
// presumably each fixture starts with a BOM so the euro sign only decodes to
// U+20AC when the BOM wins over the header - confirm against the fixtures.
var html8 = "FAIL";
var html16 = "FAIL";
var xml8 = "FAIL";
var xml16 = "FAIL";
SimpleTest.waitForExplicitFinish();
window.onload = function() {
is(html8, "\u20AC", "HTML UTF-8 failed.");
is(html16, "\u20AC", "HTML UTF-16 failed.");
is(xml8, "\u20AC", "XML UTF-8 failed.");
is(xml16, "\u20AC", "XML UTF-16 failed.");
SimpleTest.finish();
};
</script>
</pre>
<div id="content" style="display: none">
<iframe src="file_bug716579-8.html"></iframe>
<iframe src="file_bug716579-16.html"></iframe>
<iframe src="file_bug716579-8.xhtml"></iframe>
<iframe src="file_bug716579-16.xhtml"></iframe>
</div>
</body>
</html>

Просмотреть файл

@ -17,9 +17,9 @@
#define kCharsetFromMetaPrescan 8 // this one and smaller: HTML5 Tentative
#define kCharsetFromMetaTag 9 // this one and greater: HTML5 Confident
#define kCharsetFromIrreversibleAutoDetection 10
#define kCharsetFromByteOrderMark 11
#define kCharsetFromChannel 12
#define kCharsetFromOtherComponent 13
#define kCharsetFromChannel 11
#define kCharsetFromOtherComponent 12
#define kCharsetFromByteOrderMark 13
// Levels below here will be forced onto childframes too
#define kCharsetFromParentForced 14
#define kCharsetFromUserForced 15