зеркало из https://github.com/mozilla/gecko-dev.git
Bug 631751 - Sniff Basic Latin BOMless UTF-16 for IE compat. r=bzbarsky, a=bzbarsky.
This commit is contained in:
Родитель
13bff10470
Коммит
f68ddc8cba
|
@ -70,6 +70,10 @@ _TEST_FILES = \
|
|||
test_bug547487.html \
|
||||
bug620106_text.html \
|
||||
test_bug620106.html \
|
||||
bug631751le_text.html \
|
||||
test_bug631751le.html \
|
||||
bug631751be_text.html \
|
||||
test_bug631751be.html \
|
||||
$(NULL)
|
||||
|
||||
libs:: $(_TEST_FILES)
|
||||
|
|
Двоичный файл не отображается.
Двоичный файл не отображается.
|
@ -0,0 +1,33 @@
|
|||
<!DOCTYPE HTML>
|
||||
<html>
|
||||
<!--
|
||||
https://bugzilla.mozilla.org/show_bug.cgi?id=631751
|
||||
-->
|
||||
<head>
|
||||
<title>Test for Bug 631751</title>
|
||||
<script type="text/javascript"
|
||||
src="chrome://mochikit/content/MochiKit/packed.js"></script>
|
||||
<script type="text/javascript"
|
||||
src="chrome://mochikit/content/tests/SimpleTest/SimpleTest.js">
|
||||
</script>
|
||||
<script type="text/javascript" src="CharsetDetectionTests.js"></script>
|
||||
<link rel="stylesheet" type="text/css"
|
||||
href="chrome://mochikit/content/tests/SimpleTest/test.css" />
|
||||
</head>
|
||||
<body>
|
||||
<a target="_blank" href="https://bugzilla.mozilla.org/show_bug.cgi?id=631751">Mozilla Bug 631751</a>
|
||||
<p id="display"></p>
|
||||
<div id="content" style="display: none">
|
||||
</div>
|
||||
<iframe id="testframe"></iframe>
|
||||
<pre id="test">
|
||||
<script class="testbody" type="text/javascript">
|
||||
/** Test for Bug 631751 **/
|
||||
/* Note! This test uses the chardet test harness but doesn't test chardet! */
|
||||
CharsetDetectionTests("bug631751be_text.html",
|
||||
"UTF-16BE",
|
||||
new Array(""));
|
||||
</script>
|
||||
</pre>
|
||||
</body>
|
||||
</html>
|
|
@ -0,0 +1,33 @@
|
|||
<!DOCTYPE HTML>
|
||||
<html>
|
||||
<!--
|
||||
https://bugzilla.mozilla.org/show_bug.cgi?id=631751
|
||||
-->
|
||||
<head>
|
||||
<title>Test for Bug 631751</title>
|
||||
<script type="text/javascript"
|
||||
src="chrome://mochikit/content/MochiKit/packed.js"></script>
|
||||
<script type="text/javascript"
|
||||
src="chrome://mochikit/content/tests/SimpleTest/SimpleTest.js">
|
||||
</script>
|
||||
<script type="text/javascript" src="CharsetDetectionTests.js"></script>
|
||||
<link rel="stylesheet" type="text/css"
|
||||
href="chrome://mochikit/content/tests/SimpleTest/test.css" />
|
||||
</head>
|
||||
<body>
|
||||
<a target="_blank" href="https://bugzilla.mozilla.org/show_bug.cgi?id=631751">Mozilla Bug 631751</a>
|
||||
<p id="display"></p>
|
||||
<div id="content" style="display: none">
|
||||
</div>
|
||||
<iframe id="testframe"></iframe>
|
||||
<pre id="test">
|
||||
<script class="testbody" type="text/javascript">
|
||||
/** Test for Bug 631751 **/
|
||||
/* Note! This test uses the chardet test harness but doesn't test chardet! */
|
||||
CharsetDetectionTests("bug631751le_text.html",
|
||||
"UTF-16LE",
|
||||
new Array(""));
|
||||
</script>
|
||||
</pre>
|
||||
</body>
|
||||
</html>
|
|
@ -334,6 +334,48 @@ nsHtml5StreamParser::SetupDecodingFromBom(const char* aCharsetName, const char*
|
|||
return rv;
|
||||
}
|
||||
|
||||
void
|
||||
nsHtml5StreamParser::SniffBOMlessUTF16BasicLatin(const PRUint8* aFromSegment,
|
||||
PRUint32 aCountToSniffingLimit)
|
||||
{
|
||||
// Make sure there's enough data. Require room for "<title></title>"
|
||||
if (mSniffingLength + aCountToSniffingLimit < 30) {
|
||||
return;
|
||||
}
|
||||
// even-numbered bytes tracked at 0, odd-numbered bytes tracked at 1
|
||||
PRBool byteNonZero[2] = { PR_FALSE, PR_FALSE };
|
||||
PRUint32 i = 0;
|
||||
if (mSniffingBuffer) {
|
||||
for (; i < mSniffingLength; ++i) {
|
||||
if (mSniffingBuffer[i]) {
|
||||
if (byteNonZero[1 - (i % 2)]) {
|
||||
return;
|
||||
}
|
||||
byteNonZero[i % 2] = PR_TRUE;
|
||||
}
|
||||
}
|
||||
}
|
||||
if (aFromSegment) {
|
||||
for (PRUint32 j = 0; j < aCountToSniffingLimit; ++j) {
|
||||
if (aFromSegment[j]) {
|
||||
if (byteNonZero[1 - ((i + j) % 2)]) {
|
||||
return;
|
||||
}
|
||||
byteNonZero[(i + j) % 2] = PR_TRUE;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (byteNonZero[0]) {
|
||||
mCharset.Assign("UTF-16LE");
|
||||
} else {
|
||||
mCharset.Assign("UTF-16BE");
|
||||
}
|
||||
mCharsetSource = kCharsetFromIrreversibleAutoDetection;
|
||||
mTreeBuilder->SetDocumentCharset(mCharset, mCharsetSource);
|
||||
mFeedChardet = PR_FALSE;
|
||||
}
|
||||
|
||||
nsresult
|
||||
nsHtml5StreamParser::FinalizeSniffing(const PRUint8* aFromSegment, // can be null
|
||||
PRUint32 aCount,
|
||||
|
@ -346,6 +388,10 @@ nsHtml5StreamParser::FinalizeSniffing(const PRUint8* aFromSegment, // can be nul
|
|||
mFeedChardet = PR_FALSE;
|
||||
return SetupDecodingAndWriteSniffingBufferAndCurrentSegment(aFromSegment, aCount, aWriteCount);
|
||||
}
|
||||
// Check for BOMless UTF-16 with Basic
|
||||
// Latin content for compat with IE. See bug 631751.
|
||||
SniffBOMlessUTF16BasicLatin(aFromSegment, aCountToSniffingLimit);
|
||||
// the charset may have been set now
|
||||
// maybe try chardet now;
|
||||
if (mFeedChardet) {
|
||||
PRBool dontFeed;
|
||||
|
|
|
@ -256,7 +256,13 @@ class nsHtml5StreamParser : public nsIStreamListener,
|
|||
nsresult WriteStreamBytes(const PRUint8* aFromSegment,
|
||||
PRUint32 aCount,
|
||||
PRUint32* aWriteCount);
|
||||
|
||||
|
||||
/**
|
||||
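* Check whether the non-zero bytes in the sniffing buffer and the current segment all fall on the same byte parity (i.e. every other byte is zero); if so, set the charset to UTF-16LE or UTF-16BE.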
|
||||
*/
|
||||
void SniffBOMlessUTF16BasicLatin(const PRUint8* aFromSegment,
|
||||
PRUint32 aCountToSniffingLimit);
|
||||
|
||||
/**
|
||||
* <meta charset> scan failed. Try chardet if applicable. After this, the
|
||||
* parser will have some encoding, even if only a last-resort fallback.
|
||||
|
|
|
@ -98,13 +98,14 @@ enum eParserDocType {
|
|||
#define kCharsetFromHintPrevDoc 7
|
||||
#define kCharsetFromMetaPrescan 8 // this one and smaller: HTML5 Tentative
|
||||
#define kCharsetFromMetaTag 9 // this one and greater: HTML5 Confident
|
||||
#define kCharsetFromByteOrderMark 10
|
||||
#define kCharsetFromChannel 11
|
||||
#define kCharsetFromOtherComponent 12
|
||||
#define kCharsetFromIrreversibleAutoDetection 10
|
||||
#define kCharsetFromByteOrderMark 11
|
||||
#define kCharsetFromChannel 12
|
||||
#define kCharsetFromOtherComponent 13
|
||||
// Levels below here will be forced onto childframes too
|
||||
#define kCharsetFromParentForced 13
|
||||
#define kCharsetFromUserForced 14
|
||||
#define kCharsetFromPreviousLoading 15
|
||||
#define kCharsetFromParentForced 14
|
||||
#define kCharsetFromUserForced 15
|
||||
#define kCharsetFromPreviousLoading 16
|
||||
|
||||
enum eStreamState {eNone,eOnStart,eOnDataAvail,eOnStop};
|
||||
|
||||
|
|
Загрузка…
Ссылка в новой задаче