Backed out changeset dc6b9ca8f3fa (bug 1727491) for causing mochitest failures on test_bug631751be.html. CLOSED TREE

This commit is contained in:
criss 2021-08-30 11:14:38 +03:00
Родитель a20e5e1327
Коммит 02cf484af4
8 изменённых файлов: 64 добавлений и 5 удалений

Двоичные данные
dom/html/test/test_bug615595.html

Двоичный файл не отображается.

Просмотреть файл

@ -12,6 +12,7 @@ EncLateMetaReload=The page was reloaded, because the character encoding declarat
EncLateMetaTooLate=The character encoding declaration of document was found too late for it to take effect. The encoding declaration needs to be moved to be within the first 1024 bytes of the file.
EncMetaUnsupported=An unsupported character encoding was declared for the HTML document using a meta tag. The declaration was ignored.
EncProtocolUnsupported=An unsupported character encoding was declared on the transfer protocol level. The declaration was ignored.
EncBomlessUtf16=Detected UTF-16-encoded Basic Latin-only text without a byte order mark and without a transfer protocol-level declaration. Encoding this content in UTF-16 is inefficient and the character encoding should have been declared in any case.
EncMetaUtf16=A meta tag was used to declare the character encoding as UTF-16. This was interpreted as an UTF-8 declaration instead.
EncMetaUserDefined=A meta tag was used to declare the character encoding as x-user-defined. This was interpreted as a windows-1252 declaration instead for compatibility with intentionally mis-encoded legacy fonts. This site should migrate to Unicode.

Просмотреть файл

@ -503,6 +503,44 @@ void nsHtml5StreamParser::SetupDecodingFromUtf16BogoXml(
mLastBuffer->AdvanceEnd(3);
}
/**
 * Heuristically recognizes BOM-less UTF-16 text consisting only of Basic
 * Latin characters: one byte of every two-byte unit must always be zero and
 * the other must always be non-zero. When the whole buffer fits that pattern,
 * the document encoding is committed to UTF-16LE or UTF-16BE and a charset
 * complaint ("EncBomlessUtf16") is reported through the tree builder.
 *
 * @param aBuf    bytes to inspect
 * @param aBufLen number of bytes available in aBuf
 */
void nsHtml5StreamParser::SniffBOMlessUTF16BasicLatin(const uint8_t* aBuf,
                                                      size_t aBufLen) {
  // Two reasons to bail out before looking at any byte:
  //  - LOAD_AS_DATA (XHR) must not be subject to this underspecified
  //    heuristic;
  //  - fewer than 30 bytes is not enough data (room for "<title></title>"
  //    is required).
  if (mMode == LOAD_AS_DATA || aBufLen < 30) {
    return;
  }

  // Index 0 records what was seen at even offsets, index 1 at odd offsets.
  bool sawZero[2] = {false, false};
  bool sawNonZero[2] = {false, false};

  for (uint32_t pos = 0; pos < aBufLen; ++pos) {
    const uint32_t parity = pos % 2;
    // Pick the tracker matching this byte's kind (zero vs. non-zero).
    bool* seen = aBuf[pos] ? sawNonZero : sawZero;
    // The same kind of byte already showed up at the opposite parity, so the
    // strict zero/non-zero alternation is broken: not our pattern.
    if (seen[1 - parity]) {
      return;
    }
    seen[parity] = true;
  }

  // Non-zero bytes at even offsets mean the low byte comes first.
  mEncoding = sawNonZero[0] ? UTF_16LE_ENCODING : UTF_16BE_ENCODING;
  mCharsetSource = kCharsetFromIrreversibleAutoDetection;
  mTreeBuilder->SetDocumentCharset(mEncoding, mCharsetSource);
  DontGuessEncoding();
  mTreeBuilder->MaybeComplainAboutCharset("EncBomlessUtf16", true, 0);
}
void nsHtml5StreamParser::SetEncodingFromExpat(const char16_t* aEncoding) {
if (aEncoding) {
nsDependentString utf16(aEncoding);
@ -696,9 +734,15 @@ nsresult nsHtml5StreamParser::FinalizeSniffing(Span<const uint8_t> aFromSegment,
mEncoding = WrapNotNull(encoding);
mCharsetSource = kCharsetFromXmlDeclaration;
mTreeBuilder->SetDocumentCharset(mEncoding, mCharsetSource);
} else if (mCharsetSource < kCharsetFromIrreversibleAutoDetection) {
// meta scan and XML declaration check failed.
// Check for BOMless UTF-16 with Basic
// Latin content for compat with IE. See bug 631751.
SniffBOMlessUTF16BasicLatin(buf, bufLen);
}
}
if (mForceAutoDetection) {
if (mForceAutoDetection &&
mCharsetSource != kCharsetFromIrreversibleAutoDetection) {
// neither meta nor XML declaration found, honor override
FinalizeSniffingWithDetector(aFromSegment, aCountToSniffingLimit, false);
return SetupDecodingAndWriteSniffingBufferAndCurrentSegment(aFromSegment);
@ -893,10 +937,11 @@ nsresult nsHtml5StreamParser::SniffStreamBytes(
return SetupDecodingAndWriteSniffingBufferAndCurrentSegment(aFromSegment);
}
MOZ_ASSERT(!(mBomState == BOM_SNIFFING_OVER && mChannelHadCharset &&
!mForceAutoDetection),
"How come we're running post-BOM sniffing with channel charset unless "
"we're also processing forced detection?");
MOZ_ASSERT(
!(mBomState == BOM_SNIFFING_OVER && mChannelHadCharset &&
!mForceAutoDetection),
"How come we're running post-BOM sniffing with channel charset unless "
"we're also processing forced detection?");
if (!mMetaScanner &&
(mMode == NORMAL || mMode == VIEW_SOURCE_HTML || mMode == LOAD_AS_DATA)) {

Просмотреть файл

@ -349,6 +349,11 @@ class nsHtml5StreamParser final : public nsISupports {
*/
nsresult WriteStreamBytes(mozilla::Span<const uint8_t> aFromSegment);
/**
 * Check whether every other byte in the sniffing buffer is zero (and the
 * remaining bytes are non-zero), i.e. whether the buffer looks like BOM-less
 * UTF-16 carrying only Basic Latin text.
 *
 * Does nothing in LOAD_AS_DATA mode or when fewer than 30 bytes are
 * available. On a match, sets the encoding to UTF-16LE or UTF-16BE with
 * kCharsetFromIrreversibleAutoDetection as the charset source and reports
 * the "EncBomlessUtf16" charset complaint.
 *
 * @param aBuf    the bytes to examine
 * @param aBufLen the number of bytes in aBuf
 */
void SniffBOMlessUTF16BasicLatin(const uint8_t* aBuf, size_t aBufLen);
/**
* Write the start of the stream to detector.
*/

Двоичный файл не отображается.

Просмотреть файл

@ -16,6 +16,7 @@ support-files =
file_bug594730-9.html
file_bug642908.sjs
file_bug655682.sjs
file_bug672453_bomless_utf16.html
file_bug672453_http_unsupported.html
file_bug672453_http_unsupported.html^headers^
file_bug672453_late_meta.html

Просмотреть файл

@ -23,6 +23,7 @@ var tests = [
"file_bug672453_meta_restart.html",
"file_bug672453_meta_unsupported.html",
"file_bug672453_http_unsupported.html",
"file_bug672453_bomless_utf16.html",
"file_bug672453_meta_utf16.html",
"file_bug672453_meta_non_superset.html",
"file_bug672453_meta_userdefined.html",
@ -49,6 +50,10 @@ var expectedErrors = [
sourceName: "http://mochi.test:8888/tests/parser/htmlparser/tests/mochitest/file_bug672453_http_unsupported.html",
lineNumber: 0,
isWarning: false },
{ errorMessage: "Detected UTF-16-encoded Basic Latin-only text without a byte order mark and without a transfer protocol-level declaration. Encoding this content in UTF-16 is inefficient and the character encoding should have been declared in any case.",
sourceName: "http://mochi.test:8888/tests/parser/htmlparser/tests/mochitest/file_bug672453_bomless_utf16.html",
lineNumber: 0,
isWarning: false },
{ errorMessage: "A meta tag was used to declare the character encoding as UTF-16. This was interpreted as an UTF-8 declaration instead.",
sourceName: "http://mochi.test:8888/tests/parser/htmlparser/tests/mochitest/file_bug672453_meta_utf16.html",
lineNumber: 1,

Просмотреть файл

@ -35,6 +35,8 @@ enum {
kCharsetFromFinalUserForcedAutoDetection,
kCharsetFromXmlDeclarationUtf16, // This one is overridden by
// kCharsetFromChannel
kCharsetFromIrreversibleAutoDetection, // This one is overridden by
// kCharsetFromChannel
kCharsetFromByteOrderMark,
kCharsetFromUtf8OnlyMime, // For JSON, WebVTT and such
kCharsetFromBuiltIn, // resource: URLs