зеркало из https://github.com/mozilla/gecko-dev.git
Bug 1727491 - Remove support for BOMless unlabeled Latin1 Supplement-range UTF-16LE|BE. r=emk
Differential Revision: https://phabricator.services.mozilla.com/D123596
This commit is contained in:
Родитель
9fa1f1913b
Коммит
5397b4f0a9
Двоичные данные
dom/html/test/test_bug615595.html
Двоичные данные
dom/html/test/test_bug615595.html
Двоичный файл не отображается.
|
@@ -12,7 +12,6 @@ EncLateMetaReload=The page was reloaded, because the character encoding declarat
|
|||
EncLateMetaTooLate=The character encoding declaration of document was found too late for it to take effect. The encoding declaration needs to be moved to be within the first 1024 bytes of the file.
|
||||
EncMetaUnsupported=An unsupported character encoding was declared for the HTML document using a meta tag. The declaration was ignored.
|
||||
EncProtocolUnsupported=An unsupported character encoding was declared on the transfer protocol level. The declaration was ignored.
|
||||
EncBomlessUtf16=Detected UTF-16-encoded Basic Latin-only text without a byte order mark and without a transfer protocol-level declaration. Encoding this content in UTF-16 is inefficient and the character encoding should have been declared in any case.
|
||||
EncMetaUtf16=A meta tag was used to declare the character encoding as UTF-16. This was interpreted as an UTF-8 declaration instead.
|
||||
EncMetaUserDefined=A meta tag was used to declare the character encoding as x-user-defined. This was interpreted as a windows-1252 declaration instead for compatibility with intentionally mis-encoded legacy fonts. This site should migrate to Unicode.
|
||||
|
||||
|
|
|
@@ -23,7 +23,7 @@ https://bugzilla.mozilla.org/show_bug.cgi?id=631751
|
|||
/** Test for Bug 631751 **/
|
||||
/* Note! This test uses the chardet test harness but doesn't test chardet! */
|
||||
CharsetDetectionTests("bug631751be_text.html",
|
||||
"UTF-16BE",
|
||||
"UTF-8", // Test runs from file: URL, so ASCII gets detected as UTF-8.
|
||||
new Array(""));
|
||||
</script>
|
||||
</pre>
|
||||
|
|
|
@@ -23,7 +23,7 @@ https://bugzilla.mozilla.org/show_bug.cgi?id=631751
|
|||
/** Test for Bug 631751 **/
|
||||
/* Note! This test uses the chardet test harness but doesn't test chardet! */
|
||||
CharsetDetectionTests("bug631751le_text.html",
|
||||
"UTF-16LE",
|
||||
"UTF-8", // Test runs from file: URL, so ASCII gets detected as UTF-8.
|
||||
new Array(""));
|
||||
</script>
|
||||
</pre>
|
||||
|
|
|
@@ -503,44 +503,6 @@ void nsHtml5StreamParser::SetupDecodingFromUtf16BogoXml(
|
|||
mLastBuffer->AdvanceEnd(3);
|
||||
}
|
||||
|
||||
void nsHtml5StreamParser::SniffBOMlessUTF16BasicLatin(const uint8_t* aBuf,
|
||||
size_t aBufLen) {
|
||||
// Avoid underspecified heuristic craziness for XHR
|
||||
if (mMode == LOAD_AS_DATA) {
|
||||
return;
|
||||
}
|
||||
// Make sure there's enough data. Require room for "<title></title>"
|
||||
if (aBufLen < 30) {
|
||||
return;
|
||||
}
|
||||
// even-numbered bytes tracked at 0, odd-numbered bytes tracked at 1
|
||||
bool byteZero[2] = {false, false};
|
||||
bool byteNonZero[2] = {false, false};
|
||||
uint32_t i = 0;
|
||||
for (; i < aBufLen; ++i) {
|
||||
if (aBuf[i]) {
|
||||
if (byteNonZero[1 - (i % 2)]) {
|
||||
return;
|
||||
}
|
||||
byteNonZero[i % 2] = true;
|
||||
} else {
|
||||
if (byteZero[1 - (i % 2)]) {
|
||||
return;
|
||||
}
|
||||
byteZero[i % 2] = true;
|
||||
}
|
||||
}
|
||||
if (byteNonZero[0]) {
|
||||
mEncoding = UTF_16LE_ENCODING;
|
||||
} else {
|
||||
mEncoding = UTF_16BE_ENCODING;
|
||||
}
|
||||
mCharsetSource = kCharsetFromIrreversibleAutoDetection;
|
||||
mTreeBuilder->SetDocumentCharset(mEncoding, mCharsetSource);
|
||||
DontGuessEncoding();
|
||||
mTreeBuilder->MaybeComplainAboutCharset("EncBomlessUtf16", true, 0);
|
||||
}
|
||||
|
||||
void nsHtml5StreamParser::SetEncodingFromExpat(const char16_t* aEncoding) {
|
||||
if (aEncoding) {
|
||||
nsDependentString utf16(aEncoding);
|
||||
|
@@ -734,15 +696,9 @@ nsresult nsHtml5StreamParser::FinalizeSniffing(Span<const uint8_t> aFromSegment,
|
|||
mEncoding = WrapNotNull(encoding);
|
||||
mCharsetSource = kCharsetFromXmlDeclaration;
|
||||
mTreeBuilder->SetDocumentCharset(mEncoding, mCharsetSource);
|
||||
} else if (mCharsetSource < kCharsetFromIrreversibleAutoDetection) {
|
||||
// meta scan and XML declaration check failed.
|
||||
// Check for BOMless UTF-16 with Basic
|
||||
// Latin content for compat with IE. See bug 631751.
|
||||
SniffBOMlessUTF16BasicLatin(buf, bufLen);
|
||||
}
|
||||
}
|
||||
if (mForceAutoDetection &&
|
||||
mCharsetSource != kCharsetFromIrreversibleAutoDetection) {
|
||||
if (mForceAutoDetection) {
|
||||
// neither meta nor XML declaration found, honor override
|
||||
FinalizeSniffingWithDetector(aFromSegment, aCountToSniffingLimit, false);
|
||||
return SetupDecodingAndWriteSniffingBufferAndCurrentSegment(aFromSegment);
|
||||
|
|
|
@@ -349,11 +349,6 @@ class nsHtml5StreamParser final : public nsISupports {
|
|||
*/
|
||||
nsresult WriteStreamBytes(mozilla::Span<const uint8_t> aFromSegment);
|
||||
|
||||
/**
|
||||
* Check whether every other byte in the sniffing buffer is zero.
|
||||
*/
|
||||
void SniffBOMlessUTF16BasicLatin(const uint8_t* aBuf, size_t aBufLen);
|
||||
|
||||
/**
|
||||
* Write the start of the stream to detector.
|
||||
*/
|
||||
|
|
Двоичный файл не отображается.
|
@@ -16,7 +16,6 @@ support-files =
|
|||
file_bug594730-9.html
|
||||
file_bug642908.sjs
|
||||
file_bug655682.sjs
|
||||
file_bug672453_bomless_utf16.html
|
||||
file_bug672453_http_unsupported.html
|
||||
file_bug672453_http_unsupported.html^headers^
|
||||
file_bug672453_late_meta.html
|
||||
|
|
|
@@ -23,7 +23,6 @@ var tests = [
|
|||
"file_bug672453_meta_restart.html",
|
||||
"file_bug672453_meta_unsupported.html",
|
||||
"file_bug672453_http_unsupported.html",
|
||||
"file_bug672453_bomless_utf16.html",
|
||||
"file_bug672453_meta_utf16.html",
|
||||
"file_bug672453_meta_non_superset.html",
|
||||
"file_bug672453_meta_userdefined.html",
|
||||
|
@@ -50,10 +49,6 @@ var expectedErrors = [
|
|||
sourceName: "http://mochi.test:8888/tests/parser/htmlparser/tests/mochitest/file_bug672453_http_unsupported.html",
|
||||
lineNumber: 0,
|
||||
isWarning: false },
|
||||
{ errorMessage: "Detected UTF-16-encoded Basic Latin-only text without a byte order mark and without a transfer protocol-level declaration. Encoding this content in UTF-16 is inefficient and the character encoding should have been declared in any case.",
|
||||
sourceName: "http://mochi.test:8888/tests/parser/htmlparser/tests/mochitest/file_bug672453_bomless_utf16.html",
|
||||
lineNumber: 0,
|
||||
isWarning: false },
|
||||
{ errorMessage: "A meta tag was used to declare the character encoding as UTF-16. This was interpreted as an UTF-8 declaration instead.",
|
||||
sourceName: "http://mochi.test:8888/tests/parser/htmlparser/tests/mochitest/file_bug672453_meta_utf16.html",
|
||||
lineNumber: 1,
|
||||
|
|
|
@@ -35,8 +35,6 @@ enum {
|
|||
kCharsetFromFinalUserForcedAutoDetection,
|
||||
kCharsetFromXmlDeclarationUtf16, // This one is overridden by
|
||||
// kCharsetFromChannel
|
||||
kCharsetFromIrreversibleAutoDetection, // This one is overridden by
|
||||
// kCharsetFromChannel
|
||||
kCharsetFromByteOrderMark,
|
||||
kCharsetFromUtf8OnlyMime, // For JSON, WebVTT and such
|
||||
kCharsetFromBuiltIn, // resource: URLs
|
||||
|
|
Загрузка…
Ссылка в новой задаче