зеркало из https://github.com/mozilla/gecko-dev.git
Bug 801402 - Use FindEncodingForLabel from HTML parser. r=hsivonen
This commit is contained in:
Родитель
af291bee68
Коммит
d0b455915b
|
@ -14,7 +14,6 @@ EncMetaUnsupported=An unsupported character encoding was declared for the HTML d
|
|||
EncProtocolUnsupported=An unsupported character encoding was declared on the transfer protocol level. The declaration was ignored.
|
||||
EncBomlessUtf16=Detected UTF-16-encoded Basic Latin-only text without a byte order mark and without a transfer protocol-level declaration. Encoding this content in UTF-16 is inefficient and the character encoding should have been declared in any case.
|
||||
EncMetaUtf16=A meta tag was used to declare the character encoding as UTF-16. This was interpreted as an UTF-8 declaration instead.
|
||||
EncMetaNonRoughSuperset=A meta tag was used to declare a character encoding that does not encode the Basic Latin range roughly like US-ASCII. The declaration was ignored.
|
||||
|
||||
# The bulk of the messages below are derived from
|
||||
# http://hg.mozilla.org/projects/htmlparser/file/1f633cef7de7/src/nu/validator/htmlparser/impl/ErrorReportingTokenizer.java
|
||||
|
|
|
@ -4,10 +4,12 @@
|
|||
|
||||
#include "nsICharsetConverterManager.h"
|
||||
#include "nsServiceManagerUtils.h"
|
||||
#include "nsCharsetAlias.h"
|
||||
#include "nsEncoderDecoderUtils.h"
|
||||
#include "nsTraceRefcnt.h"
|
||||
|
||||
#include "mozilla/dom/EncodingUtils.h"
|
||||
|
||||
using mozilla::dom::EncodingUtils;
|
||||
|
||||
void
|
||||
nsHtml5MetaScanner::sniff(nsHtml5ByteReadable* bytes, nsIUnicodeDecoder** decoder, nsACString& charset)
|
||||
|
@ -48,8 +50,7 @@ nsHtml5MetaScanner::tryCharset(nsString* charset)
|
|||
return true;
|
||||
}
|
||||
nsAutoCString preferred;
|
||||
res = nsCharsetAlias::GetPreferred(encoding, preferred);
|
||||
if (NS_FAILED(res)) {
|
||||
if (!EncodingUtils::FindEncodingForLabel(encoding, preferred)) {
|
||||
return false;
|
||||
}
|
||||
if (preferred.LowerCaseEqualsLiteral("utf-16") ||
|
||||
|
|
|
@ -6,7 +6,6 @@
|
|||
|
||||
#include "nsHtml5StreamParser.h"
|
||||
#include "nsICharsetConverterManager.h"
|
||||
#include "nsCharsetAlias.h"
|
||||
#include "nsServiceManagerUtils.h"
|
||||
#include "nsEncoderDecoderUtils.h"
|
||||
#include "nsContentUtils.h"
|
||||
|
@ -26,8 +25,10 @@
|
|||
#include "nsCharsetSource.h"
|
||||
#include "nsIWyciwygChannel.h"
|
||||
|
||||
using namespace mozilla;
|
||||
#include "mozilla/dom/EncodingUtils.h"
|
||||
|
||||
using namespace mozilla;
|
||||
using mozilla::dom::EncodingUtils;
|
||||
|
||||
int32_t nsHtml5StreamParser::sTimerInitialDelay = 120;
|
||||
int32_t nsHtml5StreamParser::sTimerSubsequentDelay = 120;
|
||||
|
@ -1193,28 +1194,25 @@ nsHtml5StreamParser::OnDataAvailable(nsIRequest* aRequest,
|
|||
bool
|
||||
nsHtml5StreamParser::PreferredForInternalEncodingDecl(nsACString& aEncoding)
|
||||
{
|
||||
nsAutoCString newEncoding(aEncoding);
|
||||
newEncoding.Trim(" \t\r\n\f");
|
||||
if (newEncoding.LowerCaseEqualsLiteral("utf-16") ||
|
||||
newEncoding.LowerCaseEqualsLiteral("utf-16be") ||
|
||||
newEncoding.LowerCaseEqualsLiteral("utf-16le")) {
|
||||
mTreeBuilder->MaybeComplainAboutCharset("EncMetaUtf16",
|
||||
true,
|
||||
mTokenizer->getLineNumber());
|
||||
newEncoding.Assign("UTF-8");
|
||||
}
|
||||
|
||||
nsresult rv = NS_OK;
|
||||
bool eq;
|
||||
rv = nsCharsetAlias::Equals(newEncoding, mCharset, &eq);
|
||||
if (NS_FAILED(rv)) {
|
||||
nsAutoCString newEncoding;
|
||||
if (!EncodingUtils::FindEncodingForLabel(aEncoding, newEncoding)) {
|
||||
// the encoding name is bogus
|
||||
mTreeBuilder->MaybeComplainAboutCharset("EncMetaUnsupported",
|
||||
true,
|
||||
mTokenizer->getLineNumber());
|
||||
return false;
|
||||
}
|
||||
if (eq) {
|
||||
|
||||
if (newEncoding.EqualsLiteral("UTF-16") ||
|
||||
newEncoding.EqualsLiteral("UTF-16BE") ||
|
||||
newEncoding.EqualsLiteral("UTF-16LE")) {
|
||||
mTreeBuilder->MaybeComplainAboutCharset("EncMetaUtf16",
|
||||
true,
|
||||
mTokenizer->getLineNumber());
|
||||
newEncoding.Assign("UTF-8");
|
||||
}
|
||||
|
||||
if (newEncoding.Equals(mCharset)) {
|
||||
if (mCharsetSource < kCharsetFromMetaPrescan) {
|
||||
if (mInitialEncodingWasFromParentFrame) {
|
||||
mTreeBuilder->MaybeComplainAboutCharset("EncLateMetaFrame",
|
||||
|
@ -1231,36 +1229,7 @@ nsHtml5StreamParser::PreferredForInternalEncodingDecl(nsACString& aEncoding)
|
|||
return false;
|
||||
}
|
||||
|
||||
// XXX check HTML5 non-IANA aliases here
|
||||
|
||||
nsAutoCString preferred;
|
||||
rv = nsCharsetAlias::GetPreferred(newEncoding, preferred);
|
||||
if (NS_FAILED(rv)) {
|
||||
// This charset has been blacklisted for permitting XSS smuggling.
|
||||
// EncMetaNonRoughSuperset is a reasonable approximation to the
|
||||
// right error message.
|
||||
mTreeBuilder->MaybeComplainAboutCharset("EncMetaNonRoughSuperset",
|
||||
true,
|
||||
mTokenizer->getLineNumber());
|
||||
return false;
|
||||
}
|
||||
|
||||
// ??? Explicit further blacklist of character sets that are not
|
||||
// "rough supersets" of ASCII. Some of these are handled above (utf-16),
|
||||
// some by the XSS smuggling blacklist in charsetData.properties,
|
||||
// maybe all of the remainder should also be blacklisted there.
|
||||
if (preferred.LowerCaseEqualsLiteral("utf-16") ||
|
||||
preferred.LowerCaseEqualsLiteral("utf-16be") ||
|
||||
preferred.LowerCaseEqualsLiteral("utf-16le") ||
|
||||
preferred.LowerCaseEqualsLiteral("utf-7") ||
|
||||
preferred.LowerCaseEqualsLiteral("x-imap4-modified-utf7")) {
|
||||
// Not a rough ASCII superset
|
||||
mTreeBuilder->MaybeComplainAboutCharset("EncMetaNonRoughSuperset",
|
||||
true,
|
||||
mTokenizer->getLineNumber());
|
||||
return false;
|
||||
}
|
||||
aEncoding.Assign(preferred);
|
||||
aEncoding.Assign(newEncoding);
|
||||
return true;
|
||||
}
|
||||
|
||||
|
|
|
@ -14,7 +14,6 @@
|
|||
#include "nsIChannel.h"
|
||||
#include "nsICachingChannel.h"
|
||||
#include "nsICacheEntryDescriptor.h"
|
||||
#include "nsCharsetAlias.h"
|
||||
#include "nsICharsetConverterManager.h"
|
||||
#include "nsIInputStream.h"
|
||||
#include "CNavDTD.h"
|
||||
|
@ -43,7 +42,10 @@
|
|||
#include "nsCharsetSource.h"
|
||||
#include "nsContentUtils.h"
|
||||
|
||||
#include "mozilla/dom/EncodingUtils.h"
|
||||
|
||||
using namespace mozilla;
|
||||
using mozilla::dom::EncodingUtils;
|
||||
|
||||
#define NS_PARSER_FLAG_PARSER_ENABLED 0x00000002
|
||||
#define NS_PARSER_FLAG_OBSERVERS_ENABLED 0x00000004
|
||||
|
@ -1840,8 +1842,7 @@ ParserWriteFunc(nsIInputStream* in,
|
|||
nsAutoCString declCharset;
|
||||
|
||||
if (ExtractCharsetFromXmlDeclaration(buf, count, declCharset)) {
|
||||
nsresult rv = nsCharsetAlias::GetPreferred(declCharset, maybePrefer);
|
||||
if (NS_SUCCEEDED(rv)) {
|
||||
if (EncodingUtils::FindEncodingForLabel(declCharset, maybePrefer)) {
|
||||
preferred.Assign(maybePrefer);
|
||||
source = kCharsetFromMetaTag;
|
||||
}
|
||||
|
|
|
@ -10,7 +10,6 @@
|
|||
#include "nsDebug.h"
|
||||
#include "nsIServiceManager.h"
|
||||
#include "nsICharsetConverterManager.h"
|
||||
#include "nsCharsetAlias.h"
|
||||
#include "nsReadableUtils.h"
|
||||
#include "nsIInputStream.h"
|
||||
#include "nsIFile.h"
|
||||
|
@ -20,6 +19,10 @@
|
|||
#include "nsParser.h"
|
||||
#include "nsCharsetSource.h"
|
||||
|
||||
#include "mozilla/dom/EncodingUtils.h"
|
||||
|
||||
using mozilla::dom::EncodingUtils;
|
||||
|
||||
// We replace NUL characters with this character.
|
||||
static PRUnichar sInvalid = UCS2_REPLACEMENT_CHAR;
|
||||
|
||||
|
@ -118,12 +121,12 @@ nsresult nsScanner::SetDocumentCharset(const nsACString& aCharset , int32_t aSou
|
|||
if (aSource < mCharsetSource) // priority is lower than the current one, just
|
||||
return NS_OK;
|
||||
|
||||
nsresult res = NS_OK;
|
||||
nsCString charsetName;
|
||||
bool valid = EncodingUtils::FindEncodingForLabel(aCharset, charsetName);
|
||||
MOZ_ASSERT(valid, "Should never call with a bogus aCharset.");
|
||||
if (!mCharset.IsEmpty())
|
||||
{
|
||||
bool same;
|
||||
res = nsCharsetAlias::Equals(aCharset, mCharset, &same);
|
||||
if(NS_SUCCEEDED(res) && same)
|
||||
if (charsetName.Equals(mCharset))
|
||||
{
|
||||
mCharsetSource = aSource;
|
||||
return NS_OK; // no difference, don't change it
|
||||
|
@ -131,9 +134,6 @@ nsresult nsScanner::SetDocumentCharset(const nsACString& aCharset , int32_t aSou
|
|||
}
|
||||
|
||||
// different, need to change it
|
||||
nsCString charsetName;
|
||||
res = nsCharsetAlias::GetPreferred(aCharset, charsetName);
|
||||
MOZ_ASSERT(NS_SUCCEEDED(res), "Should never call with a bogus aCharset.");
|
||||
|
||||
mCharset.Assign(charsetName);
|
||||
|
||||
|
@ -142,7 +142,7 @@ nsresult nsScanner::SetDocumentCharset(const nsACString& aCharset , int32_t aSou
|
|||
NS_ASSERTION(nsParser::GetCharsetConverterManager(),
|
||||
"Must have the charset converter manager!");
|
||||
|
||||
res = nsParser::GetCharsetConverterManager()->
|
||||
nsresult res = nsParser::GetCharsetConverterManager()->
|
||||
GetUnicodeDecoderRaw(mCharset.get(), getter_AddRefs(mUnicodeDecoder));
|
||||
if (NS_SUCCEEDED(res) && mUnicodeDecoder)
|
||||
{
|
||||
|
|
Загрузка…
Ссылка в новой задаче