зеркало из https://github.com/mozilla/pjs.git
- Bug 243040: Re-escape attribute values which Gecko unescaped for us. Not doing this allowed arbitrary unchecked HTML to be inserted into the result.
- JS quotes in attribute values are now probably possible (but JS is still usually disabled by the allowed-attribute prefs and the javascript: URL blocker). - Removed the base64 blocker, which is probably pointless. - Comment and debug changes. - Little code style cleanup. r=akk, sr=jst
This commit is contained in:
Родитель
dd2ed21b3a
Коммит
a6d55e7b8a
|
@ -36,16 +36,13 @@
|
|||
*
|
||||
* ***** END LICENSE BLOCK ***** */
|
||||
|
||||
// Removes potentially insecure or offending HTML
|
||||
|
||||
/* I used nsPlaintextSerializer as base for this class. I don't understand
|
||||
all of the functions in the beginning. Possible that I fail to do
|
||||
something or do something useless.
|
||||
I am not proud about the implementation here at all.
|
||||
Feel free to fix it :-).
|
||||
|
||||
I am moderately concerned about methods to obfuscate HTML, which the
|
||||
parser can decode during execution.
|
||||
E.g. there are these dreaded data: and javascript URLs and
|
||||
base64 encoding (which I don't really understand how it alloies
|
||||
*/
|
||||
|
||||
#include "mozSanitizingSerializer.h"
|
||||
|
@ -58,14 +55,19 @@
|
|||
#include "nsContentUtils.h"
|
||||
#include "nsReadableUtils.h"
|
||||
#include "plstr.h"
|
||||
//#include "nsDependentString.h"
|
||||
#include "nsIProperties.h"
|
||||
#include "nsUnicharUtils.h"
|
||||
#include "nsIURI.h"
|
||||
#include "nsNetUtil.h"
|
||||
#include "nsEscape.h"
|
||||
|
||||
//#define DEBUG_BenB
|
||||
|
||||
// HTML-escapes |source| by handing its buffer and length to nsEscapeHTML2
// and returns that function's PRUnichar* result unchanged.
// Callers visible in this file wrap the result in nsAdoptingString or pass
// it to Adopt(), so the returned buffer is presumably heap-allocated and
// owned by the caller -- confirm against nsEscape.h.
// NOTE(review): no null check on the result is done here.
static inline PRUnichar* escape(const nsString& source)
|
||||
{
|
||||
return nsEscapeHTML2(source.get(), source.Length());
|
||||
}
|
||||
|
||||
/* XXX: |printf|s in some error conditions. They are intended as information
|
||||
for the user, because they complain about malformed pref values.
|
||||
Not sure, if popping up dialog boxes is the right thing for such code
|
||||
|
@ -89,16 +91,18 @@ nsresult NS_NewSanitizingHTMLSerializer(nsIContentSerializer** aSerializer)
|
|||
// Constructor: sizes the allowed-tags table with an initial value of 30 and
// clears the output string pointer, so no output target is set until some
// later code assigns mOutputString.
mozSanitizingHTMLSerializer::mozSanitizingHTMLSerializer()
|
||||
: mAllowedTags(30) // Just some initial buffer size
|
||||
{
|
||||
|
||||
mOutputString = nsnull;
|
||||
}
|
||||
|
||||
// Destructor: runs ReleaseProperties over every entry of mAllowedTags.
// With DEBUG_BenB defined it first prints the accumulated output.
// NOTE(review): the debug printf dereferences mOutputString, which the
// constructor sets to nsnull; presumably it has been assigned by the time
// the object is destroyed -- confirm, or guard the dereference.
mozSanitizingHTMLSerializer::~mozSanitizingHTMLSerializer()
|
||||
{
|
||||
#ifdef DEBUG_BenB
|
||||
printf("Output:\n%s\n", NS_LossyConvertUCS2toASCII(*mOutputString).get());
|
||||
#endif
|
||||
mAllowedTags.Enumerate(ReleaseProperties);
|
||||
}
|
||||
|
||||
//<copied from="xpcom/ds/nsProperties.cpp">
|
||||
//<copy from="xpcom/ds/nsProperties.cpp">
|
||||
PRBool PR_CALLBACK
|
||||
mozSanitizingHTMLSerializer::ReleaseProperties(nsHashKey* key, void* data,
|
||||
void* closure)
|
||||
|
@ -107,7 +111,7 @@ mozSanitizingHTMLSerializer::ReleaseProperties(nsHashKey* key, void* data,
|
|||
NS_IF_RELEASE(prop);
|
||||
return PR_TRUE;
|
||||
}
|
||||
//</copied>
|
||||
//</copy>
|
||||
|
||||
NS_IMPL_ISUPPORTS4(mozSanitizingHTMLSerializer,
|
||||
nsIContentSerializer,
|
||||
|
@ -149,8 +153,8 @@ mozSanitizingHTMLSerializer::Initialize(nsAString* aOutString,
|
|||
NS_IMETHODIMP
|
||||
mozSanitizingHTMLSerializer::Flush(nsAString& aStr)
|
||||
{
|
||||
#if DEBUG_BenB
|
||||
printf("Flush: -%s-", NS_LossyConvertUCS2toASCII(aString).get());
|
||||
#ifdef DEBUG_BenB
|
||||
printf("Flush: -%s-", NS_LossyConvertUCS2toASCII(aStr).get());
|
||||
#endif
|
||||
Write(aStr);
|
||||
return NS_OK;
|
||||
|
@ -166,7 +170,6 @@ mozSanitizingHTMLSerializer::AppendDocumentStart(nsIDOMDocument *aDocument,
|
|||
// Appends |aString| verbatim to the string that mOutputString points to.
// No escaping or filtering happens here; callers must pass text that is
// already safe to emit.
// NOTE(review): mOutputString is dereferenced without a null check.
void
|
||||
mozSanitizingHTMLSerializer::Write(const nsAString& aString)
|
||||
{
|
||||
// printf("%s", NS_LossyConvertUCS2toASCII(aString).get());
|
||||
mOutputString->Append(aString);
|
||||
}
|
||||
|
||||
|
@ -360,7 +363,7 @@ mozSanitizingHTMLSerializer::SetTitle(const nsString& aValue)
|
|||
// missing </title> tag won't result in everything
|
||||
// being eaten up as the title.
|
||||
Write(NS_LITERAL_STRING("<title>"));
|
||||
Write(aValue);
|
||||
Write(nsAdoptingString(escape(aValue)));
|
||||
Write(NS_LITERAL_STRING("</title>"));
|
||||
}
|
||||
return NS_OK;
|
||||
|
@ -379,7 +382,8 @@ mozSanitizingHTMLSerializer::SetDocumentCharset(nsACString& aCharset)
|
|||
Write(NS_LITERAL_STRING("\n<meta http-equiv=\"Context-Type\" content=\"text/html; charset=")
|
||||
/* Danger: breaking the line within the string literal, like
|
||||
"foo"\n"bar", breaks win32! */
|
||||
+ NS_ConvertASCIItoUCS2(aCharset) + NS_LITERAL_STRING("\">\n"));
|
||||
+ nsAdoptingString(escape(NS_ConvertASCIItoUCS2(aCharset)))
|
||||
+ NS_LITERAL_STRING("\">\n"));
|
||||
return NS_OK;
|
||||
}
|
||||
|
||||
|
@ -529,7 +533,7 @@ mozSanitizingHTMLSerializer::DoAddLeaf(PRInt32 aTag,
|
|||
if (type == eHTMLTag_whitespace ||
|
||||
type == eHTMLTag_newline)
|
||||
{
|
||||
Write(aText);
|
||||
Write(aText); // sure to be safe?
|
||||
}
|
||||
else if (type == eHTMLTag_text)
|
||||
{
|
||||
|
@ -538,13 +542,13 @@ mozSanitizingHTMLSerializer::DoAddLeaf(PRInt32 aTag,
|
|||
Write(text);
|
||||
else
|
||||
Write(NS_LITERAL_STRING(TEXT_REMOVED)); // Does not happen (yet)
|
||||
if (NS_FAILED(rv))
|
||||
return rv;
|
||||
NS_ENSURE_SUCCESS(rv, rv);
|
||||
}
|
||||
else if (type == eHTMLTag_entity)
|
||||
{
|
||||
Write(NS_LITERAL_STRING("&"));
|
||||
Write(aText);
|
||||
Write(aText); // sure to be safe?
|
||||
Write(NS_LITERAL_STRING(";"));
|
||||
// using + operator here might give an infinite loop, see above.
|
||||
}
|
||||
else
|
||||
|
@ -562,19 +566,7 @@ mozSanitizingHTMLSerializer::DoAddLeaf(PRInt32 aTag,
|
|||
nsresult
|
||||
mozSanitizingHTMLSerializer::SanitizeTextNode(nsString& aText /*inout*/)
|
||||
{
|
||||
nsString& text = aText; // alias
|
||||
|
||||
PRInt32 pos;
|
||||
if ((pos = text.Find("base64")) != kNotFound)
|
||||
// Probably useless, not sure, but perfhit
|
||||
// return NS_ERROR_ILLEGAL_VALUE; -- this is too extreme
|
||||
text.Insert(NS_LITERAL_STRING(TEXT_BREAKER), pos + 1);
|
||||
/* Insert some other text after the first char of the problematic
|
||||
text, so we prevent the processing by Gecko.
|
||||
No idea, if that is needed, but better do it than being sorry.
|
||||
Somebody who knows all the dangers and how they are reflected
|
||||
in Gecko please jump in. */
|
||||
|
||||
aText.Adopt(escape(aText));
|
||||
return NS_OK;
|
||||
}
|
||||
|
||||
|
@ -584,14 +576,14 @@ mozSanitizingHTMLSerializer::SanitizeTextNode(nsString& aText /*inout*/)
|
|||
unwanted / dangerous URLs appear in the document
|
||||
(like javascript: and data:).
|
||||
|
||||
Pass the value as |value| arg. It will be modified in-place.
|
||||
Pass the value as |aValue| arg. It will be modified in-place.
|
||||
|
||||
If the value is not allowed at all, we return with NS_ERROR_ILLEGAL_VALUE.
|
||||
In that case, do not use the |value|, but output nothing.
|
||||
In that case, do not use the |aValue|, but output nothing.
|
||||
*/
|
||||
nsresult
|
||||
mozSanitizingHTMLSerializer::SanitizeAttrValue(nsHTMLTag aTag,
|
||||
const nsAString& attr_name,
|
||||
const nsAString& anAttrName,
|
||||
nsString& aValue /*inout*/)
|
||||
{
|
||||
/* First, cut the attribute to 1000 chars.
|
||||
|
@ -599,42 +591,42 @@ mozSanitizingHTMLSerializer::SanitizeAttrValue(nsHTMLTag aTag,
|
|||
considering that we don't support any JS. The longest attributes
|
||||
I can think of are URLs, and URLs with 1000 chars are likely to be
|
||||
bogus, too. */
|
||||
nsAutoString value(Substring(aValue, 0, 1000));
|
||||
//value.Truncate(1000); //-- this cuts half of the document !!?!!
|
||||
aValue = Substring(aValue, 0, 1000);
|
||||
//aValue.Truncate(1000); //-- this cuts half of the document !!?!!
|
||||
|
||||
#ifdef DEBUG_BenB
|
||||
printf("7: %s\n", ToNewUTF8String(value));
|
||||
#endif
|
||||
|
||||
value.StripChars("\"'"); /* This will break javascript attributes,
|
||||
but who wants javascript in
|
||||
sanitized HTML? */
|
||||
aValue.Adopt(escape(aValue));
|
||||
|
||||
/* Check some known bad stuff. Add more!
|
||||
I don't care too much, if it happens to trigger in some innocent cases
|
||||
(like <img alt="Statistical data: Mortage rates and newspapers">) -
|
||||
security first. */
|
||||
if (value.Find("javascript:") != kNotFound ||
|
||||
value.Find("data:") != kNotFound ||
|
||||
value.Find("base64") != kNotFound)
|
||||
if (aValue.Find("javascript:") != kNotFound ||
|
||||
aValue.Find("data:") != kNotFound ||
|
||||
aValue.Find("base64") != kNotFound)
|
||||
return NS_ERROR_ILLEGAL_VALUE;
|
||||
|
||||
// Check img src scheme
|
||||
if (aTag == eHTMLTag_img &&
|
||||
attr_name.Equals(NS_LITERAL_STRING("src"), nsCaseInsensitiveStringComparator()))
|
||||
anAttrName.Equals(NS_LITERAL_STRING("src"),
|
||||
nsCaseInsensitiveStringComparator()))
|
||||
{
|
||||
nsresult rv;
|
||||
nsCOMPtr<nsIIOService> ioService;
|
||||
ioService = do_GetIOService(&rv);
|
||||
if (NS_FAILED(rv)) return rv;
|
||||
nsCOMPtr<nsIIOService> ioService = do_GetIOService(&rv);
|
||||
NS_ENSURE_SUCCESS(rv, rv);
|
||||
nsCAutoString scheme;
|
||||
rv = ioService->ExtractScheme(NS_LossyConvertUCS2toASCII(value), scheme);
|
||||
if (NS_FAILED(rv)) return rv;
|
||||
rv = ioService->ExtractScheme(NS_LossyConvertUCS2toASCII(aValue), scheme);
|
||||
NS_ENSURE_SUCCESS(rv, rv);
|
||||
|
||||
if (!scheme.Equals("cid", nsCaseInsensitiveCStringComparator()))
|
||||
return NS_ERROR_ILLEGAL_VALUE;
|
||||
}
|
||||
|
||||
#ifdef DEBUG_BenB
|
||||
printf("attribute value for %s: -%s-\n",
|
||||
NS_LossyConvertUCS2toASCII(anAttrName).get(),
|
||||
NS_LossyConvertUCS2toASCII(aValue).get());
|
||||
#endif
|
||||
|
||||
return NS_OK;
|
||||
}
|
||||
|
||||
|
@ -707,9 +699,6 @@ nsresult
|
|||
mozSanitizingHTMLSerializer::ParsePrefs(const nsAString& aPref)
|
||||
{
|
||||
char* pref = ToNewCString(aPref);
|
||||
#ifdef DEBUG_BenB
|
||||
printf("pref: -%s-\n", pref);
|
||||
#endif
|
||||
char* tags_lasts;
|
||||
for (char* iTag = PL_strtok_r(pref, " ", &tags_lasts);
|
||||
iTag;
|
||||
|
@ -734,16 +723,10 @@ mozSanitizingHTMLSerializer::ParseTagPref(const nsCAutoString& tagpref)
|
|||
return NS_ERROR_OUT_OF_MEMORY;
|
||||
|
||||
// Parsing tag
|
||||
#ifdef DEBUG_BenB
|
||||
printf("Processing tag pref -%s-\n", tagpref.get());
|
||||
#endif
|
||||
PRInt32 bracket = tagpref.Find("(");
|
||||
nsCAutoString tag = tagpref;
|
||||
if (bracket != kNotFound)
|
||||
tag.Truncate(bracket);
|
||||
#ifdef DEBUG_BenB
|
||||
printf( "Tag -%s-\n", tag.get());
|
||||
#endif
|
||||
if (tag.Equals(""))
|
||||
{
|
||||
printf(" malformed pref: %s\n", tagpref.get());
|
||||
|
@ -754,14 +737,6 @@ mozSanitizingHTMLSerializer::ParseTagPref(const nsCAutoString& tagpref)
|
|||
NS_ConvertASCIItoUCS2 tag_widestr(tag);
|
||||
PRInt32 tag_id;
|
||||
parserService->HTMLStringTagToId(tag_widestr, &tag_id);
|
||||
#ifdef DEBUG_BenB
|
||||
printf(" Have tag %d\n", tag_id);
|
||||
const PRUnichar* tag_back;
|
||||
parserService->HTMLIdToStringTag(tag_id, &tag_back);
|
||||
printf(" Equals -%s-\n", tag_back
|
||||
? NS_ConvertUCS2toUTF8(tag_back).get()
|
||||
: "");
|
||||
#endif
|
||||
if (tag_id == eHTMLTag_userdefined ||
|
||||
tag_id == eHTMLTag_unknown)
|
||||
{
|
||||
|
@ -799,18 +774,12 @@ mozSanitizingHTMLSerializer::ParseTagPref(const nsCAutoString& tagpref)
|
|||
attrList.Append(Substring(tagpref,
|
||||
bracket + 1,
|
||||
tagpref.Length() - 2 - bracket));
|
||||
#ifdef DEBUG_BenB
|
||||
printf(" Attr list: -%s-\n", attrList.get());
|
||||
#endif
|
||||
char* attrs_lasts;
|
||||
for (char* iAttr = PL_strtok_r(attrList.BeginWriting(),
|
||||
",", &attrs_lasts);
|
||||
iAttr;
|
||||
iAttr = PL_strtok_r(NULL, ",", &attrs_lasts))
|
||||
{
|
||||
#ifdef DEBUG_BenB
|
||||
printf(" Processing attr -%s-\n", iAttr);
|
||||
#endif
|
||||
attr_bag->Set(iAttr, 0);
|
||||
}
|
||||
|
||||
|
|
Загрузка…
Ссылка в новой задаче