- Bug 243040: Re-escape attribute values which Gecko unescaped for us. Not doing this allowed inserting arbitrary unchecked HTML into the result.

- JS quotes in attribute values are now probably possible (but JS still usually disabled by allowed attribute prefs and javascript: URL blocker)
- removed base64 blocker which is probably pointless
- comment and debug changes
- little code style cleanup
r=akk, sr=jst
This commit is contained in:
mozilla.BenB%bucksch.org 2004-06-05 22:47:40 +00:00
Родитель dd2ed21b3a
Коммит a6d55e7b8a
1 изменённых файлов: 44 добавлений и 75 удалений

Просмотреть файл

@ -36,16 +36,13 @@
*
* ***** END LICENSE BLOCK ***** */
// Removes potentially insecure or offending HTML
/* I used nsPlaintextSerializer as base for this class. I don't understand
all of the functions in the beginning. Possible that I fail to do
something or do something useless.
I am not proud about the implementation here at all.
Feel free to fix it :-).
I am moderately concerned about methods to obfuscate HTML, which the
parser can decode during execution.
E.g. there are these dreaded data: and javascript URLs and
base64 encoding (which I don't really understand how it alloies
*/
#include "mozSanitizingSerializer.h"
@ -58,14 +55,19 @@
#include "nsContentUtils.h"
#include "nsReadableUtils.h"
#include "plstr.h"
//#include "nsDependentString.h"
#include "nsIProperties.h"
#include "nsUnicharUtils.h"
#include "nsIURI.h"
#include "nsNetUtil.h"
#include "nsEscape.h"
//#define DEBUG_BenB
// Returns the result of nsEscapeHTML2 applied to the characters of |source|,
// i.e. an HTML-escaped copy of the string.
// Callers in this file pass the returned pointer to nsAdoptingString or
// nsString::Adopt, so they treat it as a buffer whose ownership they take
// over (presumably nsEscapeHTML2 heap-allocates it -- TODO confirm against
// nsEscape.h). Do not drop the return value without adopting or freeing it.
static inline PRUnichar* escape(const nsString& source)
{
return nsEscapeHTML2(source.get(), source.Length());
}
/* XXX: |printf|s in some error conditions. They are intended as information
for the user, because they complain about malformed pref values.
Not sure, if popping up dialog boxes is the right thing for such code
@ -89,16 +91,18 @@ nsresult NS_NewSanitizingHTMLSerializer(nsIContentSerializer** aSerializer)
// Constructs a serializer with an empty allowed-tags table and no output
// string attached.
mozSanitizingHTMLSerializer::mozSanitizingHTMLSerializer()
: mAllowedTags(30) // Just some initial buffer size
{
// No output target yet. Write() dereferences this pointer without a null
// check, so it has to be set before any output is produced (presumably by
// Initialize() -- TODO confirm).
mOutputString = nsnull;
}
// Destructor. In DEBUG_BenB builds it dumps the accumulated output (lossily
// converted to ASCII) to stdout, then runs ReleaseProperties over every entry
// of mAllowedTags.
mozSanitizingHTMLSerializer::~mozSanitizingHTMLSerializer()
{
#ifdef DEBUG_BenB
// The constructor sets mOutputString to nsnull; if no output string was ever
// attached, dereferencing it here would crash the debug build.
if (mOutputString)
  printf("Output:\n%s\n", NS_LossyConvertUCS2toASCII(*mOutputString).get());
#endif
mAllowedTags.Enumerate(ReleaseProperties);
}
//<copied from="xpcom/ds/nsProperties.cpp">
//<copy from="xpcom/ds/nsProperties.cpp">
PRBool PR_CALLBACK
mozSanitizingHTMLSerializer::ReleaseProperties(nsHashKey* key, void* data,
void* closure)
@ -107,7 +111,7 @@ mozSanitizingHTMLSerializer::ReleaseProperties(nsHashKey* key, void* data,
NS_IF_RELEASE(prop);
return PR_TRUE;
}
//</copied>
//</copy>
NS_IMPL_ISUPPORTS4(mozSanitizingHTMLSerializer,
nsIContentSerializer,
@ -149,8 +153,8 @@ mozSanitizingHTMLSerializer::Initialize(nsAString* aOutString,
NS_IMETHODIMP
mozSanitizingHTMLSerializer::Flush(nsAString& aStr)
{
#if DEBUG_BenB
printf("Flush: -%s-", NS_LossyConvertUCS2toASCII(aString).get());
#ifdef DEBUG_BenB
printf("Flush: -%s-", NS_LossyConvertUCS2toASCII(aStr).get());
#endif
Write(aStr);
return NS_OK;
@ -166,7 +170,6 @@ mozSanitizingHTMLSerializer::AppendDocumentStart(nsIDOMDocument *aDocument,
// Appends |aString| to the string that mOutputString points to.
// mOutputString is dereferenced without a null check, so an output string
// must already be attached when this is called.
void
mozSanitizingHTMLSerializer::Write(const nsAString& aString)
{
// printf("%s", NS_LossyConvertUCS2toASCII(aString).get());
mOutputString->Append(aString);
}
@ -360,7 +363,7 @@ mozSanitizingHTMLSerializer::SetTitle(const nsString& aValue)
// missing </title> tag won't result in everything
// being eaten up as the title.
Write(NS_LITERAL_STRING("<title>"));
Write(aValue);
Write(nsAdoptingString(escape(aValue)));
Write(NS_LITERAL_STRING("</title>"));
}
return NS_OK;
@ -379,7 +382,8 @@ mozSanitizingHTMLSerializer::SetDocumentCharset(nsACString& aCharset)
Write(NS_LITERAL_STRING("\n<meta http-equiv=\"Context-Type\" content=\"text/html; charset=")
/* Danger: breaking the line within the string literal, like
"foo"\n"bar", breaks win32! */
+ NS_ConvertASCIItoUCS2(aCharset) + NS_LITERAL_STRING("\">\n"));
+ nsAdoptingString(escape(NS_ConvertASCIItoUCS2(aCharset)))
+ NS_LITERAL_STRING("\">\n"));
return NS_OK;
}
@ -529,7 +533,7 @@ mozSanitizingHTMLSerializer::DoAddLeaf(PRInt32 aTag,
if (type == eHTMLTag_whitespace ||
type == eHTMLTag_newline)
{
Write(aText);
Write(aText); // sure to be safe?
}
else if (type == eHTMLTag_text)
{
@ -538,13 +542,13 @@ mozSanitizingHTMLSerializer::DoAddLeaf(PRInt32 aTag,
Write(text);
else
Write(NS_LITERAL_STRING(TEXT_REMOVED)); // Does not happen (yet)
if (NS_FAILED(rv))
return rv;
NS_ENSURE_SUCCESS(rv, rv);
}
else if (type == eHTMLTag_entity)
{
Write(NS_LITERAL_STRING("&"));
Write(aText);
Write(aText); // sure to be safe?
Write(NS_LITERAL_STRING(";"));
// using + operator here might give an infinitive loop, see above.
}
else
@ -562,19 +566,7 @@ mozSanitizingHTMLSerializer::DoAddLeaf(PRInt32 aTag,
// Sanitizes the content of a text node in place.
// As listed here: if "base64" occurs in the text, TEXT_BREAKER is inserted
// after the first character of that occurrence; afterwards the text is
// replaced by its escape()d form. Always returns NS_OK.
// NOTE(review): the commit message says the base64 blocker was removed, so
// the Find/Insert lines are presumably the pre-change version shown next to
// the new Adopt(escape(...)) line -- confirm against the real file.
nsresult
mozSanitizingHTMLSerializer::SanitizeTextNode(nsString& aText /*inout*/)
{
nsString& text = aText; // alias
PRInt32 pos;
if ((pos = text.Find("base64")) != kNotFound)
// Probably useless, not sure, but perfhit
// return NS_ERROR_ILLEGAL_VALUE; -- this is too extreme
text.Insert(NS_LITERAL_STRING(TEXT_BREAKER), pos + 1);
/* Insert some other text after the first char of the problematic
text, so we prevent the processing by Gecko.
No idea, if that is needed, but better do it than being sorry.
Somebody who knows all the dangers and how they are reflected
in Gecko please jump in. */
// escape() has finished reading aText before Adopt() swaps in the new buffer.
aText.Adopt(escape(aText));
return NS_OK;
}
@ -584,14 +576,14 @@ mozSanitizingHTMLSerializer::SanitizeTextNode(nsString& aText /*inout*/)
unwanted / dangerous URLs appear in the document
(like javascript: and data:).
Pass the value as |value| arg. It will be modified in-place.
Pass the value as |aValue| arg. It will be modified in-place.
If the value is not allowed at all, we return with NS_ERROR_ILLEGAL_VALUE.
In that case, do not use the |value|, but output nothing.
In that case, do not use the |aValue|, but output nothing.
*/
// NOTE(review): this listing appears to show removed and added lines of the
// change side by side (e.g. both the attr_name and anAttrName parameter
// lines, and both the |value| and |aValue| variants of several statements).
// Read the |aValue| / anAttrName lines as the post-change code -- confirm
// against the real file before relying on any single line.
nsresult
mozSanitizingHTMLSerializer::SanitizeAttrValue(nsHTMLTag aTag,
const nsAString& attr_name,
const nsAString& anAttrName,
nsString& aValue /*inout*/)
{
/* First, cut the attribute to 1000 chars.
@ -599,42 +591,42 @@ mozSanitizingHTMLSerializer::SanitizeAttrValue(nsHTMLTag aTag,
considering that we don't support any JS. The longest attributes
I can think of are URLs, and URLs with 1000 chars are likely to be
bogus, too. */
nsAutoString value(Substring(aValue, 0, 1000));
//value.Truncate(1000); //-- this cuts half of the document !!?!!
aValue = Substring(aValue, 0, 1000);
//aValue.Truncate(1000); //-- this cuts half of the document !!?!!
#ifdef DEBUG_BenB
printf("7: %s\n", ToNewUTF8String(value));
#endif
value.StripChars("\"'"); /* This will break javascript attributes,
but who wants javascript in
sanitized HTML? */
// Replace the value with its escape()d form. Every check below this line
// therefore runs on the escaped text, not on the raw attribute value.
aValue.Adopt(escape(aValue));
/* Check some known bad stuff. Add more!
I don't care too much, if it happens to trigger in some innocent cases
(like <img alt="Statistical data: Mortage rates and newspapers">) -
security first. */
if (value.Find("javascript:") != kNotFound ||
value.Find("data:") != kNotFound ||
value.Find("base64") != kNotFound)
if (aValue.Find("javascript:") != kNotFound ||
aValue.Find("data:") != kNotFound ||
aValue.Find("base64") != kNotFound)
return NS_ERROR_ILLEGAL_VALUE;
// Check img src scheme
if (aTag == eHTMLTag_img &&
attr_name.Equals(NS_LITERAL_STRING("src"), nsCaseInsensitiveStringComparator()))
anAttrName.Equals(NS_LITERAL_STRING("src"),
nsCaseInsensitiveStringComparator()))
{
nsresult rv;
nsCOMPtr<nsIIOService> ioService;
ioService = do_GetIOService(&rv);
if (NS_FAILED(rv)) return rv;
nsCOMPtr<nsIIOService> ioService = do_GetIOService(&rv);
NS_ENSURE_SUCCESS(rv, rv);
nsCAutoString scheme;
rv = ioService->ExtractScheme(NS_LossyConvertUCS2toASCII(value), scheme);
if (NS_FAILED(rv)) return rv;
rv = ioService->ExtractScheme(NS_LossyConvertUCS2toASCII(aValue), scheme);
NS_ENSURE_SUCCESS(rv, rv);
// For <img src>, only the "cid" scheme (compared case-insensitively) is
// accepted; any other scheme rejects the attribute value.
if (!scheme.Equals("cid", nsCaseInsensitiveCStringComparator()))
return NS_ERROR_ILLEGAL_VALUE;
}
#ifdef DEBUG_BenB
printf("attribute value for %s: -%s-\n",
NS_LossyConvertUCS2toASCII(anAttrName).get(),
NS_LossyConvertUCS2toASCII(aValue).get());
#endif
return NS_OK;
}
@ -707,9 +699,6 @@ nsresult
mozSanitizingHTMLSerializer::ParsePrefs(const nsAString& aPref)
{
char* pref = ToNewCString(aPref);
#ifdef DEBUG_BenB
printf("pref: -%s-\n", pref);
#endif
char* tags_lasts;
for (char* iTag = PL_strtok_r(pref, " ", &tags_lasts);
iTag;
@ -734,16 +723,10 @@ mozSanitizingHTMLSerializer::ParseTagPref(const nsCAutoString& tagpref)
return NS_ERROR_OUT_OF_MEMORY;
// Parsing tag
#ifdef DEBUG_BenB
printf("Processing tag pref -%s-\n", tagpref.get());
#endif
PRInt32 bracket = tagpref.Find("(");
nsCAutoString tag = tagpref;
if (bracket != kNotFound)
tag.Truncate(bracket);
#ifdef DEBUG_BenB
printf( "Tag -%s-\n", tag.get());
#endif
if (tag.Equals(""))
{
printf(" malformed pref: %s\n", tagpref.get());
@ -754,14 +737,6 @@ mozSanitizingHTMLSerializer::ParseTagPref(const nsCAutoString& tagpref)
NS_ConvertASCIItoUCS2 tag_widestr(tag);
PRInt32 tag_id;
parserService->HTMLStringTagToId(tag_widestr, &tag_id);
#ifdef DEBUG_BenB
printf(" Have tag %d\n", tag_id);
const PRUnichar* tag_back;
parserService->HTMLIdToStringTag(tag_id, &tag_back);
printf(" Equals -%s-\n", tag_back
? NS_ConvertUCS2toUTF8(tag_back).get()
: "");
#endif
if (tag_id == eHTMLTag_userdefined ||
tag_id == eHTMLTag_unknown)
{
@ -799,18 +774,12 @@ mozSanitizingHTMLSerializer::ParseTagPref(const nsCAutoString& tagpref)
attrList.Append(Substring(tagpref,
bracket + 1,
tagpref.Length() - 2 - bracket));
#ifdef DEBUG_BenB
printf(" Attr list: -%s-\n", attrList.get());
#endif
char* attrs_lasts;
for (char* iAttr = PL_strtok_r(attrList.BeginWriting(),
",", &attrs_lasts);
iAttr;
iAttr = PL_strtok_r(NULL, ",", &attrs_lasts))
{
#ifdef DEBUG_BenB
printf(" Processing attr -%s-\n", iAttr);
#endif
attr_bag->Set(iAttr, 0);
}