From a6c809d88a6525796d0dd1d404d7bcf4478f1889 Mon Sep 17 00:00:00 2001 From: "mozilla.BenB%bucksch.org" Date: Tue, 16 Apr 2002 18:41:45 +0000 Subject: [PATCH] 108153, 30888, 69529, 126082: New HTML message body options for Mailnews Provide a work around / fix for some of the security and usability problems that HTML mails currently impose, by not rendering everything the sender throws at the user. We achieve that by either - removing offending HTML tags/attributes - using the plaintext alternative (if existing) or converting HTML to plaintext and back to HTML (if there's only HTML) or - rendering the HTML source. r=ducarroz, sr=bienvenu This is the libmime backend. It adds 2 new libmime classes for the HTML conversion, code to select the right libmime classes and some other helperstuff/tweaks. --- mailnews/mime/src/Makefile.in | 6 + mailnews/mime/src/makefile.win | 6 + mailnews/mime/src/mimei.cpp | 228 +++++++++++++++++++++++++++------ mailnews/mime/src/mimei.h | 75 ++++++----- mailnews/mime/src/mimemalt.cpp | 28 ++++ mailnews/mime/src/mimemoz2.cpp | 165 +++++++++++++++++++++--- mailnews/mime/src/mimemoz2.h | 7 + mailnews/mime/src/mimethtm.cpp | 54 ++++---- mailnews/mime/src/mimetpla.h | 8 +- mailnews/mime/src/mimetric.cpp | 4 - 10 files changed, 451 insertions(+), 130 deletions(-) diff --git a/mailnews/mime/src/Makefile.in b/mailnews/mime/src/Makefile.in index 98329831f716..e327e0318f12 100644 --- a/mailnews/mime/src/Makefile.in +++ b/mailnews/mime/src/Makefile.in @@ -62,7 +62,11 @@ REQUIRES = xpcom \ nkcache \ pipnss \ imglib2 \ + content \ + htmlparser \ + layout \ $(NULL) +#content, htmlparser and layout are for HTML*() in mimemoz2.cpp. 
ifdef BUILD_SMIME REQUIRES += \ @@ -96,6 +100,8 @@ CPPSRCS = \ mimetenr.cpp \ mimetext.cpp \ mimethtm.cpp \ + mimethpl.cpp \ + mimethsa.cpp \ mimetpla.cpp \ mimetpfl.cpp \ mimetric.cpp \ diff --git a/mailnews/mime/src/makefile.win b/mailnews/mime/src/makefile.win index 03a867c39848..1252d42d8c57 100644 --- a/mailnews/mime/src/makefile.win +++ b/mailnews/mime/src/makefile.win @@ -49,11 +49,15 @@ REQUIRES = xpcom \ nkcache \ xpconnect \ pipnss \ + content \ + htmlparser \ + layout \ !if defined(BUILD_SMIME) msgsmime \ !endif imglib2 \ $(NULL) +#content, htmlparser and layout are for HTML*() in mimemoz2.cpp. include <$(DEPTH)\config\config.mak> @@ -127,6 +131,8 @@ OBJS= \ .\$(OBJDIR)\mimetenr.obj \ .\$(OBJDIR)\mimetext.obj \ .\$(OBJDIR)\mimethtm.obj \ + .\$(OBJDIR)\mimethpl.obj \ + .\$(OBJDIR)\mimethsa.obj \ .\$(OBJDIR)\mimetpla.obj \ .\$(OBJDIR)\mimetpfl.obj \ .\$(OBJDIR)\mimetric.obj \ diff --git a/mailnews/mime/src/mimei.cpp b/mailnews/mime/src/mimei.cpp index 1a3433ede390..2a3777b754dd 100644 --- a/mailnews/mime/src/mimei.cpp +++ b/mailnews/mime/src/mimei.cpp @@ -39,28 +39,29 @@ #include "mimesun.h" /* | | |--- MimeSunAttachment */ #include "mimemsig.h" /* | | |--- MimeMultipartSigned (abstract)*/ #ifdef ENABLE_SMIME -#include "mimemcms.h" /* | | |---MimeMultipartSignedCMS */ +#include "mimemcms.h" /* | | |---MimeMultipartSignedCMS */ #endif #include "mimecryp.h" /* | |--- MimeEncrypted (abstract) */ #ifdef ENABLE_SMIME #include "mimecms.h" /* | | |--- MimeEncryptedPKCS7 */ #endif - #include "mimemsg.h" /* | |--- MimeMessage */ #include "mimeunty.h" /* | |--- MimeUntypedText */ #include "mimeleaf.h" /* |--- MimeLeaf (abstract) */ #include "mimetext.h" /* | |--- MimeInlineText (abstract) */ #include "mimetpla.h" /* | | |--- MimeInlineTextPlain */ -#include "mimetpfl.h" /* | | |--- MimeInlineTextPlainFlowed */ +#include "mimethpl.h" /* | | | |--- M.I.TextHTMLAsPlaintext */ +#include "mimetpfl.h" /* | | |--- MimeInlineTextPlainFlowed */ #include "mimethtm.h" /* | | 
|--- MimeInlineTextHTML */ +#include "mimethsa.h" /* | | | |--- M.I.TextHTMLSanitized */ #include "mimetric.h" /* | | |--- MimeInlineTextRichtext */ #include "mimetenr.h" /* | | | |--- MimeInlineTextEnriched */ -/* SUPPORTED VIA PLUGIN | | |--------- MimeInlineTextCalendar */ - -#include "nsIPref.h" +/* SUPPORTED VIA PLUGIN | | |--- MimeInlineTextVCard */ +/* SUPPORTED VIA PLUGIN | | |--- MimeInlineTextCalendar */ #include "mimeiimg.h" /* | |--- MimeInlineImage */ #include "mimeeobj.h" /* | |--- MimeExternalObject */ #include "mimeebod.h" /* |--- MimeExternalBody */ + /* If you add classes here,also add them to mimei.h */ #include "prlog.h" #include "prmem.h" #include "prenv.h" @@ -78,6 +79,7 @@ #include "nsMimeStringResources.h" #include "nsMimeTypes.h" #include "nsMsgUtils.h" +#include "nsIPref.h" #include "imgILoader.h" #define IMAP_EXTERNAL_CONTENT_HEADER "X-Mozilla-IMAP-Part" @@ -288,6 +290,75 @@ mime_free (MimeObject *object) PR_Free(object); } + +PRBool mime_is_allowed_class(const MimeObjectClass *clazz, + PRInt32 types_of_classes_to_disallow) +{ + if (types_of_classes_to_disallow == 0) + return PR_TRUE; + PRBool avoid_html = (types_of_classes_to_disallow >= 1); + PRBool avoid_images = (types_of_classes_to_disallow >= 2); + PRBool avoid_strange_content = (types_of_classes_to_disallow >= 3); + PRBool allow_only_vanilla_classes = (types_of_classes_to_disallow == 100); + + if (allow_only_vanilla_classes) + /* A "safe" class is one that is unlikely to have security bugs or to + allow security exploits or one that is essential for the usefulness + of the application, even for paranoid users. + What's included here is more personal judgement than following + strict rules, though, unfortunately. + The function returns true only for known good classes, i.e. is a + "whitelist" in this case. + This idea comes from Georgi Guninski. 
+ */ + return + ( + clazz == (MimeObjectClass *)&mimeInlineTextPlainClass || + clazz == (MimeObjectClass *)&mimeInlineTextPlainFlowedClass || + clazz == (MimeObjectClass *)&mimeInlineTextHTMLSanitizedClass || + clazz == (MimeObjectClass *)&mimeInlineTextHTMLAsPlaintextClass || + /* The latter 2 classes bear some risk, because they use the Gecko + HTML parser, but the user has the option to make an explicit + choice in this case, via html_as. */ + clazz == (MimeObjectClass *)&mimeMultipartMixedClass || + clazz == (MimeObjectClass *)&mimeMultipartAlternativeClass || + clazz == (MimeObjectClass *)&mimeMultipartDigestClass || + clazz == (MimeObjectClass *)&mimeMultipartAppleDoubleClass || + clazz == (MimeObjectClass *)&mimeMessageClass || + clazz == (MimeObjectClass *)&mimeExternalObjectClass || + /* mimeUntypedTextClass? -- does uuencode */ +#ifdef ENABLE_SMIME + clazz == (MimeObjectClass *)&mimeMultipartSignedCMSClass || + clazz == (MimeObjectClass *)&mimeEncryptedCMSClass || +#endif + clazz == 0 + ); + + /* Contrary to above, the below code is a "blacklist", i.e. it + *excludes* some "bad" classes. */ + return + !( + (avoid_html + && ( + clazz == (MimeObjectClass *)&mimeInlineTextHTMLClass + /* Should not happen - we protect against that in + mime_find_class(). Still for safety... 
*/ + )) || + (avoid_images + && ( + clazz == (MimeObjectClass *)&mimeInlineImageClass + )) || + (avoid_strange_content + && ( + clazz == (MimeObjectClass *)&mimeInlineTextEnrichedClass || + clazz == (MimeObjectClass *)&mimeInlineTextRichtextClass || + clazz == (MimeObjectClass *)&mimeSunAttachmentClass || + clazz == (MimeObjectClass *)&mimeExternalBodyClass + )) + ); +} + + MimeObjectClass * mime_find_class (const char *content_type, MimeHeaders *hdrs, MimeDisplayOptions *opts, PRBool exact_match_p) @@ -296,7 +367,33 @@ mime_find_class (const char *content_type, MimeHeaders *hdrs, MimeObjectClass *tempClass = 0; contentTypeHandlerInitStruct ctHandlerInfo; - /* + // Read some prefs + nsIPref *pref = GetPrefServiceManager(opts); + PRInt32 html_as = 0; // def. see below + PRInt32 types_of_classes_to_disallow = 0; /* Let only a few libmime classes + process incoming data. This protects from bugs (e.g. buffer overflows) + and from security loopholes (e.g. allowing unchecked HTML in some + obscure classes, although the user has html_as > 0). + This option is mainly for the UI of html_as. + 0 = allow all available classes + 1 = Use hardcoded blacklist to avoid rendering (incoming) HTML + 2 = ... and images + 3 = ... and some other uncommon content types + 100 = Use hardcoded whitelist to avoid even more bugs(buffer overflows). + This mode will limit the features available (e.g. uncommon + attachment types and inline images) and is for paranoid users. + */ + if (pref) + { + pref->GetIntPref("mailnews.display.html_as", &html_as); + pref->GetIntPref("mailnews.display.disallow_mime_handlers", + &types_of_classes_to_disallow); + if (types_of_classes_to_disallow > 0 && html_as == 0) + // We have non-sensical prefs. Do some fixup. + html_as = 1; + } + + /* * What we do first is check for an external content handler plugin. 
* This will actually extend the mime handling by calling a routine * which will allow us to load an external content type handler @@ -305,25 +402,64 @@ mime_find_class (const char *content_type, MimeHeaders *hdrs, */ if ((tempClass = mime_locate_external_content_handler(content_type, &ctHandlerInfo)) != NULL) { - clazz = (MimeObjectClass *)tempClass; + if (types_of_classes_to_disallow > 0 + && (!nsCRT::strncasecmp(content_type, "text/x-vcard", 12) || + !nsCRT::strncasecmp(content_type, "text/calendar", 13)) + ) + /* Use a little hack to prevent some dangerous plugins, which ship + with Mozilla, to run. + For the truely user-installed plugins, we rely on the judgement + of the user. */ + { + if (!exact_match_p) + clazz = (MimeObjectClass *)&mimeExternalObjectClass; // As attachment + } + else + clazz = (MimeObjectClass *)tempClass; } else { if (!content_type || !*content_type || - !nsCRT::strcasecmp(content_type, "text")) /* with no / in the type */ + !nsCRT::strcasecmp(content_type, "text")) /* with no / in the type */ clazz = (MimeObjectClass *)&mimeUntypedTextClass; - /* Subtypes of text... + /* Subtypes of text... */ else if (!nsCRT::strncasecmp(content_type, "text/", 5)) { if (!nsCRT::strcasecmp(content_type+5, "html")) - clazz = (MimeObjectClass *)&mimeInlineTextHTMLClass; + { + if (opts + && opts->format_out == nsMimeOutput::nsMimeMessageSaveAs) + // SaveAs in new modes doesn't work yet. + { + clazz = (MimeObjectClass *)&mimeInlineTextHTMLClass; + types_of_classes_to_disallow = 0; + } + else if (html_as == 0) // Render sender's HTML + clazz = (MimeObjectClass *)&mimeInlineTextHTMLClass; + else if (html_as == 1) // convert HTML to plaintext + // Do a HTML->TXT->HTML conversion, see mimethpl.h. + clazz = (MimeObjectClass *)&mimeInlineTextHTMLAsPlaintextClass; + else if (html_as == 2) // display HTML source + /* This is for the freaks. Treat HTML as plaintext, + which will cause the HTML source to be displayed. 
+ Not very user-friendly, but some seem to want this. */ + clazz = (MimeObjectClass *)&mimeInlineTextPlainClass; + else if (html_as == 3) // Sanitize + // Strip all but allowed HTML + clazz = (MimeObjectClass *)&mimeInlineTextHTMLSanitizedClass; + else // Goofy pref + /* User has an unknown pref value. Maybe he used a newer Mozilla + with a new alternative to avoid HTML. Defaulting to option 1, + which is less dangerous than defaulting to the raw HTML. */ + clazz = (MimeObjectClass *)&mimeInlineTextHTMLAsPlaintextClass; + } else if (!nsCRT::strcasecmp(content_type+5, "enriched")) clazz = (MimeObjectClass *)&mimeInlineTextEnrichedClass; else if (!nsCRT::strcasecmp(content_type+5, "richtext")) clazz = (MimeObjectClass *)&mimeInlineTextRichtextClass; - else if (!nsCRT::strcasecmp(content_type+5, "rtf")) + else if (!nsCRT::strcasecmp(content_type+5, "rtf")) clazz = (MimeObjectClass *)&mimeExternalObjectClass; else if (!nsCRT::strcasecmp(content_type+5, "plain")) { @@ -331,12 +467,10 @@ mime_find_class (const char *content_type, MimeHeaders *hdrs, clazz = (MimeObjectClass *)&mimeInlineTextPlainClass; PRBool disable_format_flowed = PR_FALSE; - nsIPref *pref = GetPrefServiceManager(opts); if (pref) - (void)pref->GetBoolPref( - "mailnews.display.disable_format_flowed_support", - &disable_format_flowed); - + pref->GetBoolPref("mailnews.display.disable_format_flowed_support", + &disable_format_flowed); + if(!disable_format_flowed) { // Check for format=flowed, damn, it is already stripped away from @@ -355,11 +489,11 @@ mime_find_class (const char *content_type, MimeHeaders *hdrs, // a nsMimeTypes.h but that one isn't included. Bug? char *content_type_format = (content_type_row - ? MimeHeaders_get_parameter(content_type_row, "format", NULL, NULL) + ? 
MimeHeaders_get_parameter(content_type_row, "format", NULL,NULL) : 0); if (content_type_format && !nsCRT::strcasecmp(content_type_format, - "flowed")) + "flowed")) clazz = (MimeObjectClass *)&mimeInlineTextPlainFlowedClass; PR_FREEIF(content_type_format); PR_FREEIF(content_type_row); @@ -380,7 +514,7 @@ mime_find_class (const char *content_type, MimeHeaders *hdrs, else if (!nsCRT::strcasecmp(content_type+10, "digest")) clazz = (MimeObjectClass *)&mimeMultipartDigestClass; else if (!nsCRT::strcasecmp(content_type+10, "appledouble") || - !nsCRT::strcasecmp(content_type+10, "header-set")) + !nsCRT::strcasecmp(content_type+10, "header-set")) clazz = (MimeObjectClass *)&mimeMultipartAppleDoubleClass; else if (!nsCRT::strcasecmp(content_type+10, "parallel")) clazz = (MimeObjectClass *)&mimeMultipartParallelClass; @@ -393,7 +527,7 @@ mime_find_class (const char *content_type, MimeHeaders *hdrs, know about. */ char *ct = (hdrs ? MimeHeaders_get(hdrs, HEADER_CONTENT_TYPE, - PR_FALSE, PR_FALSE) + PR_FALSE, PR_FALSE) : 0); char *proto = (ct ? MimeHeaders_get_parameter(ct, PARAM_PROTOCOL, NULL, NULL) @@ -401,18 +535,20 @@ mime_find_class (const char *content_type, MimeHeaders *hdrs, char *micalg = (ct ? 
MimeHeaders_get_parameter(ct, PARAM_MICALG, NULL, NULL) : 0); - - if (proto && (!nsCRT::strcasecmp(proto, APPLICATION_XPKCS7_SIGNATURE) && - micalg && (!nsCRT::strcasecmp(micalg, PARAM_MICALG_MD5) || - !nsCRT::strcasecmp(micalg, PARAM_MICALG_SHA1) || - !nsCRT::strcasecmp(micalg, PARAM_MICALG_SHA1_2) || - !nsCRT::strcasecmp(micalg, PARAM_MICALG_SHA1_3) || - !nsCRT::strcasecmp(micalg, PARAM_MICALG_SHA1_4) || - !nsCRT::strcasecmp(micalg, PARAM_MICALG_SHA1_5) || - !nsCRT::strcasecmp(micalg, PARAM_MICALG_MD2)))) - clazz = (MimeObjectClass *)&mimeMultipartSignedCMSClass; - else - clazz = 0; + + if (proto + && (!nsCRT::strcasecmp(proto, APPLICATION_XPKCS7_SIGNATURE) + && micalg + && (!nsCRT::strcasecmp(micalg, PARAM_MICALG_MD5) || + !nsCRT::strcasecmp(micalg, PARAM_MICALG_SHA1) || + !nsCRT::strcasecmp(micalg, PARAM_MICALG_SHA1_2) || + !nsCRT::strcasecmp(micalg, PARAM_MICALG_SHA1_3) || + !nsCRT::strcasecmp(micalg, PARAM_MICALG_SHA1_4) || + !nsCRT::strcasecmp(micalg, PARAM_MICALG_SHA1_5) || + !nsCRT::strcasecmp(micalg, PARAM_MICALG_MD2)))) + clazz = (MimeObjectClass *)&mimeMultipartSignedCMSClass; + else + clazz = 0; PR_FREEIF(proto); PR_FREEIF(micalg); PR_FREEIF(ct); @@ -460,20 +596,30 @@ mime_find_class (const char *content_type, MimeHeaders *hdrs, treat as non-text types (which would be bad) without this special-case... */ else if (!nsCRT::strcasecmp(content_type, APPLICATION_PGP) || - !nsCRT::strcasecmp(content_type, APPLICATION_PGP2)) - clazz = (MimeObjectClass *)&mimeInlineTextPlainClass; - + !nsCRT::strcasecmp(content_type, APPLICATION_PGP2)) + clazz = (MimeObjectClass *)&mimeInlineTextPlainClass; + else if (!nsCRT::strcasecmp(content_type, SUN_ATTACHMENT)) clazz = (MimeObjectClass *)&mimeSunAttachmentClass; - /* Everything else gets represented as a clickable link. + /* Everything else gets represented as a clickable link. 
*/ else if (!exact_match_p) - clazz = (MimeObjectClass *)&mimeExternalObjectClass; + clazz = (MimeObjectClass *)&mimeExternalObjectClass; + + if (!mime_is_allowed_class(clazz, types_of_classes_to_disallow)) + { + /* Do that check here (not after the if block), because we want to allow + user-installed plugins. */ + if(!exact_match_p) + clazz = (MimeObjectClass *)&mimeExternalObjectClass; + else + clazz = 0; + } } - if (!exact_match_p) - PR_ASSERT(clazz); + if (!exact_match_p) + PR_ASSERT(clazz); if (!clazz) return 0; PR_ASSERT(clazz); @@ -615,6 +761,8 @@ mime_create (const char *content_type, MimeHeaders *hdrs, (clazz != (MimeObjectClass *)&mimeInlineTextPlainClass) && (clazz != (MimeObjectClass *)&mimeInlineTextPlainFlowedClass) && (clazz != (MimeObjectClass *)&mimeInlineTextHTMLClass) && + (clazz != (MimeObjectClass *)&mimeInlineTextHTMLSanitizedClass) && + (clazz != (MimeObjectClass *)&mimeInlineTextHTMLAsPlaintextClass) && (clazz != (MimeObjectClass *)&mimeInlineTextRichtextClass) && (clazz != (MimeObjectClass *)&mimeInlineTextEnrichedClass) && (clazz != (MimeObjectClass *)&mimeMessageClass) && diff --git a/mailnews/mime/src/mimei.h b/mailnews/mime/src/mimei.h index f2dafd4404d4..afcb811e039f 100644 --- a/mailnews/mime/src/mimei.h +++ b/mailnews/mime/src/mimei.h @@ -57,57 +57,68 @@ MimeObject (abstract) | - |--- MimeContainer (abstract) + +--- MimeContainer (abstract) | | - | |--- MimeMultipart (abstract) + | +--- MimeMultipart (abstract) | | | - | | |--- MimeMultipartMixed + | | +--- MimeMultipartMixed | | | - | | |--- MimeMultipartDigest + | | +--- MimeMultipartDigest | | | - | | |--- MimeMultipartParallel + | | +--- MimeMultipartParallel | | | - | | |--- MimeMultipartAlternative + | | +--- MimeMultipartAlternative | | | - | | |--- MimeMultipartRelated + | | +--- MimeMultipartRelated | | | - | | |--- MimeMultipartAppleDouble + | | +--- MimeMultipartAppleDouble | | | - | | |--- MimeSunAttachment + | | +--- MimeSunAttachment | | | - | | |--- MimeMultipartSigned 
(abstract) + | | \--- MimeMultipartSigned (abstract) | | | - | | |--- MimeMultipartSigned + | | \--- MimeMultipartSignedCMS | | - | |--- MimeXlateed (abstract) + | +--- MimeEncrypted (abstract) | | | - | | |--- MimeXlateed + | | \--- MimeEncryptedPKCS7 | | - | |--- MimeMessage + | +--- MimeXlateed (abstract) + | | | + | | \--- MimeXlateed | | - | |--- MimeUntypedText + | +--- MimeMessage + | | + | \--- MimeUntypedText | - |--- MimeLeaf (abstract) + +--- MimeLeaf (abstract) | | - | |--- MimeInlineText (abstract) + | +--- MimeInlineText (abstract) | | | - | | |--- MimeInlineTextPlain - | | | - | | |--- MimeInlineTextHTML - | | | - | | |--- MimeInlineTextRichtext + | | +--- MimeInlineTextPlain | | | | - | | | |--- MimeInlineTextEnriched - | | | - | | |--- MimeInlineTextVCard - | | | - | | |--- MimeInlineTextCalendar + | | | \--- MimeInlineTextHTMLAsPlaintext + | | | + | | +--- MimeInlineTextPlainFlowed + | | | + | | +--- MimeInlineTextHTML + | | | | + | | | \--- MimeInlineTextHTMLSanitized + | | | + | | +--- MimeInlineTextRichtext + | | | | + | | | \--- MimeInlineTextEnriched + | | | + | | +--- MimeInlineTextVCard + | | | + | | \--- MimeInlineTextCalendar | | - | |--- MimeInlineImage + | +--- MimeInlineImage | | - | |--- MimeExternalObject + | \--- MimeExternalObject | - |--- MimeExternalBody + \--- MimeExternalBody + ========================================================================= The definition of these classes is somewhat idiosyncratic, since I defined @@ -213,6 +224,10 @@ parentClass.whatnot.object.finalize(object); // (works...) object->clazz->superclass->finalize(object); // WRONG!! } + + If you write a libmime content type handler, libmime might create several + instances of your class at once and call e.g. the same finalize code for + 3 different objects in a row. 
*/ #include "mimehdrs.h" diff --git a/mailnews/mime/src/mimemalt.cpp b/mailnews/mime/src/mimemalt.cpp index da0af99c7f94..c10fd324f8a4 100644 --- a/mailnews/mime/src/mimemalt.cpp +++ b/mailnews/mime/src/mimemalt.cpp @@ -20,6 +20,7 @@ * the Initial Developer. All Rights Reserved. * * Contributor(s): + * Ben Bucksch * * Alternatively, the contents of this file may be used under the terms of * either the GNU General Public License Version 2 or later (the "GPL"), or @@ -41,6 +42,10 @@ #include "prlog.h" #include "nsMimeTypes.h" #include "nsMimeStringResources.h" +#include "nsIPref.h" +#include "mimemoz2.h" // for prefs + +static NS_DEFINE_CID(kPrefCID, NS_PREF_CID); #define MIME_SUPERCLASS mimeMultipartClass MimeDefClass(MimeMultipartAlternative, MimeMultipartAlternativeClass, @@ -227,6 +232,29 @@ MimeMultipartAlternative_display_part_p(MimeObject *self, are themselves unknown. */ + // prefer_plaintext pref + nsIPref *pref = GetPrefServiceManager(self->options); + PRBool prefer_plaintext = PR_FALSE; + if (pref) + (void)pref->GetBoolPref("mailnews.display.prefer_plaintext", + &prefer_plaintext); + if (prefer_plaintext + && self->options->format_out != nsMimeOutput::nsMimeMessageSaveAs + && (!nsCRT::strncasecmp(ct, "text/html", 9) || + !nsCRT::strncasecmp(ct, "text/enriched", 13) || + !nsCRT::strncasecmp(ct, "text/richtext", 13)) + ) + // if the user prefers plaintext and this is the "rich" (e.g. HTML) part... + { +#if DEBUG + printf ("Ignoring %s alternative\n", ct); +#endif + return PR_FALSE; + } +#if DEBUG + printf ("Considering %s alternative\n", ct); +#endif + MimeObjectClass *clazz = mime_find_class (ct, sub_hdrs, self->options, PR_TRUE); PRBool result = (clazz ? 
clazz->displayable_inline_p(clazz, sub_hdrs) diff --git a/mailnews/mime/src/mimemoz2.cpp b/mailnews/mime/src/mimemoz2.cpp index 79872d6b6d2b..842e9a13d34b 100644 --- a/mailnews/mime/src/mimemoz2.cpp +++ b/mailnews/mime/src/mimemoz2.cpp @@ -86,10 +86,26 @@ #include "nsITransport.h" #include "mimeebod.h" #include "mimeeobj.h" +// +#include "nsXPCOM.h" +#include "nsParserCIID.h" +#include "nsIParser.h" +#include "nsIHTMLContentSink.h" +#include "nsIContentSerializer.h" +#include "nsLayoutCID.h" +#include "nsIComponentManager.h" +#include "nsReadableUtils.h" +#include "nsIHTMLToTextSink.h" +#include "mozISanitizingSerializer.h" +// static NS_DEFINE_CID(kPrefCID, NS_PREF_CID); static NS_DEFINE_CID(kIOServiceCID, NS_IOSERVICE_CID); +// +static NS_DEFINE_CID(kParserCID, NS_PARSER_CID); +static NS_DEFINE_CID(kNavDTDCID, NS_CNAVDTD_CID); +// #ifdef HAVE_MIME_DATA_SLOT #define LOCK_LAST_CACHED_MESSAGE @@ -1209,27 +1225,6 @@ mime_image_write_buffer(char *buf, PRInt32 size, void *image_closure) return size; } -// -// Utility for finding HTML part. -// -static MimeObject* -mime_find_text_html_part_1(MimeObject* obj) -{ - if (mime_subclass_p(obj->clazz, - (MimeObjectClass*) &mimeInlineTextHTMLClass)) { - return obj; - } - if (mime_subclass_p(obj->clazz, (MimeObjectClass*) &mimeContainerClass)) { - MimeContainer* cobj = (MimeContainer*) obj; - PRInt32 i; - for (i=0 ; inchildren ; i++) { - MimeObject* result = mime_find_text_html_part_1(cobj->children[i]); - if (result) return result; - } - } - return NULL; -} - MimeObject* mime_get_main_object(MimeObject* obj) { @@ -2105,3 +2100,131 @@ nsresult GetMailNewsFont(MimeObject *obj, PRBool styleFixed, PRInt32 *fontPixel return NS_OK; } + +/* This function syncronously converts an HTML document (as string) + to plaintext (as string) using the Gecko converter. + + flags: see nsIDocumentEncoder.h +*/ +// TODO: |printf|s? 
+/* */ +nsresult +HTML2Plaintext(const nsString& inString, nsString& outString, + PRUint32 flags, PRUint32 wrapCol) +{ + nsresult rv = NS_OK; + +#if DEBUG_BenB + printf("Converting HTML to plaintext\n"); + char* charstar = ToNewUTF8String(inString); + printf("HTML source is:\n--------------------\n%s--------------------\n", + charstar); + delete[] charstar; +#endif + + // Create a parser + nsCOMPtr<nsIParser> parser = do_CreateInstance(kParserCID); + NS_ENSURE_TRUE(parser, NS_ERROR_FAILURE); + + // Create the appropriate output sink + nsCOMPtr<nsIContentSink> sink = + do_CreateInstance(NS_PLAINTEXTSINK_CONTRACTID); + NS_ENSURE_TRUE(sink, NS_ERROR_FAILURE); + + nsCOMPtr<nsIHTMLToTextSink> textSink(do_QueryInterface(sink)); + NS_ENSURE_TRUE(textSink, NS_ERROR_FAILURE); + + textSink->Initialize(&outString, flags, wrapCol); + + parser->SetContentSink(sink); + nsCOMPtr<nsIDTD> dtd = do_CreateInstance(kNavDTDCID); + NS_ENSURE_TRUE(dtd, NS_ERROR_FAILURE); + + parser->RegisterDTD(dtd); + + rv = parser->Parse(inString, 0, NS_LITERAL_CSTRING("text/html"), + PR_FALSE, PR_TRUE); + + // Aah! How can NS_ERROR and NS_ABORT_IF_FALSE be no-ops in release builds??? + if (NS_FAILED(rv)) + { + NS_ERROR("Parse() failed!"); + return rv; + } + +#if DEBUG_BenB + charstar = ToNewUTF8String(outString); + printf("Plaintext is:\n--------------------\n%s--------------------\n", + charstar); + delete[] charstar; +#endif + + return rv; +} +// + + + +/* This function synchronously sanitizes an HTML document (string->string) + using the Gecko ContentSink mozISanitizingHTMLSerializer. 
+ + flags: currently unused + allowedTags: see mozSanitizingHTMLSerializer::ParsePrefs() +*/ +// copied from HTML2Plaintext above +nsresult +HTMLSanitize(const nsString& inString, nsString& outString, + PRUint32 flags, const nsAString& allowedTags) +{ + nsresult rv = NS_OK; + +#if DEBUG_BenB + printf("Sanitizing HTML\n"); + char* charstar = ToNewUTF8String(inString); + printf("Original HTML is:\n--------------------\n%s--------------------\n", + charstar); + delete[] charstar; +#endif + + // Create a parser + nsCOMPtr<nsIParser> parser = do_CreateInstance(kParserCID); + NS_ENSURE_TRUE(parser, NS_ERROR_FAILURE); + + // Create the appropriate output sink + nsCOMPtr<nsIContentSink> sink = + do_CreateInstance(MOZ_SANITIZINGHTMLSERIALIZER_CONTRACTID); + NS_ENSURE_TRUE(sink, NS_ERROR_FAILURE); + + nsCOMPtr<mozISanitizingHTMLSerializer> sanSink(do_QueryInterface(sink)); + NS_ENSURE_TRUE(sanSink, NS_ERROR_FAILURE); + + sanSink->Initialize(&outString, flags, allowedTags); + + parser->SetContentSink(sink); + nsCOMPtr<nsIDTD> dtd = do_CreateInstance(kNavDTDCID); + NS_ENSURE_TRUE(dtd, NS_ERROR_FAILURE); + + parser->RegisterDTD(dtd); + + rv = parser->Parse(inString, 0, NS_LITERAL_CSTRING("text/html"), + PR_FALSE, PR_TRUE); + if (NS_FAILED(rv)) + { + NS_ERROR("Parse() failed!"); + return rv; + } + +#if DEBUG_BenB + charstar = ToNewUTF8String(outString); + printf("Sanitized HTML is:\n--------------------\n%s--------------------\n", + charstar); + delete[] charstar; +#endif + + return rv; +} diff --git a/mailnews/mime/src/mimemoz2.h b/mailnews/mime/src/mimemoz2.h index aad0e5e75b85..4dc40b485c54 100644 --- a/mailnews/mime/src/mimemoz2.h +++ b/mailnews/mime/src/mimemoz2.h @@ -194,6 +194,13 @@ extern "C" nsIPref *GetPrefServiceManager(MimeDisplayOptions *opt); // Get the text converter... 
mozITXTToHTMLConv *GetTextConverter(MimeDisplayOptions *opt); +nsresult +HTML2Plaintext(const nsString& inString, nsString& outString, + PRUint32 flags, PRUint32 wrapCol); +nsresult +HTMLSanitize(const nsString& inString, nsString& outString, + PRUint32 flags, const nsAString& allowedTags); + /* This is the next generation string retrieval call */ extern "C" char *MimeGetStringByID(PRInt32 stringID); diff --git a/mailnews/mime/src/mimethtm.cpp b/mailnews/mime/src/mimethtm.cpp index 90824f8d3f37..92aefce2155b 100644 --- a/mailnews/mime/src/mimethtm.cpp +++ b/mailnews/mime/src/mimethtm.cpp @@ -180,38 +180,35 @@ MimeInlineTextHTML_parse_line (char *line, PRInt32 length, MimeObject *obj) (cp = PL_strncasestr(cp, "CHARSET=", length - (int)(cp - line))) ) { - if (cp) + char* cp1 = cp + 8; //8 for the length of "CHARSET=" + char* cp2 = PL_strnpbrk(cp1, " \"\'", length - (int)(cp1 - line)); + if (cp2) { - char* cp1 = cp + 8; //8 for the length of "CHARSET=" - char* cp2 = PL_strnpbrk(cp1, " \"\'", length - (int)(cp1 - line)); - if (cp2) - { - char* charset = PL_strndup(cp1, (int)(cp2 - cp1)); + char* charset = PL_strndup(cp1, (int)(cp2 - cp1)); - // Fix bug 101434, in this case since this parsing is a char* - // operation, a real UTF-16 or UTF-32 document won't be parse - // correctly, if it got parse, it cannot be UTF-16 nor UTF-32 - // there fore, we ignore them if somehow we got that value - // 6 == strlen("UTF-16") or strlen("UTF-32"), this will cover - // UTF-16, UTF-16BE, UTF-16LE, UTF-32, UTF-32BE, UTF-32LE - if ((charset != nsnull) && - nsCRT::strncasecmp(charset, "UTF-16", 6) && - nsCRT::strncasecmp(charset, "UTF-32", 6)) - { - textHTML->charset = charset; + // Fix bug 101434, in this case since this parsing is a char* + // operation, a real UTF-16 or UTF-32 document won't be parse + // correctly, if it got parse, it cannot be UTF-16 nor UTF-32 + // there fore, we ignore them if somehow we got that value + // 6 == strlen("UTF-16") or strlen("UTF-32"), this will 
cover + // UTF-16, UTF-16BE, UTF-16LE, UTF-32, UTF-32BE, UTF-32LE + if ((charset != nsnull) && + nsCRT::strncasecmp(charset, "UTF-16", 6) && + nsCRT::strncasecmp(charset, "UTF-32", 6)) + { + textHTML->charset = charset; - // write out the data without the charset part... - if (textHTML->charset) - { - int err = MimeObject_write(obj, line, cp - line, PR_TRUE); - if (err == 0) - err = MimeObject_write(obj, cp2, length - (int)(cp2 - line), PR_TRUE); + // write out the data without the charset part... + if (textHTML->charset) + { + int err = MimeObject_write(obj, line, cp - line, PR_TRUE); + if (err == 0) + err = MimeObject_write(obj, cp2, length - (int)(cp2 - line), PR_TRUE); - return err; - } + return err; } - PR_FREEIF(charset); } + PR_FREEIF(charset); } } } @@ -220,11 +217,6 @@ MimeInlineTextHTML_parse_line (char *line, PRInt32 length, MimeObject *obj) return MimeObject_write(obj, line, length, PR_TRUE); } -/* This method is the same as that of MimeInlineTextRichtext (and thus - MimeInlineTextEnriched); maybe that means that MimeInlineTextHTML - should share a common parent with them which is not also shared by - MimeInlineTextPlain? - */ static int MimeInlineTextHTML_parse_eof (MimeObject *obj, PRBool abort_p) { diff --git a/mailnews/mime/src/mimetpla.h b/mailnews/mime/src/mimetpla.h index 2c6dcc7d4eb5..a33fc72e6612 100644 --- a/mailnews/mime/src/mimetpla.h +++ b/mailnews/mime/src/mimetpla.h @@ -35,15 +35,15 @@ * * ***** END LICENSE BLOCK ***** */ +/* The MimeInlineTextPlain class implements the text/plain MIME content type, + and is also used for all otherwise-unknown text/ subtypes. + */ + #ifndef _MIMETPLA_H_ #define _MIMETPLA_H_ #include "mimetext.h" -/* The MimeInlineTextHTML class implements the text/plain MIME content type, - and is also used for all otherwise-unknown text/ subtypes. 
- */ - typedef struct MimeInlineTextPlainClass MimeInlineTextPlainClass; typedef struct MimeInlineTextPlain MimeInlineTextPlain; diff --git a/mailnews/mime/src/mimetric.cpp b/mailnews/mime/src/mimetric.cpp index 6c5b2ef1ee66..6bdf68ee17cf 100644 --- a/mailnews/mime/src/mimetric.cpp +++ b/mailnews/mime/src/mimetric.cpp @@ -351,10 +351,6 @@ MimeInlineTextRichtext_parse_begin (MimeObject *obj) } -/* This method is largely the same as that of MimeInlineTextHTML; maybe that - means that MimeInlineTextRichtext and MimeInlineTextEnriched should share - a common parent with it which is not also shared by MimeInlineTextPlain? - */ static int MimeInlineTextRichtext_parse_eof (MimeObject *obj, PRBool abort_p) {