New HTML message body options for Mailnews
Provide a workaround/fix for some of the security and usability problems that HTML mail currently poses, by not rendering everything the sender throws at the user.

We achieve that by either
- removing offending HTML tags/attributes
- using the plaintext alternative (if one exists) or converting HTML to plaintext and back to HTML (if there's only HTML) or
- rendering the HTML source.

r=ducarroz, sr=bienvenu

This is the libmime backend. It adds 2 new libmime classes for the HTML conversion, code to select the right libmime classes, and some other helper code and tweaks.
This commit is contained in:
mozilla.BenB%bucksch.org 2002-04-16 18:41:45 +00:00
Родитель 322a9014aa
Коммит a6c809d88a
10 изменённых файлов: 451 добавлений и 130 удалений

Просмотреть файл

@ -62,7 +62,11 @@ REQUIRES = xpcom \
nkcache \
pipnss \
imglib2 \
content \
htmlparser \
layout \
$(NULL)
#content, htmlparser and layout are for HTML*() in mimemoz2.cpp.
ifdef BUILD_SMIME
REQUIRES += \
@ -96,6 +100,8 @@ CPPSRCS = \
mimetenr.cpp \
mimetext.cpp \
mimethtm.cpp \
mimethpl.cpp \
mimethsa.cpp \
mimetpla.cpp \
mimetpfl.cpp \
mimetric.cpp \

Просмотреть файл

@ -49,11 +49,15 @@ REQUIRES = xpcom \
nkcache \
xpconnect \
pipnss \
content \
htmlparser \
layout \
!if defined(BUILD_SMIME)
msgsmime \
!endif
imglib2 \
$(NULL)
#content, htmlparser and layout are for HTML*() in mimemoz2.cpp.
include <$(DEPTH)\config\config.mak>
@ -127,6 +131,8 @@ OBJS= \
.\$(OBJDIR)\mimetenr.obj \
.\$(OBJDIR)\mimetext.obj \
.\$(OBJDIR)\mimethtm.obj \
.\$(OBJDIR)\mimethpl.obj \
.\$(OBJDIR)\mimethsa.obj \
.\$(OBJDIR)\mimetpla.obj \
.\$(OBJDIR)\mimetpfl.obj \
.\$(OBJDIR)\mimetric.obj \

Просмотреть файл

@ -39,28 +39,29 @@
#include "mimesun.h" /* | | |--- MimeSunAttachment */
#include "mimemsig.h" /* | | |--- MimeMultipartSigned (abstract)*/
#ifdef ENABLE_SMIME
#include "mimemcms.h" /* | | |---MimeMultipartSignedCMS */
#include "mimemcms.h" /* | | |---MimeMultipartSignedCMS */
#endif
#include "mimecryp.h" /* | |--- MimeEncrypted (abstract) */
#ifdef ENABLE_SMIME
#include "mimecms.h" /* | | |--- MimeEncryptedPKCS7 */
#endif
#include "mimemsg.h" /* | |--- MimeMessage */
#include "mimeunty.h" /* | |--- MimeUntypedText */
#include "mimeleaf.h" /* |--- MimeLeaf (abstract) */
#include "mimetext.h" /* | |--- MimeInlineText (abstract) */
#include "mimetpla.h" /* | | |--- MimeInlineTextPlain */
#include "mimetpfl.h" /* | | |--- MimeInlineTextPlainFlowed */
#include "mimethpl.h" /* | | | |--- M.I.TextHTMLAsPlaintext */
#include "mimetpfl.h" /* | | |--- MimeInlineTextPlainFlowed */
#include "mimethtm.h" /* | | |--- MimeInlineTextHTML */
#include "mimethsa.h" /* | | | |--- M.I.TextHTMLSanitized */
#include "mimetric.h" /* | | |--- MimeInlineTextRichtext */
#include "mimetenr.h" /* | | | |--- MimeInlineTextEnriched */
/* SUPPORTED VIA PLUGIN | | |--------- MimeInlineTextCalendar */
#include "nsIPref.h"
/* SUPPORTED VIA PLUGIN | | |--- MimeInlineTextVCard */
/* SUPPORTED VIA PLUGIN | | |--- MimeInlineTextCalendar */
#include "mimeiimg.h" /* | |--- MimeInlineImage */
#include "mimeeobj.h" /* | |--- MimeExternalObject */
#include "mimeebod.h" /* |--- MimeExternalBody */
/* If you add classes here, also add them to mimei.h */
#include "prlog.h"
#include "prmem.h"
#include "prenv.h"
@ -78,6 +79,7 @@
#include "nsMimeStringResources.h"
#include "nsMimeTypes.h"
#include "nsMsgUtils.h"
#include "nsIPref.h"
#include "imgILoader.h"
#define IMAP_EXTERNAL_CONTENT_HEADER "X-Mozilla-IMAP-Part"
@ -288,6 +290,75 @@ mime_free (MimeObject *object)
PR_Free(object);
}
/* Decide whether libmime may use |clazz| to process incoming data, given
   the restriction level in |types_of_classes_to_disallow|:
     0   = every class is allowed
     >=1 = hardcoded blacklist: no raw HTML
     >=2 = ... and no inline images
     >=3 = ... and no uncommon content types
     100 = hardcoded whitelist; anything not listed is rejected
   Returns PR_TRUE if the class may be used, PR_FALSE otherwise. */
PRBool mime_is_allowed_class(const MimeObjectClass *clazz,
                             PRInt32 types_of_classes_to_disallow)
{
  /* No restrictions requested. */
  if (types_of_classes_to_disallow == 0)
    return PR_TRUE;

  if (types_of_classes_to_disallow == 100)
  {
    /* Whitelist mode. A "safe" class is one that is unlikely to have
       security bugs or to allow security exploits, or one that is
       essential for the usefulness of the application, even for paranoid
       users. The selection is, unfortunately, more personal judgement
       than strict rule. Only known good classes pass.
       This idea comes from Georgi Guninski. */
    if (clazz == 0)
      return PR_TRUE;

    if (clazz == (MimeObjectClass *)&mimeInlineTextPlainClass ||
        clazz == (MimeObjectClass *)&mimeInlineTextPlainFlowedClass)
      return PR_TRUE;

    /* These 2 classes bear some risk, because they use the Gecko HTML
       parser, but the user has the option to make an explicit choice in
       this case, via html_as. */
    if (clazz == (MimeObjectClass *)&mimeInlineTextHTMLSanitizedClass ||
        clazz == (MimeObjectClass *)&mimeInlineTextHTMLAsPlaintextClass)
      return PR_TRUE;

    /* mimeUntypedTextClass? -- does uuencode */
    if (clazz == (MimeObjectClass *)&mimeMultipartMixedClass ||
        clazz == (MimeObjectClass *)&mimeMultipartAlternativeClass ||
        clazz == (MimeObjectClass *)&mimeMultipartDigestClass ||
        clazz == (MimeObjectClass *)&mimeMultipartAppleDoubleClass ||
        clazz == (MimeObjectClass *)&mimeMessageClass ||
        clazz == (MimeObjectClass *)&mimeExternalObjectClass)
      return PR_TRUE;

#ifdef ENABLE_SMIME
    if (clazz == (MimeObjectClass *)&mimeMultipartSignedCMSClass ||
        clazz == (MimeObjectClass *)&mimeEncryptedCMSClass)
      return PR_TRUE;
#endif

    return PR_FALSE;
  }

  /* Contrary to the above, everything below is a "blacklist", i.e. it
     *excludes* some "bad" classes and lets the rest through. */

  /* Level 1 and up: no raw HTML. Should not happen - mime_find_class()
     protects against that. Still, for safety... */
  if (types_of_classes_to_disallow >= 1 &&
      clazz == (MimeObjectClass *)&mimeInlineTextHTMLClass)
    return PR_FALSE;

  /* Level 2 and up: no inline images. */
  if (types_of_classes_to_disallow >= 2 &&
      clazz == (MimeObjectClass *)&mimeInlineImageClass)
    return PR_FALSE;

  /* Level 3 and up: no strange / uncommon content. */
  if (types_of_classes_to_disallow >= 3 &&
      (clazz == (MimeObjectClass *)&mimeInlineTextEnrichedClass ||
       clazz == (MimeObjectClass *)&mimeInlineTextRichtextClass ||
       clazz == (MimeObjectClass *)&mimeSunAttachmentClass ||
       clazz == (MimeObjectClass *)&mimeExternalBodyClass))
    return PR_FALSE;

  return PR_TRUE;
}
MimeObjectClass *
mime_find_class (const char *content_type, MimeHeaders *hdrs,
MimeDisplayOptions *opts, PRBool exact_match_p)
@ -296,7 +367,33 @@ mime_find_class (const char *content_type, MimeHeaders *hdrs,
MimeObjectClass *tempClass = 0;
contentTypeHandlerInitStruct ctHandlerInfo;
/*
// Read some prefs
nsIPref *pref = GetPrefServiceManager(opts);
PRInt32 html_as = 0; // def. see below
PRInt32 types_of_classes_to_disallow = 0; /* Let only a few libmime classes
process incoming data. This protects from bugs (e.g. buffer overflows)
and from security loopholes (e.g. allowing unchecked HTML in some
obscure classes, although the user has html_as > 0).
This option is mainly for the UI of html_as.
0 = allow all available classes
1 = Use hardcoded blacklist to avoid rendering (incoming) HTML
2 = ... and images
3 = ... and some other uncommon content types
100 = Use hardcoded whitelist to avoid even more bugs(buffer overflows).
This mode will limit the features available (e.g. uncommon
attachment types and inline images) and is for paranoid users.
*/
if (pref)
{
pref->GetIntPref("mailnews.display.html_as", &html_as);
pref->GetIntPref("mailnews.display.disallow_mime_handlers",
&types_of_classes_to_disallow);
if (types_of_classes_to_disallow > 0 && html_as == 0)
// We have non-sensical prefs. Do some fixup.
html_as = 1;
}
/*
* What we do first is check for an external content handler plugin.
* This will actually extend the mime handling by calling a routine
* which will allow us to load an external content type handler
@ -305,25 +402,64 @@ mime_find_class (const char *content_type, MimeHeaders *hdrs,
*/
if ((tempClass = mime_locate_external_content_handler(content_type, &ctHandlerInfo)) != NULL)
{
clazz = (MimeObjectClass *)tempClass;
if (types_of_classes_to_disallow > 0
&& (!nsCRT::strncasecmp(content_type, "text/x-vcard", 12) ||
!nsCRT::strncasecmp(content_type, "text/calendar", 13))
)
/* Use a little hack to prevent some dangerous plugins, which ship
with Mozilla, from running.
For truly user-installed plugins, we rely on the judgement
of the user. */
{
if (!exact_match_p)
clazz = (MimeObjectClass *)&mimeExternalObjectClass; // As attachment
}
else
clazz = (MimeObjectClass *)tempClass;
}
else
{
if (!content_type || !*content_type ||
!nsCRT::strcasecmp(content_type, "text")) /* with no / in the type */
!nsCRT::strcasecmp(content_type, "text")) /* with no / in the type */
clazz = (MimeObjectClass *)&mimeUntypedTextClass;
/* Subtypes of text...
/* Subtypes of text...
*/
else if (!nsCRT::strncasecmp(content_type, "text/", 5))
{
if (!nsCRT::strcasecmp(content_type+5, "html"))
clazz = (MimeObjectClass *)&mimeInlineTextHTMLClass;
{
if (opts
&& opts->format_out == nsMimeOutput::nsMimeMessageSaveAs)
// SaveAs in new modes doesn't work yet.
{
clazz = (MimeObjectClass *)&mimeInlineTextHTMLClass;
types_of_classes_to_disallow = 0;
}
else if (html_as == 0) // Render sender's HTML
clazz = (MimeObjectClass *)&mimeInlineTextHTMLClass;
else if (html_as == 1) // convert HTML to plaintext
// Do a HTML->TXT->HTML conversion, see mimethpl.h.
clazz = (MimeObjectClass *)&mimeInlineTextHTMLAsPlaintextClass;
else if (html_as == 2) // display HTML source
/* This is for the freaks. Treat HTML as plaintext,
which will cause the HTML source to be displayed.
Not very user-friendly, but some seem to want this. */
clazz = (MimeObjectClass *)&mimeInlineTextPlainClass;
else if (html_as == 3) // Sanitize
// Strip all but allowed HTML
clazz = (MimeObjectClass *)&mimeInlineTextHTMLSanitizedClass;
else // Goofy pref
/* User has an unknown pref value. Maybe he used a newer Mozilla
with a new alternative to avoid HTML. Defaulting to option 1,
which is less dangerous than defaulting to the raw HTML. */
clazz = (MimeObjectClass *)&mimeInlineTextHTMLAsPlaintextClass;
}
else if (!nsCRT::strcasecmp(content_type+5, "enriched"))
clazz = (MimeObjectClass *)&mimeInlineTextEnrichedClass;
else if (!nsCRT::strcasecmp(content_type+5, "richtext"))
clazz = (MimeObjectClass *)&mimeInlineTextRichtextClass;
else if (!nsCRT::strcasecmp(content_type+5, "rtf"))
else if (!nsCRT::strcasecmp(content_type+5, "rtf"))
clazz = (MimeObjectClass *)&mimeExternalObjectClass;
else if (!nsCRT::strcasecmp(content_type+5, "plain"))
{
@ -331,12 +467,10 @@ mime_find_class (const char *content_type, MimeHeaders *hdrs,
clazz = (MimeObjectClass *)&mimeInlineTextPlainClass;
PRBool disable_format_flowed = PR_FALSE;
nsIPref *pref = GetPrefServiceManager(opts);
if (pref)
(void)pref->GetBoolPref(
"mailnews.display.disable_format_flowed_support",
&disable_format_flowed);
pref->GetBoolPref("mailnews.display.disable_format_flowed_support",
&disable_format_flowed);
if(!disable_format_flowed)
{
// Check for format=flowed, damn, it is already stripped away from
@ -355,11 +489,11 @@ mime_find_class (const char *content_type, MimeHeaders *hdrs,
// a nsMimeTypes.h but that one isn't included. Bug?
char *content_type_format =
(content_type_row
? MimeHeaders_get_parameter(content_type_row, "format", NULL, NULL)
? MimeHeaders_get_parameter(content_type_row, "format", NULL,NULL)
: 0);
if (content_type_format && !nsCRT::strcasecmp(content_type_format,
"flowed"))
"flowed"))
clazz = (MimeObjectClass *)&mimeInlineTextPlainFlowedClass;
PR_FREEIF(content_type_format);
PR_FREEIF(content_type_row);
@ -380,7 +514,7 @@ mime_find_class (const char *content_type, MimeHeaders *hdrs,
else if (!nsCRT::strcasecmp(content_type+10, "digest"))
clazz = (MimeObjectClass *)&mimeMultipartDigestClass;
else if (!nsCRT::strcasecmp(content_type+10, "appledouble") ||
!nsCRT::strcasecmp(content_type+10, "header-set"))
!nsCRT::strcasecmp(content_type+10, "header-set"))
clazz = (MimeObjectClass *)&mimeMultipartAppleDoubleClass;
else if (!nsCRT::strcasecmp(content_type+10, "parallel"))
clazz = (MimeObjectClass *)&mimeMultipartParallelClass;
@ -393,7 +527,7 @@ mime_find_class (const char *content_type, MimeHeaders *hdrs,
know about. */
char *ct = (hdrs
? MimeHeaders_get(hdrs, HEADER_CONTENT_TYPE,
PR_FALSE, PR_FALSE)
PR_FALSE, PR_FALSE)
: 0);
char *proto = (ct
? MimeHeaders_get_parameter(ct, PARAM_PROTOCOL, NULL, NULL)
@ -401,18 +535,20 @@ mime_find_class (const char *content_type, MimeHeaders *hdrs,
char *micalg = (ct
? MimeHeaders_get_parameter(ct, PARAM_MICALG, NULL, NULL)
: 0);
if (proto && (!nsCRT::strcasecmp(proto, APPLICATION_XPKCS7_SIGNATURE) &&
micalg && (!nsCRT::strcasecmp(micalg, PARAM_MICALG_MD5) ||
!nsCRT::strcasecmp(micalg, PARAM_MICALG_SHA1) ||
!nsCRT::strcasecmp(micalg, PARAM_MICALG_SHA1_2) ||
!nsCRT::strcasecmp(micalg, PARAM_MICALG_SHA1_3) ||
!nsCRT::strcasecmp(micalg, PARAM_MICALG_SHA1_4) ||
!nsCRT::strcasecmp(micalg, PARAM_MICALG_SHA1_5) ||
!nsCRT::strcasecmp(micalg, PARAM_MICALG_MD2))))
clazz = (MimeObjectClass *)&mimeMultipartSignedCMSClass;
else
clazz = 0;
if (proto
&& (!nsCRT::strcasecmp(proto, APPLICATION_XPKCS7_SIGNATURE)
&& micalg
&& (!nsCRT::strcasecmp(micalg, PARAM_MICALG_MD5) ||
!nsCRT::strcasecmp(micalg, PARAM_MICALG_SHA1) ||
!nsCRT::strcasecmp(micalg, PARAM_MICALG_SHA1_2) ||
!nsCRT::strcasecmp(micalg, PARAM_MICALG_SHA1_3) ||
!nsCRT::strcasecmp(micalg, PARAM_MICALG_SHA1_4) ||
!nsCRT::strcasecmp(micalg, PARAM_MICALG_SHA1_5) ||
!nsCRT::strcasecmp(micalg, PARAM_MICALG_MD2))))
clazz = (MimeObjectClass *)&mimeMultipartSignedCMSClass;
else
clazz = 0;
PR_FREEIF(proto);
PR_FREEIF(micalg);
PR_FREEIF(ct);
@ -460,20 +596,30 @@ mime_find_class (const char *content_type, MimeHeaders *hdrs,
treat as non-text types (which would be bad) without this special-case...
*/
else if (!nsCRT::strcasecmp(content_type, APPLICATION_PGP) ||
!nsCRT::strcasecmp(content_type, APPLICATION_PGP2))
clazz = (MimeObjectClass *)&mimeInlineTextPlainClass;
!nsCRT::strcasecmp(content_type, APPLICATION_PGP2))
clazz = (MimeObjectClass *)&mimeInlineTextPlainClass;
else if (!nsCRT::strcasecmp(content_type, SUN_ATTACHMENT))
clazz = (MimeObjectClass *)&mimeSunAttachmentClass;
/* Everything else gets represented as a clickable link.
/* Everything else gets represented as a clickable link.
*/
else if (!exact_match_p)
clazz = (MimeObjectClass *)&mimeExternalObjectClass;
clazz = (MimeObjectClass *)&mimeExternalObjectClass;
if (!mime_is_allowed_class(clazz, types_of_classes_to_disallow))
{
/* Do that check here (not after the if block), because we want to allow
user-installed plugins. */
if(!exact_match_p)
clazz = (MimeObjectClass *)&mimeExternalObjectClass;
else
clazz = 0;
}
}
if (!exact_match_p)
PR_ASSERT(clazz);
if (!exact_match_p)
PR_ASSERT(clazz);
if (!clazz) return 0;
PR_ASSERT(clazz);
@ -615,6 +761,8 @@ mime_create (const char *content_type, MimeHeaders *hdrs,
(clazz != (MimeObjectClass *)&mimeInlineTextPlainClass) &&
(clazz != (MimeObjectClass *)&mimeInlineTextPlainFlowedClass) &&
(clazz != (MimeObjectClass *)&mimeInlineTextHTMLClass) &&
(clazz != (MimeObjectClass *)&mimeInlineTextHTMLSanitizedClass) &&
(clazz != (MimeObjectClass *)&mimeInlineTextHTMLAsPlaintextClass) &&
(clazz != (MimeObjectClass *)&mimeInlineTextRichtextClass) &&
(clazz != (MimeObjectClass *)&mimeInlineTextEnrichedClass) &&
(clazz != (MimeObjectClass *)&mimeMessageClass) &&

Просмотреть файл

@ -57,57 +57,68 @@
MimeObject (abstract)
|
|--- MimeContainer (abstract)
+--- MimeContainer (abstract)
| |
| |--- MimeMultipart (abstract)
| +--- MimeMultipart (abstract)
| | |
| | |--- MimeMultipartMixed
| | +--- MimeMultipartMixed
| | |
| | |--- MimeMultipartDigest
| | +--- MimeMultipartDigest
| | |
| | |--- MimeMultipartParallel
| | +--- MimeMultipartParallel
| | |
| | |--- MimeMultipartAlternative
| | +--- MimeMultipartAlternative
| | |
| | |--- MimeMultipartRelated
| | +--- MimeMultipartRelated
| | |
| | |--- MimeMultipartAppleDouble
| | +--- MimeMultipartAppleDouble
| | |
| | |--- MimeSunAttachment
| | +--- MimeSunAttachment
| | |
| | |--- MimeMultipartSigned (abstract)
| | \--- MimeMultipartSigned (abstract)
| | |
| | |--- MimeMultipartSigned
| | \--- MimeMultipartSignedCMS
| |
| |--- MimeXlateed (abstract)
| +--- MimeEncrypted (abstract)
| | |
| | |--- MimeXlateed
| | \--- MimeEncryptedPKCS7
| |
| |--- MimeMessage
| +--- MimeXlateed (abstract)
| | |
| | \--- MimeXlateed
| |
| |--- MimeUntypedText
| +--- MimeMessage
| |
| \--- MimeUntypedText
|
|--- MimeLeaf (abstract)
+--- MimeLeaf (abstract)
| |
| |--- MimeInlineText (abstract)
| +--- MimeInlineText (abstract)
| | |
| | |--- MimeInlineTextPlain
| | |
| | |--- MimeInlineTextHTML
| | |
| | |--- MimeInlineTextRichtext
| | +--- MimeInlineTextPlain
| | | |
| | | |--- MimeInlineTextEnriched
| | |
| | |--- MimeInlineTextVCard
| | |
| | |--- MimeInlineTextCalendar
| | | \--- MimeInlineTextHTMLAsPlaintext
| | |
| | +--- MimeInlineTextPlainFlowed
| | |
| | +--- MimeInlineTextHTML
| | | |
| | | \--- MimeInlineTextHTMLSanitized
| | |
| | +--- MimeInlineTextRichtext
| | | |
| | | \--- MimeInlineTextEnriched
| | |
| | +--- MimeInlineTextVCard
| | |
| | \--- MimeInlineTextCalendar
| |
| |--- MimeInlineImage
| +--- MimeInlineImage
| |
| |--- MimeExternalObject
| \--- MimeExternalObject
|
|--- MimeExternalBody
\--- MimeExternalBody
=========================================================================
The definition of these classes is somewhat idiosyncratic, since I defined
@ -213,6 +224,10 @@
parentClass.whatnot.object.finalize(object); // (works...)
object->clazz->superclass->finalize(object); // WRONG!!
}
If you write a libmime content type handler, libmime might create several
instances of your class at once and call e.g. the same finalize code for
3 different objects in a row.
*/
#include "mimehdrs.h"

Просмотреть файл

@ -20,6 +20,7 @@
* the Initial Developer. All Rights Reserved.
*
* Contributor(s):
* Ben Bucksch <mozilla@bucksch.org>
*
* Alternatively, the contents of this file may be used under the terms of
* either the GNU General Public License Version 2 or later (the "GPL"), or
@ -41,6 +42,10 @@
#include "prlog.h"
#include "nsMimeTypes.h"
#include "nsMimeStringResources.h"
#include "nsIPref.h"
#include "mimemoz2.h" // for prefs
static NS_DEFINE_CID(kPrefCID, NS_PREF_CID);
#define MIME_SUPERCLASS mimeMultipartClass
MimeDefClass(MimeMultipartAlternative, MimeMultipartAlternativeClass,
@ -227,6 +232,29 @@ MimeMultipartAlternative_display_part_p(MimeObject *self,
are themselves unknown.
*/
// prefer_plaintext pref
nsIPref *pref = GetPrefServiceManager(self->options);
PRBool prefer_plaintext = PR_FALSE;
if (pref)
(void)pref->GetBoolPref("mailnews.display.prefer_plaintext",
&prefer_plaintext);
if (prefer_plaintext
&& self->options->format_out != nsMimeOutput::nsMimeMessageSaveAs
&& (!nsCRT::strncasecmp(ct, "text/html", 9) ||
!nsCRT::strncasecmp(ct, "text/enriched", 13) ||
!nsCRT::strncasecmp(ct, "text/richtext", 13))
)
// if the user prefers plaintext and this is the "rich" (e.g. HTML) part...
{
#if DEBUG
printf ("Ignoring %s alternative\n", ct);
#endif
return PR_FALSE;
}
#if DEBUG
printf ("Considering %s alternative\n", ct);
#endif
MimeObjectClass *clazz = mime_find_class (ct, sub_hdrs, self->options, PR_TRUE);
PRBool result = (clazz
? clazz->displayable_inline_p(clazz, sub_hdrs)

Просмотреть файл

@ -86,10 +86,26 @@
#include "nsITransport.h"
#include "mimeebod.h"
#include "mimeeobj.h"
// <for functions="HTML2Plaintext,HTMLSanitize">
#include "nsXPCOM.h"
#include "nsParserCIID.h"
#include "nsIParser.h"
#include "nsIHTMLContentSink.h"
#include "nsIContentSerializer.h"
#include "nsLayoutCID.h"
#include "nsIComponentManager.h"
#include "nsReadableUtils.h"
#include "nsIHTMLToTextSink.h"
#include "mozISanitizingSerializer.h"
// </for>
static NS_DEFINE_CID(kPrefCID, NS_PREF_CID);
static NS_DEFINE_CID(kIOServiceCID, NS_IOSERVICE_CID);
// <for functions="HTML2Plaintext,HTMLSanitize">
static NS_DEFINE_CID(kParserCID, NS_PARSER_CID);
static NS_DEFINE_CID(kNavDTDCID, NS_CNAVDTD_CID);
// </for>
#ifdef HAVE_MIME_DATA_SLOT
#define LOCK_LAST_CACHED_MESSAGE
@ -1209,27 +1225,6 @@ mime_image_write_buffer(char *buf, PRInt32 size, void *image_closure)
return size;
}
//
// Utility for finding the HTML part: returns |obj| itself if it is a
// MimeInlineTextHTML (or subclass), otherwise searches the children of a
// container depth-first, in order, and returns the first match.
// Returns NULL if nothing matches.
//
static MimeObject*
mime_find_text_html_part_1(MimeObject* obj)
{
  if (mime_subclass_p(obj->clazz,
                      (MimeObjectClass*) &mimeInlineTextHTMLClass))
    return obj;

  // Only containers have children worth descending into.
  if (!mime_subclass_p(obj->clazz, (MimeObjectClass*) &mimeContainerClass))
    return NULL;

  MimeContainer* container = (MimeContainer*) obj;
  for (PRInt32 idx = 0; idx < container->nchildren; ++idx)
  {
    MimeObject* found = mime_find_text_html_part_1(container->children[idx]);
    if (found)
      return found;
  }
  return NULL;
}
MimeObject*
mime_get_main_object(MimeObject* obj)
{
@ -2105,3 +2100,131 @@ nsresult GetMailNewsFont(MimeObject *obj, PRBool styleFixed, PRInt32 *fontPixel
return NS_OK;
}
/* This function synchronously converts an HTML document (as string)
   to plaintext (as string) using the Gecko converter.

   inString:  the HTML source to convert
   outString: handed to the plaintext sink, which writes the result into it
   flags:     see nsIDocumentEncoder.h
   wrapCol:   passed through to the sink's Initialize()

   Returns NS_ERROR_FAILURE if the parser, sink or DTD cannot be created,
   otherwise the failure code of the sink's Initialize() or of Parse(),
   or the (successful) result of Parse().
*/
// TODO: |printf|s?
/* <copy from="mozilla/htmlparser/test/outsinks/Convert.cpp"
         author="akk"
         adapted-by="Ben Bucksch"
         comment=" 'This code would not have been possible without akk.' ;-P.
                   No, really. "
   > */
nsresult
HTML2Plaintext(const nsString& inString, nsString& outString,
               PRUint32 flags, PRUint32 wrapCol)
{
  nsresult rv = NS_OK;

#if DEBUG_BenB
  printf("Converting HTML to plaintext\n");
  // Use a self-owning conversion object instead of ToNewUTF8String():
  // that buffer comes from the XPCOM allocator and must not be released
  // with |delete[]|.
  printf("HTML source is:\n--------------------\n%s--------------------\n",
         NS_ConvertUCS2toUTF8(inString).get());
#endif

  // Create a parser
  nsCOMPtr<nsIParser> parser = do_CreateInstance(kParserCID);
  NS_ENSURE_TRUE(parser, NS_ERROR_FAILURE);

  // Create the appropriate output sink
  nsCOMPtr<nsIContentSink> sink =
                                 do_CreateInstance(NS_PLAINTEXTSINK_CONTRACTID);
  NS_ENSURE_TRUE(sink, NS_ERROR_FAILURE);

  nsCOMPtr<nsIHTMLToTextSink> textSink(do_QueryInterface(sink));
  NS_ENSURE_TRUE(textSink, NS_ERROR_FAILURE);

  // Don't parse into a sink that failed to set itself up.
  rv = textSink->Initialize(&outString, flags, wrapCol);
  if (NS_FAILED(rv))
    return rv;

  parser->SetContentSink(sink);
  nsCOMPtr<nsIDTD> dtd = do_CreateInstance(kNavDTDCID);
  NS_ENSURE_TRUE(dtd, NS_ERROR_FAILURE);

  parser->RegisterDTD(dtd);

  rv = parser->Parse(inString, 0, NS_LITERAL_CSTRING("text/html"),
                     PR_FALSE, PR_TRUE);

  // NS_ERROR only complains in debug builds, so the failure has to be
  // returned explicitly.
  if (NS_FAILED(rv))
  {
    NS_ERROR("Parse() failed!");
    return rv;
  }

#if DEBUG_BenB
  printf("Plaintext is:\n--------------------\n%s--------------------\n",
         NS_ConvertUCS2toUTF8(outString).get());
#endif

  return rv;
}
// </copy>
/* This function synchronously sanitizes an HTML document (string->string)
   using the Gecko ContentSink mozISanitizingHTMLSerializer.

   inString:    the HTML source to sanitize
   outString:   handed to the sanitizing sink, which writes the result
                into it
   flags:       currently unused
   allowedTags: see mozSanitizingHTMLSerializer::ParsePrefs()

   Returns NS_ERROR_FAILURE if the parser, sink or DTD cannot be created,
   otherwise the failure code of the sink's Initialize() or of Parse(),
   or the (successful) result of Parse().
*/
// copied from HTML2Plaintext above
nsresult
HTMLSanitize(const nsString& inString, nsString& outString,
             PRUint32 flags, const nsAString& allowedTags)
{
  nsresult rv = NS_OK;

#if DEBUG_BenB
  printf("Sanitizing HTML\n");
  // Use a self-owning conversion object instead of ToNewUTF8String():
  // that buffer comes from the XPCOM allocator and must not be released
  // with |delete[]|.
  printf("Original HTML is:\n--------------------\n%s--------------------\n",
         NS_ConvertUCS2toUTF8(inString).get());
#endif

  // Create a parser
  nsCOMPtr<nsIParser> parser = do_CreateInstance(kParserCID);
  NS_ENSURE_TRUE(parser, NS_ERROR_FAILURE);

  // Create the appropriate output sink
  nsCOMPtr<nsIContentSink> sink =
                     do_CreateInstance(MOZ_SANITIZINGHTMLSERIALIZER_CONTRACTID);
  NS_ENSURE_TRUE(sink, NS_ERROR_FAILURE);

  nsCOMPtr<mozISanitizingHTMLSerializer> sanSink(do_QueryInterface(sink));
  NS_ENSURE_TRUE(sanSink, NS_ERROR_FAILURE);

  // Don't parse into a sink that failed to set itself up (e.g. because
  // it rejected |allowedTags|).
  rv = sanSink->Initialize(&outString, flags, allowedTags);
  if (NS_FAILED(rv))
    return rv;

  parser->SetContentSink(sink);
  nsCOMPtr<nsIDTD> dtd = do_CreateInstance(kNavDTDCID);
  NS_ENSURE_TRUE(dtd, NS_ERROR_FAILURE);

  parser->RegisterDTD(dtd);

  rv = parser->Parse(inString, 0, NS_LITERAL_CSTRING("text/html"),
                     PR_FALSE, PR_TRUE);

  // NS_ERROR only complains in debug builds, so the failure has to be
  // returned explicitly.
  if (NS_FAILED(rv))
  {
    NS_ERROR("Parse() failed!");
    return rv;
  }

#if DEBUG_BenB
  printf("Sanitized HTML is:\n--------------------\n%s--------------------\n",
         NS_ConvertUCS2toUTF8(outString).get());
#endif

  return rv;
}

Просмотреть файл

@ -194,6 +194,13 @@ extern "C" nsIPref *GetPrefServiceManager(MimeDisplayOptions *opt);
// Get the text converter...
mozITXTToHTMLConv *GetTextConverter(MimeDisplayOptions *opt);
nsresult
HTML2Plaintext(const nsString& inString, nsString& outString,
PRUint32 flags, PRUint32 wrapCol);
nsresult
HTMLSanitize(const nsString& inString, nsString& outString,
PRUint32 flags, const nsAString& allowedTags);
/* This is the next generation string retrieval call */
extern "C" char *MimeGetStringByID(PRInt32 stringID);

Просмотреть файл

@ -180,38 +180,35 @@ MimeInlineTextHTML_parse_line (char *line, PRInt32 length, MimeObject *obj)
(cp = PL_strncasestr(cp, "CHARSET=", length - (int)(cp - line)))
)
{
if (cp)
char* cp1 = cp + 8; //8 for the length of "CHARSET="
char* cp2 = PL_strnpbrk(cp1, " \"\'", length - (int)(cp1 - line));
if (cp2)
{
char* cp1 = cp + 8; //8 for the length of "CHARSET="
char* cp2 = PL_strnpbrk(cp1, " \"\'", length - (int)(cp1 - line));
if (cp2)
{
char* charset = PL_strndup(cp1, (int)(cp2 - cp1));
char* charset = PL_strndup(cp1, (int)(cp2 - cp1));
// Fix bug 101434, in this case since this parsing is a char*
// operation, a real UTF-16 or UTF-32 document won't be parse
// correctly, if it got parse, it cannot be UTF-16 nor UTF-32
// there fore, we ignore them if somehow we got that value
// 6 == strlen("UTF-16") or strlen("UTF-32"), this will cover
// UTF-16, UTF-16BE, UTF-16LE, UTF-32, UTF-32BE, UTF-32LE
if ((charset != nsnull) &&
nsCRT::strncasecmp(charset, "UTF-16", 6) &&
nsCRT::strncasecmp(charset, "UTF-32", 6))
{
textHTML->charset = charset;
// Fix bug 101434, in this case since this parsing is a char*
// operation, a real UTF-16 or UTF-32 document won't be parse
// correctly, if it got parse, it cannot be UTF-16 nor UTF-32
// there fore, we ignore them if somehow we got that value
// 6 == strlen("UTF-16") or strlen("UTF-32"), this will cover
// UTF-16, UTF-16BE, UTF-16LE, UTF-32, UTF-32BE, UTF-32LE
if ((charset != nsnull) &&
nsCRT::strncasecmp(charset, "UTF-16", 6) &&
nsCRT::strncasecmp(charset, "UTF-32", 6))
{
textHTML->charset = charset;
// write out the data without the charset part...
if (textHTML->charset)
{
int err = MimeObject_write(obj, line, cp - line, PR_TRUE);
if (err == 0)
err = MimeObject_write(obj, cp2, length - (int)(cp2 - line), PR_TRUE);
// write out the data without the charset part...
if (textHTML->charset)
{
int err = MimeObject_write(obj, line, cp - line, PR_TRUE);
if (err == 0)
err = MimeObject_write(obj, cp2, length - (int)(cp2 - line), PR_TRUE);
return err;
}
return err;
}
PR_FREEIF(charset);
}
PR_FREEIF(charset);
}
}
}
@ -220,11 +217,6 @@ MimeInlineTextHTML_parse_line (char *line, PRInt32 length, MimeObject *obj)
return MimeObject_write(obj, line, length, PR_TRUE);
}
/* This method is the same as that of MimeInlineTextRichtext (and thus
MimeInlineTextEnriched); maybe that means that MimeInlineTextHTML
should share a common parent with them which is not also shared by
MimeInlineTextPlain?
*/
static int
MimeInlineTextHTML_parse_eof (MimeObject *obj, PRBool abort_p)
{

Просмотреть файл

@ -35,15 +35,15 @@
*
* ***** END LICENSE BLOCK ***** */
/* The MimeInlineTextPlain class implements the text/plain MIME content type,
and is also used for all otherwise-unknown text/ subtypes.
*/
#ifndef _MIMETPLA_H_
#define _MIMETPLA_H_
#include "mimetext.h"
/* The MimeInlineTextPlain class implements the text/plain MIME content type,
and is also used for all otherwise-unknown text/ subtypes.
*/
typedef struct MimeInlineTextPlainClass MimeInlineTextPlainClass;
typedef struct MimeInlineTextPlain MimeInlineTextPlain;

Просмотреть файл

@ -351,10 +351,6 @@ MimeInlineTextRichtext_parse_begin (MimeObject *obj)
}
/* This method is largely the same as that of MimeInlineTextHTML; maybe that
means that MimeInlineTextRichtext and MimeInlineTextEnriched should share
a common parent with it which is not also shared by MimeInlineTextPlain?
*/
static int
MimeInlineTextRichtext_parse_eof (MimeObject *obj, PRBool abort_p)
{