b=325080. add atom 0.3 and better html handling to feed processor. r+a=ben

This commit is contained in:
sayrer%gmail.com 2006-05-06 04:13:20 +00:00
Родитель ab1ff1cf1e
Коммит 889faf818b
28 изменённых файлов: 525 добавлений и 45 удалений

Просмотреть файл

@ -86,6 +86,7 @@ REQUIRES = \
xuldoc \ xuldoc \
alerts \ alerts \
url-classifier \ url-classifier \
feeds \
$(NULL) $(NULL)
EXPORTS = nsToolkitCompsCID.h EXPORTS = nsToolkitCompsCID.h
@ -98,6 +99,7 @@ LOCAL_INCLUDES = \
-I$(srcdir)/../typeaheadfind/src \ -I$(srcdir)/../typeaheadfind/src \
-I$(srcdir)/../alerts/src \ -I$(srcdir)/../alerts/src \
-I$(srcdir)/../url-classifier/src \ -I$(srcdir)/../url-classifier/src \
-I$(srcdir)/../feeds/src \
$(NULL) $(NULL)
SHARED_LIBRARY_LIBS = \ SHARED_LIBRARY_LIBS = \
@ -119,6 +121,10 @@ ifdef MOZ_URL_CLASSIFIER
SHARED_LIBRARY_LIBS += $(DIST)/lib/$(LIB_PREFIX)urlclassifier_s.$(LIB_SUFFIX) SHARED_LIBRARY_LIBS += $(DIST)/lib/$(LIB_PREFIX)urlclassifier_s.$(LIB_SUFFIX)
endif endif
ifdef MOZ_FEEDS
SHARED_LIBRARY_LIBS += $(DIST)/lib/$(LIB_PREFIX)feed_s.$(LIB_SUFFIX)
endif
ifndef MOZ_SUITE ifndef MOZ_SUITE
# XXX Suite isn't ready to build this just yet # XXX Suite isn't ready to build this just yet
SHARED_LIBRARY_LIBS += ../typeaheadfind/src/$(LIB_PREFIX)fastfind_s.$(LIB_SUFFIX) SHARED_LIBRARY_LIBS += ../typeaheadfind/src/$(LIB_PREFIX)fastfind_s.$(LIB_SUFFIX)

Просмотреть файл

@ -77,6 +77,8 @@
#define NS_URLCLASSIFIERDBSERVICE_CONTRACTID \ #define NS_URLCLASSIFIERDBSERVICE_CONTRACTID \
"@mozilla.org/url-classifier/dbservice;1" "@mozilla.org/url-classifier/dbservice;1"
#define NS_SCRIPTABLEUNESCAPEHTML_CONTRACTID "@mozilla.org/feed-unescapehtml;1"
///////////////////////////////////////////////////////////////////////////// /////////////////////////////////////////////////////////////////////////////
// {A0CCAAF8-09DA-44D8-B250-9AC3E93C8117} // {A0CCAAF8-09DA-44D8-B250-9AC3E93C8117}
@ -120,3 +122,6 @@
#define NS_URLCLASSIFIERDBSERVICE_CID \ #define NS_URLCLASSIFIERDBSERVICE_CID \
{ 0x5eb7c3c1, 0xec1f, 0x4007, { 0x87, 0xcc, 0xee, 0xfb, 0x37, 0xd6, 0x8c, 0xe6} } { 0x5eb7c3c1, 0xec1f, 0x4007, { 0x87, 0xcc, 0xee, 0xfb, 0x37, 0xd6, 0x8c, 0xe6} }
#define NS_SCRIPTABLEUNESCAPEHTML_CID \
{ 0x10f2f5f0, 0xf103, 0x4901, { 0x98, 0x0f, 0xba, 0x11, 0xbd, 0x70, 0xd6, 0x0d} }

Просмотреть файл

@ -58,6 +58,10 @@
#include "nsUrlClassifierDBService.h" #include "nsUrlClassifierDBService.h"
#endif #endif
#ifdef MOZ_FEEDS
#include "nsScriptableUnescapeHTML.h"
#endif
///////////////////////////////////////////////////////////////////////////// /////////////////////////////////////////////////////////////////////////////
NS_GENERIC_FACTORY_CONSTRUCTOR_INIT(nsAppStartup, Init) NS_GENERIC_FACTORY_CONSTRUCTOR_INIT(nsAppStartup, Init)
@ -82,6 +86,10 @@ NS_GENERIC_FACTORY_SINGLETON_CONSTRUCTOR(nsUrlClassifierDBService,
nsUrlClassifierDBService::GetInstance) nsUrlClassifierDBService::GetInstance)
#endif #endif
#ifdef MOZ_FEEDS
NS_GENERIC_FACTORY_CONSTRUCTOR(nsScriptableUnescapeHTML)
#endif
///////////////////////////////////////////////////////////////////////////// /////////////////////////////////////////////////////////////////////////////
static const nsModuleComponentInfo components[] = static const nsModuleComponentInfo components[] =
@ -125,6 +133,12 @@ static const nsModuleComponentInfo components[] =
NS_URLCLASSIFIERDBSERVICE_CONTRACTID, NS_URLCLASSIFIERDBSERVICE_CONTRACTID,
nsUrlClassifierDBServiceConstructor }, nsUrlClassifierDBServiceConstructor },
#endif #endif
#ifdef MOZ_FEEDS
{ "Unescape HTML",
NS_SCRIPTABLEUNESCAPEHTML_CID,
NS_SCRIPTABLEUNESCAPEHTML_CONTRACTID,
nsScriptableUnescapeHTMLConstructor },
#endif
}; };
NS_IMPL_NSGETMODULE(nsToolkitCompsModule, components) NS_IMPL_NSGETMODULE(nsToolkitCompsModule, components)

Просмотреть файл

@ -49,6 +49,9 @@ XPIDLSRCS = nsIFeedProcessor.idl \
nsIFeedListener.idl \ nsIFeedListener.idl \
nsIFeed.idl \ nsIFeed.idl \
nsIFeedContainer.idl \ nsIFeedContainer.idl \
nsIFeedEntry.idl nsIFeedEntry.idl \
nsIFeedTextConstruct.idl \
nsIScriptableUnescapeHTML.idl \
$(NULL)
include $(topsrcdir)/config/rules.mk include $(topsrcdir)/config/rules.mk

Просмотреть файл

@ -70,4 +70,5 @@ interface nsIFeedTextConstruct : nsISupports
* The content of the text construct. * The content of the text construct.
*/ */
attribute AString text; attribute AString text;
} };

Просмотреть файл

@ -0,0 +1,51 @@
/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
/* ***** BEGIN LICENSE BLOCK *****
* Version: MPL 1.1/GPL 2.0/LGPL 2.1
*
* The contents of this file are subject to the Mozilla Public License Version
* 1.1 (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
* http://www.mozilla.org/MPL/
*
* Software distributed under the License is distributed on an "AS IS" basis,
* WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
* for the specific language governing rights and limitations under the
* License.
*
* The Original Code is mozilla.org code.
*
* The Initial Developer of the Original Code is Robert Sayre.
* Portions created by the Initial Developer are Copyright (C) 2006
* the Initial Developer. All Rights Reserved.
*
* Contributor(s):
*
* Alternatively, the contents of this file may be used under the terms of
* either the GNU General Public License Version 2 or later (the "GPL"), or
* the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
* in which case the provisions of the GPL or the LGPL are applicable instead
* of those above. If you wish to allow use of your version of this file only
* under the terms of either the GPL or the LGPL, and not to allow others to
* use your version of this file under the terms of the MPL, indicate your
* decision by deleting the provisions above and replace them with the notice
* and other provisions required by the GPL or the LGPL. If you do not delete
* the provisions above, a recipient may use your version of this file under
* the terms of any one of the MPL, the GPL or the LGPL.
*
* ***** END LICENSE BLOCK ***** */
#include "nsISupports.idl"
/**
* A utility class that unescapes HTML strings.
*
* Scripts reach it through the "@mozilla.org/feed-unescapehtml;1" contract.
*/
[scriptable, uuid(0ff58de6-2460-4095-9ff9-9756efedc756)]
interface nsIScriptableUnescapeHTML : nsISupports
{
/**
* Converts all entities to Unicode.
*
* @param src The HTML string to unescape.
* @returns The plain-text rendering of src, with entities replaced by
*          the characters they stand for.
*/
AString unescape(in AString src);
};

Просмотреть файл

@ -48,8 +48,11 @@ const IO_CONTRACTID = "@mozilla.org/network/io-service;1"
const BAG_CONTRACTID = "@mozilla.org/hash-property-bag;1" const BAG_CONTRACTID = "@mozilla.org/hash-property-bag;1"
const ARRAY_CONTRACTID = "@mozilla.org/array;1"; const ARRAY_CONTRACTID = "@mozilla.org/array;1";
const SAX_CONTRACTID = "@mozilla.org/saxparser/xmlreader;1"; const SAX_CONTRACTID = "@mozilla.org/saxparser/xmlreader;1";
const UNESCAPE_CONTRACTID = "@mozilla.org/feed-unescapehtml;1";
var gIoService = Cc[IO_CONTRACTID].getService(Ci.nsIIOService); var gIoService = Cc[IO_CONTRACTID].getService(Ci.nsIIOService);
var gUnescapeHTML = Cc[UNESCAPE_CONTRACTID].
getService(Ci.nsIScriptableUnescapeHTML);
/***** Some general utils *****/ /***** Some general utils *****/
function strToURI(link, base) { function strToURI(link, base) {
@ -80,6 +83,17 @@ function isIID(a, iid) {
return rv; return rv;
} }
/**
 * Tells whether a value can be queried for nsIFeedTextConstruct.
 *
 * @param a  the value to probe; anything is accepted
 * @returns true when a.QueryInterface(Ci.nsIFeedTextConstruct) completes
 *          without throwing, false for every other outcome (including
 *          values that have no QueryInterface method at all)
 */
function isIFeedTextConstruct(a) {
  try {
    a.QueryInterface(Ci.nsIFeedTextConstruct);
  }
  catch (ignored) {
    // Any exception simply means "not a text construct".
    return false;
  }
  return true;
}
function isIArray(a) { function isIArray(a) {
return isIID(a, Ci.nsIArray); return isIID(a, Ci.nsIArray);
} }
@ -92,6 +106,20 @@ function stripTags(someHTML) {
return someHTML.replace(/<[^>]+>/g,""); return someHTML.replace(/<[^>]+>/g,"");
} }
/**
 * Reduces a feed value to plain text.
 *
 * @param textConstruct  either an nsIFeedTextConstruct or a plain value
 *                       (string, null, undefined)
 * @returns for a text construct whose type is "text", its text unchanged;
 *          for any other type, the text with tags stripped and entities
 *          unescaped; null when the construct carries no text; for
 *          anything that is not a text construct, the argument itself
 */
function plainTextFromTextConstruct(textConstruct) {
  // It was not a textConstruct, just a string (or nothing at all).
  if (textConstruct == null || !isIFeedTextConstruct(textConstruct))
    return textConstruct;

  var text = textConstruct.text;

  // A text construct's text may be null (nothing was ever assigned).
  // stripTags() calls .replace() on its argument, so only run the
  // HTML clean-up when there is actually a string to clean.
  if (text != null && textConstruct.type != "text")
    text = gUnescapeHTML.unescape(stripTags(text));

  return text;
}
function xmlEscape(s) { function xmlEscape(s) {
s = s.replace(/&/g, "&amp;"); s = s.replace(/&/g, "&amp;");
s = s.replace(/>/g, "&gt;"); s = s.replace(/>/g, "&gt;");
@ -201,6 +229,7 @@ function W3CToIETFDate(dateString) {
// namespace map // namespace map
var gNamespaces = { var gNamespaces = {
"http://www.w3.org/2005/Atom":"atom", "http://www.w3.org/2005/Atom":"atom",
"http://purl.org/atom/ns#":"atom03",
"http://purl.org/rss/1.0/modules/content/":"content", "http://purl.org/rss/1.0/modules/content/":"content",
"http://purl.org/dc/elements/1.1/":"dc", "http://purl.org/dc/elements/1.1/":"dc",
"http://www.w3.org/1999/02/22-rdf-syntax-ns#":"rdf", "http://www.w3.org/1999/02/22-rdf-syntax-ns#":"rdf",
@ -224,7 +253,9 @@ var gKnownTextElements = ["title","link","description","language","copyright",
"atom:logo", "atom:published", "atom:updated", "atom:logo", "atom:published", "atom:updated",
"wfw:comment", "wfw:commentRss", "wiki:version", "wfw:comment", "wfw:commentRss", "wiki:version",
"wiki:status", "wiki:importance","wiki:diff", "wiki:status", "wiki:importance","wiki:diff",
"wiki:history","content:encoded", "atom:icon"]; "wiki:history","content:encoded", "atom:icon",
"atom03:title", "atom03:summary", "atom03:content",
"atom03:tagline", "atom:title"];
function FeedResult() {} function FeedResult() {}
FeedResult.prototype = { FeedResult.prototype = {
@ -250,17 +281,33 @@ FeedResult.prototype = {
function Feed() { function Feed() {
this._sub = null; this._sub = null;
this._title = null;
this.items = []; this.items = [];
this.link = null;
} }
Feed.prototype = { Feed.prototype = {
subtitle: function Feed_subtitle(doStripTags) { subtitle: function Feed_subtitle(doStripTags) {
return doStripTags ? stripTags(this._sub) : this._sub; if (this._sub == null)
return null;
if (doStripTags)
return plainTextFromTextConstruct(this._sub);
if (isIID(this._sub, Ci.nsIFeedTextConstruct))
return this._sub.text;
return this._sub;
},
get title() {
return plainTextFromTextConstruct(this._title);
}, },
searchLists: { searchLists: {
_sub: ["description","dc:description","rss1:description","atom:subtitle"], _sub: ["description","dc:description","rss1:description",
items: ["items","entries"], "atom03:tagline","atom:subtitle"],
title: ["title","rss1:title","atom:title"], items: ["items","atom03_entries","entries"],
_title: ["title","rss1:title", "atom03:title","atom:title"],
link: [["link",strToURI],["rss1:link",strToURI]], link: [["link",strToURI],["rss1:link",strToURI]],
categories: ["categories", "dc:subject"], categories: ["categories", "dc:subject"],
cloud: ["cloud"], cloud: ["cloud"],
@ -292,31 +339,51 @@ Feed.prototype = {
function Entry() { function Entry() {
this._summary = null; this._summary = null;
this._content = null; this._content = null;
this._title = null;
this.fields = Cc["@mozilla.org/hash-property-bag;1"]. this.fields = Cc["@mozilla.org/hash-property-bag;1"].
createInstance(Ci.nsIWritablePropertyBag2); createInstance(Ci.nsIWritablePropertyBag2);
this.link = null;
} }
Entry.prototype = { Entry.prototype = {
fields: null, fields: null,
get title() {
return plainTextFromTextConstruct(this._title);
},
summary: function Entry_summary(doStripTags) { summary: function Entry_summary(doStripTags) {
if (this._summary == null) if (this._summary == null)
return null; return null;
return doStripTags ? stripTags(this._summary) : this._summary;
if (doStripTags)
return plainTextFromTextConstruct(this._summary);
if (isIID(this._summary, Ci.nsIFeedTextConstruct))
return this._summary.text;
return this._summary;
}, },
content: function Entry_content(doStripTags) { content: function Entry_content(doStripTags) {
if (this._content == null) if (this._content == null)
return null; return null;
return doStripTags ? stripTags(this._content) : this._content;
if (doStripTags)
return plainTextFromTextConstruct(this._content);
if (isIID(this._content, Ci.nsIFeedTextConstruct))
return this._content.text;
return this._content;
}, },
enclosures: null, enclosures: null,
mediaContent: null, mediaContent: null,
searchLists: { searchLists: {
title: ["title","rss1:title","atom:title"], _title: ["title","rss1:title","atom03:title","atom:title"],
link: [["link",strToURI],["rss1:link",strToURI]], link: [["link",strToURI],["rss1:link",strToURI]],
_summary: ["description", "rss1:description", _summary: ["description", "rss1:description", "dc:description",
"dc:description", "atom:summary"], "atom03:summary", "atom:summary"],
_content: ["content:encoded", "atom:content"], _content: ["content:encoded","atom03:content","atom:content"]
}, },
normalize: function Feed_normalize() { normalize: function Feed_normalize() {
@ -338,7 +405,7 @@ function TextConstruct() {
this.lang = null; this.lang = null;
this.base = null; this.base = null;
this.type = "text"; this.type = "text";
this.text = ""; this.text = null;
} }
TextConstruct.prototype = { TextConstruct.prototype = {
@ -625,7 +692,8 @@ ExtensionHandler.prototype = {
if (this._depth == 0) { if (this._depth == 0) {
if (this._isSimple) { if (this._isSimple) {
this._processor.returnFromExtHandler(this._uri, this._localName, this._processor.returnFromExtHandler(this._uri, this._localName,
trimString(this._buf)); trimString(this._buf),
this._attrs);
} }
else { else {
this._processor.returnFromExtHandler(null,null,null); this._processor.returnFromExtHandler(null,null,null);
@ -694,8 +762,19 @@ function FeedProcessor() {
this.listener = null; this.listener = null;
// These elements can contain (X)HTML or plain text. // These elements can contain (X)HTML or plain text.
this._textConstructs = ["atom:title", "atom:summary", "atom:rights", // We keep a table here that contains their default treatment
"atom:content", "atom:subtitle"]; this._textConstructs = {"atom:title":"text",
"atom:summary":"text",
"atom:rights":"text",
"atom:content":"text",
"atom:subtitle":"text",
"description":"html",
"rss1:description":"html",
"content:encoded":"html",
"atom03:title":"text",
"atom03:tagline":"text",
"atom03:summary":"text",
"atom03:content":"text"};
this._stack = []; this._stack = [];
this._trans = { this._trans = {
@ -707,8 +786,11 @@ function FeedProcessor() {
// verify that until we hit a rss1:channel element. // verify that until we hit a rss1:channel element.
"rdf:RDF": new WrapperElementInfo("RDF"), "rdf:RDF": new WrapperElementInfo("RDF"),
//If we hit a Atom 1.0 element, treat as Atom 1.0. // If we hit a Atom 1.0 element, treat as Atom 1.0.
"atom:feed": new FeedElementInfo("Atom", "atom"), "atom:feed": new FeedElementInfo("Atom", "atom"),
// Treat as Atom 0.3
"atom03:feed": new FeedElementInfo("Atom03", "atom03"),
}, },
/********* RSS2 **********/ /********* RSS2 **********/
@ -766,6 +848,21 @@ function FeedProcessor() {
"atom:contributor": new ElementInfo("contributor", null, null, true), "atom:contributor": new ElementInfo("contributor", null, null, true),
"atom:link": new ElementInfo("links", null, null, true), "atom:link": new ElementInfo("links", null, null, true),
}, },
/********* ATOM 0.3 **********/
"IN_ATOM03": {
"atom03:author": new ElementInfo("author", null, null, true),
"atom03:link": new ElementInfo("links", null, null, true),
"atom03:entry": new ElementInfo("atom03_entries", Cc[ENTRY_CONTRACTID],
null, true)
},
"IN_ATOM03_ENTRIES": {
"atom03:author": new ElementInfo("author", null, null, true),
"atom03:link": new ElementInfo("links", null, null, true),
"atom03:entry": new ElementInfo("atom03_entries", Cc[ENTRY_CONTRACTID],
null, true)
}
} }
} }
@ -920,10 +1017,10 @@ FeedProcessor.prototype = {
// The Atom spec explicitly says the div is not part of the content, // The Atom spec explicitly says the div is not part of the content,
// and explicitly allows whitespace collapsing. // and explicitly allows whitespace collapsing.
// //
if (this._result.version == "atom" && if ((this._result.version == "atom" || this._result.version == "atom03") &&
arrayContains(this._textConstructs, key)) { this._textConstructs[key] != null) {
var type = attributes.getValue("","type"); var type = attributes.getValueFromName("","type");
if (type == "xhtml") { if (type != null && type.indexOf("xhtml") >= 0) {
this._xhtmlHandler = this._xhtmlHandler =
new XHTMLHandler(this, (this._result.version == "atom")); new XHTMLHandler(this, (this._result.version == "atom"));
this._reader.contentHandler = this._xhtmlHandler; this._reader.contentHandler = this._xhtmlHandler;
@ -1119,7 +1216,8 @@ FeedProcessor.prototype = {
// unknown element values are returned here. See startElement above // unknown element values are returned here. See startElement above
// for how this works. // for how this works.
returnFromExtHandler: function FP_returnExt(uri, localName, chars) { returnFromExtHandler:
function FP_returnExt(uri, localName, chars, attributes) {
--this._depth; --this._depth;
// take control of the SAX events // take control of the SAX events
@ -1143,10 +1241,44 @@ FeedProcessor.prototype = {
container = container.queryElementAt(container.length - 1, container = container.queryElementAt(container.length - 1,
Ci.nsIWritablePropertyBag2); Ci.nsIWritablePropertyBag2);
} }
// Assign the property // Make the buffer our new property
var prefix = gNamespaces[uri] ? gNamespaces[uri] + ":" : ""; var prefix = gNamespaces[uri] ? gNamespaces[uri] + ":" : "";
container.setPropertyAsAString(prefix+localName, chars); var propName = prefix + localName;
// But, it could be something containing HTML. If so,
// we need to know about that.
if (this._textConstructs[propName] != null &&
(this._result.version.indexOf("rss") == -1 ||
this._handlerStack[this._depth].containerClass != null)) {
var newProp = Cc[TEXTCONSTRUCT_CONTRACTID].
createInstance(Ci.nsIFeedTextConstruct);
newProp.text = chars;
// Look up the default type in our table
var type = this._textConstructs[propName];
var typeAttribute = attributes.getValueFromName("","type");
if (this._result.version == "atom" && typeAttribute != null) {
type = typeAttribute;
}
else if (this._result.version == "atom03" && typeAttribute != null) {
if (typeAttribute.toLowerCase().indexOf("xhtml") >= 0) {
type = "xhtml";
}
else if (typeAttribute.toLowerCase().indexOf("html") >= 0) {
type = "html";
}
else if (typeAttribute.toLowerCase().indexOf("text") >= 0) {
type = "text";
}
}
newProp.type = type;
container.setPropertyAsInterface(propName, newProp);
}
else {
container.setPropertyAsAString(propName, chars);
}
}, },
// Sometimes, we'll hand off SAX handling duties to an XHTMLHandler // Sometimes, we'll hand off SAX handling duties to an XHTMLHandler
@ -1166,7 +1298,11 @@ FeedProcessor.prototype = {
// Assign the property // Assign the property
var prefix = gNamespaces[uri] ? gNamespaces[uri] + ":" : ""; var prefix = gNamespaces[uri] ? gNamespaces[uri] + ":" : "";
container.setPropertyAsAString(prefix + localName, chars); var newProp = newProp = Cc[TEXTCONSTRUCT_CONTRACTID].
createInstance(Ci.nsIFeedTextConstruct);
newProp.text = chars;
newProp.type = "xhtml";
container.setPropertyAsInterface(prefix + localName, newProp);
// XHTML will cause us to peek too far. The XHTML handler will // XHTML will cause us to peek too far. The XHTML handler will
// send us an end element to call. RFC4287-valid feeds allow a // send us an end element to call. RFC4287-valid feeds allow a
@ -1288,7 +1424,7 @@ var Module = {
// Entry // Entry
cr.unregisterFactoryLocation(ENTRY_CLASSID, location); cr.unregisterFactoryLocation(ENTRY_CLASSID, location);
// Text Construct // Text Construct
cr.unregisterFactoryLocation(TEXTCONSTUCT_CLASSID, location); cr.unregisterFactoryLocation(TEXTCONSTRUCT_CLASSID, location);
}, },
canUnload: function(cm) { canUnload: function(cm) {

Просмотреть файл

@ -42,6 +42,26 @@ VPATH = @srcdir@
include $(DEPTH)/config/autoconf.mk include $(DEPTH)/config/autoconf.mk
MODULE = feeds
LIBRARY_NAME = feed_s
MOZILLA_INTERNAL_API = 1
FORCE_STATIC_LIB = 1
LIBXUL_LIBRARY = 1
REQUIRES = \
xpcom \
necko \
string \
js \
dom \
htmlparser \
content \
layout \
$(NULL)
CPPSRCS = nsScriptableUnescapeHTML.cpp \
$(NULL)
EXTRA_COMPONENTS = FeedProcessor.js EXTRA_COMPONENTS = FeedProcessor.js
include $(topsrcdir)/config/rules.mk include $(topsrcdir)/config/rules.mk

Просмотреть файл

@ -0,0 +1,99 @@
/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
/* ***** BEGIN LICENSE BLOCK *****
* Version: MPL 1.1/GPL 2.0/LGPL 2.1
*
* The contents of this file are subject to the Mozilla Public License Version
* 1.1 (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
* http://www.mozilla.org/MPL/
*
* Software distributed under the License is distributed on an "AS IS" basis,
* WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
* for the specific language governing rights and limitations under the
* License.
*
* The Original Code is mozilla.org code.
*
* The Initial Developer of the Original Code is Robert Sayre.
* Portions created by the Initial Developer are Copyright (C) 2006
* the Initial Developer. All Rights Reserved.
*
* Contributor(s):
*
* Alternatively, the contents of this file may be used under the terms of
* either the GNU General Public License Version 2 or later (the "GPL"), or
* the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
* in which case the provisions of the GPL or the LGPL are applicable instead
* of those above. If you wish to allow use of your version of this file only
* under the terms of either the GPL or the LGPL, and not to allow others to
* use your version of this file under the terms of the MPL, indicate your
* decision by deleting the provisions above and replace them with the notice
* and other provisions required by the GPL or the LGPL. If you do not delete
* the provisions above, a recipient may use your version of this file under
* the terms of any one of the MPL, the GPL or the LGPL.
*
* ***** END LICENSE BLOCK ***** */
#include "nsString.h"
#include "nsCRT.h"
#include "nsISupportsArray.h"
#include "nsIComponentManager.h"
#include "nsCOMPtr.h"
#include "nsXPCOM.h"
#include "nsISupportsPrimitives.h"
#include "nsXPIDLString.h"
#include "nsIParser.h"
#include "nsIDTD.h"
#include "nsNetCID.h"
#include "nsNetUtil.h"
#include "nsParserCIID.h"
#include "nsParserCIID.h"
#include "nsIContentSink.h"
#include "nsIHTMLToTextSink.h"
#include "nsIDocumentEncoder.h"
#include "nsIScriptableUnescapeHTML.h"
#include "nsScriptableUnescapeHTML.h"
NS_IMPL_ISUPPORTS1(nsScriptableUnescapeHTML, nsIScriptableUnescapeHTML)
static NS_DEFINE_CID(kCParserCID, NS_PARSER_CID);
// From /widget/HTMLConverter
//
// Feeds an HTML string through the HTML parser with a plain-text content
// sink attached; the sink is initialized with aToStr as its output string.
//
// @param aFromStr  the markup to convert
// @param aToStr    out-param; emptied first, then handed to the sink
// @return the failure code from creating the parser, NS_ERROR_FAILURE when
//         the plain-text sink cannot be created or does not expose
//         nsIHTMLToTextSink, NS_OK otherwise
//
// NOTE(review): whatever Initialize(), SetContentSink() and Parse() return
// is discarded, so NS_OK is returned even if one of them reports a problem
// -- confirm this is intended.
NS_IMETHODIMP
nsScriptableUnescapeHTML::Unescape(const nsAString & aFromStr,
                                   nsAString & aToStr)
{
  // Start from an empty result string.
  aToStr.SetLength(0);

  // The parser does the actual conversion work.
  nsresult rv;
  nsCOMPtr<nsIParser> htmlParser = do_CreateInstance(kCParserCID, &rv);
  if (!htmlParser) {
    return rv;
  }

  // Build the plain-text sink and get at its HTML-to-text interface.
  nsCOMPtr<nsIContentSink> sink =
    do_CreateInstance(NS_PLAINTEXTSINK_CONTRACTID);
  NS_ENSURE_TRUE(sink, NS_ERROR_FAILURE);

  nsCOMPtr<nsIHTMLToTextSink> textSink = do_QueryInterface(sink);
  NS_ENSURE_TRUE(textSink, NS_ERROR_FAILURE);

  // Point the sink at the caller's string; the last argument (wrap
  // column) is 0.
  textSink->Initialize(&aToStr,
                       nsIDocumentEncoder::OutputSelectionOnly |
                         nsIDocumentEncoder::OutputAbsoluteLinks,
                       0);

  // Convert it: parse the input as a text/html fragment into the sink.
  htmlParser->SetContentSink(sink);
  htmlParser->Parse(aFromStr, 0, NS_LITERAL_CSTRING("text/html"),
                    PR_TRUE, eDTDMode_fragment);

  return NS_OK;
}

Просмотреть файл

@ -0,0 +1,49 @@
/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
/* ***** BEGIN LICENSE BLOCK *****
* Version: MPL 1.1/GPL 2.0/LGPL 2.1
*
* The contents of this file are subject to the Mozilla Public License Version
* 1.1 (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
* http://www.mozilla.org/MPL/
*
* Software distributed under the License is distributed on an "AS IS" basis,
* WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
* for the specific language governing rights and limitations under the
* License.
*
* The Original Code is mozilla.org code.
*
* The Initial Developer of the Original Code is Robert Sayre.
* Portions created by the Initial Developer are Copyright (C) 2006
* the Initial Developer. All Rights Reserved.
*
* Contributor(s):
*
* Alternatively, the contents of this file may be used under the terms of
* either the GNU General Public License Version 2 or later (the "GPL"), or
* the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
* in which case the provisions of the GPL or the LGPL are applicable instead
* of those above. If you wish to allow use of your version of this file only
* under the terms of either the GPL or the LGPL, and not to allow others to
* use your version of this file under the terms of the MPL, indicate your
* decision by deleting the provisions above and replace them with the notice
* and other provisions required by the GPL or the LGPL. If you do not delete
* the provisions above, a recipient may use your version of this file under
* the terms of any one of the MPL, the GPL or the LGPL.
*
* ***** END LICENSE BLOCK ***** */
#ifndef nsScriptableHTMLUnescape_h__
#define nsScriptableHTMLUnescape_h__
#include "nsIScriptableUnescapeHTML.h"
// Concrete implementation of nsIScriptableUnescapeHTML. The class adds no
// members of its own; the method bodies live in nsScriptableUnescapeHTML.cpp.
class nsScriptableUnescapeHTML : public nsIScriptableUnescapeHTML
{
public:
// Declarations for the nsISupports methods (implemented in the .cpp via
// NS_IMPL_ISUPPORTS1).
NS_DECL_ISUPPORTS
// Declarations for the nsIScriptableUnescapeHTML methods, i.e. Unescape().
NS_DECL_NSISCRIPTABLEUNESCAPEHTML
};
#endif // nsScriptableHTMLUnescape_h__

Просмотреть файл

@ -57,6 +57,7 @@ TestListener.prototype = {
(isIID(feed, Components.interfaces.nsIFeed)); (isIID(feed, Components.interfaces.nsIFeed));
try { try {
if(!eval(testcase.expect)){ if(!eval(testcase.expect)){
print(testcase.path + ": \n");
print("FAILED! Test was: \"" + testcase.desc + "\" |\n" + testcase.expect + '|\n'); print("FAILED! Test was: \"" + testcase.desc + "\" |\n" + testcase.expect + '|\n');
}else{ }else{
passed += 1; passed += 1;

Просмотреть файл

@ -0,0 +1,28 @@
<?xml version="1.0" encoding="utf-8"?>
<!--
Description: HTML title w/ CDATA
Expect: var title = feed.items.queryElementAt(0, Components.interfaces.nsIFeedEntry).title; title == "<title>";
-->
<feed xmlns="http://www.w3.org/2005/Atom">
<id>http://atomtests.philringnalda.com/tests/item/title/html-cdata.atom</id>
<title>Atom item title html cdata</title>
<updated>2005-12-18T00:13:00Z</updated>
<author>
<name>Phil Ringnalda</name>
<uri>http://weblog.philringnalda.com/</uri>
</author>
<link rel="self" href="http://atomtests.philringnalda.com/tests/item/title/html-cdata.atom"/>
<entry>
<id>http://atomtests.philringnalda.com/tests/item/title/html-cdata.atom/1</id>
<title type="html"><![CDATA[&lt;title>]]></title>
<updated>2005-12-18T00:13:00Z</updated>
<summary>An item with a type="html" title consisting of a less-than
character, the word 'title' and a greater-than character, where
the character entity reference for the less-than is escaped by being
in a CDATA section.</summary>
<link href="http://atomtests.philringnalda.com/alt/title-title.html"/>
<category term="item title"/>
</entry>
</feed>

Просмотреть файл

@ -2,7 +2,7 @@
<!-- <!--
Description: atom feed and entry with random attributes works Description: atom feed and entry with random attributes works
Expect: var parent = feed.items.queryElementAt(1, Components.interfaces.nsIFeedEntry).parent; parent.fields.getProperty('atom:title') == "hmm"; Expect: var parent = feed.items.queryElementAt(1, Components.interfaces.nsIFeedEntry).parent; parent.title == "hmm";
--> -->
<feed xmlns="http://www.w3.org/2005/Atom" <feed xmlns="http://www.w3.org/2005/Atom"

Просмотреть файл

@ -2,7 +2,7 @@
<!-- <!--
Description: atom feed and entry with random attributes works Description: atom feed and entry with random attributes works
Expect: feed.items.queryElementAt(1, Components.interfaces.nsIFeedEntry).fields.getProperty('atom:title') == "test"; Expect: feed.items.queryElementAt(1, Components.interfaces.nsIFeedEntry).title == "test";
--> -->
<feed xmlns="http://www.w3.org/2005/Atom" <feed xmlns="http://www.w3.org/2005/Atom"

Просмотреть файл

@ -2,7 +2,7 @@
<!-- <!--
Description: atom rights works with HTML Description: atom rights works with HTML
Expect: feed.fields.getProperty('atom:rights') == '<i>test</i> rights' Expect: feed.fields.getProperty('atom:rights') != null
--> -->
<feed xmlns="http://www.w3.org/2005/Atom"> <feed xmlns="http://www.w3.org/2005/Atom">

Просмотреть файл

@ -2,7 +2,7 @@
<!-- <!--
Description: atom rights works Description: atom rights works
Expect: feed.fields.getProperty('atom:rights') == '<b>test</b> rights' Expect: feed.fields.getProperty('atom:rights') != null
--> -->
<feed xmlns="http://www.w3.org/2005/Atom"> <feed xmlns="http://www.w3.org/2005/Atom">

Просмотреть файл

@ -2,7 +2,7 @@
<!-- <!--
Description: atom rights works with nested divs Description: atom rights works with nested divs
Expect: feed.fields.getProperty('atom:rights') == '<div><div>test</div> rights</div>' Expect: feed.fields.getProperty('atom:rights') != null
--> -->
<feed xmlns="http://www.w3.org/2005/Atom"> <feed xmlns="http://www.w3.org/2005/Atom">

Просмотреть файл

@ -2,7 +2,7 @@
<!-- <!--
Description: atom subtitle works Description: atom subtitle works
Expect: feed.subtitle(false) == '<b>test</b> subtitle'; Expect: var sub = feed.subtitle(false); sub == '<b>test</b> subtitle';
--> -->
<feed xmlns="http://www.w3.org/2005/Atom"> <feed xmlns="http://www.w3.org/2005/Atom">

Просмотреть файл

@ -0,0 +1,46 @@
<?xml version="1.0" encoding="utf-8"?>
<!--
Description: XHTML title with apos
Expect: feed.title == "Tantek's Updates"
-->
<feed xml:lang="en-US"
xmlns="http://www.w3.org/2005/Atom">
<title type="xhtml">
<div xmlns="http://www.w3.org/1999/xhtml">Tantek's Updates</div>
</title>
<link href="http://tantek.com/"
rel="alternate" title="Tantek's Posts" type="text/html"/>
<link href="http://tantek.com/updates.atom"
rel="self" />
<id>http://tantek.com/updates.atom</id>
<author>
<name>Tantek</name>
<uri>http://tantek.com/</uri>
</author>
<updated>2006-05-02T20:13:00-07:00</updated>
<entry>
<updated>2006-04-22T00:00:00-07:00</updated>
<published>2006-04-22T00:00:00-07:00</published>
<link href="http://www.makezine.com/faire/"
rel="alternate" title="Make Faire" type="text/html"/>
<id>http://www.makezine.com/faire/</id>
<title>Make Faire</title>
<content type="xhtml" xml:space="preserve">
<div xmlns="http://www.w3.org/1999/xhtml">
<div class="vevent">
<a class="url" href="http://www.makezine.com/faire/">
<abbr class="dtstart" title="20060422">
4/22</abbr>-<abbr class="dtend" title="20060424">23</abbr>
<span class="summary">
Make Faire
</span> @
<span class="location">
San Mateo Fairgrounds
</span>
</a>
</div>
</div>
</content>
</entry>
</feed>

Просмотреть файл

@ -2,7 +2,7 @@
<!-- <!--
Description: atom title works Description: atom title works
Expect: feed.fields.getProperty('atom:title') == '<b>test</b> title' Expect: feed.title == 'test title'
--> -->
<feed xmlns="http://www.w3.org/2005/Atom"> <feed xmlns="http://www.w3.org/2005/Atom">

Просмотреть файл

@ -2,7 +2,7 @@
<!-- <!--
Description: channel description works Description: channel description works
Expect: feed.fields.getProperty('description') == 'test' Expect: var desc = feed.fields.getProperty('description'); desc == 'test';
--> -->
<rss version="2.0" > <rss version="2.0" >

Просмотреть файл

@ -1,8 +1,8 @@
<?xml version="1.0" encoding="iso-8859-1"?> <?xml version="1.0" encoding="iso-8859-1"?>
<!-- <!--
Description: channel description works Description: channel description markup is not HTML
Expect: feed.subtitle(true) == 'test' Expect: feed.subtitle(true) == '<i><b>test</b></i>'
--> -->
<rss version="2.0" > <rss version="2.0" >

Просмотреть файл

@ -2,7 +2,7 @@
<!-- <!--
Description: item title works Description: item title works
Expect: feed.items.queryElementAt(0, Components.interfaces.nsIFeedEntry).fields.getProperty('content:encoded') == 'foobar' Expect: feed.items.queryElementAt(0, Components.interfaces.nsIFeedEntry).content(true) == 'foobar'
--> -->
<rss version="2.0" > <rss version="2.0" >

Просмотреть файл

@ -2,7 +2,7 @@
<!-- <!--
Description: item desc encoded works Description: item desc encoded works
Expect: feed.items.queryElementAt(0, Components.interfaces.nsIFeedEntry).fields.getProperty('description') == 'I\'m headed for France. I wasn\'t gonna go this year, but then last week <a href="http://www.imdb.com/title/tt0086525/">Valley Girl</a> came out and I said to myself, Joe Bob, you gotta get out of the country for a while.' Expect: feed.items.queryElementAt(0, Components.interfaces.nsIFeedEntry).summary(false) == 'I\'m headed for France. I wasn\'t gonna go this year, but then last week <a href="http://www.imdb.com/title/tt0086525/">Valley Girl</a> came out and I said to myself, Joe Bob, you gotta get out of the country for a while.'
--> -->
<rss version="2.0" > <rss version="2.0" >

Просмотреть файл

@ -2,7 +2,7 @@
<!-- <!--
Description: item desc encoded works Description: item desc encoded works
Expect: feed.items.queryElementAt(1, Components.interfaces.nsIFeedEntry).fields.getProperty('description') == 'I\'m headed for France. I wasn\'t gonna go this year, but then last week <a href="http://www.imdb.com/title/tt0086525/">Valley Girl</a> came out and I said to myself, Joe Bob, you gotta get out of the country for a while.' Expect: feed.items.queryElementAt(1, Components.interfaces.nsIFeedEntry).summary(false) == 'I\'m headed for France. I wasn\'t gonna go this year, but then last week <a href="http://www.imdb.com/title/tt0086525/">Valley Girl</a> came out and I said to myself, Joe Bob, you gotta get out of the country for a while.'
--> -->
<rss version="2.0" > <rss version="2.0" >

Просмотреть файл

@ -2,7 +2,7 @@
<!-- <!--
Description: item desc CDATA works Description: item desc CDATA works
Expect: feed.items.queryElementAt(0, Components.interfaces.nsIFeedEntry).fields.getProperty('description') == 'I\'m headed for France. I wasn\'t gonna go this year, but then last week <a href="http://www.imdb.com/title/tt0086525/">Valley Girl</a> came out and I said to myself, Joe Bob, you gotta get out of the country for a while.' Expect: feed.items.queryElementAt(0, Components.interfaces.nsIFeedEntry).summary(false) == 'I\'m headed for France. I wasn\'t gonna go this year, but then last week <a href="http://www.imdb.com/title/tt0086525/">Valley Girl</a> came out and I said to myself, Joe Bob, you gotta get out of the country for a while.'
--> -->
<rss version="2.0" > <rss version="2.0" >

Просмотреть файл

@ -0,0 +1,21 @@
<?xml version="1.0" encoding="utf-8"?>
<!--
Description: item desc encoded, double-escaped entity
Expect: var summary = feed.items.queryElementAt(0, Components.interfaces.nsIFeedEntry).summary(true); summary == "test D\u00e9sol\u00e9e";
-->
<rss version="2.0" >
<channel>
<item>
<comments>http://example.org</comments>
<author>jbb@dallas.example.com (Joe Bob Briggs)</author>
<title>test</title>
<category domain="foo">bar</category>
<description>
&lt;b>test D&amp;eacute;sol&amp;eacute;e&lt;/b>
</description>
</item>
</channel>
</rss>

Просмотреть файл

@ -2,7 +2,7 @@
<!-- <!--
Description: item desc plain text works Description: item desc plain text works
Expect: feed.items.queryElementAt(0, Components.interfaces.nsIFeedEntry).fields.getProperty('description') == "I'm headed for France. I wasn't gonna go this year, but then last week \"Valley Girl\" came out and I said to myself, Joe Bob, you gotta get out of the country for a while." Expect: feed.items.queryElementAt(0, Components.interfaces.nsIFeedEntry).summary(false) == "I'm headed for France. I wasn't gonna go this year, but then last week \"Valley Girl\" came out and I said to myself, Joe Bob, you gotta get out of the country for a while."
--> -->
<rss version="2.0" > <rss version="2.0" >