Bug #279150 --> Break apart the RSS feed parser into a separate JS object to make it easier to hook up unit testing.

Thanks to Robert Sayer for getting this going.
This commit is contained in:
scott%scott-macgregor.org 2005-02-01 04:04:59 +00:00
Родитель 0723d93e05
Коммит 5ecb05ff26
6 изменённых файлов: 550 добавлений и 447 удалений

Просмотреть файл

@ -1,78 +1,94 @@
var rdfcontainer = # -*- Mode: Java; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*-
Components # ***** BEGIN LICENSE BLOCK *****
.classes["@mozilla.org/rdf/container-utils;1"] # Version: MPL 1.1/GPL 2.0/LGPL 2.1
.getService(Components.interfaces.nsIRDFContainerUtils); #
# The contents of this file are subject to the Mozilla Public License Version
# 1.1 (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
# http://www.mozilla.org/MPL/
#
# Software distributed under the License is distributed on an "AS IS" basis,
# WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
# for the specific language governing rights and limitations under the
# License.
#
# The Original Code is the RSS Parsing Engine
#
# Contributor(s):
#
# Alternatively, the contents of this file may be used under the terms of
# either the GNU General Public License Version 2 or later (the "GPL"), or
# the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
# in which case the provisions of the GPL or the LGPL are applicable instead
# of those above. If you wish to allow use of your version of this file only
# under the terms of either the GPL or the LGPL, and not to allow others to
# use your version of this file under the terms of the MPL, indicate your
# decision by deleting the provisions above and replace them with the notice
# and other provisions required by the GPL or the LGPL. If you do not delete
# the provisions above, a recipient may use your version of this file under
# the terms of any one of the MPL, the GPL or the LGPL.
#
# ***** END LICENSE BLOCK ***** */
var rdfparser =
Components
.classes["@mozilla.org/rdf/xml-parser;1"]
.createInstance(Components.interfaces.nsIRDFXMLParser);
// For use when serializing content in Atom feeds.
var serializer =
Components
.classes["@mozilla.org/xmlextras/xmlserializer;1"]
.createInstance(Components.interfaces.nsIDOMSerializer);
// error codes used to inform the consumer about attempts to download a feed // error codes used to inform the consumer about attempts to download a feed
const kNewsBlogSuccess = 0; const kNewsBlogSuccess = 0;
const kNewsBlogInvalidFeed = 1; // usually means there was an error trying to parse the feed... const kNewsBlogInvalidFeed = 1; // usually means there was an error trying to parse the feed...
const kNewsBlogRequestFailure = 2; // generic networking failure when trying to download the feed. const kNewsBlogRequestFailure = 2; // generic networking failure when trying to download the feed.
const kNewsBlogFeedIsBusy = 3;
// Cache for all of the feeds currently being downloaded, indexed by URL, so the load event listener
// can access the Feed objects after it finishes downloading the feed.
var FeedCache =
{
mFeeds: new Array(),
// Hash of feeds being downloaded, indexed by URL, so the load event listener putFeed: function (aFeed)
// can access the Feed objects after it finishes downloading the feed files. {
function FeedCache(){ this.mFeeds[this.normalizeHost(aFeed.url)] = aFeed;
this.nsURI = Components.classes["@mozilla.org/network/standard-url;1"]. },
createInstance(Components.interfaces.nsIURI);
return this;
}
FeedCache.prototype.putFeed =function(feed) { getFeed: function (aUrl)
this[this.normalizeHost(feed.url)] = feed; {
} return this.mFeeds[this.normalizeHost(aUrl)];
},
FeedCache.prototype.getFeedWithUrl =function(url) { removeFeed: function (aUrl)
return this[this.normalizeHost(url)]; {
} delete this.mFeeds[this.normalizeHost(aUrl)];
},
FeedCache.prototype.removeFeedWithUrl = function(url) { normalizeHost: function (aUrl)
delete this[this.normalizeHost(url)]; {
} normalizedUrl = Components.classes["@mozilla.org/network/standard-url;1"].
createInstance(Components.interfaces.nsIURI);
normalizedUrl.spec = aUrl;
normalizedUrl.host = normalizedUrl.host.toLowerCase();
return normalizedUrl.spec;
}
};
FeedCache.prototype.normalizeHost = function(url){ function Feed(resource)
this.nsURI.spec = url; {
this.nsURI.host = this.nsURI.host.toLowerCase(); this.resource = resource.QueryInterface(Components.interfaces.nsIRDFResource);
return this.nsURI.spec; this.description = null;
} this.author = null;
this.request = null;
this.folder = null;
this.server = null;
this.downloadCallback = null;
this.items = new Array();
var gFzFeedCache = new FeedCache(); return this;
function Feed(resource) {
this.resource = resource.QueryInterface(Components.interfaces.nsIRDFResource);
this.description = null;
this.author = null;
this.request = null;
this.folder = null;
this.server = null;
this.downloadCallback = null;
this.items = new Array();
return this;
} }
// The name of the message folder corresponding to the feed. // The name of the message folder corresponding to the feed.
// XXX This should be called something more descriptive like "folderName". // XXX This should be called something more descriptive like "folderName".
// XXX Or maybe, when we support nested folders and downloading into any folder, // XXX Or maybe, when we support nested folders and downloading into any folder,
// there could just be a reference to the folder itself called "folder". // there could just be a reference to the folder itself called "folder".
Feed.prototype.name getter = function() { Feed.prototype.name getter = function()
{
var name = this.title || this.description || this.url; var name = this.title || this.description || this.url;
if (!name) if (!name)
throw("couldn't compute feed name, as feed has no title, description, or URL."); throw("couldn't compute feed name, as feed has no title, description, or URL.");
@ -89,7 +105,8 @@ Feed.prototype.name getter = function() {
return name; return name;
} }
Feed.prototype.download = function(parseItems, aCallback) { Feed.prototype.download = function(parseItems, aCallback)
{
this.downloadCallback = aCallback; // may be null this.downloadCallback = aCallback; // may be null
// Whether or not to parse items when downloading and parsing the feed. // Whether or not to parse items when downloading and parsing the feed.
@ -108,8 +125,12 @@ Feed.prototype.download = function(parseItems, aCallback) {
// Before we try to download the feed, make sure we aren't already processing the feed // Before we try to download the feed, make sure we aren't already processing the feed
// by looking up the url in our feed cache // by looking up the url in our feed cache
if (gFzFeedCache.getFeedWithUrl(this.url)) if (FeedCache.getFeed(this.url))
return; // don't do anything, the feed is already in use {
if (this.downloadCallback)
this.downloadCallback.downloaded(this, kNewsBlogFeedIsBusy);
return ; // don't do anything, the feed is already in use
}
this.request = Components.classes["@mozilla.org/xmlextras/xmlhttprequest;1"] this.request = Components.classes["@mozilla.org/xmlextras/xmlhttprequest;1"]
.createInstance(Components.interfaces.nsIXMLHttpRequest); .createInstance(Components.interfaces.nsIXMLHttpRequest);
@ -119,15 +140,16 @@ Feed.prototype.download = function(parseItems, aCallback) {
this.request.overrideMimeType("text/xml"); this.request.overrideMimeType("text/xml");
this.request.onload = Feed.onDownloaded; this.request.onload = Feed.onDownloaded;
this.request.onerror = Feed.onDownloadError; this.request.onerror = Feed.onDownloadError;
gFzFeedCache.putFeed(this); FeedCache.putFeed(this);
this.request.send(null); this.request.send(null);
} }
Feed.onDownloaded = function(event) { Feed.onDownloaded = function(event)
{
var request = event.target; var request = event.target;
var url = request.channel.originalURI.spec; var url = request.channel.originalURI.spec;
debug(url + " downloaded"); debug(url + " downloaded");
var feed = gFzFeedCache.getFeedWithUrl(url); var feed = FeedCache.getFeed(url);
if (!feed) if (!feed)
throw("error after downloading " + url + ": couldn't retrieve feed from request"); throw("error after downloading " + url + ": couldn't retrieve feed from request");
@ -136,413 +158,148 @@ Feed.onDownloaded = function(event) {
// parse will asynchronously call the download callback when it is done // parse will asynchronously call the download callback when it is done
} }
Feed.onProgress = function(event) { Feed.onProgress = function(event)
{
var request = event.target; var request = event.target;
var url = request.channel.originalURI.spec; var url = request.channel.originalURI.spec;
var feed = gFzFeedCache.getFeedWithUrl(url); var feed = FeedCache.getFeed(url);
if (feed.downloadCallback) if (feed.downloadCallback)
feed.downloadCallback.onProgress(feed, event.position, event.totalSize); feed.downloadCallback.onProgress(feed, event.position, event.totalSize);
} }
Feed.onDownloadError = function(event) { Feed.onDownloadError = function(event)
{
var request = event.target; var request = event.target;
var url = request.channel.originalURI.spec; var url = request.channel.originalURI.spec;
var feed = gFzFeedCache.getFeedWithUrl(url); var feed = FeedCache.getFeed(url);
if (feed.downloadCallback) if (feed.downloadCallback)
feed.downloadCallback.downloaded(feed, kNewsBlogRequestFailure); feed.downloadCallback.downloaded(feed, kNewsBlogRequestFailure);
gFzFeedCache.removeFeedWithUrl(url); FeedCache.removeFeed(url);
} }
Feed.prototype.onParseError = function(feed) { Feed.prototype.onParseError = function(feed)
{
if (feed && feed.downloadCallback) if (feed && feed.downloadCallback)
{ {
if (feed.downloadCallback) if (feed.downloadCallback)
feed.downloadCallback.downloaded(feed, kNewsBlogInvalidFeed); feed.downloadCallback.downloaded(feed, kNewsBlogInvalidFeed);
gFzFeedCache.removeFeedWithUrl(url); FeedCache.removeFeed(url);
} }
} }
Feed.prototype.url getter = function() { Feed.prototype.url getter = function()
var ds = getSubscriptionsDS(this.server); {
var url = ds.GetTarget(this.resource, DC_IDENTIFIER, true); var ds = getSubscriptionsDS(this.server);
if (url) var url = ds.GetTarget(this.resource, DC_IDENTIFIER, true);
url = url.QueryInterface(Components.interfaces.nsIRDFLiteral).Value; if (url)
else url = url.QueryInterface(Components.interfaces.nsIRDFLiteral).Value;
url = this.resource.Value; else
return url; url = this.resource.Value;
return url;
} }
Feed.prototype.title getter = function() { Feed.prototype.title getter = function()
var ds = getSubscriptionsDS(this.server); {
var title = ds.GetTarget(this.resource, DC_TITLE, true); var ds = getSubscriptionsDS(this.server);
if (title) var title = ds.GetTarget(this.resource, DC_TITLE, true);
title = title.QueryInterface(Components.interfaces.nsIRDFLiteral).Value; if (title)
return title; title = title.QueryInterface(Components.interfaces.nsIRDFLiteral).Value;
return title;
} }
Feed.prototype.title setter = function(new_title) { Feed.prototype.title setter = function(new_title)
var ds = getSubscriptionsDS(this.server); {
new_title = rdf.GetLiteral(new_title || ""); var ds = getSubscriptionsDS(this.server);
var old_title = ds.GetTarget(this.resource, DC_TITLE, true); new_title = rdf.GetLiteral(new_title || "");
if (old_title) var old_title = ds.GetTarget(this.resource, DC_TITLE, true);
ds.Change(this.resource, DC_TITLE, old_title, new_title); if (old_title)
else ds.Change(this.resource, DC_TITLE, old_title, new_title);
ds.Assert(this.resource, DC_TITLE, new_title, true); else
ds.Assert(this.resource, DC_TITLE, new_title, true);
} }
Feed.prototype.quickMode getter = function() { Feed.prototype.quickMode getter = function()
var ds = getSubscriptionsDS(this.server); {
var quickMode = ds.GetTarget(this.resource, FZ_QUICKMODE, true); var ds = getSubscriptionsDS(this.server);
if (quickMode) { var quickMode = ds.GetTarget(this.resource, FZ_QUICKMODE, true);
quickMode = quickMode.QueryInterface(Components.interfaces.nsIRDFLiteral); if (quickMode)
quickMode = quickMode.Value; {
quickMode = eval(quickMode); quickMode = quickMode.QueryInterface(Components.interfaces.nsIRDFLiteral);
} quickMode = quickMode.Value;
return quickMode; quickMode = eval(quickMode);
}
return quickMode;
} }
Feed.prototype.quickMode setter = function(new_quickMode) { Feed.prototype.quickMode setter = function(new_quickMode)
var ds = getSubscriptionsDS(this.server); {
new_quickMode = rdf.GetLiteral(new_quickMode || ""); var ds = getSubscriptionsDS(this.server);
var old_quickMode = ds.GetTarget(this.resource, FZ_QUICKMODE, true); new_quickMode = rdf.GetLiteral(new_quickMode || "");
if (old_quickMode) var old_quickMode = ds.GetTarget(this.resource, FZ_QUICKMODE, true);
ds.Change(this.resource, FZ_QUICKMODE, old_quickMode, new_quickMode); if (old_quickMode)
else ds.Change(this.resource, FZ_QUICKMODE, old_quickMode, new_quickMode);
ds.Assert(this.resource, FZ_QUICKMODE, new_quickMode, true); else
ds.Assert(this.resource, FZ_QUICKMODE, new_quickMode, true);
} }
Feed.prototype.parse = function() { Feed.prototype.parse = function()
{
// Figures out what description language (RSS, Atom) and version this feed // Figures out what description language (RSS, Atom) and version this feed
// is using and calls a language/version-specific feed parser. // is using and calls a language/version-specific feed parser.
debug("parsing feed " + this.url); debug("parsing feed " + this.url);
if (!this.request.responseText) { if (!this.request.responseText)
return this.onParseError(this); return this.onParseError(this);
}
else if (this.request.responseText.search(/=(['"])http:\/\/purl\.org\/rss\/1\.0\/\1/) != -1) { // create a feed parser which will parse the feed for us
debug(this.url + " is an RSS 1.x (RDF-based) feed"); var parser = new FeedParser();
this.parseAsRSS1(); this.itemsToStore = parser.parseFeed(this, this.request.responseText, this.request.responseXML, this.request.channel.URI);
}
else if (this.request.responseText.search(/=(['"])http:\/\/purl.org\/atom\/ns#\1/) != -1) { // storeNextItem will iterate through the parsed items, storing each one.
debug(this.url + " is an Atom feed"); this.itemsToStoreIndex = 0;
this.parseAsAtom(); this.storeNextItem();
} }
else if (this.request.responseText.search(/"http:\/\/my\.netscape\.com\/rdf\/simple\/0\.9\/"/) != -1)
{ Feed.prototype.invalidateItems = function ()
// RSS 0.9x is forward compatible with RSS 2.0, so use the RSS2 parser to handle it. {
debug(this.url + " is an 0.9x feed");
this.parseAsRSS2();
}
// XXX Explicitly check for RSS 2.0 instead of letting it be handled by the
// default behavior (who knows, we may change the default at some point).
else {
// We don't know what kind of feed this is; let's pretend it's RSS 0.9x
// and hope things work out for the best. In theory even RSS 1.0 feeds
// could be parsed by the 0.9x parser if the RSS namespace was the default.
debug(this.url + " is of unknown format; assuming an RSS 0.9x feed");
this.parseAsRSS2();
}
var ds = getItemsDS(this.server); var ds = getItemsDS(this.server);
ds = ds.QueryInterface(Components.interfaces.nsIRDFRemoteDataSource); debug("invalidating items for " + this.url);
ds.Flush(); var items = ds.GetSources(FZ_FEED, this.resource, true);
} var item;
Feed.prototype.parseAsRSS2 = function() {
if (!this.request.responseXML || !(this.request.responseXML instanceof Components.interfaces.nsIDOMXMLDocument))
return this.onParseError(this);
// Get the first channel (assuming there is only one per RSS File).
var channel = this.request.responseXML.getElementsByTagName("channel")[0];
if (!channel)
return this.onParseError(this);
this.title = this.title || getNodeValue(channel.getElementsByTagName("title")[0]);
this.description = getNodeValue(channel.getElementsByTagName("description")[0]);
if (!this.parseItems)
return;
this.invalidateItems();
var itemNodes = this.request.responseXML.getElementsByTagName("item");
this.itemsToStore = new Array();
this.itemsToStoreIndex = 0;
var converter = Components
.classes["@mozilla.org/intl/scriptableunicodeconverter"]
.createInstance(Components.interfaces.nsIScriptableUnicodeConverter);
converter.charset = 'UTF-8';
for ( var i=0 ; i<itemNodes.length ; i++ ) {
var itemNode = itemNodes[i];
var item = new FeedItem();
item.feed = this;
item.characterSet = "UTF-8";
var link = getNodeValue(itemNode.getElementsByTagName("link")[0]);
var guidNode = itemNode.getElementsByTagName("guid")[0];
if (guidNode) {
var guid = getNodeValue(guidNode);
var isPermaLink =
guidNode.getAttribute('isPermaLink') == 'false' ? false : true;
}
// getNodeValue returns unicode strings...
// we need to do the proper conversion on these before we call into
// item.Store();
item.url = link ? link : (guid && isPermaLink) ? guid : null;
item.id = guid;
item.description = getNodeValue(itemNode.getElementsByTagName("description")[0]);
item.title = converter.ConvertFromUnicode(getNodeValue(itemNode.getElementsByTagName("title")[0])
|| (item.description ? item.description.substr(0, 150) : null)
|| item.title);
// do this after we potentially assign item.description into item.title
// because that potential assignment assumes the value is in unicode still
item.description = converter.ConvertFromUnicode(item.description);
item.author = getNodeValue(itemNode.getElementsByTagName("author")[0]
|| itemNode.getElementsByTagName("creator")[0])
|| this.title
|| item.author;
item.date = getNodeValue(itemNode.getElementsByTagName("pubDate")[0]
|| itemNode.getElementsByTagName("date")[0])
|| item.date;
// If the date is invalid, users will see the beginning of the epoch
// unless we reset it here, so they'll see the current time instead.
// This is typical aggregator behavior.
if(item.date){
item.date = trimString(item.date);
if(!isValidRFC822Date(item.date) ){
// XXX Use this on the other formats as well
item.date = dateRescue(item.date);
}
}
var content = getNodeValue(itemNode.getElementsByTagNameNS(RSS_CONTENT_NS, "encoded")[0]);
if (content)
item.content = converter.ConvertFromUnicode(content);
this.itemsToStore[i] = item;
}
this.storeNextItem();
}
Feed.prototype.parseAsRSS1 = function() {
// RSS 1.0 is valid RDF, so use the RDF parser/service to extract data.
// Create a new RDF data source and parse the feed into it.
var ds = Components
.classes["@mozilla.org/rdf/datasource;1?name=in-memory-datasource"]
.createInstance(Components.interfaces.nsIRDFDataSource);
rdfparser.parseString(ds, this.request.channel.URI, this.request.responseText);
// Get information about the feed as a whole.
var channel = ds.GetSource(RDF_TYPE, RSS_CHANNEL, true);
this.title = this.title || getRDFTargetValue(ds, channel, RSS_TITLE);
this.description = getRDFTargetValue(ds, channel, RSS_DESCRIPTION);
if (!this.parseItems)
return;
this.invalidateItems();
var items = ds.GetTarget(channel, RSS_ITEMS, true);
if (items)
items = rdfcontainer.MakeSeq(ds, items).GetElements();
// If the channel doesn't list any items, look for resources of type "item" while (items.hasMoreElements())
// (a hacky workaround for some buggy feeds). {
if (!items || !items.hasMoreElements()) item = items.getNext();
items = ds.GetSources(RDF_TYPE, RSS_ITEM, true); item = item.QueryInterface(Components.interfaces.nsIRDFResource);
debug("invalidating " + item.Value);
this.itemsToStore = new Array(); var valid = ds.GetTarget(item, FZ_VALID, true);
this.itemsToStoreIndex = 0; if (valid)
var index = 0; ds.Unassert(item, FZ_VALID, valid, true);
var converter = Components
.classes["@mozilla.org/intl/scriptableunicodeconverter"]
.createInstance(Components.interfaces.nsIScriptableUnicodeConverter);
converter.charset = "UTF-8";
while (items.hasMoreElements()) {
var itemResource = items.getNext().QueryInterface(Components.interfaces.nsIRDFResource);
var item = new FeedItem();
item.feed = this;
item.characterSet = "UTF-8";
// Prefer the value of the link tag to the item URI since the URI could be
// a relative URN.
var uri = itemResource.Value;
var link = getRDFTargetValue(ds, itemResource, RSS_LINK);
item.url = link || uri;
item.id = item.url;
item.description = getRDFTargetValue(ds, itemResource, RSS_DESCRIPTION);
item.title = getRDFTargetValue(ds, itemResource, RSS_TITLE)
|| getRDFTargetValue(ds, itemResource, DC_SUBJECT)
|| (item.description ? item.description.substr(0, 150) : null)
|| item.title;
item.author = getRDFTargetValue(ds, itemResource, DC_CREATOR)
|| getRDFTargetValue(ds, channel, DC_CREATOR)
|| this.title
|| item.author;
item.date = getRDFTargetValue(ds, itemResource, DC_DATE) || item.date;
item.content = getRDFTargetValue(ds, itemResource, RSS_CONTENT_ENCODED);
this.itemsToStore[index++] = item;
} }
this.storeNextItem();
} }
Feed.prototype.parseAsAtom = function() { Feed.prototype.removeInvalidItems = function()
if (!this.request.responseXML || !(this.request.responseXML instanceof Components.interfaces.nsIDOMXMLDocument)) {
return this.onParseError(this); var ds = getItemsDS(this.server);
debug("removing invalid items for " + this.url);
// Get the first channel (assuming there is only one per Atom File). var items = ds.GetSources(FZ_FEED, this.resource, true);
var channel = this.request.responseXML.getElementsByTagName("feed")[0]; var item;
if (!channel) while (items.hasMoreElements())
return this.onParseError(this); {
item = items.getNext();
this.title = this.title || getNodeValue(channel.getElementsByTagName("title")[0]); item = item.QueryInterface(Components.interfaces.nsIRDFResource);
this.description = getNodeValue(channel.getElementsByTagName("tagline")[0]); if (ds.HasAssertion(item, FZ_VALID, RDF_LITERAL_TRUE, true))
continue;
if (!this.parseItems) debug("removing " + item.Value);
return; ds.Unassert(item, FZ_FEED, this.resource, true);
if (ds.hasArcOut(item, FZ_FEED))
this.invalidateItems(); debug(item.Value + " is from more than one feed; only the reference to this feed removed");
else
var items = this.request.responseXML.getElementsByTagName("entry"); removeAssertions(ds, item);
this.itemsToStore = new Array();
this.itemsToStoreIndex = 0;
for ( var i=0 ; i<items.length ; i++ ) {
var itemNode = items[i];
var item = new FeedItem();
item.feed = this;
item.characterSet = "UTF-8";
var url;
var links = itemNode.getElementsByTagName("link");
for ( var j=0 ; j<links.length ; j++ ) {
var alink = links[j];
if (alink && alink.getAttribute('rel') && alink.getAttribute('rel') == 'alternate' && alink.getAttribute('href')) {
url = alink.getAttribute('href');
break;
}
}
item.url = url;
item.id = getNodeValue(itemNode.getElementsByTagName("id")[0]);
item.description = getNodeValue(itemNode.getElementsByTagName("summary")[0]);
item.title = getNodeValue(itemNode.getElementsByTagName("title")[0])
|| (item.description ? item.description.substr(0, 150) : null)
|| item.title;
var authorEl = itemNode.getElementsByTagName("author")[0]
|| itemNode.getElementsByTagName("contributor")[0]
|| channel.getElementsByTagName("author")[0];
var author = "";
if (authorEl) {
var name = getNodeValue(authorEl.getElementsByTagName("name")[0]);
var email = getNodeValue(authorEl.getElementsByTagName("email")[0]);
if (name)
author = name + (email ? " <" + email + ">" : "");
else if (email)
author = email;
}
item.author = author || item.author || this.title;
item.date = getNodeValue(itemNode.getElementsByTagName("modified")[0]
|| itemNode.getElementsByTagName("issued")[0]
|| itemNode.getElementsByTagName("created")[0])
|| item.date;
// XXX We should get the xml:base attribute from the content tag as well
// and use it as the base HREF of the message.
// XXX Atom feeds can have multiple content elements; we should differentiate
// between them and pick the best one.
// Some Atom feeds wrap the content in a CTYPE declaration; others use
// a namespace to identify the tags as HTML; and a few are buggy and put
// HTML tags in without declaring their namespace so they look like Atom.
// We deal with the first two but not the third.
var content;
var contentNode = itemNode.getElementsByTagName("content")[0];
if (contentNode) {
content = "";
for ( var j=0 ; j<contentNode.childNodes.length ; j++ ) {
var node = contentNode.childNodes.item(j);
if (node.nodeType == node.CDATA_SECTION_NODE)
content += node.data;
else
content += serializer.serializeToString(node);
//content += getNodeValue(node);
}
if (contentNode.getAttribute('mode') == "escaped") {
content = content.replace(/&lt;/g, "<");
content = content.replace(/&gt;/g, ">");
content = content.replace(/&amp;/g, "&");
}
if (content == "")
content = null;
}
item.content = content;
this.itemsToStore[i] = item;
} }
this.storeNextItem();
}
Feed.prototype.invalidateItems = function invalidateItems() {
var ds = getItemsDS(this.server);
debug("invalidating items for " + this.url);
var items = ds.GetSources(FZ_FEED, this.resource, true);
var item;
while (items.hasMoreElements()) {
item = items.getNext();
item = item.QueryInterface(Components.interfaces.nsIRDFResource);
debug("invalidating " + item.Value);
var valid = ds.GetTarget(item, FZ_VALID, true);
if (valid)
ds.Unassert(item, FZ_VALID, valid, true);
}
}
Feed.prototype.removeInvalidItems = function() {
var ds = getItemsDS(this.server);
debug("removing invalid items for " + this.url);
var items = ds.GetSources(FZ_FEED, this.resource, true);
var item;
while (items.hasMoreElements()) {
item = items.getNext();
item = item.QueryInterface(Components.interfaces.nsIRDFResource);
if (ds.HasAssertion(item, FZ_VALID, RDF_LITERAL_TRUE, true))
continue;
debug("removing " + item.Value);
ds.Unassert(item, FZ_FEED, this.resource, true);
if (ds.hasArcOut(item, FZ_FEED))
debug(item.Value + " is from more than one feed; only the reference to this feed removed");
else
removeAssertions(ds, item);
}
} }
// gets the next item from gItemsToStore and forces that item to be stored // gets the next item from gItemsToStore and forces that item to be stored
@ -590,34 +347,33 @@ Feed.prototype.storeNextItem = function()
Feed.prototype.cleanupParsingState = function(feed) { Feed.prototype.cleanupParsingState = function(feed) {
// now that we are done parsing the feed, remove the feed from our feed cache // now that we are done parsing the feed, remove the feed from our feed cache
gFzFeedCache.removeFeedWithUrl(feed.url); FeedCache.removeFeed(feed.url);
feed.removeInvalidItems(); feed.removeInvalidItems();
// let's be sure to flush any feed item changes back to disk // let's be sure to flush any feed item changes back to disk
var ds = getItemsDS(feed.server); var ds = getItemsDS(feed.server);
ds.QueryInterface(Components.interfaces.nsIRDFRemoteDataSource).Flush(); // flush any changes ds.QueryInterface(Components.interfaces.nsIRDFRemoteDataSource).Flush(); // flush any changes
if (feed.downloadCallback) if (feed.downloadCallback)
feed.downloadCallback.downloaded(feed, kNewsBlogSuccess); feed.downloadCallback.downloaded(feed, kNewsBlogSuccess);
feed.request = null; // force the xml http request to go away. This helps reduce some this.request = null; // force the xml http request to go away. This helps reduce some nasty assertions on shut down.
// nasty assertions on shut down of all things. this.itemsToStore = "";
this.itemsToStoreIndex = 0;
this.storeItemsTimer = null;
}
this.itemsToStore = ""; Feed.prototype.notify = function(aTimer)
this.itemsToStoreIndex = 0; {
this.storeItemsTimer = null;
}
Feed.prototype.notify = function(aTimer) {
this.storeNextItem(); this.storeNextItem();
} }
Feed.prototype.QueryInterface = function(aIID) { Feed.prototype.QueryInterface = function(aIID)
{
if (aIID.equals(Components.interfaces.nsITimerCallback) || aIID.equals(Components.interfaces.nsISupports)) if (aIID.equals(Components.interfaces.nsITimerCallback) || aIID.equals(Components.interfaces.nsISupports))
return this; return this;
Components.returnCode = Components.results.NS_ERROR_NO_INTERFACE; Components.returnCode = Components.results.NS_ERROR_NO_INTERFACE;
return null; return null;
} }

Просмотреть файл

@ -0,0 +1,344 @@
# -*- Mode: Java; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*-
# ***** BEGIN LICENSE BLOCK *****
# Version: MPL 1.1/GPL 2.0/LGPL 2.1
#
# The contents of this file are subject to the Mozilla Public License Version
# 1.1 (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
# http://www.mozilla.org/MPL/
#
# Software distributed under the License is distributed on an "AS IS" basis,
# WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
# for the specific language governing rights and limitations under the
# License.
#
# The Original Code is the RSS Parsing Engine
#
# Contributor(s):
#
# Alternatively, the contents of this file may be used under the terms of
# either the GNU General Public License Version 2 or later (the "GPL"), or
# the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
# in which case the provisions of the GPL or the LGPL are applicable instead
# of those above. If you wish to allow use of your version of this file only
# under the terms of either the GPL or the LGPL, and not to allow others to
# use your version of this file under the terms of the MPL, indicate your
# decision by deleting the provisions above and replace them with the notice
# and other provisions required by the GPL or the LGPL. If you do not delete
# the provisions above, a recipient may use your version of this file under
# the terms of any one of the MPL, the GPL or the LGPL.
#
# ***** END LICENSE BLOCK ***** */
// The feed parser depends on FeedItems.js, Feed.js.
// Shared XPCOM helpers used by the feed parser.
// RDF container utilities, obtained as a service (getService) via nsIRDFContainerUtils.
var rdfcontainer = Components.classes["@mozilla.org/rdf/container-utils;1"].getService(Components.interfaces.nsIRDFContainerUtils);
// RDF/XML parser instance (nsIRDFXMLParser); presumably used for RDF-based RSS 1.x feeds -- confirm against parseAsRSS1.
var rdfparser = Components.classes["@mozilla.org/rdf/xml-parser;1"].createInstance(Components.interfaces.nsIRDFXMLParser);
// DOM serializer instance (nsIDOMSerializer); presumably used to serialize Atom content nodes -- confirm against parseAsAtom.
var serializer = Components.classes["@mozilla.org/xmlextras/xmlserializer;1"].createInstance(Components.interfaces.nsIDOMSerializer);
// Constructor for feed parser objects. It sets up no per-instance state;
// the parsing methods are supplied through FeedParser.prototype.
function FeedParser() {}
FeedParser.prototype =
{
// parseFeed returns an array of parsed items ready for processing
// it is currently a synchronous operation. If there was an error parsing the feed,
// parseFeed returns an empty feed in addition to calling aFeed.onParseError
parseFeed: function (aFeed, aSource, aDOM, aBaseURI)
{
if (!aSource || !(aDOM instanceof Components.interfaces.nsIDOMXMLDocument))
{
aFeed.onParseError(aFeed);
return new Array();
}
else if (aSource.search(/=(['"])http:\/\/purl\.org\/rss\/1\.0\/\1/) != -1)
{
debug(aFeed.url + " is an RSS 1.x (RDF-based) feed");
return this.parseAsRSS1(aFeed, aSource, aBaseURI);
}
else if (aSource.search(/=(['"])http:\/\/purl.org\/atom\/ns#\1/) != -1)
{
debug(aFeed.url + " is an Atom feed");
return this.parseAsAtom(aFeed, aDOM);
}
else if (aSource.search(/"http:\/\/my\.netscape\.com\/rdf\/simple\/0\.9\/"/) != -1)
{
// RSS 0.9x is forward compatible with RSS 2.0, so use the RSS2 parser to handle it.
debug(aFeed.url + " is an 0.9x feed");
return this.parseAsRSS2(aFeed, aDOM);
}
// XXX Explicitly check for RSS 2.0 instead of letting it be handled by the
// default behavior (who knows, we may change the default at some point).
else
{
// We don't know what kind of feed this is; let's pretend it's RSS 0.9x
// and hope things work out for the best. In theory even RSS 1.0 feeds
// could be parsed by the 0.9x parser if the RSS namespace was the default.
debug(aFeed.url + " is of unknown format; assuming an RSS 0.9x feed");
return this.parseAsRSS2(aFeed, aDOM);
}
},
parseAsRSS2: function (aFeed, aDOM)
{
// Get the first channel (assuming there is only one per RSS File).
var parsedItems = new Array();
var channel = aDOM.getElementsByTagName("channel")[0];
if (!channel)
return aFeed.onParseError(aFeed);
aFeed.title = aFeed.title || getNodeValue(channel.getElementsByTagName("title")[0]);
aFeed.description = getNodeValue(channel.getElementsByTagName("description")[0]);
if (!aFeed.parseItems)
return parsedItems;
aFeed.invalidateItems();
var itemNodes = aDOM.getElementsByTagName("item");
var converter = Components.classes["@mozilla.org/intl/scriptableunicodeconverter"].
createInstance(Components.interfaces.nsIScriptableUnicodeConverter);
converter.charset = 'UTF-8';
for (var i=0; i<itemNodes.length; i++)
{
var itemNode = itemNodes[i];
var item = new FeedItem();
item.feed = aFeed;
item.characterSet = "UTF-8";
var link = getNodeValue(itemNode.getElementsByTagName("link")[0]);
var guidNode = itemNode.getElementsByTagName("guid")[0];
var guid;
var isPermaLink;
if (guidNode)
{
guid = getNodeValue(guidNode);
isPermaLink = guidNode.getAttribute('isPermaLink') == 'false' ? false : true;
}
// getNodeValue returns unicode strings...
// we need to do the proper conversion on these before we call into
// item.Store();
item.url = link ? link : (guid && isPermaLink) ? guid : null;
item.id = guid;
item.description = getNodeValue(itemNode.getElementsByTagName("description")[0]);
item.title = converter.ConvertFromUnicode(getNodeValue(itemNode.getElementsByTagName("title")[0])
|| (item.description ? item.description.substr(0, 150) : null)
|| item.title);
// do this after we potentially assign item.description into item.title
// because that potential assignment assumes the value is in unicode still
item.description = converter.ConvertFromUnicode(item.description);
item.author = getNodeValue(itemNode.getElementsByTagName("author")[0]
|| itemNode.getElementsByTagName("creator")[0])
|| aFeed.title
|| item.author;
item.date = getNodeValue(itemNode.getElementsByTagName("pubDate")[0]
|| itemNode.getElementsByTagName("date")[0])
|| item.date;
// If the date is invalid, users will see the beginning of the epoch
// unless we reset it here, so they'll see the current time instead.
// This is typical aggregator behavior.
if(item.date)
{
item.date = trimString(item.date);
if(!isValidRFC822Date(item.date))
{
// XXX Use this on the other formats as well
item.date = dateRescue(item.date);
}
}
var content = getNodeValue(itemNode.getElementsByTagNameNS(RSS_CONTENT_NS, "encoded")[0]);
if (content)
item.content = converter.ConvertFromUnicode(content);
parsedItems[i] = item;
}
return parsedItems;
},
parseAsRSS1 : function(aFeed, aSource, aBaseURI)
{
var parsedItems = new Array();
// RSS 1.0 is valid RDF, so use the RDF parser/service to extract data.
// Create a new RDF data source and parse the feed into it.
var ds = Components.classes["@mozilla.org/rdf/datasource;1?name=in-memory-datasource"]
.createInstance(Components.interfaces.nsIRDFDataSource);
rdfparser.parseString(ds, aBaseURI, aSource);
// Get information about the feed as a whole.
var channel = ds.GetSource(RDF_TYPE, RSS_CHANNEL, true);
aFeed.title = aFeed.title || getRDFTargetValue(ds, channel, RSS_TITLE);
aFeed.description = getRDFTargetValue(ds, channel, RSS_DESCRIPTION);
if (!aFeed.parseItems)
return parsedItems;
aFeed.invalidateItems();
var items = ds.GetTarget(channel, RSS_ITEMS, true);
if (items)
items = rdfcontainer.MakeSeq(ds, items).GetElements();
// If the channel doesn't list any items, look for resources of type "item"
// (a hacky workaround for some buggy feeds).
if (!items || !items.hasMoreElements())
items = ds.GetSources(RDF_TYPE, RSS_ITEM, true);
var index = 0;
var converter = Components.classes["@mozilla.org/intl/scriptableunicodeconverter"]
.createInstance(Components.interfaces.nsIScriptableUnicodeConverter);
converter.charset = "UTF-8";
while (items.hasMoreElements())
{
var itemResource = items.getNext().QueryInterface(Components.interfaces.nsIRDFResource);
var item = new FeedItem();
item.feed = aFeed;
item.characterSet = "UTF-8";
// Prefer the value of the link tag to the item URI since the URI could be
// a relative URN.
var uri = itemResource.Value;
var link = getRDFTargetValue(ds, itemResource, RSS_LINK);
item.url = link || uri;
item.id = item.url;
item.description = getRDFTargetValue(ds, itemResource, RSS_DESCRIPTION);
item.title = getRDFTargetValue(ds, itemResource, RSS_TITLE)
|| getRDFTargetValue(ds, itemResource, DC_SUBJECT)
|| (item.description ? item.description.substr(0, 150) : null)
|| item.title;
item.author = getRDFTargetValue(ds, itemResource, DC_CREATOR)
|| getRDFTargetValue(ds, channel, DC_CREATOR)
|| aFeed.title
|| item.author;
item.date = getRDFTargetValue(ds, itemResource, DC_DATE) || item.date;
item.content = getRDFTargetValue(ds, itemResource, RSS_CONTENT_ENCODED);
parsedItems[index++] = item;
}
return parsedItems;
},
parseAsAtom: function(aFeed, aDOM)
{
var parsedItems = new Array();
// Get the first channel (assuming there is only one per Atom File).
var channel = aDOM.getElementsByTagName("feed")[0];
if (!channel)
{
aFeed.onParseError(aFeed);
return parsedItems;
}
aFeed.title = aFeed.title || getNodeValue(channel.getElementsByTagName("title")[0]);
aFeed.description = getNodeValue(channel.getElementsByTagName("tagline")[0]);
if (!aFeed.parseItems)
return parsedItems;
aFeed.invalidateItems();
var items = this.mDOM.getElementsByTagName("entry");
debug("Items to parse: " + items.length);
for (var i=0; i<items.length; i++)
{
var itemNode = items[i];
var item = new FeedItem();
item.feed = aFeed;
item.characterSet = "UTF-8";
var url;
var links = itemNode.getElementsByTagName("link");
for (var j=0; j < links.length; j++)
{
var alink = links[j];
if (alink && alink.getAttribute('rel') && alink.getAttribute('rel') == 'alternate' && alink.getAttribute('href'))
{
url = alink.getAttribute('href');
break;
}
}
item.url = url;
item.id = getNodeValue(itemNode.getElementsByTagName("id")[0]);
item.description = getNodeValue(itemNode.getElementsByTagName("summary")[0]);
item.title = getNodeValue(itemNode.getElementsByTagName("title")[0])
|| (item.description ? item.description.substr(0, 150) : null)
|| item.title;
var authorEl = itemNode.getElementsByTagName("author")[0]
|| itemNode.getElementsByTagName("contributor")[0]
|| channel.getElementsByTagName("author")[0];
var author = "";
if (authorEl)
{
var name = getNodeValue(authorEl.getElementsByTagName("name")[0]);
var email = getNodeValue(authorEl.getElementsByTagName("email")[0]);
if (name)
author = name + (email ? " <" + email + ">" : "");
else if (email)
author = email;
}
item.author = author || item.author || aFeed.title;
item.date = getNodeValue(itemNode.getElementsByTagName("modified")[0]
|| itemNode.getElementsByTagName("issued")[0]
|| itemNode.getElementsByTagName("created")[0])
|| item.date;
// XXX We should get the xml:base attribute from the content tag as well
// and use it as the base HREF of the message.
// XXX Atom feeds can have multiple content elements; we should differentiate
// between them and pick the best one.
// Some Atom feeds wrap the content in a CTYPE declaration; others use
// a namespace to identify the tags as HTML; and a few are buggy and put
// HTML tags in without declaring their namespace so they look like Atom.
// We deal with the first two but not the third.
var content;
var contentNode = itemNode.getElementsByTagName("content")[0];
if (contentNode)
{
content = "";
for (var j=0; j < contentNode.childNodes.length; j++)
{
var node = contentNode.childNodes.item(j);
if (node.nodeType == node.CDATA_SECTION_NODE)
content += node.data;
else
content += serializer.serializeToString(node);
}
if (contentNode.getAttribute('mode') == "escaped")
{
content = content.replace(/&lt;/g, "<");
content = content.replace(/&gt;/g, ">");
content = content.replace(/&amp;/g, "&");
}
if (content == "")
content = null;
}
item.content = content;
parsedItems[i] = item;
}
return parsedItems;
}
};

Просмотреть файл

@ -119,7 +119,7 @@ var feedDownloadCallback = {
} }
else if (aErrorCode == kNewsBlogInvalidFeed) // the feed was bad... else if (aErrorCode == kNewsBlogInvalidFeed) // the feed was bad...
window.alert(document.getElementById('bundle_newsblog').getFormattedString('newsblog-invalidFeed', [feed.url])); window.alert(document.getElementById('bundle_newsblog').getFormattedString('newsblog-invalidFeed', [feed.url]));
else // we never even downloaded the feed...(kNewsBlogRequestFailure) else if (aErrorCode == kNewsBlogRequestFailure)
window.alert(document.getElementById('bundle_newsblog').getFormattedString('newsblog-networkError', [feed.url])); window.alert(document.getElementById('bundle_newsblog').getFormattedString('newsblog-networkError', [feed.url]));
// re-enable the add button now that we are done subscribing // re-enable the add button now that we are done subscribing

Просмотреть файл

@ -50,12 +50,13 @@
windowtype="Mail:News-BlogSubscriptions" windowtype="Mail:News-BlogSubscriptions"
flex="1"> flex="1">
<script type="application/x-javascript" src="utils.js" /> <script type="application/x-javascript" src="utils.js"/>
<script type="application/x-javascript" src="file-utils.js" /> <script type="application/x-javascript" src="file-utils.js"/>
<script type="application/x-javascript" src="debug-utils.js" /> <script type="application/x-javascript" src="debug-utils.js"/>
<script type="application/x-javascript" src="subscriptions.js" /> <script type="application/x-javascript" src="subscriptions.js"/>
<script type="application/x-javascript" src="Feed.js" /> <script type="application/x-javascript" src="Feed.js"/>
<script type="application/x-javascript" src="FeedItem.js" /> <script type="application/x-javascript" src="FeedItem.js"/>
<script type="application/x-javascript" src="feed-parser.js"/>
<stringbundle id="bundle_newsblog" src="chrome://messenger-newsblog/locale/newsblog.properties"/> <stringbundle id="bundle_newsblog" src="chrome://messenger-newsblog/locale/newsblog.properties"/>

Просмотреть файл

@ -5,6 +5,7 @@ newsblog.jar:
* content/messenger-newsblog/debug-utils.js (content/debug-utils.js) * content/messenger-newsblog/debug-utils.js (content/debug-utils.js)
* content/messenger-newsblog/Feed.js (content/Feed.js) * content/messenger-newsblog/Feed.js (content/Feed.js)
* content/messenger-newsblog/FeedItem.js (content/FeedItem.js) * content/messenger-newsblog/FeedItem.js (content/FeedItem.js)
* content/messenger-newsblog/feed-parser.js (content/feed-parser.js)
* content/messenger-newsblog/file-utils.js (content/file-utils.js) * content/messenger-newsblog/file-utils.js (content/file-utils.js)
* content/messenger-newsblog/subscriptions.js (content/subscriptions.js) * content/messenger-newsblog/subscriptions.js (content/subscriptions.js)
* content/messenger-newsblog/utils.js (content/utils.js) * content/messenger-newsblog/utils.js (content/utils.js)

Просмотреть файл

@ -309,6 +309,7 @@ function loadScripts()
{ {
scriptLoader.loadSubScript("chrome://messenger-newsblog/content/Feed.js"); scriptLoader.loadSubScript("chrome://messenger-newsblog/content/Feed.js");
scriptLoader.loadSubScript("chrome://messenger-newsblog/content/FeedItem.js"); scriptLoader.loadSubScript("chrome://messenger-newsblog/content/FeedItem.js");
scriptLoader.loadSubScript("chrome://messenger-newsblog/content/feed-parser.js");
scriptLoader.loadSubScript("chrome://messenger-newsblog/content/file-utils.js"); scriptLoader.loadSubScript("chrome://messenger-newsblog/content/file-utils.js");
scriptLoader.loadSubScript("chrome://messenger-newsblog/content/utils.js"); scriptLoader.loadSubScript("chrome://messenger-newsblog/content/utils.js");
} }
@ -362,7 +363,7 @@ var progressNotifier = {
[feed.url], 1)); [feed.url], 1));
else if (aErrorCode == kNewsBlogRequestFailure) else if (aErrorCode == kNewsBlogRequestFailure)
this.mStatusFeedback.showStatusString(GetNewsBlogStringBundle().formatStringFromName("newsblog-networkError", this.mStatusFeedback.showStatusString(GetNewsBlogStringBundle().formatStringFromName("newsblog-networkError",
[feed.url], 1)); [feed.url], 1));
this.mStatusFeedback.stopMeteors(); this.mStatusFeedback.stopMeteors();
} }