Bug #279150 --> Break apart the RSS feed parser into a separate JS object to make it easier to hook up unit testing.

Thanks to Robert Sayer for getting this going.
2005-02-01 04:04:59 +00:00 · 2005-02-01 04:04:59 +00:00 · 5ecb05ff26
--- a/mail/extensions/newsblog/content/Feed.js
+++ b/mail/extensions/newsblog/content/Feed.js
@ -1,78 +1,94 @@
-var rdfcontainer =
-  Components
-    .classes["@mozilla.org/rdf/container-utils;1"]
-      .getService(Components.interfaces.nsIRDFContainerUtils);
+# -*- Mode: Java; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*-
+# ***** BEGIN LICENSE BLOCK *****
+# Version: MPL 1.1/GPL 2.0/LGPL 2.1
+#
+# The contents of this file are subject to the Mozilla Public License Version
+# 1.1 (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+# http://www.mozilla.org/MPL/
+#
+# Software distributed under the License is distributed on an "AS IS" basis,
+# WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
+# for the specific language governing rights and limitations under the
+# License.
+#
+# The Original Code is the RSS Parsing Engine
+#
+# Contributor(s):
+#
+# Alternatively, the contents of this file may be used under the terms of
+# either the GNU General Public License Version 2 or later (the "GPL"), or
+# the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
+# in which case the provisions of the GPL or the LGPL are applicable instead
+# of those above. If you wish to allow use of your version of this file only
+# under the terms of either the GPL or the LGPL, and not to allow others to
+# use your version of this file under the terms of the MPL, indicate your
+# decision by deleting the provisions above and replace them with the notice
+# and other provisions required by the GPL or the LGPL. If you do not delete
+# the provisions above, a recipient may use your version of this file under
+# the terms of any one of the MPL, the GPL or the LGPL.
+#
+# ***** END LICENSE BLOCK ***** */

-var rdfparser =
-  Components
-    .classes["@mozilla.org/rdf/xml-parser;1"]
-      .createInstance(Components.interfaces.nsIRDFXMLParser);
-
-// For use when serializing content in Atom feeds.
-var serializer =   
-  Components
-    .classes["@mozilla.org/xmlextras/xmlserializer;1"]
-      .createInstance(Components.interfaces.nsIDOMSerializer);

 // error codes used to inform the consumer about attempts to download a feed
-
 const kNewsBlogSuccess = 0;
 const kNewsBlogInvalidFeed = 1; // usually means there was an error trying to parse the feed...
 const kNewsBlogRequestFailure = 2; // generic networking failure when trying to download the feed.
+const kNewsBlogFeedIsBusy = 3;

+// Cache for all of the feeds currently being downloaded, indexed by URL, so the load event listener
+// can access the Feed objects after it finishes downloading the feed.
+var FeedCache = 
+{
+  mFeeds: new Array(),

-// Hash of feeds being downloaded, indexed by URL, so the load event listener
-// can access the Feed objects after it finishes downloading the feed files.
-function FeedCache(){
-    this.nsURI = Components.classes["@mozilla.org/network/standard-url;1"].
-                            createInstance(Components.interfaces.nsIURI);
-    return this; 
-}
+  putFeed: function (aFeed)
+  {
+    this.mFeeds[this.normalizeHost(aFeed.url)] = aFeed;
+  },

-FeedCache.prototype.putFeed =function(feed) {
-    this[this.normalizeHost(feed.url)] = feed;
-}
+  getFeed: function (aUrl)
+  {
+    return this.mFeeds[this.normalizeHost(aUrl)];
+  },

-FeedCache.prototype.getFeedWithUrl =function(url) {
-    return this[this.normalizeHost(url)];
-}
+  removeFeed: function (aUrl)
+  {
+    delete this.mFeeds[this.normalizeHost(aUrl)];
+  },

-FeedCache.prototype.removeFeedWithUrl = function(url) {
-    delete this[this.normalizeHost(url)];
-}
+  normalizeHost: function (aUrl)
+  {
+    normalizedUrl = Components.classes["@mozilla.org/network/standard-url;1"].
+                    createInstance(Components.interfaces.nsIURI);
+    normalizedUrl.spec = aUrl;    
+    normalizedUrl.host = normalizedUrl.host.toLowerCase();
+    return normalizedUrl.spec;
+  }
+};

-FeedCache.prototype.normalizeHost = function(url){
-    this.nsURI.spec = url;    
-    this.nsURI.host = this.nsURI.host.toLowerCase();
-    return this.nsURI.spec;
-}
+function Feed(resource) 
+{
+  this.resource = resource.QueryInterface(Components.interfaces.nsIRDFResource);
+  this.description = null;
+  this.author = null;
+ 
+  this.request = null;
+  this.folder = null;
+  this.server = null;
+  this.downloadCallback = null;
+  this.items = new Array();

-var gFzFeedCache = new FeedCache();
-
-
-
-function Feed(resource) {
-    this.resource = resource.QueryInterface(Components.interfaces.nsIRDFResource);
-
-    this.description = null;
-    this.author = null;
-  
-    this.request = null;
-    this.folder = null;
-    this.server = null;
-
-    this.downloadCallback = null;
-
-    this.items = new Array();
-  
-    return this;
+  return this;
 }

 // The name of the message folder corresponding to the feed.
 // XXX This should be called something more descriptive like "folderName".
 // XXX Or maybe, when we support nested folders and downloading into any folder,
 // there could just be a reference to the folder itself called "folder".
-Feed.prototype.name getter = function() {
+Feed.prototype.name getter = function() 
+{
  var name = this.title || this.description || this.url;
  if (!name)
    throw("couldn't compute feed name, as feed has no title, description, or URL.");
@ -89,7 +105,8 @@ Feed.prototype.name getter = function() {
  return name;
 }

-Feed.prototype.download = function(parseItems, aCallback) {
+Feed.prototype.download = function(parseItems, aCallback) 
+{
  this.downloadCallback = aCallback; // may be null 

  // Whether or not to parse items when downloading and parsing the feed.
@ -108,8 +125,12 @@ Feed.prototype.download = function(parseItems, aCallback) {

  // Before we try to download the feed, make sure we aren't already processing the feed
  // by looking up the url in our feed cache
-  if (gFzFeedCache.getFeedWithUrl(this.url))
-    return; // don't do anything, the feed is already in use
+  if (FeedCache.getFeed(this.url))
+  {
+    if (this.downloadCallback)
+      this.downloadCallback.downloaded(this, kNewsBlogFeedIsBusy);
+    return ; // don't do anything, the feed is already in use
+  }

  this.request = Components.classes["@mozilla.org/xmlextras/xmlhttprequest;1"]
                 .createInstance(Components.interfaces.nsIXMLHttpRequest);
@ -119,15 +140,16 @@ Feed.prototype.download = function(parseItems, aCallback) {
  this.request.overrideMimeType("text/xml");
  this.request.onload = Feed.onDownloaded;
  this.request.onerror = Feed.onDownloadError;
-  gFzFeedCache.putFeed(this);
+  FeedCache.putFeed(this);
  this.request.send(null);
 }

-Feed.onDownloaded = function(event) {
+Feed.onDownloaded = function(event) 
+{
  var request = event.target;
  var url = request.channel.originalURI.spec;
  debug(url + " downloaded");
-  var feed = gFzFeedCache.getFeedWithUrl(url);
+  var feed = FeedCache.getFeed(url);
  if (!feed)
    throw("error after downloading " + url + ": couldn't retrieve feed from request");
  
@ -136,413 +158,148 @@ Feed.onDownloaded = function(event) {
  // parse will asynchronously call the download callback when it is done
 }

-Feed.onProgress = function(event) {
+Feed.onProgress = function(event) 
+{
  var request = event.target;
  var url = request.channel.originalURI.spec;
-  var feed = gFzFeedCache.getFeedWithUrl(url);
+  var feed = FeedCache.getFeed(url);

  if (feed.downloadCallback)
    feed.downloadCallback.onProgress(feed, event.position, event.totalSize);
 }

-Feed.onDownloadError = function(event) {
+Feed.onDownloadError = function(event) 
+{
  var request = event.target;
  var url = request.channel.originalURI.spec;
-  var feed = gFzFeedCache.getFeedWithUrl(url);
+  var feed = FeedCache.getFeed(url);
  if (feed.downloadCallback)
    feed.downloadCallback.downloaded(feed, kNewsBlogRequestFailure);

-  gFzFeedCache.removeFeedWithUrl(url);
+  FeedCache.removeFeed(url);
 }

-Feed.prototype.onParseError = function(feed) {
+Feed.prototype.onParseError = function(feed) 
+{
  if (feed && feed.downloadCallback)
  {
    if (feed.downloadCallback)
      feed.downloadCallback.downloaded(feed, kNewsBlogInvalidFeed);
-    gFzFeedCache.removeFeedWithUrl(url);
+    FeedCache.removeFeed(url);
  }
 }

-Feed.prototype.url getter = function() {
-    var ds = getSubscriptionsDS(this.server);
-    var url = ds.GetTarget(this.resource, DC_IDENTIFIER, true);
-    if (url)
-      url = url.QueryInterface(Components.interfaces.nsIRDFLiteral).Value;
-    else
-      url = this.resource.Value;
-    return url;
+Feed.prototype.url getter = function() 
+{
+  var ds = getSubscriptionsDS(this.server);
+  var url = ds.GetTarget(this.resource, DC_IDENTIFIER, true);
+  if (url)
+    url = url.QueryInterface(Components.interfaces.nsIRDFLiteral).Value;
+  else
+    url = this.resource.Value;
+  return url;
 }

-Feed.prototype.title getter = function() {
-    var ds = getSubscriptionsDS(this.server);
-    var title = ds.GetTarget(this.resource, DC_TITLE, true);
-    if (title)
-      title = title.QueryInterface(Components.interfaces.nsIRDFLiteral).Value;
-    return title;
+Feed.prototype.title getter = function() 
+{
+  var ds = getSubscriptionsDS(this.server);
+  var title = ds.GetTarget(this.resource, DC_TITLE, true);
+  if (title)
+    title = title.QueryInterface(Components.interfaces.nsIRDFLiteral).Value;
+  return title;
 }

-Feed.prototype.title setter = function(new_title) {
-    var ds = getSubscriptionsDS(this.server);
-    new_title = rdf.GetLiteral(new_title || "");
-    var old_title = ds.GetTarget(this.resource, DC_TITLE, true);
-    if (old_title)
-        ds.Change(this.resource, DC_TITLE, old_title, new_title);
-    else
-        ds.Assert(this.resource, DC_TITLE, new_title, true);
+Feed.prototype.title setter = function(new_title) 
+{
+  var ds = getSubscriptionsDS(this.server);
+  new_title = rdf.GetLiteral(new_title || "");
+  var old_title = ds.GetTarget(this.resource, DC_TITLE, true);
+  if (old_title)
+      ds.Change(this.resource, DC_TITLE, old_title, new_title);
+  else
+      ds.Assert(this.resource, DC_TITLE, new_title, true);
 }

-Feed.prototype.quickMode getter = function() {
-    var ds = getSubscriptionsDS(this.server);
-    var quickMode = ds.GetTarget(this.resource, FZ_QUICKMODE, true);
-    if (quickMode) {
-        quickMode = quickMode.QueryInterface(Components.interfaces.nsIRDFLiteral);
-        quickMode = quickMode.Value;
-        quickMode = eval(quickMode);
-    }    
-    return quickMode;
+Feed.prototype.quickMode getter = function() 
+{
+  var ds = getSubscriptionsDS(this.server);
+  var quickMode = ds.GetTarget(this.resource, FZ_QUICKMODE, true);
+  if (quickMode) 
+  {
+    quickMode = quickMode.QueryInterface(Components.interfaces.nsIRDFLiteral);
+    quickMode = quickMode.Value;
+    quickMode = eval(quickMode);
+  }    
+  return quickMode;
 }

-Feed.prototype.quickMode setter = function(new_quickMode) {
-    var ds = getSubscriptionsDS(this.server);
-    new_quickMode = rdf.GetLiteral(new_quickMode || "");
-    var old_quickMode = ds.GetTarget(this.resource, FZ_QUICKMODE, true);
-    if (old_quickMode)
-        ds.Change(this.resource, FZ_QUICKMODE, old_quickMode, new_quickMode);
-    else
-        ds.Assert(this.resource, FZ_QUICKMODE, new_quickMode, true);
+Feed.prototype.quickMode setter = function(new_quickMode) 
+{
+  var ds = getSubscriptionsDS(this.server);
+  new_quickMode = rdf.GetLiteral(new_quickMode || "");
+  var old_quickMode = ds.GetTarget(this.resource, FZ_QUICKMODE, true);
+  if (old_quickMode)
+    ds.Change(this.resource, FZ_QUICKMODE, old_quickMode, new_quickMode);
+  else
+    ds.Assert(this.resource, FZ_QUICKMODE, new_quickMode, true);
 }

-Feed.prototype.parse = function() {
+Feed.prototype.parse = function() 
+{
  // Figures out what description language (RSS, Atom) and version this feed
  // is using and calls a language/version-specific feed parser.

  debug("parsing feed " + this.url);

-  if (!this.request.responseText) {
+  if (!this.request.responseText) 
    return this.onParseError(this);
-  }
-  else if (this.request.responseText.search(/=(['"])http:\/\/purl\.org\/rss\/1\.0\/\1/) != -1) {
-    debug(this.url + " is an RSS 1.x (RDF-based) feed");
-    this.parseAsRSS1();
-  }
-  else if (this.request.responseText.search(/=(['"])http:\/\/purl.org\/atom\/ns#\1/) != -1) {
-    debug(this.url + " is an Atom feed");
-    this.parseAsAtom();
-  }
-  else if (this.request.responseText.search(/"http:\/\/my\.netscape\.com\/rdf\/simple\/0\.9\/"/) != -1)
-  {
-    // RSS 0.9x is forward compatible with RSS 2.0, so use the RSS2 parser to handle it.
-    debug(this.url + " is an 0.9x feed");
-    this.parseAsRSS2();
-  }
-  // XXX Explicitly check for RSS 2.0 instead of letting it be handled by the
-  // default behavior (who knows, we may change the default at some point).
-  else {
-    // We don't know what kind of feed this is; let's pretend it's RSS 0.9x
-    // and hope things work out for the best.  In theory even RSS 1.0 feeds
-    // could be parsed by the 0.9x parser if the RSS namespace was the default.
-    debug(this.url + " is of unknown format; assuming an RSS 0.9x feed");
-    this.parseAsRSS2();
-  }
+
+  // create a feed parser which will parse the feed for us
+  var parser = new FeedParser();
+  this.itemsToStore = parser.parseFeed(this, this.request.responseText, this.request.responseXML, this.request.channel.URI);
+  
+  // storeNextItem will iterate through the parsed items, storing each one.
+  this.itemsToStoreIndex = 0;
+  this.storeNextItem();
+}
+
+Feed.prototype.invalidateItems = function () 
+{
  var ds = getItemsDS(this.server);
-  ds = ds.QueryInterface(Components.interfaces.nsIRDFRemoteDataSource);
-  ds.Flush();
-}
-
-Feed.prototype.parseAsRSS2 = function() {
-  if (!this.request.responseXML || !(this.request.responseXML instanceof Components.interfaces.nsIDOMXMLDocument))
-    return this.onParseError(this);
-
-  // Get the first channel (assuming there is only one per RSS File).
-  var channel = this.request.responseXML.getElementsByTagName("channel")[0];
-  if (!channel)
-    return this.onParseError(this);
-
-  this.title = this.title || getNodeValue(channel.getElementsByTagName("title")[0]);
-  this.description = getNodeValue(channel.getElementsByTagName("description")[0]);
-
-  if (!this.parseItems)
-    return;
-
-  this.invalidateItems();
-
-  var itemNodes = this.request.responseXML.getElementsByTagName("item");
-
-  this.itemsToStore = new Array();
-  this.itemsToStoreIndex = 0; 
-
-  var converter = Components
-    .classes["@mozilla.org/intl/scriptableunicodeconverter"]
-      .createInstance(Components.interfaces.nsIScriptableUnicodeConverter);
-
-  converter.charset = 'UTF-8';
-
-  for ( var i=0 ; i<itemNodes.length ; i++ ) {
-    var itemNode = itemNodes[i];
-    var item = new FeedItem();
-    item.feed = this;
-
-    item.characterSet = "UTF-8";
-
-    var link = getNodeValue(itemNode.getElementsByTagName("link")[0]);
-
-    var guidNode = itemNode.getElementsByTagName("guid")[0];
-    if (guidNode) {
-      var guid = getNodeValue(guidNode);
-      var isPermaLink =
-        guidNode.getAttribute('isPermaLink') == 'false' ? false : true;
-    }
-
-    // getNodeValue returns unicode strings...
-    // we need to do the proper conversion on these before we call into
-    // item.Store();
-
-    item.url = link ? link : (guid && isPermaLink) ? guid : null;
-    item.id = guid;
-    item.description = getNodeValue(itemNode.getElementsByTagName("description")[0]);
-    item.title = converter.ConvertFromUnicode(getNodeValue(itemNode.getElementsByTagName("title")[0])
-                 || (item.description ? item.description.substr(0, 150) : null)
-                 || item.title);
-    // do this after we potentially assign item.description into item.title
-    // because that potential assignment assumes the value is in unicode still
-    item.description = converter.ConvertFromUnicode(item.description);
-
-    item.author = getNodeValue(itemNode.getElementsByTagName("author")[0]
-                               || itemNode.getElementsByTagName("creator")[0])
-                  || this.title
-                  || item.author;
-    item.date = getNodeValue(itemNode.getElementsByTagName("pubDate")[0]
-                             || itemNode.getElementsByTagName("date")[0])
-                || item.date;
-    
-    // If the date is invalid, users will see the beginning of the epoch
-    // unless we reset it here, so they'll see the current time instead.
-    // This is typical aggregator behavior.
-    if(item.date){
-      item.date = trimString(item.date);
-      if(!isValidRFC822Date(item.date) ){
-        // XXX Use this on the other formats as well
-        item.date = dateRescue(item.date);
-      }
-    }
-
-    var content = getNodeValue(itemNode.getElementsByTagNameNS(RSS_CONTENT_NS, "encoded")[0]);
-    if (content)
-      item.content = converter.ConvertFromUnicode(content);
-
-    this.itemsToStore[i] = item;
-  }
-
-  this.storeNextItem();
-}
-
-Feed.prototype.parseAsRSS1 = function() {
-  // RSS 1.0 is valid RDF, so use the RDF parser/service to extract data.
-
-  // Create a new RDF data source and parse the feed into it.
-  var ds = Components
-             .classes["@mozilla.org/rdf/datasource;1?name=in-memory-datasource"]
-               .createInstance(Components.interfaces.nsIRDFDataSource);
-
-  rdfparser.parseString(ds, this.request.channel.URI, this.request.responseText);
-
-  // Get information about the feed as a whole.
-  var channel = ds.GetSource(RDF_TYPE, RSS_CHANNEL, true);
-
-  this.title = this.title || getRDFTargetValue(ds, channel, RSS_TITLE);
-  this.description = getRDFTargetValue(ds, channel, RSS_DESCRIPTION);
-
-  if (!this.parseItems)
-    return;
-
-  this.invalidateItems();
-
-  var items = ds.GetTarget(channel, RSS_ITEMS, true);
-  if (items)
-    items = rdfcontainer.MakeSeq(ds, items).GetElements();
+  debug("invalidating items for " + this.url);
+  var items = ds.GetSources(FZ_FEED, this.resource, true);
+  var item;
  
-  // If the channel doesn't list any items, look for resources of type "item"
-  // (a hacky workaround for some buggy feeds).
-  if (!items || !items.hasMoreElements())
-    items = ds.GetSources(RDF_TYPE, RSS_ITEM, true);
-
-  this.itemsToStore = new Array();
-  this.itemsToStoreIndex = 0; 
-  var index = 0; 
-
-  var converter = Components
-    .classes["@mozilla.org/intl/scriptableunicodeconverter"]
-      .createInstance(Components.interfaces.nsIScriptableUnicodeConverter);
-
-  converter.charset = "UTF-8";
-
-  while (items.hasMoreElements()) {
-    var itemResource = items.getNext().QueryInterface(Components.interfaces.nsIRDFResource);
-    var item = new FeedItem();
-    item.feed = this;
-
-    item.characterSet = "UTF-8";
-
-    // Prefer the value of the link tag to the item URI since the URI could be
-    // a relative URN.
-    var uri = itemResource.Value;
-    var link = getRDFTargetValue(ds, itemResource, RSS_LINK);
-
-    item.url = link || uri;
-    item.id = item.url;
-    item.description = getRDFTargetValue(ds, itemResource, RSS_DESCRIPTION);
-    item.title = getRDFTargetValue(ds, itemResource, RSS_TITLE)
-                 || getRDFTargetValue(ds, itemResource, DC_SUBJECT)
-                 || (item.description ? item.description.substr(0, 150) : null)
-                 || item.title;
-    item.author = getRDFTargetValue(ds, itemResource, DC_CREATOR)
-                  || getRDFTargetValue(ds, channel, DC_CREATOR)
-                  || this.title
-                  || item.author;
-    item.date = getRDFTargetValue(ds, itemResource, DC_DATE) || item.date;
-    item.content = getRDFTargetValue(ds, itemResource, RSS_CONTENT_ENCODED);
-
-    this.itemsToStore[index++] = item;
+  while (items.hasMoreElements()) 
+  {
+    item = items.getNext();
+    item = item.QueryInterface(Components.interfaces.nsIRDFResource);
+    debug("invalidating " + item.Value);
+    var valid = ds.GetTarget(item, FZ_VALID, true);
+    if (valid)
+      ds.Unassert(item, FZ_VALID, valid, true);
  }
-
-  this.storeNextItem();
 }

-Feed.prototype.parseAsAtom = function() {
-  if (!this.request.responseXML || !(this.request.responseXML instanceof Components.interfaces.nsIDOMXMLDocument))
-    return this.onParseError(this);
-
-  // Get the first channel (assuming there is only one per Atom File).
-  var channel = this.request.responseXML.getElementsByTagName("feed")[0];
-  if (!channel)
-    return this.onParseError(this);
-
-  this.title = this.title || getNodeValue(channel.getElementsByTagName("title")[0]);
-  this.description = getNodeValue(channel.getElementsByTagName("tagline")[0]);
-
-  if (!this.parseItems)
-    return;
-
-  this.invalidateItems();
-
-  var items = this.request.responseXML.getElementsByTagName("entry");
-
-  this.itemsToStore = new Array();
-  this.itemsToStoreIndex = 0; 
-
-  for ( var i=0 ; i<items.length ; i++ ) {
-    var itemNode = items[i];
-    var item = new FeedItem();
-    item.feed = this;
-
-    item.characterSet = "UTF-8";
-
-    var url;
-    var links = itemNode.getElementsByTagName("link");
-    for ( var j=0 ; j<links.length ; j++ ) {
-      var alink = links[j];
-      if (alink && alink.getAttribute('rel') && alink.getAttribute('rel') == 'alternate' && alink.getAttribute('href')) {
-        url = alink.getAttribute('href');
-        break;
-      }
-    }
-
-    item.url = url;
-    item.id = getNodeValue(itemNode.getElementsByTagName("id")[0]);
-    item.description = getNodeValue(itemNode.getElementsByTagName("summary")[0]);
-    item.title = getNodeValue(itemNode.getElementsByTagName("title")[0])
-                 || (item.description ? item.description.substr(0, 150) : null)
-                 || item.title;
-
-    var authorEl = itemNode.getElementsByTagName("author")[0]
-                 || itemNode.getElementsByTagName("contributor")[0]
-                 || channel.getElementsByTagName("author")[0];
-    var author = "";
-
-    if (authorEl) {
-      var name = getNodeValue(authorEl.getElementsByTagName("name")[0]);
-      var email = getNodeValue(authorEl.getElementsByTagName("email")[0]);
-      if (name)
-        author = name + (email ? " <" + email + ">" : "");
-      else if (email)
-        author = email;
-    }
-    item.author = author || item.author || this.title;
-
-    item.date = getNodeValue(itemNode.getElementsByTagName("modified")[0]
-                             || itemNode.getElementsByTagName("issued")[0]
-                             || itemNode.getElementsByTagName("created")[0])
-                || item.date;
-
-    // XXX We should get the xml:base attribute from the content tag as well
-    // and use it as the base HREF of the message.
-    // XXX Atom feeds can have multiple content elements; we should differentiate
-    // between them and pick the best one.
-    // Some Atom feeds wrap the content in a CTYPE declaration; others use
-    // a namespace to identify the tags as HTML; and a few are buggy and put
-    // HTML tags in without declaring their namespace so they look like Atom.
-    // We deal with the first two but not the third.
-    var content;
-    var contentNode = itemNode.getElementsByTagName("content")[0];
-    if (contentNode) {
-      content = "";
-      for ( var j=0 ; j<contentNode.childNodes.length ; j++ ) {
-        var node = contentNode.childNodes.item(j);
-        if (node.nodeType == node.CDATA_SECTION_NODE)
-          content += node.data;
-        else
-          content += serializer.serializeToString(node);
-          //content += getNodeValue(node);
-      }
-      if (contentNode.getAttribute('mode') == "escaped") {
-        content = content.replace(/&lt;/g, "<");
-        content = content.replace(/&gt;/g, ">");
-        content = content.replace(/&amp;/g, "&");
-      }
-      if (content == "")
-        content = null;
-    }
-    item.content = content;
-
-    this.itemsToStore[i] = item;
+Feed.prototype.removeInvalidItems = function() 
+{
+  var ds = getItemsDS(this.server);
+  debug("removing invalid items for " + this.url);
+  var items = ds.GetSources(FZ_FEED, this.resource, true);
+  var item;
+  while (items.hasMoreElements()) 
+  {
+    item = items.getNext();
+    item = item.QueryInterface(Components.interfaces.nsIRDFResource);
+    if (ds.HasAssertion(item, FZ_VALID, RDF_LITERAL_TRUE, true))
+      continue;
+    debug("removing " + item.Value);
+    ds.Unassert(item, FZ_FEED, this.resource, true);
+    if (ds.hasArcOut(item, FZ_FEED))
+      debug(item.Value + " is from more than one feed; only the reference to this feed removed");
+    else
+      removeAssertions(ds, item);
  }
-  
-  this.storeNextItem();
-}
-
-Feed.prototype.invalidateItems = function invalidateItems() {
-    var ds = getItemsDS(this.server);
-    debug("invalidating items for " + this.url);
-    var items = ds.GetSources(FZ_FEED, this.resource, true);
-    var item;
-    while (items.hasMoreElements()) {
-        item = items.getNext();
-        item = item.QueryInterface(Components.interfaces.nsIRDFResource);
-        debug("invalidating " + item.Value);
-        var valid = ds.GetTarget(item, FZ_VALID, true);
-        if (valid)
-            ds.Unassert(item, FZ_VALID, valid, true);
-    }
-}
-
-Feed.prototype.removeInvalidItems = function() {
-    var ds = getItemsDS(this.server);
-    debug("removing invalid items for " + this.url);
-    var items = ds.GetSources(FZ_FEED, this.resource, true);
-    var item;
-    while (items.hasMoreElements()) {
-        item = items.getNext();
-        item = item.QueryInterface(Components.interfaces.nsIRDFResource);
-        if (ds.HasAssertion(item, FZ_VALID, RDF_LITERAL_TRUE, true))
-            continue;
-        debug("removing " + item.Value);
-        ds.Unassert(item, FZ_FEED, this.resource, true);
-        if (ds.hasArcOut(item, FZ_FEED))
-            debug(item.Value + " is from more than one feed; only the reference to this feed removed");
-        else
-            removeAssertions(ds, item);
-    }
 }

 // gets the next item from gItemsToStore and forces that item to be stored
@ -590,34 +347,33 @@ Feed.prototype.storeNextItem = function()

 Feed.prototype.cleanupParsingState = function(feed) {
    // now that we are done parsing the feed, remove the feed from our feed cache
-  gFzFeedCache.removeFeedWithUrl(feed.url);
+  FeedCache.removeFeed(feed.url);

  feed.removeInvalidItems();

-    // let's be sure to flush any feed item changes back to disk
+  // let's be sure to flush any feed item changes back to disk
  var ds = getItemsDS(feed.server);
-    ds.QueryInterface(Components.interfaces.nsIRDFRemoteDataSource).Flush(); // flush any changes
+  ds.QueryInterface(Components.interfaces.nsIRDFRemoteDataSource).Flush(); // flush any changes

  if (feed.downloadCallback)
    feed.downloadCallback.downloaded(feed, kNewsBlogSuccess);

-  feed.request = null; // force the xml http request to go away. This helps reduce some
-                              // nasty assertions on shut down of all things.
+  this.request = null; // force the xml http request to go away. This helps reduce some nasty assertions on shut down. 
+  this.itemsToStore = "";
+  this.itemsToStoreIndex = 0;
+  this.storeItemsTimer = null;
+}   

-    this.itemsToStore = "";
-    this.itemsToStoreIndex = 0;
-    this.storeItemsTimer = null;
-  }   
-
-Feed.prototype.notify = function(aTimer) {
+Feed.prototype.notify = function(aTimer) 
+{
  this.storeNextItem();
 }

-Feed.prototype.QueryInterface = function(aIID) {
+Feed.prototype.QueryInterface = function(aIID) 
+{
  if (aIID.equals(Components.interfaces.nsITimerCallback) || aIID.equals(Components.interfaces.nsISupports))
    return this;

  Components.returnCode = Components.results.NS_ERROR_NO_INTERFACE;
  return null;    
 }
-
--- a/mail/extensions/newsblog/content/feed-parser.js
+++ b/mail/extensions/newsblog/content/feed-parser.js
@ -0,0 +1,344 @@
+# -*- Mode: Java; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*-
+# ***** BEGIN LICENSE BLOCK *****
+# Version: MPL 1.1/GPL 2.0/LGPL 2.1
+#
+# The contents of this file are subject to the Mozilla Public License Version
+# 1.1 (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+# http://www.mozilla.org/MPL/
+#
+# Software distributed under the License is distributed on an "AS IS" basis,
+# WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
+# for the specific language governing rights and limitations under the
+# License.
+#
+# The Original Code is the RSS Parsing Engine
+#
+# Contributor(s):
+#
+# Alternatively, the contents of this file may be used under the terms of
+# either the GNU General Public License Version 2 or later (the "GPL"), or
+# the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
+# in which case the provisions of the GPL or the LGPL are applicable instead
+# of those above. If you wish to allow use of your version of this file only
+# under the terms of either the GPL or the LGPL, and not to allow others to
+# use your version of this file under the terms of the MPL, indicate your
+# decision by deleting the provisions above and replace them with the notice
+# and other provisions required by the GPL or the LGPL. If you do not delete
+# the provisions above, a recipient may use your version of this file under
+# the terms of any one of the MPL, the GPL or the LGPL.
+#
+# ***** END LICENSE BLOCK *****
+
+// The feed parser depends on FeedItem.js and Feed.js.
+
+// XPCOM helpers shared by the parsers in this file.
+
+// RDF container utilities service; turns the item list of an RSS 1.0 channel
+// into an enumerable sequence.
+var rdfcontainer =
+  Components.classes["@mozilla.org/rdf/container-utils;1"]
+            .getService(Components.interfaces.nsIRDFContainerUtils);
+
+// RDF/XML parser instance; loads the text of an RSS 1.0 feed into a datasource.
+var rdfparser =
+  Components.classes["@mozilla.org/rdf/xml-parser;1"]
+            .createInstance(Components.interfaces.nsIRDFXMLParser);
+
+// DOM serializer instance; turns the child nodes of an Atom content element
+// back into markup text.
+var serializer =
+  Components.classes["@mozilla.org/xmlextras/xmlserializer;1"]
+            .createInstance(Components.interfaces.nsIDOMSerializer);
+
+// Constructor for the feed parser. It sets no instance state; the parsing
+// methods are supplied through FeedParser.prototype.
+function FeedParser()
+{
+}
+
+FeedParser.prototype = 
+{
+  // parseFeed sniffs the format of a downloaded feed and hands it to the
+  // matching parser. It is currently a synchronous operation.
+  //
+  //   aFeed    - the feed being parsed; aFeed.onParseError(aFeed) is called when
+  //              the input cannot be used, aFeed.url is used for debug output
+  //   aSource  - the feed text; searched for the namespace that identifies the
+  //              format and passed on to the RSS 1.0 parser
+  //   aDOM     - the feed as an nsIDOMXMLDocument; passed on to the RSS 2.0 and
+  //              Atom parsers
+  //   aBaseURI - passed on to the RSS 1.0 parser
+  //
+  // Returns whatever the chosen parser returns (an array of parsed items ready
+  // for processing). If aSource is empty or aDOM is not an XML document,
+  // parseFeed returns an empty array in addition to calling aFeed.onParseError.
+  parseFeed: function (aFeed, aSource, aDOM, aBaseURI)
+  {
+    if (!aSource || !(aDOM instanceof Components.interfaces.nsIDOMXMLDocument))
+    {
+      aFeed.onParseError(aFeed);
+      return [];
+    }
+
+    if (aSource.search(/=(['"])http:\/\/purl\.org\/rss\/1\.0\/\1/) != -1)
+    {
+      debug(aFeed.url + " is an RSS 1.x (RDF-based) feed");
+      return this.parseAsRSS1(aFeed, aSource, aBaseURI);
+    }
+
+    // The dots are escaped so that only the literal namespace URI matches
+    // (an unescaped "." would accept any character in that position).
+    if (aSource.search(/=(['"])http:\/\/purl\.org\/atom\/ns#\1/) != -1)
+    {
+      debug(aFeed.url + " is an Atom feed");
+      return this.parseAsAtom(aFeed, aDOM);
+    }
+
+    // Accept either quote style, like the two checks above.
+    if (aSource.search(/(['"])http:\/\/my\.netscape\.com\/rdf\/simple\/0\.9\/\1/) != -1)
+    {
+      // RSS 0.9x is forward compatible with RSS 2.0, so use the RSS2 parser to handle it.
+      debug(aFeed.url + " is an 0.9x feed");
+      return this.parseAsRSS2(aFeed, aDOM);
+    }
+
+    // XXX Explicitly check for RSS 2.0 instead of letting it be handled by the
+    // default behavior (who knows, we may change the default at some point).
+
+    // We don't know what kind of feed this is; let's pretend it's RSS 0.9x
+    // and hope things work out for the best.  In theory even RSS 1.0 feeds
+    // could be parsed by the 0.9x parser if the RSS namespace was the default.
+    debug(aFeed.url + " is of unknown format; assuming an RSS 0.9x feed");
+    return this.parseAsRSS2(aFeed, aDOM);
+  },
+
+  // Parses an RSS 0.9x / 2.0 document.
+  //
+  //   aFeed - the feed being parsed. Its title (when not already set) and
+  //           description are filled in from the channel; its existing items are
+  //           invalidated before the new ones are read.
+  //   aDOM  - the feed document.
+  //
+  // Returns an array with one FeedItem per <item> element. The array is empty
+  // when the document has no <channel> (aFeed.onParseError is called in that
+  // case) or when aFeed.parseItems is not set.
+  parseAsRSS2: function (aFeed, aDOM) 
+  {
+    var parsedItems = [];
+
+    // Get the first channel (assuming there is only one per RSS File).
+    var channel = aDOM.getElementsByTagName("channel")[0];
+    if (!channel)
+    {
+      // Report the problem but still hand back an array, as parseFeed promises.
+      aFeed.onParseError(aFeed);
+      return parsedItems;
+    }
+
+    aFeed.title = aFeed.title || getNodeValue(channel.getElementsByTagName("title")[0]);
+    aFeed.description = getNodeValue(channel.getElementsByTagName("description")[0]);
+
+    if (!aFeed.parseItems)
+      return parsedItems;
+
+    aFeed.invalidateItems();
+    var itemNodes = aDOM.getElementsByTagName("item");
+    var converter = Components.classes["@mozilla.org/intl/scriptableunicodeconverter"].
+                    createInstance(Components.interfaces.nsIScriptableUnicodeConverter);
+    converter.charset = 'UTF-8';
+
+    for (var i = 0; i < itemNodes.length; i++)
+    {
+      var itemNode = itemNodes[i];
+      var item = new FeedItem();
+      item.feed = aFeed;
+      item.characterSet = "UTF-8";
+
+      var link = getNodeValue(itemNode.getElementsByTagName("link")[0]);
+
+      // Reset for every item: "var" is function-scoped, so without an explicit
+      // value an item lacking a <guid> would inherit the previous item's guid.
+      var guid = null;
+      var isPermaLink = false;
+      var guidNode = itemNode.getElementsByTagName("guid")[0];
+      if (guidNode)
+      {
+        guid = getNodeValue(guidNode);
+        // a guid counts as a permalink unless isPermaLink is explicitly "false"
+        isPermaLink = guidNode.getAttribute('isPermaLink') != 'false';
+      }
+
+      // getNodeValue returns unicode strings...
+      // we need to do the proper conversion on these before we call into
+      // item.Store();
+
+      item.url = link ? link : (guid && isPermaLink) ? guid : null;
+      item.id = guid;
+      item.description = getNodeValue(itemNode.getElementsByTagName("description")[0]);
+      item.title = converter.ConvertFromUnicode(getNodeValue(itemNode.getElementsByTagName("title")[0])
+                   || (item.description ? item.description.substr(0, 150) : null)
+                   || item.title);
+      // do this after we potentially assign item.description into item.title
+      // because that potential assignment assumes the value is in unicode still
+      item.description = converter.ConvertFromUnicode(item.description);
+
+      item.author = getNodeValue(itemNode.getElementsByTagName("author")[0]
+                                 || itemNode.getElementsByTagName("creator")[0])
+                                 || aFeed.title
+                                 || item.author;
+      item.date = getNodeValue(itemNode.getElementsByTagName("pubDate")[0]
+                               || itemNode.getElementsByTagName("date")[0])
+                               || item.date;
+
+      // If the date is invalid, users will see the beginning of the epoch
+      // unless we reset it here, so they'll see the current time instead.
+      // This is typical aggregator behavior.
+      if (item.date)
+      {
+        item.date = trimString(item.date);
+        if (!isValidRFC822Date(item.date))
+        {
+          // XXX Use this on the other formats as well
+          item.date = dateRescue(item.date);
+        }
+      }
+
+      var content = getNodeValue(itemNode.getElementsByTagNameNS(RSS_CONTENT_NS, "encoded")[0]);
+      if (content)
+        item.content = converter.ConvertFromUnicode(content);
+
+      parsedItems.push(item);
+    }
+
+    return parsedItems;
+  },
+
+  // Parses an RSS 1.0 (RDF-based) feed.
+  //
+  //   aFeed    - the feed being parsed. Its title (when not already set) and
+  //              description are filled in from the channel resource; its
+  //              existing items are invalidated before the new ones are read.
+  //   aSource  - the feed text, parsed as RDF/XML.
+  //   aBaseURI - handed to the RDF parser together with aSource.
+  //
+  // Returns an array with one FeedItem per item resource. The array is empty
+  // when no channel resource is found (aFeed.onParseError is called in that
+  // case) or when aFeed.parseItems is not set.
+  parseAsRSS1 : function(aFeed, aSource, aBaseURI) 
+  {
+    var parsedItems = [];
+
+    // RSS 1.0 is valid RDF, so use the RDF parser/service to extract data.
+    // Create a new RDF data source and parse the feed into it.
+    var ds = Components.classes["@mozilla.org/rdf/datasource;1?name=in-memory-datasource"]
+             .createInstance(Components.interfaces.nsIRDFDataSource);
+
+    rdfparser.parseString(ds, aBaseURI, aSource);
+
+    // Get information about the feed as a whole.
+    var channel = ds.GetSource(RDF_TYPE, RSS_CHANNEL, true);
+    if (!channel)
+    {
+      // Every lookup below uses the channel resource as its source, so a
+      // document without one cannot be processed; report it the same way the
+      // other parsers report a missing root element.
+      aFeed.onParseError(aFeed);
+      return parsedItems;
+    }
+
+    aFeed.title = aFeed.title || getRDFTargetValue(ds, channel, RSS_TITLE);
+    aFeed.description = getRDFTargetValue(ds, channel, RSS_DESCRIPTION);
+
+    if (!aFeed.parseItems)
+      return parsedItems;
+
+    aFeed.invalidateItems();
+
+    var items = ds.GetTarget(channel, RSS_ITEMS, true);
+    if (items)
+      items = rdfcontainer.MakeSeq(ds, items).GetElements();
+
+    // If the channel doesn't list any items, look for resources of type "item"
+    // (a hacky workaround for some buggy feeds).
+    if (!items || !items.hasMoreElements())
+      items = ds.GetSources(RDF_TYPE, RSS_ITEM, true);
+
+    while (items.hasMoreElements())
+    {
+      var itemResource = items.getNext().QueryInterface(Components.interfaces.nsIRDFResource);
+      var item = new FeedItem();
+      item.feed = aFeed;
+      item.characterSet = "UTF-8";
+
+      // Prefer the value of the link tag to the item URI since the URI could be
+      // a relative URN.
+      var uri = itemResource.Value;
+      var link = getRDFTargetValue(ds, itemResource, RSS_LINK);
+
+      item.url = link || uri;
+      item.id = item.url;
+      item.description = getRDFTargetValue(ds, itemResource, RSS_DESCRIPTION);
+      item.title = getRDFTargetValue(ds, itemResource, RSS_TITLE)
+                   || getRDFTargetValue(ds, itemResource, DC_SUBJECT)
+                   || (item.description ? item.description.substr(0, 150) : null)
+                   || item.title;
+      item.author = getRDFTargetValue(ds, itemResource, DC_CREATOR)
+                    || getRDFTargetValue(ds, channel, DC_CREATOR)
+                    || aFeed.title
+                    || item.author;
+
+      item.date = getRDFTargetValue(ds, itemResource, DC_DATE) || item.date;
+      item.content = getRDFTargetValue(ds, itemResource, RSS_CONTENT_ENCODED);
+
+      parsedItems.push(item);
+    }
+
+    return parsedItems;
+  },
+
+  // Parses an Atom document.
+  //
+  //   aFeed - the feed being parsed. Its title (when not already set) and
+  //           description (taken from <tagline>) are filled in from the <feed>
+  //           element; its existing items are invalidated before the new ones
+  //           are read.
+  //   aDOM  - the feed document.
+  //
+  // Returns an array with one FeedItem per <entry> element. The array is empty
+  // when the document has no <feed> element (aFeed.onParseError is called in
+  // that case) or when aFeed.parseItems is not set.
+  parseAsAtom: function(aFeed, aDOM) 
+  {
+    var parsedItems = [];
+
+    // Get the first channel (assuming there is only one per Atom File).
+    var channel = aDOM.getElementsByTagName("feed")[0];
+    if (!channel)
+    {
+      aFeed.onParseError(aFeed);
+      return parsedItems;
+    }
+
+    aFeed.title = aFeed.title || getNodeValue(channel.getElementsByTagName("title")[0]);
+    aFeed.description = getNodeValue(channel.getElementsByTagName("tagline")[0]);
+
+    if (!aFeed.parseItems)
+      return parsedItems;
+
+    aFeed.invalidateItems();
+
+    // Read the entries from the document we were handed; the parser object
+    // itself never stores a DOM.
+    var items = aDOM.getElementsByTagName("entry");
+    debug("Items to parse: " + items.length);
+
+    for (var i = 0; i < items.length; i++)
+    {
+      var itemNode = items[i];
+      var item = new FeedItem();
+      item.feed = aFeed;
+      item.characterSet = "UTF-8";
+
+      // Reset for every entry: "var" is function-scoped, so without an explicit
+      // value an entry lacking an alternate link would inherit the previous
+      // entry's URL.
+      var url = null;
+      var links = itemNode.getElementsByTagName("link");
+      for (var j = 0; j < links.length; j++)
+      {
+        var alink = links[j];
+        if (alink.getAttribute('rel') == 'alternate' && alink.getAttribute('href'))
+        {
+          url = alink.getAttribute('href');
+          break;
+        }
+      }
+
+      item.url = url;
+      item.id = getNodeValue(itemNode.getElementsByTagName("id")[0]);
+      item.description = getNodeValue(itemNode.getElementsByTagName("summary")[0]);
+      item.title = getNodeValue(itemNode.getElementsByTagName("title")[0])
+                   || (item.description ? item.description.substr(0, 150) : null)
+                   || item.title;
+
+      var authorEl = itemNode.getElementsByTagName("author")[0]
+                     || itemNode.getElementsByTagName("contributor")[0]
+                     || channel.getElementsByTagName("author")[0];
+      var author = "";
+
+      if (authorEl)
+      {
+        var name = getNodeValue(authorEl.getElementsByTagName("name")[0]);
+        var email = getNodeValue(authorEl.getElementsByTagName("email")[0]);
+        if (name)
+          author = name + (email ? " <" + email + ">" : "");
+        else if (email)
+          author = email;
+      }
+
+      item.author = author || item.author || aFeed.title;
+
+      item.date = getNodeValue(itemNode.getElementsByTagName("modified")[0]
+                               || itemNode.getElementsByTagName("issued")[0]
+                               || itemNode.getElementsByTagName("created")[0])
+                               || item.date;
+
+      // XXX We should get the xml:base attribute from the content tag as well
+      // and use it as the base HREF of the message.
+      // XXX Atom feeds can have multiple content elements; we should differentiate
+      // between them and pick the best one.
+      // Some Atom feeds wrap the content in a CTYPE declaration; others use
+      // a namespace to identify the tags as HTML; and a few are buggy and put
+      // HTML tags in without declaring their namespace so they look like Atom.
+      // We deal with the first two but not the third.
+
+      // Reset for every entry as well, so that an entry without a <content>
+      // element does not pick up the previous entry's content.
+      var content = null;
+      var contentNode = itemNode.getElementsByTagName("content")[0];
+      if (contentNode)
+      {
+        content = "";
+        for (var k = 0; k < contentNode.childNodes.length; k++)
+        {
+          var node = contentNode.childNodes.item(k);
+          if (node.nodeType == node.CDATA_SECTION_NODE)
+            content += node.data;
+          else
+            content += serializer.serializeToString(node);
+        }
+
+        if (contentNode.getAttribute('mode') == "escaped")
+        {
+          // &amp; is handled last so that "&amp;lt;" ends up as "&lt;", not "<"
+          content = content.replace(/&lt;/g, "<");
+          content = content.replace(/&gt;/g, ">");
+          content = content.replace(/&amp;/g, "&");
+        }
+
+        if (content == "")
+          content = null;
+      }
+
+      item.content = content;
+      parsedItems.push(item);
+    }
+    return parsedItems;
+  }
+};
--- a/mail/extensions/newsblog/content/subscriptions.js
+++ b/mail/extensions/newsblog/content/subscriptions.js
@ -119,7 +119,7 @@ var feedDownloadCallback = {
    } 
    else if (aErrorCode == kNewsBlogInvalidFeed) //  the feed was bad...
      window.alert(document.getElementById('bundle_newsblog').getFormattedString('newsblog-invalidFeed', [feed.url]));
-    else // we never even downloaded the feed...(kNewsBlogRequestFailure)
+    else if (aErrorCode == kNewsBlogRequestFailure) 
      window.alert(document.getElementById('bundle_newsblog').getFormattedString('newsblog-networkError', [feed.url]));

    // re-enable the add button now that we are done subscribing
--- a/mail/extensions/newsblog/content/subscriptions.xul
+++ b/mail/extensions/newsblog/content/subscriptions.xul
@ -50,12 +50,13 @@
        windowtype="Mail:News-BlogSubscriptions"
        flex="1">

-  <script type="application/x-javascript" src="utils.js" />
-  <script type="application/x-javascript" src="file-utils.js" />
-  <script type="application/x-javascript" src="debug-utils.js" />
-  <script type="application/x-javascript" src="subscriptions.js" />
-  <script type="application/x-javascript" src="Feed.js" />
-  <script type="application/x-javascript" src="FeedItem.js" />
+  <script type="application/x-javascript" src="utils.js"/>
+  <script type="application/x-javascript" src="file-utils.js"/>
+  <script type="application/x-javascript" src="debug-utils.js"/>
+  <script type="application/x-javascript" src="subscriptions.js"/>
+  <script type="application/x-javascript" src="Feed.js"/>
+  <script type="application/x-javascript" src="FeedItem.js"/>
+  <script type="application/x-javascript" src="feed-parser.js"/>

  <stringbundle id="bundle_newsblog" src="chrome://messenger-newsblog/locale/newsblog.properties"/>

--- a/mail/extensions/newsblog/jar.mn
+++ b/mail/extensions/newsblog/jar.mn
@ -5,6 +5,7 @@ newsblog.jar:
 *  content/messenger-newsblog/debug-utils.js                    (content/debug-utils.js)
 *  content/messenger-newsblog/Feed.js                           (content/Feed.js)
 *  content/messenger-newsblog/FeedItem.js                       (content/FeedItem.js)
+*  content/messenger-newsblog/feed-parser.js                    (content/feed-parser.js)
 *  content/messenger-newsblog/file-utils.js                     (content/file-utils.js)
 *  content/messenger-newsblog/subscriptions.js                  (content/subscriptions.js)
 *  content/messenger-newsblog/utils.js                          (content/utils.js)
--- a/mail/extensions/newsblog/js/newsblog.js
+++ b/mail/extensions/newsblog/js/newsblog.js
@ -309,6 +309,7 @@ function loadScripts()
  { 
    scriptLoader.loadSubScript("chrome://messenger-newsblog/content/Feed.js");
    scriptLoader.loadSubScript("chrome://messenger-newsblog/content/FeedItem.js");
+    scriptLoader.loadSubScript("chrome://messenger-newsblog/content/feed-parser.js");
    scriptLoader.loadSubScript("chrome://messenger-newsblog/content/file-utils.js");
    scriptLoader.loadSubScript("chrome://messenger-newsblog/content/utils.js");
  }
@ -362,7 +363,7 @@ var progressNotifier = {
                                            [feed.url], 1));
      else if (aErrorCode == kNewsBlogRequestFailure)
        this.mStatusFeedback.showStatusString(GetNewsBlogStringBundle().formatStringFromName("newsblog-networkError",
-                                            [feed.url], 1));      
+                                            [feed.url], 1));                                           
      this.mStatusFeedback.stopMeteors();
    }