Backed out changeset 8a292af9c827 for android robocop orange

2015-03-20 22:47:21 -07:00 · 2015-03-20 22:47:21 -07:00 · 24111fd7bd
--- a/toolkit/components/reader/JSDOMParser.js
+++ b/toolkit/components/reader/JSDOMParser.js
@ -1,10 +1,3 @@
-/*
- * DO NOT MODIFY THIS FILE DIRECTLY!
- *
- * This is a shared library that is maintained in an external repo:
- * https://github.com/mozilla/readability
- */
-
 /* This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this file,
 * You can obtain one at http://mozilla.org/MPL/2.0/. */
@ -38,7 +31,7 @@
  }

  // When a style is set in JS, map it to the corresponding CSS attribute
-  var styleMap = {
+  let styleMap = {
    "alignmentBaseline": "alignment-baseline",
    "background": "background",
    "backgroundAttachment": "background-attachment",
@ -230,7 +223,7 @@
  };

  // Elements that can be self-closing
-  var voidElems = {
+  let voidElems = {
    "area": true,
    "base": true,
    "br": true,
@ -246,10 +239,8 @@
    "source": true,
  };

-  var whitespace = [" ", "\t", "\n", "\r"];
-
  // See http://www.w3schools.com/dom/dom_nodetype.asp
-  var nodeTypes = {
+  let nodeTypes = {
    ELEMENT_NODE: 1,
    ATTRIBUTE_NODE: 2,
    TEXT_NODE: 3,
@ -266,12 +257,14 @@

  function getElementsByTagName(tag) {
    tag = tag.toUpperCase();
-    var elems = [];
-    var allTags = (tag === "*");
+    let elems = [];
+    let allTags = (tag === "*");
    function getElems(node) {
-      var length = node.children.length;
-      for (var i = 0; i < length; i++) {
-        var child = node.children[i];
+      let length = node.childNodes.length;
+      for (let i = 0; i < length; i++) {
+        let child = node.childNodes[i];
+        if (child.nodeType !== 1)
+          continue;
        if (allTags || (child.tagName === tag))
          elems.push(child);
        getElems(child);
@ -281,7 +274,7 @@
    return elems;
  }

-  var Node = function () {};
+  let Node = function () {};

  Node.prototype = {
    attributes: null,
@ -290,23 +283,18 @@
    nodeName: null,
    parentNode: null,
    textContent: null,
-    nextSibling: null,
-    previousSibling: null,

    get firstChild() {
      return this.childNodes[0] || null;
    },

-    get firstElementChild() {
-      return this.children[0] || null;
-    },
+    get nextSibling() {
+      if (this.parentNode) {
+        let childNodes = this.parentNode.childNodes;
+        return childNodes[childNodes.indexOf(this) + 1] || null;
+      }

-    get lastChild() {
-      return this.childNodes[this.childNodes.length - 1] || null;
-    },
-
-    get lastElementChild() {
-      return this.children[this.children.length - 1] || null;
+      return null;
    },

    appendChild: function (child) {
@ -314,152 +302,48 @@
        child.parentNode.removeChild(child);
      }

-      var last = this.lastChild;
-      if (last)
-        last.nextSibling = child;
-      child.previousSibling = last;
-
-      if (child.nodeType === Node.ELEMENT_NODE) {
-        child.previousElementSibling = this.children[this.children.length - 1] || null;
-        this.children.push(child);
-        child.previousElementSibling && (child.previousElementSibling.nextElementSibling = child);
-      }
      this.childNodes.push(child);
      child.parentNode = this;
    },

    removeChild: function (child) {
-      var childNodes = this.childNodes;
-      var childIndex = childNodes.indexOf(child);
+      let childNodes = this.childNodes;
+      let childIndex = childNodes.indexOf(child);
      if (childIndex === -1) {
        throw "removeChild: node not found";
      } else {
        child.parentNode = null;
-        var prev = child.previousSibling;
-        var next = child.nextSibling;
-        if (prev)
-          prev.nextSibling = next;
-        if (next)
-          next.previousSibling = prev;
-
-        if (child.nodeType === Node.ELEMENT_NODE) {
-          prev = child.previousElementSibling;
-          next = child.nextElementSibling;
-          if (prev)
-            prev.nextElementSibling = next;
-          if (next)
-            next.previousElementSibling = prev;
-          this.children.splice(this.children.indexOf(child), 1);
-        }
-
-        child.previousSibling = child.nextSibling = null;
-        child.previousElementSibling = child.nextElementSibling = null;
-
        return childNodes.splice(childIndex, 1)[0];
      }
    },

    replaceChild: function (newNode, oldNode) {
-      var childNodes = this.childNodes;
-      var childIndex = childNodes.indexOf(oldNode);
+      let childNodes = this.childNodes;
+      let childIndex = childNodes.indexOf(oldNode);
      if (childIndex === -1) {
        throw "replaceChild: node not found";
      } else {
-        // This will take care of updating the new node if it was somewhere else before:
        if (newNode.parentNode)
          newNode.parentNode.removeChild(newNode);

        childNodes[childIndex] = newNode;
-
-        // update the new node's sibling properties, and its new siblings' sibling properties
-        newNode.nextSibling = oldNode.nextSibling;
-        newNode.previousSibling = oldNode.previousSibling;
-        if (newNode.nextSibling)
-          newNode.nextSibling.previousSibling = newNode;
-        if (newNode.previousSibling)
-          newNode.previousSibling.nextSibling = newNode;
-
        newNode.parentNode = this;
-
-        // Now deal with elements before we clear out those values for the old node,
-        // because it can help us take shortcuts here:
-        if (newNode.nodeType === Node.ELEMENT_NODE) {
-          if (oldNode.nodeType === Node.ELEMENT_NODE) {
-            // Both were elements, which makes this easier, we just swap things out:
-            newNode.previousElementSibling = oldNode.previousElementSibling;
-            newNode.nextElementSibling = oldNode.nextElementSibling;
-            if (newNode.previousElementSibling)
-              newNode.previousElementSibling.nextElementSibling = newNode;
-            if (newNode.nextElementSibling)
-              newNode.nextElementSibling.previousElementSibling = newNode;
-            this.children[this.children.indexOf(oldNode)] = newNode;
-          } else {
-            // Hard way:
-            newNode.previousElementSibling = (function() {
-              for (var i = childIndex - 1; i >= 0; i--) {
-                if (childNodes[i].nodeType === Node.ELEMENT_NODE)
-                  return childNodes[i];
-              }
-              return null;
-            })();
-            if (newNode.previousElementSibling) {
-              newNode.nextElementSibling = newNode.previousElementSibling.nextElementSibling;
-            } else {
-              newNode.nextElementSibling = (function() {
-                for (var i = childIndex + 1; i < childNodes.length; i++) {
-                  if (childNodes[i].nodeType === Node.ELEMENT_NODE)
-                    return childNodes[i];
-                }
-                return null;
-              })();
-            }
-            if (newNode.previousElementSibling)
-              newNode.previousElementSibling.nextElementSibling = newNode;
-            if (newNode.nextElementSibling)
-              newNode.nextElementSibling.previousElementSibling = newNode;
-
-            if (newNode.nextElementSibling)
-              this.children.splice(this.children.indexOf(newNode.nextElementSibling), 0, newNode);
-            else
-              this.children.push(newNode);
-          }
-        } else {
-          // new node is not an element node.
-          // if the old one was, update its element siblings:
-          if (oldNode.nodeType === Node.ELEMENT_NODE) {
-            if (oldNode.previousElementSibling)
-              oldNode.previousElementSibling.nextElementSibling = oldNode.nextElementSibling;
-            if (oldNode.nextElementSibling)
-              oldNode.nextElementSibling.previousElementSibling = oldNode.previousElementSibling;
-            this.children.splice(this.children.indexOf(oldNode), 1);
-          }
-          // If the old node wasn't an element, neither the new nor the old node was an element,
-          // and the children array and its members shouldn't need any updating.
-        }
-
-
        oldNode.parentNode = null;
-        oldNode.previousSibling = null;
-        oldNode.nextSibling = null;
-        if (oldNode.nodeType === Node.ELEMENT_NODE) {
-          oldNode.previousElementSibling = null;
-          oldNode.nextElementSibling = null;
-        }
        return oldNode;
      }
    }
  };

-  for (var i in nodeTypes) {
+  for (let i in nodeTypes) {
    Node[i] = Node.prototype[i] = nodeTypes[i];
  }

-  var Attribute = function (name, value) {
+  let Attribute = function (name, value) {
    this.name = name;
    this.value = value;
  };

-  var Comment = function () {
+  let Comment = function () {
    this.childNodes = [];
  };

@ -470,7 +354,7 @@
    nodeType: Node.COMMENT_NODE
  };

-  var Text = function () {
+  let Text = function () {
    this.childNodes = [];
  };

@ -482,10 +366,9 @@
    textContent: ""
  }

-  var Document = function () {
+  let Document = function () {
    this.styleSheets = [];
    this.childNodes = [];
-    this.children = [];
  };

  Document.prototype = {
@ -499,11 +382,11 @@

    getElementById: function (id) {
      function getElem(node) {
-        var length = node.children.length;
+        let length = node.childNodes.length;
        if (node.id === id)
          return node;
-        for (var i = 0; i < length; i++) {
-          var el = getElem(node.children[i]);
+        for (let i = 0; i < length; i++) {
+          let el = getElem(node.childNodes[i]);
          if (el)
            return el;
        }
@ -513,16 +396,14 @@
    },

    createElement: function (tag) {
-      var node = new Element(tag);
+      let node = new Element(tag);
      return node;
    }
  };

-  var Element = function (tag) {
+  let Element = function (tag) {
    this.attributes = [];
    this.childNodes = [];
-    this.children = [];
-    this.nextElementSibling = this.previousElementSibling = null;
    this.localName = tag.toLowerCase();
    this.tagName = tag.toUpperCase();
    this.style = new Style(this);
@ -573,16 +454,16 @@

    get innerHTML() {
      function getHTML(node) {
-        var i = 0;
+        let i = 0;
        for (i = 0; i < node.childNodes.length; i++) {
-          var child = node.childNodes[i];
+          let child = node.childNodes[i];
          if (child.localName) {
            arr.push("<" + child.localName);

            // serialize attribute list
-            for (var j = 0; j < child.attributes.length; j++) {
-              var attr = child.attributes[j];
-              var quote = (attr.value.indexOf('"') === -1 ? '"' : "'");
+            for (let j = 0; j < child.attributes.length; j++) {
+              let attr = child.attributes[j];
+              let quote = (attr.value.indexOf('"') === -1 ? '"' : "'");
              arr.push(" " + attr.name + '=' + quote + attr.value + quote);
            }

@ -603,30 +484,30 @@

      // Using Array.join() avoids the overhead from lazy string concatenation.
      // See http://blog.cdleary.com/2012/01/string-representation-in-spidermonkey/#ropes
-      var arr = [];
+      let arr = [];
      getHTML(this);
      return arr.join("");
    },

    set innerHTML(html) {
-      var parser = new JSDOMParser();
-      var node = parser.parse(html);
-      for (var i = this.childNodes.length; --i >= 0;) {
+      let parser = new JSDOMParser();
+      let node = parser.parse(html);
+      for (let i = this.childNodes.length; --i >= 0;) {
        this.childNodes[i].parentNode = null;
      }
      this.childNodes = node.childNodes;
-      for (var i = this.childNodes.length; --i >= 0;) {
+      for (let i = this.childNodes.length; --i >= 0;) {
        this.childNodes[i].parentNode = this;
      }
    },

    set textContent(text) {
      // clear parentNodes for existing children
-      for (var i = this.childNodes.length; --i >= 0;) {
+      for (let i = this.childNodes.length; --i >= 0;) {
        this.childNodes[i].parentNode = null;
      }

-      var node = new Text();
+      let node = new Text();
      this.childNodes = [ node ];
      node.textContent = text;
      node.parentNode = this;
@ -634,9 +515,9 @@

    get textContent() {
      function getText(node) {
-        var nodes = node.childNodes;
-        for (var i = 0; i < nodes.length; i++) {
-          var child = nodes[i];
+        let nodes = node.childNodes;
+        for (let i = 0; i < nodes.length; i++) {
+          let child = nodes[i];
          if (child.nodeType === 3) {
            text.push(child.textContent);
          } else {
@ -647,14 +528,14 @@

      // Using Array.join() avoids the overhead from lazy string concatenation.
      // See http://blog.cdleary.com/2012/01/string-representation-in-spidermonkey/#ropes
-      var text = [];
+      let text = [];
      getText(this);
      return text.join("");
    },

    getAttribute: function (name) {
-      for (var i = this.attributes.length; --i >= 0;) {
-        var attr = this.attributes[i];
+      for (let i = this.attributes.length; --i >= 0;) {
+        let attr = this.attributes[i];
        if (attr.name === name)
          return attr.value;
      }
@ -662,8 +543,8 @@
    },

    setAttribute: function (name, value) {
-      for (var i = this.attributes.length; --i >= 0;) {
-        var attr = this.attributes[i];
+      for (let i = this.attributes.length; --i >= 0;) {
+        let attr = this.attributes[i];
        if (attr.name === name) {
          attr.value = value;
          return;
@ -673,8 +554,8 @@
    },

    removeAttribute: function (name) {
-      for (var i = this.attributes.length; --i >= 0;) {
-        var attr = this.attributes[i];
+      for (let i = this.attributes.length; --i >= 0;) {
+        let attr = this.attributes[i];
        if (attr.name === name) {
          this.attributes.splice(i, 1);
          break;
@ -683,7 +564,7 @@
    }
  };

-  var Style = function (node) {
+  let Style = function (node) {
    this.node = node;
  };

@ -694,14 +575,14 @@
  // manipulations, so this should be okay.
  Style.prototype = {
    getStyle: function (styleName) {
-      var attr = this.node.getAttribute("style");
+      let attr = this.node.getAttribute("style");
      if (!attr)
        return undefined;

-      var styles = attr.split(";");
-      for (var i = 0; i < styles.length; i++) {
-        var style = styles[i].split(":");
-        var name = style[0].trim();
+      let styles = attr.split(";");
+      for (let i = 0; i < styles.length; i++) {
+        let style = styles[i].split(":");
+        let name = style[0].trim();
        if (name === styleName)
          return style[1].trim();
      }
@ -710,12 +591,12 @@
    },

    setStyle: function (styleName, styleValue) {
-      var value = this.node.getAttribute("style") || "";
-      var index = 0;
+      let value = this.node.getAttribute("style") || "";
+      let index = 0;
      do {
-        var next = value.indexOf(";", index) + 1;
-        var length = next - index - 1;
-        var style = (length > 0 ? value.substr(index, length) : value.substr(index));
+        let next = value.indexOf(";", index) + 1;
+        let length = next - index - 1;
+        let style = (length > 0 ? value.substr(index, length) : value.substr(index));
        if (style.substr(0, style.indexOf(":")).trim() === styleName) {
          value = value.substr(0, index).trim() + (next ? " " + value.substr(next).trim() : "");
          break;
@ -730,7 +611,7 @@

  // For each item in styleMap, define a getter and setter on the style
  // property.
-  for (var jsName in styleMap) {
+  for (let jsName in styleMap) {
    (function (cssName) {
      Style.prototype.__defineGetter__(jsName, function () {
        return this.getStyle(cssName);
@ -741,7 +622,7 @@
    }) (styleMap[jsName]);
  }

-  var JSDOMParser = function () {
+  let JSDOMParser = function () {
    this.currentChar = 0;

    // In makeElementNode() we build up many strings one char at a time. Using
@ -778,8 +659,8 @@
     * character and returns the text string in between.
     */
    readString: function (quote) {
-      var str;
-      var n = this.html.indexOf(quote, this.currentChar);
+      let str;
+      let n = this.html.indexOf(quote, this.currentChar);
      if (n === -1) {
        this.currentChar = this.html.length;
        str = null;
@ -796,9 +677,9 @@
     * pair and adds the result to the attributes list.
     */
    readAttribute: function (node) {
-      var name = "";
+      let name = "";

-      var n = this.html.indexOf("=", this.currentChar);
+      let n = this.html.indexOf("=", this.currentChar);
      if (n === -1) {
        this.currentChar = this.html.length;
      } else {
@ -811,14 +692,14 @@
        return;

      // After a '=', we should see a '"' for the attribute value
-      var c = this.nextChar();
+      let c = this.nextChar();
      if (c !== '"' && c !== "'") {
-        error("Error reading attribute " + name + ", expecting '\"'");
+        error("expecting '\"'");
        return;
      }

      // Read the attribute value (and consume the matching quote)
-      var value = this.readString(c);
+      let value = this.readString(c);

      if (!value)
        return;
@ -837,30 +718,29 @@
     *          Element
     */
    makeElementNode: function (retPair) {
-      var c = this.nextChar();
+      let c = this.nextChar();

      // Read the Element tag name
-      var strBuf = this.strBuf;
+      let strBuf = this.strBuf;
      strBuf.length = 0;
-      while (whitespace.indexOf(c) == -1 && c !== ">" && c !== "/") {
+      while (c !== " " && c !== ">" && c !== "/") {
        if (c === undefined)
          return false;
        strBuf.push(c);
        c = this.nextChar();
      }
-      var tag = strBuf.join('');
+      let tag = strBuf.join('');

      if (!tag)
        return false;

-      var node = new Element(tag);
+      let node = new Element(tag);

      // Read Element attributes
      while (c !== "/" && c !== ">") {
        if (c === undefined)
          return false;
-        while (whitespace.indexOf(this.html[this.currentChar++]) != -1);
-        this.currentChar--;
+        while (this.match(" "));
        c = this.nextChar();
        if (c !== "/" && c !== ">") {
          --this.currentChar;
@ -869,12 +749,12 @@
      }

      // If this is a self-closing tag, read '/>'
-      var closed = tag in voidElems;
+      let closed = tag in voidElems;
      if (c === "/") {
        closed = true;
        c = this.nextChar();
        if (c !== ">") {
-          error("expected '>' to close " + tag);
+          error("expected '>'");
          return false;
        }
      }
@ -891,7 +771,7 @@
     * @returns whether input matched string
     */
    match: function (str) {
-      var strlen = str.length;
+      let strlen = str.length;
      if (this.html.substr(this.currentChar, strlen) === str) {
        this.currentChar += strlen;
        return true;
@ -904,7 +784,7 @@
     * and including the matched string.
     */
    discardTo: function (str) {
-      var index = this.html.indexOf(str, this.currentChar) + str.length;
+      let index = this.html.indexOf(str, this.currentChar) + str.length;
      if (index === -1)
        this.currentChar = this.html.length;
      this.currentChar = index;
@ -914,27 +794,16 @@
     * Reads child nodes for the given node.
     */
    readChildren: function (node) {
-      var child;
+      let child;
      while ((child = this.readNode())) {
        // Don't keep Comment nodes
        if (child.nodeType !== 8) {
-          node.appendChild(child);
+          node.childNodes.push(child);
+          child.parentNode = node;
        }
      }
    },

-    readScript: function (node) {
-      var index = this.html.indexOf("</script>", this.currentChar);
-      if (index === -1) {
-        index = this.html.length;
-      }
-      var txt = new Text();
-      txt.textContent = this.html.substring(this.currentChar, index === -1 ? this.html.length : index);
-      node.appendChild(txt);
-      this.currentChar = index;
-    },
-
-
    /**
     * Reads the next child node from the input. If we're reading a closing
     * tag, or if we've reached the end of input, return null.
@ -942,7 +811,7 @@
     * @returns the node
     */
    readNode: function () {
-      var c = this.nextChar();
+      let c = this.nextChar();
 
      if (c === undefined)
        return null;
@ -950,8 +819,8 @@
      // Read any text as Text node
      if (c !== "<") {
        --this.currentChar;
-        var node = new Text();
-        var n = this.html.indexOf("<", this.currentChar);
+        let node = new Text();
+        let n = this.html.indexOf("<", this.currentChar);
        if (n === -1) {
          node.textContent = this.html.substring(this.currentChar, this.html.length);
          this.currentChar = this.html.length;
@ -973,7 +842,7 @@
        if (this.match("--")) {
          this.discardTo("-->");
        } else {
-          var c = this.nextChar();
+          let c = this.nextChar();
          while (c !== ">") {
            if (c === undefined)
              return null;
@ -993,32 +862,25 @@
      }

      // Otherwise, we're looking at an Element node
-      var result = this.makeElementNode(this.retPair);
+      let result = this.makeElementNode(this.retPair);
      if (!result)
        return null;

-      var node = this.retPair[0];
-      var closed = this.retPair[1];
-      var localName = node.localName;
+      let node = this.retPair[0];
+      let closed = this.retPair[1];
+      let localName = node.localName;

      // If this isn't a void Element, read its child nodes
      if (!closed) {
-        if (localName == "script") {
-          this.readScript(node);
-        } else {
-          this.readChildren(node);
-        }
-        var closingTag = "</" + localName + ">";
+        this.readChildren(node);
+        let closingTag = "</" + localName + ">";
        if (!this.match(closingTag)) {
          error("expected '" + closingTag + "'");
          return null;
        }
      }

-      // Only use the first title, because SVG might have other
-      // title elements which we don't care about (medium.com
-      // does this, at least).
-      if (localName === "title" && !this.doc.title) {
+      if (localName === "title") {
        this.doc.title = node.textContent.trim();
      } else if (localName === "head") {
        this.doc.head = node;
@ -1036,14 +898,14 @@
     */
    parse: function (html) {
      this.html = html;
-      var doc = this.doc = new Document();
+      let doc = this.doc = new Document();
      this.readChildren(doc);

      // If this is an HTML document, remove root-level children except for the
      // <html> node
      if (doc.documentElement) {
-        for (var i = doc.childNodes.length; --i >= 0;) {
-          var child = doc.childNodes[i];
+        for (let i = doc.childNodes.length; --i >= 0;) {
+          let child = doc.childNodes[i];
          if (child !== doc.documentElement) {
            doc.removeChild(child);
          }
--- a/toolkit/components/reader/Readability.js
+++ b/toolkit/components/reader/Readability.js
@ -102,18 +102,16 @@ Readability.prototype = {
    extraneous: /print|archive|comment|discuss|e[\-]?mail|share|reply|all|login|sign|single|utility/i,
    byline: /byline|author|dateline|writtenby/i,
    replaceFonts: /<(\/?)font[^>]*>/gi,
+    trim: /^\s+|\s+$/g,
    normalize: /\s{2,}/g,
-    videos: /https?:\/\/(www\.)?(youtube|vimeo)\.com/i,
+    videos: /http:\/\/(www\.)?(youtube|vimeo)\.com/i,
    nextLink: /(next|weiter|continue|>([^\|]|$)|»([^\|]|$))/i,
    prevLink: /(prev|earl|old|new|<|«)/i,
-    whitespace: /^\s*$/,
-    hasContent: /\S$/,
+    whitespace: /^\s*$/
  },

  DIV_TO_P_ELEMS: [ "A", "BLOCKQUOTE", "DL", "DIV", "IMG", "OL", "P", "PRE", "TABLE", "UL", "SELECT" ],

-  ALTER_TO_DIV_EXCEPTIONS: ["DIV", "ARTICLE", "SECTION", "P"],
-
  /**
   * Run any post-process modifications to article content as necessary.
   *
@ -206,7 +204,7 @@ Readability.prototype = {
        curTitle = this._getInnerText(hOnes[0]);
    }

-    curTitle = curTitle.trim();
+    curTitle = curTitle.replace(this.REGEXPS.trim, "");

    if (curTitle.split(' ').length <= 4)
      curTitle = origTitle;
@ -225,8 +223,8 @@ Readability.prototype = {

    // Remove all style tags in head
    var styleTags = doc.getElementsByTagName("style");
-    for (var st = styleTags.length - 1; st >= 0; st -= 1) {
-      styleTags[st].parentNode.removeChild(styleTags[st]);
+    for (var st = 0; st < styleTags.length; st += 1) {
+      styleTags[st].textContent = "";
    }

    if (doc.body) {
@ -307,8 +305,6 @@ Readability.prototype = {
  },

  _setNodeTag: function (node, tag) {
-    // FIXME this doesn't work on anything but JSDOMParser (ie the node's tag
-    // won't actually be set).
    node.localName = tag.toLowerCase();
    node.tagName = tag.toUpperCase();
  },
@ -411,54 +407,6 @@ Readability.prototype = {
    node.readability.contentScore += this._getClassWeight(node);
  },

-  _removeAndGetNext: function(node) {
-    var nextNode = this._getNextNode(node, true);
-    node.parentNode.removeChild(node);
-    return nextNode;
-  },
-
-  /**
-   * Traverse the DOM from node to node, starting at the node passed in.
-   * Pass true for the second parameter to indicate this node itself
-   * (and its kids) are going away, and we want the next node over.
-   *
-   * Calling this in a loop will traverse the DOM depth-first.
-   */
-  _getNextNode: function(node, ignoreSelfAndKids) {
-    // First check for kids if those aren't being ignored
-    if (!ignoreSelfAndKids && node.firstElementChild) {
-      return node.firstElementChild;
-    }
-    // Then for siblings...
-    if (node.nextElementSibling) {
-      return node.nextElementSibling;
-    }
-    // And finally, move up the parent chain *and* find a sibling
-    // (because this is depth-first traversal, we will have already
-    // seen the parent nodes themselves).
-    do {
-      node = node.parentNode;
-    } while (node && !node.nextElementSibling);
-    return node && node.nextElementSibling;
-  },
-
-  _checkByline: function(node, matchString) {
-    if (this._articleByline) {
-      return false;
-    }
-
-    if (node.getAttribute !== undefined) {
-      var rel = node.getAttribute("rel");
-    }
-
-    if ((rel === "author" || this.REGEXPS.byline.test(matchString)) && this._isValidByline(node.textContent)) {
-      this._articleByline = node.textContent.trim();
-      return true;
-    }
-
-    return false;
-  },
-
  /***
   * grabArticle - Using a variety of metrics (content score, classname, element types), find the content that is
   *         most likely to be the stuff a user wants to read. Then return it wrapped up in a div.
@ -482,37 +430,65 @@ Readability.prototype = {
    // Check if any "dir" is set on the toplevel document element
    this._articleDir = doc.documentElement.getAttribute("dir");

+    //helper function used below in the 'while' loop:
+    function purgeNode(node, allElements) {
+      for (var i = node.childNodes.length; --i >= 0;) {
+        purgeNode(node.childNodes[i], allElements);
+      }
+      if (node._index !== undefined && allElements[node._index] == node)
+        delete allElements[node._index];
+    }
    while (true) {
      var stripUnlikelyCandidates = this._flagIsActive(this.FLAG_STRIP_UNLIKELYS);
+      var allElements = page.getElementsByTagName('*');

      // First, node prepping. Trash nodes that look cruddy (like ones with the
      // class name "comment", etc), and turn divs into P tags where they have been
      // used inappropriately (as in, where they contain no other block level elements.)
-      var elementsToScore = [];
-      var node = this._doc.documentElement;
+      //
+      // Note: Assignment from index for performance. See http://www.peachpit.com/articles/article.aspx?p=31567&seqNum=5
+      // TODO: Shouldn't this be a reverse traversal?
+      var node = null;
+      var nodesToScore = [];

-      while (node) {
-        var matchString = node.className + " " + node.id;
+      // Let each node know its index in the allElements array.
+      for (var i = allElements.length; --i >= 0;) {
+        allElements[i]._index = i;
+      }

-        // Check to see if this node is a byline, and remove it if it is.
-        if (this._checkByline(node, matchString)) {
-          node = this._removeAndGetNext(node);
+      /**
+       * JSDOMParser returns static node lists, not live ones. When we remove
+       * an element from the document, we need to manually remove it - and all
+       * of its children - from the allElements array.
+       */
+      for (var nodeIndex = 0; nodeIndex < allElements.length; nodeIndex++) {
+        if (!(node = allElements[nodeIndex]))
          continue;
+
+        var matchString = node.className + node.id;
+        if (matchString.search(this.REGEXPS.byline) !== -1 && !this._articleByline) {
+          if (this._isValidByline(node.textContent)) {
+            this._articleByline = node.textContent.trim();
+            node.parentNode.removeChild(node);
+            purgeNode(node, allElements);
+            continue;
+          }
        }

        // Remove unlikely candidates
        if (stripUnlikelyCandidates) {
-          if (this.REGEXPS.unlikelyCandidates.test(matchString) &&
-              !this.REGEXPS.okMaybeItsACandidate.test(matchString) &&
-              node.tagName !== "BODY") {
+          if (matchString.search(this.REGEXPS.unlikelyCandidates) !== -1 &&
+            matchString.search(this.REGEXPS.okMaybeItsACandidate) === -1 &&
+            node.tagName !== "BODY") {
            this.log("Removing unlikely candidate - " + matchString);
-            node = this._removeAndGetNext(node);
+            node.parentNode.removeChild(node);
+            purgeNode(node, allElements);
            continue;
          }
        }

        if (node.tagName === "P" || node.tagName === "TD" || node.tagName === "PRE")
-          elementsToScore.push(node);
+          nodesToScore[nodesToScore.length] = node;

        // Turn all divs that don't have children block level elements into p's
        if (node.tagName === "DIV") {
@ -520,28 +496,34 @@ Readability.prototype = {
          // element. DIVs with only a P element inside and no text content can be
          // safely converted into plain P elements to avoid confusing the scoring
          // algorithm with DIVs with are, in practice, paragraphs.
-          if (this._hasSinglePInsideElement(node)) {
-            var newNode = node.firstElementChild;
-            node.parentNode.replaceChild(newNode, node);
-            node = newNode;
-          } else if (!this._hasChildBlockElement(node)) {
-            this._setNodeTag(node, "P");
-            elementsToScore.push(node);
+          var pIndex = this._getSinglePIndexInsideDiv(node);
+
+          if (pIndex >= 0 || !this._hasChildBlockElement(node)) {
+            if (pIndex >= 0) {
+              var newNode = node.childNodes[pIndex];
+              node.parentNode.replaceChild(newNode, node);
+              purgeNode(node, allElements);
+            } else {
+              this._setNodeTag(node, "P");
+              nodesToScore[nodesToScore.length] = node;
+            }
          } else {
            // EXPERIMENTAL
            for (var i = 0, il = node.childNodes.length; i < il; i += 1) {
              var childNode = node.childNodes[i];
-              if (childNode.nodeType === Node.TEXT_NODE) {
+              if (!childNode)
+                continue;
+
+              if (childNode.nodeType === 3) { // Node.TEXT_NODE
                var p = doc.createElement('p');
                p.textContent = childNode.textContent;
                p.style.display = 'inline';
                p.className = 'readability-styled';
-                node.replaceChild(p, childNode);
+                childNode.parentNode.replaceChild(p, childNode);
              }
            }
          }
        }
-        node = this._getNextNode(node);
      }

      /**
@ -551,10 +533,10 @@ Readability.prototype = {
       * A score is determined by things like number of commas, class names, etc. Maybe eventually link density.
      **/
      var candidates = [];
-      for (var pt = 0; pt < elementsToScore.length; pt += 1) {
-        var parentNode = elementsToScore[pt].parentNode;
+      for (var pt = 0; pt < nodesToScore.length; pt += 1) {
+        var parentNode = nodesToScore[pt].parentNode;
        var grandParentNode = parentNode ? parentNode.parentNode : null;
-        var innerText = this._getInnerText(elementsToScore[pt]);
+        var innerText = this._getInnerText(nodesToScore[pt]);

        if (!parentNode || typeof(parentNode.tagName) === 'undefined')
          continue;
@ -630,40 +612,15 @@ Readability.prototype = {
        // Move all of the page's children into topCandidate
        topCandidate = doc.createElement("DIV");
        neededToCreateTopCandidate = true;
-        // Move everything (not just elements, also text nodes etc.) into the container
-        // so we even include text directly in the body:
-        var kids = page.childNodes;
-        while (kids.length) {
-          this.log("Moving child out:", kids[0]);
-          topCandidate.appendChild(kids[0]);
+        var children = page.childNodes;
+        while (children.length) {
+          this.log("Moving child out:", children[0]);
+          topCandidate.appendChild(children[0]);
        }

        page.appendChild(topCandidate);

        this._initializeNode(topCandidate);
-      } else if (topCandidate) {
-        // Because of our bonus system, parents of candidates might have scores
-        // themselves. They get half of the node. There won't be nodes with higher
-        // scores than our topCandidate, but if we see the score going *up* in the first
-        // few steps up the tree, that's a decent sign that there might be more content
-        // lurking in other places that we want to unify in. The sibling stuff
-        // below does some of that - but only if we've looked high enough up the DOM
-        // tree.
-        var parentOfTopCandidate = topCandidate.parentNode;
-        // The scores shouldn't get too low.
-        var scoreThreshold = topCandidate.readability.contentScore / 3;
-        var lastScore = parentOfTopCandidate.readability.contentScore;
-        while (parentOfTopCandidate && parentOfTopCandidate.readability) {
-          var parentScore = parentOfTopCandidate.readability.contentScore;
-          if (parentScore < scoreThreshold)
-            break;
-          if (parentScore > lastScore) {
-            // Alright! We found a better parent to use.
-            topCandidate = parentOfTopCandidate;
-            break;
-          }
-          parentOfTopCandidate = parentOfTopCandidate.parentNode;
-        }
      }

      // Now that we have the top candidate, look through its siblings for content
@ -674,71 +631,72 @@ Readability.prototype = {
        articleContent.id = "readability-content";

      var siblingScoreThreshold = Math.max(10, topCandidate.readability.contentScore * 0.2);
-      var siblings = topCandidate.parentNode.children;
+      var siblingNodes = topCandidate.parentNode.childNodes;

-      for (var s = 0, sl = siblings.length; s < sl; s++) {
-        var sibling = siblings[s];
+      for (var s = 0, sl = siblingNodes.length; s < sl; s += 1) {
+        var siblingNode = siblingNodes[s];
        var append = false;

-        this.log("Looking at sibling node:", sibling, sibling.readability ? ("with score " + sibling.readability.contentScore) : '');
-        this.log("Sibling has score", sibling.readability ? sibling.readability.contentScore : 'Unknown');
+        this.log("Looking at sibling node:", siblingNode, ((typeof siblingNode.readability !== 'undefined') ? ("with score " + siblingNode.readability.contentScore) : ''));
+        this.log("Sibling has score " + (siblingNode.readability ? siblingNode.readability.contentScore : 'Unknown'));

-        if (sibling === topCandidate) {
+        if (siblingNode === topCandidate)
          append = true;
-        } else {
-          var contentBonus = 0;

-          // Give a bonus if sibling nodes and top candidates have the example same classname
-          if (sibling.className === topCandidate.className && topCandidate.className !== "")
-            contentBonus += topCandidate.readability.contentScore * 0.2;
+        var contentBonus = 0;

-          if (sibling.readability &&
-              ((sibling.readability.contentScore + contentBonus) >= siblingScoreThreshold)) {
+        // Give a bonus if sibling nodes and top candidates have the exact same classname
+        if (siblingNode.className === topCandidate.className && topCandidate.className !== "")
+          contentBonus += topCandidate.readability.contentScore * 0.2;
+
+        if (typeof siblingNode.readability !== 'undefined' &&
+          (siblingNode.readability.contentScore+contentBonus) >= siblingScoreThreshold)
+          append = true;
+
+        if (siblingNode.nodeName === "P") {
+          var linkDensity = this._getLinkDensity(siblingNode);
+          var nodeContent = this._getInnerText(siblingNode);
+          var nodeLength = nodeContent.length;
+
+          if (nodeLength > 80 && linkDensity < 0.25) {
+            append = true;
+          } else if (nodeLength < 80 && linkDensity === 0 && nodeContent.search(/\.( |$)/) !== -1) {
            append = true;
-          } else if (sibling.nodeName === "P") {
-            var linkDensity = this._getLinkDensity(sibling);
-            var nodeContent = this._getInnerText(sibling);
-            var nodeLength = nodeContent.length;
-
-            if (nodeLength > 80 && linkDensity < 0.25) {
-              append = true;
-            } else if (nodeLength < 80 && linkDensity === 0 && nodeContent.search(/\.( |$)/) !== -1) {
-              append = true;
-            }
          }
        }

        if (append) {
-          this.log("Appending node:", sibling);
+          this.log("Appending node:", siblingNode);

-          if (this.ALTER_TO_DIV_EXCEPTIONS.indexOf(sibling.nodeName) === -1) {
+          // siblingNodes is a reference to the childNodes array, and
+          // siblingNode is removed from the array when we call appendChild()
+          // below. As a result, we must revisit this index since the nodes
+          // have been shifted.
+          s -= 1;
+          sl -= 1;
+
+          if (siblingNode.nodeName !== "DIV" && siblingNode.nodeName !== "P") {
            // We have a node that isn't a common block level element, like a form or td tag.
-            // Turn it into a div so it doesn't get filtered out later by accident.
-            this.log("Altering sibling:", sibling, 'to div.');
+            // Turn it into a div so it doesn't get filtered out later by accident.
+            this.log("Altering siblingNode:", siblingNode, 'to div.');

-            this._setNodeTag(sibling, "DIV");
+            this._setNodeTag(siblingNode, "DIV");
          }

          // To ensure a node does not interfere with readability styles,
          // remove its classnames.
-          sibling.removeAttribute("class");
+          siblingNode.removeAttribute("class");

-          articleContent.appendChild(sibling);
-          // siblings is a reference to the children array, and
-          // sibling is removed from the array when we call appendChild().
-          // As a result, we must revisit this index since the nodes
-          // have been shifted.
-          s -= 1;
-          sl -= 1;
+          // Append the sibling to the article content. appendChild() removes it
+          // from siblingNodes, which is why s and sl were decremented above.
+          articleContent.appendChild(siblingNode);
        }
      }

-      if (this.ENABLE_LOGGING)
-        this.log("Article content pre-prep: " + articleContent.innerHTML);
+      this.log("Article content pre-prep: " + articleContent.innerHTML);
      // So we have all of the content that we need. Now we clean it up for presentation.
      this._prepArticle(articleContent);
-      if (this.ENABLE_LOGGING)
-        this.log("Article content post-prep: " + articleContent.innerHTML);
+      this.log("Article content post-prep: " + articleContent.innerHTML);

      if (this._curPageNum === 1) {
        if (neededToCreateTopCandidate) {
@ -760,8 +718,7 @@ Readability.prototype = {
        }
      }

-      if (this.ENABLE_LOGGING)
-        this.log("Article content after paging: " + articleContent.innerHTML);
+      this.log("Article content after paging: " + articleContent.innerHTML);

      // Now that we've gone through the full algorithm, check to see if
      // we got any meaningful content. If we didn't, we may need to re-run
@ -803,12 +760,19 @@ Readability.prototype = {
  },

  /**
-   * Attempts to get excerpt and byline metadata for the article.
-   * 
-   * @return Object with optional "excerpt" and "byline" properties
-   */
-  _getArticleMetadata: function() {
-    var metadata = {};
+   * Attempts to get the excerpt from these
+   * sources in the following order:
+   * - meta description tag
+   * - open-graph description
+   * - twitter cards description
+   * - article's first paragraph
+   * If no excerpt is found, an empty string will be
+   * returned.
+   *
+   * @param Element - root element of the processed version page
+   * @return String - excerpt of the article
+  **/
+  _getExcerpt: function(articleContent) {
    var values = {};
    var metaElements = this._doc.getElementsByTagName("meta");

@ -825,12 +789,7 @@ Readability.prototype = {
      var elementName = element.getAttribute("name");
      var elementProperty = element.getAttribute("property");

-      if (elementName === "author") {
-        metadata.byline = element.getAttribute("content");
-        continue;
-      }
-
-      var name = null;
+      var name;
      if (namePattern.test(elementName)) {
        name = elementName;
      } else if (propertyPattern.test(elementProperty)) {
@ -849,16 +808,26 @@ Readability.prototype = {
    }

    if ("description" in values) {
-      metadata.excerpt = values["description"];
-    } else if ("og:description" in values) {
-      // Use facebook open graph description.
-      metadata.excerpt = values["og:description"];
-    } else if ("twitter:description" in values) {
-      // Use twitter cards description.
-      metadata.excerpt = values["twitter:description"];
+      return values["description"];
    }

-    return metadata;
+    if ("og:description" in values) {
+      // Use facebook open graph description.
+      return values["og:description"];
+    }
+
+    if ("twitter:description" in values) {
+      // Use twitter cards description.
+      return values["twitter:description"];
+    }
+
+    // No description meta tags, use the article's first paragraph.
+    var paragraphs = articleContent.getElementsByTagName("p");
+    if (paragraphs.length > 0) {
+      return paragraphs[0].textContent;
+    }
+
+    return "";
  },

  /**
@ -878,28 +847,33 @@ Readability.prototype = {
  },

  /**
-   * Check if this node has only whitespace and a single P element
-   * Returns false if the DIV node contains non-empty text nodes
-   * or if it contains no P or more than 1 element.
+   * Get child index of the only P element inside a DIV with no
+   * text content. Returns -1 if the DIV node contains non-empty
+   * text nodes or if it contains other element nodes.
   *
   * @param Element
  **/
-  _hasSinglePInsideElement: function(e) {
-    // There should be exactly 1 element child which is a P:
-    if (e.children.length != 1 || e.firstElementChild.tagName !== "P") {
-      return false;
-    }
-    // And there should be no text nodes with real content
+  _getSinglePIndexInsideDiv: function(e) {
    var childNodes = e.childNodes;
+    var pIndex = -1;
+
    for (var i = childNodes.length; --i >= 0;) {
      var node = childNodes[i];
-      if (node.nodeType == Node.TEXT_NODE &&
-          this.REGEXPS.hasContent.test(node.textContent)) {
-        return false;
+
+      if (node.nodeType === Node.ELEMENT_NODE) {
+        if (node.tagName !== "P")
+          return -1;
+
+        if (pIndex >= 0)
+          return -1;
+
+        pIndex = i;
+      } else if (node.nodeType == Node.TEXT_NODE && this._getInnerText(node, false)) {
+        return -1;
      }
    }

-    return true;
+    return pIndex;
  },

  /**
@ -908,9 +882,12 @@ Readability.prototype = {
   * @param Element
   */
  _hasChildBlockElement: function (e) {
-    var length = e.children.length;
+    var length = e.childNodes.length;
    for (var i = 0; i < length; i++) {
-      var child = e.children[i];
+      var child = e.childNodes[i];
+      if (child.nodeType != 1)
+        continue;
+
      if (this.DIV_TO_P_ELEMS.indexOf(child.tagName) !== -1 || this._hasChildBlockElement(child))
        return true;
    }
@ -925,7 +902,7 @@ Readability.prototype = {
   * @return string
  **/
  _getInnerText: function(e, normalizeSpaces) {
-    var textContent = e.textContent.trim();
+    var textContent = e.textContent.replace(this.REGEXPS.trim, "");
    normalizeSpaces = (typeof normalizeSpaces === 'undefined') ? true : normalizeSpaces;

    if (normalizeSpaces) {
@ -956,9 +933,10 @@ Readability.prototype = {
  **/
  _cleanStyles: function(e) {
    e = e || this._doc;
+    var cur = e.firstChild;
+
    if (!e)
      return;
-    var cur = e.firstChild;

    // Remove any root styles, if we're able.
    if (typeof e.removeAttribute === 'function' && e.className !== 'readability-styled')
@ -966,7 +944,7 @@ Readability.prototype = {

    // Go until there are no more child nodes
    while (cur !== null) {
-      if (cur.nodeType === cur.ELEMENT_NODE) {
+      if (cur.nodeType === 1) {
        // Remove style attribute(s) :
        if (cur.className !== "readability-styled")
          cur.removeAttribute("style");
@ -1377,19 +1355,19 @@ Readability.prototype = {

    // Look for a special classname
    if (typeof(e.className) === 'string' && e.className !== '') {
-      if (this.REGEXPS.negative.test(e.className))
+      if (e.className.search(this.REGEXPS.negative) !== -1)
        weight -= 25;

-      if (this.REGEXPS.positive.test(e.className))
+      if (e.className.search(this.REGEXPS.positive) !== -1)
        weight += 25;
    }

    // Look for a special ID
    if (typeof(e.id) === 'string' && e.id !== '') {
-      if (this.REGEXPS.negative.test(e.id))
+      if (e.id.search(this.REGEXPS.negative) !== -1)
        weight -= 25;

-      if (this.REGEXPS.positive.test(e.id))
+      if (e.id.search(this.REGEXPS.positive) !== -1)
        weight += 25;
    }

@ -1417,11 +1395,11 @@ Readability.prototype = {
        }

        // First, check the elements attributes to see if any of them contain youtube or vimeo
-        if (this.REGEXPS.videos.test(attributeValues))
+        if (attributeValues.search(this.REGEXPS.videos) !== -1)
          continue;

        // Then check the elements inside this element for the same.
-        if (this.REGEXPS.videos.test(targetList[y].innerHTML))
+        if (targetList[y].innerHTML.search(this.REGEXPS.videos) !== -1)
          continue;
      }

@ -1467,7 +1445,7 @@ Readability.prototype = {
        var embedCount = 0;
        var embeds = tagsList[i].getElementsByTagName("embed");
        for (var ei = 0, il = embeds.length; ei < il; ei += 1) {
-          if (!this.REGEXPS.videos.test(embeds[ei].src))
+          if (embeds[ei].src.search(this.REGEXPS.videos) === -1)
            embedCount += 1;
        }

@ -1554,8 +1532,6 @@ Readability.prototype = {
    this._prepDocument();

    var articleTitle = this._getArticleTitle();
-    var metadata = this._getArticleMetadata();
-
    var articleContent = this._grabArticle();
    if (!articleContent)
      return null;
@ -1572,22 +1548,14 @@ Readability.prototype = {
    //   }).bind(this), 500);
    // }

-    // If we haven't found an excerpt in the article's metadata, use the article's
-    // first paragraph as the excerpt. This is used for displaying a preview of
-    // the article's content.
-    if (!metadata.excerpt) {
-      var paragraphs = articleContent.getElementsByTagName("p");
-      if (paragraphs.length > 0) {
-        metadata.excerpt = paragraphs[0].textContent;
-      }
-    }
+    var excerpt = this._getExcerpt(articleContent);

    return { uri: this._uri,
             title: articleTitle,
-             byline: metadata.byline || this._articleByline,
+             byline: this._articleByline,
             dir: this._articleDir,
             content: articleContent.innerHTML,
             length: articleContent.textContent.length,
-             excerpt: metadata.excerpt };
+             excerpt: excerpt };
  }
 };