Bug 784386 - Part 8: Replace innerHTML call for divToPElements search. r=lucasr

This commit is contained in:
Brian Nicholson 2012-08-31 15:56:13 -07:00
Родитель 0ce7fefd09
Коммит cd0cfb30b5
1 изменённых файлов: 21 добавлений и 2 удалений

Просмотреть файл

@ -75,7 +75,6 @@ Readability.prototype = {
negative: /hidden|combx|comment|com-|contact|foot|footer|footnote|masthead|media|meta|outbrain|promo|related|scroll|shoutbox|sidebar|sponsor|shopping|tags|tool|widget/i,
extraneous: /print|archive|comment|discuss|e[\-]?mail|share|reply|all|login|sign|single|utility/i,
byline: /byline|author|dateline|writtenby/i,
divToPElements: /<(a|blockquote|dl|div|img|ol|p|pre|table|ul|select)/i,
replaceFonts: /<(\/?)font[^>]*>/gi,
trim: /^\s+|\s+$/g,
normalize: /\s{2,}/g,
@ -86,6 +85,8 @@ Readability.prototype = {
whitespace: /^\s*$/
},
// Tag names that _hasChildBlockElement() searches for among an element's
// descendants; each entry is compared verbatim against a child's tagName.
DIV_TO_P_ELEMS: [ "A", "BLOCKQUOTE", "DL", "DIV", "IMG", "OL", "P", "PRE", "TABLE", "UL", "SELECT" ],
/**
* Run any post-process modifications to article content as necessary.
*
@ -477,7 +478,7 @@ Readability.prototype = {
// algorithm with DIVs which are, in practice, paragraphs.
let pIndex = this._getSinglePIndexInsideDiv(node);
if (node.innerHTML.search(this.REGEXPS.divToPElements) === -1 || pIndex >= 0) {
if (pIndex >= 0 || !this._hasChildBlockElement(node)) {
if (pIndex >= 0) {
let newNode = node.childNodes[pIndex];
node.parentNode.replaceChild(newNode, node);
@ -766,6 +767,24 @@ Readability.prototype = {
return pIndex;
},
/**
* Determine whether element has any children block level elements.
*
* @param Element
*/
_hasChildBlockElement: function (e) {
let length = e.childNodes.length;
for (let i = 0; i < length; i++) {
let child = e.childNodes[i];
if (child.nodeType != 1)
continue;
if (this.DIV_TO_P_ELEMS.indexOf(child.tagName) !== -1 || this._hasChildBlockElement(child))
return true;
}
return false;
},
/**
* Get the inner text of a node - cross browser compatibly.
* This also strips out any excess whitespace to be found.