No bug - Update readability from github repo, includes fix for Bug 1142312 and Bug 1285543, r=Gijs

MozReview-Commit-ID: 5hi1iuDO3XE

--HG--
extra : rebase_source : 9c15ccde6cadc4df3d7751ca90a53dc4c0d021c3
This commit is contained in:
Evan Tseng 2016-12-15 12:03:53 +08:00
Родитель 6182653bf0
Коммит 0eebd3161c
2 изменённых файлов: 30 добавлений и 16 удалений

Просмотреть файл

@ -31,7 +31,7 @@ var TEST_PAGES = [
{
url: URL_PREFIX + "developer.mozilla.org/en/XULRunner/Build_Instructions.html",
expected: {
title: "Building XULRunner",
title: "Building XULRunner | MDN",
byline: null,
excerpt: "XULRunner is built using basically the same process as Firefox or other applications. Please read and follow the general Build Documentation for instructions on how to get sources and set up build prerequisites.",
}

Просмотреть файл

@ -119,7 +119,7 @@ Readability.prototype = {
// All of the regular expressions in use within readability.
// Defined up here so we don't instantiate them repeatedly in loops.
REGEXPS: {
unlikelyCandidates: /banner|combx|comment|community|disqus|extra|foot|header|legends|menu|modal|related|remark|rss|shoutbox|sidebar|skyscraper|sponsor|ad-break|agegate|pagination|pager|popup/i,
unlikelyCandidates: /banner|combx|comment|community|cover-wrap|disqus|extra|foot|header|legends|menu|modal|related|remark|rss|shoutbox|sidebar|skyscraper|sponsor|ad-break|agegate|pagination|pager|popup|yom-remote/i,
okMaybeItsACandidate: /and|article|body|column|main|shadow/i,
positive: /article|body|content|entry|hentry|h-entry|main|page|pagination|post|text|blog|story/i,
negative: /hidden|^hid$| hid$| hid |^hid |banner|combx|comment|com-|contact|foot|footer|footnote|masthead|media|meta|modal|outbrain|promo|related|scroll|share|shoutbox|sidebar|skyscraper|sponsor|shopping|tags|tool|widget/i,
@ -155,8 +155,8 @@ Readability.prototype = {
*
* If function is not passed, removes all the nodes in node list.
*
* @param NodeList nodeList The no
* @param Function filterFn
* @param NodeList nodeList The nodes to operate on
* @param Function filterFn the function to use as a filter
* @return void
*/
_removeNodes: function(nodeList, filterFn) {
@ -171,6 +171,20 @@ Readability.prototype = {
}
},
/**
* Iterates over a NodeList, and calls _setNodeTag for each node.
*
* @param NodeList nodeList The nodes to operate on
* @param String newTagName the new tag name to use
* @return void
*/
_replaceNodeTags: function(nodeList, newTagName) {
for (var i = nodeList.length - 1; i >= 0; i--) {
var node = nodeList[i];
this._setNodeTag(node, newTagName);
}
},
/**
* Iterate over a NodeList, which doesn't natively fully implement the Array
* interface.
@ -180,10 +194,9 @@ Readability.prototype = {
*
* @param NodeList nodeList The NodeList.
* @param Function fn The iterate function.
* @param Boolean backward Whether to use backward iteration.
* @return void
*/
_forEachNode: function(nodeList, fn, backward) {
_forEachNode: function(nodeList, fn) {
Array.prototype.forEach.call(nodeList, fn, this);
},
@ -362,9 +375,7 @@ Readability.prototype = {
this._replaceBrs(doc.body);
}
this._forEachNode(doc.getElementsByTagName("font"), function(fontNode) {
this._setNodeTag(fontNode, "SPAN");
});
this._replaceNodeTags(doc.getElementsByTagName("font"), "SPAN");
},
/**
@ -1062,12 +1073,15 @@ Readability.prototype = {
metadata.excerpt = values["twitter:description"];
}
if ("og:title" in values) {
// Use facebook open graph title.
metadata.title = values["og:title"];
} else if ("twitter:title" in values) {
// Use twitter cards title.
metadata.title = values["twitter:title"];
metadata.title = this._getArticleTitle();
if (!metadata.title) {
if ("og:title" in values) {
// Use facebook open graph title.
metadata.title = values["og:title"];
} else if ("twitter:title" in values) {
// Use twitter cards title.
metadata.title = values["twitter:title"];
}
}
return metadata;
@ -1857,7 +1871,7 @@ Readability.prototype = {
this._prepDocument();
var metadata = this._getArticleMetadata();
var articleTitle = metadata.title || this._getArticleTitle();
var articleTitle = metadata.title;
var articleContent = this._grabArticle();
if (!articleContent)