Bug 1158184 - merge recent github readability changes into m-c, rs=me

--HG--
extra : rebase_source : f1722be5d87137dd73620bfe9d277f990d96a94c
This commit is contained in:
Gijs Kruitbosch 2015-04-24 16:20:02 +01:00
Родитель 46f3b97701
Коммит 328227f7cb
1 изменённых файлов: 45 добавлений и 7 удалений

Просмотреть файл

@ -171,6 +171,21 @@ Readability.prototype = {
return Array.prototype.some.call(nodeList, fn, this);
},
/**
* Concat all nodelists passed as arguments.
*
* @return ...NodeList
* @return Array
*/
_concatNodeLists: function() {
var slice = Array.prototype.slice;
var args = slice.call(arguments);
var nodeLists = args.map(function(list) {
return slice.call(list);
});
return Array.prototype.concat.apply([], nodeLists);
},
/**
* Converts each <a> and <img> uri in the given element to an absolute URI.
*
@ -252,10 +267,24 @@ Readability.prototype = {
if (curTitle.split(' ').length < 3)
curTitle = origTitle.replace(/[^\|\-]*[\|\-](.*)/gi,'$1');
} else if (curTitle.indexOf(': ') !== -1) {
curTitle = origTitle.replace(/.*:(.*)/gi, '$1');
// Check if we have a heading containing this exact string, so we
// could assume it's the full title.
var headings = this._concatNodeLists(
doc.getElementsByTagName('h1'),
doc.getElementsByTagName('h2')
);
var match = this._someNode(headings, function(heading) {
return heading.textContent === curTitle;
});
if (curTitle.split(' ').length < 3)
curTitle = origTitle.replace(/[^:]*[:](.*)/gi,'$1');
// If we don't, let's extract the title out of the original title string.
if (!match) {
curTitle = origTitle.substring(origTitle.lastIndexOf(':') + 1);
// If the title is now too short, try the first colon instead:
if (curTitle.split(' ').length < 3)
curTitle = origTitle.substring(origTitle.indexOf(':') + 1);
}
} else if (curTitle.length > 150 || curTitle.length < 15) {
var hOnes = doc.getElementsByTagName('h1');
@ -396,6 +425,7 @@ Readability.prototype = {
this._clean(articleContent, "object");
this._clean(articleContent, "embed");
this._clean(articleContent, "h1");
this._clean(articleContent, "footer");
// If there is only one h2, they are probably using it as a header
// and not a subheader, so remove it since we already have a header.
@ -913,10 +943,10 @@ Readability.prototype = {
// Match "description", or Twitter's "twitter:description" (Cards)
// in name attribute.
var namePattern = /^\s*((twitter)\s*:\s*)?description\s*$/gi;
var namePattern = /^\s*((twitter)\s*:\s*)?(description|title)\s*$/gi;
// Match Facebook's og:description (Open Graph) in property attribute.
var propertyPattern = /^\s*og\s*:\s*description\s*$/gi;
// Match Facebook's Open Graph title & description properties.
var propertyPattern = /^\s*og\s*:\s*(description|title)\s*$/gi;
// Find description tags.
this._forEachNode(metaElements, function(element) {
@ -956,6 +986,14 @@ Readability.prototype = {
metadata.excerpt = values["twitter:description"];
}
if ("og:title" in values) {
// Use facebook open graph title.
metadata.title = values["og:title"];
} else if ("twitter:title" in values) {
// Use twitter cards title.
metadata.title = values["twitter:title"];
}
return metadata;
},
@ -1715,8 +1753,8 @@ Readability.prototype = {
this._prepDocument();
var articleTitle = this._getArticleTitle();
var metadata = this._getArticleMetadata();
var articleTitle = metadata.title || this._getArticleTitle();
var articleContent = this._grabArticle();
if (!articleContent)