зеркало из https://github.com/mozilla/gecko-dev.git
Bug 1158184 - merge recent github readability changes into m-c, rs=me
--HG-- extra : rebase_source : f1722be5d87137dd73620bfe9d277f990d96a94c
This commit is contained in:
Родитель
46f3b97701
Коммит
328227f7cb
|
@ -171,6 +171,21 @@ Readability.prototype = {
|
|||
return Array.prototype.some.call(nodeList, fn, this);
|
||||
},
|
||||
|
||||
/**
|
||||
* Concatenates all the NodeLists passed as arguments into one flat array.
|
||||
*
|
||||
* @param ...NodeList  Any number of NodeLists (or other array-likes); each one is copied into a real array before being joined.
|
||||
* @return Array  A new array holding the items of every list, in argument order.
|
||||
*/
|
||||
_concatNodeLists: function() {
|
||||
var slice = Array.prototype.slice;
|
||||
// Turn the array-like `arguments` object into a real array so it can be mapped.
var args = slice.call(arguments);
|
||||
var nodeLists = args.map(function(list) {
|
||||
// Copy each list into a true array; concat only flattens real arrays.
return slice.call(list);
|
||||
});
|
||||
// Spread the per-list arrays as concat arguments onto an empty array.
return Array.prototype.concat.apply([], nodeLists);
|
||||
},
|
||||
|
||||
/**
|
||||
* Converts each <a> and <img> uri in the given element to an absolute URI.
|
||||
*
|
||||
|
@ -252,10 +267,24 @@ Readability.prototype = {
|
|||
if (curTitle.split(' ').length < 3)
|
||||
curTitle = origTitle.replace(/[^\|\-]*[\|\-](.*)/gi,'$1');
|
||||
} else if (curTitle.indexOf(': ') !== -1) {
|
||||
curTitle = origTitle.replace(/.*:(.*)/gi, '$1');
|
||||
// Check if we have a heading containing this exact string, so we
|
||||
// could assume it's the full title.
|
||||
var headings = this._concatNodeLists(
|
||||
doc.getElementsByTagName('h1'),
|
||||
doc.getElementsByTagName('h2')
|
||||
);
|
||||
var match = this._someNode(headings, function(heading) {
|
||||
return heading.textContent === curTitle;
|
||||
});
|
||||
|
||||
if (curTitle.split(' ').length < 3)
|
||||
curTitle = origTitle.replace(/[^:]*[:](.*)/gi,'$1');
|
||||
// If we don't, let's extract the title out of the original title string.
|
||||
if (!match) {
|
||||
curTitle = origTitle.substring(origTitle.lastIndexOf(':') + 1);
|
||||
|
||||
// If the title is now too short, try the first colon instead:
|
||||
if (curTitle.split(' ').length < 3)
|
||||
curTitle = origTitle.substring(origTitle.indexOf(':') + 1);
|
||||
}
|
||||
} else if (curTitle.length > 150 || curTitle.length < 15) {
|
||||
var hOnes = doc.getElementsByTagName('h1');
|
||||
|
||||
|
@ -396,6 +425,7 @@ Readability.prototype = {
|
|||
this._clean(articleContent, "object");
|
||||
this._clean(articleContent, "embed");
|
||||
this._clean(articleContent, "h1");
|
||||
this._clean(articleContent, "footer");
|
||||
|
||||
// If there is only one h2, they are probably using it as a header
|
||||
// and not a subheader, so remove it since we already have a header.
|
||||
|
@ -913,10 +943,10 @@ Readability.prototype = {
|
|||
|
||||
// Match "description", or Twitter's "twitter:description" (Cards)
|
||||
// in name attribute.
|
||||
var namePattern = /^\s*((twitter)\s*:\s*)?description\s*$/gi;
|
||||
var namePattern = /^\s*((twitter)\s*:\s*)?(description|title)\s*$/gi;
|
||||
|
||||
// Match Facebook's og:description (Open Graph) in property attribute.
|
||||
var propertyPattern = /^\s*og\s*:\s*description\s*$/gi;
|
||||
// Match Facebook's Open Graph title & description properties.
|
||||
var propertyPattern = /^\s*og\s*:\s*(description|title)\s*$/gi;
|
||||
|
||||
// Find description and title tags.
|
||||
this._forEachNode(metaElements, function(element) {
|
||||
|
@ -956,6 +986,14 @@ Readability.prototype = {
|
|||
metadata.excerpt = values["twitter:description"];
|
||||
}
|
||||
|
||||
if ("og:title" in values) {
|
||||
// Use facebook open graph title.
|
||||
metadata.title = values["og:title"];
|
||||
} else if ("twitter:title" in values) {
|
||||
// Use twitter cards title.
|
||||
metadata.title = values["twitter:title"];
|
||||
}
|
||||
|
||||
return metadata;
|
||||
},
|
||||
|
||||
|
@ -1715,8 +1753,8 @@ Readability.prototype = {
|
|||
|
||||
this._prepDocument();
|
||||
|
||||
var articleTitle = this._getArticleTitle();
|
||||
var metadata = this._getArticleMetadata();
|
||||
var articleTitle = metadata.title || this._getArticleTitle();
|
||||
|
||||
var articleContent = this._grabArticle();
|
||||
if (!articleContent)
|
||||
|
|
Загрузка…
Ссылка в новой задаче