зеркало из https://github.com/mozilla/gecko-dev.git
Bug 782285 - Don't offer reader in pages with too much reading competition (r=mfinkle)
* * * blo
This commit is contained in:
Родитель
07f3bf083b
Коммит
4a98b7cf20
|
@ -58,6 +58,10 @@ Readability.prototype = {
|
|||
FLAG_WEIGHT_CLASSES: 0x2,
|
||||
FLAG_CLEAN_CONDITIONALLY: 0x4,
|
||||
|
||||
// The number of top candidates to consider when analysing how
|
||||
// tight the competition is among candidates.
|
||||
N_TOP_CANDIDATES: 5,
|
||||
|
||||
// The maximum number of pages to loop through before we call
|
||||
// it quits and just show a link.
|
||||
MAX_PAGES: 5,
|
||||
|
@ -535,24 +539,34 @@ Readability.prototype = {
|
|||
|
||||
// After we've calculated scores, loop through all of the possible
|
||||
// candidate nodes we found and find the one with the highest score.
|
||||
let topCandidate = null;
|
||||
let topCandidates = [];
|
||||
for (let c = 0, cl = candidates.length; c < cl; c += 1) {
|
||||
let candidate = candidates[c];
|
||||
|
||||
// Scale the final candidates score based on link density. Good content
|
||||
// should have a relatively small link density (5% or less) and be mostly
|
||||
// unaffected by this operation.
|
||||
candidates[c].readability.contentScore =
|
||||
candidates[c].readability.contentScore * (1 - this._getLinkDensity(candidates[c]));
|
||||
let candidateScore = candidate.readability.contentScore * (1 - this._getLinkDensity(candidate));
|
||||
candidate.readability.contentScore = candidateScore;
|
||||
|
||||
this.log('Candidate: ' + candidates[c] + " (" + candidates[c].className + ":" +
|
||||
candidates[c].id + ") with score " +
|
||||
candidates[c].readability.contentScore);
|
||||
this.log('Candidate: ' + candidate + " (" + candidate.className + ":" +
|
||||
candidate.id + ") with score " + candidateScore);
|
||||
|
||||
if (!topCandidate ||
|
||||
candidates[c].readability.contentScore > topCandidate.readability.contentScore) {
|
||||
topCandidate = candidates[c];
|
||||
for (let t = 0; t < this.N_TOP_CANDIDATES; t++) {
|
||||
let aTopCandidate = topCandidates[t];
|
||||
|
||||
if (!aTopCandidate || candidateScore > aTopCandidate.readability.contentScore) {
|
||||
topCandidates.splice(t, 0, candidate);
|
||||
if (topCandidates.length > this.N_TOP_CANDIDATES)
|
||||
topCandidates.pop();
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
let topCandidate = topCandidates[0] || null;
|
||||
let lastTopCandidate = (topCandidates.length > 3 ? topCandidates[topCandidates.length - 1] : null);
|
||||
|
||||
// If we still have no top candidate, just use the body as a last resort.
|
||||
// We also have to copy the body node so it is something we can modify.
|
||||
if (topCandidate === null || topCandidate.tagName === "BODY") {
|
||||
|
@ -647,7 +661,7 @@ Readability.prototype = {
|
|||
// grabArticle with different flags set. This gives us a higher likelihood of
|
||||
// finding the content, and the sieve approach gives us a higher likelihood of
|
||||
// finding the -right- content.
|
||||
if (this._getInnerText(articleContent, false).length < 250) {
|
||||
if (this._getInnerText(articleContent, true).length < 500) {
|
||||
page.innerHTML = pageCacheHtml;
|
||||
|
||||
if (this._flagIsActive(this.FLAG_STRIP_UNLIKELYS)) {
|
||||
|
@ -660,6 +674,17 @@ Readability.prototype = {
|
|||
return null;
|
||||
}
|
||||
} else {
|
||||
if (lastTopCandidate !== null) {
|
||||
// EXPERIMENTAL: Contrast ratio is how we measure the level of competition between candidates in the
|
||||
// readability algorithm. This is to avoid offering reader mode on pages that are more like
|
||||
// a list or directory of links with summaries. It takes the score of the last top candidate
|
||||
// (see N_TOP_CANDIDATES) and checks how it compares to the top candidate's. On pages that are not
|
||||
// actual articles, there will likely be many candidates with similar score (i.e. higher contrast ratio).
|
||||
let contrastRatio = lastTopCandidate.readability.contentScore / topCandidate.readability.contentScore;
|
||||
if (contrastRatio > 0.45)
|
||||
return null;
|
||||
}
|
||||
|
||||
return articleContent;
|
||||
}
|
||||
}
|
||||
|
|
Загрузка…
Ссылка в новой задаче