Bug 1444082 - sync reader mode to github tip ( 8525c6af36d3badbe27c4672a6f2dd99ddb4097f ), r=johannh

MozReview-Commit-ID: LZLFf9kyUR5

--HG--
extra : rebase_source : 9f2e517afa764dbe339b84abfaaf948b1402cba1
This commit is contained in:
Gijs Kruitbosch 2018-03-08 14:35:02 +00:00
Родитель 1d92eccd3c
Коммит 4340ba60f2
5 изменённых файлов: 57 добавлений и 127 удалений

Просмотреть файл

@ -34,95 +34,6 @@
data-usernames-api="/en-US/users/api/usernames"
>
<nav class="scrollable">
<div id="search-bar">
<form id="search" action="/en-US/search">
<input type="hidden" name="product" value="mobile" />
<input name="q" placeholder="Search Mozilla Support" required="required" type="search" value="">
<button class="icon-sprite" type="submit">Search</button>
</form>
</div>
<a href="/en-US/products">Home</a>
<a href="/en-US/questions/new">Ask a question</a>
<a href="/en-US/questions">Support Forum</a>
<header>Navigation</header>
<a href="/en-US/get-involved">Help other users</a>
<a href="?&amp;mobile=0">Switch to desktop site</a>
<header>Profile</header>
<a href="/en-US/users/login">Sign in</a>
<header>Languages</header>
<a href="/en-US/locales" class="locale-picker">Switch language</a>
</nav>
<header class="slide-on-exposed">
<div id="menu-button" class="icon-sprite"></div>
<h1>
Firefox for Android
</h1>
</header>
<div class="wrapper slide-on-exposed">
<section id="content">
<ul id="topics">
<li>
<a href="/en-US/products/mobile/get-started" class="cf">
<img src="//support.cdn.mozilla.net/static/img/blank.png" class="topic-sprite topic-get-started" alt="">
Learn the Basics: get started
</a>
</li>
<li>
<a href="/en-US/products/mobile/download-and-install" class="cf">
<img src="//support.cdn.mozilla.net/static/img/blank.png" class="topic-sprite topic-download-and-install" alt="">
Download, install and migration
</a>
</li>
<li>
<a href="/en-US/products/mobile/privacy-and-security" class="cf">
<img src="//support.cdn.mozilla.net/static/img/blank.png" class="topic-sprite topic-privacy-and-security" alt="">
Privacy and security settings
</a>
</li>
<li>
<a href="/en-US/products/mobile/customize" class="cf">
<img src="//support.cdn.mozilla.net/static/img/blank.png" class="topic-sprite topic-customize" alt="">
Customize controls, options and add-ons
</a>
</li>
<li>
<a href="/en-US/products/mobile/sync" class="cf">
<img src="//support.cdn.mozilla.net/static/img/blank.png" class="topic-sprite topic-sync" alt="">
Firefox Sync settings
</a>
</li>
<li>
<a href="/en-US/products/mobile/fix-problems" class="cf">
<img src="//support.cdn.mozilla.net/static/img/blank.png" class="topic-sprite topic-fix-problems" alt="">
Fix slowness, crashing, error messages and other problems
</a>
</li>
<li>
<a href="/en-US/kb/get-community-support" class="cf">
<img src="//support.cdn.mozilla.net/static/img/blank.png" class="topic-sprite topic-get-community-support" alt="">
Get community support
</a>
</li>
</ul>
</section>
<footer>
</footer>
<ul id="notifications">
</ul>
</div>
<script src="//support.cdn.mozilla.net/static/jsi18n/en-us/javascript.js?beb7c1e"></script>

Просмотреть файл

@ -560,7 +560,8 @@
},
};
var Document = function () {
var Document = function (url) {
this.documentURI = url;
this.styleSheets = [];
this.childNodes = [];
this.children = [];
@ -600,6 +601,20 @@
node.textContent = text;
return node;
},
get baseURI() {
if (!this.hasOwnProperty("_baseURI")) {
this._baseURI = this.documentURI;
var baseElements = this.getElementsByTagName("base");
var href = baseElements[0] && baseElements[0].getAttribute("href");
if (href) {
try {
this._baseURI = (new URL(href, this._baseURI)).href;
} catch (ex) {/* Just fall back to documentURI */}
}
}
return this._baseURI;
},
};
var Element = function (tag) {
@ -1118,9 +1133,9 @@
/**
* Parses an HTML string and returns a JS implementation of the Document.
*/
parse: function (html) {
parse: function (html, url) {
this.html = html;
var doc = this.doc = new Document();
var doc = this.doc = new Document(url);
this.readChildren(doc);
// If this is an HTML document, remove root-level children except for the

Просмотреть файл

@ -41,6 +41,7 @@ function Readability(uri, doc, options) {
this._articleTitle = null;
this._articleByline = null;
this._articleDir = null;
this._attempts = [];
// Configurable options
this._debug = !!options.debug;
@ -275,34 +276,20 @@ Readability.prototype = {
* @return void
*/
_fixRelativeUris: function(articleContent) {
var scheme = this._uri.scheme;
var prePath = this._uri.prePath;
var pathBase = this._uri.pathBase;
var baseURI = this._doc.baseURI;
var documentURI = this._doc.documentURI;
function toAbsoluteURI(uri) {
// If this is already an absolute URI, return it.
if (/^[a-zA-Z][a-zA-Z0-9\+\-\.]*:/.test(uri))
// Leave hash links alone if the base URI matches the document URI:
if (baseURI == documentURI && uri.charAt(0) == "#") {
return uri;
// Scheme-rooted relative URI.
if (uri.substr(0, 2) == "//")
return scheme + "://" + uri.substr(2);
// Prepath-rooted relative URI.
if (uri[0] == "/")
return prePath + uri;
// Dotslash relative URI.
if (uri.indexOf("./") === 0)
return pathBase + uri.slice(2);
// Ignore hash URIs:
if (uri[0] == "#")
return uri;
// Standard relative URI; add entire path. pathBase already includes a
// trailing "/".
return pathBase + uri;
}
// Otherwise, resolve against base URI:
try {
return new URL(uri, baseURI).href;
} catch (ex) {
// Something went wrong, just return the original:
}
return uri;
}
var links = articleContent.getElementsByTagName("a");
@ -535,6 +522,7 @@ Readability.prototype = {
this._clean(articleContent, "embed");
this._clean(articleContent, "h1");
this._clean(articleContent, "footer");
this._clean(articleContent, "link");
// Clean out elements have "share" in their id/class combinations from final top candidates,
// which means we don't remove the top candidates even they have "share".
@ -1089,24 +1077,45 @@ Readability.prototype = {
if (this._debug)
this.log("Article content after paging: " + articleContent.innerHTML);
var parseSuccessful = true;
// Now that we've gone through the full algorithm, check to see if
// we got any meaningful content. If we didn't, we may need to re-run
// grabArticle with different flags set. This gives us a higher likelihood of
// finding the content, and the sieve approach gives us a higher likelihood of
// finding the -right- content.
if (this._getInnerText(articleContent, true).length < this._wordThreshold) {
var textLength = this._getInnerText(articleContent, true).length;
if (textLength < this._wordThreshold) {
parseSuccessful = false;
page.innerHTML = pageCacheHtml;
if (this._flagIsActive(this.FLAG_STRIP_UNLIKELYS)) {
this._removeFlag(this.FLAG_STRIP_UNLIKELYS);
this._attempts.push({articleContent: articleContent, textLength: textLength});
} else if (this._flagIsActive(this.FLAG_WEIGHT_CLASSES)) {
this._removeFlag(this.FLAG_WEIGHT_CLASSES);
this._attempts.push({articleContent: articleContent, textLength: textLength});
} else if (this._flagIsActive(this.FLAG_CLEAN_CONDITIONALLY)) {
this._removeFlag(this.FLAG_CLEAN_CONDITIONALLY);
this._attempts.push({articleContent: articleContent, textLength: textLength});
} else {
return null;
this._attempts.push({articleContent: articleContent, textLength: textLength});
// No luck after removing flags, just return the longest text we found during the different loops
this._attempts.sort(function (a, b) {
return a.textLength < b.textLength;
});
// But first check if we actually have something
if (!this._attempts[0].textLength) {
return null;
}
articleContent = this._attempts[0].articleContent;
parseSuccessful = true;
}
} else {
}
if (parseSuccessful) {
// Find out text direction from ancestors of final top candidate.
var ancestors = [parentOfTopCandidate, topCandidate].concat(this._getNodeAncestors(parentOfTopCandidate));
this._someNode(ancestors, function(ancestor) {

Просмотреть файл

@ -47,7 +47,7 @@ var Agent = {
* @return {object} Article object returned from Readability.
*/
parseDocument(uri, serializedDoc, options) {
let doc = new JSDOMParser().parse(serializedDoc);
let doc = new JSDOMParser().parse(serializedDoc, uri.spec);
return new Readability(uri, doc, options).parse();
},
};

Просмотреть файл

@ -5,10 +5,5 @@
<meta name="description" content="This is the non-article description." />
</head>
<body>
<header>Site header</header>
<div>
<h1>Non article title</h1>
<p>Woot!</p>
</div>
</body>
</html>