From bb971f3f6a1fb6065b0c1cb0c969fe50fd983375 Mon Sep 17 00:00:00 2001
From: Ursula Sarracini
Date: Thu, 7 Sep 2017 11:05:03 -0400
Subject: [PATCH] Bug 1393924 - Collect description and preview image and store it into moz_places r=mak

MozReview-Commit-ID: 8AgBMOodQop

--HG--
extra : rebase_source : f440e9497ee52a3b38e7bcf6dac7fee574f7885d
---
 browser/base/content/browser.js               |  16 ++-
 browser/base/content/content.js               |   2 +
 browser/base/content/tabbrowser.xml           |  14 ++
 .../base/content/test/metaTags/.eslintrc.js   |   7 +
 .../base/content/test/metaTags/browser.ini    |   4 +
 .../test/metaTags/browser_meta_tags.js        |  30 ++++
 .../base/content/test/metaTags/meta_tags.html |  18 +++
 browser/base/moz.build                        |   1 +
 browser/modules/ContentMetaHandler.jsm        | 165 ++++++++++++++++++
 browser/modules/moz.build                     |   1 +
 10 files changed, 255 insertions(+), 3 deletions(-)
 create mode 100644 browser/base/content/test/metaTags/.eslintrc.js
 create mode 100644 browser/base/content/test/metaTags/browser.ini
 create mode 100644 browser/base/content/test/metaTags/browser_meta_tags.js
 create mode 100644 browser/base/content/test/metaTags/meta_tags.html
 create mode 100644 browser/modules/ContentMetaHandler.jsm

diff --git a/browser/base/content/browser.js b/browser/base/content/browser.js
index b43a277bda74..a95ba4cb90a8 100755
--- a/browser/base/content/browser.js
+++ b/browser/base/content/browser.js
@@ -1290,7 +1290,7 @@ var gBrowserInit = {
     // loading the frame script to ensure that we don't miss any
     // message sent between when the frame script is loaded and when
     // the listener is registered.
-    DOMLinkHandler.init();
+    DOMEventHandler.init();
     gPageStyleMenu.init();
     LanguageDetectionListener.init();
     BrowserOnClick.init();
@@ -3704,13 +3704,13 @@ var newWindowButtonObserver = {
     }
   }
 }
-
-const DOMLinkHandler = {
+const DOMEventHandler = {
   init() {
     let mm = window.messageManager;
     mm.addMessageListener("Link:AddFeed", this);
     mm.addMessageListener("Link:SetIcon", this);
     mm.addMessageListener("Link:AddSearch", this);
+    mm.addMessageListener("Meta:SetPageInfo", this);
   },
 
   receiveMessage(aMsg) {
@@ -3727,9 +3727,19 @@ const DOMLinkHandler = {
       case "Link:AddSearch":
         this.addSearch(aMsg.target, aMsg.data.engine, aMsg.data.url);
         break;
+
+      case "Meta:SetPageInfo":
+        this.setPageInfo(aMsg.data);
+        break;
     }
   },
 
+  setPageInfo(aData) {
+    const {url, description, previewImageURL} = aData;
+    gBrowser.setPageInfo(url, description, previewImageURL);
+    return true;
+  },
+
   setIcon(aBrowser, aURL, aLoadingPrincipal) {
     if (gBrowser.isFailedIcon(aURL))
       return false;
diff --git a/browser/base/content/content.js b/browser/base/content/content.js
index 910b55467b43..0f38dedc8357 100644
--- a/browser/base/content/content.js
+++ b/browser/base/content/content.js
@@ -17,6 +17,7 @@ XPCOMUtils.defineLazyModuleGetters(this, {
   E10SUtils: "resource:///modules/E10SUtils.jsm",
   BrowserUtils: "resource://gre/modules/BrowserUtils.jsm",
   ContentLinkHandler: "resource:///modules/ContentLinkHandler.jsm",
+  ContentMetaHandler: "resource:///modules/ContentMetaHandler.jsm",
   ContentWebRTC: "resource:///modules/ContentWebRTC.jsm",
   SpellCheckHelper: "resource://gre/modules/InlineSpellChecker.jsm",
   InlineSpellCheckerContent: "resource://gre/modules/InlineSpellCheckerContent.jsm",
@@ -769,6 +770,7 @@ var ClickEventHandler = {
 ClickEventHandler.init();
 
 ContentLinkHandler.init(this);
+ContentMetaHandler.init(this);
 
 // TODO: Load this lazily so the JSM is run only if a relevant event/message fires.
 var pluginContent = new PluginContent(global);
diff --git a/browser/base/content/tabbrowser.xml b/browser/base/content/tabbrowser.xml
index acb77b2a1cc2..41bc36b301f5 100644
--- a/browser/base/content/tabbrowser.xml
+++ b/browser/base/content/tabbrowser.xml
@@ -1022,6 +1022,20 @@
+
+
+
+
+
+
+
+
+
diff --git a/browser/base/content/test/metaTags/.eslintrc.js b/browser/base/content/test/metaTags/.eslintrc.js
new file mode 100644
index 000000000000..58a15b48622a
--- /dev/null
+++ b/browser/base/content/test/metaTags/.eslintrc.js
@@ -0,0 +1,7 @@
+"use strict";
+
+module.exports = {
+  "extends": [
+    "plugin:mozilla/browser-test",
+  ]
+};
diff --git a/browser/base/content/test/metaTags/browser.ini b/browser/base/content/test/metaTags/browser.ini
new file mode 100644
index 000000000000..b406b97ffcd0
--- /dev/null
+++ b/browser/base/content/test/metaTags/browser.ini
@@ -0,0 +1,4 @@
+[DEFAULT]
+support-files =
+  meta_tags.html
+[browser_meta_tags.js]
diff --git a/browser/base/content/test/metaTags/browser_meta_tags.js b/browser/base/content/test/metaTags/browser_meta_tags.js
new file mode 100644
index 000000000000..f5d64b303307
--- /dev/null
+++ b/browser/base/content/test/metaTags/browser_meta_tags.js
@@ -0,0 +1,30 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+/* globals gBrowser */
+/* This tests that with the page meta_tags.html, ContentMetaHandler.jsm parses out
+ * the meta tags available and only stores the best one for description and one for
+ * preview image url. In the case of this test, the best defined meta tags are
+ * "og:description" and "og:image:url". The list of meta tags and their order of
+ * preference is found in ContentMetaHandler.jsm. Because there is debounce logic
+ * in ContentMetaHandler.jsm to only make one single SQL update, we have to wait
+ * for some time before checking that the page info was stored correctly.
+ */
+add_task(async function test() {
+  Components.utils.import("resource://gre/modules/PlacesUtils.jsm");
+  const URL = "https://example.com/browser/browser/base/content/test/metaTags/meta_tags.html";
+  let tab = await BrowserTestUtils.openNewForegroundTab(gBrowser, URL);
+
+  // Wait until places has stored the page info
+  let pageInfo;
+  await BrowserTestUtils.waitForCondition(async () => {
+    pageInfo = await PlacesUtils.history.fetch(URL, {"includeMeta": true});
+    // The page info may not be stored yet, so don't dereference missing values
+    return pageInfo && pageInfo.previewImageURL && pageInfo.description;
+  });
+  is(pageInfo.description, "og:description", "got the correct description");
+  is(pageInfo.previewImageURL.href, "og:image:url", "got the correct preview image");
+  await BrowserTestUtils.removeTab(tab);
+});
+
diff --git a/browser/base/content/test/metaTags/meta_tags.html b/browser/base/content/test/metaTags/meta_tags.html
new file mode 100644
index 000000000000..a096b68ad880
--- /dev/null
+++ b/browser/base/content/test/metaTags/meta_tags.html
@@ -0,0 +1,18 @@
+
+
+
+
+    MetaTags
+
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/browser/base/moz.build b/browser/base/moz.build
index 19e49d94696e..301d1f54f18a 100644
--- a/browser/base/moz.build
+++ b/browser/base/moz.build
@@ -24,6 +24,7 @@ BROWSER_CHROME_MANIFESTS += [
     'content/test/contextMenu/browser.ini',
     'content/test/forms/browser.ini',
     'content/test/general/browser.ini',
+    'content/test/metaTags/browser.ini',
     'content/test/newtab/browser.ini',
     'content/test/pageinfo/browser.ini',
     'content/test/performance/browser.ini',
diff --git a/browser/modules/ContentMetaHandler.jsm b/browser/modules/ContentMetaHandler.jsm
new file mode 100644
index 000000000000..c73545fb0540
--- /dev/null
+++ b/browser/modules/ContentMetaHandler.jsm
@@ -0,0 +1,165 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+"use strict";
+const {utils: Cu, interfaces: Ci, classes: Cc} = Components;
+Cu.importGlobalProperties(["URL"]);
+
+// Debounce time in milliseconds
+const TIMEOUT_DELAY = 100;
+
+// Possible description tags, listed in order from least favourable to most favourable
+const DESCRIPTION_RULES = [
+  "twitter:description",
+  "description",
+  "og:description"
+];
+
+// Possible image tags, listed in order from least favourable to most favourable
+const PREVIEW_IMAGE_RULES = [
+  "thumbnail",
+  "twitter:image",
+  "og:image",
+  "og:image:url",
+  "og:image:secure_url"
+];
+
+/*
+ * Builds the bookkeeping entry used while collecting the meta tags of one URL.
+ * Every call returns new objects, including the nested description and image
+ * records: handleMetaTag() mutates them in place, so they must never be shared
+ * between two pages.
+ *
+ * @returns {Object} an entry with no description, no image and no pending timer
+ */
+function createDefaultEntry() {
+  return {
+    description: {value: null, currMaxScore: -1},
+    image: {value: null, currMaxScore: -1},
+    timeout: null
+  };
+}
+
+/*
+ * Checks if the incoming meta tag has a greater score than the current best
+ * score by checking the index of the meta tag in the list of rules provided.
+ *
+ * @param {Array} aRules
+ *          The list of rules for a given type of meta tag
+ * @param {String} aTag
+ *          The name or property of the incoming meta tag
+ * @param {Object} aEntry
+ *          The current best entry for the given meta tag; its currMaxScore is
+ *          the rule index of the best tag stored so far, or -1 if there is none
+ *
+ * @returns {Boolean} true if the incoming meta tag is better than the current
+ *                    best meta tag of that same kind, false otherwise
+ */
+function shouldExtractMetadata(aRules, aTag, aEntry) {
+  return aRules.indexOf(aTag) > aEntry.currMaxScore;
+}
+
+this.EXPORTED_SYMBOLS = [ "ContentMetaHandler" ];
+
+/*
+ * This listens to DOMMetaAdded events and collects relevant metadata about the
+ * meta tag received. Then, it sends the metadata gathered from the meta tags
+ * and the url of the page as its payload to be inserted into moz_places.
+ */
+
+this.ContentMetaHandler = {
+  /*
+   * Starts listening for DOMMetaAdded events on the given global.
+   *
+   * @param {Object} chromeGlobal
+   *          The global the listener is added to; it is also used to send the
+   *          "Meta:SetPageInfo" message
+   */
+  init(chromeGlobal) {
+    chromeGlobal.addEventListener("DOMMetaAdded", event => {
+      const metaTag = event.originalTarget;
+
+      // If there's no meta tag, ignore this. Check it before reading anything
+      // off the meta tag.
+      if (!metaTag || !metaTag.ownerDocument) {
+        return;
+      }
+
+      // If we're in a sub-frame, ignore this
+      const window = metaTag.ownerGlobal;
+      if (window != window.top) {
+        return;
+      }
+      this.handleMetaTag(metaTag, chromeGlobal);
+    });
+    // Stores a mapping of the best description and preview image collected so far
+    // for a given URL
+    this._metaTags = new Map();
+  },
+
+  /*
+   * Records the incoming meta tag if it is a better description or preview
+   * image than the one stored so far for its document, then debounces the
+   * "Meta:SetPageInfo" message that reports the best values.
+   *
+   * @param {Element} metaTag
+   *          The meta tag that was added
+   * @param {Object} chromeGlobal
+   *          The global used to send the "Meta:SetPageInfo" message
+   */
+  handleMetaTag(metaTag, chromeGlobal) {
+    const url = metaTag.ownerDocument.documentURI;
+
+    let name = metaTag.name;
+    let prop = metaTag.getAttributeNS(null, "property");
+    if (!name && !prop) {
+      return;
+    }
+
+    let tag = name || prop;
+    let entry = this._metaTags.get(url) || createDefaultEntry();
+
+    if (shouldExtractMetadata(DESCRIPTION_RULES, tag, entry.description)) {
+      // Extract the description
+      const value = metaTag.getAttributeNS(null, "content");
+      if (value) {
+        entry.description.value = value;
+        entry.description.currMaxScore = DESCRIPTION_RULES.indexOf(tag);
+      }
+    } else if (shouldExtractMetadata(PREVIEW_IMAGE_RULES, tag, entry.image)) {
+      // Extract the preview image
+      const value = metaTag.getAttributeNS(null, "content");
+      if (value) {
+        entry.image.value = new URL(value, url).href;
+        entry.image.currMaxScore = PREVIEW_IMAGE_RULES.indexOf(tag);
+      }
+    } else {
+      // We don't care about other meta tags
+      return;
+    }
+
+    if (!this._metaTags.has(url)) {
+      this._metaTags.set(url, entry);
+    }
+
+    if (entry.timeout) {
+      entry.timeout.delay = TIMEOUT_DELAY;
+    } else {
+      // We want to debounce incoming meta tags until we're certain we have the
+      // best one for description and preview image, and only store that one
+      entry.timeout = Cc["@mozilla.org/timer;1"].createInstance(Ci.nsITimer);
+      entry.timeout.initWithCallback(() => {
+        entry.timeout = null;
+
+        // Save description and preview image to moz_places
+        chromeGlobal.sendAsyncMessage("Meta:SetPageInfo", {
+          url,
+          description: entry.description.value,
+          previewImageURL: entry.image.value
+        });
+        this._metaTags.delete(url);
+      }, TIMEOUT_DELAY, Ci.nsITimer.TYPE_ONE_SHOT);
+    }
+  }
+};
diff --git a/browser/modules/moz.build b/browser/modules/moz.build
index 4552fb654729..cfdb01a00ef9 100644
--- a/browser/modules/moz.build
+++ b/browser/modules/moz.build
@@ -132,6 +132,7 @@ EXTRA_JS_MODULES += [
     'ContentClick.jsm',
     'ContentCrashHandlers.jsm',
     'ContentLinkHandler.jsm',
+    'ContentMetaHandler.jsm',
     'ContentObservers.js',
     'ContentSearch.jsm',
     'ContentWebRTC.jsm',