// Mirror of https://github.com/mozilla/gecko-dev.git
// 200 lines · 5.9 KiB · JavaScript
/* This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this
 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */

// Debounce time in milliseconds - this should be long enough to account for
// sync script tags that could appear between desired meta tags
const TIMEOUT_DELAY = 1000;

// Protocols a preview image URL must use in order to be accepted
const ACCEPTED_PROTOCOLS = ["http:", "https:"];

// Possible description tags, listed in order from least favourable to most
// favourable. The index of a tag in this list is used as its score.
const DESCRIPTION_RULES = [
  "twitter:description",
  "description",
  "og:description",
];

// Possible image tags, listed in order from least favourable to most
// favourable. The index of a tag in this list is used as its score.
const PREVIEW_IMAGE_RULES = [
  "thumbnail",
  "twitter:image",
  "og:image",
  "og:image:url",
  "og:image:secure_url",
];

/**
 * Checks if the incoming meta tag has a greater score than the current best
 * score by checking the index of the meta tag in the list of rules provided.
 * A tag that does not appear in the rules has index -1 and therefore never
 * beats an entry, because the lowest score an entry starts with is -1.
 *
 * @param {Array} aRules
 *          The list of rules for a given type of meta tag
 * @param {String} aTag
 *          The name or property of the incoming meta tag
 * @param {Object} aEntry
 *          The current best entry for the given meta tag; only its
 *          `currMaxScore` property is read
 *
 * @returns {Boolean} true if the incoming meta tag is better than the current
 *                    best meta tag of that same kind, false otherwise
 */
function shouldExtractMetadata(aRules, aTag, aEntry) {
  return aRules.indexOf(aTag) > aEntry.currMaxScore;
}

/**
 * Ensure that the preview image URL is safe and valid before storing
 *
 * @param {URL} aURL
 *          A URL object that needs to be checked for valid principal and protocol
 *
 * @returns {Boolean} true if the preview URL is safe and can be stored, false otherwise
 */
function checkLoadURIStr(aURL) {
  // Cheap protocol filter first, so the security manager is never consulted
  // for URLs that would be rejected anyway.
  if (!ACCEPTED_PROTOCOLS.includes(aURL.protocol)) {
    return false;
  }

  try {
    let ssm = Services.scriptSecurityManager;
    let principal = ssm.createNullPrincipal({});
    ssm.checkLoadURIStrWithPrincipal(
      principal,
      aURL.href,
      ssm.DISALLOW_INHERIT_PRINCIPAL
    );
  } catch (e) {
    // Any exception raised by the check is treated as "not allowed to load".
    return false;
  }

  return true;
}

/*
 * This listens to DOMContentLoaded and DOMMetaAdded events and collects
 * relevant metadata about the meta tags received. Once no relevant tag has
 * arrived for TIMEOUT_DELAY milliseconds, it sends the metadata gathered from
 * the meta tags and the url of the page as its payload to be inserted into
 * moz_places.
 */
export class ContentMetaChild extends JSWindowActorChild {
  constructor() {
    super();

    // Store a mapping of the best description and preview
    // image collected so far for a given URL.
    this.metaTags = new Map();
  }

  /*
   * Cancels every pending debounce timer so that none of them fires after the
   * actor has been destroyed.
   */
  didDestroy() {
    for (let entry of this.metaTags.values()) {
      if (entry.timeout) {
        entry.timeout.cancel();
      }
    }
  }

  /*
   * Dispatches DOM events to onMetaTag: every <meta> element of the document
   * on DOMContentLoaded, and the newly added element on DOMMetaAdded.
   *
   * @param {Event} event
   *          The DOM event delivered to this actor
   */
  handleEvent(event) {
    switch (event.type) {
      case "DOMContentLoaded": {
        const metaTags = this.contentWindow.document.querySelectorAll("meta");
        for (let metaTag of metaTags) {
          this.onMetaTag(metaTag);
        }
        break;
      }
      case "DOMMetaAdded":
        this.onMetaTag(event.originalTarget);
        break;
      default:
        // Any other event type is ignored.
        break;
    }
  }

  /*
   * Inspects a single meta tag and, if it is a better description or preview
   * image than the one recorded so far for the document URL, records it and
   * (re)starts the debounce timer that eventually reports the result.
   *
   * @param {Element} metaTag
   *          The meta element to inspect; null/undefined is ignored
   */
  onMetaTag(metaTag) {
    // If there's no meta tag, ignore this. This has to be checked before any
    // property of the tag is read, otherwise a missing tag would throw.
    if (!metaTag || !metaTag.ownerDocument) {
      return;
    }

    // Also verify that the window matches just to be safe.
    if (metaTag.ownerGlobal != this.contentWindow) {
      return;
    }

    const url = metaTag.ownerDocument.documentURI;

    let name = metaTag.name;
    let prop = metaTag.getAttributeNS(null, "property");
    if (!name && !prop) {
      return;
    }

    // The name attribute takes precedence over the property attribute.
    let tag = name || prop;

    // Malformed meta tag - do not store it
    const content = metaTag.getAttributeNS(null, "content");
    if (!content) {
      return;
    }

    // Reuse the entry already tracked for this URL, or start a fresh one. A
    // score of -1 means "nothing found yet", so any listed tag beats it.
    const entry = this.metaTags.get(url) || {
      description: { value: null, currMaxScore: -1 },
      image: { value: null, currMaxScore: -1 },
      timeout: null,
    };

    if (shouldExtractMetadata(DESCRIPTION_RULES, tag, entry.description)) {
      // Extract the description
      entry.description.value = content;
      entry.description.currMaxScore = DESCRIPTION_RULES.indexOf(tag);
    } else if (shouldExtractMetadata(PREVIEW_IMAGE_RULES, tag, entry.image)) {
      // Extract the preview image, resolving it against the document URL
      let value;
      try {
        value = new URL(content, url);
      } catch (e) {
        // Unparsable URL - nothing to record for this tag
        return;
      }
      // A URL that fails the safety check leaves the recorded image untouched,
      // but the debounce below is still (re)started.
      if (value && checkLoadURIStr(value)) {
        entry.image.value = value.href;
        entry.image.currMaxScore = PREVIEW_IMAGE_RULES.indexOf(tag);
      }
    } else {
      // We don't care about other meta tags
      return;
    }

    if (!this.metaTags.has(url)) {
      this.metaTags.set(url, entry);
    }

    if (entry.timeout) {
      // A timer is already pending: push it back by a full delay.
      entry.timeout.delay = TIMEOUT_DELAY;
    } else {
      // We want to debounce incoming meta tags until we're certain we have the
      // best one for description and preview image, and only store that one
      entry.timeout = Cc["@mozilla.org/timer;1"].createInstance(Ci.nsITimer);
      entry.timeout.initWithCallback(
        () => {
          entry.timeout = null;
          this.metaTags.delete(url);
          // We try to cancel the timers when we get destroyed, but if
          // there's a race, catch it:
          if (!this.manager || this.manager.isClosed) {
            return;
          }

          // Save description and preview image to moz_places
          this.sendAsyncMessage("Meta:SetPageInfo", {
            url,
            description: entry.description.value,
            previewImageURL: entry.image.value,
          });

          // Telemetry for recording the size of page metadata
          let metadataSize = entry.description.value
            ? entry.description.value.length
            : 0;
          metadataSize += entry.image.value ? entry.image.value.length : 0;
          Services.telemetry
            .getHistogramById("PAGE_METADATA_SIZE")
            .add(metadataSize);
        },
        TIMEOUT_DELAY,
        Ci.nsITimer.TYPE_ONE_SHOT
      );
    }
  }
}