зеркало из https://github.com/mozilla/gecko-dev.git
Bug 1724320 - Collect Open Graph page data. r=mossop
Differential Revision: https://phabricator.services.mozilla.com/D121927
This commit is contained in:
Родитель
f4c00a8872
Коммит
56b979ba71
|
@ -0,0 +1,106 @@
|
|||
/* This Source Code Form is subject to the terms of the Mozilla Public
|
||||
* License, v. 2.0. If a copy of the MPL was not distributed with this
|
||||
* file, You can obtain one at http://mozilla.org/MPL/2.0/. */
|
||||
|
||||
"use strict";
|
||||
|
||||
var EXPORTED_SYMBOLS = ["OpenGraphPageData"];
|
||||
|
||||
const { PageDataCollector } = ChromeUtils.import(
|
||||
"resource:///modules/pagedata/PageDataCollector.jsm"
|
||||
);
|
||||
|
||||
/**
|
||||
* @typedef {object} GeneralPageData
|
||||
* General data describing a page.
|
||||
* @property {string | undefined} title
|
||||
* The title describing the page.
|
||||
* @property {string | undefined} site_name
|
||||
* The name of the site the page is on.
|
||||
* @property {string | undefined} type
|
||||
* The type of the object being described by Open Graph. See
|
||||
* https://ogp.me/#types for a list of possible types.
|
||||
* @property {string | undefined} image
|
||||
* A URL pointing to an image that describes the page.
|
||||
* @property {string | undefined} url
|
||||
* The permalink to the page.
|
||||
*/
|
||||
|
||||
// Open Graph property names, with the leading "og:" removed, that are copied
// into the collected page data. Any other og:* property is ignored.
const RELEVANT_TAGS = ["title", "site_name", "image", "type", "url"];
|
||||
|
||||
/**
 * Collects Open Graph related data from a page.
 *
 * TODO: Respond to DOM mutations to trigger recollection.
 */
class OpenGraphPageData extends PageDataCollector {
  /**
   * @see PageDataCollector.init
   */
  async init() {
    return this.#collect();
  }

  /**
   * Collects data from the meta tags on the page.
   * See https://ogp.me/ for the parsing spec.
   *
   * @param {NodeList} tags
   *   A NodeList of Open Graph meta tags. Every tag is expected to carry a
   *   `property` attribute starting with "og:".
   * @returns {GeneralPageData}
   *   Data describing the webpage. Every relevant key is present; keys with
   *   no matching tag are left undefined.
   */
  #collectOpenGraphTags(tags) {
    const prefix = "og:";

    // Ensure all tags are present in the returned object, even if their values
    // are undefined.
    const pageData = Object.fromEntries(
      RELEVANT_TAGS.map(tag => [tag, undefined])
    );

    for (const tag of tags) {
      // Strip the "og:" prefix from the property name. Structured properties
      // such as "og:image:height" become "image:height" and are skipped by
      // the relevance check below.
      const propertyName = tag.getAttribute("property").slice(prefix.length);
      if (RELEVANT_TAGS.includes(propertyName)) {
        pageData[propertyName] = tag.getAttribute("content");
      }
    }

    return pageData;
  }

  /**
   * Collects the existing data from the page.
   *
   * @returns {Data[]}
   *   An empty array when the page has no Open Graph meta tags, otherwise a
   *   single entry of type GENERAL wrapping the collected page data.
   */
  #collect() {
    // Sites can technically define an Open Graph prefix other than `og:`.
    // However, `og:` is one of the default RDFa prefixes and it's likely
    // uncommon that sites use a custom prefix. If we find that metadata is
    // missing for common sites due to this issue, we could consider adding a
    // basic RDFa parser.
    //
    // The attribute selector is explicitly closed with `]` instead of relying
    // on the CSS parser's recovery for an unterminated block.
    const openGraphTags = this.document.querySelectorAll(
      "meta[property^='og:']"
    );
    if (!openGraphTags.length) {
      return [];
    }

    return [
      {
        type: PageDataCollector.DATA_TYPE.GENERAL,
        data: [this.#collectOpenGraphTags(openGraphTags)],
      },
    ];
  }
}
|
|
@ -11,6 +11,7 @@ const { XPCOMUtils } = ChromeUtils.import(
|
|||
);
|
||||
|
||||
XPCOMUtils.defineLazyModuleGetters(this, {
|
||||
OpenGraphPageData: "resource:///modules/pagedata/OpenGraphPageData.jsm",
|
||||
PrivateBrowsingUtils: "resource://gre/modules/PrivateBrowsingUtils.jsm",
|
||||
SchemaOrgPageData: "resource:///modules/pagedata/SchemaOrgPageData.jsm",
|
||||
Services: "resource://gre/modules/Services.jsm",
|
||||
|
@ -42,7 +43,7 @@ XPCOMUtils.defineLazyPreferenceGetter(
|
|||
* @returns {PageDataCollector[]}
|
||||
*/
|
||||
function getCollectors(document) {
  // One collector per supported metadata format. The earlier return that only
  // created the schema.org collector made this line unreachable.
  return [new SchemaOrgPageData(document), new OpenGraphPageData(document)];
}
|
||||
|
||||
/**
|
||||
|
|
|
@ -24,6 +24,7 @@ class PageDataCollector extends EventEmitter {
|
|||
static get DATA_TYPE() {
|
||||
return {
|
||||
PRODUCT: 1,
|
||||
GENERAL: 2,
|
||||
};
|
||||
}
|
||||
|
||||
|
|
|
@ -12,6 +12,7 @@ BROWSER_CHROME_MANIFESTS += [
|
|||
]
|
||||
|
||||
EXTRA_JS_MODULES.pagedata += [
|
||||
"OpenGraphPageData.jsm",
|
||||
"PageDataCollector.jsm",
|
||||
"PageDataService.jsm",
|
||||
"SchemaOrgPageData.jsm",
|
||||
|
|
|
@ -8,8 +8,13 @@ prefs =
|
|||
browser.pagedata.enabled=true
|
||||
support-files =
|
||||
head.js
|
||||
product1.html
|
||||
product2.html
|
||||
|
||||
[browser_pagedata_basic.js]
|
||||
[browser_pagedata_opengraph.js]
|
||||
support-files =
|
||||
opengraph1.html
|
||||
opengraph2.html
|
||||
[browser_pagedata_product.js]
|
||||
support-files =
|
||||
product1.html
|
||||
product2.html
|
||||
|
|
|
@ -0,0 +1,72 @@
|
|||
/* This Source Code Form is subject to the terms of the Mozilla Public
|
||||
* License, v. 2.0. If a copy of the MPL was not distributed with this
|
||||
* file, You can obtain one at http://mozilla.org/MPL/2.0/. */
|
||||
|
||||
/**
|
||||
* Tests that the page data service can parse Open Graph metadata.
|
||||
*/
|
||||
|
||||
// Location of this test's directory, with the chrome:// mochitest root
// swapped for https://example.com.
const BASE_URL = (() => {
  const chromeRoot = getRootDirectory(gTestPath);
  return chromeRoot.replace(
    "chrome://mochitests/content",
    "https://example.com"
  );
})();
|
||||
|
||||
add_task(async function test_type_website() {
  // Register the listener before the tab is opened.
  const pageDataReceived = PageDataService.once("page-data");

  const TEST_URL = `${BASE_URL}opengraph1.html`;

  await BrowserTestUtils.withNewTab(TEST_URL, async () => {
    const pageData = await pageDataReceived;

    Assert.equal(pageData.url, TEST_URL, "Should have returned the loaded URL");
    Assert.equal(pageData.data.length, 1, "Should have only one data item");

    // Unrecognized tags in the fixture (og:locale, og:description) must not
    // appear in the collected data.
    const expectedItem = {
      type: "website",
      site_name: "Mozilla",
      url: "https://www.mozilla.org/",
      image: "https://example.com/preview-image",
      title: "Internet for people, not profit",
    };
    const expectedData = [
      { type: PageDataCollector.DATA_TYPE.GENERAL, data: [expectedItem] },
    ];

    Assert.deepEqual(
      pageData.data,
      expectedData,
      "Should have returned the expected data"
    );
  });
});
|
||||
|
||||
add_task(async function test_type_movie() {
  // Register the listener before the tab is opened.
  const pageDataReceived = PageDataService.once("page-data");

  const TEST_URL = `${BASE_URL}opengraph2.html`;

  await BrowserTestUtils.withNewTab(TEST_URL, async () => {
    const pageData = await pageDataReceived;

    Assert.equal(pageData.url, TEST_URL, "Should have returned the loaded URL");
    Assert.equal(pageData.data.length, 1, "Should have only one data item");

    // The fixture has no og:site_name tag, so that key is expected to be
    // present but undefined.
    const expectedItem = {
      type: "video.movie",
      site_name: undefined,
      url: "https://www.imdb.com/title/tt0499004/",
      image: "https://example.com/preview-code-rush",
      title: "Code Rush (TV Movie 2000) - IMDb",
    };
    const expectedData = [
      { type: PageDataCollector.DATA_TYPE.GENERAL, data: [expectedItem] },
    ];

    Assert.deepEqual(
      pageData.data,
      expectedData,
      "Should have returned the expected data"
    );
  });
});
|
|
@ -0,0 +1,17 @@
|
|||
<html>
|
||||
<head>
|
||||
<title>Internet for people, not profit — Mozilla</title>
|
||||
<meta http-equiv="Content-Type" content="text/html;charset=utf-8">
|
||||
<meta property="og:type" content="website">
|
||||
<meta property="og:site_name" content="Mozilla">
|
||||
<meta property="og:url" content="https://www.mozilla.org/">
|
||||
<meta property="og:image" content="https://example.com/preview-image">
|
||||
<meta property="og:title" content="Internet for people, not profit">
|
||||
<!-- We expect the parser to ignore tags it does not recognize. -->
|
||||
<meta property="og:locale" content="en_CA">
|
||||
<meta property="og:description" content="Mozilla is the not-for-profit behind the lightning fast Firefox browser. We put people over profit to give everyone more power online.">
|
||||
</head>
|
||||
<body>
|
||||
<p>Test page</p>
|
||||
</body>
|
||||
</html>
|
|
@ -0,0 +1,16 @@
|
|||
<html>
|
||||
<head>
|
||||
<title>Code Rush (TV Movie 2000)</title>
|
||||
<meta property="og:url" content="https://www.imdb.com/title/tt0499004/"/>
|
||||
<!-- Omitting og:site_name to test that the parser doesn't break on missing tags. -->
|
||||
<meta property="og:title" content="Code Rush (TV Movie 2000) - IMDb"/>
|
||||
<meta property="og:description" content="This is the description of the movie."/>
|
||||
<meta property="og:type" content="video.movie"/>
|
||||
<meta property="og:image" content="https://example.com/preview-code-rush"/>
|
||||
<meta property="og:image:height" content="750"/>
|
||||
<meta property="og:image:width" content="1000"/>
|
||||
</head>
|
||||
<body>
|
||||
<p>Test page</p>
|
||||
</body>
|
||||
</html>
|
Загрузка…
Ссылка в новой задаче