зеркало из https://github.com/mozilla/gecko-dev.git
100 строки
2.9 KiB
JavaScript
100 строки
2.9 KiB
JavaScript
/* This Source Code Form is subject to the terms of the Mozilla Public
|
|
* License, v. 2.0. If a copy of the MPL was not distributed with this
|
|
* file, You can obtain one at http://mozilla.org/MPL/2.0/. */
|
|
"use strict";
|
|
|
|
const { RemoteSettings } = ChromeUtils.import(
|
|
"resource://services-settings/remote-settings.js"
|
|
);
|
|
|
|
// Returns whether the passed in params match the criteria.
|
|
// To match, they must contain all the params specified in criteria and the values
|
|
// must match if a value is provided in criteria.
|
|
function _hasParams(criteria, params) {
|
|
for (let param of criteria) {
|
|
const val = params.get(param.key);
|
|
if (
|
|
val === null ||
|
|
(param.value && param.value !== val) ||
|
|
(param.prefix && !val.startsWith(param.prefix))
|
|
) {
|
|
return false;
|
|
}
|
|
}
|
|
return true;
|
|
}
|
|
|
|
/**
|
|
* classifySite
|
|
* Classifies a given URL into a category based on classification data from RemoteSettings.
|
|
* The data from remote settings can match a category by one of the following:
|
|
* - match the exact URL
|
|
* - match the hostname or second level domain (sld)
|
|
* - match query parameter(s), and optionally their values or prefixes
|
|
* - match both (hostname or sld) and query parameter(s)
|
|
*
|
|
* The data looks like:
|
|
* [{
|
|
* "type": "hostname-and-params-match",
|
|
* "criteria": [
|
|
* {
|
|
* "url": "https://matchurl.com",
|
|
* "hostname": "matchhostname.com",
|
|
* "sld": "secondleveldomain",
|
|
* "params": [
|
|
* {
|
|
* "key": "matchparam",
|
|
* "value": "matchvalue",
|
|
* "prefix": "matchpPrefix",
|
|
* },
|
|
* ],
|
|
* },
|
|
* ],
|
|
* "weight": 300,
|
|
* },...]
|
|
*/
|
|
async function classifySite(url, RS = RemoteSettings) {
|
|
let category = "other";
|
|
let parsedURL;
|
|
|
|
// Try to parse the url.
|
|
for (let _url of [url, `https://${url}`]) {
|
|
try {
|
|
parsedURL = new URL(_url);
|
|
break;
|
|
} catch (e) {}
|
|
}
|
|
|
|
if (parsedURL) {
|
|
// If we parsed successfully, find a match.
|
|
const hostname = parsedURL.hostname.replace(/^www\./i, "");
|
|
const params = parsedURL.searchParams;
|
|
// NOTE: there will be an initial/default local copy of the data in m-c.
|
|
// Therefore, this should never return an empty list [].
|
|
const siteTypes = await RS("sites-classification").get();
|
|
const sortedSiteTypes = siteTypes.sort(
|
|
(x, y) => (y.weight || 0) - (x.weight || 0)
|
|
);
|
|
for (let type of sortedSiteTypes) {
|
|
for (let criteria of type.criteria) {
|
|
if (criteria.url && criteria.url !== url) {
|
|
continue;
|
|
}
|
|
if (criteria.hostname && criteria.hostname !== hostname) {
|
|
continue;
|
|
}
|
|
if (criteria.sld && criteria.sld !== hostname.split(".")[0]) {
|
|
continue;
|
|
}
|
|
if (criteria.params && !_hasParams(criteria.params, params)) {
|
|
continue;
|
|
}
|
|
return type.type;
|
|
}
|
|
}
|
|
}
|
|
return category;
|
|
}
|
|
|
|
const EXPORTED_SYMBOLS = ["classifySite"];
|