зеркало из https://github.com/mozilla/tsci.git
Merge pull request #148 from mozilla/issues/117/1
Fixes #117 - Use the Trexa list instead of the Tranco list
This commit is contained in:
Коммит
33f145cb0e
|
@ -16,7 +16,7 @@ You can omit any keys where the defaults would suffice. Here is what a commented
|
|||
would look like:
|
||||
```
|
||||
{
|
||||
// The size of the Tranco list to download, up to 1 million sites.
|
||||
// The size of the Trexa list to download, up to ~150k sites.
|
||||
"listSize": 500,
|
||||
// The directory that will be used to store the downloaded list.
|
||||
"listDir": "data/",
|
||||
|
@ -78,6 +78,12 @@ A `--resume` option also exists, to continue collecting weekly results until the
|
|||
npm start 2019-05-23 -- --resume
|
||||
```
|
||||
|
||||
A `--exact` option exists to allow testing of single dates that don't fall at the end of the week.
|
||||
|
||||
```
|
||||
npm start 2020-05-28 -- --exact
|
||||
```
|
||||
|
||||
Code of Conduct
|
||||
===============
|
||||
|
||||
|
|
|
@ -187,9 +187,10 @@ function getEOW(date) {
|
|||
/**
|
||||
* Return the list of query dates for a given inputDate
|
||||
* @param {Date} inputDate the date to start with.
|
||||
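* @param {Object} options optional flags; when options.exact is truthy, a single input date is used as-is instead of the end of its week.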
|
||||
* @returns an Array with all dates to gather bugs for
|
||||
*/
|
||||
function getQueryDates(inputDate) {
|
||||
function getQueryDates(inputDate, options) {
|
||||
const queryDates = [];
|
||||
if (inputDate) {
|
||||
// We want to consider open bugs only until the end of the given week.
|
||||
|
@ -205,7 +206,7 @@ function getQueryDates(inputDate) {
|
|||
queryDates.push(getEOW(parsed));
|
||||
parsed.setDate(parsed.getDate() + 7);
|
||||
if (getEOW(parsed) > today) {
|
||||
// Stop if we get into future dates (the Tranco list won't
|
||||
// Stop if we get into future dates (the Trexa list won't
|
||||
// have anything useful for us).
|
||||
break;
|
||||
}
|
||||
|
@ -222,6 +223,9 @@ function getQueryDates(inputDate) {
|
|||
break;
|
||||
}
|
||||
}
|
||||
// one day we can use options?.exact
|
||||
} else if (options && options.exact) {
|
||||
queryDates.push(parsed);
|
||||
} else {
|
||||
// A single date is specified.
|
||||
queryDates.push(getEOW(parsed));
|
||||
|
|
6
index.js
6
index.js
|
@ -3,7 +3,7 @@ const bugs = require("./bugs");
|
|||
const fs = require("fs");
|
||||
const helpers = require("./helpers");
|
||||
const spreadsheet = require("./spreadsheet");
|
||||
const tranco = require("./tranco");
|
||||
const trexa = require("./trexa");
|
||||
|
||||
const argv = process.argv.slice(2);
|
||||
|
||||
|
@ -38,12 +38,14 @@ const main = async () => {
|
|||
const inputDate = argv[0] || maxDate;
|
||||
if (argv.includes("--resume")) {
|
||||
queryDates = helpers.resumeQueryDates(inputDate);
|
||||
} else if (argv.includes("--exact")) {
|
||||
queryDates = helpers.getQueryDates(inputDate, { exact: true });
|
||||
} else {
|
||||
queryDates = helpers.getQueryDates(inputDate);
|
||||
}
|
||||
|
||||
for (const date of queryDates) {
|
||||
const LIST_FILE = await tranco.fetchList(LIST_SIZE, LIST_DIR, date);
|
||||
const LIST_FILE = await trexa.fetchList(LIST_SIZE, LIST_DIR, date);
|
||||
const bugTable = await bugs.fetchBugs(
|
||||
LIST_FILE,
|
||||
bugzillaKey,
|
||||
|
|
|
@ -229,9 +229,9 @@
|
|||
},
|
||||
"dependencies": {
|
||||
"minimist": {
|
||||
"version": "1.2.0",
|
||||
"resolved": "https://registry.npmjs.org/minimist/-/minimist-1.2.0.tgz",
|
||||
"integrity": "sha1-o1AIsg9BOD7sH7kU9M1d95omQoQ=",
|
||||
"version": "1.2.5",
|
||||
"resolved": "https://registry.npmjs.org/minimist/-/minimist-1.2.5.tgz",
|
||||
"integrity": "sha512-FM9nNUYrRBAELZQT3xeZQ7fmMOBg6nWNmJKTcgsJeaLstP/UODVpGsr5OhXhhXg6f+qtJ8uiZ+PUxkDWcgIXLw==",
|
||||
"dev": true
|
||||
}
|
||||
}
|
||||
|
@ -3735,18 +3735,6 @@
|
|||
"mime": "^2.2.0"
|
||||
}
|
||||
},
|
||||
"handlebars": {
|
||||
"version": "4.5.3",
|
||||
"resolved": "https://registry.npmjs.org/handlebars/-/handlebars-4.5.3.tgz",
|
||||
"integrity": "sha512-3yPecJoJHK/4c6aZhSvxOyG4vJKDshV36VHp0iVCDVh7o9w2vwi3NSnL2MMPj3YdduqaBcu7cGbggJQM0br9xA==",
|
||||
"dev": true,
|
||||
"requires": {
|
||||
"neo-async": "^2.6.0",
|
||||
"optimist": "^0.6.1",
|
||||
"source-map": "^0.6.1",
|
||||
"uglify-js": "^3.1.4"
|
||||
}
|
||||
},
|
||||
"har-schema": {
|
||||
"version": "2.0.0",
|
||||
"resolved": "https://registry.npmjs.org/har-schema/-/har-schema-2.0.0.tgz",
|
||||
|
@ -3875,6 +3863,12 @@
|
|||
"whatwg-encoding": "^1.0.1"
|
||||
}
|
||||
},
|
||||
"html-escaper": {
|
||||
"version": "2.0.2",
|
||||
"resolved": "https://registry.npmjs.org/html-escaper/-/html-escaper-2.0.2.tgz",
|
||||
"integrity": "sha512-H2iMtd0I4Mt5eYiapRdIDjp+XzelXQ0tFE4JS7YFwFevXXMmOp9myNrUvCg0D6ws8iqkRPBfKHgbwig1SmlLfg==",
|
||||
"dev": true
|
||||
},
|
||||
"http-signature": {
|
||||
"version": "1.2.0",
|
||||
"resolved": "https://registry.npmjs.org/http-signature/-/http-signature-1.2.0.tgz",
|
||||
|
@ -4605,12 +4599,12 @@
|
|||
}
|
||||
},
|
||||
"istanbul-reports": {
|
||||
"version": "2.2.6",
|
||||
"resolved": "https://registry.npmjs.org/istanbul-reports/-/istanbul-reports-2.2.6.tgz",
|
||||
"integrity": "sha512-SKi4rnMyLBKe0Jy2uUdx28h8oG7ph2PPuQPvIAh31d+Ci+lSiEu4C+h3oBPuJ9+mPKhOyW0M8gY4U5NM1WLeXA==",
|
||||
"version": "2.2.7",
|
||||
"resolved": "https://registry.npmjs.org/istanbul-reports/-/istanbul-reports-2.2.7.tgz",
|
||||
"integrity": "sha512-uu1F/L1o5Y6LzPVSVZXNOoD/KXpJue9aeLRd0sM9uMXfZvzomB0WxVamWb5ue8kA2vVWEmW7EG+A5n3f1kqHKg==",
|
||||
"dev": true,
|
||||
"requires": {
|
||||
"handlebars": "^4.1.2"
|
||||
"html-escaper": "^2.0.0"
|
||||
}
|
||||
},
|
||||
"jest": {
|
||||
|
@ -5683,14 +5677,6 @@
|
|||
"dev": true,
|
||||
"requires": {
|
||||
"minimist": "^1.2.0"
|
||||
},
|
||||
"dependencies": {
|
||||
"minimist": {
|
||||
"version": "1.2.0",
|
||||
"resolved": "https://registry.npmjs.org/minimist/-/minimist-1.2.0.tgz",
|
||||
"integrity": "sha1-o1AIsg9BOD7sH7kU9M1d95omQoQ=",
|
||||
"dev": true
|
||||
}
|
||||
}
|
||||
},
|
||||
"jsonify": {
|
||||
|
@ -5731,9 +5717,9 @@
|
|||
}
|
||||
},
|
||||
"kind-of": {
|
||||
"version": "6.0.2",
|
||||
"resolved": "https://registry.npmjs.org/kind-of/-/kind-of-6.0.2.tgz",
|
||||
"integrity": "sha512-s5kLOcnH0XqDO+FvuaLX8DDjZ18CGFk7VygH40QoKPUQhW4e2rvM0rwUq0t8IQDOwYSeLK01U90OjzBTme2QqA==",
|
||||
"version": "6.0.3",
|
||||
"resolved": "https://registry.npmjs.org/kind-of/-/kind-of-6.0.3.tgz",
|
||||
"integrity": "sha512-dcS1ul+9tmeD95T+x28/ehLgd9mENa3LsvDTtzm3vyBEO7RPptvAD+t44WVXaUjTBRcrpFeFlC8WCruUR456hw==",
|
||||
"dev": true
|
||||
},
|
||||
"kleur": {
|
||||
|
@ -6293,9 +6279,9 @@
|
|||
}
|
||||
},
|
||||
"minimist": {
|
||||
"version": "0.0.8",
|
||||
"resolved": "https://registry.npmjs.org/minimist/-/minimist-0.0.8.tgz",
|
||||
"integrity": "sha1-hX/Kv8M5fSYluCKCYuhqp6ARsF0=",
|
||||
"version": "1.2.5",
|
||||
"resolved": "https://registry.npmjs.org/minimist/-/minimist-1.2.5.tgz",
|
||||
"integrity": "sha512-FM9nNUYrRBAELZQT3xeZQ7fmMOBg6nWNmJKTcgsJeaLstP/UODVpGsr5OhXhhXg6f+qtJ8uiZ+PUxkDWcgIXLw==",
|
||||
"dev": true
|
||||
},
|
||||
"mixin-deep": {
|
||||
|
@ -6335,12 +6321,20 @@
|
|||
}
|
||||
},
|
||||
"mkdirp": {
|
||||
"version": "0.5.1",
|
||||
"resolved": "https://registry.npmjs.org/mkdirp/-/mkdirp-0.5.1.tgz",
|
||||
"integrity": "sha1-MAV0OOrGz3+MR2fzhkjWaX11yQM=",
|
||||
"version": "0.5.5",
|
||||
"resolved": "https://registry.npmjs.org/mkdirp/-/mkdirp-0.5.5.tgz",
|
||||
"integrity": "sha512-NKmAlESf6jMGym1++R0Ra7wvhV+wFW63FaSOFPwRahvea0gMUcGUhVeAg/0BC0wiv9ih5NYPB1Wn1UEI1/L+xQ==",
|
||||
"dev": true,
|
||||
"requires": {
|
||||
"minimist": "0.0.8"
|
||||
"minimist": "^1.2.5"
|
||||
},
|
||||
"dependencies": {
|
||||
"minimist": {
|
||||
"version": "1.2.5",
|
||||
"resolved": "https://registry.npmjs.org/minimist/-/minimist-1.2.5.tgz",
|
||||
"integrity": "sha512-FM9nNUYrRBAELZQT3xeZQ7fmMOBg6nWNmJKTcgsJeaLstP/UODVpGsr5OhXhhXg6f+qtJ8uiZ+PUxkDWcgIXLw==",
|
||||
"dev": true
|
||||
}
|
||||
}
|
||||
},
|
||||
"ms": {
|
||||
|
@ -6386,12 +6380,6 @@
|
|||
"integrity": "sha1-Sr6/7tdUHywnrPspvbvRXI1bpPc=",
|
||||
"dev": true
|
||||
},
|
||||
"neo-async": {
|
||||
"version": "2.6.1",
|
||||
"resolved": "https://registry.npmjs.org/neo-async/-/neo-async-2.6.1.tgz",
|
||||
"integrity": "sha512-iyam8fBuCUpWeKPGpaNMetEocMt364qkCsfL9JuhjXX6dRnguRVOfk2GZaDpPjcOKiiXCPINZC1GczQ7iTq3Zw==",
|
||||
"dev": true
|
||||
},
|
||||
"nice-try": {
|
||||
"version": "1.0.5",
|
||||
"resolved": "https://registry.npmjs.org/nice-try/-/nice-try-1.0.5.tgz",
|
||||
|
@ -6658,16 +6646,6 @@
|
|||
"integrity": "sha512-pVOEP16TrAO2/fjej1IdOyupJY8KDUM1CvsaScRbw6oddvpQoOfGk4ywha0HKKVAD6RkW4x6Q+tNBwhf3Bgpuw==",
|
||||
"dev": true
|
||||
},
|
||||
"optimist": {
|
||||
"version": "0.6.1",
|
||||
"resolved": "https://registry.npmjs.org/optimist/-/optimist-0.6.1.tgz",
|
||||
"integrity": "sha1-2j6nRob6IaGaERwybpDrFaAZZoY=",
|
||||
"dev": true,
|
||||
"requires": {
|
||||
"minimist": "~0.0.1",
|
||||
"wordwrap": "~0.0.2"
|
||||
}
|
||||
},
|
||||
"optionator": {
|
||||
"version": "0.8.3",
|
||||
"resolved": "https://registry.npmjs.org/optionator/-/optionator-0.8.3.tgz",
|
||||
|
@ -7401,9 +7379,9 @@
|
|||
}
|
||||
},
|
||||
"minimist": {
|
||||
"version": "1.2.0",
|
||||
"resolved": "https://registry.npmjs.org/minimist/-/minimist-1.2.0.tgz",
|
||||
"integrity": "sha1-o1AIsg9BOD7sH7kU9M1d95omQoQ=",
|
||||
"version": "1.2.5",
|
||||
"resolved": "https://registry.npmjs.org/minimist/-/minimist-1.2.5.tgz",
|
||||
"integrity": "sha512-FM9nNUYrRBAELZQT3xeZQ7fmMOBg6nWNmJKTcgsJeaLstP/UODVpGsr5OhXhhXg6f+qtJ8uiZ+PUxkDWcgIXLw==",
|
||||
"dev": true
|
||||
},
|
||||
"to-regex-range": {
|
||||
|
@ -8196,17 +8174,6 @@
|
|||
"integrity": "sha512-q+MB8nYR1KDLrgr4G5yemftpMC7/QLqVndBmEEdqzmNj5dcFOO4Oo8qlwZE3ULT3+Zim1F8Kq4cBnikNhlCMlg==",
|
||||
"dev": true
|
||||
},
|
||||
"uglify-js": {
|
||||
"version": "3.7.0",
|
||||
"resolved": "https://registry.npmjs.org/uglify-js/-/uglify-js-3.7.0.tgz",
|
||||
"integrity": "sha512-PC/ee458NEMITe1OufAjal65i6lB58R1HWMRcxwvdz1UopW0DYqlRL3xdu3IcTvTXsB02CRHykidkTRL+A3hQA==",
|
||||
"dev": true,
|
||||
"optional": true,
|
||||
"requires": {
|
||||
"commander": "~2.20.3",
|
||||
"source-map": "~0.6.1"
|
||||
}
|
||||
},
|
||||
"union-value": {
|
||||
"version": "1.0.1",
|
||||
"resolved": "https://registry.npmjs.org/union-value/-/union-value-1.0.1.tgz",
|
||||
|
@ -8418,12 +8385,6 @@
|
|||
"integrity": "sha512-Hz/mrNwitNRh/HUAtM/VT/5VH+ygD6DV7mYKZAtHOrbs8U7lvPS6xf7EJKMF0uW1KJCl0H701g3ZGus+muE5vQ==",
|
||||
"dev": true
|
||||
},
|
||||
"wordwrap": {
|
||||
"version": "0.0.3",
|
||||
"resolved": "https://registry.npmjs.org/wordwrap/-/wordwrap-0.0.3.tgz",
|
||||
"integrity": "sha1-o9XabNXAvAAI03I0u68b7WMFkQc=",
|
||||
"dev": true
|
||||
},
|
||||
"wrap-ansi": {
|
||||
"version": "5.1.0",
|
||||
"resolved": "https://registry.npmjs.org/wrap-ansi/-/wrap-ansi-5.1.0.tgz",
|
||||
|
@ -8591,9 +8552,9 @@
|
|||
}
|
||||
},
|
||||
"yargs-parser": {
|
||||
"version": "13.1.1",
|
||||
"resolved": "https://registry.npmjs.org/yargs-parser/-/yargs-parser-13.1.1.tgz",
|
||||
"integrity": "sha512-oVAVsHz6uFrg3XQheFII8ESO2ssAf9luWuAd6Wexsu4F3OtIW0o8IribPXYrD4WC24LWtPrJlGy87y5udK+dxQ==",
|
||||
"version": "13.1.2",
|
||||
"resolved": "https://registry.npmjs.org/yargs-parser/-/yargs-parser-13.1.2.tgz",
|
||||
"integrity": "sha512-3lbsNRf/j+A4QuSZfDRA7HRSfWrzO0YjqTJd5kjAq37Zep1CEgaYmrH9Q3GwPiB9cHyd1Y1UwggGhJGoxipbzg==",
|
||||
"requires": {
|
||||
"camelcase": "^5.0.0",
|
||||
"decamelize": "^1.2.0"
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
const fs = require("fs");
|
||||
const tranco = require("../tranco.js");
|
||||
const trexa = require("../trexa.js");
|
||||
|
||||
const args = {
|
||||
listFile: "./tests/fixtures/copy.csv",
|
||||
|
@ -21,7 +21,7 @@ afterAll(async () => {
|
|||
});
|
||||
|
||||
test("ignoredDomains get removed", async () => {
|
||||
await tranco.removeIgnoredDomains(args).then(async returnedArgs => {
|
||||
await trexa.removeIgnoredDomains(args).then(async returnedArgs => {
|
||||
const data = await fs.promises.readFile(returnedArgs.listFile, "utf8");
|
||||
const lines = data.split(/^/m);
|
||||
|
||||
|
@ -32,7 +32,7 @@ test("ignoredDomains get removed", async () => {
|
|||
|
||||
describe("clampListSize tests", () => {
|
||||
test("clampListSize current size > config.listSize", async () => {
|
||||
await tranco.clampListSize(args).then(async csvPath => {
|
||||
await trexa.clampListSize(args).then(async csvPath => {
|
||||
const data = await fs.promises.readFile(csvPath, "utf8");
|
||||
const lines = data.split(/\r?\n/);
|
||||
|
||||
|
@ -43,7 +43,7 @@ describe("clampListSize tests", () => {
|
|||
|
||||
test("clampListSize current size < config.listSize", async () => {
|
||||
args.config.listSize = 15;
|
||||
await tranco.clampListSize(args).then(async csvPath => {
|
||||
await trexa.clampListSize(args).then(async csvPath => {
|
||||
const data = await fs.promises.readFile(csvPath, "utf8");
|
||||
const lines = data.split(/\r?\n/);
|
||||
|
||||
|
|
|
@ -80,44 +80,6 @@ const removeIgnoredDomains = function(args) {
|
|||
});
|
||||
};
|
||||
|
||||
/**
|
||||
* Returns the list ID for the specified date or, if that cannot be found, the
|
||||
* most recent one available. If a date is not specified, returns the latest
|
||||
* available list.
|
||||
* @param {Date} date the date of the requested list ID
|
||||
* @returns the String of the list ID
|
||||
*/
|
||||
const fetchListID = async date => {
|
||||
const ID_URL = `https://tranco-list.eu/daily_list_id?date=${parseDate(date)}`;
|
||||
|
||||
return fetch(ID_URL).then(async res => {
|
||||
if (
|
||||
res.ok &&
|
||||
res.headers.get("content-type") === "text/plain; charset=utf-8"
|
||||
) {
|
||||
return { listID: await res.text(), listDate: date };
|
||||
} else if (res.status === 503) {
|
||||
const newDate = new Date(date);
|
||||
const now = new Date();
|
||||
// Future dates are unlikely to be available yet, but also ones
|
||||
// from long ago may have never been available. Try to converge
|
||||
// towards the present.
|
||||
if (date > now) {
|
||||
newDate.setDate(newDate.getDate() - 1);
|
||||
// If we end up at "today", we need to request the list from
|
||||
// the day before -- the daily list is actually a day old.
|
||||
} else if (parseDate(newDate) === parseDate(now)) {
|
||||
newDate.setDate(newDate.getDate() - 2);
|
||||
} else {
|
||||
newDate.setDate(newDate.getDate() + 1);
|
||||
}
|
||||
console.warn(`Retrying with date ${newDate}`);
|
||||
return fetchListID(newDate);
|
||||
}
|
||||
throw new Error(`Request for ${ID_URL} returned status ${res.status}!`);
|
||||
});
|
||||
};
|
||||
|
||||
const fetchList = async (
|
||||
size = 500,
|
||||
directory = "data/",
|
||||
|
@ -138,9 +100,7 @@ const fetchList = async (
|
|||
date = new Date();
|
||||
}
|
||||
|
||||
// Fetch the list ID for the requested date.
|
||||
const { listID, listDate } = await fetchListID(date);
|
||||
const file = `${directory}list-${parseDate(listDate)}.csv`;
|
||||
const file = `${directory}list-${parseDate(date)}.csv`;
|
||||
|
||||
// Check for an already downloaded list.
|
||||
const listIsCached = await fs.promises
|
||||
|
@ -148,28 +108,25 @@ const fetchList = async (
|
|||
.then(() => true)
|
||||
.catch(() => false);
|
||||
if (listIsCached) {
|
||||
console.log("Found cached Tranco list");
|
||||
console.log("Found cached Trexa list");
|
||||
return file;
|
||||
}
|
||||
|
||||
// Fetch the list.
|
||||
const LIST_URL = `https://tranco-list.eu/download/${listID}/${listSize}`;
|
||||
return fetch(LIST_URL).then(res => {
|
||||
if (
|
||||
!res.ok ||
|
||||
res.headers.get("content-type") !== "text/csv; charset=utf-8"
|
||||
) {
|
||||
throw new Error(`List ${listID} not found!`);
|
||||
const LIST_URL = `https://trexa.webcompat.com/api/lists/${parseDate(
|
||||
date
|
||||
)}?count=${listSize}`;
|
||||
return fetch(LIST_URL, {
|
||||
headers: { "User-Agent": "mozilla-tsci/1.0" },
|
||||
}).then(res => {
|
||||
if (!res.ok || !res.headers.get("content-type").includes("text/csv")) {
|
||||
throw new Error(`List trexa-${parseDate(date)}.csv not found!`);
|
||||
}
|
||||
return new Promise((resolve, reject) => {
|
||||
const dest = fs.createWriteStream(file);
|
||||
res.body.pipe(dest);
|
||||
dest.on("finish", () => {
|
||||
console.log(
|
||||
`Downloaded Tranco list with ID ${listID} for date ${parseDate(
|
||||
listDate
|
||||
)}`
|
||||
);
|
||||
console.log(`Downloaded Trexa list for date ${parseDate(date)}`);
|
||||
removeIgnoredDomains({ listFile: file, config })
|
||||
.then(clampListSize)
|
||||
.then(
|
Загрузка…
Ссылка в новой задаче