This commit is contained in:
Katrina Uychaco 2017-06-20 17:53:39 -07:00
Родитель a5b92b9ce9
Коммит 9cdabad591
3 изменённых файлов: 11 добавлений и 14 удалений

3
cli.js
Просмотреть файл

@@ -3,9 +3,10 @@
const yargs = require('yargs') const yargs = require('yargs')
const lightcrawler = require('.') const lightcrawler = require('.')
const options = yargs const options = yargs.demandOption(['c', 'u'])
.alias('u', 'url').describe('url', 'URL to crawl') .alias('u', 'url').describe('url', 'URL to crawl')
.alias('h', 'help').help('h') .alias('h', 'help').help('h')
.alias('c', 'config').describe('config', 'Options for lighthouse')
.argv .argv
lightcrawler(options) lightcrawler(options)

Просмотреть файл

@@ -1,8 +0,0 @@
{
"extends": "lighthouse:default",
"settings": {
"onlyAudits": [
"external-anchors-use-rel-noopener"
]
}
}

Просмотреть файл

@@ -3,13 +3,17 @@ const ChildProcess = require('child_process')
const Crawler = require('simplecrawler') const Crawler = require('simplecrawler')
const path = require('path') const path = require('path')
const queue = require('async/queue') const queue = require('async/queue')
const fs = require('fs')
module.exports = (options) => { module.exports = (options) => {
const config = JSON.parse(fs.readFileSync(options.config))
const configPath = path.resolve(options.config)
const crawler = new Crawler(options.url) const crawler = new Crawler(options.url)
crawler.respectRobotsTxt = false crawler.respectRobotsTxt = false
crawler.parseHTMLComments = false crawler.parseHTMLComments = false
crawler.parseScriptTags = false crawler.parseScriptTags = false
crawler.maxDepth = 1 crawler.maxDepth = config.settings.crawler.maxDepth || 1
crawler.discoverResources = (buffer, item) => { crawler.discoverResources = (buffer, item) => {
const page = cheerio.load(buffer.toString('utf8')) const page = cheerio.load(buffer.toString('utf8'))
@@ -23,11 +27,11 @@ module.exports = (options) => {
let totalErrorCount = 0 let totalErrorCount = 0
const lighthouseQueue = queue((url, callback) => { const lighthouseQueue = queue((url, callback) => {
runLighthouse(url, (errorCount) => { runLighthouse(url, configPath, (errorCount) => {
totalErrorCount += errorCount totalErrorCount += errorCount
callback() callback()
}) })
}, 5) }, config.settings.crawler.maxChromeInstances)
crawler.on('fetchcomplete', (queueItem, responseBuffer, response) => { crawler.on('fetchcomplete', (queueItem, responseBuffer, response) => {
lighthouseQueue.push(queueItem.url) lighthouseQueue.push(queueItem.url)
@@ -43,7 +47,7 @@ module.exports = (options) => {
crawler.start() crawler.start()
} }
function runLighthouse (url, callback) { function runLighthouse (url, configPath, callback) {
const args = [ const args = [
url, url,
'--output=json', '--output=json',
@@ -52,7 +56,7 @@ function runLighthouse (url, callback) {
'--disable-cpu-throttling', '--disable-cpu-throttling',
'--disable-network-throttling', '--disable-network-throttling',
'--chrome-flags=--headless --disable-gpu', '--chrome-flags=--headless --disable-gpu',
`--config-path=${path.join(__dirname, 'config.json')}` `--config-path=${configPath}`
] ]
const lighthousePath = require.resolve('lighthouse/lighthouse-cli/index.js') const lighthousePath = require.resolve('lighthouse/lighthouse-cli/index.js')