Mirror of https://github.com/github/lightcrawler.git
Add required config option
Parent: a5b92b9ce9
Commit: 9cdabad591
cli.js (3 changes)

@@ -3,9 +3,10 @@
 const yargs = require('yargs')
 const lightcrawler = require('.')

-const options = yargs
+const options = yargs.demandOption(['c', 'u'])
   .alias('u', 'url').describe('url', 'URL to crawl')
   .alias('h', 'help').help('h')
+  .alias('c', 'config').describe('config', 'Options for lighthouse')
   .argv

 lightcrawler(options)
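With demandOption(['c', 'u']) both flags become mandatory: yargs prints its usage text and exits if either the crawl URL or the lighthouse config path is missing. A sketch of an invocation, assuming cli.js is exposed as a lightcrawler command and using an illustrative config file name:

lightcrawler --url https://example.com --config lighthouse-config.json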
config.json (deleted)

@@ -1,8 +0,0 @@
-{
-  "extends": "lighthouse:default",
-  "settings": {
-    "onlyAudits": [
-      "external-anchors-use-rel-noopener"
-    ]
-  }
-}
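Because the bundled config.json is removed and the new index.js reads config.settings.crawler.maxDepth and config.settings.crawler.maxChromeInstances from the file passed via --config, callers now have to supply their own config. A sketch of what such a file could look like, combining the deleted defaults with the new crawler block (file name and values are illustrative, not part of this commit):

{
  "extends": "lighthouse:default",
  "settings": {
    "onlyAudits": [
      "external-anchors-use-rel-noopener"
    ],
    "crawler": {
      "maxDepth": 2,
      "maxChromeInstances": 5
    }
  }
}

Note that maxDepth falls back to 1 when omitted, but the crawler block itself has to be present: the maxDepth read in index.js would otherwise throw on an undefined object.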
index.js (14 changes)

@@ -3,13 +3,17 @@ const ChildProcess = require('child_process')
 const Crawler = require('simplecrawler')
 const path = require('path')
 const queue = require('async/queue')
+const fs = require('fs')

 module.exports = (options) => {
+  const config = JSON.parse(fs.readFileSync(options.config))
+  const configPath = path.resolve(options.config)
+
   const crawler = new Crawler(options.url)
   crawler.respectRobotsTxt = false
   crawler.parseHTMLComments = false
   crawler.parseScriptTags = false
-  crawler.maxDepth = 1
+  crawler.maxDepth = config.settings.crawler.maxDepth || 1

   crawler.discoverResources = (buffer, item) => {
     const page = cheerio.load(buffer.toString('utf8'))
@@ -23,11 +27,11 @@ module.exports = (options) => {
   let totalErrorCount = 0

   const lighthouseQueue = queue((url, callback) => {
-    runLighthouse(url, (errorCount) => {
+    runLighthouse(url, configPath, (errorCount) => {
       totalErrorCount += errorCount
       callback()
     })
-  }, 5)
+  }, config.settings.crawler.maxChromeInstances)

   crawler.on('fetchcomplete', (queueItem, responseBuffer, response) => {
     lighthouseQueue.push(queueItem.url)
@@ -43,7 +47,7 @@ module.exports = (options) => {
   crawler.start()
 }

-function runLighthouse (url, callback) {
+function runLighthouse (url, configPath, callback) {
   const args = [
     url,
     '--output=json',
@@ -52,7 +56,7 @@ function runLighthouse (url, callback) {
     '--disable-cpu-throttling',
     '--disable-network-throttling',
     '--chrome-flags=--headless --disable-gpu',
-    `--config-path=${path.join(__dirname, 'config.json')}`
+    `--config-path=${configPath}`
   ]

   const lighthousePath = require.resolve('lighthouse/lighthouse-cli/index.js')
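The exported function receives the same options object that cli.js builds with yargs, so the module can also be driven directly. A minimal sketch, assuming the package is required under its repository name and the config file from the example above exists on disk (both names are illustrative, not part of this commit):

// Programmatic use of the crawler; url and config mirror the -u/--url and -c/--config CLI flags.
const lightcrawler = require('lightcrawler')

lightcrawler({
  url: 'https://example.com',          // site to crawl
  config: './lighthouse-config.json'   // JSON config read by index.js and passed to lighthouse via --config-path
})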