// lightcrawler/index.js
const cheerio = require('cheerio')
const ChildProcess = require('child_process')
const Crawler = require('simplecrawler')
const fs = require('fs')
const path = require('path')
const queue = require('async/queue')

module.exports = (options) => {
2017-06-21 03:53:39 +03:00
const config = JSON.parse(fs.readFileSync(options.config))
const configPath = path.resolve(options.config)
2017-06-20 23:40:54 +03:00
const crawler = new Crawler(options.url)
crawler.respectRobotsTxt = false
crawler.parseHTMLComments = false
crawler.parseScriptTags = false
2017-06-21 03:53:39 +03:00
crawler.maxDepth = config.settings.crawler.maxDepth || 1
2017-06-20 21:01:20 +03:00
2017-06-20 23:40:54 +03:00
crawler.discoverResources = (buffer, item) => {
const page = cheerio.load(buffer.toString('utf8'))
const links = page('a[href]').map(function () {
return page(this).attr('href')
}).get()
2017-06-20 21:01:20 +03:00
2017-06-20 23:40:54 +03:00
return links
}
2017-06-20 23:45:51 +03:00
let totalErrorCount = 0
const lighthouseQueue = queue((url, callback) => {
2017-06-21 03:53:39 +03:00
runLighthouse(url, configPath, (errorCount) => {
2017-06-20 23:45:51 +03:00
totalErrorCount += errorCount
callback()
})
2017-06-21 03:53:39 +03:00
}, config.settings.crawler.maxChromeInstances)
2017-06-20 23:40:54 +03:00
crawler.on('fetchcomplete', (queueItem, responseBuffer, response) => {
lighthouseQueue.push(queueItem.url)
})
2017-06-20 23:45:51 +03:00
crawler.once('complete', () => {
lighthouseQueue.drain = () => {
if (totalErrorCount > 0) {
process.exit(1)
}
}
})
2017-06-20 21:01:20 +03:00
2017-06-20 23:40:54 +03:00
crawler.start()
}
function runLighthouse (url, configPath, callback) {
2017-06-20 23:40:54 +03:00
const args = [
2017-06-20 21:01:20 +03:00
url,
'--output=json',
'--output-path=stdout',
'--disable-device-emulation',
'--disable-cpu-throttling',
'--disable-network-throttling',
2017-06-20 23:40:54 +03:00
'--chrome-flags=--headless --disable-gpu',
2017-06-21 03:53:39 +03:00
`--config-path=${configPath}`
2017-06-20 23:40:54 +03:00
]
2017-06-21 00:09:44 +03:00
const lighthousePath = require.resolve('lighthouse/lighthouse-cli/index.js')
const lighthouse = ChildProcess.spawn(lighthousePath, args)
2017-06-20 21:01:20 +03:00
let output = ''
lighthouse.stdout.on('data', (data) => {
output += data
})
lighthouse.once('close', () => {
2017-06-20 23:45:51 +03:00
let errorCount = 0
2017-06-20 23:40:54 +03:00
2017-06-21 00:19:46 +03:00
let report
try {
report = JSON.parse(output)
} catch (parseError) {
console.error(`Parsing JSON report output failed: ${output}`)
callback(1)
return
}
2017-06-20 21:01:20 +03:00
report.reportCategories.forEach((category) => {
category.audits.forEach((audit) => {
if (audit.score !== 100) {
2017-06-20 23:45:51 +03:00
errorCount++
2017-06-20 21:01:20 +03:00
console.log(`${url} failed ${audit.id}`)
2017-06-20 23:40:54 +03:00
const {value} = audit.result.extendedInfo
if (Array.isArray(value)) {
value.forEach((result) => {
console.log(` ${result.url}`)
})
} else if (Array.isArray(value.nodes)) {
value.nodes.forEach((result) => {
let message = result.failureSummary
message = message.replace(/^Fix any of the following:/g, '').trim()
console.log(` ${message}`)
console.log(` ${result.html}`)
})
}
2017-06-20 21:01:20 +03:00
}
})
})
2017-06-20 23:45:51 +03:00
callback(errorCount)
2017-06-20 21:01:20 +03:00
})
}