// lightcrawler/index.js
//
// 94 lines, 2.5 KiB, JavaScript
// (repository viewer links: Raw / Normal view / History)
2017-06-20 21:01:20 +03:00
const cheerio = require('cheerio')
const ChildProcess = require('child_process')
2017-06-20 23:40:54 +03:00
const Crawler = require('simplecrawler')
2017-06-20 21:01:20 +03:00
const path = require('path')
2017-06-20 23:40:54 +03:00
const queue = require('async/queue')
2017-06-20 21:01:20 +03:00
2017-06-20 23:40:54 +03:00
module.exports = (options) => {
const crawler = new Crawler(options.url)
crawler.respectRobotsTxt = false
crawler.parseHTMLComments = false
crawler.parseScriptTags = false
crawler.maxDepth = 1
2017-06-20 21:01:20 +03:00
2017-06-20 23:40:54 +03:00
crawler.discoverResources = (buffer, item) => {
const page = cheerio.load(buffer.toString('utf8'))
const links = page('a[href]').map(function () {
return page(this).attr('href')
}).get()
2017-06-20 21:01:20 +03:00
2017-06-20 23:40:54 +03:00
return links
}
2017-06-20 23:45:51 +03:00
let totalErrorCount = 0
const lighthouseQueue = queue((url, callback) => {
runLighthouse(url, (errorCount) => {
totalErrorCount += errorCount
callback()
})
}, 5)
2017-06-20 23:40:54 +03:00
crawler.on('fetchcomplete', (queueItem, responseBuffer, response) => {
lighthouseQueue.push(queueItem.url)
})
2017-06-20 23:45:51 +03:00
crawler.once('complete', () => {
lighthouseQueue.drain = () => {
if (totalErrorCount > 0) {
process.exit(1)
}
}
})
2017-06-20 21:01:20 +03:00
2017-06-20 23:40:54 +03:00
crawler.start()
}
2017-06-20 21:01:20 +03:00
2017-06-20 23:40:54 +03:00
/**
 * Runs the locally installed Lighthouse CLI against `url`, prints every audit
 * that did not score 100 and reports the number of problems found.
 *
 * A run that cannot be started, or whose output is not a usable JSON report,
 * is logged and counted as one error instead of throwing, so `callback` is
 * always invoked exactly once.
 *
 * @param {string} url - Page to audit.
 * @param {function(number): void} callback - Receives the error count for this URL.
 */
function runLighthouse (url, callback) {
  const args = [
    url,
    '--output=json',
    '--output-path=stdout',
    '--disable-device-emulation',
    '--disable-cpu-throttling',
    '--disable-network-throttling',
    '--chrome-flags=--headless --disable-gpu',
    `--config-path=${path.join(__dirname, 'config.json')}`
  ]
  const lighthouse = ChildProcess.spawn(path.join(__dirname, 'node_modules', '.bin', 'lighthouse'), args)

  // A child that fails to spawn emits 'error' and may still emit 'close';
  // make sure the caller only hears back once.
  let finished = false
  const finish = (errorCount) => {
    if (finished) {
      return
    }
    finished = true
    callback(errorCount)
  }

  let output = ''
  lighthouse.stdout.on('data', (data) => {
    output += data
  })

  lighthouse.once('error', (error) => {
    console.log(`${url} failed to run lighthouse: ${error.message}`)
    finish(1)
  })

  lighthouse.once('close', () => {
    if (finished) {
      return
    }

    let report
    try {
      report = JSON.parse(output)
    } catch (error) {
      console.log(`${url} failed to produce a lighthouse report: ${error.message}`)
      finish(1)
      return
    }

    finish(countFailedAudits(url, report))
  })
}

/**
 * Logs every audit in `report` whose score is not 100, together with the
 * offending URLs or DOM nodes when the audit carries them, and returns how
 * many such audits there were. A report without a `reportCategories` array
 * is logged and counted as a single error.
 */
function countFailedAudits (url, report) {
  if (!report || !Array.isArray(report.reportCategories)) {
    console.log(`${url} failed to produce a lighthouse report: no report categories`)
    return 1
  }

  let errorCount = 0
  report.reportCategories.forEach((category) => {
    const audits = Array.isArray(category.audits) ? category.audits : []
    audits.forEach((audit) => {
      if (audit.score === 100) {
        return
      }
      errorCount++
      console.log(`${url} failed ${audit.id}`)

      // Not every audit carries extended info; only print details when present.
      const extendedInfo = audit.result && audit.result.extendedInfo
      const value = extendedInfo && extendedInfo.value
      if (Array.isArray(value)) {
        value.forEach((result) => {
          console.log(` ${result.url}`)
        })
      } else if (value && Array.isArray(value.nodes)) {
        value.nodes.forEach((result) => {
          let message = String(result.failureSummary || '')
          message = message.replace(/^Fix any of the following:/g, '').trim()
          console.log(` ${message}`)
          console.log(` ${result.html}`)
        })
      }
    })
  })
  return errorCount
}