lightcrawler/index.js

174 строки
5.3 KiB
JavaScript
Исходник Постоянная ссылка Обычный вид История

2017-06-20 21:01:20 +03:00
const cheerio = require('cheerio')
const ChildProcess = require('child_process')
2017-06-20 23:40:54 +03:00
const Crawler = require('simplecrawler')
2017-06-20 21:01:20 +03:00
const path = require('path')
2017-06-20 23:40:54 +03:00
const queue = require('async/queue')
2017-06-21 03:53:39 +03:00
const fs = require('fs')
2017-06-22 01:46:17 +03:00
const colors = require('colors')
2017-06-20 21:01:20 +03:00
/**
 * Run-wide counters shared by the crawler entry point, `runLighthouse` and
 * `printStats`.
 *
 * @type {{
 *   pageCount: number,
 *   violationCounts: Object<string, number>,
 *   passedAuditsCount: number,
 *   startTime: ?Date,
 *   auditTimesByPageUrl: Object<string, {startTime: Date, endTime: Date}>
 * }}
 */
const stats = {
  // Incremented once per Lighthouse run that is started.
  pageCount: 0,

  // Violation totals keyed by Lighthouse report category name.
  violationCounts: {},

  // Number of audits that scored exactly 100.
  passedAuditsCount: 0,

  // Set to the current time when the exported entry point is invoked.
  startTime: null,

  // Per-URL audit timing: {startTime, endTime} keyed by page URL.
  auditTimesByPageUrl: {}
}
2017-06-20 23:40:54 +03:00
module.exports = (options) => {
stats.startTime = new Date()
2017-06-21 03:53:39 +03:00
const configPath = path.resolve(options.config)
2017-06-21 23:00:16 +03:00
const config = JSON.parse(fs.readFileSync(configPath))
2017-06-21 03:53:39 +03:00
2017-06-20 23:40:54 +03:00
const crawler = new Crawler(options.url)
crawler.respectRobotsTxt = false
crawler.parseHTMLComments = false
crawler.parseScriptTags = false
2017-06-21 03:53:39 +03:00
crawler.maxDepth = config.settings.crawler.maxDepth || 1
2017-06-20 21:01:20 +03:00
2017-06-20 23:40:54 +03:00
crawler.discoverResources = (buffer, item) => {
const page = cheerio.load(buffer.toString('utf8'))
const links = page('a[href]').map(function () {
return page(this).attr('href')
}).get()
2017-06-20 21:01:20 +03:00
2017-06-20 23:40:54 +03:00
return links
}
2017-06-20 23:45:51 +03:00
let totalErrorCount = 0
const lighthouseQueue = queue((url, callback) => {
2017-06-21 03:53:39 +03:00
runLighthouse(url, configPath, (errorCount) => {
2017-06-20 23:45:51 +03:00
totalErrorCount += errorCount
callback()
})
2017-06-21 03:53:39 +03:00
}, config.settings.crawler.maxChromeInstances)
2017-06-20 23:40:54 +03:00
crawler.on('fetchcomplete', (queueItem, responseBuffer, response) => {
lighthouseQueue.push(queueItem.url)
})
2017-06-20 23:45:51 +03:00
crawler.once('complete', () => {
lighthouseQueue.drain = () => {
printStats()
2017-06-20 23:45:51 +03:00
if (totalErrorCount > 0) {
process.exit(1)
}
}
})
2017-06-20 21:01:20 +03:00
2017-06-20 23:40:54 +03:00
crawler.start()
}
2017-06-20 21:01:20 +03:00
2017-06-21 03:53:39 +03:00
/**
 * Runs the Lighthouse CLI against a single page in a child process, prints
 * every audit that did not score 100 and updates the shared `stats`.
 *
 * @param {string} url - Page to audit.
 * @param {string} configPath - Path passed to Lighthouse as `--config-path`.
 * @param {function(number): void} callback - Called exactly once with the
 *   number of failed audits, or with 1 when Lighthouse could not be started
 *   or its output could not be parsed as JSON.
 */
function runLighthouse (url, configPath, callback) {
  stats.pageCount++

  const args = [
    url,
    '--output=json',
    '--output-path=stdout',
    '--disable-device-emulation',
    '--disable-cpu-throttling',
    '--disable-network-throttling',
    '--chrome-flags=--headless --disable-gpu',
    `--config-path=${configPath}`
  ]

  const lighthousePath = require.resolve('lighthouse/lighthouse-cli/index.js')
  const lighthouse = ChildProcess.spawn(lighthousePath, args)

  // Both 'error' and 'close' can be emitted for the same child; make sure the
  // caller's callback runs only once.
  let finished = false
  const finish = (errorCount) => {
    if (finished) {
      return
    }
    finished = true
    stats.auditTimesByPageUrl[url].endTime = new Date()
    callback(errorCount)
  }

  // Decode as a stream so multi-byte characters that straddle two chunks are
  // not corrupted by per-chunk Buffer-to-string conversion.
  lighthouse.stdout.setEncoding('utf8')
  let output = ''
  lighthouse.stdout.on('data', (chunk) => {
    output += chunk
  })

  stats.auditTimesByPageUrl[url] = {startTime: new Date()}

  // Without a listener a failed spawn would surface as an uncaught 'error'
  // event and abort the whole run.
  lighthouse.once('error', (spawnError) => {
    console.error(`Running Lighthouse failed for ${url}: ${spawnError.message}`)
    finish(1)
  })

  lighthouse.once('close', () => {
    if (finished) {
      return
    }

    let report
    try {
      report = JSON.parse(output)
    } catch (parseError) {
      console.error(`Parsing JSON report output failed: ${output}`)
      finish(1)
      return
    }

    // Timing ends when the child closes, before the report is printed.
    stats.auditTimesByPageUrl[url].endTime = new Date()
    const errorCount = reportFailedAudits(url, report)
    finished = true
    callback(errorCount)
  })
}

// Prints each failed audit of a parsed report, updates the pass / violation
// counters in `stats` and returns the number of failed audits.
function reportFailedAudits (url, report) {
  let errorCount = 0

  report.reportCategories.forEach((category) => {
    let displayedCategory = false

    category.audits.forEach((audit) => {
      if (audit.score === 100) {
        stats.passedAuditsCount++
        return
      }

      // The category heading is printed lazily, before its first failure.
      if (!displayedCategory) {
        console.log()
        console.log(category.name.bold.underline)
        displayedCategory = true
      }

      errorCount++
      console.log(url.replace(/\/$/, ''), '\u2717'.red, audit.id.bold, '-', audit.result.description.italic)

      if (stats.violationCounts[category.name] === undefined) {
        stats.violationCounts[category.name] = 0
      }
      stats.violationCounts[category.name] += printAuditDetails(audit.result.extendedInfo)
    })
  })

  return errorCount
}

// Prints the offending URLs or DOM nodes recorded for one failed audit and
// returns how many violations it contributes: the number of listed items, or
// 1 when the audit carries no itemised details.
function printAuditDetails (extendedInfo) {
  const value = extendedInfo ? extendedInfo.value : undefined

  if (Array.isArray(value)) {
    value.forEach((result) => {
      if (result.url) {
        console.log(` ${result.url}`)
      }
    })
    return value.length
  }

  if (value && Array.isArray(value.nodes)) {
    // Group nodes by failure message so each message is printed only once.
    const messagesToNodes = new Map()
    value.nodes.forEach((result) => {
      const message = (result.failureSummary || '')
        .replace(/^Fix any of the following:/g, '')
        .trim()
      if (messagesToNodes.has(message)) {
        messagesToNodes.get(message).push(result.html)
      } else {
        messagesToNodes.set(message, [result.html])
      }
    })
    messagesToNodes.forEach((nodes, message) => {
      console.log(` ${message}`)
      nodes.forEach((node) => {
        console.log(` ${node}`.gray)
      })
    })
    return value.nodes.length
  }

  return 1
}
/**
 * Prints the end-of-run summary from the shared `stats`: pages scanned,
 * wall-clock time since `stats.startTime`, average per-page audit time,
 * passed audit count and violation totals per category.
 */
function printStats () {
  console.log()
  console.log()
  console.log('Lighthouse Summary'.bold.underline)
  console.log(` Total Pages Scanned: ${stats.pageCount}`)
  console.log(` Total Auditing Time: ${new Date() - stats.startTime} ms`)

  const totalTime = Object.keys(stats.auditTimesByPageUrl).reduce((sum, url) => {
    const {endTime, startTime} = stats.auditTimesByPageUrl[url]
    return (endTime - startTime) + sum
  }, 0)
  // Avoid printing "NaN ms" (0 / 0) when no page was audited.
  const averageTime = stats.pageCount > 0
    ? Math.round(totalTime / stats.pageCount)
    : 0
  console.log(` Average Page Audit Time: ${averageTime} ms`)
  console.log(` Total Audits Passed: ${stats.passedAuditsCount}`, '\u2713'.green)

  const categories = Object.keys(stats.violationCounts)
  if (categories.length === 0) {
    console.log(` Total Violations: None! \\o/ 🎉`)
  } else {
    console.log(` Total Violations:`)
    categories.forEach((category) => {
      console.log(` ${category}: ${stats.violationCounts[category]}`, '\u2717'.red)
    })
  }
}