// Mirror of https://github.com/electron/apps.git (132 lines, 3.6 KiB, JavaScript)
const { env } = process

// Number of web requests allowed to be open at the same time. The
// MAX_CONCURRENCY environment variable overrides the default of 4.
const MAX_CONCURRENCY = Number(env.MAX_CONCURRENCY) || 4

// How long fetched README data stays fresh. The README_CACHE_TTL environment
// variable (default '4 hours') is handed to human-interval; the result is
// later added to millisecond timestamps, so it is treated as milliseconds.
const README_CACHE_TTL = require('human-interval')(
  env.README_CACHE_TTL || '4 hours'
)
|
|
|
|
const fs = require('fs')
|
|
const path = require('path')
|
|
const Bottleneck = require('bottleneck')
|
|
const github = require('../lib/github')
|
|
const cheerio = require('cheerio')
|
|
const parseGitUrl = require('github-url-to-object')
|
|
|
|
// README cache file: loaded here as the previous run's data and written again
// once fetching has finished.
const outputFile = path.join(__dirname, '..', 'meta', 'readmes.json')

// README data from the previous run, keyed by app slug.
const oldReadmeData = require(outputFile)

// Results gathered during this run, keyed by app slug.
const output = {}

// Rate limiter that caps how many scheduled requests run concurrently.
const limiter = new Bottleneck({ maxConcurrent: MAX_CONCURRENCY })
|
|
|
|
const apps = require('../lib/raw-app-list')()
const appsWithRepos = require('../lib/apps-with-github-repos')

// An app is (re)fetched when the previous run has no entry for its slug, or
// when that entry's fetch time plus the cache TTL already lies in the past.
// A missing `readmeFetchedAt` becomes `new Date(null)` (the epoch), which
// makes the entry count as outdated.
const appsToUpdate = appsWithRepos.filter(({ slug }) => {
  const cached = oldReadmeData[slug]
  if (cached) {
    const fetchedAt = new Date(cached.readmeFetchedAt || null).getTime()
    return Date.now() > fetchedAt + README_CACHE_TTL
  }
  return true
})

console.log(`${appsWithRepos.length} of ${apps.length} apps have a GitHub repo.`)
console.log(
  `${appsToUpdate.length} of those ${appsWithRepos.length} have missing or outdated README data.`
)
|
|
|
|
/**
 * Log a request failure unless it is a plain 404, which is the expected
 * outcome for repositories that are gone or have no README.
 *
 * @param {object} app - app entry; only `app.slug` is used
 * @param {*} err - rejection reason from the GitHub client
 */
function logUnexpectedError(app, err) {
  if (!err || err.status !== 404) {
    console.error(`${app.slug}: Non 404 error`)
    console.error(err)
  }
}

/**
 * Record that no README could be obtained for `app`. The attempt is still
 * timestamped, so the cache TTL also throttles retries of failed lookups.
 *
 * @param {object} app - app entry; only `app.slug` is used
 */
function recordMissingReadme(app) {
  console.error(`${app.slug}: no README found`)
  output[app.slug] = {
    readmeOriginal: null,
    readmeFetchedAt: new Date(),
  }
}

/**
 * Fetch one app's README in two rate-limited steps and store the outcome in
 * `output[app.slug]`:
 *   1. look up the repository to learn its default branch;
 *   2. fetch the README and rewrite its relative URLs against that branch.
 *
 * When step 1 fails there is no branch to resolve URLs against, so the README
 * is recorded as missing right away instead of issuing a second request.
 *
 * @param {object} app - app entry with `slug` and `repository`
 * @returns {Promise<void>} settles once `output` has been updated; request
 *   failures are logged and recorded rather than propagated
 */
function fetchAppReadme(app) {
  return limiter
    .schedule(getReadme, app)
    .then((repository) => repository.data.default_branch)
    .catch((err) => {
      // Resolve to undefined so the next step records the README as missing.
      logUnexpectedError(app, err)
    })
    .then((defaultBranch) => {
      if (!defaultBranch) {
        recordMissingReadme(app)
        return undefined
      }

      // Returned so the caller's promise covers the README request as well.
      return limiter
        .schedule(getReadme, app, defaultBranch)
        .then((release) => {
          console.log(`${app.slug}: got latest README`)
          output[app.slug] = {
            readmeCleaned: cleanReadme(release.data, defaultBranch, app),
            readmeOriginal: release.data,
            readmeFetchedAt: new Date(),
          }
        })
        .catch((err) => {
          recordMissingReadme(app)
          logUnexpectedError(app, err)
        })
    })
}

/**
 * Build the data to persist: for every app that has a GitHub repo, the entry
 * fetched during this run, or else the still-fresh entry from the previous
 * run. Without the fallback, apps skipped because their cache is fresh would
 * vanish from the file and be refetched on the next run.
 *
 * @returns {object} README entries keyed by app slug
 */
function collectReadmeData() {
  const hasOwn = (obj, key) => Object.prototype.hasOwnProperty.call(obj, key)
  const merged = {}
  appsWithRepos.forEach(({ slug }) => {
    if (hasOwn(output, slug)) {
      merged[slug] = output[slug]
    } else if (hasOwn(oldReadmeData, slug)) {
      merged[slug] = oldReadmeData[slug]
    }
  })
  return merged
}

// Wait for every per-app chain to settle before writing. This replaces the
// former "limiter idle + 1 second" heuristic, which could write the file and
// exit while a README request was still in flight.
Promise.all(appsToUpdate.map((app) => fetchAppReadme(app)))
  .then(() => {
    fs.writeFileSync(outputFile, JSON.stringify(collectReadmeData(), null, 2))
    console.log(`Done fetching README files.\nWrote ${outputFile}`)
    process.exit()
  })
  .catch((err) => {
    // Reached only if writing the file (or an unexpected bug above) throws.
    console.error(err)
    process.exit(1)
  })
|
|
|
|
/**
 * Issue one GitHub API request for an app's repository.
 *
 * With a truthy `defaultBranch` the repository's README is requested;
 * otherwise the repository record itself is requested (the caller reads
 * `data.default_branch` from that response). Both requests carry the same
 * `Accept: application/vnd.github.v3.html` header.
 *
 * @param {object} app - app entry; `app.repository` is parsed with
 *   github-url-to-object to obtain the owner and repository name
 * @param {string} [defaultBranch] - only its truthiness is inspected here
 * @returns the result of the selected `github.repos` call
 */
function getReadme(app, defaultBranch) {
  const parsed = parseGitUrl(app.repository)
  const request = {
    owner: parsed.user,
    repo: parsed.repo,
    headers: { Accept: 'application/vnd.github.v3.html' },
  }

  const endpoint = defaultBranch ? 'getReadme' : 'get'
  return github.repos[endpoint](request)
}
|
|
|
|
/**
 * Decide whether a `src`/`href` value must be resolved against the repository.
 *
 * Values with a scheme (http:, https:, mailto:, data:, ...) or a
 * protocol-relative `//host` prefix already say where they point and are left
 * alone. A missing attribute (non-string) is never rewritten.
 *
 * @param {*} url - attribute value as returned by cheerio's `.attr(name)`
 * @returns {boolean} true when the value is a repository-relative reference
 */
function isRepoRelativeUrl(url) {
  return typeof url === 'string' && !/^(?:[a-z][a-z0-9+.-]*:|\/\/)/i.test(url)
}

/**
 * Drop leading `/` and `./` segments so the path can be appended to a base
 * URL without producing `//` or `/./` in the result.
 *
 * @param {string} url - repository-relative reference
 * @returns {string} the reference without leading `/` or `./`
 */
function toRepoPath(url) {
  return url.replace(/^(?:\.?\/)+/, '')
}

/**
 * Rewrite repository-relative image and link URLs in README HTML so they keep
 * working when the HTML is displayed outside of GitHub.
 *
 * Images are pointed at raw.githubusercontent.com (and get an empty
 * `crossorigin` attribute); links are pointed at the repository's `blob` view.
 * Both are resolved against `defaultBranch`.
 *
 * @param {string} readme - README HTML, loaded with cheerio
 * @param {string} defaultBranch - branch name used in the rewritten URLs
 * @param {object} app - app entry; `slug` is used for logging, `repository`
 *   as the base URL
 * @returns {string} inner HTML of the resulting document body
 */
function cleanReadme(readme, defaultBranch, app) {
  const $ = cheerio.load(readme)

  // Tolerate a trailing slash or ".git" suffix on the repository URL.
  const repoUrl = app.repository.replace(/(?:\.git)?\/*$/, '')
  const rawRoot = `${repoUrl.replace(
    'github.com',
    'raw.githubusercontent.com'
  )}/${defaultBranch}`
  const blobRoot = `${repoUrl}/blob/${defaultBranch}`

  const $relativeImages = $('img').filter((i, img) =>
    isRepoRelativeUrl($(img).attr('src'))
  )
  if ($relativeImages.length) {
    console.log(
      `${app.slug}: updating ${$relativeImages.length} relative image URLs`
    )
    $relativeImages.each((i, img) => {
      const $img = $(img)
      $img.attr({
        src: `${rawRoot}/${toRepoPath($img.attr('src'))}`,
        crossorigin: '',
      })
    })
  }

  const $relativeLinks = $('a').filter((i, link) =>
    isRepoRelativeUrl($(link).attr('href'))
  )
  if ($relativeLinks.length) {
    console.log(`${app.slug}: updating ${$relativeLinks.length} relative links`)
    $relativeLinks.each((i, link) => {
      const $link = $(link)
      $link.attr('href', `${blobRoot}/${toRepoPath($link.attr('href'))}`)
    })
  }

  return $('body').html()
}
|