зеркало из https://github.com/mozilla/snakepit.git
This commit is contained in:
Родитель
e7ca489932
Коммит
b3d0aa0f39
|
@ -33,6 +33,7 @@
|
|||
"dependencies": {
|
||||
"bcrypt": "^1.0.3",
|
||||
"body-parser": "^1.18.2",
|
||||
"combined-stream": "^1.0.6",
|
||||
"commander": "^2.14.1",
|
||||
"express": "^4.16.2",
|
||||
"jsonwebtoken": "^8.1.1",
|
||||
|
|
|
@ -1,2 +1,4 @@
|
|||
# Prints the literal word "Available" and exits with status 0.
# NOTE(review): the hunk header (-1,2 +1,4) suggests these two lines are the
# pre-commit script body shown next to its replacement — confirm.
echo "Available"
|
||||
exit 0
|
||||
# Emit one "cpu:<model>" line per "model name" entry in /proc/cpuinfo.
# The sed chain removes "(tm)", "(r)", "processor" and "cpu" (case-insensitive),
# drops everything from "@" onwards, trims trailing spaces and finally replaces
# the text up to and including ": " with the "cpu:" prefix.
grep "^model name" /proc/cpuinfo | sed -e "s/(tm)//gI" -e "s/(r)//gI" -e "s/processor//gI" -e "s/cpu//gI" -e "s/@.*$//" -e "s/ *$//" -e "s/^.*: /cpu:/"
|
||||
# Only query GPUs when nvidia-smi resolves to an executable file.
if [ "$(type -t nvidia-smi)" = "file" ]; then
|
||||
# Emit one "cuda:<gpu name>" line per GPU reported by nvidia-smi.
nvidia-smi --query-gpu=gpu_name --format=csv,noheader | sed -e "s/^/cuda:/"
|
||||
fi
|
|
@ -0,0 +1,41 @@
|
|||
const store = require('./store.js')
|
||||
|
||||
var exports = module.exports = {}
|
||||
|
||||
var db = store.root
|
||||
|
||||
exports.initDb = function() {
|
||||
if (!db.aliases) {
|
||||
db.aliases = {}
|
||||
}
|
||||
}
|
||||
|
||||
exports.initApp = function(app) {
|
||||
app.get('/aliases', function(req, res) {
|
||||
res.status(200).send(Object.keys(db.aliases))
|
||||
})
|
||||
|
||||
app.put('/aliases/:id', function(req, res) {
|
||||
if (req.user.admin) {
|
||||
if (req.body && req.body.model) {
|
||||
db.aliases[req.params.id] = {
|
||||
id: req.params.id,
|
||||
model: req.body.model
|
||||
}
|
||||
res.status(200).send()
|
||||
} else {
|
||||
res.status(400).send()
|
||||
}
|
||||
} else {
|
||||
res.status(403).send()
|
||||
}
|
||||
})
|
||||
|
||||
app.delete('/aliases/:id', function(req, res) {
|
||||
if (req.user.admin) {
|
||||
delete db.aliases[req.params.id]
|
||||
} else {
|
||||
res.status(403).send()
|
||||
}
|
||||
})
|
||||
}
|
16
src/jobs.js
16
src/jobs.js
|
@ -18,9 +18,10 @@ exports.initDb = function() {
|
|||
|
||||
function _getRunningJobs() {
|
||||
var jobs = []
|
||||
for (let [id, node] of Object.entries(db.nodes)) {
|
||||
Object.keys(db.nodes).forEach(id => {
|
||||
let node = db.nodes[id]
|
||||
if (node.state >= nodes.STATE_ACTIVE) {
|
||||
let gpuCounter = numGpus
|
||||
let gpuCounter = numGpus
|
||||
gpuReservation = []
|
||||
for(let gpu = 0; gpu < node.gpus.length; gpu++) {
|
||||
if (node.gpus[gpu].job == 0 || state == 0) {
|
||||
|
@ -37,16 +38,17 @@ function _getRunningJobs() {
|
|||
}
|
||||
gpuCounter = numGpus
|
||||
}
|
||||
}
|
||||
})
|
||||
return jobs
|
||||
}
|
||||
|
||||
function _reserve(numNodes, numGpus, state) {
|
||||
let reservation = []
|
||||
let nodeCounter = numNodes
|
||||
for (let [id, node] of Object.entries(db.nodes)) {
|
||||
Object.keys(db.nodes).forEach(id => {
|
||||
let node = db.nodes[id]
|
||||
if (node.state >= state) {
|
||||
let gpuCounter = numGpus
|
||||
let gpuCounter = numGpus
|
||||
gpuReservation = []
|
||||
for(let gpu = 0; gpu < node.gpus.length; gpu++) {
|
||||
if (node.gpus[gpu].job == 0 || state == 0) {
|
||||
|
@ -63,7 +65,7 @@ function _reserve(numNodes, numGpus, state) {
|
|||
}
|
||||
gpuCounter = numGpus
|
||||
}
|
||||
}
|
||||
})
|
||||
return false
|
||||
}
|
||||
|
||||
|
@ -120,5 +122,5 @@ exports.initApp = function(app) {
|
|||
}
|
||||
|
||||
// Exported hook with an empty body in this view.
// NOTE(review): the name suggests it is meant to be invoked periodically — confirm against the caller.
exports.tick = function() {
|
||||
|
||||
|
||||
}
|
68
src/nodes.js
68
src/nodes.js
|
@ -1,6 +1,8 @@
|
|||
const fs = require('fs')
|
||||
const stream = require('stream')
|
||||
const path = require('path')
|
||||
const { exec, execFile, spawn } = require('child_process')
|
||||
const CombinedStream = require('combined-stream')
|
||||
const { spawn } = require('child_process')
|
||||
const store = require('./store.js')
|
||||
|
||||
var exports = module.exports = {}
|
||||
|
@ -9,29 +11,50 @@ var db = store.root
|
|||
|
||||
const STATE_UNKNOWN = exports.STATE_UNKNOWN = 0
|
||||
const STATE_OFFLINE = exports.STATE_OFFLINE = 1
|
||||
const STATE_ACTIVE = exports.STATE_ACTIVE = 2
|
||||
const STATE_ONLINE = exports.STATE_ONLINE = 2
|
||||
|
||||
/**
 * Runs a script from the local "scripts" directory on a node by feeding it to
 * `bash -s` over ssh.
 *
 * @param {object} node - Target node; `user`, `address` and `port` are used
 *     to build the ssh invocation.
 * @param {string} scriptName - File name of the script inside ../scripts.
 * @param {object|function} [env] - Variables exported ahead of the script.
 *     May be omitted, in which case the third argument is the callback.
 * @param {function} callback - Called exactly once as
 *     callback(code, stdout, stderr). `code` is 0 on success; otherwise it is
 *     the non-zero exit code, the name of the terminating signal, or the
 *     Error that prevented the script from running.
 */
function _runScript(node, scriptName, env, callback) {
    // Support the short call form _runScript(node, scriptName, callback).
    if (typeof env == 'function') {
        callback = env
        env = {}
    }
    env = env || {}
    const scriptPath = path.join(__dirname, '..', 'scripts', scriptName)
    fs.readFile(scriptPath, 'utf8', function read(err, content) {
        if (err) {
            // Report an unreadable script instead of leaving the caller
            // waiting for a callback that never comes.
            callback(err, '', String(err))
            return
        }
        const address = node.user + '@' + node.address
        console.log('Running script "' + scriptPath + '" on "' + address + '"')

        // 'error' and 'close' can both fire for one process; report only once.
        let finished = false
        const finish = (code, out, errOut) => {
            if (!finished) {
                finished = true
                callback(code, out, errOut)
            }
        }

        // The port is stringified because spawn arguments are strings.
        const p = spawn('ssh', [address, '-p', String(node.port), 'bash -s'])
        const stdout = []
        const stderr = []
        p.stdout.on('data', data => stdout.push(data))
        p.stderr.on('data', data => stderr.push(data))
        // Emitted e.g. when the ssh binary cannot be started at all.
        p.on('error', spawnErr => finish(spawnErr, '', String(spawnErr)))
        // Chunks are concatenated as-is: chunk boundaries are arbitrary, so
        // inserting separators between them would corrupt the output.
        // A null exit code means the process was ended by a signal.
        p.on('close', (code, signal) => finish(
            code === null ? (signal || 1) : code,
            Buffer.concat(stdout).toString(),
            Buffer.concat(stderr).toString()
        ))
        // Writing can fail (EPIPE) when ssh exits early; the failure itself
        // is still reported through the 'close' handler above.
        p.stdin.on('error', () => {})

        // Single-quote every value so spaces and shell metacharacters are
        // taken literally by the remote bash.
        const quote = value => "'" + String(value).replace(/'/g, "'\\''") + "'"
        const exportLines = Object.keys(env)
            .map(name => 'export ' + name + '=' + quote(env[name]) + '\n')
        p.stdin.end(exportLines.join('') + content + '\n')
    })
}
|
||||
|
||||
/**
 * Probes a node by running available.sh on it and parsing its output.
 *
 * Every output line of the form "<type>:<model>" becomes an entry
 * { model, index } in the list for that type, where `index` counts the
 * entries of the same type in output order. Lines without a type or without
 * a model are ignored.
 *
 * @param {object} node - Node to probe; handed on to _runScript.
 * @param {function} callback - Called with the resources object on success,
 *     or without arguments when the script reported an error.
 */
function _checkAvailability(node, callback) {
    _runScript(node, 'available.sh', (err, stdout, stderr) => {
        console.log(stdout)
        if (err) {
            console.error(err)
            callback()
            return
        }
        const resources = {}
        stdout.split('\n').forEach(line => {
            // Split at the first colon only, so a model name that itself
            // contains ':' is kept whole instead of being cut short.
            const separator = line.indexOf(':')
            if (separator < 1) {
                return
            }
            const type = line.slice(0, separator)
            const model = line.slice(separator + 1)
            if (model) {
                resources[type] = resources[type] || []
                resources[type].push({ model: model, index: resources[type].length })
            }
        })
        callback(resources)
    })
}
|
||||
|
@ -52,13 +75,20 @@ exports.initApp = function(app) {
|
|||
id: id,
|
||||
address: node.address || dbnode.address,
|
||||
port: node.port || dbnode.port || 22,
|
||||
gpus: node.hasOwnProperty('gpus') ? node.gpus : dbnode.gpus,
|
||||
user: node.user || dbnode.user || 'pitmaster',
|
||||
state: STATE_UNKNOWN
|
||||
state: STATE_ONLINE
|
||||
}
|
||||
if (newnode.address) {
|
||||
_checkAvailability(newnode, available => {
|
||||
if (available) {
|
||||
_checkAvailability(newnode, resources => {
|
||||
if (resources) {
|
||||
if (node.cvd) {
|
||||
console.log(node.cvd)
|
||||
Object.keys(resources).forEach(type => {
|
||||
resources[type] = resources[type]
|
||||
.filter(resource => type != 'cuda' || node.cvd.includes(resource.index))
|
||||
})
|
||||
}
|
||||
newnode.resources = resources
|
||||
db.nodes[id] = newnode
|
||||
res.status(200).send()
|
||||
} else {
|
||||
|
|
|
@ -3,7 +3,7 @@ const path = require('path')
|
|||
const cluster = require('cluster')
|
||||
const cpus = require('os').cpus().length
|
||||
const config = require('./config.js')
|
||||
const modules = 'users nodes jobs'.split(' ').map(name => require('./' + name + '.js'))
|
||||
const modules = 'users nodes jobs aliases'.split(' ').map(name => require('./' + name + '.js'))
|
||||
|
||||
function readConfigFile(name) {
|
||||
var filename = path.join('config', name)
|
||||
|
|
Загрузка…
Ссылка в новой задаче