2020-04-09 03:36:33 +03:00
|
|
|
#!/bin/bash
|
|
|
|
|
|
|
|
# Export the current pbs_cgroups hook configuration to pbs_cgroups.json in the
# working directory so it can be edited locally.
/opt/pbs/bin/qmgr -c 'export hook pbs_cgroups application/x-config default' \
    > pbs_cgroups.json
|
|
|
|
|
|
|
|
# Enable the cgroup "devices" subsystem in the exported hook configuration and
# set the list of device nodes that jobs are allowed to access.
#
# jq cannot edit a file in place, so the result is written to a temporary file
# that replaces pbs_cgroups.json only when jq succeeds. Both assignments are
# done in a single jq pass; on failure the temporary file is removed instead
# of being left behind.
tmp=$(mktemp)
if jq '.cgroup.devices.enabled = true
       | .cgroup.devices.allow = [ "b *:* rwm","c 10:* rwm","c 4:* rwm","c 5:* rwm","c 1:* rwm","c 7:* rwm",["nvidia-uvm", "rwm"]]' \
       pbs_cgroups.json > "$tmp"; then
    mv "$tmp" pbs_cgroups.json
else
    rm -f "$tmp"
fi
|
|
|
|
|
|
|
|
# Load the edited pbs_cgroups.json back into PBS as the pbs_cgroups hook
# configuration.
/opt/pbs/bin/qmgr \
    -c 'import hook pbs_cgroups application/x-config default pbs_cgroups.json'
|
|
|
|
|
|
|
|
# Export the Python source of the pbs_cgroups hook to pbs_cgroups.py in the
# working directory.
/opt/pbs/bin/qmgr -c 'export hook pbs_cgroups application/x-python default' \
    > pbs_cgroups.py
|
|
|
|
|
|
|
|
# Write pbs_cgroups_py.patch: a unified diff against pbs_cgroups.py with three
# hunks. Each hunk adds one line that replaces the `gpus` /
# `cuda_visible_devices` list with range(0, len(list)) just before the list is
# joined into the CUDA_VISIBLE_DEVICES value, so the exported value becomes
# 0..N-1 rather than the original entries.
#
# The here-document delimiter is unquoted, so the shell processes backslashes
# in the body: the '\\\,' sequence below is written to the patch file as '\\,'.
#
# NOTE(review): range() produces integers, while string.join() on Python 2
# expects string items -- confirm the patched hook does not raise TypeError.
# NOTE(review): the hunk offsets (780, 1014, 2887) are fixed, so this diff
# presumably matches only one specific version of the hook -- verify before use.
cat > pbs_cgroups_py.patch << EOF
|
|
|
|
--- pbs_cgroups.py_orig 2020-04-08 20:05:31.573683390 +0000
|
|
|
|
+++ pbs_cgroups.py 2020-04-08 20:09:50.388237101 +0000
|
|
|
|
@@ -780,6 +780,7 @@
|
|
|
|
if gpus:
|
|
|
|
# Don't put quotes around the values. ex "0" or "0,1".
|
|
|
|
# This will cause it to fail.
|
|
|
|
+ gpus = range(0,len(gpus))
|
|
|
|
env_list.append('CUDA_VISIBLE_DEVICES=%s' %
|
|
|
|
string.join(gpus, ','))
|
|
|
|
pbs.logmsg(pbs.EVENT_DEBUG4, 'ENV_LIST: %s' % env_list)
|
|
|
|
@@ -1014,6 +1015,7 @@
|
|
|
|
if gpus:
|
|
|
|
# Don't put quotes around the values. ex "0" or "0,1".
|
|
|
|
# This will cause it to fail.
|
|
|
|
+ gpus=range(0,len(gpus))
|
|
|
|
env_list.append('CUDA_VISIBLE_DEVICES=%s' %
|
|
|
|
string.join(gpus, ','))
|
|
|
|
pbs.logmsg(pbs.EVENT_DEBUG4, 'ENV_LIST: %s' % env_list)
|
|
|
|
@@ -2887,6 +2889,7 @@
|
|
|
|
pbs.logmsg(pbs.EVENT_DEBUG4,
|
|
|
|
'offload_devices: %s' % offload_devices)
|
|
|
|
if cuda_visible_devices:
|
|
|
|
+ cuda_visible_devices = range(0,len(cuda_visible_devices))
|
2020-04-16 22:17:02 +03:00
|
|
|
value = string.join(cuda_visible_devices, '\\\,')
|
2020-04-09 03:36:33 +03:00
|
|
|
pbs.event().env['CUDA_VISIBLE_DEVICES'] = '%s' % value
|
|
|
|
pbs.logmsg(pbs.EVENT_DEBUG4,
|
|
|
|
EOF
|
|
|
|
|
2020-04-16 22:00:37 +03:00
|
|
|
# Apply pbs_cgroups_py.patch to the exported hook source in the current
# directory (-p0: use the file names in the diff headers unchanged).
# Abort on failure so that a partially patched pbs_cgroups.py is never
# imported back into PBS by the steps that follow.
if ! patch -p0 < pbs_cgroups_py.patch; then
    echo "error: could not apply pbs_cgroups_py.patch to pbs_cgroups.py" >&2
    exit 1
fi
|
2020-04-09 03:36:33 +03:00
|
|
|
|
|
|
|
# Load the patched pbs_cgroups.py back into PBS as the hook's Python source.
# The input file name is part of the qmgr "import hook" directive, so it must
# be inside the quoted -c command; placed after the closing quote it is passed
# to qmgr as a separate positional argument and the import gets no input file.
/opt/pbs/bin/qmgr -c "import hook pbs_cgroups application/x-python default pbs_cgroups.py"
|
|
|
|
|
|
|
|
# Switch the pbs_cgroups hook on.
/opt/pbs/bin/qmgr -c 'set hook pbs_cgroups enabled = true'
|
|
|
|
|
|
|
|
# Rewrite the "resources:" line of the scheduler configuration in place so the
# resource list ends with ngpus.
sched_config=/var/spool/pbs/sched_priv/sched_config
sed -i 's/^resources:.*/resources: "ncpus, pool_name, mem, arch, host, vnode, aoe, eoe, ngpus"/' "$sched_config"
|