Node prep script improvements
- Blacklist nouveau universally on GPU VMs
- Change URL retrieval to requests
- Update requirements to latest
This commit is contained in:
Родитель
2d53e411e4
Коммит
983a7eed45
|
@ -36,11 +36,8 @@ try:
|
|||
import pathlib2 as pathlib
|
||||
except ImportError:
|
||||
import pathlib
|
||||
import requests
|
||||
import time
|
||||
try:
|
||||
import urllib.request as urllibreq
|
||||
except ImportError:
|
||||
import urllib as urllibreq
|
||||
import uuid
|
||||
# non-stdlib imports
|
||||
import azure.batch.models as batchmodels
|
||||
|
@ -63,6 +60,7 @@ from .version import __version__
|
|||
logger = logging.getLogger(__name__)
|
||||
util.setup_logger(logger)
|
||||
# global defines
|
||||
_REQUEST_CHUNK_SIZE = 4194304
|
||||
_ROOT_PATH = pathlib.Path(__file__).resolve().parent.parent
|
||||
_AZUREFILE_DVD_BIN = {
|
||||
'url': (
|
||||
|
@ -72,6 +70,7 @@ _AZUREFILE_DVD_BIN = {
|
|||
'sha256': (
|
||||
'288f809a1290ea8daf89d222507bda9b3709a9665cec8b70354a50252395e127'
|
||||
),
|
||||
'target': 'resources/azurefile-dockervolumedriver'
|
||||
}
|
||||
_NVIDIA_DOCKER = {
|
||||
'ubuntuserver': {
|
||||
|
@ -275,12 +274,18 @@ def _setup_nvidia_driver_package(blob_client, config, vm_size):
|
|||
raise RuntimeError(
|
||||
'Cannot proceed with deployment due to non-agreement with '
|
||||
'license for NVIDIA driver')
|
||||
else:
|
||||
logger.info('NVIDIA Software License accepted')
|
||||
# download driver
|
||||
logger.debug('downloading NVIDIA driver to {}'.format(
|
||||
_NVIDIA_DRIVER['target']))
|
||||
response = urllibreq.urlopen(_NVIDIA_DRIVER[gpu_type]['url'])
|
||||
response = requests.get(_NVIDIA_DRIVER[gpu_type]['url'], stream=True)
|
||||
with pkg.open('wb') as f:
|
||||
f.write(response.read())
|
||||
for chunk in response.iter_content(chunk_size=_REQUEST_CHUNK_SIZE):
|
||||
if chunk:
|
||||
f.write(chunk)
|
||||
logger.debug('wrote {} bytes to {}'.format(
|
||||
pkg.stat().st_size, _NVIDIA_DRIVER['target']))
|
||||
# check sha256
|
||||
if (util.compute_sha256_for_file(pkg, False) !=
|
||||
_NVIDIA_DRIVER[gpu_type]['sha256']):
|
||||
|
@ -308,9 +313,13 @@ def _setup_nvidia_docker_package(blob_client, config):
|
|||
# download package
|
||||
logger.debug('downloading NVIDIA docker to {}'.format(
|
||||
_NVIDIA_DOCKER[offer]['target']))
|
||||
response = urllibreq.urlopen(_NVIDIA_DOCKER[offer]['url'])
|
||||
response = requests.get(_NVIDIA_DOCKER[offer]['url'], stream=True)
|
||||
with pkg.open('wb') as f:
|
||||
f.write(response.read())
|
||||
for chunk in response.iter_content(chunk_size=_REQUEST_CHUNK_SIZE):
|
||||
if chunk:
|
||||
f.write(chunk)
|
||||
logger.debug('wrote {} bytes to {}'.format(
|
||||
pkg.stat().st_size, _NVIDIA_DOCKER[offer]['target']))
|
||||
# check sha256
|
||||
if (util.compute_sha256_for_file(pkg, False) !=
|
||||
_NVIDIA_DOCKER[offer]['sha256']):
|
||||
|
@ -331,15 +340,19 @@ def _setup_azurefile_volume_driver(blob_client, config):
|
|||
offer = settings.pool_offer(config, lower=True)
|
||||
sku = settings.pool_sku(config, lower=True)
|
||||
# check to see if binary is downloaded
|
||||
bin = pathlib.Path(_ROOT_PATH, 'resources/azurefile-dockervolumedriver')
|
||||
bin = pathlib.Path(_ROOT_PATH, _AZUREFILE_DVD_BIN['target'])
|
||||
if (not bin.exists() or
|
||||
util.compute_sha256_for_file(bin, False) !=
|
||||
_AZUREFILE_DVD_BIN['sha256']):
|
||||
# download package
|
||||
logger.debug('downloading Azure File Docker Volume Driver')
|
||||
response = urllibreq.urlopen(_AZUREFILE_DVD_BIN['url'])
|
||||
response = requests.get(_AZUREFILE_DVD_BIN['url'], stream=True)
|
||||
with bin.open('wb') as f:
|
||||
f.write(response.read())
|
||||
for chunk in response.iter_content(chunk_size=_REQUEST_CHUNK_SIZE):
|
||||
if chunk:
|
||||
f.write(chunk)
|
||||
logger.debug('wrote {} bytes to {}'.format(
|
||||
bin.stat().st_size, _AZUREFILE_DVD_BIN['target']))
|
||||
# check sha256
|
||||
if (util.compute_sha256_for_file(bin, False) !=
|
||||
_AZUREFILE_DVD_BIN['sha256']):
|
||||
|
|
|
@ -22,7 +22,8 @@ to your local machine has been completed.
|
|||
2. Create a directory to hold your configuration files. For this quickstart
|
||||
guide, create a directory named `config`.
|
||||
3. Copy the sample configuration files from the Deep Learning framework recipe
|
||||
of your choice to the `config` directory:
|
||||
of your choice to the `config` directory (please note that some Docker images
|
||||
are very large, such as CNTK, which will lead to longer pool allocation time):
|
||||
* [CNTK-CPU-OpenMPI](../recipes/CNTK-CPU-OpenMPI/config/singlenode/)
|
||||
* [Caffe-CPU](../recipes/Caffe-CPU/config/)
|
||||
* [Chainer-CPU](../recipes/Chainer-CPU/config/)
|
||||
|
|
|
@ -3,8 +3,8 @@ azure-batch==2.0.1
|
|||
azure-keyvault==0.2.0
|
||||
azure-mgmt-batch==3.0.1
|
||||
azure-mgmt-compute==1.0.0rc1
|
||||
azure-mgmt-network==1.0.0rc2
|
||||
azure-mgmt-resource==1.0.0rc1
|
||||
azure-mgmt-network==1.0.0rc3
|
||||
azure-mgmt-resource==1.0.0rc2
|
||||
azure-storage==0.34.0
|
||||
blobxfer==0.12.1
|
||||
click==6.7
|
||||
|
@ -12,4 +12,5 @@ future==0.16.0
|
|||
msrest==0.4.7
|
||||
msrestazure==0.4.7
|
||||
pathlib2==2.2.1; python_version < '3.5'
|
||||
requests==2.13.0
|
||||
scandir==1.5; python_version < '3.5'
|
||||
|
|
|
@ -132,13 +132,15 @@ check_for_buggy_ntfs_mount() {
|
|||
|
||||
check_for_nvidia_card() {
|
||||
set +e
|
||||
lspci
|
||||
lspci | grep -i nvidia > /dev/null
|
||||
out=$(lspci)
|
||||
echo "$out" | grep -i nvidia > /dev/null
|
||||
if [ $? -ne 0 ]; then
|
||||
echo $out
|
||||
echo "ERROR: No Nvidia card(s) detected!"
|
||||
exit 1
|
||||
fi
|
||||
set -e
|
||||
echo $out
|
||||
}
|
||||
|
||||
install_azurefile_docker_volume_driver() {
|
||||
|
@ -376,12 +378,10 @@ if [ $offer == "ubuntuserver" ] || [ $offer == "debian" ]; then
|
|||
check_for_nvidia_card
|
||||
# split arg into two
|
||||
IFS=':' read -ra GPUARGS <<< "$gpu"
|
||||
# take special actions if we're on NV-series VMs
|
||||
if [ ${GPUARGS[0]} == "True" ]; then
|
||||
# remove nouveau
|
||||
apt-get --purge remove xserver-xorg-video-nouveau
|
||||
rmmod nouveau
|
||||
# blacklist nouveau from being loaded if rebooted
|
||||
# remove nouveau
|
||||
apt-get --purge remove xserver-xorg-video-nouveau
|
||||
rmmod nouveau
|
||||
# blacklist nouveau from being loaded if rebooted
|
||||
cat > /etc/modprobe.d/blacklist-nouveau.conf << EOF
|
||||
blacklist nouveau
|
||||
blacklist lbm-nouveau
|
||||
|
@ -389,7 +389,6 @@ options nouveau modeset=0
|
|||
alias nouveau off
|
||||
alias lbm-nouveau off
|
||||
EOF
|
||||
fi
|
||||
nvdriver=${GPUARGS[1]}
|
||||
nvdocker=${GPUARGS[2]}
|
||||
# get development essentials for nvidia driver
|
||||
|
|
Загрузка…
Ссылка в новой задаче