- Blacklist nouveau universally on GPU VMs
- Change URL retrieval to requests
- Update requirements to latest
This commit is contained in:
Fred Park 2017-05-04 22:30:44 -07:00
Родитель 2d53e411e4
Коммит 983a7eed45
4 изменённых файлов: 37 добавлений и 23 удалений

Просмотреть файл

@ -36,11 +36,8 @@ try:
import pathlib2 as pathlib
except ImportError:
import pathlib
import requests
import time
try:
import urllib.request as urllibreq
except ImportError:
import urllib as urllibreq
import uuid
# non-stdlib imports
import azure.batch.models as batchmodels
@ -63,6 +60,7 @@ from .version import __version__
logger = logging.getLogger(__name__)
util.setup_logger(logger)
# global defines
_REQUEST_CHUNK_SIZE = 4194304
_ROOT_PATH = pathlib.Path(__file__).resolve().parent.parent
_AZUREFILE_DVD_BIN = {
'url': (
@ -72,6 +70,7 @@ _AZUREFILE_DVD_BIN = {
'sha256': (
'288f809a1290ea8daf89d222507bda9b3709a9665cec8b70354a50252395e127'
),
'target': 'resources/azurefile-dockervolumedriver'
}
_NVIDIA_DOCKER = {
'ubuntuserver': {
@ -275,12 +274,18 @@ def _setup_nvidia_driver_package(blob_client, config, vm_size):
raise RuntimeError(
'Cannot proceed with deployment due to non-agreement with '
'license for NVIDIA driver')
else:
logger.info('NVIDIA Software License accepted')
# download driver
logger.debug('downloading NVIDIA driver to {}'.format(
_NVIDIA_DRIVER['target']))
response = urllibreq.urlopen(_NVIDIA_DRIVER[gpu_type]['url'])
response = requests.get(_NVIDIA_DRIVER[gpu_type]['url'], stream=True)
with pkg.open('wb') as f:
f.write(response.read())
for chunk in response.iter_content(chunk_size=_REQUEST_CHUNK_SIZE):
if chunk:
f.write(chunk)
logger.debug('wrote {} bytes to {}'.format(
pkg.stat().st_size, _NVIDIA_DRIVER['target']))
# check sha256
if (util.compute_sha256_for_file(pkg, False) !=
_NVIDIA_DRIVER[gpu_type]['sha256']):
@ -308,9 +313,13 @@ def _setup_nvidia_docker_package(blob_client, config):
# download package
logger.debug('downloading NVIDIA docker to {}'.format(
_NVIDIA_DOCKER[offer]['target']))
response = urllibreq.urlopen(_NVIDIA_DOCKER[offer]['url'])
response = requests.get(_NVIDIA_DOCKER[offer]['url'], stream=True)
with pkg.open('wb') as f:
f.write(response.read())
for chunk in response.iter_content(chunk_size=_REQUEST_CHUNK_SIZE):
if chunk:
f.write(chunk)
logger.debug('wrote {} bytes to {}'.format(
pkg.stat().st_size, _NVIDIA_DOCKER[offer]['target']))
# check sha256
if (util.compute_sha256_for_file(pkg, False) !=
_NVIDIA_DOCKER[offer]['sha256']):
@ -331,15 +340,19 @@ def _setup_azurefile_volume_driver(blob_client, config):
offer = settings.pool_offer(config, lower=True)
sku = settings.pool_sku(config, lower=True)
# check to see if binary is downloaded
bin = pathlib.Path(_ROOT_PATH, 'resources/azurefile-dockervolumedriver')
bin = pathlib.Path(_ROOT_PATH, _AZUREFILE_DVD_BIN['target'])
if (not bin.exists() or
util.compute_sha256_for_file(bin, False) !=
_AZUREFILE_DVD_BIN['sha256']):
# download package
logger.debug('downloading Azure File Docker Volume Driver')
response = urllibreq.urlopen(_AZUREFILE_DVD_BIN['url'])
response = requests.get(_AZUREFILE_DVD_BIN['url'], stream=True)
with bin.open('wb') as f:
f.write(response.read())
for chunk in response.iter_content(chunk_size=_REQUEST_CHUNK_SIZE):
if chunk:
f.write(chunk)
logger.debug('wrote {} bytes to {}'.format(
bin.stat().st_size, _AZUREFILE_DVD_BIN['target']))
# check sha256
if (util.compute_sha256_for_file(bin, False) !=
_AZUREFILE_DVD_BIN['sha256']):

Просмотреть файл

@ -22,7 +22,8 @@ to your local machine has been completed.
2. Create a directory to hold your configuration files. For this quickstart
guide, create a directory named `config`.
3. Copy the sample configuration files from the Deep Learning framework recipe
of your choice to the `config` directory:
of your choice to the `config` directory (note that some Docker images, such
as CNTK, are very large and will lead to longer pool allocation times):
* [CNTK-CPU-OpenMPI](../recipes/CNTK-CPU-OpenMPI/config/singlenode/)
* [Caffe-CPU](../recipes/Caffe-CPU/config/)
* [Chainer-CPU](../recipes/Chainer-CPU/config/)

Просмотреть файл

@ -3,8 +3,8 @@ azure-batch==2.0.1
azure-keyvault==0.2.0
azure-mgmt-batch==3.0.1
azure-mgmt-compute==1.0.0rc1
azure-mgmt-network==1.0.0rc2
azure-mgmt-resource==1.0.0rc1
azure-mgmt-network==1.0.0rc3
azure-mgmt-resource==1.0.0rc2
azure-storage==0.34.0
blobxfer==0.12.1
click==6.7
@ -12,4 +12,5 @@ future==0.16.0
msrest==0.4.7
msrestazure==0.4.7
pathlib2==2.2.1; python_version < '3.5'
requests==2.13.0
scandir==1.5; python_version < '3.5'

Просмотреть файл

@ -132,13 +132,15 @@ check_for_buggy_ntfs_mount() {
# Verify that the PCI device listing mentions an NVIDIA device.
# Prints the lspci listing for diagnostics. If no line of the listing matches
# "nvidia" (case-insensitive), prints an error and exits the script with
# status 1.
check_for_nvidia_card() {
    # suspend errexit while probing so a failing command here does not abort
    # the script before the error message below is reported
    set +e
    # capture the listing once so the text that is searched is the text shown
    out=$(lspci)
    # quoted so the line breaks of the listing are preserved in the log
    echo "$out"
    if ! echo "$out" | grep -i nvidia > /dev/null; then
        echo "ERROR: No Nvidia card(s) detected!"
        exit 1
    fi
    # restore errexit for the remainder of the script
    set -e
}
install_azurefile_docker_volume_driver() {
@ -376,12 +378,10 @@ if [ $offer == "ubuntuserver" ] || [ $offer == "debian" ]; then
check_for_nvidia_card
# split arg into two
IFS=':' read -ra GPUARGS <<< "$gpu"
# take special actions if we're on NV-series VMs
if [ ${GPUARGS[0]} == "True" ]; then
# remove nouveau
apt-get --purge remove xserver-xorg-video-nouveau
rmmod nouveau
# blacklist nouveau from being loaded if rebooted
# remove nouveau
apt-get --purge remove xserver-xorg-video-nouveau
rmmod nouveau
# blacklist nouveau from being loaded if rebooted
cat > /etc/modprobe.d/blacklist-nouveau.conf << EOF
blacklist nouveau
blacklist lbm-nouveau
@ -389,7 +389,6 @@ options nouveau modeset=0
alias nouveau off
alias lbm-nouveau off
EOF
fi
nvdriver=${GPUARGS[1]}
nvdocker=${GPUARGS[2]}
# get development essentials for nvidia driver