Allow CentOS 7.3 on NC/NV
This commit is contained in:
Родитель
de45b18a67
Коммит
8eb2197d23
|
@ -97,5 +97,5 @@ resources/azurefile-dockervolume-create.sh
|
|||
resources/azurefile-dockervolumedriver
|
||||
resources/azurefile-dockervolumedriver.env
|
||||
resources/docker-registry-v2.tar.gz
|
||||
resources/nvidia-docker.deb
|
||||
resources/nvidia-docker.*
|
||||
resources/nvidia-driver*.run
|
||||
|
|
|
@ -73,6 +73,16 @@ _AZUREFILE_DVD_BIN = {
|
|||
),
|
||||
'target': 'resources/azurefile-dockervolumedriver'
|
||||
}
|
||||
# nvidia-docker 1.0.1 RPM package: download url, sha256 digest and the
# path (relative to the repository resources directory) it is saved to
__NVIDIA_DOCKER_RPM = {
    'url': ''.join((
        'https://github.com/NVIDIA/nvidia-docker/releases/download/',
        'v1.0.1/nvidia-docker-1.0.1-1.x86_64.rpm',
    )),
    'sha256': (
        'f05dfe7fe655ed39c399db0d6362e351b059f2708c3e6da17f590a000237ec3a'
    ),
    'target': 'resources/nvidia-docker.rpm',
}
|
||||
_NVIDIA_DOCKER = {
|
||||
'ubuntuserver': {
|
||||
'url': (
|
||||
|
@ -84,6 +94,8 @@ _NVIDIA_DOCKER = {
|
|||
),
|
||||
'target': 'resources/nvidia-docker.deb'
|
||||
},
|
||||
'centos': __NVIDIA_DOCKER_RPM,
|
||||
'centos-hpc': __NVIDIA_DOCKER_RPM,
|
||||
}
|
||||
_NVIDIA_DRIVER = {
|
||||
'compute': {
|
||||
|
@ -339,9 +351,6 @@ def _setup_nvidia_docker_package(blob_client, config):
|
|||
:return: package path
|
||||
"""
|
||||
offer = settings.pool_offer(config, lower=True)
|
||||
if offer != 'ubuntuserver':
|
||||
raise ValueError('Offer {} is unsupported with nvidia docker'.format(
|
||||
offer))
|
||||
pkg = pathlib.Path(_ROOT_PATH, _NVIDIA_DOCKER[offer]['target'])
|
||||
# check to see if package is downloaded
|
||||
if (not pkg.exists() or
|
||||
|
@ -1534,6 +1543,7 @@ def _adjust_settings_for_pool_creation(config):
|
|||
# enforce publisher/offer/sku restrictions
|
||||
allowed = False
|
||||
shipyard_container_required = True
|
||||
# oracle linux is not supported due to UEKR4 requirement
|
||||
if publisher == 'canonical':
|
||||
if offer == 'ubuntuserver':
|
||||
if sku.startswith('14.04'):
|
||||
|
@ -1560,13 +1570,10 @@ def _adjust_settings_for_pool_creation(config):
|
|||
elif offer == 'opensuse-leap':
|
||||
if sku >= '42':
|
||||
allowed = True
|
||||
# check for valid image if gpu, currently only ubuntu 16.04 is supported
|
||||
if (settings.is_gpu_pool(pool.vm_size) and
|
||||
util.is_none_or_empty(node_agent) and
|
||||
(publisher != 'canonical' and offer != 'ubuntuserver' and
|
||||
sku < '16.04')):
|
||||
allowed = False
|
||||
# oracle linux is not supported due to UEKR4 requirement
|
||||
# check if allowed for gpu (if gpu vm size)
|
||||
if allowed:
|
||||
allowed = settings.gpu_configuration_check(
|
||||
config, vm_size=pool.vm_size)
|
||||
if not allowed and util.is_none_or_empty(node_agent):
|
||||
raise ValueError(
|
||||
('Unsupported Docker Host VM Config, publisher={} offer={} '
|
||||
|
|
|
@ -403,6 +403,37 @@ def get_gpu_type_from_vm_size(vm_size):
|
|||
return None
|
||||
|
||||
|
||||
def gpu_configuration_check(config, vm_size=None):
    # type: (dict, str) -> bool
    """Check if OS is allowed with a GPU VM
    :param dict config: configuration dict
    :param str vm_size: vm size, if none or empty then the vm size of the
        pool settings in config is used
    :rtype: bool
    :return: if configuration is allowed
    """
    # fall back to the pool vm size if one was not explicitly provided
    if util.is_none_or_empty(vm_size):
        vm_size = pool_settings(config).vm_size
    # if this is not a gpu sku, always allow
    if not is_gpu_pool(vm_size):
        return True
    # always allow gpu with custom images
    node_agent = pool_custom_image_node_agent(config)
    if util.is_not_empty(node_agent):
        return True
    # check for platform image support
    publisher = pool_publisher(config, lower=True)
    offer = pool_offer(config, lower=True)
    sku = pool_sku(config, lower=True)
    # NOTE: skus are compared as (lower-cased) strings. 16.04 is the minimum
    # supported Ubuntu release, thus the bound is inclusive so that a bare
    # '16.04' sku is accepted along with '16.04-lts', '16.10', etc.
    if (publisher == 'canonical' and offer == 'ubuntuserver' and
            sku >= '16.04'):
        return True
    elif (publisher == 'openlogic' and
            offer in ('centos', 'centos-hpc') and sku == '7.3'):
        return True
    else:
        return False
|
||||
|
||||
|
||||
def is_rdma_pool(vm_size):
|
||||
# type: (str) -> bool
|
||||
"""Check if pool is IB/RDMA capable
|
||||
|
|
|
@ -20,10 +20,11 @@ you can invoke as:
|
|||
shipyard.cmd
|
||||
```
|
||||
|
||||
If on Mac, you will need to invoke the Python interpreter and pass
|
||||
the script as an argument. For example:
|
||||
If you installed manually (i.e., did not use the installer scripts), then
|
||||
you will need to invoke the Python interpreter and pass the script as an
|
||||
argument. For example:
|
||||
```
|
||||
python shipyard.py
|
||||
python3 shipyard.py
|
||||
```
|
||||
|
||||
The `-h` or `--help` option will list the available options, which are
|
||||
|
|
|
@ -16,12 +16,11 @@ The pool configuration should enable the following properties:
|
|||
K80 GPUs for GPU compute acceleration while `NV` VM instances feature
|
||||
M60 GPUs for visualization workloads. Because CNTK is a GPU-accelerated
|
||||
compute application, it is best to choose `NC` VM instances.
|
||||
* `publisher` should be `Canonical`. Other publishers will be supported
|
||||
once they are available for N-series VMs.
|
||||
* `offer` should be `UbuntuServer`. Other offers will be supported once they
|
||||
are available for N-series VMs.
|
||||
* `sku` should be `16.04-LTS`. Other skus will be supported once they are
|
||||
available for N-series VMs.
|
||||
* `vm_configuration` is the VM configuration
|
||||
* `platform_image` specifies to use a platform image
|
||||
* `publisher` should be `Canonical` or `OpenLogic`.
|
||||
* `offer` should be `UbuntuServer` for Canonical or `CentOS` for OpenLogic.
|
||||
* `sku` should be `16.04-LTS` for Ubuntu or `7.3` for CentOS.
|
||||
* `inter_node_communication_enabled` must be set to `true`
|
||||
* `max_tasks_per_node` must be set to 1 or omitted
|
||||
|
||||
|
|
|
@ -13,12 +13,11 @@ The pool configuration should enable the following properties:
|
|||
K80 GPUs for GPU compute acceleration while `NV` VM instances feature
|
||||
M60 GPUs for visualization workloads. Because Caffe is a GPU-accelerated
|
||||
compute application, it is best to choose `NC` VM instances.
|
||||
* `publisher` should be `Canonical`. Other publishers will be supported
|
||||
once they are available for N-series VMs.
|
||||
* `offer` should be `UbuntuServer`. Other offers will be supported once they
|
||||
are available for N-series VMs.
|
||||
* `sku` should be `16.04-LTS`. Other skus will be supported once they are
|
||||
available for N-series VMs.
|
||||
* `vm_configuration` is the VM configuration
|
||||
* `platform_image` specifies to use a platform image
|
||||
* `publisher` should be `Canonical` or `OpenLogic`.
|
||||
* `offer` should be `UbuntuServer` for Canonical or `CentOS` for OpenLogic.
|
||||
* `sku` should be `16.04-LTS` for Ubuntu or `7.3` for CentOS.
|
||||
|
||||
### Global Configuration
|
||||
The global configuration should set the following properties:
|
||||
|
|
|
@ -13,12 +13,11 @@ The pool configuration should enable the following properties:
|
|||
K80 GPUs for GPU compute acceleration while `NV` VM instances feature
|
||||
M60 GPUs for visualization workloads. Because Caffe is a GPU-accelerated
|
||||
compute application, it is best to choose `NC` VM instances.
|
||||
* `publisher` should be `Canonical`. Other publishers will be supported
|
||||
once they are available for N-series VMs.
|
||||
* `offer` should be `UbuntuServer`. Other offers will be supported once they
|
||||
are available for N-series VMs.
|
||||
* `sku` should be `16.04-LTS`. Other skus will be supported once they are
|
||||
available for N-series VMs.
|
||||
* `vm_configuration` is the VM configuration
|
||||
* `platform_image` specifies to use a platform image
|
||||
* `publisher` should be `Canonical` or `OpenLogic`.
|
||||
* `offer` should be `UbuntuServer` for Canonical or `CentOS` for OpenLogic.
|
||||
* `sku` should be `16.04-LTS` for Ubuntu or `7.3` for CentOS.
|
||||
|
||||
### Global Configuration
|
||||
The global configuration should set the following properties:
|
||||
|
|
|
@ -14,12 +14,11 @@ The pool configuration should enable the following properties:
|
|||
K80 GPUs for GPU compute acceleration while `NV` VM instances feature
|
||||
M60 GPUs for visualization workloads. Because FFmpeg is for transforming
|
||||
audio/video, it is best to choose `NV` VM instances.
|
||||
* `publisher` should be `Canonical`. Other publishers will be supported
|
||||
once they are available for N-series VMs.
|
||||
* `offer` should be `UbuntuServer`. Other offers will be supported once they
|
||||
are available for N-series VMs.
|
||||
* `sku` should be `16.04-LTS`. Other skus will be supported once they are
|
||||
available for N-series VMs.
|
||||
* `vm_configuration` is the VM configuration
|
||||
* `platform_image` specifies to use a platform image
|
||||
* `publisher` should be `Canonical` or `OpenLogic`.
|
||||
* `offer` should be `UbuntuServer` for Canonical or `CentOS` for OpenLogic.
|
||||
* `sku` should be `16.04-LTS` for Ubuntu or `7.3` for CentOS.
|
||||
|
||||
### Global Configuration
|
||||
The global configuration should set the following properties:
|
||||
|
|
|
@ -14,12 +14,11 @@ The pool configuration should enable the following properties:
|
|||
K80 GPUs for GPU compute acceleration while `NV` VM instances feature
|
||||
M60 GPUs for visualization workloads. Because Caffe is a GPU-accelerated
|
||||
compute application, it is best to choose `NC` VM instances.
|
||||
* `publisher` should be `Canonical`. Other publishers will be supported
|
||||
once they are available for N-series VMs.
|
||||
* `offer` should be `UbuntuServer`. Other offers will be supported once they
|
||||
are available for N-series VMs.
|
||||
* `sku` should be `16.04-LTS`. Other skus will be supported once they are
|
||||
available for N-series VMs.
|
||||
* `vm_configuration` is the VM configuration
|
||||
* `platform_image` specifies to use a platform image
|
||||
* `publisher` should be `Canonical` or `OpenLogic`.
|
||||
* `offer` should be `UbuntuServer` for Canonical or `CentOS` for OpenLogic.
|
||||
* `sku` should be `16.04-LTS` for Ubuntu or `7.3` for CentOS.
|
||||
|
||||
### Global Configuration
|
||||
The global configuration should set the following properties:
|
||||
|
|
|
@ -13,12 +13,11 @@ The pool configuration should enable the following properties:
|
|||
K80 GPUs for GPU compute acceleration while `NV` VM instances feature
|
||||
M60 GPUs for visualization workloads. Because CNTK is a GPU-accelerated
|
||||
compute application, it is best to choose `NC` VM instances.
|
||||
* `publisher` should be `Canonical`. Other publishers will be supported
|
||||
once they are available for N-series VMs.
|
||||
* `offer` should be `UbuntuServer`. Other offers will be supported once they
|
||||
are available for N-series VMs.
|
||||
* `sku` should be `16.04-LTS`. Other skus will be supported once they are
|
||||
available for N-series VMs.
|
||||
* `vm_configuration` is the VM configuration
|
||||
* `platform_image` specifies to use a platform image
|
||||
* `publisher` should be `Canonical` or `OpenLogic`.
|
||||
* `offer` should be `UbuntuServer` for Canonical or `CentOS` for OpenLogic.
|
||||
* `sku` should be `16.04-LTS` for Ubuntu or `7.3` for CentOS.
|
||||
* `inter_node_communication_enabled` must be set to `true`
|
||||
* `max_tasks_per_node` must be set to 1 or omitted
|
||||
|
||||
|
|
|
@ -15,13 +15,11 @@ The pool configuration should enable the following properties:
|
|||
K80 GPUs for GPU compute acceleration while `NV` VM instances feature
|
||||
M60 GPUs for visualization workloads. Because NAMD is a GPU-accelerated
|
||||
compute application, it is best to choose `NC` VM instances.
|
||||
* `publisher` should be `Canonical`. Other publishers will be supported
|
||||
once they are available for N-series VMs.
|
||||
* `offer` should be `UbuntuServer`. Other offers will be supported once they
|
||||
are available for N-series VMs.
|
||||
* `sku` should be `16.04-LTS`. Other skus will be supported once they are
|
||||
available for N-series VMs.
|
||||
* `max_tasks_per_node` must be set to 1 or omitted
|
||||
* `vm_configuration` is the VM configuration
|
||||
* `platform_image` specifies to use a platform image
|
||||
* `publisher` should be `Canonical` or `OpenLogic`.
|
||||
* `offer` should be `UbuntuServer` for Canonical or `CentOS` for OpenLogic.
|
||||
* `sku` should be `16.04-LTS` for Ubuntu or `7.3` for CentOS.
|
||||
|
||||
### Global Configuration
|
||||
The global configuration should set the following properties:
|
||||
|
|
|
@ -16,12 +16,14 @@ GPUs:
|
|||
instances feature M60 GPUs for visualization workloads. Because TensorFlow is
|
||||
a GPU-accelerated compute application, it is best to choose `NC` VM instances.
|
||||
If not using GPUs, another appropriate SKU can be selected.
|
||||
* `publisher` should be `Canonical` if using GPUs. Other publishers will be
|
||||
supported once they are available for N-series VMs.
|
||||
* `offer` should be `UbuntuServer` if using GPUs. Other offers will be
|
||||
supported once they are available for N-series VMs.
|
||||
* `sku` should be `16.04-LTS` if using GPUs. Other skus will be supported
|
||||
once they are available for N-series VMs.
|
||||
* `vm_configuration` is the VM configuration
|
||||
* `platform_image` specifies to use a platform image
|
||||
* `publisher` should be `Canonical` or `OpenLogic` if using GPUs. Other
|
||||
supported publishers can be used if not.
|
||||
* `offer` should be `UbuntuServer` for Canonical or `CentOS` for OpenLogic
|
||||
if using GPUs. Other supported offers can be used if not.
|
||||
* `sku` should be `16.04-LTS` for Ubuntu or `7.3` for CentOS if using
|
||||
GPUs. Other supported skus can be used if not.
|
||||
|
||||
If on multiple CPUs:
|
||||
* `max_tasks_per_node` must be set to 1 or omitted
|
||||
|
|
|
@ -13,12 +13,11 @@ The pool configuration should enable the following properties:
|
|||
K80 GPUs for GPU compute acceleration while `NV` VM instances feature
|
||||
M60 GPUs for visualization workloads. Because TensorFlow is a GPU-accelerated
|
||||
compute application, it is best to choose `NC` VM instances.
|
||||
* `publisher` should be `Canonical`. Other publishers will be supported
|
||||
once they are available for N-series VMs.
|
||||
* `offer` should be `UbuntuServer`. Other offers will be supported once they
|
||||
are available for N-series VMs.
|
||||
* `sku` should be `16.04-LTS`. Other skus will be supported once they are
|
||||
available for N-series VMs.
|
||||
* `vm_configuration` is the VM configuration
|
||||
* `platform_image` specifies to use a platform image
|
||||
* `publisher` should be `Canonical` or `OpenLogic`.
|
||||
* `offer` should be `UbuntuServer` for Canonical or `CentOS` for OpenLogic.
|
||||
* `sku` should be `16.04-LTS` for Ubuntu or `7.3` for CentOS.
|
||||
|
||||
### Global Configuration
|
||||
The global configuration should set the following properties:
|
||||
|
|
|
@ -13,12 +13,11 @@ The pool configuration should enable the following properties:
|
|||
K80 GPUs for GPU compute acceleration while `NV` VM instances feature
|
||||
M60 GPUs for visualization workloads. Because Torch is a GPU-accelerated
|
||||
compute application, it is best to choose `NC` VM instances.
|
||||
* `publisher` should be `Canonical`. Other publishers will be supported
|
||||
once they are available for N-series VMs.
|
||||
* `offer` should be `UbuntuServer`. Other offers will be supported once they
|
||||
are available for N-series VMs.
|
||||
* `sku` should be `16.04-LTS`. Other skus will be supported once they are
|
||||
available for N-series VMs.
|
||||
* `vm_configuration` is the VM configuration
|
||||
* `platform_image` specifies to use a platform image
|
||||
* `publisher` should be `Canonical` or `OpenLogic`.
|
||||
* `offer` should be `UbuntuServer` for Canonical or `CentOS` for OpenLogic.
|
||||
* `sku` should be `16.04-LTS` for Ubuntu or `7.3` for CentOS.
|
||||
|
||||
### Global Configuration
|
||||
The global configuration should set the following properties:
|
||||
|
|
|
@ -154,6 +154,93 @@ check_for_nvidia_card() {
|
|||
fi
|
||||
}
|
||||
|
||||
install_nvidia_software() {
    # Install the NVIDIA driver and nvidia-docker, then pre-create the
    # nvidia-docker driver volume.
    #   $1: offer name, "ubuntuserver" or centos*
    # Reads the global $gpu, a ':' delimited triple of
    #   <"True" if NV-series VM>:<driver installer file>:<nvidia-docker package>
    # Exits the script with status 1 on an unsupported offer or if the
    # nvidia-docker volume cannot be created within 5 minutes.
    offer=$1
    shift
    # check for nvidia card
    check_for_nvidia_card
    # split arg into its components
    IFS=':' read -ra GPUARGS <<< "$gpu"
    nvdriver=${GPUARGS[1]}
    nvdocker=${GPUARGS[2]}
    # remove nouveau, but only if it is actually loaded: rmmod fails for a
    # module that is not loaded, which would abort node prep under errexit
    if grep -q '^nouveau ' /proc/modules; then
        rmmod nouveau
    fi
    # purge nouveau off system
    if [ "$offer" == "ubuntuserver" ]; then
        # -y: node prep is unattended, there is nobody to answer a prompt
        apt-get --purge remove -y xserver-xorg-video-nouveau xserver-xorg-video-nouveau-hwe-16.04
    elif [[ $offer == centos* ]]; then
        yum erase -y xorg-x11-drv-nouveau
    else
        echo "ERROR: unsupported distribution for nvidia/GPU, offer: $offer"
        exit 1
    fi
    # blacklist nouveau from being loaded if rebooted
    cat > /etc/modprobe.d/blacklist-nouveau.conf << EOF
blacklist nouveau
blacklist lbm-nouveau
options nouveau modeset=0
alias nouveau off
alias lbm-nouveau off
EOF
    # get development essentials for nvidia driver
    if [ "$offer" == "ubuntuserver" ]; then
        install_packages "$offer" build-essential
    elif [[ $offer == centos* ]]; then
        # kernel headers must match the running kernel for the module build
        install_packages "$offer" gcc binutils make "kernel-devel-$(uname -r)"
    fi
    # get additional dependency if NV-series VMs
    if [ "${GPUARGS[0]}" == "True" ]; then
        if [ "$offer" == "ubuntuserver" ]; then
            install_packages "$offer" xserver-xorg-dev
        elif [[ $offer == centos* ]]; then
            install_packages "$offer" xorg-x11-server-devel
        fi
    fi
    # install driver
    "./$nvdriver" -s
    # add flag to config template for GRID driver
    if [ "${GPUARGS[0]}" == "True" ]; then
        echo "IgnoreSP=TRUE" >> /etc/nvidia/gridd.conf.template
    fi
    # install nvidia-docker
    if [ "$offer" == "ubuntuserver" ]; then
        dpkg -i "$nvdocker"
    elif [[ $offer == centos* ]]; then
        rpm -Uvh "$nvdocker"
    fi
    # enable and start nvidia docker service
    systemctl enable nvidia-docker.service
    systemctl start nvidia-docker.service
    systemctl status nvidia-docker.service
    # get driver version
    nvdriverver=$(grep "Kernel Module" /proc/driver/nvidia/version | cut -d ' ' -f 9)
    echo "nvidia driver version $nvdriverver detected"
    # create the docker volume now to avoid volume driver conflicts for
    # tasks. run this in a loop as it can fail if triggered too quickly
    # after start
    NV_START=$(date -u +"%s")
    # errexit is disabled for the retry loop so a failed attempt does not
    # abort the script
    set +e
    while :
    do
        echo "INFO: Attempting to create nvidia-docker volume with version $nvdriverver"
        if docker volume create -d nvidia-docker --name "nvidia_driver_$nvdriverver"; then
            docker volume list
            break
        else
            NV_NOW=$(date -u +"%s")
            NV_DIFF=$(((NV_NOW-NV_START)/60))
            # fail after 5 minutes of attempts
            if [ $NV_DIFF -ge 5 ]; then
                echo "ERROR: could not create nvidia-docker volume"
                exit 1
            fi
            sleep 1
        fi
    done
    set -e
}
|
||||
|
||||
install_azurefile_docker_volume_driver() {
|
||||
chown root:root azurefile-dockervolumedriver*
|
||||
chmod 755 azurefile-dockervolumedriver
|
||||
|
@ -178,6 +265,8 @@ install_azurefile_docker_volume_driver() {
|
|||
# create docker volumes
|
||||
chmod +x azurefile-dockervolume-create.sh
|
||||
./azurefile-dockervolume-create.sh
|
||||
# list volumes
|
||||
docker volume list
|
||||
}
|
||||
|
||||
refresh_package_index() {
|
||||
|
@ -464,67 +553,7 @@ if [ $offer == "ubuntuserver" ] || [ $offer == "debian" ]; then
|
|||
$srvstatus
|
||||
# install gpu related items
|
||||
if [ ! -z $gpu ] && [ ! -f $nodeprepfinished ]; then
|
||||
# check for nvidia card
|
||||
check_for_nvidia_card
|
||||
# split arg into two
|
||||
IFS=':' read -ra GPUARGS <<< "$gpu"
|
||||
# remove nouveau
|
||||
rmmod nouveau
|
||||
apt-get --purge remove xserver-xorg-video-nouveau xserver-xorg-video-nouveau-hwe-16.04
|
||||
# blacklist nouveau from being loaded if rebooted
|
||||
cat > /etc/modprobe.d/blacklist-nouveau.conf << EOF
|
||||
blacklist nouveau
|
||||
blacklist lbm-nouveau
|
||||
options nouveau modeset=0
|
||||
alias nouveau off
|
||||
alias lbm-nouveau off
|
||||
EOF
|
||||
nvdriver=${GPUARGS[1]}
|
||||
nvdocker=${GPUARGS[2]}
|
||||
# get development essentials for nvidia driver
|
||||
install_packages $offer build-essential
|
||||
# get additional dependency if NV-series VMs
|
||||
if [ ${GPUARGS[0]} == "True" ]; then
|
||||
install_packages $offer xserver-xorg-dev
|
||||
fi
|
||||
# install driver
|
||||
./$nvdriver -s
|
||||
# add flag to config template for GRID driver
|
||||
if [ ${GPUARGS[0]} == "True" ]; then
|
||||
echo "IgnoreSP=TRUE" >> /etc/nvidia/gridd.conf.template
|
||||
fi
|
||||
# install nvidia-docker
|
||||
dpkg -i $nvdocker
|
||||
# enable and start nvidia docker service
|
||||
systemctl enable nvidia-docker.service
|
||||
systemctl start nvidia-docker.service
|
||||
systemctl status nvidia-docker.service
|
||||
# get driver version
|
||||
nvdriverver=`cat /proc/driver/nvidia/version | grep "Kernel Module" | cut -d ' ' -f 9`
|
||||
echo nvidia driver version $nvdriverver detected
|
||||
# create the docker volume now to avoid volume driver conflicts for
|
||||
# tasks. run this in a loop as it can fail if triggered too quickly
|
||||
# after start
|
||||
NV_START=$(date -u +"%s")
|
||||
set +e
|
||||
while :
|
||||
do
|
||||
echo "INFO: Attempting to create nvidia-docker volume with version $nvdriverver"
|
||||
docker volume create -d nvidia-docker --name nvidia_driver_$nvdriverver
|
||||
if [ $? -eq 0 ]; then
|
||||
break
|
||||
else
|
||||
NV_NOW=$(date -u +"%s")
|
||||
NV_DIFF=$((($NV_NOW-$NV_START)/60))
|
||||
# fail after 5 minutes of attempts
|
||||
if [ $NV_DIFF -ge 5 ]; then
|
||||
echo "ERROR: could not create nvidia-docker volume"
|
||||
exit 1
|
||||
fi
|
||||
sleep 1
|
||||
fi
|
||||
done
|
||||
set -e
|
||||
install_nvidia_software $offer
|
||||
fi
|
||||
# set up glusterfs
|
||||
if [ $gluster_on_compute -eq 1 ] && [ ! -f $nodeprepfinished ]; then
|
||||
|
@ -568,7 +597,7 @@ elif [[ $offer == centos* ]] || [[ $offer == "rhel" ]] || [[ $offer == "oracle-l
|
|||
exit 1
|
||||
fi
|
||||
# gpu is not supported on these offers
|
||||
if [ ! -z $gpu ]; then
|
||||
if [[ ! -z $gpu ]] && [[ $offer != centos* ]]; then
|
||||
echo "ERROR: gpu unsupported on this sku: $sku for offer $offer"
|
||||
exit 1
|
||||
fi
|
||||
|
@ -618,6 +647,10 @@ elif [[ $offer == centos* ]] || [[ $offer == "rhel" ]] || [[ $offer == "oracle-l
|
|||
if [ $azurefile -eq 1 ]; then
|
||||
install_azurefile_docker_volume_driver $offer $sku
|
||||
fi
|
||||
# install gpu related items
|
||||
if [ ! -z $gpu ] && [ ! -f $nodeprepfinished ]; then
|
||||
install_nvidia_software $offer
|
||||
fi
|
||||
# set up glusterfs
|
||||
if [ $gluster_on_compute -eq 1 ] && [ ! -f $nodeprepfinished ]; then
|
||||
install_packages $offer epel-release centos-release-gluster38
|
||||
|
|
Загрузка…
Ссылка в новой задаче