Allow CentOS 7.3 on NC/NV
This commit is contained in:
Родитель
de45b18a67
Коммит
8eb2197d23
|
@ -97,5 +97,5 @@ resources/azurefile-dockervolume-create.sh
|
||||||
resources/azurefile-dockervolumedriver
|
resources/azurefile-dockervolumedriver
|
||||||
resources/azurefile-dockervolumedriver.env
|
resources/azurefile-dockervolumedriver.env
|
||||||
resources/docker-registry-v2.tar.gz
|
resources/docker-registry-v2.tar.gz
|
||||||
resources/nvidia-docker.deb
|
resources/nvidia-docker.*
|
||||||
resources/nvidia-driver*.run
|
resources/nvidia-driver*.run
|
||||||
|
|
|
@ -73,6 +73,16 @@ _AZUREFILE_DVD_BIN = {
|
||||||
),
|
),
|
||||||
'target': 'resources/azurefile-dockervolumedriver'
|
'target': 'resources/azurefile-dockervolumedriver'
|
||||||
}
|
}
|
||||||
|
# Download descriptor for the nvidia-docker 1.0.1 RPM package: source url,
# expected sha256 digest and the local target path. This single descriptor
# is shared by the 'centos' and 'centos-hpc' entries of _NVIDIA_DOCKER.
__NVIDIA_DOCKER_RPM = {
    'url': (
        'https://github.com/NVIDIA/nvidia-docker/releases/download/'
        'v1.0.1/nvidia-docker-1.0.1-1.x86_64.rpm'
    ),
    'sha256': (
        'f05dfe7fe655ed39c399db0d6362e351b059f2708c3e6da17f590a000237ec3a'
    ),
    'target': 'resources/nvidia-docker.rpm'
}
|
||||||
_NVIDIA_DOCKER = {
|
_NVIDIA_DOCKER = {
|
||||||
'ubuntuserver': {
|
'ubuntuserver': {
|
||||||
'url': (
|
'url': (
|
||||||
|
@ -84,6 +94,8 @@ _NVIDIA_DOCKER = {
|
||||||
),
|
),
|
||||||
'target': 'resources/nvidia-docker.deb'
|
'target': 'resources/nvidia-docker.deb'
|
||||||
},
|
},
|
||||||
|
'centos': __NVIDIA_DOCKER_RPM,
|
||||||
|
'centos-hpc': __NVIDIA_DOCKER_RPM,
|
||||||
}
|
}
|
||||||
_NVIDIA_DRIVER = {
|
_NVIDIA_DRIVER = {
|
||||||
'compute': {
|
'compute': {
|
||||||
|
@ -339,9 +351,6 @@ def _setup_nvidia_docker_package(blob_client, config):
|
||||||
:return: package path
|
:return: package path
|
||||||
"""
|
"""
|
||||||
offer = settings.pool_offer(config, lower=True)
|
offer = settings.pool_offer(config, lower=True)
|
||||||
if offer != 'ubuntuserver':
|
|
||||||
raise ValueError('Offer {} is unsupported with nvidia docker'.format(
|
|
||||||
offer))
|
|
||||||
pkg = pathlib.Path(_ROOT_PATH, _NVIDIA_DOCKER[offer]['target'])
|
pkg = pathlib.Path(_ROOT_PATH, _NVIDIA_DOCKER[offer]['target'])
|
||||||
# check to see if package is downloaded
|
# check to see if package is downloaded
|
||||||
if (not pkg.exists() or
|
if (not pkg.exists() or
|
||||||
|
@ -1534,6 +1543,7 @@ def _adjust_settings_for_pool_creation(config):
|
||||||
# enforce publisher/offer/sku restrictions
|
# enforce publisher/offer/sku restrictions
|
||||||
allowed = False
|
allowed = False
|
||||||
shipyard_container_required = True
|
shipyard_container_required = True
|
||||||
|
# oracle linux is not supported due to UEKR4 requirement
|
||||||
if publisher == 'canonical':
|
if publisher == 'canonical':
|
||||||
if offer == 'ubuntuserver':
|
if offer == 'ubuntuserver':
|
||||||
if sku.startswith('14.04'):
|
if sku.startswith('14.04'):
|
||||||
|
@ -1560,13 +1570,10 @@ def _adjust_settings_for_pool_creation(config):
|
||||||
elif offer == 'opensuse-leap':
|
elif offer == 'opensuse-leap':
|
||||||
if sku >= '42':
|
if sku >= '42':
|
||||||
allowed = True
|
allowed = True
|
||||||
# check for valid image if gpu, currently only ubuntu 16.04 is supported
|
# check if allowed for gpu (if gpu vm size)
|
||||||
if (settings.is_gpu_pool(pool.vm_size) and
|
if allowed:
|
||||||
util.is_none_or_empty(node_agent) and
|
allowed = settings.gpu_configuration_check(
|
||||||
(publisher != 'canonical' and offer != 'ubuntuserver' and
|
config, vm_size=pool.vm_size)
|
||||||
sku < '16.04')):
|
|
||||||
allowed = False
|
|
||||||
# oracle linux is not supported due to UEKR4 requirement
|
|
||||||
if not allowed and util.is_none_or_empty(node_agent):
|
if not allowed and util.is_none_or_empty(node_agent):
|
||||||
raise ValueError(
|
raise ValueError(
|
||||||
('Unsupported Docker Host VM Config, publisher={} offer={} '
|
('Unsupported Docker Host VM Config, publisher={} offer={} '
|
||||||
|
|
|
@ -403,6 +403,37 @@ def get_gpu_type_from_vm_size(vm_size):
|
||||||
return None
|
return None
|
||||||
|
|
||||||
|
|
||||||
|
def gpu_configuration_check(config, vm_size=None):
    # type: (dict, str) -> bool
    """Check if OS is allowed with a GPU VM
    :param dict config: configuration dict
    :param str vm_size: vm size; if none or empty, the vm size from the
        pool settings of config is used
    :rtype: bool
    :return: if configuration is allowed
    """
    # default to the vm size of the configured pool
    if util.is_none_or_empty(vm_size):
        vm_size = pool_settings(config).vm_size
    # if this is not a gpu sku, always allow
    if not is_gpu_pool(vm_size):
        return True
    # always allow gpu with custom images
    node_agent = pool_custom_image_node_agent(config)
    if util.is_not_empty(node_agent):
        return True
    # check for platform image support
    publisher = pool_publisher(config, lower=True)
    offer = pool_offer(config, lower=True)
    sku = pool_sku(config, lower=True)
    if publisher == 'canonical':
        # NOTE: this is a lexicographic string comparison. It is inclusive
        # so that a bare '16.04' sku is accepted along with '16.04-lts',
        # '16.04.0-lts' and later releases.
        return offer == 'ubuntuserver' and sku >= '16.04'
    if publisher == 'openlogic':
        # only CentOS 7.3 (regular and HPC offers) is supported
        return offer in ('centos', 'centos-hpc') and sku == '7.3'
    return False
|
||||||
|
|
||||||
|
|
||||||
def is_rdma_pool(vm_size):
|
def is_rdma_pool(vm_size):
|
||||||
# type: (str) -> bool
|
# type: (str) -> bool
|
||||||
"""Check if pool is IB/RDMA capable
|
"""Check if pool is IB/RDMA capable
|
||||||
|
|
|
@ -20,10 +20,11 @@ you can invoke as:
|
||||||
shipyard.cmd
|
shipyard.cmd
|
||||||
```
|
```
|
||||||
|
|
||||||
If on Mac, you will need to invoke the Python interpreter and pass
|
If you installed manually (i.e., did not use the installer scripts), then
|
||||||
the script as an argument. For example:
|
you will need to invoke the Python interpreter and pass the script as an
|
||||||
|
argument. For example:
|
||||||
```
|
```
|
||||||
python shipyard.py
|
python3 shipyard.py
|
||||||
```
|
```
|
||||||
|
|
||||||
The `-h` or `--help` option will list the available options, which are
|
The `-h` or `--help` option will list the available options, which are
|
||||||
|
|
|
@ -16,12 +16,11 @@ The pool configuration should enable the following properties:
|
||||||
K80 GPUs for GPU compute acceleration while `NV` VM instances feature
|
K80 GPUs for GPU compute acceleration while `NV` VM instances feature
|
||||||
M60 GPUs for visualization workloads. Because CNTK is a GPU-accelerated
|
M60 GPUs for visualization workloads. Because CNTK is a GPU-accelerated
|
||||||
compute application, it is best to choose `NC` VM instances.
|
compute application, it is best to choose `NC` VM instances.
|
||||||
* `publisher` should be `Canonical`. Other publishers will be supported
|
* `vm_configuration` is the VM configuration
|
||||||
once they are available for N-series VMs.
|
* `platform_image` specifies that a platform image is used
|
||||||
* `offer` should be `UbuntuServer`. Other offers will be supported once they
|
* `publisher` should be `Canonical` or `OpenLogic`.
|
||||||
are available for N-series VMs.
|
* `offer` should be `UbuntuServer` for Canonical or `CentOS` for OpenLogic.
|
||||||
* `sku` should be `16.04-LTS`. Other skus will be supported once they are
|
* `sku` should be `16.04-LTS` for Ubuntu or `7.3` for CentOS.
|
||||||
available for N-series VMs.
|
|
||||||
* `inter_node_communication_enabled` must be set to `true`
|
* `inter_node_communication_enabled` must be set to `true`
|
||||||
* `max_tasks_per_node` must be set to 1 or omitted
|
* `max_tasks_per_node` must be set to 1 or omitted
|
||||||
|
|
||||||
|
|
|
@ -13,12 +13,11 @@ The pool configuration should enable the following properties:
|
||||||
K80 GPUs for GPU compute acceleration while `NV` VM instances feature
|
K80 GPUs for GPU compute acceleration while `NV` VM instances feature
|
||||||
M60 GPUs for visualization workloads. Because Caffe is a GPU-accelerated
|
M60 GPUs for visualization workloads. Because Caffe is a GPU-accelerated
|
||||||
compute application, it is best to choose `NC` VM instances.
|
compute application, it is best to choose `NC` VM instances.
|
||||||
* `publisher` should be `Canonical`. Other publishers will be supported
|
* `vm_configuration` is the VM configuration
|
||||||
once they are available for N-series VMs.
|
* `platform_image` specifies that a platform image is used
|
||||||
* `offer` should be `UbuntuServer`. Other offers will be supported once they
|
* `publisher` should be `Canonical` or `OpenLogic`.
|
||||||
are available for N-series VMs.
|
* `offer` should be `UbuntuServer` for Canonical or `CentOS` for OpenLogic.
|
||||||
* `sku` should be `16.04-LTS`. Other skus will be supported once they are
|
* `sku` should be `16.04-LTS` for Ubuntu or `7.3` for CentOS.
|
||||||
available for N-series VMs.
|
|
||||||
|
|
||||||
### Global Configuration
|
### Global Configuration
|
||||||
The global configuration should set the following properties:
|
The global configuration should set the following properties:
|
||||||
|
|
|
@ -13,12 +13,11 @@ The pool configuration should enable the following properties:
|
||||||
K80 GPUs for GPU compute acceleration while `NV` VM instances feature
|
K80 GPUs for GPU compute acceleration while `NV` VM instances feature
|
||||||
M60 GPUs for visualization workloads. Because Caffe is a GPU-accelerated
|
M60 GPUs for visualization workloads. Because Caffe is a GPU-accelerated
|
||||||
compute application, it is best to choose `NC` VM instances.
|
compute application, it is best to choose `NC` VM instances.
|
||||||
* `publisher` should be `Canonical`. Other publishers will be supported
|
* `vm_configuration` is the VM configuration
|
||||||
once they are available for N-series VMs.
|
* `platform_image` specifies that a platform image is used
|
||||||
* `offer` should be `UbuntuServer`. Other offers will be supported once they
|
* `publisher` should be `Canonical` or `OpenLogic`.
|
||||||
are available for N-series VMs.
|
* `offer` should be `UbuntuServer` for Canonical or `CentOS` for OpenLogic.
|
||||||
* `sku` should be `16.04-LTS`. Other skus will be supported once they are
|
* `sku` should be `16.04-LTS` for Ubuntu or `7.3` for CentOS.
|
||||||
available for N-series VMs.
|
|
||||||
|
|
||||||
### Global Configuration
|
### Global Configuration
|
||||||
The global configuration should set the following properties:
|
The global configuration should set the following properties:
|
||||||
|
|
|
@ -14,12 +14,11 @@ The pool configuration should enable the following properties:
|
||||||
K80 GPUs for GPU compute acceleration while `NV` VM instances feature
|
K80 GPUs for GPU compute acceleration while `NV` VM instances feature
|
||||||
M60 GPUs for visualization workloads. Because FFmpeg is for transforming
|
M60 GPUs for visualization workloads. Because FFmpeg is for transforming
|
||||||
audio/video, it is best to choose `NV` VM instances.
|
audio/video, it is best to choose `NV` VM instances.
|
||||||
* `publisher` should be `Canonical`. Other publishers will be supported
|
* `vm_configuration` is the VM configuration
|
||||||
once they are available for N-series VMs.
|
* `platform_image` specifies that a platform image is used
|
||||||
* `offer` should be `UbuntuServer`. Other offers will be supported once they
|
* `publisher` should be `Canonical` or `OpenLogic`.
|
||||||
are available for N-series VMs.
|
* `offer` should be `UbuntuServer` for Canonical or `CentOS` for OpenLogic.
|
||||||
* `sku` should be `16.04-LTS`. Other skus will be supported once they are
|
* `sku` should be `16.04-LTS` for Ubuntu or `7.3` for CentOS.
|
||||||
available for N-series VMs.
|
|
||||||
|
|
||||||
### Global Configuration
|
### Global Configuration
|
||||||
The global configuration should set the following properties:
|
The global configuration should set the following properties:
|
||||||
|
|
|
@ -14,12 +14,11 @@ The pool configuration should enable the following properties:
|
||||||
K80 GPUs for GPU compute acceleration while `NV` VM instances feature
|
K80 GPUs for GPU compute acceleration while `NV` VM instances feature
|
||||||
M60 GPUs for visualization workloads. Because Caffe is a GPU-accelerated
|
M60 GPUs for visualization workloads. Because Caffe is a GPU-accelerated
|
||||||
compute application, it is best to choose `NC` VM instances.
|
compute application, it is best to choose `NC` VM instances.
|
||||||
* `publisher` should be `Canonical`. Other publishers will be supported
|
* `vm_configuration` is the VM configuration
|
||||||
once they are available for N-series VMs.
|
* `platform_image` specifies that a platform image is used
|
||||||
* `offer` should be `UbuntuServer`. Other offers will be supported once they
|
* `publisher` should be `Canonical` or `OpenLogic`.
|
||||||
are available for N-series VMs.
|
* `offer` should be `UbuntuServer` for Canonical or `CentOS` for OpenLogic.
|
||||||
* `sku` should be `16.04-LTS`. Other skus will be supported once they are
|
* `sku` should be `16.04-LTS` for Ubuntu or `7.3` for CentOS.
|
||||||
available for N-series VMs.
|
|
||||||
|
|
||||||
### Global Configuration
|
### Global Configuration
|
||||||
The global configuration should set the following properties:
|
The global configuration should set the following properties:
|
||||||
|
|
|
@ -13,12 +13,11 @@ The pool configuration should enable the following properties:
|
||||||
K80 GPUs for GPU compute acceleration while `NV` VM instances feature
|
K80 GPUs for GPU compute acceleration while `NV` VM instances feature
|
||||||
M60 GPUs for visualization workloads. Because CNTK is a GPU-accelerated
|
M60 GPUs for visualization workloads. Because CNTK is a GPU-accelerated
|
||||||
compute application, it is best to choose `NC` VM instances.
|
compute application, it is best to choose `NC` VM instances.
|
||||||
* `publisher` should be `Canonical`. Other publishers will be supported
|
* `vm_configuration` is the VM configuration
|
||||||
once they are available for N-series VMs.
|
* `platform_image` specifies that a platform image is used
|
||||||
* `offer` should be `UbuntuServer`. Other offers will be supported once they
|
* `publisher` should be `Canonical` or `OpenLogic`.
|
||||||
are available for N-series VMs.
|
* `offer` should be `UbuntuServer` for Canonical or `CentOS` for OpenLogic.
|
||||||
* `sku` should be `16.04-LTS`. Other skus will be supported once they are
|
* `sku` should be `16.04-LTS` for Ubuntu or `7.3` for CentOS.
|
||||||
available for N-series VMs.
|
|
||||||
* `inter_node_communication_enabled` must be set to `true`
|
* `inter_node_communication_enabled` must be set to `true`
|
||||||
* `max_tasks_per_node` must be set to 1 or omitted
|
* `max_tasks_per_node` must be set to 1 or omitted
|
||||||
|
|
||||||
|
|
|
@ -15,13 +15,11 @@ The pool configuration should enable the following properties:
|
||||||
K80 GPUs for GPU compute acceleration while `NV` VM instances feature
|
K80 GPUs for GPU compute acceleration while `NV` VM instances feature
|
||||||
M60 GPUs for visualization workloads. Because NAMD is a GPU-accelerated
|
M60 GPUs for visualization workloads. Because NAMD is a GPU-accelerated
|
||||||
compute application, it is best to choose `NC` VM instances.
|
compute application, it is best to choose `NC` VM instances.
|
||||||
* `publisher` should be `Canonical`. Other publishers will be supported
|
* `vm_configuration` is the VM configuration
|
||||||
once they are available for N-series VMs.
|
* `platform_image` specifies that a platform image is used
|
||||||
* `offer` should be `UbuntuServer`. Other offers will be supported once they
|
* `publisher` should be `Canonical` or `OpenLogic`.
|
||||||
are available for N-series VMs.
|
* `offer` should be `UbuntuServer` for Canonical or `CentOS` for OpenLogic.
|
||||||
* `sku` should be `16.04-LTS`. Other skus will be supported once they are
|
* `sku` should be `16.04-LTS` for Ubuntu or `7.3` for CentOS.
|
||||||
available for N-series VMs.
|
|
||||||
* `max_tasks_per_node` must be set to 1 or omitted
|
|
||||||
|
|
||||||
### Global Configuration
|
### Global Configuration
|
||||||
The global configuration should set the following properties:
|
The global configuration should set the following properties:
|
||||||
|
|
|
@ -16,12 +16,14 @@ GPUs:
|
||||||
instances feature M60 GPUs for visualization workloads. Because TensorFlow is
|
instances feature M60 GPUs for visualization workloads. Because TensorFlow is
|
||||||
a GPU-accelerated compute application, it is best to choose `NC` VM instances.
|
a GPU-accelerated compute application, it is best to choose `NC` VM instances.
|
||||||
If not using GPUs, another appropriate SKU can be selected.
|
If not using GPUs, another appropriate SKU can be selected.
|
||||||
* `publisher` should be `Canonical` if using GPUs. Other publishers will be
|
* `vm_configuration` is the VM configuration
|
||||||
supported once they are available for N-series VMs.
|
* `platform_image` specifies that a platform image is used
|
||||||
* `offer` should be `UbuntuServer` if using GPUs. Other offers will be
|
* `publisher` should be `Canonical` or `OpenLogic` if using GPUs. Other
|
||||||
supported once they are available for N-series VMs.
|
supported publishers can be used if not.
|
||||||
* `sku` should be `16.04-LTS` if using GPUs. Other skus will be supported
|
* `offer` should be `UbuntuServer` for Canonical or `CentOS` for OpenLogic
|
||||||
once they are available for N-series VMs.
|
if using GPUs. Other supported offers can be used if not.
|
||||||
|
* `sku` should be `16.04-LTS` for Ubuntu or `7.3` for CentOS if using
|
||||||
|
GPUs. Other supported skus can be used if not.
|
||||||
|
|
||||||
If on multiple CPUs:
|
If on multiple CPUs:
|
||||||
* `max_tasks_per_node` must be set to 1 or omitted
|
* `max_tasks_per_node` must be set to 1 or omitted
|
||||||
|
|
|
@ -13,12 +13,11 @@ The pool configuration should enable the following properties:
|
||||||
K80 GPUs for GPU compute acceleration while `NV` VM instances feature
|
K80 GPUs for GPU compute acceleration while `NV` VM instances feature
|
||||||
M60 GPUs for visualization workloads. Because TensorFlow is a GPU-accelerated
|
M60 GPUs for visualization workloads. Because TensorFlow is a GPU-accelerated
|
||||||
compute application, it is best to choose `NC` VM instances.
|
compute application, it is best to choose `NC` VM instances.
|
||||||
* `publisher` should be `Canonical`. Other publishers will be supported
|
* `vm_configuration` is the VM configuration
|
||||||
once they are available for N-series VMs.
|
* `platform_image` specifies that a platform image is used
|
||||||
* `offer` should be `UbuntuServer`. Other offers will be supported once they
|
* `publisher` should be `Canonical` or `OpenLogic`.
|
||||||
are available for N-series VMs.
|
* `offer` should be `UbuntuServer` for Canonical or `CentOS` for OpenLogic.
|
||||||
* `sku` should be `16.04-LTS`. Other skus will be supported once they are
|
* `sku` should be `16.04-LTS` for Ubuntu or `7.3` for CentOS.
|
||||||
available for N-series VMs.
|
|
||||||
|
|
||||||
### Global Configuration
|
### Global Configuration
|
||||||
The global configuration should set the following properties:
|
The global configuration should set the following properties:
|
||||||
|
|
|
@ -13,12 +13,11 @@ The pool configuration should enable the following properties:
|
||||||
K80 GPUs for GPU compute acceleration while `NV` VM instances feature
|
K80 GPUs for GPU compute acceleration while `NV` VM instances feature
|
||||||
M60 GPUs for visualization workloads. Because Torch is a GPU-accelerated
|
M60 GPUs for visualization workloads. Because Torch is a GPU-accelerated
|
||||||
compute application, it is best to choose `NC` VM instances.
|
compute application, it is best to choose `NC` VM instances.
|
||||||
* `publisher` should be `Canonical`. Other publishers will be supported
|
* `vm_configuration` is the VM configuration
|
||||||
once they are available for N-series VMs.
|
* `platform_image` specifies that a platform image is used
|
||||||
* `offer` should be `UbuntuServer`. Other offers will be supported once they
|
* `publisher` should be `Canonical` or `OpenLogic`.
|
||||||
are available for N-series VMs.
|
* `offer` should be `UbuntuServer` for Canonical or `CentOS` for OpenLogic.
|
||||||
* `sku` should be `16.04-LTS`. Other skus will be supported once they are
|
* `sku` should be `16.04-LTS` for Ubuntu or `7.3` for CentOS.
|
||||||
available for N-series VMs.
|
|
||||||
|
|
||||||
### Global Configuration
|
### Global Configuration
|
||||||
The global configuration should set the following properties:
|
The global configuration should set the following properties:
|
||||||
|
|
|
@ -154,6 +154,93 @@ check_for_nvidia_card() {
|
||||||
fi
|
fi
|
||||||
}
|
}
|
||||||
|
|
||||||
|
# Install the NVIDIA driver and nvidia-docker on a GPU node, then create the
# nvidia-docker driver volume.
# Arguments:
#   $1 - offer name ("ubuntuserver" or any offer starting with "centos")
# Globals read:
#   gpu - colon-delimited "<True if NV-series>:<driver installer>:<nvidia-docker package>"
# Exits the script with status 1 on an unsupported offer or if the
# nvidia-docker volume cannot be created within 5 minutes.
install_nvidia_software() {
    offer=$1
    shift
    # check for nvidia card
    check_for_nvidia_card
    # split arg into its colon-delimited fields
    IFS=':' read -ra GPUARGS <<< "$gpu"
    nvdriver=${GPUARGS[1]}
    nvdocker=${GPUARGS[2]}
    # remove nouveau
    rmmod nouveau
    # purge nouveau off system; -y keeps the removal non-interactive so
    # unattended node preparation cannot stall or abort on a prompt
    if [ "$offer" == "ubuntuserver" ]; then
        apt-get --purge remove -y xserver-xorg-video-nouveau xserver-xorg-video-nouveau-hwe-16.04
    elif [[ "$offer" == centos* ]]; then
        yum erase -y xorg-x11-drv-nouveau
    else
        echo "ERROR: unsupported distribution for nvidia/GPU, offer: $offer"
        exit 1
    fi
    # blacklist nouveau from being loaded if rebooted
cat > /etc/modprobe.d/blacklist-nouveau.conf << EOF
blacklist nouveau
blacklist lbm-nouveau
options nouveau modeset=0
alias nouveau off
alias lbm-nouveau off
EOF
    # get development essentials for nvidia driver
    if [ "$offer" == "ubuntuserver" ]; then
        install_packages "$offer" build-essential
    elif [[ "$offer" == centos* ]]; then
        install_packages "$offer" gcc binutils make "kernel-devel-$(uname -r)"
    fi
    # get additional dependency if NV-series VMs
    if [ "${GPUARGS[0]}" == "True" ]; then
        if [ "$offer" == "ubuntuserver" ]; then
            install_packages "$offer" xserver-xorg-dev
        elif [[ "$offer" == centos* ]]; then
            install_packages "$offer" xorg-x11-server-devel
        fi
    fi
    # install driver
    "./$nvdriver" -s
    # add flag to config template for GRID driver
    if [ "${GPUARGS[0]}" == "True" ]; then
        echo "IgnoreSP=TRUE" >> /etc/nvidia/gridd.conf.template
    fi
    # install nvidia-docker
    if [ "$offer" == "ubuntuserver" ]; then
        dpkg -i "$nvdocker"
    elif [[ "$offer" == centos* ]]; then
        rpm -Uvh "$nvdocker"
    fi
    # enable and start nvidia docker service
    systemctl enable nvidia-docker.service
    systemctl start nvidia-docker.service
    systemctl status nvidia-docker.service
    # get driver version
    nvdriverver=$(cat /proc/driver/nvidia/version | grep "Kernel Module" | cut -d ' ' -f 9)
    echo "nvidia driver version $nvdriverver detected"
    # create the docker volume now to avoid volume driver conflicts for
    # tasks. run this in a loop as it can fail if triggered too quickly
    # after start
    NV_START=$(date -u +"%s")
    # errexit is disabled so a failed create attempt can be retried
    set +e
    while :
    do
        echo "INFO: Attempting to create nvidia-docker volume with version $nvdriverver"
        docker volume create -d nvidia-docker --name "nvidia_driver_$nvdriverver"
        if [ $? -eq 0 ]; then
            docker volume list
            break
        else
            NV_NOW=$(date -u +"%s")
            NV_DIFF=$((($NV_NOW-$NV_START)/60))
            # fail after 5 minutes of attempts
            if [ $NV_DIFF -ge 5 ]; then
                echo "ERROR: could not create nvidia-docker volume"
                exit 1
            fi
            sleep 1
        fi
    done
    set -e
}
|
||||||
|
|
||||||
install_azurefile_docker_volume_driver() {
|
install_azurefile_docker_volume_driver() {
|
||||||
chown root:root azurefile-dockervolumedriver*
|
chown root:root azurefile-dockervolumedriver*
|
||||||
chmod 755 azurefile-dockervolumedriver
|
chmod 755 azurefile-dockervolumedriver
|
||||||
|
@ -178,6 +265,8 @@ install_azurefile_docker_volume_driver() {
|
||||||
# create docker volumes
|
# create docker volumes
|
||||||
chmod +x azurefile-dockervolume-create.sh
|
chmod +x azurefile-dockervolume-create.sh
|
||||||
./azurefile-dockervolume-create.sh
|
./azurefile-dockervolume-create.sh
|
||||||
|
# list volumes
|
||||||
|
docker volume list
|
||||||
}
|
}
|
||||||
|
|
||||||
refresh_package_index() {
|
refresh_package_index() {
|
||||||
|
@ -464,67 +553,7 @@ if [ $offer == "ubuntuserver" ] || [ $offer == "debian" ]; then
|
||||||
$srvstatus
|
$srvstatus
|
||||||
# install gpu related items
|
# install gpu related items
|
||||||
if [ ! -z $gpu ] && [ ! -f $nodeprepfinished ]; then
|
if [ ! -z $gpu ] && [ ! -f $nodeprepfinished ]; then
|
||||||
# check for nvidia card
|
install_nvidia_software $offer
|
||||||
check_for_nvidia_card
|
|
||||||
# split arg into two
|
|
||||||
IFS=':' read -ra GPUARGS <<< "$gpu"
|
|
||||||
# remove nouveau
|
|
||||||
rmmod nouveau
|
|
||||||
apt-get --purge remove xserver-xorg-video-nouveau xserver-xorg-video-nouveau-hwe-16.04
|
|
||||||
# blacklist nouveau from being loaded if rebooted
|
|
||||||
cat > /etc/modprobe.d/blacklist-nouveau.conf << EOF
|
|
||||||
blacklist nouveau
|
|
||||||
blacklist lbm-nouveau
|
|
||||||
options nouveau modeset=0
|
|
||||||
alias nouveau off
|
|
||||||
alias lbm-nouveau off
|
|
||||||
EOF
|
|
||||||
nvdriver=${GPUARGS[1]}
|
|
||||||
nvdocker=${GPUARGS[2]}
|
|
||||||
# get development essentials for nvidia driver
|
|
||||||
install_packages $offer build-essential
|
|
||||||
# get additional dependency if NV-series VMs
|
|
||||||
if [ ${GPUARGS[0]} == "True" ]; then
|
|
||||||
install_packages $offer xserver-xorg-dev
|
|
||||||
fi
|
|
||||||
# install driver
|
|
||||||
./$nvdriver -s
|
|
||||||
# add flag to config template for GRID driver
|
|
||||||
if [ ${GPUARGS[0]} == "True" ]; then
|
|
||||||
echo "IgnoreSP=TRUE" >> /etc/nvidia/gridd.conf.template
|
|
||||||
fi
|
|
||||||
# install nvidia-docker
|
|
||||||
dpkg -i $nvdocker
|
|
||||||
# enable and start nvidia docker service
|
|
||||||
systemctl enable nvidia-docker.service
|
|
||||||
systemctl start nvidia-docker.service
|
|
||||||
systemctl status nvidia-docker.service
|
|
||||||
# get driver version
|
|
||||||
nvdriverver=`cat /proc/driver/nvidia/version | grep "Kernel Module" | cut -d ' ' -f 9`
|
|
||||||
echo nvidia driver version $nvdriverver detected
|
|
||||||
# create the docker volume now to avoid volume driver conflicts for
|
|
||||||
# tasks. run this in a loop as it can fail if triggered too quickly
|
|
||||||
# after start
|
|
||||||
NV_START=$(date -u +"%s")
|
|
||||||
set +e
|
|
||||||
while :
|
|
||||||
do
|
|
||||||
echo "INFO: Attempting to create nvidia-docker volume with version $nvdriverver"
|
|
||||||
docker volume create -d nvidia-docker --name nvidia_driver_$nvdriverver
|
|
||||||
if [ $? -eq 0 ]; then
|
|
||||||
break
|
|
||||||
else
|
|
||||||
NV_NOW=$(date -u +"%s")
|
|
||||||
NV_DIFF=$((($NV_NOW-$NV_START)/60))
|
|
||||||
# fail after 5 minutes of attempts
|
|
||||||
if [ $NV_DIFF -ge 5 ]; then
|
|
||||||
echo "ERROR: could not create nvidia-docker volume"
|
|
||||||
exit 1
|
|
||||||
fi
|
|
||||||
sleep 1
|
|
||||||
fi
|
|
||||||
done
|
|
||||||
set -e
|
|
||||||
fi
|
fi
|
||||||
# set up glusterfs
|
# set up glusterfs
|
||||||
if [ $gluster_on_compute -eq 1 ] && [ ! -f $nodeprepfinished ]; then
|
if [ $gluster_on_compute -eq 1 ] && [ ! -f $nodeprepfinished ]; then
|
||||||
|
@ -568,7 +597,7 @@ elif [[ $offer == centos* ]] || [[ $offer == "rhel" ]] || [[ $offer == "oracle-l
|
||||||
exit 1
|
exit 1
|
||||||
fi
|
fi
|
||||||
# gpu is not supported on these offers
|
# gpu is only supported on centos among these offers
|
||||||
if [ ! -z $gpu ]; then
|
if [[ ! -z $gpu ]] && [[ $offer != centos* ]]; then
|
||||||
echo "ERROR: gpu unsupported on this sku: $sku for offer $offer"
|
echo "ERROR: gpu unsupported on this sku: $sku for offer $offer"
|
||||||
exit 1
|
exit 1
|
||||||
fi
|
fi
|
||||||
|
@ -618,6 +647,10 @@ elif [[ $offer == centos* ]] || [[ $offer == "rhel" ]] || [[ $offer == "oracle-l
|
||||||
if [ $azurefile -eq 1 ]; then
|
if [ $azurefile -eq 1 ]; then
|
||||||
install_azurefile_docker_volume_driver $offer $sku
|
install_azurefile_docker_volume_driver $offer $sku
|
||||||
fi
|
fi
|
||||||
|
# install gpu related items
|
||||||
|
if [ ! -z $gpu ] && [ ! -f $nodeprepfinished ]; then
|
||||||
|
install_nvidia_software $offer
|
||||||
|
fi
|
||||||
# set up glusterfs
|
# set up glusterfs
|
||||||
if [ $gluster_on_compute -eq 1 ] && [ ! -f $nodeprepfinished ]; then
|
if [ $gluster_on_compute -eq 1 ] && [ ! -f $nodeprepfinished ]; then
|
||||||
install_packages $offer epel-release centos-release-gluster38
|
install_packages $offer epel-release centos-release-gluster38
|
||||||
|
|
Загрузка…
Ссылка в новой задаче