зеркало из https://github.com/microsoft/pai.git
[Kubespray] make requirement check not to fail immediately (#4567)
* fix * fix * fix * fix * fix * fix * fix * fix * fix * fix
This commit is contained in:
Родитель
f68e2e0c2e
Коммит
8898310726
|
@ -14,6 +14,13 @@
|
|||
- name: "SSH test from dev-box to all machine of infra and worker"
|
||||
ping:
|
||||
|
||||
- hosts: all,localhost
|
||||
gather_facts: false
|
||||
tasks:
|
||||
- name: "set up unmet_requirements"
|
||||
set_fact:
|
||||
unmet_requirements: []
|
||||
|
||||
- hosts: localhost
|
||||
become: true
|
||||
become_user: root
|
||||
|
@ -35,3 +42,10 @@
|
|||
gather_facts: true
|
||||
roles:
|
||||
- { role: requirement/worker }
|
||||
|
||||
- hosts: all,localhost
|
||||
tasks:
|
||||
- name: "display unmet requirements"
|
||||
fail:
|
||||
msg: "The following requirements are not met: {{ unmet_requirements | join('. ') }}"
|
||||
when: unmet_requirements|length > 0
|
|
@ -48,7 +48,17 @@ echo "Ping Test"
|
|||
|
||||
ansible all -i ${HOME}/pai-deploy/cluster-cfg/hosts.yml -m ping || exit $?
|
||||
|
||||
/bin/bash requirement.sh -m ${MASTER_LIST} -w ${WORKER_LIST} -c ${CLUSTER_CONFIG} || exit $?
|
||||
/bin/bash requirement.sh -m ${MASTER_LIST} -w ${WORKER_LIST} -c ${CLUSTER_CONFIG}
|
||||
ret_code_check=$?
|
||||
if [ $ret_code_check -ne 0 ]; then
|
||||
echo ""
|
||||
echo "Please press ENTER to stop the script, check the log, and modify the cluster setting to meet the requirements."
|
||||
echo "If you are very sure about the configuration, and still want to continue, you can type in \"continue\" to force the script to proceed."
|
||||
read user_input
|
||||
if [ "${user_input}"x != "continue"x ]; then
|
||||
exit $ret_code_check
|
||||
fi
|
||||
fi
|
||||
|
||||
/bin/bash preinstall.sh -c ${CLUSTER_CONFIG} || exit $?
|
||||
|
||||
|
|
|
@ -34,10 +34,9 @@ if [ $ret_code_check -eq 0 ]
|
|||
then
|
||||
echo "Pass: Cluster meets the requirements"
|
||||
else
|
||||
echo "Faild: Please check the output, and modify the cluster setting to meet the requirement"
|
||||
echo "Failed: There are unmet requirements in your cluster, the installation will be very likely to fail."
|
||||
rm -rf ${HOME}/pai-pre-check/
|
||||
exit $ret_code_check
|
||||
fi
|
||||
|
||||
rm -rf ${HOME}/pai-pre-check/
|
||||
|
||||
|
|
|
@ -52,7 +52,7 @@
|
|||
when: ping_cloud_google.rc == 0
|
||||
|
||||
- name: Update source with mirrors.aliyun.com
|
||||
include_tasks: cloud.google.yml
|
||||
include_tasks: mirrors.aliyun.yml
|
||||
when: ping_cloud_google.rc != 0
|
||||
|
||||
- name: run the equivalent of "apt-get update" as a separate step
|
||||
|
|
|
@ -8,8 +8,8 @@
|
|||
environment: {}
|
||||
|
||||
- name: "Dev-box 1.2 Check if docker is installed on your dev-box or not."
|
||||
fail:
|
||||
msg: "Unable to find docker in your dev-box machine"
|
||||
set_fact:
|
||||
unmet_requirements: "{{ unmet_requirements + ['Unable to find docker in your dev-box machine'] }}"
|
||||
changed_when: false
|
||||
check_mode: false
|
||||
environment: {}
|
||||
|
@ -29,32 +29,15 @@
|
|||
dev_box_docker_version_replace: "{{ dev_box_docker_version.stdout | replace('\"','') }}"
|
||||
|
||||
- name: "Dev-box 2.3 Check docker version in dev-box"
|
||||
fail:
|
||||
msg: "The docker version in your dev-box is too low, please update it. And make sure its higher then 1.10"
|
||||
set_fact:
|
||||
unmet_requirements: "{{ unmet_requirements + ['The docker version in your dev-box is too low, please update it. And make sure its higher then 1.10'] }}"
|
||||
changed_when: false
|
||||
check_mode: false
|
||||
environment: {}
|
||||
when:
|
||||
- dev_box_docker_version_replace is version('1.10.0', '<=')
|
||||
|
||||
- name: "Dev-box 3.1 Check whether the vm can raw.githubusercontent.com"
|
||||
raw: curl https://raw.githubusercontent.com/microsoft/pai/master/README.md
|
||||
register: devbox_curl_githubusercontent
|
||||
failed_when: false
|
||||
changed_when: false
|
||||
check_mode: false
|
||||
environment: {}
|
||||
|
||||
- name: "Dev-box 3.2 Check whether the vm can access to raw.githubusercontent.com"
|
||||
fail:
|
||||
msg: "Unable to access raw.githubusercontent.com"
|
||||
changed_when: false
|
||||
check_mode: false
|
||||
environment: {}
|
||||
when:
|
||||
- devbox_curl_githubusercontent.rc != 0
|
||||
|
||||
- name: "Dev-box 4.1 Check whether the vm can access to docker.io"
|
||||
- name: "Dev-box 3.1 Check whether the vm can access to docker.io"
|
||||
raw: nslookup index.docker.io
|
||||
register: devbox_nslookup_docker_io
|
||||
failed_when: false
|
||||
|
@ -62,9 +45,9 @@
|
|||
check_mode: false
|
||||
environment: {}
|
||||
|
||||
- name: "Dev-box 4.2 Check whether the vm can access to docker.io"
|
||||
fail:
|
||||
msg: "Unable to access docker.io"
|
||||
- name: "Dev-box 3.2 Check whether the vm can access to docker.io"
|
||||
set_fact:
|
||||
unmet_requirements: "{{ unmet_requirements + ['Unable to access docker.io'] }}"
|
||||
changed_when: false
|
||||
check_mode: false
|
||||
environment: {}
|
||||
|
|
|
@ -1,12 +1,12 @@
|
|||
---
|
||||
- name: Check memory resource requirement for openpai service
|
||||
fail:
|
||||
msg: "OpenPAI's infra node should have 40 Gi free memory for service "
|
||||
set_fact:
|
||||
unmet_requirements: "{{ unmet_requirements + [\"OpenPAI's infra node should have 40 Gi free memory for service\"] }}"
|
||||
when:
|
||||
- ansible_memory_mb["nocache"]["free"] < 40000
|
||||
|
||||
- name: Check cpu resource requirement for openpai service
|
||||
fail:
|
||||
msg: "OpenPAI's infra node should have 1 CPU vcore for service "
|
||||
set_fact:
|
||||
unmet_requirements: "{{ unmet_requirements + [\"OpenPAI's infra node should have 1 CPU vcore for service\"] }}"
|
||||
when:
|
||||
- ansible_processor_vcpus < 1
|
|
@ -8,8 +8,8 @@
|
|||
environment: {}
|
||||
|
||||
- name: "Fail, if docker is not installed on your master machine."
|
||||
fail:
|
||||
msg: "Unable to find docker in your master machine"
|
||||
set_fact:
|
||||
unmet_requirements: "{{ unmet_requirements + ['Unable to find docker in your master machine'] }}"
|
||||
changed_when: false
|
||||
check_mode: false
|
||||
environment: {}
|
||||
|
|
|
@ -8,8 +8,8 @@
|
|||
environment: {}
|
||||
|
||||
- name: "Infra 1.2 Check whether the vm can access to docker.io"
|
||||
fail:
|
||||
msg: "Unable to access docker.io"
|
||||
set_fact:
|
||||
unmet_requirements: "{{ unmet_requirements + ['Unable to access docker.io'] }}"
|
||||
changed_when: false
|
||||
check_mode: false
|
||||
environment: {}
|
||||
|
@ -33,8 +33,8 @@
|
|||
environment: {}
|
||||
|
||||
- name: "Infra 2.3 Check whether the NTP is installed and enabled."
|
||||
fail:
|
||||
msg: "NTP is not enabled on your infra machines."
|
||||
set_fact:
|
||||
unmet_requirements: "{{ unmet_requirements + ['NTP is not enabled on your infra machines'] }}"
|
||||
changed_when: false
|
||||
check_mode: false
|
||||
environment: {}
|
||||
|
@ -42,8 +42,8 @@
|
|||
- infra_ntpq.rc != 0 or infra_nslookup_docker_io.rc != 0
|
||||
|
||||
- name: "Infra 3.1 Ensure dev-box is not an infra machines"
|
||||
fail:
|
||||
msg: "Dev-box should be a separated machine from the cluster."
|
||||
set_fact:
|
||||
unmet_requirements: "{{ unmet_requirements + ['Dev-box should be a separated machine from the cluster'] }}"
|
||||
when:
|
||||
- ansible_control_host_address == ansible_default_ipv4.address
|
||||
|
||||
|
@ -56,8 +56,8 @@
|
|||
environment: {}
|
||||
|
||||
- name: "Infra 4.2 Check whether the vm can access to gcr.io"
|
||||
fail:
|
||||
msg: "Unable to access gcr.io"
|
||||
set_fact:
|
||||
unmet_requirements: "{{ unmet_requirements + ['Unable to access gcr.io'] }}"
|
||||
changed_when: false
|
||||
check_mode: false
|
||||
environment: {}
|
||||
|
@ -73,8 +73,8 @@
|
|||
environment: {}
|
||||
|
||||
- name: "Infra 5.2 Check whether the vm can access to quay.io"
|
||||
fail:
|
||||
msg: "Unable to access quay.io"
|
||||
set_fact:
|
||||
unmet_requirements: "{{ unmet_requirements + ['Unable to access quay.io'] }}"
|
||||
changed_when: false
|
||||
check_mode: false
|
||||
environment: {}
|
||||
|
|
|
@ -1,6 +1,6 @@
|
|||
---
|
||||
- name: Check memory resource requirement for openpai service
|
||||
fail:
|
||||
msg: "OpenPAI's worker node should have 16 Gi free memory for service "
|
||||
set_fact:
|
||||
unmet_requirements: "{{ unmet_requirements + [\"OpenPAI's worker node should have 16 Gi free memory for service\"] }}"
|
||||
when:
|
||||
- ansible_memory_mb["nocache"]["free"] < 16000
|
|
@ -8,8 +8,8 @@
|
|||
environment: {}
|
||||
|
||||
- name: "Failed, if docker is not installed on your worker machine."
|
||||
fail:
|
||||
msg: "Unable to find docker in your master machine"
|
||||
set_fact:
|
||||
unmet_requirements: "{{ unmet_requirements + ['Unable to find docker in your worker machine'] }}"
|
||||
changed_when: false
|
||||
check_mode: false
|
||||
environment: {}
|
||||
|
@ -29,8 +29,8 @@
|
|||
default_runtime_processed: "{{ default_runtime.stdout_lines[0] | replace('\"','') }}"
|
||||
|
||||
- name: "Check the default runtime is set correctly"
|
||||
fail:
|
||||
msg: "The default runtime is not set correctly"
|
||||
set_fact:
|
||||
unmet_requirements: "{{ unmet_requirements + ['The default runtime is not set correctly'] }}"
|
||||
changed_when: false
|
||||
check_mode: false
|
||||
environment: {}
|
||||
|
|
|
@ -8,8 +8,8 @@
|
|||
environment: {}
|
||||
|
||||
- name: "Check NVIDIA GPU exits or not"
|
||||
fail:
|
||||
msg: "NVIDIA GPU card is not detected in your worker machines."
|
||||
set_fact:
|
||||
unmet_requirements: "{{ unmet_requirements + ['NVIDIA GPU card is not detected in your worker machines'] }}"
|
||||
changed_when: false
|
||||
check_mode: false
|
||||
environment: {}
|
||||
|
@ -25,8 +25,8 @@
|
|||
environment: {}
|
||||
|
||||
- name: "Check NVIDIA GPU drivers is installed or not "
|
||||
fail:
|
||||
msg: "NVIDIA GPU drivers is not detected in your worker machines."
|
||||
set_fact:
|
||||
unmet_requirements: "{{ unmet_requirements + ['NVIDIA GPU drivers is not detected in your worker machines'] }}"
|
||||
changed_when: false
|
||||
check_mode: false
|
||||
environment: {}
|
||||
|
@ -42,8 +42,8 @@
|
|||
environment: {}
|
||||
|
||||
- name: "Check NVIDIA container runtime is installed or not"
|
||||
fail:
|
||||
msg: "NVIDIA container runtime is not detected in your worker machines."
|
||||
set_fact:
|
||||
unmet_requirements: "{{ unmet_requirements + ['NVIDIA container runtime is not detected in your worker machines'] }}"
|
||||
changed_when: false
|
||||
check_mode: false
|
||||
environment: {}
|
||||
|
|
|
@ -8,8 +8,8 @@
|
|||
environment: {}
|
||||
|
||||
- name: "Worker 1.2 Check whether the vm can access to docker.io"
|
||||
fail:
|
||||
msg: "Unable to access docker.io"
|
||||
set_fact:
|
||||
unmet_requirements: "{{ unmet_requirements + ['Unable to access docker.io'] }}"
|
||||
changed_when: false
|
||||
check_mode: false
|
||||
environment: {}
|
||||
|
@ -17,8 +17,8 @@
|
|||
- worker_nslookup_docker_io.rc != 0
|
||||
|
||||
- name: "Worker 2.1 Ensure dev-box is not an worker machines"
|
||||
fail:
|
||||
msg: "Dev-box should be a separated machine from the cluster."
|
||||
set_fact:
|
||||
unmet_requirements: "{{ unmet_requirements + ['Dev-box should be a separated machine from the cluster'] }}"
|
||||
when:
|
||||
- ansible_control_host_address == ansible_default_ipv4.address
|
||||
|
||||
|
@ -31,8 +31,8 @@
|
|||
environment: {}
|
||||
|
||||
- name: "Worker 3.2 Check whether the vm can access to gcr.io"
|
||||
fail:
|
||||
msg: "Unable to access gcr.io"
|
||||
set_fact:
|
||||
unmet_requirements: "{{ unmet_requirements + ['Unable to access gcr.io'] }}"
|
||||
changed_when: false
|
||||
check_mode: false
|
||||
environment: {}
|
||||
|
@ -48,8 +48,8 @@
|
|||
environment: {}
|
||||
|
||||
- name: "Worker 4.2 Check whether the vm can access to quay.io"
|
||||
fail:
|
||||
msg: "Unable to access quay.io"
|
||||
set_fact:
|
||||
unmet_requirements: "{{ unmet_requirements + ['Unable to access quay.io'] }}"
|
||||
changed_when: false
|
||||
check_mode: false
|
||||
environment: {}
|
||||
|
|
Загрузка…
Ссылка в новой задаче