[Kubespray] make the requirement check not fail immediately (#4567)

* fix

* fix

* fix

* fix

* fix

* fix

* fix

* fix

* fix

* fix
This commit is contained in:
Zhiyuan He 2020-05-27 15:32:07 +08:00 коммит произвёл GitHub
Родитель f68e2e0c2e
Коммит 8898310726
Не найден ключ, соответствующий данной подписи
Идентификатор ключа GPG: 4AEE18F83AFDEB23
12 изменённых файлов: 71 добавлений и 65 удалений

Просмотреть файл

@ -14,6 +14,13 @@
- name: "SSH test from dev-box to all machine of infra and worker"
ping:
# Play targeting every inventory host plus localhost, with fact gathering
# turned off. Its single task gives each host an empty `unmet_requirements`
# list; the requirement checks in this change append their messages to it
# with `unmet_requirements + [...]`.
- hosts: all,localhost
gather_facts: false
tasks:
- name: "set up unmet_requirements"
set_fact:
unmet_requirements: []
- hosts: localhost
become: true
become_user: root
@ -35,3 +42,10 @@
gather_facts: true
roles:
- { role: requirement/worker }
# Final reporting play for every inventory host plus localhost. The `fail`
# task runs only when the host's `unmet_requirements` list is non-empty, and
# its message is the collected entries joined with '. '.
- hosts: all,localhost
tasks:
- name: "display unmet requirements"
fail:
msg: "The following requirements are not met: {{ unmet_requirements | join('. ') }}"
when: unmet_requirements|length > 0

Просмотреть файл

@ -48,7 +48,17 @@ echo "Ping Test"
ansible all -i ${HOME}/pai-deploy/cluster-cfg/hosts.yml -m ping || exit $?
/bin/bash requirement.sh -m ${MASTER_LIST} -w ${WORKER_LIST} -c ${CLUSTER_CONFIG} || exit $?
# Run the requirement check without aborting on failure straight away.
# On a non-zero exit status the user is asked to confirm: typing exactly
# "continue" lets the script proceed; any other answer (including just
# pressing ENTER) exits with the status returned by requirement.sh.
/bin/bash requirement.sh -m ${MASTER_LIST} -w ${WORKER_LIST} -c ${CLUSTER_CONFIG}
ret_code_check=$?
if [ "${ret_code_check}" -ne 0 ]; then
  echo ""
  echo "Please press ENTER to stop the script, check the log, and modify the cluster setting to meet the requirements."
  echo "If you are very sure about the configuration, and still want to continue, you can type in \"continue\" to force the script to proceed."
  # -r keeps backslashes in the typed answer literal instead of treating
  # them as escape characters.
  read -r user_input
  if [ "${user_input}" != "continue" ]; then
    exit "${ret_code_check}"
  fi
fi
/bin/bash preinstall.sh -c ${CLUSTER_CONFIG} || exit $?

Просмотреть файл

@ -34,10 +34,9 @@ if [ $ret_code_check -eq 0 ]
then
echo "Pass: Cluster meets the requirements"
else
echo "Faild: Please check the output, and modify the cluster setting to meet the requirement"
echo "Failed: There are unmet requirements in your cluster, the installation will be very likely to fail."
rm -rf ${HOME}/pai-pre-check/
exit $ret_code_check
fi
rm -rf ${HOME}/pai-pre-check/

Просмотреть файл

@ -52,7 +52,7 @@
when: ping_cloud_google.rc == 0
- name: Update source with mirrors.aliyun.com
include_tasks: cloud.google.yml
include_tasks: mirrors.aliyun.yml
when: ping_cloud_google.rc != 0
- name: run the equivalent of "apt-get update" as a separate step

Просмотреть файл

@ -8,8 +8,8 @@
environment: {}
- name: "Dev-box 1.2 Check if docker is installed on your dev-box or not."
fail:
msg: "Unable to find docker in your dev-box machine"
set_fact:
unmet_requirements: "{{ unmet_requirements + ['Unable to find docker in your dev-box machine'] }}"
changed_when: false
check_mode: false
environment: {}
@ -29,32 +29,15 @@
dev_box_docker_version_replace: "{{ dev_box_docker_version.stdout | replace('\"','') }}"
- name: "Dev-box 2.3 Check docker version in dev-box"
fail:
msg: "The docker version in your dev-box is too low, please update it. And make sure its higher then 1.10"
set_fact:
unmet_requirements: "{{ unmet_requirements + ['The docker version in your dev-box is too low, please update it. And make sure its higher then 1.10'] }}"
changed_when: false
check_mode: false
environment: {}
when:
- dev_box_docker_version_replace is version('1.10.0', '<=')
- name: "Dev-box 3.1 Check whether the vm can raw.githubusercontent.com"
raw: curl https://raw.githubusercontent.com/microsoft/pai/master/README.md
register: devbox_curl_githubusercontent
failed_when: false
changed_when: false
check_mode: false
environment: {}
- name: "Dev-box 3.2 Check whether the vm can access to raw.githubusercontent.com"
fail:
msg: "Unable to access raw.githubusercontent.com"
changed_when: false
check_mode: false
environment: {}
when:
- devbox_curl_githubusercontent.rc != 0
- name: "Dev-box 4.1 Check whether the vm can access to docker.io"
- name: "Dev-box 3.1 Check whether the vm can access to docker.io"
raw: nslookup index.docker.io
register: devbox_nslookup_docker_io
failed_when: false
@ -62,9 +45,9 @@
check_mode: false
environment: {}
- name: "Dev-box 4.2 Check whether the vm can access to docker.io"
fail:
msg: "Unable to access docker.io"
- name: "Dev-box 3.2 Check whether the vm can access to docker.io"
set_fact:
unmet_requirements: "{{ unmet_requirements + ['Unable to access docker.io'] }}"
changed_when: false
check_mode: false
environment: {}

Просмотреть файл

@ -1,12 +1,12 @@
---
# NOTE(review): this is a diff view without +/- markers, so each task below
# shows both a `fail:`/`msg:` pair and a `set_fact:` pair; presumably the
# former is the removed version and the latter its replacement - confirm
# against the real file.
# Memory check: applies when ansible_memory_mb["nocache"]["free"] is below
# 40000; the set_fact form appends the message to `unmet_requirements`.
- name: Check memory resource requirement for openpai service
fail:
msg: "OpenPAI's infra node should have 40 Gi free memory for service "
set_fact:
unmet_requirements: "{{ unmet_requirements + [\"OpenPAI's infra node should have 40 Gi free memory for service\"] }}"
when:
- ansible_memory_mb["nocache"]["free"] < 40000
# CPU check: applies when ansible_processor_vcpus is below 1; the set_fact
# form appends the message to `unmet_requirements`.
- name: Check cpu resource requirement for openpai service
fail:
msg: "OpenPAI's infra node should have 1 CPU vcore for service "
set_fact:
unmet_requirements: "{{ unmet_requirements + [\"OpenPAI's infra node should have 1 CPU vcore for service\"] }}"
when:
- ansible_processor_vcpus < 1

Просмотреть файл

@ -8,8 +8,8 @@
environment: {}
- name: "Fail, if docker is not installed on your master machine."
fail:
msg: "Unable to find docker in your master machine"
set_fact:
unmet_requirements: "{{ unmet_requirements + ['Unable to find docker in your master machine'] }}"
changed_when: false
check_mode: false
environment: {}

Просмотреть файл

@ -8,8 +8,8 @@
environment: {}
- name: "Infra 1.2 Check whether the vm can access to docker.io"
fail:
msg: "Unable to access docker.io"
set_fact:
unmet_requirements: "{{ unmet_requirements + ['Unable to access docker.io'] }}"
changed_when: false
check_mode: false
environment: {}
@ -33,8 +33,8 @@
environment: {}
- name: "Infra 2.3 Check whether the NTP is installed and enabled."
fail:
msg: "NTP is not enabled on your infra machines."
set_fact:
unmet_requirements: "{{ unmet_requirements + ['NTP is not enabled on your infra machines'] }}"
changed_when: false
check_mode: false
environment: {}
@ -42,8 +42,8 @@
- infra_ntpq.rc != 0 or infra_nslookup_docker_io.rc != 0
- name: "Infra 3.1 Ensure dev-box is not an infra machines"
fail:
msg: "Dev-box should be a separated machine from the cluster."
set_fact:
unmet_requirements: "{{ unmet_requirements + ['Dev-box should be a separated machine from the cluster'] }}"
when:
- ansible_control_host_address == ansible_default_ipv4.address
@ -56,8 +56,8 @@
environment: {}
- name: "Infra 4.2 Check whether the vm can access to gcr.io"
fail:
msg: "Unable to access gcr.io"
set_fact:
unmet_requirements: "{{ unmet_requirements + ['Unable to access gcr.io'] }}"
changed_when: false
check_mode: false
environment: {}
@ -73,8 +73,8 @@
environment: {}
- name: "Infra 5.2 Check whether the vm can access to quay.io"
fail:
msg: "Unable to access quay.io"
set_fact:
unmet_requirements: "{{ unmet_requirements + ['Unable to access quay.io'] }}"
changed_when: false
check_mode: false
environment: {}

Просмотреть файл

@ -1,6 +1,6 @@
---
# NOTE(review): this is a diff view without +/- markers, so the task shows
# both a `fail:`/`msg:` pair and a `set_fact:` pair; presumably the former is
# the removed version and the latter its replacement - confirm against the
# real file.
# Memory check: applies when ansible_memory_mb["nocache"]["free"] is below
# 16000; the set_fact form appends the message to `unmet_requirements`.
- name: Check memory resource requirement for openpai service
fail:
msg: "OpenPAI's worker node should have 16 Gi free memory for service "
set_fact:
unmet_requirements: "{{ unmet_requirements + [\"OpenPAI's worker node should have 16 Gi free memory for service\"] }}"
when:
- ansible_memory_mb["nocache"]["free"] < 16000

Просмотреть файл

@ -8,8 +8,8 @@
environment: {}
- name: "Failed, if docker is not installed on your worker machine."
fail:
msg: "Unable to find docker in your master machine"
set_fact:
unmet_requirements: "{{ unmet_requirements + ['Unable to find docker in your worker machine'] }}"
changed_when: false
check_mode: false
environment: {}
@ -29,8 +29,8 @@
default_runtime_processed: "{{ default_runtime.stdout_lines[0] | replace('\"','') }}"
- name: "Check the default runtime is set correctly"
fail:
msg: "The default runtime is not set correctly"
set_fact:
unmet_requirements: "{{ unmet_requirements + ['The default runtime is not set correctly'] }}"
changed_when: false
check_mode: false
environment: {}

Просмотреть файл

@ -8,8 +8,8 @@
environment: {}
- name: "Check NVIDIA GPU exits or not"
fail:
msg: "NVIDIA GPU card is not detected in your worker machines."
set_fact:
unmet_requirements: "{{ unmet_requirements + ['NVIDIA GPU card is not detected in your worker machines'] }}"
changed_when: false
check_mode: false
environment: {}
@ -25,8 +25,8 @@
environment: {}
- name: "Check NVIDIA GPU drivers is installed or not "
fail:
msg: "NVIDIA GPU drivers is not detected in your worker machines."
set_fact:
unmet_requirements: "{{ unmet_requirements + ['NVIDIA GPU drivers is not detected in your worker machines'] }}"
changed_when: false
check_mode: false
environment: {}
@ -42,8 +42,8 @@
environment: {}
- name: "Check NVIDIA container runtime is installed or not"
fail:
msg: "NVIDIA container runtime is not detected in your worker machines."
set_fact:
unmet_requirements: "{{ unmet_requirements + ['NVIDIA container runtime is not detected in your worker machines'] }}"
changed_when: false
check_mode: false
environment: {}

Просмотреть файл

@ -8,8 +8,8 @@
environment: {}
- name: "Worker 1.2 Check whether the vm can access to docker.io"
fail:
msg: "Unable to access docker.io"
set_fact:
unmet_requirements: "{{ unmet_requirements + ['Unable to access docker.io'] }}"
changed_when: false
check_mode: false
environment: {}
@ -17,8 +17,8 @@
- worker_nslookup_docker_io.rc != 0
- name: "Worker 2.1 Ensure dev-box is not an worker machines"
fail:
msg: "Dev-box should be a separated machine from the cluster."
set_fact:
unmet_requirements: "{{ unmet_requirements + ['Dev-box should be a separated machine from the cluster'] }}"
when:
- ansible_control_host_address == ansible_default_ipv4.address
@ -31,8 +31,8 @@
environment: {}
- name: "Worker 3.2 Check whether the vm can access to gcr.io"
fail:
msg: "Unable to access gcr.io"
set_fact:
unmet_requirements: "{{ unmet_requirements + ['Unable to access gcr.io'] }}"
changed_when: false
check_mode: false
environment: {}
@ -48,8 +48,8 @@
environment: {}
- name: "Worker 4.2 Check whether the vm can access to quay.io"
fail:
msg: "Unable to access quay.io"
set_fact:
unmet_requirements: "{{ unmet_requirements + ['Unable to access quay.io'] }}"
changed_when: false
check_mode: false
environment: {}