зеркало из https://github.com/Azure/aks-engine.git
Update prometheus-grafana extension to run on agent nodes (#1959)
* Update prometheus-grafana extension to run on agent nodes * Support running on both master and agent nodes * Update example JSON to make running on agent nodes the default * Add retry logic to initial helm install for prometheus chart * Update rootURL to point to fork for CI * Revert to upstream for rootURL
This commit is contained in:
Родитель
bd0134adc1
Коммит
0d6fabe303
|
@ -7,19 +7,19 @@
|
||||||
"masterProfile": {
|
"masterProfile": {
|
||||||
"count": 1,
|
"count": 1,
|
||||||
"dnsPrefix": "",
|
"dnsPrefix": "",
|
||||||
"vmSize": "Standard_DS2_v2",
|
"vmSize": "Standard_DS2_v2"
|
||||||
"extensions": [
|
|
||||||
{
|
|
||||||
"name": "prometheus-grafana-k8s"
|
|
||||||
}
|
|
||||||
]
|
|
||||||
},
|
},
|
||||||
"agentPoolProfiles": [
|
"agentPoolProfiles": [
|
||||||
{
|
{
|
||||||
"name": "agentpool1",
|
"name": "agentpool1",
|
||||||
"count": 3,
|
"count": 3,
|
||||||
"vmSize": "Standard_DS2_v2",
|
"vmSize": "Standard_DS2_v2",
|
||||||
"availabilityProfile": "AvailabilitySet"
|
"availabilityProfile": "AvailabilitySet",
|
||||||
|
"extensions": [
|
||||||
|
{
|
||||||
|
"name": "prometheus-grafana-k8s"
|
||||||
|
}
|
||||||
|
]
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
"linuxProfile": {
|
"linuxProfile": {
|
||||||
|
|
|
@ -3,6 +3,9 @@ set -x
|
||||||
|
|
||||||
echo $(date) " - Starting Script"
|
echo $(date) " - Starting Script"
|
||||||
|
|
||||||
|
echo $(date) " - Setting kubeconfig"
|
||||||
|
export KUBECONFIG=/var/lib/kubelet/kubeconfig
|
||||||
|
|
||||||
echo $(date) " - Waiting for API Server to start"
|
echo $(date) " - Waiting for API Server to start"
|
||||||
kubernetesStarted=1
|
kubernetesStarted=1
|
||||||
for i in {1..600}; do
|
for i in {1..600}; do
|
||||||
|
@ -59,14 +62,47 @@ wait_for_master_nodes() {
|
||||||
return 1
|
return 1
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#######################################
# Print the name of the first agent node known to the API server.
#
# Asks kubectl for the nodes labelled kubernetes.io/role=agent, sorts their
# names and keeps only the first one.
# Outputs:
#   At most one node name on stdout; nothing when kubectl lists no agents.
#######################################
agent_nodes() {
  # The jsonpath expression prints all names on one space-separated line, so
  # split them onto separate lines before sorting.
  # LC_ALL=C pins byte-wise collation: the node chosen as "first" must not
  # depend on whatever locale the script happens to be started under.
  kubectl get no -L kubernetes.io/role -l kubernetes.io/role=agent --no-headers -o jsonpath="{.items[*].metadata.name}" \
    | tr " " "\n" \
    | LC_ALL=C sort \
    | head -n 1
}
|
||||||
|
|
||||||
|
#######################################
# Poll until kubectl reports at least one agent node.
#
# Calls agent_nodes up to ATTEMPTS times, pausing SLEEP_TIME seconds between
# consecutive attempts.
# Globals:
#   ATTEMPTS        (written) maximum number of polls, set to 90
#   SLEEP_TIME      (written) seconds between polls, set to 10
#   ITERATION       (written) number of polls completed so far
#   FIRST_K8S_AGENT (written) output of the most recent agent_nodes call
# Outputs:
#   Progress messages on stdout.
# Returns:
#   0 as soon as agent_nodes prints a non-empty name, 1 when every attempt
#   came back empty.
#######################################
wait_for_agent_nodes() {
  # These stay global (not "local") on purpose: they were global before and
  # other parts of the script may read them.
  ATTEMPTS=90
  SLEEP_TIME=10

  ITERATION=0
  while (( ITERATION < ATTEMPTS )); do
    echo $(date) " - Is kubectl returning agent nodes? (attempt $(( ITERATION + 1 )) of $ATTEMPTS)"

    FIRST_K8S_AGENT=$(agent_nodes)

    if [[ -n "$FIRST_K8S_AGENT" ]]; then
      echo $(date) " - kubectl is returning agent nodes"
      return 0
    fi

    ITERATION=$(( ITERATION + 1 ))

    # Only pause when another attempt will follow; sleeping after the final
    # failed attempt would just delay the error report.
    if (( ITERATION < ATTEMPTS )); then
      sleep "$SLEEP_TIME"
    fi
  done

  echo $(date) " - kubectl failed to return agent nodes in the allotted time"
  return 1
}
|
||||||
|
|
||||||
should_this_node_run_extension() {
|
should_this_node_run_extension() {
|
||||||
FIRST_K8S_MASTER=$(master_nodes)
|
FIRST_K8S_MASTER=$(master_nodes)
|
||||||
if [[ $FIRST_K8S_MASTER = $(hostname) ]]; then
|
if [[ $FIRST_K8S_MASTER = $(hostname) ]]; then
|
||||||
echo $(date) " - Local node $(hostname) is found to be the first master node $FIRST_K8S_MASTER"
|
echo $(date) " - Local node $(hostname) is found to be the first master node $FIRST_K8S_MASTER"
|
||||||
return
|
return
|
||||||
else
|
else
|
||||||
echo $(date) " - Local node $(hostname) is not the first master node $FIRST_K8S_MASTER"
|
FIRST_K8S_AGENT=$(agent_nodes)
|
||||||
return 1
|
if [[ $FIRST_K8S_AGENT = $(hostname) ]]; then
|
||||||
|
echo $(date) " - Local node $(hostname) is found to be the first agent node $FIRST_K8S_AGENT"
|
||||||
|
return
|
||||||
|
else
|
||||||
|
echo $(date) " - Local node $(hostname) is not the first master node $FIRST_K8S_MASTER or the first agent node $FIRST_K8S_AGENT"
|
||||||
|
return 1
|
||||||
|
fi
|
||||||
fi
|
fi
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -129,16 +165,29 @@ install_prometheus() {
|
||||||
|
|
||||||
echo $(date) " - Installing the Prometheus Helm chart"
|
echo $(date) " - Installing the Prometheus Helm chart"
|
||||||
|
|
||||||
helm install -f prometheus_values.yaml \
|
ATTEMPTS=90
|
||||||
--name $PROM_RELEASE_NAME \
|
SLEEP_TIME=10
|
||||||
--namespace $NAMESPACE stable/prometheus $(storageclass_param)
|
|
||||||
|
ITERATION=0
|
||||||
|
while [[ $ITERATION -lt $ATTEMPTS ]]; do
|
||||||
|
helm install -f prometheus_values.yaml \
|
||||||
|
--name $PROM_RELEASE_NAME \
|
||||||
|
--namespace $NAMESPACE stable/prometheus $(storageclass_param)
|
||||||
|
|
||||||
|
if [[ $? -eq 0 ]]; then
|
||||||
|
echo $(date) " - Helm install successfully completed"
|
||||||
|
break
|
||||||
|
else
|
||||||
|
echo $(date) " - Helm install returned a non-zero exit code. Retrying."
|
||||||
|
fi
|
||||||
|
|
||||||
|
ITERATION=$(( $ITERATION + 1 ))
|
||||||
|
sleep $SLEEP_TIME
|
||||||
|
done
|
||||||
|
|
||||||
PROM_POD_PREFIX="$PROM_RELEASE_NAME-prometheus-server"
|
PROM_POD_PREFIX="$PROM_RELEASE_NAME-prometheus-server"
|
||||||
DESIRED_POD_STATE=Running
|
DESIRED_POD_STATE=Running
|
||||||
|
|
||||||
ATTEMPTS=90
|
|
||||||
SLEEP_TIME=10
|
|
||||||
|
|
||||||
ITERATION=0
|
ITERATION=0
|
||||||
while [[ $ITERATION -lt $ATTEMPTS ]]; do
|
while [[ $ITERATION -lt $ATTEMPTS ]]; do
|
||||||
echo $(date) " - Is the prometheus server pod ($PROM_POD_PREFIX-*) running? (attempt $(( $ITERATION + 1 )) of $ATTEMPTS)"
|
echo $(date) " - Is the prometheus server pod ($PROM_POD_PREFIX-*) running? (attempt $(( $ITERATION + 1 )) of $ATTEMPTS)"
|
||||||
|
@ -212,9 +261,16 @@ if [[ $? -ne 0 ]]; then
|
||||||
echo $(date) " - Error while waiting for kubectl to output master nodes. Exiting"
|
echo $(date) " - Error while waiting for kubectl to output master nodes. Exiting"
|
||||||
exit 1
|
exit 1
|
||||||
fi
|
fi
|
||||||
|
|
||||||
|
wait_for_agent_nodes
|
||||||
|
if [[ $? -ne 0 ]]; then
|
||||||
|
echo $(date) " - Error while waiting for kubectl to output agent nodes. Exiting"
|
||||||
|
exit 1
|
||||||
|
fi
|
||||||
|
|
||||||
should_this_node_run_extension
|
should_this_node_run_extension
|
||||||
if [[ $? -ne 0 ]]; then
|
if [[ $? -ne 0 ]]; then
|
||||||
echo $(date) " - Not the first master node, no longer continuing extension. Exiting"
|
echo $(date) " - Not the first master node or the first agent node, no longer continuing extension. Exiting"
|
||||||
exit 1
|
exit 1
|
||||||
fi
|
fi
|
||||||
|
|
||||||
|
|
Загрузка…
Ссылка в новой задаче