зеркало из https://github.com/Azure/aks-engine.git
Update prometheus-grafana extension to run on agent nodes (#1959)
* Update prometheus-grafana extension to run on agent nodes * Support running on both master and agent nodes * Update example json making run on agent default * add retry logic to initial helm install for prometheus chart * Update rootURL to point to fork for CI * Revert to upstream for rootURL
This commit is contained in:
Родитель
bd0134adc1
Коммит
0d6fabe303
|
@ -7,19 +7,19 @@
|
|||
"masterProfile": {
|
||||
"count": 1,
|
||||
"dnsPrefix": "",
|
||||
"vmSize": "Standard_DS2_v2",
|
||||
"extensions": [
|
||||
{
|
||||
"name": "prometheus-grafana-k8s"
|
||||
}
|
||||
]
|
||||
"vmSize": "Standard_DS2_v2"
|
||||
},
|
||||
"agentPoolProfiles": [
|
||||
{
|
||||
"name": "agentpool1",
|
||||
"count": 3,
|
||||
"vmSize": "Standard_DS2_v2",
|
||||
"availabilityProfile": "AvailabilitySet"
|
||||
"availabilityProfile": "AvailabilitySet",
|
||||
"extensions": [
|
||||
{
|
||||
"name": "prometheus-grafana-k8s"
|
||||
}
|
||||
]
|
||||
}
|
||||
],
|
||||
"linuxProfile": {
|
||||
|
|
|
@ -3,6 +3,9 @@ set -x
|
|||
|
||||
echo $(date) " - Starting Script"
|
||||
|
||||
echo $(date) " - Setting kubeconfig"
|
||||
export KUBECONFIG=/var/lib/kubelet/kubeconfig
|
||||
|
||||
echo $(date) " - Waiting for API Server to start"
|
||||
kubernetesStarted=1
|
||||
for i in {1..600}; do
|
||||
|
@ -59,14 +62,47 @@ wait_for_master_nodes() {
|
|||
return 1
|
||||
}
|
||||
|
||||
#######################################
# Print the name of the first agent node known to the cluster.
#
# Lists every node labelled kubernetes.io/role=agent, splits the
# space-separated jsonpath output into one name per line, sorts the names
# and keeps only the first one.
# Arguments: none
# Outputs:   the selected node name on stdout; nothing when kubectl
#            lists no agent nodes
#######################################
agent_nodes() {
  kubectl get no -L kubernetes.io/role -l kubernetes.io/role=agent --no-headers \
    -o jsonpath="{.items[*].metadata.name}" \
    | tr " " "\n" \
    | sort \
    | head -n 1
}
|
||||
|
||||
#######################################
# Poll until kubectl reports at least one agent node.
#
# Calls agent_nodes up to ATTEMPTS times, sleeping SLEEP_TIME seconds after
# every unsuccessful attempt (90 x 10s, i.e. roughly 15 minutes at most).
# Globals:   ATTEMPTS, SLEEP_TIME, ITERATION (written)
#            FIRST_K8S_AGENT (written; last value returned by agent_nodes)
# Arguments: none
# Outputs:   progress messages on stdout
# Returns:   0 as soon as an agent node name is returned, 1 on timeout
#######################################
wait_for_agent_nodes() {
  ATTEMPTS=90
  SLEEP_TIME=10

  ITERATION=0
  while [[ $ITERATION -lt $ATTEMPTS ]]; do
    echo $(date) " - Is kubectl returning agent nodes? (attempt $(( ITERATION + 1 )) of $ATTEMPTS)"

    FIRST_K8S_AGENT=$(agent_nodes)

    if [[ -n "$FIRST_K8S_AGENT" ]]; then
      echo $(date) " - kubectl is returning agent nodes"
      # Explicit status so success does not depend on the echo above.
      return 0
    fi

    ITERATION=$(( ITERATION + 1 ))
    sleep "$SLEEP_TIME"
  done

  echo $(date) " - kubectl failed to return agent nodes in the alotted time"
  return 1
}
|
||||
|
||||
#######################################
# Decide whether the local node is the one that should run the extension.
#
# The local hostname is compared first against the name printed by
# master_nodes and then against the name printed by agent_nodes; a match
# with either one selects this node.
# Globals:   FIRST_K8S_MASTER, FIRST_K8S_AGENT (written)
# Arguments: none
# Outputs:   a message on stdout describing the decision
# Returns:   0 if this node is the first master or the first agent node,
#            1 otherwise
#######################################
should_this_node_run_extension() {
  local this_host
  this_host=$(hostname)

  FIRST_K8S_MASTER=$(master_nodes)
  # Both sides are quoted so the comparison is a literal string match,
  # not a glob pattern match.
  if [[ "$FIRST_K8S_MASTER" == "$this_host" ]]; then
    echo $(date) " - Local node $this_host is found to be the first master node $FIRST_K8S_MASTER"
    return 0
  fi

  # Not the first master: fall through to the agent check instead of
  # bailing out early, otherwise an agent node could never be selected.
  FIRST_K8S_AGENT=$(agent_nodes)
  if [[ "$FIRST_K8S_AGENT" == "$this_host" ]]; then
    echo $(date) " - Local node $this_host is found to be the first agent node $FIRST_K8S_AGENT"
    return 0
  fi

  echo $(date) " - Local node $this_host is not the first master node $FIRST_K8S_MASTER or the first agent node $FIRST_K8S_AGENT"
  return 1
}
|
||||
|
||||
|
@ -129,16 +165,29 @@ install_prometheus() {
|
|||
|
||||
echo $(date) " - Installing the Prometheus Helm chart"
|
||||
|
||||
helm install -f prometheus_values.yaml \
|
||||
--name $PROM_RELEASE_NAME \
|
||||
--namespace $NAMESPACE stable/prometheus $(storageclass_param)
|
||||
ATTEMPTS=90
|
||||
SLEEP_TIME=10
|
||||
|
||||
ITERATION=0
|
||||
while [[ $ITERATION -lt $ATTEMPTS ]]; do
|
||||
helm install -f prometheus_values.yaml \
|
||||
--name $PROM_RELEASE_NAME \
|
||||
--namespace $NAMESPACE stable/prometheus $(storageclass_param)
|
||||
|
||||
if [[ $? -eq 0 ]]; then
|
||||
echo $(date) " - Helm install successfully completed"
|
||||
break
|
||||
else
|
||||
echo $(date) " - Helm install returned a non-zero exit code. Retrying."
|
||||
fi
|
||||
|
||||
ITERATION=$(( $ITERATION + 1 ))
|
||||
sleep $SLEEP_TIME
|
||||
done
|
||||
|
||||
PROM_POD_PREFIX="$PROM_RELEASE_NAME-prometheus-server"
|
||||
DESIRED_POD_STATE=Running
|
||||
|
||||
ATTEMPTS=90
|
||||
SLEEP_TIME=10
|
||||
|
||||
ITERATION=0
|
||||
while [[ $ITERATION -lt $ATTEMPTS ]]; do
|
||||
echo $(date) " - Is the prometheus server pod ($PROM_POD_PREFIX-*) running? (attempt $(( $ITERATION + 1 )) of $ATTEMPTS)"
|
||||
|
@ -212,9 +261,16 @@ if [[ $? -ne 0 ]]; then
|
|||
echo $(date) " - Error while waiting for kubectl to output master nodes. Exiting"
|
||||
exit 1
|
||||
fi
|
||||
|
||||
# Block until kubectl lists at least one agent node; give up on timeout.
wait_for_agent_nodes
if [[ $? -ne 0 ]]; then
  echo $(date) " - Error while waiting for kubectl to output agent nodes. Exiting"
  exit 1
fi

# Only the node selected by should_this_node_run_extension continues past
# this point; every other node stops here with exit status 1.
should_this_node_run_extension
if [[ $? -ne 0 ]]; then
  echo $(date) " - Not the first master node or the first agent node, no longer continuing extension. Exiting"
  exit 1
fi
|
||||
|
||||
|
|
Загрузка…
Ссылка в новой задаче