Update prometheus-grafana extension to run on agent nodes (#1959)

* Update prometheus-grafana extension to run on agent nodes

* Support running on both master and agent nodes

* Update example JSON so that running on agent nodes is the default

* add retry logic to initial helm install for prometheus chart

* Update rootURL to point to fork for CI

* Revert to upstream for rootURL
This commit is contained in:
Rita Zhang 2017-12-20 17:06:43 -06:00 коммит произвёл Jack Francis
Родитель bd0134adc1
Коммит 0d6fabe303
2 изменённых файлов: 72 добавлений и 16 удалений

Просмотреть файл

@ -7,19 +7,19 @@
"masterProfile": { "masterProfile": {
"count": 1, "count": 1,
"dnsPrefix": "", "dnsPrefix": "",
"vmSize": "Standard_DS2_v2", "vmSize": "Standard_DS2_v2"
"extensions": [
{
"name": "prometheus-grafana-k8s"
}
]
}, },
"agentPoolProfiles": [ "agentPoolProfiles": [
{ {
"name": "agentpool1", "name": "agentpool1",
"count": 3, "count": 3,
"vmSize": "Standard_DS2_v2", "vmSize": "Standard_DS2_v2",
"availabilityProfile": "AvailabilitySet" "availabilityProfile": "AvailabilitySet",
"extensions": [
{
"name": "prometheus-grafana-k8s"
}
]
} }
], ],
"linuxProfile": { "linuxProfile": {

Просмотреть файл

@ -3,6 +3,9 @@ set -x
echo $(date) " - Starting Script" echo $(date) " - Starting Script"
echo $(date) " - Setting kubeconfig"
export KUBECONFIG=/var/lib/kubelet/kubeconfig
echo $(date) " - Waiting for API Server to start" echo $(date) " - Waiting for API Server to start"
kubernetesStarted=1 kubernetesStarted=1
for i in {1..600}; do for i in {1..600}; do
@ -59,14 +62,47 @@ wait_for_master_nodes() {
return 1 return 1
} }
# Print the name of the first agent node, in sort order.
#
# Asks kubectl for the names of all nodes labelled kubernetes.io/role=agent
# (jsonpath emits them space-separated on one line), puts one name per line,
# sorts them and keeps only the first. Prints nothing when kubectl returns
# no names.
agent_nodes() {
    kubectl get no \
        -L kubernetes.io/role \
        -l kubernetes.io/role=agent \
        --no-headers \
        -o jsonpath='{.items[*].metadata.name}' \
        | tr ' ' '\n' \
        | sort \
        | head -n 1
}
# Poll agent_nodes until it reports a node name or the attempts run out.
#
# Globals written: ATTEMPTS, SLEEP_TIME, ITERATION, FIRST_K8S_AGENT
# Returns: 0 as soon as agent_nodes prints a non-empty name;
#          1 after ATTEMPTS polls, SLEEP_TIME seconds apart, all came back empty.
wait_for_agent_nodes() {
    ATTEMPTS=90
    SLEEP_TIME=10
    ITERATION=0

    until (( ITERATION >= ATTEMPTS )); do
        echo $(date) " - Is kubectl returning agent nodes? (attempt $(( ITERATION + 1 )) of $ATTEMPTS)"
        FIRST_K8S_AGENT=$(agent_nodes)

        # Nothing reported yet: count the attempt, pause, and poll again.
        if [[ -z $FIRST_K8S_AGENT ]]; then
            ITERATION=$(( ITERATION + 1 ))
            sleep $SLEEP_TIME
            continue
        fi

        echo $(date) " - kubectl is returning agent nodes"
        return
    done

    echo $(date) " - kubectl failed to return agent nodes in the alotted time"
    return 1
}
# Decide whether the local node is the one that should run the extension.
#
# The extension runs on exactly one node: the first master node reported by
# master_nodes, or else the first agent node reported by agent_nodes.
#
# Globals written: FIRST_K8S_MASTER (always); FIRST_K8S_AGENT (only when the
#                  local node is not the first master)
# Returns: 0 if the local hostname equals the first master or first agent node,
#          1 otherwise.
should_this_node_run_extension() {
    local this_host
    this_host=$(hostname)

    FIRST_K8S_MASTER=$(master_nodes)
    # The right-hand side is quoted so it is compared as a literal string;
    # unquoted, [[ ]] would treat it as a glob pattern.
    if [[ $FIRST_K8S_MASTER == "$this_host" ]]; then
        echo $(date) " - Local node $this_host is found to be the first master node $FIRST_K8S_MASTER"
        return 0
    fi

    FIRST_K8S_AGENT=$(agent_nodes)
    if [[ $FIRST_K8S_AGENT == "$this_host" ]]; then
        echo $(date) " - Local node $this_host is found to be the first agent node $FIRST_K8S_AGENT"
        return 0
    fi

    echo $(date) " - Local node $this_host is not the first master node $FIRST_K8S_MASTER or the first agent node $FIRST_K8S_AGENT"
    return 1
}
@ -129,16 +165,29 @@ install_prometheus() {
echo $(date) " - Installing the Prometheus Helm chart" echo $(date) " - Installing the Prometheus Helm chart"
helm install -f prometheus_values.yaml \ ATTEMPTS=90
--name $PROM_RELEASE_NAME \ SLEEP_TIME=10
--namespace $NAMESPACE stable/prometheus $(storageclass_param)
ITERATION=0
while [[ $ITERATION -lt $ATTEMPTS ]]; do
helm install -f prometheus_values.yaml \
--name $PROM_RELEASE_NAME \
--namespace $NAMESPACE stable/prometheus $(storageclass_param)
if [[ $? -eq 0 ]]; then
echo $(date) " - Helm install successfully completed"
break
else
echo $(date) " - Helm install returned a non-zero exit code. Retrying."
fi
ITERATION=$(( $ITERATION + 1 ))
sleep $SLEEP_TIME
done
PROM_POD_PREFIX="$PROM_RELEASE_NAME-prometheus-server" PROM_POD_PREFIX="$PROM_RELEASE_NAME-prometheus-server"
DESIRED_POD_STATE=Running DESIRED_POD_STATE=Running
ATTEMPTS=90
SLEEP_TIME=10
ITERATION=0 ITERATION=0
while [[ $ITERATION -lt $ATTEMPTS ]]; do while [[ $ITERATION -lt $ATTEMPTS ]]; do
echo $(date) " - Is the prometheus server pod ($PROM_POD_PREFIX-*) running? (attempt $(( $ITERATION + 1 )) of $ATTEMPTS)" echo $(date) " - Is the prometheus server pod ($PROM_POD_PREFIX-*) running? (attempt $(( $ITERATION + 1 )) of $ATTEMPTS)"
@ -212,9 +261,16 @@ if [[ $? -ne 0 ]]; then
echo $(date) " - Error while waiting for kubectl to output master nodes. Exiting" echo $(date) " - Error while waiting for kubectl to output master nodes. Exiting"
exit 1 exit 1
fi fi
wait_for_agent_nodes
if [[ $? -ne 0 ]]; then
echo $(date) " - Error while waiting for kubectl to output agent nodes. Exiting"
exit 1
fi
should_this_node_run_extension should_this_node_run_extension
if [[ $? -ne 0 ]]; then if [[ $? -ne 0 ]]; then
echo $(date) " - Not the first master node, no longer continuing extension. Exiting" echo $(date) " - Not the first master node or the first agent node, no longer continuing extension. Exiting"
exit 1 exit 1
fi fi