azure-container-networking/.pipelines/templates/log-template.yaml

276 строки
14 KiB
YAML

# -- Captures --
# CNS, CNI, and Cilium Logs
# CNS, CNI, and Cilium State files
# Daemonset and Deployment Images
# Node Status
# kube-system namespace logs
# Non-ready pods on failure
# -- Controlled by --
# CNI and OS | ${{ parameters.cni }} and ${{ parameters.os }}
# CNS ConfigMap | "ManageEndpointState"
# -- Generates --
# Logs on a per-node basis
# Outputs a single unique artifact per template call | ${{ parameters.clusterName }}_${{ parameters.os }}${{ parameters.jobName }}_Attempt_#$(System.StageAttempt)
# Each artifact is divided into sub-directories
# -- Intent --
# Provide thorough debugging information to understand why CNI test scenarios are failing without having to blindly reproduce them
parameters:
  # Cluster name; passed to `make set-kubeconf` and embedded in the log directory and artifact names.
  clusterName: ""
  # Log flavour; the non-ready pod capture step is only inserted when this equals "failure".
  logType: "failure"
  # Node OS; "linux" and "windows" each insert their own log-collection step.
  os: ""
  # CNI flavour; the scripts compare it against 'cilium', 'cniv1' and 'cniv2'.
  cni: ""
  # Used as part of the published artifact name.
  jobName: "FailedE2ELogs"
steps:
- task: AzureCLI@2
  # Sets the kubeconfig for the cluster, creates the per-attempt log root and
  # writes the logs of every kube-system pod beneath it.
  displayName: Kube-System Logs
  condition: always()
  continueOnError: true # Tends to fail after node restart due to pods still restarting. This should not block other tests or logs from running.
  inputs:
    azureSubscription: $(BUILD_VALIDATIONS_SERVICE_CONNECTION)
    scriptLocation: "inlineScript"
    scriptType: "bash"
    addSpnToEnvironment: true
    inlineScript: |
      make -C ./hack/aks set-kubeconf AZCLI=az CLUSTER=${{ parameters.clusterName }}

      # Root directory for everything this template collects; the publish step uploads the same path.
      logRoot=$(System.DefaultWorkingDirectory)/${{ parameters.clusterName }}_${{ parameters.logType }}_Attempt_#$(System.StageAttempt)
      mkdir -p $logRoot
      echo "Root Directory created: $logRoot"
      # Expose the path to later steps as the pipeline variable "acnLogs".
      echo "##vso[task.setvariable variable=acnLogs]$logRoot"

      kubectl get pods -n kube-system -owide

      # Backticks are kept for command substitution so it cannot be confused with pipeline macro syntax.
      kubeSystemPods=`kubectl get pods -n kube-system --no-headers | awk '{print $1}'`
      kubeSystemDir=$logRoot/kube-system
      mkdir -p $kubeSystemDir
      echo "Directory created: $kubeSystemDir"

      # One log file per pod, named <pod>-logs.txt.
      for podName in $kubeSystemPods; do
        podLog=$kubeSystemDir/$podName-logs.txt
        kubectl logs -n kube-system $podName > $podLog
        echo "$podLog"
      done
- bash: |
    # Print the full description of every node into the step log.
    kubectl describe nodes
  condition: always()
  displayName: Node Status
- bash: |
    # List all daemonsets, then print the Image lines of each one whose
    # listing row mentions kube-system.
    kubectl get ds -A -owide
    echo "Capture daemonset images being used"
    # Column 2 of the all-namespaces listing is the daemonset name.
    daemonsets=`kubectl get ds -A | awk '/kube-system/ {print $2}'`
    for name in $daemonsets; do
      echo "$name"
      kubectl describe ds -n kube-system $name | grep Image
    done
  condition: always()
  displayName: Daemonset Images
- bash: |
    # List all deployments, then print the Image lines of each one whose
    # listing row mentions kube-system.
    kubectl get deploy -A -owide
    echo "Capture deployment images being used"
    # Column 2 of the all-namespaces listing is the deployment name.
    deployments=`kubectl get deploy -A | awk '/kube-system/ {print $2}'`
    for name in $deployments; do
      echo "$name"
      kubectl describe deploy -n kube-system $name | grep Image
    done
  condition: always()
  displayName: Deployment Images
- ${{ if eq(parameters.logType, 'failure') }}:
  - bash: |
      # Describes every kube-system pod whose listing row does not contain
      # "Running" into <log root>/<os>non-ready/<pod>.txt.
      # This step is only inserted when logType is "failure".
      kubectl get pods -n kube-system -o custom-columns=NAME:.metadata.name,STATUS:.status.phase,NODE:.spec.nodeName
      podList=`kubectl get pods -n kube-system -o custom-columns=NAME:.metadata.name,STATUS:.status.phase,NODE:.spec.nodeName --no-headers | grep -v Running | awk '{print $1}'`

      if [ -z "$podList" ]; then
        echo "There are no kube-system pods in a non-ready state."
      else
        # The log root is read through the pipeline macro, as the Linux and Windows
        # log steps do. A plain lower-case shell variable of the same name is not
        # defined in this step: pipeline variables are exported to scripts
        # upper-cased (ACNLOGS), so the shell form would expand to an empty string.
        nonReadyDir="$(acnLogs)/${{ parameters.os }}non-ready"
        mkdir -p "$nonReadyDir"
        echo "Directory created: $nonReadyDir"
        echo "Capturing failed pods"
        for pod in $podList; do
          kubectl describe pod -n kube-system $pod > "$nonReadyDir/$pod.txt"
          echo "$nonReadyDir/$pod.txt"
        done
      fi
    displayName: Failure Logs
    condition: always()
- ${{ if eq(parameters.os, 'linux') }}:
  - bash: |
      # Collects per-node diagnostics from Linux nodes into <log root>/<node>_logs/:
      #   log-output/         azure-* files found under ./var/log (plus cilium-cni.log for cilium)
      #   privileged-output/  CNI state files read through the privileged daemonset (skipped for cilium)
      #   CNS-output/         CNS IP cache and state files (cilium and cniv2 only)
      #   Cilium-output/      cilium endpoint list (cilium only)
      # Backticks are kept for command substitution so it cannot be confused with pipeline macro syntax.

      # Held in a quoted variable so the comparisons below stay well-formed when the parameter is empty.
      cni="${{ parameters.cni }}"

      echo "Ensure that privileged pod exists on each node"
      kubectl apply -f test/integration/manifests/load/privileged-daemonset.yaml
      kubectl rollout status ds -n kube-system privileged-daemonset

      echo "------ Log work ------"
      kubectl get pods -n kube-system -l os=linux,app=privileged-daemonset -owide
      echo "Capture logs from each linux node. Files located in var/logs/*."
      podList=`kubectl get pods -n kube-system -l os=linux,app=privileged-daemonset -owide --no-headers | awk '{print $1}'`
      for pod in $podList; do
        files=(`kubectl exec -i -n kube-system $pod -- find ./var/log -maxdepth 2 -name "azure-*" -type f`)
        node=`kubectl get pod -n kube-system $pod -o custom-columns=NODE:.spec.nodeName,NAME:.metadata.name --no-headers | awk '{print $1}'`
        outDir="$(acnLogs)/${node}_logs/log-output"
        mkdir -p "$outDir/"
        echo "Directory created: $(acnLogs)/${node}_logs/"
        for file in "${files[@]}"; do
          # Take the file name from the path itself instead of from a second,
          # index-aligned find listing that could differ if logs rotate in between.
          fileBase="${file##*/}"
          kubectl exec -i -n kube-system $pod -- cat "$file" > "$outDir/$fileBase"
          echo "Azure-*.log, $fileBase, captured: $outDir/$fileBase"
        done
        if [ "$cni" = 'cilium' ]; then
          file="cilium-cni.log"
          kubectl exec -i -n kube-system $pod -- cat var/log/$file > "$outDir/$file"
          echo "Cilium log, $file, captured: $outDir/$file"
        fi
      done

      if [ "$cni" != 'cilium' ]; then
        echo "------ Privileged work ------"
        kubectl get pods -n kube-system -l os=linux,app=privileged-daemonset -owide
        echo "Capture State Files from privileged pods"
        for pod in $podList; do
          node=`kubectl get pod -n kube-system $pod -o custom-columns=NODE:.spec.nodeName,NAME:.metadata.name --no-headers | awk '{print $1}'`
          outDir="$(acnLogs)/${node}_logs/privileged-output"
          mkdir -p "$outDir/"
          echo "Directory created: $outDir/"
          file="azure-vnet.json"
          kubectl exec -i -n kube-system $pod -- cat /var/run/$file > "$outDir/$file"
          echo "CNI State, $file, captured: $outDir/$file"
          if [ "$cni" = 'cniv1' ]; then
            file="azure-vnet-ipam.json"
            kubectl exec -i -n kube-system $pod -- cat /var/run/$file > "$outDir/$file"
            echo "CNIv1 IPAM, $file, captured: $outDir/$file"
          fi
        done
      fi

      if [ "$cni" = 'cilium' ] || [ "$cni" = 'cniv2' ]; then
        echo "------ CNS work ------"
        kubectl get pods -n kube-system -l k8s-app=azure-cns
        echo "Capture State Files from CNS pods"
        cnsPods=`kubectl get pods -n kube-system -l k8s-app=azure-cns --no-headers | awk '{print $1}'`
        for pod in $cnsPods; do
          # ManageEndpointState from the CNS config decides whether azure-endpoints.json is captured below.
          managed=`kubectl exec -i -n kube-system $pod -- cat etc/azure-cns/cns_config.json | jq .ManageEndpointState`
          node=`kubectl get pod -n kube-system $pod -o custom-columns=NODE:.spec.nodeName,NAME:.metadata.name --no-headers | awk '{print $1}'`
          outDir="$(acnLogs)/${node}_logs/CNS-output"
          mkdir -p "$outDir/"
          echo "Directory created: $outDir/"
          file="cnsCache.txt"
          # The JSON body is single-quoted so the shell passes it through untouched.
          kubectl exec -i -n kube-system $pod -- curl localhost:10090/debug/ipaddresses -d '{"IPConfigStateFilter":["Assigned"]}' > "$outDir/$file"
          echo "CNS cache, $file, captured: $outDir/$file"
          file="azure-cns.json"
          kubectl exec -i -n kube-system $pod -- cat /var/lib/azure-network/$file > "$outDir/$file"
          echo "CNS State, $file, captured: $outDir/$file"
          # Quoted: the value is empty when the exec or jq call above fails.
          if [ "$managed" = "true" ]; then
            file="azure-endpoints.json"
            kubectl exec -i -n kube-system $pod -- cat /var/run/azure-cns/$file > "$outDir/$file"
            echo "CNS Managed State, $file, captured: $outDir/$file"
          fi
        done
      fi

      if [ "$cni" = 'cilium' ]; then
        echo "------ Cilium work ------"
        kubectl get pods -n kube-system -l k8s-app=cilium
        echo "Capture State Files from Cilium pods"
        ciliumPods=`kubectl get pods -n kube-system -l k8s-app=cilium --no-headers | awk '{print $1}'`
        for pod in $ciliumPods; do
          node=`kubectl get pod -n kube-system $pod -o custom-columns=NODE:.spec.nodeName,NAME:.metadata.name --no-headers | awk '{print $1}'`
          outDir="$(acnLogs)/${node}_logs/Cilium-output"
          mkdir -p "$outDir/"
          echo "Directory created: $outDir/"
          file="cilium-endpoint.json"
          kubectl exec -i -n kube-system $pod -- cilium endpoint list -o json > "$outDir/$file"
          echo "Cilium, $file, captured: $outDir/$file"
        done
      fi
    displayName: Linux Logs
    condition: always()
- ${{ if eq(parameters.os, 'windows') }}:
  - bash: |
      # Collects per-node diagnostics from Windows nodes into <log root>/<node>_logs/:
      #   log-output/         azure*.log* files from the k directory (plus azure-cns.log for cniv2)
      #   privileged-output/  CNI state files read through the privileged daemonset
      #   CNS-output/         CNS IP cache and state files (cniv2 only)
      # Backticks are kept for command substitution so it cannot be confused with pipeline macro syntax.

      # Held in a quoted variable so the comparisons below stay well-formed when the parameter is empty.
      cni="${{ parameters.cni }}"

      echo "Ensure that privileged pod exists on each node"
      kubectl apply -f test/integration/manifests/load/privileged-daemonset-windows.yaml
      kubectl rollout status ds -n kube-system privileged-daemonset

      echo "------ Log work ------"
      kubectl get pods -n kube-system -l os=windows,app=privileged-daemonset -owide
      echo "Capture logs from each windows node. Files located in \k"
      podList=`kubectl get pods -n kube-system -l os=windows,app=privileged-daemonset -owide --no-headers | awk '{print $1}'`
      for pod in $podList; do
        # Column 6 of the PowerShell directory listing is taken as the file name.
        files=`kubectl exec -i -n kube-system $pod -- powershell "ls ../../k/azure*.log*" | grep azure | awk '{print $6}'`
        node=`kubectl get pod -n kube-system $pod -o custom-columns=NODE:.spec.nodeName,NAME:.metadata.name --no-headers | awk '{print $1}'`
        outDir="$(acnLogs)/${node}_logs/log-output"
        mkdir -p "$outDir/"
        echo "Directory created: $outDir/"
        for file in $files; do
          kubectl exec -i -n kube-system $pod -- powershell "cat ../../k/$file" > "$outDir/$file"
          echo "Azure-*.log, $file, captured: $outDir/$file"
        done
        if [ "$cni" = 'cniv2' ]; then
          file="azure-cns.log"
          kubectl exec -i -n kube-system $pod -- cat k/azurecns/$file > "$outDir/$file"
          echo "CNS Log, $file, captured: $outDir/$file"
        fi
      done

      echo "------ Privileged work ------"
      kubectl get pods -n kube-system -l os=windows,app=privileged-daemonset -owide
      echo "Capture State Files from privileged pods"
      for pod in $podList; do
        node=`kubectl get pod -n kube-system $pod -o custom-columns=NODE:.spec.nodeName,NAME:.metadata.name --no-headers | awk '{print $1}'`
        outDir="$(acnLogs)/${node}_logs/privileged-output"
        mkdir -p "$outDir/"
        echo "Directory created: $outDir/"
        file="azure-vnet.json"
        kubectl exec -i -n kube-system $pod -- powershell cat ../../k/$file > "$outDir/$file"
        echo "CNI State, $file, captured: $outDir/$file"
        if [ "$cni" = 'cniv1' ]; then
          file="azure-vnet-ipam.json"
          kubectl exec -i -n kube-system $pod -- powershell cat ../../k/$file > "$outDir/$file"
          echo "CNI IPAM, $file, captured: $outDir/$file"
        fi
      done

      if [ "$cni" = 'cniv2' ]; then
        echo "------ CNS work ------"
        kubectl get pods -n kube-system -l k8s-app=azure-cns-win --no-headers
        echo "Capture State Files from CNS pods"
        cnsPods=`kubectl get pods -n kube-system -l k8s-app=azure-cns-win --no-headers | awk '{print $1}'`
        for pod in $cnsPods; do
          # Exec into the current CNS pod ($pod). The previous literal name "pod" matched no pod,
          # so ManageEndpointState was never read and azure-endpoints.json was never captured.
          managed=`kubectl exec -i -n kube-system $pod -- powershell cat etc/azure-cns/cns_config.json | jq .ManageEndpointState`
          node=`kubectl get pod -n kube-system $pod -o custom-columns=NODE:.spec.nodeName,NAME:.metadata.name --no-headers | awk '{print $1}'`
          outDir="$(acnLogs)/${node}_logs/CNS-output"
          mkdir -p "$outDir/"
          echo "Directory created: $outDir/"
          file="cnsCache.txt"
          kubectl exec -i -n kube-system $pod -- powershell 'Invoke-WebRequest -Uri 127.0.0.1:10090/debug/ipaddresses -Method Post -ContentType application/x-www-form-urlencoded -Body "{`"IPConfigStateFilter`":[`"Assigned`"]}" -UseBasicParsing | Select-Object -Expand Content' > "$outDir/$file"
          echo "CNS cache, $file, captured: $outDir/$file"
          file="azure-cns.json"
          kubectl exec -i -n kube-system $pod -- powershell cat k/azurecns/azure-cns.json > "$outDir/$file"
          echo "CNS State, $file, captured: $outDir/$file"
          # Quoted: the value is empty when the exec or jq call above fails.
          if [ "$managed" = "true" ]; then
            file="azure-endpoints.json"
            kubectl exec -i -n kube-system $pod -- cat k/azurecns/$file > "$outDir/$file"
            echo "CNS Managed State, $file, captured: $outDir/$file"
          fi
        done
      fi
    displayName: Windows Logs
    condition: always()
# Publishes the per-attempt log root (the directory created by the Kube-System Logs step)
# as a pipeline artifact, whether or not earlier steps succeeded.
- publish: $(System.DefaultWorkingDirectory)/${{ parameters.clusterName }}_${{ parameters.logType }}_Attempt_#$(System.StageAttempt)
  name: acnLogs_${{ parameters.logType }}
  displayName: Publish Cluster logs
  # Artifact name combines cluster, os, job name and stage attempt.
  artifact: ${{ parameters.clusterName }}_${{ parameters.os }}${{ parameters.jobName }}_Attempt_#$(System.StageAttempt)
  condition: always()