pr:
  branches:
    include:
      - master
      - release/*
  paths:
    include:
      - npm/*
      - .pipelines/npm/*
      - test/scale/*

trigger:
  branches:
    include:
      - master
  tags:
    include:
      - "*"

variables:
  - name: VNET_NAME
    value: npm-vnet

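# Flow of the jobs below: "setup" names the e2e resource group and image tag, "containerize"
# builds the azure-npm images for that tag, "Create_Cluster_and_Run_Test" creates an AKS cluster
# and runs the scale/connectivity tests, and "clean_up" deletes the resource group.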
jobs:
  - job: setup
    displayName: "Configure Test Environment"
    pool:
      name: $(BUILD_POOL_NAME_DEFAULT)
      demands:
        - agent.os -equals Linux
        - Role -equals Build
    steps:
      - checkout: self

      - script: |
          go version
          go env
          which go
          echo $PATH
          mkdir -p '$(GOBIN)'
          mkdir -p '$(GOPATH)/pkg'
          BUILD_NUMBER=$(Build.BuildNumber)
          # format: npm-<year>-<month>-<day>-<minute>-<second>
          RG=e2e-$(echo "npm-`date "+%Y-%m-%d-%M-%S"`")
          TAG=$(make npm-version)
          echo "Resource group: $RG"
          echo "Image tag: $TAG"

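          # The "##vso[task.setvariable ...]" logging commands below publish RESOURCE_GROUP and TAG
          # as job outputs (isOutput=true), which later jobs read via
          # $[ dependencies.setup.outputs['EnvironmentalVariables.*'] ].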
echo "##vso[task.setvariable variable=RESOURCE_GROUP;isOutput=true;]$RG"
|
|
|
|
echo "##vso[task.setvariable variable=TAG;isOutput=true;]$TAG"
|
|
|
|
|
|
|
|
name: "EnvironmentalVariables"
|
|
|
|
displayName: "Set environmental variables"
|
|
|
|
condition: always()
|
|
|
|
|
|
|
|
  - job: containerize
    dependsOn: [setup]
    displayName: Build Images
    variables:
      TAG: $[ dependencies.setup.outputs['EnvironmentalVariables.TAG'] ]
    pool:
      name: "$(BUILD_POOL_NAME_DEFAULT)"
    strategy:
      matrix:
        npm_linux_amd64:
          arch: amd64
          name: npm
          os: linux
        npm_windows_amd64:
          arch: amd64
          name: npm
          os: windows
    steps:
      - template: ../containers/container-template.yaml
        parameters:
          arch: $(arch)
          name: $(name)
          os: $(os)

  - job: Create_Cluster_and_Run_Test
    timeoutInMinutes: 360
    displayName: "Run Scale Test"
    pool:
      name: $(BUILD_POOL_NAME_DEFAULT)
      demands:
        - agent.os -equals Linux
        - Role -equals Build
    dependsOn: [containerize, setup]
    variables:
      RESOURCE_GROUP: $[ dependencies.setup.outputs['EnvironmentalVariables.RESOURCE_GROUP'] ]
      TAG: $[ dependencies.setup.outputs['EnvironmentalVariables.TAG'] ]
      FQDN: empty
    strategy:
      matrix:
        v2-linux:
          PROFILE: "sc-lin"
          NUM_NETPOLS: 800
          INITIAL_CONNECTIVITY_TIMEOUT: 60
        # 2024/07/23: Windows Scale Test is consistently failing to establish initial connectivity in time
        # ws22:
        #   PROFILE: "sc-ws22"
        #   NUM_NETPOLS: 50
        #   INITIAL_CONNECTIVITY_TIMEOUT: 720
    steps:
      - checkout: self

      - bash: |
          test -d $(Pipeline.Workspace)/s/test/scale/ || {
            echo "##vso[task.logissue type=error]$(Pipeline.Workspace)/s/test/scale/ does not exist"
            exit 1
          }
        displayName: "Verify Directory Exists"
        failOnStderr: true

      - task: AzureCLI@2
        displayName: "Download Kubectl"
        inputs:
          azureSubscription: $(BUILD_VALIDATIONS_SERVICE_CONNECTION)
          scriptType: "bash"
          scriptLocation: "inlineScript"
          failOnStderr: true
          condition: succeeded()
          inlineScript: |
            set -e
            curl -LO https://dl.k8s.io/release/v1.23.0/bin/linux/amd64/kubectl
            chmod +x kubectl
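            # kubectl v1.23.0 is pinned and lands in the job's default working directory;
            # later steps in this job pick it up via kubectlPath=`pwd`/kubectl.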

      - task: AzureCLI@2
        displayName: "Create AKS Cluster"
        inputs:
          azureSubscription: $(BUILD_VALIDATIONS_SERVICE_CONNECTION)
          scriptType: "bash"
          scriptLocation: "inlineScript"
          failOnStderr: true
          condition: succeeded()
          inlineScript: |
            set -e
            az extension add --name aks-preview
            az extension update --name aks-preview

            echo "Creating resource group named $(RESOURCE_GROUP)"
            az group create --name $(RESOURCE_GROUP) -l $(LOCATION) -o table

            export CLUSTER_NAME=$(RESOURCE_GROUP)-$(PROFILE)
            echo "Creating cluster named $CLUSTER_NAME"
            az aks create \
              --resource-group $(RESOURCE_GROUP) \
              --name $CLUSTER_NAME \
              --generate-ssh-keys \
              --windows-admin-username e2eadmin \
              --windows-admin-password alpha@numeric!password2 \
              --network-plugin azure \
              --vm-set-type VirtualMachineScaleSets \
              --node-vm-size Standard_D4s_v3 \
              --node-count 1 \
              --tier standard \
              --max-pods 100

            echo "Getting credentials to $CLUSTER_NAME"
            az aks get-credentials -g $(RESOURCE_GROUP) -n $CLUSTER_NAME --overwrite-existing --file ./kubeconfig
            mkdir -p ~/.kube/
            cp ./kubeconfig ~/.kube/config

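            # kwok ("Kubernetes WithOut Kubelet") simulates nodes and pods, letting the scale test
            # schedule far more pods than the single real node could hold.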
            # install kwok on linux node
            cd $(Pipeline.Workspace)/s/test/scale/
            chmod u+x run-kwok-as-pod.sh test-scale.sh connectivity/test-connectivity.sh
            ./run-kwok-as-pod.sh
            # need reliability in case multiple controllers enter CrashLoopBackOff from "context cancelled"
            kubectl scale deployment -n kube-system -l app=kwok-controller --replicas=5

            if [[ $(PROFILE) == *ws22 ]]; then
              echo "Adding Windows nodepool to $CLUSTER_NAME"
              az aks nodepool add \
                --resource-group $(RESOURCE_GROUP) \
                --cluster-name $CLUSTER_NAME \
                --name awin22 \
                --os-type Windows \
                --os-sku Windows2022 \
                --node-vm-size Standard_D4s_v3 \
                --node-count 1 \
                --max-pods 100
            fi

      - task: AzureCLI@2
        displayName: "Deploy NPM to Test Cluster"
        inputs:
          azureSubscription: $(BUILD_VALIDATIONS_SERVICE_CONNECTION)
          scriptType: "bash"
          scriptLocation: "inlineScript"
          failOnStderr: true
          condition: succeeded()
          inlineScript: |
            set -e

            # deploy azure-npm
            cp $(Pipeline.Workspace)/s/npm/azure-npm.yaml azure-npm.yaml
            sed -i 's/memory: 300Mi/memory: 1000Mi/g' azure-npm.yaml
            kubectl apply -f azure-npm.yaml

            cp $(Pipeline.Workspace)/s/npm/examples/windows/azure-npm.yaml azure-npm-win.yaml
            # set higher memory limit
            sed -i 's/memory: 300Mi/memory: 1000Mi/g' azure-npm-win.yaml
            kubectl apply -f azure-npm-win.yaml

            # swap azure-npm image with one built during run
            kubectl set image daemonset/azure-npm -n kube-system azure-npm=$IMAGE_REGISTRY/azure-npm:linux-amd64-$(TAG)
            kubectl set image daemonset/azure-npm-win -n kube-system azure-npm=$IMAGE_REGISTRY/azure-npm:windows-amd64-$(TAG)

            sleep 30s
            echo "waiting for NPM to start running..."
            kubectl wait --for=condition=Ready pod -l k8s-app=azure-npm -n kube-system --timeout=15m || {
              kubectl describe pod -n kube-system -l k8s-app=azure-npm
              echo "##vso[task.logissue type=error]NPM failed to start running"
              exit 1
            }
            echo "sleep 3m to let NPM restart in case of bootup failure due to HNS errors"
            sleep 3m

            kubectl get po -n kube-system -owide -A

            if [[ $(PROFILE) == *ws22 ]]; then
              echo "labeling Windows nodes for scale test"
              kubectl get node -o wide | grep "Windows Server 2022 Datacenter" | awk '{print $1}' | xargs -n 1 -I {} kubectl label node {} scale-test=true connectivity-test=true
            else
              echo "labeling Linux nodes for scale test"
              kubectl get node -o wide | grep "Ubuntu" | awk '{print $1}' | xargs -n 1 -I {} kubectl label node {} scale-test=true connectivity-test=true
            fi

            export CLUSTER_NAME=$(RESOURCE_GROUP)-$(PROFILE)
            echo "Showing cluster status for $CLUSTER_NAME"
            FQDN=`az aks show -n $CLUSTER_NAME -g $(RESOURCE_GROUP) --query fqdn -o tsv`
            echo "##vso[task.setvariable variable=FQDN]$FQDN"

      - task: AzureCLI@2
        displayName: "Scale Up Large"
        inputs:
          azureSubscription: $(BUILD_VALIDATIONS_SERVICE_CONNECTION)
          scriptType: "bash"
          scriptLocation: "inlineScript"
          failOnStderr: true
          condition: succeeded()
          inlineScript: |
            set -e
            export CLUSTER_NAME=$(RESOURCE_GROUP)-$(PROFILE)
            mkdir -p $(System.DefaultWorkingDirectory)/$CLUSTER_NAME

            # 20 kwok nodes
            # 1000 kwok Pods
            # 30 real Pods
            # 300 ACLs per endpoint
            # ~4K IPSets
            # ~36K IPSet members
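            # (the pod counts follow from the flags below: 10 kwok deployments x 100 replicas
            # = 1000 kwok Pods, which at 50 pods per kwok node means 20 kwok nodes;
            # 10 real deployments x 3 replicas = 30 real Pods)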
            kubectlPath=`pwd`/kubectl
            cd $(Pipeline.Workspace)/s/test/scale/
            set +e
            ./test-scale.sh --kubectl-binary=$kubectlPath \
              --max-kwok-pods-per-node=50 \
              --num-kwok-deployments=10 \
              --num-kwok-replicas=100 \
              --max-real-pods-per-node=30 \
              --num-real-deployments=10 \
              --num-real-replicas=3 \
              --num-network-policies=$(NUM_NETPOLS) \
              --num-unapplied-network-policies=$(NUM_NETPOLS) \
              --num-unique-labels-per-pod=2 \
              --num-unique-labels-per-deployment=2 \
              --num-shared-labels-per-pod=10
            rc=$?
            exit $rc

      - task: AzureCLI@2
        displayName: "Test NPM Bootup Latency and Connectivity ($(PROFILE))"
        inputs:
          azureSubscription: $(BUILD_VALIDATIONS_SERVICE_CONNECTION)
          scriptType: "bash"
          scriptLocation: "inlineScript"
          failOnStderr: true
          condition: succeeded()
          inlineScript: |
            set -e
            export CLUSTER_NAME=$(RESOURCE_GROUP)-$(PROFILE)
            mkdir -p $(System.DefaultWorkingDirectory)/$CLUSTER_NAME

            kubectl rollout restart -n kube-system ds azure-npm-win
            echo "sleeping 3 minutes to allow NPM pods to restart after scale-up..."
            sleep 3m

            kubectl get pod -n kube-system -l app=kwok-controller -owide
            kubectl get pod -n kube-system -l app=kwok-controller -owide | grep -q Running || {
              echo "##vso[task.logissue type=error]need at least one kwok pod running"
              exit 1
            }

            # reuse the kubectl downloaded earlier in this job (same default working directory)
            kubectlPath=`pwd`/kubectl
            cd $(Pipeline.Workspace)/s/test/scale/connectivity/
            # notes for Windows:
            # initial connectivity should be established within 15 minutes of NPM restart (12 minute timeout since we already waited 3 minutes above)
            # adding new network policy to all 30 Pods should happen within 30 seconds
            set +e
            ./test-connectivity.sh --kubectl-binary=$kubectlPath \
              --num-scale-pods-to-verify=all \
              --max-wait-for-initial-connectivity=$(INITIAL_CONNECTIVITY_TIMEOUT) \
              --max-wait-after-adding-netpol=30
            rc=$?
            if [[ $rc != 0 ]]; then
              echo "capturing cluster state due to failure"
              if [[ $(PROFILE) == *ws22 ]]; then
                cd $(Pipeline.Workspace)/s/debug/windows/npm/
                chmod u+x win-debug.sh
                ./win-debug.sh
                mv logs_* $(System.DefaultWorkingDirectory)/$CLUSTER_NAME/
              else
                set -x
                npmPod=`kubectl get pod -n kube-system | grep npm | grep -v npm-win | awk '{print $1}' | head -n 1 | tr -d '\n'`
                kubectl exec -n kube-system $npmPod -- iptables-nft -vnL > $(System.DefaultWorkingDirectory)/$CLUSTER_NAME/iptables.out
                kubectl exec -n kube-system $npmPod -- ipset -L > $(System.DefaultWorkingDirectory)/$CLUSTER_NAME/ipset.out
              fi

              kubectl get pod -n scale-test
              kubectl get pod -n connectivity-test
              exit $rc
            fi

            kubectl get pod -n kube-system -l app=kwok-controller -owide
            kubectl get pod -n kube-system -l app=kwok-controller -owide | grep -q Running || {
              echo "##vso[task.logissue type=error]need at least one kwok pod running"
              exit 1
            }

      - task: AzureCLI@2
        displayName: "CRUD at Medium Scale"
        inputs:
          azureSubscription: $(BUILD_VALIDATIONS_SERVICE_CONNECTION)
          scriptType: "bash"
          scriptLocation: "inlineScript"
          failOnStderr: true
          condition: succeeded()
          inlineScript: |
            set -e
            export CLUSTER_NAME=$(RESOURCE_GROUP)-$(PROFILE)
            mkdir -p $(System.DefaultWorkingDirectory)/$CLUSTER_NAME

            # will delete scale-test and connectivity-test namespaces from previous run
            # 10 kwok Pods
            # 30 real Pods
            kubectlPath=`pwd`/kubectl
            cd $(Pipeline.Workspace)/s/test/scale/
            set +e
            ./test-scale.sh --kubectl-binary=$kubectlPath \
              --max-kwok-pods-per-node=50 \
              --num-kwok-deployments=10 \
              --num-kwok-replicas=1 \
              --max-real-pods-per-node=30 \
              --num-real-deployments=3 \
              --num-real-replicas=4 \
              --num-network-policies=1 \
              --num-unapplied-network-policies=10 \
              --num-unique-labels-per-pod=2 \
              --num-unique-labels-per-deployment=2 \
              --num-shared-labels-per-pod=10 \
              --delete-labels \
              --delete-labels-interval=30 \
              --delete-labels-times=2 \
              --delete-netpols \
              --delete-netpols-interval=0 \
              --delete-netpols-times=1 \
              --delete-kwok-pods=10 \
              --delete-real-pods=6 \
              --delete-pods-interval=120 \
              --delete-pods-times=2
            rc=$?
            exit $rc

      - task: AzureCLI@2
        displayName: "Test Connectivity after CRUD ($(PROFILE))"
        inputs:
          azureSubscription: $(BUILD_VALIDATIONS_SERVICE_CONNECTION)
          scriptType: "bash"
          scriptLocation: "inlineScript"
          failOnStderr: true
          condition: succeeded()
          inlineScript: |
            set -e
            export CLUSTER_NAME=$(RESOURCE_GROUP)-$(PROFILE)
            mkdir -p $(System.DefaultWorkingDirectory)/$CLUSTER_NAME

            kubectl get pod -n kube-system -l app=kwok-controller -owide
            kubectl get pod -n kube-system -l app=kwok-controller -owide | grep -q Running || {
              echo "##vso[task.logissue type=error]need at least one kwok pod running"
              exit 1
            }

            # reuse the kubectl downloaded earlier in this job (same default working directory)
            kubectlPath=`pwd`/kubectl
            cd $(Pipeline.Workspace)/s/test/scale/connectivity/
            # initial connectivity should be established within 10 minutes
            # adding new network policy to all 12 Pods should happen within 20 seconds
            set +e
            ./test-connectivity.sh --kubectl-binary=$kubectlPath \
              --num-scale-pods-to-verify=all \
              --max-wait-for-initial-connectivity=$((10*60)) \
              --max-wait-after-adding-netpol=20
            rc=$?
            if [[ $rc != 0 ]]; then
              echo "capturing cluster state due to failure"
              if [[ $(PROFILE) == *ws22 ]]; then
                cd $(Pipeline.Workspace)/s/debug/windows/npm/
                chmod u+x win-debug.sh
                ./win-debug.sh
                mv logs_* $(System.DefaultWorkingDirectory)/$CLUSTER_NAME/
              else
                set -x
                npmPod=`kubectl get pod -n kube-system | grep npm | grep -v npm-win | awk '{print $1}' | head -n 1 | tr -d '\n'`
                kubectl exec -n kube-system $npmPod -- iptables-nft -vnL > $(System.DefaultWorkingDirectory)/$CLUSTER_NAME/iptables.out
                kubectl exec -n kube-system $npmPod -- ipset -L > $(System.DefaultWorkingDirectory)/$CLUSTER_NAME/ipset.out
              fi

              kubectl get pod -n scale-test
              kubectl get pod -n connectivity-test
              exit $rc
            fi

            kubectl get pod -n kube-system -l app=kwok-controller -owide
            kubectl get pod -n kube-system -l app=kwok-controller -owide | grep -q Running || {
              echo "##vso[task.logissue type=error]need at least one kwok pod running"
              exit 1
            }

      - bash: |
          export CLUSTER_NAME=$(RESOURCE_GROUP)-$(PROFILE)
          echo "Getting cluster state for $CLUSTER_NAME"
          mkdir -p $(System.DefaultWorkingDirectory)/$CLUSTER_NAME
          kubectl get pods -n kube-system -owide | grep npm | grep -v kwok
          npmPodList=`kubectl get pods -n kube-system -owide | grep npm | grep -v kwok | awk '{print $1}'`
          for npmPod in $npmPodList; do
            logFile=$(System.DefaultWorkingDirectory)/$CLUSTER_NAME/npm-logs_$(PROFILE)-$npmPod.txt
            kubectl logs -n kube-system $npmPod > $logFile

            # capture any previous logs in case there was a crash
            previousLogFile=$(System.DefaultWorkingDirectory)/$CLUSTER_NAME/previous-npm-logs_$(PROFILE).txt
            kubectl logs -n kube-system $npmPod -p > $previousLogFile
            if [[ $? -ne 0 ]]; then
              # remove the empty file if kubectl logs failed (e.g. there was no previous terminated container)
              rm $previousLogFile
            fi
          done
          cp ./kubeconfig $(System.DefaultWorkingDirectory)/$CLUSTER_NAME/.kubeconfig
        condition: always()
        displayName: "Get Logs"

      - publish: $(System.DefaultWorkingDirectory)/$(RESOURCE_GROUP)-$(PROFILE)
        condition: always()
        artifact: NpmLogs-$(RESOURCE_GROUP)-$(PROFILE)

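  # clean_up deletes the e2e resource group (and the AKS cluster in it) created during this run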
  - job: clean_up
    displayName: "Cleanup"
    pool:
      name: $(BUILD_POOL_NAME_DEFAULT)
      demands:
        - agent.os -equals Linux
        - Role -equals Build
    dependsOn: [Create_Cluster_and_Run_Test, setup]
    variables:
      RESOURCE_GROUP: $[ dependencies.setup.outputs['EnvironmentalVariables.RESOURCE_GROUP'] ]
    steps:
      - checkout: none
      - task: AzureCLI@2
        displayName: "Delete Test Cluster Resource Group"
        inputs:
          azureSubscription: $(BUILD_VALIDATIONS_SERVICE_CONNECTION)
          scriptType: "bash"
          scriptLocation: "inlineScript"
          condition: succeeded()
          inlineScript: |
            echo Deleting $(RESOURCE_GROUP)
            az group delete -n $(RESOURCE_GROUP) --yes