# Source: azure-container-networking/.pipelines/npm/npm-conformance-tests-lates... (473 lines, 19 KiB, YAML)

# CI trigger: the pipeline runs for the master branch and for every tag.
trigger:
  branches:
    include:
    - master
  tags:
    include:
    # Quoted: a bare * would be parsed by YAML as an alias indicator.
    - "*"
# Pipeline-level variables, written in the mapping shorthand (name: value).
variables:
  # Not referenced anywhere in the visible part of this pipeline.
  VNET_NAME: npm-vnet
  # Number of conformance runs started at once when IS_STRESS_TEST is "true".
  # Kept as a quoted string so it is not retyped as an integer.
  NUM_PARALLEL_JOBS_FOR_STRESS_TEST: "3"
jobs:
# Computes the values shared by the later jobs: a per-run resource group name
# and the newest release tag of azure-container-networking. Both are exposed
# as output variables of the step named "EnvironmentalVariables".
- job: setup
  displayName: "Configure Test Environment"
  pool:
    name: $(BUILD_POOL_NAME_DEFAULT)
    demands:
    - agent.os -equals Linux
    - Role -equals Build
  steps:
  - checkout: self
  - script: |
      # Print the Go toolchain details of the agent for troubleshooting.
      go version
      go env
      which go
      echo $PATH
      mkdir -p '$(GOBIN)'
      mkdir -p '$(GOPATH)/pkg'
      # Include hour and minute in the name: seconds alone repeat every minute,
      # so two runs on the same day could otherwise share (and delete) the same
      # resource group.
      RG="e2e-npm-$(date "+%Y-%m-%d-%H%M%S")"
      git --version
      # Newest release tag: list the remote tags, drop the zapai/ipam tags and
      # the peeled "^{}" refs, keep the tag name and take the highest version.
      TAG=$(git -c "versionsort.suffix=-" ls-remote --tags https://github.com/Azure/azure-container-networking | grep -v "zapai" | grep -v "ipam" | grep -v "{}"| cut --delimiter="/" --fields=3 | sort -V | tail -1)
      # Without a tag the later jobs would deploy an image with an empty
      # version suffix, so stop here instead.
      if [ -z "$TAG" ]; then
        echo "Could not determine the latest release tag" >&2
        exit 1
      fi
      echo "Resource group: $RG"
      echo "Image tag: $TAG"
      # isOutput=true lets other jobs read these values as
      # dependencies.setup.outputs['EnvironmentalVariables.<name>'].
      echo "##vso[task.setvariable variable=RESOURCE_GROUP;isOutput=true;]$RG"
      echo "##vso[task.setvariable variable=TAG;isOutput=true;]$TAG"
    name: "EnvironmentalVariables"
    displayName: "Set environmental variables"
    condition: always()
# Builds the Kubernetes e2e.test binary from the vakalapa/kubernetes fork and
# publishes the build output directory as the "Test" artifact.
- job: k8se2e
  displayName: "Build Kubernetes Test Suite"
  pool:
    name: $(BUILD_POOL_NAME_DEFAULT)
    demands:
    - agent.os -equals Linux
    - Role -equals Build
  steps:
  - checkout: self
  # Step 1: fetch the fork that carries the test changes.
  - displayName: "Clone Kubernetes Repo"
    script: |
      echo "cloning vakr"
      git clone https://github.com/vakalapa/kubernetes.git
  # Step 2: switch to the test branch and compile only the e2e test binary.
  - displayName: "Build Kubernetes e2e.test"
    bash: |
      cd kubernetes
      git checkout vakr/sleepinnpmtests
      export PATH=$PATH:/usr/local/go/bin/
      make WHAT=test/e2e/e2e.test
  # Step 3: hand the binary to the test job through a pipeline artifact.
  - publish: $(System.DefaultWorkingDirectory)/kubernetes/_output/local/bin/linux/amd64
    artifact: Test
# For each matrix entry: creates an AKS cluster in the shared resource group,
# deploys NPM with the image tag computed by the setup job, runs the
# Kubernetes NetworkPolicy conformance tests and publishes the NPM logs.
# Clusters whose name ends in "ws22" get a Windows Server 2022 node pool.
- job: Create_Cluster_and_Run_Test
  timeoutInMinutes: 360
  displayName: "Run Kubernetes Network Policy Test Suite"
  strategy:
    matrix:
      v2-default:
        AZURE_CLUSTER: "conformance-v2-default"
        PROFILE: "v2-default"
        IS_STRESS_TEST: "false"
      v2-default-ws22:
        AZURE_CLUSTER: "conformance-v2-default-ws22"
        PROFILE: "v2-default-ws22"
        IS_STRESS_TEST: "false"
  pool:
    name: $(BUILD_POOL_NAME_DEFAULT)
    demands:
    - agent.os -equals Linux
    - Role -equals Build
  dependsOn: [setup, k8se2e]
  variables:
    RESOURCE_GROUP: $[ dependencies.setup.outputs['EnvironmentalVariables.RESOURCE_GROUP'] ]
    TAG: $[ dependencies.setup.outputs['EnvironmentalVariables.TAG'] ]
    # Placeholder; the deploy step overwrites it through task.setvariable.
    FQDN: empty
  steps:
  - checkout: none
  # Fetch the e2e.test binary published by the k8se2e job.
  - download: current
    artifact: Test
  - task: AzureCLI@2
    displayName: "Create resource group"
    inputs:
      azureSubscription: $(BUILD_VALIDATIONS_SERVICE_CONNECTION)
      scriptType: "bash"
      scriptLocation: "inlineScript"
      inlineScript: |
        az group create -n $(RESOURCE_GROUP) -l $(LOCATION) -o table
        echo created RG $(RESOURCE_GROUP) in $(LOCATION)
        az version
  - task: AzureCLI@2
    displayName: "Deploy NPM to Test Cluster"
    inputs:
      azureSubscription: $(BUILD_VALIDATIONS_SERVICE_CONNECTION)
      scriptType: "bash"
      scriptLocation: "inlineScript"
      # NOTE(review): the AzureCLI@2 task documents this input as
      # failOnStandardError; verify whether failOnStderr has any effect here.
      failOnStderr: true
      inlineScript: |
        # get kubectl
        curl -LO https://dl.k8s.io/release/v1.23.0/bin/linux/amd64/kubectl
        chmod +x kubectl
        echo Cluster $(AZURE_CLUSTER)
        echo Resource $(RESOURCE_GROUP)
        if [[ $(AZURE_CLUSTER) == *ws22 ]] # * is used for pattern matching
        then
          # name of the daemonset this branch configures; verified after the if/else
          npmDaemonSet=azure-npm-win
          az extension add --name aks-preview
          az extension update --name aks-preview
          echo "creating WS22 Cluster";
          # NOTE(review): the Windows admin password is hard-coded in the pipeline;
          # consider moving it to a secret variable.
          az aks create \
            --resource-group $(RESOURCE_GROUP) \
            --name $(AZURE_CLUSTER) \
            --generate-ssh-keys \
            --windows-admin-username e2eadmin \
            --windows-admin-password alpha@numeric!password2 \
            --network-plugin azure \
            --vm-set-type VirtualMachineScaleSets \
            --node-vm-size Standard_D4s_v3 \
            --node-count 1
          if [ $? != 0 ]
          then
            echo "Failing fast since previous command failed"
            exit 1
          fi
          # don't schedule anything on the linux system pool
          echo "Updating $(AZURE_CLUSTER) to not schedule anything on linux pool..."
          az aks nodepool update \
            --cluster-name $(AZURE_CLUSTER) \
            -g $(RESOURCE_GROUP) \
            -n nodepool1 \
            --node-taints CriticalAddonsOnly=true:NoSchedule
          if [ $? != 0 ]
          then
            echo "Failing fast since previous command failed"
            exit 1
          fi
          echo "Adding Windows nodepool to $(AZURE_CLUSTER) to group $(RESOURCE_GROUP)"
          az aks nodepool add \
            --resource-group $(RESOURCE_GROUP) \
            --cluster-name $(AZURE_CLUSTER) \
            --name awin22 \
            --os-type Windows \
            --os-sku Windows2022 \
            --node-vm-size Standard_D4s_v3 \
            --node-count 2
          if [ $? != 0 ]
          then
            echo "Failing fast since previous command failed"
            exit 1
          fi
          az aks get-credentials -n $(AZURE_CLUSTER) -g $(RESOURCE_GROUP) --file ./kubeconfig
          # deploy the Windows NPM manifest, then swap in the image for the tag under test
          ./kubectl --kubeconfig=./kubeconfig apply -f https://raw.githubusercontent.com/Azure/azure-container-networking/master/npm/examples/windows/azure-npm.yaml
          ./kubectl --kubeconfig=./kubeconfig set image daemonset/azure-npm-win -n kube-system azure-npm=$IMAGE_REGISTRY/azure-npm:windows-amd64-ltsc2022-$(TAG)
        else
          # name of the daemonset this branch configures; verified after the if/else
          npmDaemonSet=azure-npm
          echo "Creating Linux Cluster";
          az aks create --no-ssh-key \
            --resource-group $(RESOURCE_GROUP) \
            --name $(AZURE_CLUSTER) \
            --network-plugin azure
          if [ $? != 0 ]
          then
            echo "Failing fast since previous command failed"
            exit 1
          fi
          az aks get-credentials -n $(AZURE_CLUSTER) -g $(RESOURCE_GROUP) --file ./kubeconfig
          # deploy azure-npm
          ./kubectl --kubeconfig=./kubeconfig apply -f https://raw.githubusercontent.com/Azure/azure-container-networking/master/npm/azure-npm.yaml
          # swap azure-npm image with one built during run
          ./kubectl --kubeconfig=./kubeconfig set image daemonset/azure-npm -n kube-system azure-npm=$IMAGE_REGISTRY/azure-npm:linux-amd64-$(TAG)
          # swap NPM profile with one specified as parameter
          ./kubectl --kubeconfig=./kubeconfig apply -f https://raw.githubusercontent.com/Azure/azure-container-networking/master/npm/profiles/$(PROFILE).yaml
          ./kubectl --kubeconfig=./kubeconfig rollout restart ds azure-npm -n kube-system
        fi
        # verify the daemonset configured above exists; the Windows branch works on
        # azure-npm-win, so describing azure-npm unconditionally checked the wrong object
        ./kubectl --kubeconfig=./kubeconfig describe daemonset $npmDaemonSet -n kube-system
        if [ $? != 0 ]
        then
          echo "Failing fast since previous command failed"
          exit 1
        fi
        # hand the API server FQDN to the following steps as the FQDN variable
        FQDN=`az aks show -n $(AZURE_CLUSTER) -g $(RESOURCE_GROUP) --query fqdn -o tsv`
        echo "##vso[task.setvariable variable=FQDN]$FQDN"
  - bash: |
      echo "sleeping 3 minutes to allow NPM pods to restart"
      sleep 180
      # make a pipeline such as "test | tee file" report the exit code of the test
      set -o pipefail
      ## create the output folder and include the kubeconfig there
      npmLogsFolder=$(System.DefaultWorkingDirectory)/npmLogs_$(AZURE_CLUSTER)
      mkdir -p $npmLogsFolder
      cp ./kubeconfig $npmLogsFolder/kubeconfig
      ## remember the NPM pods that exist before the tests start; their logs are collected at the end
      ## (uses the downloaded ./kubectl like the rest of this step)
      npmPodList=`./kubectl --kubeconfig=./kubeconfig get pods -n kube-system | grep npm | awk '{print $1}'`
      echo "Found NPM pods: $npmPodList"
      ## define how the conformance tests are run for each OS
      echo $FQDN
      chmod +x $(Pipeline.Workspace)/Test/e2e.test
      # Linux: run the e2e.test binary downloaded from the "Test" artifact
      runConformance () {
          KUBERNETES_SERVICE_HOST="$FQDN" KUBERNETES_SERVICE_PORT=443 $(Pipeline.Workspace)/Test/e2e.test --provider=local --ginkgo.focus="NetworkPolicy" --ginkgo.skip="SCTP" --kubeconfig=./kubeconfig
          # there can't be a command after e2e.test because the exit code is important
      }
      # Windows: build e2e.test from the huntergregory fork (branch sleep-before-probing)
      # and run it with the test cases listed in toSkip excluded
      runConformanceWindows () {
          git clone https://github.com/huntergregory/kubernetes.git
          cd kubernetes
          git checkout sleep-before-probing
          export PATH=$PATH:/usr/local/go/bin/
          make WHAT=test/e2e/e2e.test
          cd ..
          nomatch1="should enforce policy based on PodSelector or NamespaceSelector"
          nomatch2="should enforce policy based on NamespaceSelector with MatchExpressions using default ns label"
          nomatch3="should enforce policy based on PodSelector and NamespaceSelector"
          nomatch4="should enforce policy based on Multiple PodSelectors and NamespaceSelectors"
          cidrExcept1="should ensure an IP overlapping both IPBlock.CIDR and IPBlock.Except is allowed"
          cidrExcept2="should enforce except clause while egress access to server in CIDR block"
          namedPorts="named port"
          wrongK8sVersion="Netpol API"
          toSkip="\[LinuxOnly\]|$nomatch1|$nomatch2|$nomatch3|$nomatch4|$cidrExcept1|$cidrExcept2|$namedPorts|$wrongK8sVersion|SCTP"
          KUBERNETES_SERVICE_HOST="$FQDN" KUBERNETES_SERVICE_PORT=443 $(System.DefaultWorkingDirectory)/kubernetes/_output/local/bin/linux/amd64/e2e.test --provider=local --ginkgo.focus="NetworkPolicy" --ginkgo.skip="$toSkip" --node-os-distro=windows --allowed-not-ready-nodes=1 --kubeconfig=./kubeconfig
      }
      exitCode=0
      if [ $(IS_STRESS_TEST) == "true" ]; then
          echo "Running $NUM_PARALLEL_JOBS_FOR_STRESS_TEST conformance tests at once and writing outputs to files"
          declare -a conformancePIDs
          for round in $(seq 1 $NUM_PARALLEL_JOBS_FOR_STRESS_TEST); do
              # for each iteration, run the conformance test and its start/finish echos in the
              # background, write the test output to a per-round file and record the PID of
              # the background job so its exit code can be collected below
              if [[ $(AZURE_CLUSTER) == *ws22 ]] # * is used for pattern matching
              then
                  echo "starting windows conformance test #$round" && \
                      runConformanceWindows | tee $npmLogsFolder/conformance-results-$round && \
                      echo "finished conformance test #$round" &
                  pidOfConformanceTest=$!
                  conformancePIDs+=($pidOfConformanceTest)
              else
                  echo "starting conformance test #$round" && \
                      runConformance > $npmLogsFolder/conformance-results-$round && \
                      echo "finished conformance test #$round" &
                  pidOfConformanceTest=$!
                  conformancePIDs+=($pidOfConformanceTest)
              fi
          done
          # wait until all conformance tests finish and take note of any failed tests
          # (stops waiting at the first failed round)
          for round in $(seq 1 $NUM_PARALLEL_JOBS_FOR_STRESS_TEST); do
              i=$((round-1))
              wait ${conformancePIDs[$i]}
              exitCode=$?
              if [ $exitCode != 0 ]; then
                  echo "conformance test #$round failed"
                  break
              fi
          done
      else
          # run the conformance test in the foreground and write the output to stdout and a file
          if [[ $(AZURE_CLUSTER) == *ws22 ]] # * is used for pattern matching
          then
              runConformanceWindows | tee $npmLogsFolder/conformance-results
              exitCode=$?
          else
              runConformance | tee $npmLogsFolder/conformance-results
              exitCode=$?
          fi
      fi
      # save the logs of every NPM pod found before the tests started
      for npmPod in $npmPodList; do
          ./kubectl --kubeconfig=./kubeconfig logs -n kube-system $npmPod > $npmLogsFolder/$npmPod-logs.txt
      done
      # report the result of the conformance run, not of the log collection
      exit $exitCode
    displayName: "Run Test Suite and Get Logs"
    failOnStderr: false
  # Publish the logs (and the kubeconfig copied above) even when the tests failed.
  - publish: $(System.DefaultWorkingDirectory)/npmLogs_$(AZURE_CLUSTER)
    condition: always()
    artifact: NpmLogs_$(AZURE_CLUSTER)
# 2024/07/23: Windows Cyclonus is consistently timing out after 6 hours
# - job: Create_Windows_Cluster_and_Run_Test
# timeoutInMinutes: 360
# displayName: "Run Windows Cyclonus"
# pool:
# name: $(BUILD_POOL_NAME_DEFAULT)
# demands:
# - agent.os -equals Linux
# - Role -equals Build
# dependsOn: [setup]
# variables:
# RESOURCE_GROUP: $[ dependencies.setup.outputs['EnvironmentalVariables.RESOURCE_GROUP'] ]
# TAG: $[ dependencies.setup.outputs['EnvironmentalVariables.TAG'] ]
# FQDN: empty
# strategy:
# matrix:
# v2-windows:
# PROFILE: "cyc-ws22"
# steps:
# - checkout: self
# - download: none
# - task: AzureCLI@2
# displayName: "Create AKS Cluster"
# inputs:
# azureSubscription: $(BUILD_VALIDATIONS_SERVICE_CONNECTION)
# scriptType: "bash"
# scriptLocation: "inlineScript"
# failOnStderr: true
# inlineScript: |
# az extension add --name aks-preview
# az extension update --name aks-preview
# export CLUSTER_NAME=$(RESOURCE_GROUP)-$(PROFILE)
# echo "Creating resource group named $CLUSTER_NAME"
# az group create --name $CLUSTER_NAME -l $(LOCATION) -o table
# echo "Creating resource group named $CLUSTER_NAME"
# az aks create \
# --resource-group $CLUSTER_NAME \
# --name $CLUSTER_NAME \
# --generate-ssh-keys \
# --windows-admin-username e2eadmin \
# --windows-admin-password alpha@numeric!password2 \
# --network-plugin azure \
# --vm-set-type VirtualMachineScaleSets \
# --node-vm-size Standard_D4s_v3 \
# --node-count 1
# # don't schedule anything on the linux system pool
# echo "Updating $CLUSTER_NAME to not schedule anything on linux pool..."
# az aks nodepool update \
# --cluster-name $CLUSTER_NAME \
# -g $CLUSTER_NAME \
# -n nodepool1 \
# --node-taints CriticalAddonsOnly=true:NoSchedule
# echo "Adding Windows nodepool to $CLUSTER_NAME"
# az aks nodepool add \
# --resource-group $CLUSTER_NAME \
# --cluster-name $CLUSTER_NAME \
# --name awin22 \
# --os-type Windows \
# --os-sku Windows2022 \
# --node-vm-size Standard_D4s_v3 \
# --node-count 3
# echo "Getting credentials to $CLUSTER_NAME"
# az aks get-credentials -g $CLUSTER_NAME -n $CLUSTER_NAME --overwrite-existing --file ./kubeconfig
# mkdir -p ~/.kube/
# cp ./kubeconfig ~/.kube/config
# - task: AzureCLI@2
# displayName: "Deploy NPM to Test Cluster"
# inputs:
# azureSubscription: $(BUILD_VALIDATIONS_SERVICE_CONNECTION)
# scriptType: "bash"
# scriptLocation: "inlineScript"
# failOnStderr: true
# inlineScript: |
# export CLUSTER_NAME=$(RESOURCE_GROUP)-$(PROFILE)
# curl -LO https://dl.k8s.io/release/v1.23.0/bin/linux/amd64/kubectl
# chmod +x kubectl
# # deploy azure-npm
# ./kubectl --kubeconfig=./kubeconfig apply -f https://raw.githubusercontent.com/Azure/azure-container-networking/master/npm/examples/windows/azure-npm.yaml
# # swap azure-npm image with one built during run
# ./kubectl --kubeconfig=./kubeconfig set image daemonset/azure-npm-win -n kube-system azure-npm=$IMAGE_REGISTRY/azure-npm:windows-amd64-ltsc2022-$(TAG)
# echo "sleeping 3 minutes to allow NPM pods to restart"
# sleep 180
# ./kubectl --kubeconfig=./kubeconfig get po -n kube-system -owide -A
# echo "Showing cluster status for $CLUSTER_NAME"
# FQDN=`az aks show -n $CLUSTER_NAME -g $CLUSTER_NAME --query fqdn -o tsv`
# echo "##vso[task.setvariable variable=FQDN]$FQDN"
# - script: |
# cat ~/.kube/config
# curl -fsSL github.com/mattfenwick/cyclonus/releases/latest/download/cyclonus_linux_amd64.tar.gz | tar -zxv
# name: download_cyclonus
# displayName: "Download Cyclonus"
# failOnStderr: false
# condition: always()
# - script: |
# ./test/cyclonus/test-cyclonus-windows.sh
# name: cyclonus
# displayName: "Run Cyclonus Test"
# failOnStderr: false
# condition: always()
# - bash: |
# export CLUSTER_NAME=$(RESOURCE_GROUP)-$(PROFILE)
# cp cyclonus-$CLUSTER_NAME $(System.DefaultWorkingDirectory)/$CLUSTER_NAME/cyclonus-$CLUSTER_NAME
# echo "Getting cluster state for $CLUSTER_NAME"
# mkdir -p $(System.DefaultWorkingDirectory)/$CLUSTER_NAME
# kubectl logs -n kube-system -l k8s-app=azure-npm --tail -1 --prefix > $(System.DefaultWorkingDirectory)/$CLUSTER_NAME/npm-logs_$(PROFILE).txt
# cp ./kubeconfig $(System.DefaultWorkingDirectory)/$CLUSTER_NAME/.kubeconfig
# condition: always()
# - publish: $(System.DefaultWorkingDirectory)/$(RESOURCE_GROUP)-$(PROFILE)
# condition: always()
# artifact: NpmLogs-$(RESOURCE_GROUP)-$(PROFILE)
# Deletes the resource group that the setup job named for this run.
# NOTE(review): no `condition` is set, so the default job condition applies and
# this job is presumably skipped when Create_Cluster_and_Run_Test fails —
# confirm that leaving the resource group behind for debugging is intended.
- job: clean_up
  displayName: "Cleanup"
  dependsOn:
  - Create_Cluster_and_Run_Test
  - setup
  pool:
    name: $(BUILD_POOL_NAME_DEFAULT)
    demands:
    - agent.os -equals Linux
    - Role -equals Build
  variables:
    # Output variable of the "EnvironmentalVariables" step in the setup job.
    RESOURCE_GROUP: $[ dependencies.setup.outputs['EnvironmentalVariables.RESOURCE_GROUP'] ]
  steps:
  - checkout: none
  - displayName: "Delete Test Cluster Resource Group"
    task: AzureCLI@2
    inputs:
      azureSubscription: $(BUILD_VALIDATIONS_SERVICE_CONNECTION)
      scriptType: "bash"
      scriptLocation: "inlineScript"
      inlineScript: |
        echo Deleting $(RESOURCE_GROUP)
        az group delete -n $(RESOURCE_GROUP) --yes