azure-container-networking/.pipelines/npm/npm-conformance-tests.yaml

600 строки
25 KiB
YAML

pr:
branches:
include:
- master
- release/*
paths:
include:
- npm/*
- .pipelines/npm/*
trigger:
branches:
include:
- master
tags:
include:
- "*"
variables:
- name: VNET_NAME
value: npm-vnet
- name: NUM_PARALLEL_JOBS_FOR_STRESS_TEST
value: "3"
stages:
- stage: setup
displayName: Setup
jobs:
- job: setup
displayName: "Configure Test Environment"
pool:
name: $(BUILD_POOL_NAME_DEFAULT)
demands:
- agent.os -equals Linux
- Role -equals Build
steps:
- checkout: self
- script: |
go version
go env
which go
echo $PATH
mkdir -p '$(GOBIN)'
mkdir -p '$(GOPATH)/pkg'
BUILD_NUMBER=$(Build.BuildNumber)
# format: npm-<year>-<month>-<day>-<minute>-<second>
RG=e2e-$(echo "npm-`date "+%Y-%m-%d-%M-%S"`")
TAG=$(make npm-version)
echo "Resource group: $RG"
echo "Image tag: $TAG"
echo "##vso[task.setvariable variable=RESOURCE_GROUP;isOutput=true;]$RG"
echo "##vso[task.setvariable variable=TAG;isOutput=true;]$TAG"
name: "EnvironmentalVariables"
displayName: "Set environmental variables"
condition: always()
- stage: build
displayName: Build Resources
dependsOn: [setup]
jobs:
- job: containerize
displayName: Build Images
variables:
TAG: $[ stagedependencies.setup.setup.outputs['EnvironmentalVariables.TAG'] ]
pool:
name: "$(BUILD_POOL_NAME_DEFAULT)"
strategy:
matrix:
npm_linux_amd64:
arch: amd64
name: npm
os: linux
npm_windows2022_amd64:
arch: amd64
name: npm
os: windows
os_version: ltsc2022
steps:
- template: ../containers/container-template.yaml
parameters:
arch: $(arch)
name: $(name)
os: $(os)
os_version: $(os_version)
- stage: Create_Cluster_and_Run_Test
displayName: NPM Conformance
dependsOn: [setup, build]
jobs:
- job: Create_Cluster_and_Run_Test
timeoutInMinutes: 360
displayName: "Run Kubernetes Network Policy Test Suite"
strategy:
matrix:
v2-foreground:
AZURE_CLUSTER: "conformance-v2-foreground"
PROFILE: "v2-foreground"
IS_STRESS_TEST: "false"
v2-background:
AZURE_CLUSTER: "conformance-v2-background"
PROFILE: "v2-background"
IS_STRESS_TEST: "false"
v2-ws22:
AZURE_CLUSTER: "conformance-v2-ws22"
PROFILE: "v2-default-ws22"
IS_STRESS_TEST: "false"
v2-linux-stress:
AZURE_CLUSTER: "conformance-v2-linux-stress"
PROFILE: "v2-background"
IS_STRESS_TEST: "true"
pool:
name: $(BUILD_POOL_NAME_DEFAULT)
demands:
- agent.os -equals Linux
- Role -equals Build
variables:
RESOURCE_GROUP: $[ stagedependencies.setup.setup.outputs['EnvironmentalVariables.RESOURCE_GROUP'] ]
TAG: $[ stagedependencies.setup.setup.outputs['EnvironmentalVariables.TAG'] ]
FQDN: empty
steps:
- checkout: self
- task: AzureCLI@2
displayName: "Create resource group"
inputs:
azureSubscription: $(BUILD_VALIDATIONS_SERVICE_CONNECTION)
scriptType: "bash"
scriptLocation: "inlineScript"
inlineScript: |
az group create -n $(RESOURCE_GROUP) -l $(LOCATION) -o table
echo created RG $(RESOURCE_GROUP) in $(LOCATION)
az version
- task: AzureCLI@2
displayName: "Deploy NPM to Test Cluster"
inputs:
azureSubscription: $(BUILD_VALIDATIONS_SERVICE_CONNECTION)
scriptType: "bash"
scriptLocation: "inlineScript"
failOnStderr: true
inlineScript: |
# get kubectl
curl -LO https://dl.k8s.io/release/v1.23.0/bin/linux/amd64/kubectl
chmod +x kubectl
echo Cluster $(AZURE_CLUSTER)
echo Resource $(RESOURCE_GROUP)
if [[ $(AZURE_CLUSTER) == *ws22 ]] # * is used for pattern matching
then
az extension add --name aks-preview
az extension update --name aks-preview
echo "creating WS22 Cluster";
az aks create \
--resource-group $(RESOURCE_GROUP) \
--name $(AZURE_CLUSTER) \
--generate-ssh-keys \
--windows-admin-username e2eadmin \
--windows-admin-password alpha@numeric!password2 \
--network-plugin azure \
--vm-set-type VirtualMachineScaleSets \
--node-vm-size Standard_D4s_v3 \
--node-count 1
if [ $? != 0 ]
then
echo "Failing fast since previous command failed"
exit 1
fi
# don't schedule anything on the linux system pool
echo "Updating $(AZURE_CLUSTER) to not schedule anything on linux pool..."
az aks nodepool update \
--cluster-name $(AZURE_CLUSTER) \
-g $(RESOURCE_GROUP) \
-n nodepool1 \
--node-taints CriticalAddonsOnly=true:NoSchedule
if [ $? != 0 ]
then
echo "Failing fast since previous command failed"
exit 1
fi
echo "Adding Windows nodepool to $(AZURE_CLUSTER) to group $(RESOURCE_GROUP)"
az aks nodepool add \
--resource-group $(RESOURCE_GROUP) \
--cluster-name $(AZURE_CLUSTER) \
--name awin22 \
--os-type Windows \
--os-sku Windows2022 \
--node-vm-size Standard_D4s_v3 \
--node-count 2
if [ $? != 0 ]
then
echo "Failing fast since previous command failed"
exit 1
fi
az aks get-credentials -n $(AZURE_CLUSTER) -g $(RESOURCE_GROUP) --file ./kubeconfig
./kubectl --kubeconfig=./kubeconfig apply -f $(Pipeline.Workspace)/s/npm/examples/windows/azure-npm.yaml
./kubectl --kubeconfig=./kubeconfig set image daemonset/azure-npm-win -n kube-system azure-npm=$IMAGE_REGISTRY/azure-npm:windows-amd64-ltsc2022-$(TAG)
else
echo "Creating Linux Cluster";
az aks create --no-ssh-key \
--resource-group $(RESOURCE_GROUP) \
--name $(AZURE_CLUSTER) \
--network-plugin azure
if [ $? != 0 ]
then
echo "Failing fast since previous command failed"
exit 1
fi
az aks get-credentials -n $(AZURE_CLUSTER) -g $(RESOURCE_GROUP) --file ./kubeconfig
# deploy azure-npm
./kubectl --kubeconfig=./kubeconfig apply -f $(Pipeline.Workspace)/s/npm/azure-npm.yaml
# swap azure-npm image with one built during run
./kubectl --kubeconfig=./kubeconfig set image daemonset/azure-npm -n kube-system azure-npm=$IMAGE_REGISTRY/azure-npm:linux-amd64-$(TAG)
# swap NPM profile with one specified as parameter
./kubectl --kubeconfig=./kubeconfig apply -f $(Pipeline.Workspace)/s/npm/profiles/$(PROFILE).yaml
./kubectl --kubeconfig=./kubeconfig rollout restart ds azure-npm -n kube-system
fi
./kubectl --kubeconfig=./kubeconfig describe daemonset azure-npm -n kube-system
if [ $? != 0 ]
then
echo "Failing fast since previous command failed"
exit 1
fi
FQDN=`az aks show -n $(AZURE_CLUSTER) -g $(RESOURCE_GROUP) --query fqdn -o tsv`
echo "##vso[task.setvariable variable=FQDN]$FQDN"
- task: AzureCLI@2
inputs:
azureSubscription: $(BUILD_VALIDATIONS_SERVICE_CONNECTION)
scriptLocation: "inlineScript"
scriptType: "bash"
addSpnToEnvironment: true
inlineScript: |
set -e
make -C ./hack/aks set-kubeconf AZCLI=az GROUP=$(RESOURCE_GROUP) CLUSTER=$(AZURE_CLUSTER)
# sig-release provides test suite tarball(s) per k8s release. Just need to provide k8s version "v1.xx.xx"
# pulling k8s version from AKS.
eval k8sVersion="v"$( az aks show -g $(RESOURCE_GROUP) -n $(AZURE_CLUSTER) --query "currentKubernetesVersion")
echo $k8sVersion
curl -L https://dl.k8s.io/$k8sVersion/kubernetes-test-linux-amd64.tar.gz -o ./kubernetes-test-linux-amd64.tar.gz
# https://github.com/kubernetes/sig-release/blob/master/release-engineering/artifacts.md#content-of-kubernetes-test-system-archtargz-on-example-of-kubernetes-test-linux-amd64targz-directories-removed-from-list
# explictly unzip and strip directories from ginkgo and e2e.test
tar -xvzf kubernetes-test-linux-amd64.tar.gz --strip-components=3 kubernetes/test/bin/ginkgo kubernetes/test/bin/e2e.test
displayName: "Setup Environment"
- bash: |
echo "sleeping 3 minutes to allow NPM pods to restart"
sleep 180
set -o pipefail
## create the output folder and include the kubeconfig there
npmLogsFolder=$(System.DefaultWorkingDirectory)/npmLogs_$(AZURE_CLUSTER)
mkdir -p $npmLogsFolder
cp ./kubeconfig $npmLogsFolder/kubeconfig
npmPodList=`kubectl --kubeconfig=./kubeconfig get pods -n kube-system | grep npm | awk '{print $1}'`
echo "Found NPM pods: $npmPodList"
## Run all Conformance tests in the background
echo $FQDN
runConformance () {
KUBERNETES_SERVICE_HOST="$FQDN" KUBERNETES_SERVICE_PORT=443 ./e2e.test --provider=local --ginkgo.focus="NetworkPolicy" --ginkgo.skip="SCTP" --kubeconfig=./kubeconfig
# there can't be a command after e2e.test because the exit code is important
}
runConformanceWindows () {
# full suite of ~32 test cases is taking too long...
# nomatch1="should enforce policy based on PodSelector or NamespaceSelector"
# nomatch2="should enforce policy based on NamespaceSelector with MatchExpressions using default ns label"
# nomatch3="should enforce policy based on PodSelector and NamespaceSelector"
# nomatch4="should enforce policy based on Multiple PodSelectors and NamespaceSelectors"
# cidrExcept1="should ensure an IP overlapping both IPBlock.CIDR and IPBlock.Except is allowed"
# cidrExcept2="should enforce except clause while egress access to server in CIDR block"
# namedPorts="named port"
# wrongK8sVersion="Netpol API"
# toSkip="\[LinuxOnly\]|$nomatch1|$nomatch2|$nomatch3|$nomatch4|$cidrExcept1|$cidrExcept2|$namedPorts|$wrongK8sVersion|SCTP"
# slimmed down to 14 tests like npm-cni-integration-test.yaml
# NetworkPolicy between server and...
f1="client should enforce policy to allow traffic only from a different namespace, based on NamespaceSelector"
f2="client should deny egress from pods based on PodSelector"
f3="client should enforce multiple, stacked policies with overlapping podSelectors"
f4="client should enforce egress policy allowing traffic to a server in a different namespace based on PodSelector and NamespaceSelector"
f5="client should work with Ingress, Egress specified together"
f6="client should enforce ingress policy allowing any port traffic to a server on a specific protocol"
f7="client should not allow access by TCP when a policy specifies only UDP"
f8="client should allow egress access to server in CIDR block"
f9="client should enforce policy based on Ports"
f10="client should support allow-all policy"
f11="client should enforce updated policy"
f12="client should support denying of egress traffic on the client side"
f13="client should stop enforcing policies after they are deleted"
f14="client should support a 'default-deny-ingress' policy"
focus="$f1|$f2|$f3|$f4|$f5|$f6|$f7|$f8|$f9|$f10|$f11|$f12|$f13|$f14"
KUBERNETES_SERVICE_HOST="$FQDN" KUBERNETES_SERVICE_PORT=443 \
./e2e.test \
--provider=local \
--ginkgo.focus="$focus" \
--ginkgo.skip="\[LinuxOnly\]|NetworkPolicyLegacy|SCTP" \
--node-os-distro=windows \
--allowed-not-ready-nodes=1 \
--kubeconfig=./kubeconfig \
--ginkgo.timeout="2h"
}
exitCode=0
if [ $(IS_STRESS_TEST) == "true" ]; then
echo "Running $NUM_PARALLEL_JOBS_FOR_STRESS_TEST conformance tests at once and writing outputs to files"
declare -a conformancePIDs
for round in $(seq 1 $NUM_PARALLEL_JOBS_FOR_STRESS_TEST); do
# for each iteration, run the conformance test and echos in the background, and write the output of the conformance test to a file
# run the conformance test in the foreground and write the output to stdout and a file
if [[ $(AZURE_CLUSTER) == *ws22 ]] # * is used for pattern matching
then
echo "starting conformance test #$round" && \
runConformanceWindows | tee $npmLogsFolder/conformance-results-$round && \
echo "finished conformance test #$round" &
pidOfConformanceTest=$!
conformancePIDs+=($pidOfConformanceTest)
else
echo "starting windows conformance test #$round" && \
runConformance > $npmLogsFolder/conformance-results-$round && \
echo "finished conformance test #$round" &
pidOfConformanceTest=$!
conformancePIDs+=($pidOfConformanceTest)
fi
done
# wait until all conformance tests finish and take note of any failed tests
for round in $(seq 1 $NUM_PARALLEL_JOBS_FOR_STRESS_TEST); do
i=$((round-1))
wait ${conformancePIDs[$i]}
exitCode=$?
if [ $exitCode != 0 ]; then
echo "conformance test #$round failed"
break
fi
done
else
# run the conformance test in the foreground and write the output to stdout and a file
if [[ $(AZURE_CLUSTER) == *ws22 ]] # * is used for pattern matching
then
runConformanceWindows | tee $npmLogsFolder/conformance-results
exitCode=$?
else
runConformance | tee $npmLogsFolder/conformance-results
exitCode=$?
fi
fi
# get all current npm pods
kubectl --kubeconfig=./kubeconfig get pods -n kube-system | grep npm
npmPodList=`kubectl --kubeconfig=./kubeconfig get pods -n kube-system | grep npm | awk '{print $1}'`
# capture all logs
for npmPod in $npmPodList; do
./kubectl --kubeconfig=./kubeconfig logs -n kube-system $npmPod > $npmLogsFolder/$npmPod-logs.txt
done
# capture any previous logs in case there was a crash
for npmPod in $npmPodList; do
previousLogFile=$npmLogsFolder/previous-$npmPod-logs.txt
./kubectl --kubeconfig=./kubeconfig logs -n kube-system $npmPod -p > $previousLogFile
if [[ $? -ne 0 ]]; then
# remove the empty file if kubectl logs failed (e.g. there was no previous terminated container)
rm $previousLogFile
fi
done
exit $exitCode
displayName: "Run Test Suite and Get Logs ($(PROFILE))"
failOnStderr: false
- publish: $(System.DefaultWorkingDirectory)/npmLogs_$(AZURE_CLUSTER)
condition: always()
artifact: NpmLogs_$(AZURE_CLUSTER)
- stage: clean_up_Create_Cluster_and_Run_Test
displayName: Cleanup Conformance
dependsOn: [setup, Create_Cluster_and_Run_Test]
condition: always()
jobs:
- job: clean_up
displayName: "Cleanup"
pool:
name: $(BUILD_POOL_NAME_DEFAULT)
demands:
- agent.os -equals Linux
- Role -equals Build
variables:
RESOURCE_GROUP: $[ stagedependencies.setup.setup.outputs['EnvironmentalVariables.RESOURCE_GROUP'] ]
steps:
- checkout: none
- task: AzureCLI@2
displayName: "Delete Test Cluster Resource Group"
inputs:
azureSubscription: $(BUILD_VALIDATIONS_SERVICE_CONNECTION)
scriptType: "bash"
scriptLocation: "inlineScript"
inlineScript: |
echo Deleting $(RESOURCE_GROUP)
az group delete -n $(RESOURCE_GROUP) --yes
# - stage: cyclonus_win
# displayName: Windows Cyclonus
# dependsOn: [setup, build]
# jobs:
# - job: Create_Windows_Cluster_and_Run_Test
# timeoutInMinutes: 360
# displayName: "Run Windows Cyclonus"
# pool:
# name: $(BUILD_POOL_NAME_DEFAULT)
# demands:
# - agent.os -equals Linux
# - Role -equals Build
# variables:
# RESOURCE_GROUP: $[ stagedependencies.setup.setup.outputs['EnvironmentalVariables.RESOURCE_GROUP'] ]
# TAG: $[ stagedependencies.setup.setup.outputs['EnvironmentalVariables.TAG'] ]
# FQDN: empty
# strategy:
# matrix:
# v2-windows:
# PROFILE: "cyc-ws22"
# steps:
# - checkout: self
# - task: AzureCLI@2
# displayName: "Create AKS Cluster"
# inputs:
# azureSubscription: $(BUILD_VALIDATIONS_SERVICE_CONNECTION)
# scriptType: "bash"
# scriptLocation: "inlineScript"
# failOnStderr: true
# inlineScript: |
# az extension add --name aks-preview
# az extension update --name aks-preview
# export CLUSTER_NAME=$(RESOURCE_GROUP)-$(PROFILE)
# echo "Creating resource group named $CLUSTER_NAME"
# az group create --name $CLUSTER_NAME -l $(LOCATION) -o table
# echo "Creating resource group named $CLUSTER_NAME"
# az aks create \
# --resource-group $CLUSTER_NAME \
# --name $CLUSTER_NAME \
# --generate-ssh-keys \
# --windows-admin-username e2eadmin \
# --windows-admin-password alpha@numeric!password2 \
# --network-plugin azure \
# --vm-set-type VirtualMachineScaleSets \
# --node-vm-size Standard_D8s_v3 \
# --node-count 1 \
# --uptime-sla
# # don't schedule anything on the linux system pool
# echo "Updating $CLUSTER_NAME to not schedule anything on linux pool..."
# az aks nodepool update \
# --cluster-name $CLUSTER_NAME \
# -g $CLUSTER_NAME \
# -n nodepool1 \
# --node-taints CriticalAddonsOnly=true:NoSchedule
# echo "Adding Windows nodepool to $CLUSTER_NAME"
# az aks nodepool add \
# --resource-group $CLUSTER_NAME \
# --cluster-name $CLUSTER_NAME \
# --name awin22 \
# --os-type Windows \
# --os-sku Windows2022 \
# --node-vm-size Standard_D4s_v3 \
# --node-count 3
# echo "Getting credentials to $CLUSTER_NAME"
# az aks get-credentials -g $CLUSTER_NAME -n $CLUSTER_NAME --overwrite-existing --file ./kubeconfig
# mkdir -p ~/.kube/
# cp ./kubeconfig ~/.kube/config
# - task: AzureCLI@2
# displayName: "Deploy NPM to Test Cluster"
# inputs:
# azureSubscription: $(BUILD_VALIDATIONS_SERVICE_CONNECTION)
# scriptType: "bash"
# scriptLocation: "inlineScript"
# failOnStderr: true
# inlineScript: |
# export CLUSTER_NAME=$(RESOURCE_GROUP)-$(PROFILE)
# curl -LO https://dl.k8s.io/release/v1.23.0/bin/linux/amd64/kubectl
# chmod +x kubectl
# # deploy azure-npm
# ./kubectl --kubeconfig=./kubeconfig apply -f $(Pipeline.Workspace)/s/npm/examples/windows/azure-npm.yaml
# # swap azure-npm image with one built during run
# ./kubectl --kubeconfig=./kubeconfig set image daemonset/azure-npm-win -n kube-system azure-npm=$IMAGE_REGISTRY/azure-npm:windows-amd64-ltsc2022-$(TAG)
# echo "sleeping and waiting for NPM pods to be ready..."
# sleep 1m
# ./kubectl --kubeconfig=./kubeconfig wait --for=condition=Ready pod -n kube-system -l k8s-app=azure-npm --timeout=5m
# echo "sleeping 3 more minutes to let windows NPM finish bootup phase"
# ./kubectl --kubeconfig=./kubeconfig get po -n kube-system -owide -A
# echo "Showing cluster status for $CLUSTER_NAME"
# FQDN=`az aks show -n $CLUSTER_NAME -g $CLUSTER_NAME --query fqdn -o tsv`
# echo "##vso[task.setvariable variable=FQDN]$FQDN"
# - script: |
# cat ~/.kube/config
# curl -fsSL github.com/mattfenwick/cyclonus/releases/latest/download/cyclonus_linux_amd64.tar.gz | tar -zxv
# name: download_cyclonus
# displayName: "Download Cyclonus"
# failOnStderr: false
# condition: always()
# - script: |
# ./test/cyclonus/test-cyclonus-windows.sh
# name: cyclonus
# displayName: "Run Cyclonus Test"
# failOnStderr: false
# condition: succeeded()
# - bash: |
# export CLUSTER_NAME=$(RESOURCE_GROUP)-$(PROFILE)
# cp cyclonus-$CLUSTER_NAME $(System.DefaultWorkingDirectory)/$CLUSTER_NAME/cyclonus-$CLUSTER_NAME
# echo "Getting cluster state for $CLUSTER_NAME"
# mkdir -p $(System.DefaultWorkingDirectory)/$CLUSTER_NAME
# kubectl get pods -n kube-system | grep npm
# kubectl logs -n kube-system -l k8s-app=azure-npm --tail -1 --prefix > $(System.DefaultWorkingDirectory)/$CLUSTER_NAME/npm-logs_$(PROFILE).txt
# # capture any previous logs in case there was a crash
# npmPodList=`kubectl get pods -n kube-system | grep npm | awk '{print $1}'`
# for npmPod in $npmPodList; do
# previousLogFile=$(System.DefaultWorkingDirectory)/$CLUSTER_NAME/previous-npm-logs_$(PROFILE).txt
# kubectl logs -n kube-system $npmPod -p > $previousLogFile
# if [[ $? -ne 0 ]]; then
# # remove the empty file if kubectl logs failed (e.g. there was no previous terminated container)
# rm $previousLogFile
# fi
# done
# cp ./kubeconfig $(System.DefaultWorkingDirectory)/$CLUSTER_NAME/.kubeconfig
# condition: always()
# - publish: $(System.DefaultWorkingDirectory)/$(RESOURCE_GROUP)-$(PROFILE)
# condition: always()
# artifact: NpmLogs-$(RESOURCE_GROUP)-$(PROFILE)
# - stage: clean_up_cyclonus_win
# displayName: Cleanup Cyclonus
# dependsOn: [setup, cyclonus_win]
# condition: always()
# jobs:
# - job: clean_up
# displayName: "Cleanup"
# pool:
# name: $(BUILD_POOL_NAME_DEFAULT)
# demands:
# - agent.os -equals Linux
# - Role -equals Build
# variables:
# RESOURCE_GROUP: $[ stagedependencies.setup.setup.outputs['EnvironmentalVariables.RESOURCE_GROUP'] ]
# strategy:
# matrix:
# v2-windows:
# PROFILE: "cyc-ws22"
# steps:
# - checkout: none
# - task: AzureCLI@2
# displayName: "Delete Test Cluster Resource Group"
# inputs:
# azureSubscription: $(BUILD_VALIDATIONS_SERVICE_CONNECTION)
# scriptType: "bash"
# scriptLocation: "inlineScript"
# inlineScript: |
# echo Deleting $(RESOURCE_GROUP)-$(PROFILE)
# az group delete -n $(RESOURCE_GROUP)-$(PROFILE) --yes