test: retry az commands in E2E tests (#2089)

This commit is contained in:
Jack Francis 2019-10-02 14:02:50 -07:00 коммит произвёл GitHub
Родитель 7841e50e68
Коммит 133cb46c42
Не найден ключ, соответствующий данной подписи
Идентификатор ключа GPG: 4AEE18F83AFDEB23
3 изменённых файлов: 282 добавлений и 44 удалений

Просмотреть файл

@ -6,7 +6,6 @@ package azure
import (
"context"
"encoding/json"
"errors"
"fmt"
"log"
"os"
@ -19,6 +18,7 @@ import (
"github.com/Azure/azure-sdk-for-go/services/network/mgmt/2018-08-01/network"
"github.com/kelseyhightower/envconfig"
"github.com/pkg/errors"
)
// Storage provides access to StorageAccount objects
@ -122,6 +122,34 @@ func (a *Account) Login() error {
return nil
}
// LoginWithRetry invokes Login repeatedly until it succeeds or timeout elapses.
//
// sleep is the pause between a failed attempt and the next one; timeout bounds
// the whole call. It returns nil as soon as one attempt succeeds; otherwise,
// once the timeout expires, it returns an error carrying the most recent
// Login failure.
func (a *Account) LoginWithRetry(sleep, timeout time.Duration) error {
	ctx, cancel := context.WithTimeout(context.Background(), timeout)
	defer cancel()
	ch := make(chan error)
	var mostRecentLoginWithRetryError error
	go func() {
		for {
			// Never start an attempt once the caller has returned or timed out.
			if ctx.Err() != nil {
				return
			}
			// Run the attempt outside the select: a send expression inside a
			// select case is evaluated even when another case wins, which
			// would fire a stray extra Login after the caller is gone.
			err := a.Login()
			select {
			case <-ctx.Done():
				return
			case ch <- err:
			}
			// Pause between attempts, but wake immediately on cancellation.
			timer := time.NewTimer(sleep)
			select {
			case <-ctx.Done():
				timer.Stop()
				return
			case <-timer.C:
			}
		}
	}()
	for {
		select {
		case mostRecentLoginWithRetryError = <-ch:
			if mostRecentLoginWithRetryError == nil {
				return nil
			}
		case <-ctx.Done():
			// %v keeps the message readable when no attempt finished in time.
			return errors.Errorf("LoginWithRetry timed out: %v\n", mostRecentLoginWithRetryError)
		}
	}
}
// SetSubscription will call az account set --subscription for the given Account
func (a *Account) SetSubscription() error {
var cmd *exec.Cmd
@ -140,6 +168,34 @@ func (a *Account) SetSubscription() error {
return nil
}
// SetSubscriptionWithRetry invokes SetSubscription repeatedly until it
// succeeds or timeout elapses.
//
// sleep is the pause between a failed attempt and the next one; timeout bounds
// the whole call. It returns nil as soon as one attempt succeeds; otherwise,
// once the timeout expires, it returns an error carrying the most recent
// SetSubscription failure.
func (a *Account) SetSubscriptionWithRetry(sleep, timeout time.Duration) error {
	ctx, cancel := context.WithTimeout(context.Background(), timeout)
	defer cancel()
	ch := make(chan error)
	var mostRecentSetSubscriptionWithRetryError error
	go func() {
		for {
			// Never start an attempt once the caller has returned or timed out.
			if ctx.Err() != nil {
				return
			}
			// Run the attempt outside the select: a send expression inside a
			// select case is evaluated even when another case wins, which
			// would fire a stray extra SetSubscription after the caller is gone.
			err := a.SetSubscription()
			select {
			case <-ctx.Done():
				return
			case ch <- err:
			}
			// Pause between attempts, but wake immediately on cancellation.
			timer := time.NewTimer(sleep)
			select {
			case <-ctx.Done():
				timer.Stop()
				return
			case <-timer.C:
			}
		}
	}()
	for {
		select {
		case mostRecentSetSubscriptionWithRetryError = <-ch:
			if mostRecentSetSubscriptionWithRetryError == nil {
				return nil
			}
		case <-ctx.Done():
			// %v keeps the message readable when no attempt finished in time.
			return errors.Errorf("SetSubscriptionWithRetry timed out: %v\n", mostRecentSetSubscriptionWithRetryError)
		}
	}
}
// CreateGroup will create a resource group in a given location
//--tags "type=${RESOURCE_GROUP_TAG_TYPE:-}" "now=$(date +%s)" "job=${JOB_BASE_NAME:-}" "buildno=${BUILD_NUM:-}"
func (a *Account) CreateGroup(name, location string) error {
@ -168,6 +224,34 @@ func (a *Account) CreateGroup(name, location string) error {
return nil
}
// CreateGroupWithRetry invokes CreateGroup(name, location) repeatedly until it
// succeeds or timeout elapses.
//
// sleep is the pause between a failed attempt and the next one; timeout bounds
// the whole call. It returns nil as soon as one attempt succeeds; otherwise,
// once the timeout expires, it returns an error carrying the most recent
// CreateGroup failure.
func (a *Account) CreateGroupWithRetry(name, location string, sleep, timeout time.Duration) error {
	ctx, cancel := context.WithTimeout(context.Background(), timeout)
	defer cancel()
	ch := make(chan error)
	var mostRecentCreateGroupWithRetryError error
	go func() {
		for {
			// Never start an attempt once the caller has returned or timed out.
			if ctx.Err() != nil {
				return
			}
			// Run the attempt outside the select: a send expression inside a
			// select case is evaluated even when another case wins, which
			// would fire a stray extra CreateGroup after the caller is gone.
			err := a.CreateGroup(name, location)
			select {
			case <-ctx.Done():
				return
			case ch <- err:
			}
			// Pause between attempts, but wake immediately on cancellation.
			timer := time.NewTimer(sleep)
			select {
			case <-ctx.Done():
				timer.Stop()
				return
			case <-timer.C:
			}
		}
	}()
	for {
		select {
		case mostRecentCreateGroupWithRetryError = <-ch:
			if mostRecentCreateGroupWithRetryError == nil {
				return nil
			}
		case <-ctx.Done():
			// %v keeps the message readable when no attempt finished in time.
			return errors.Errorf("CreateGroupWithRetry timed out: %v\n", mostRecentCreateGroupWithRetryError)
		}
	}
}
// DeleteGroup deletes a given resource group by name
func (a *Account) DeleteGroup(name string, wait bool) error {
var cmd *exec.Cmd
@ -189,6 +273,34 @@ func (a *Account) DeleteGroup(name string, wait bool) error {
return nil
}
// DeleteGroupWithRetry invokes DeleteGroup(name, wait) repeatedly until it
// succeeds or timeout elapses.
//
// sleep is the pause between a failed attempt and the next one; timeout bounds
// the whole call. It returns nil as soon as one attempt succeeds; otherwise,
// once the timeout expires, it returns an error carrying the most recent
// DeleteGroup failure.
func (a *Account) DeleteGroupWithRetry(name string, wait bool, sleep, timeout time.Duration) error {
	ctx, cancel := context.WithTimeout(context.Background(), timeout)
	defer cancel()
	ch := make(chan error)
	var mostRecentDeleteGroupWithRetryError error
	go func() {
		for {
			// Never start an attempt once the caller has returned or timed out.
			if ctx.Err() != nil {
				return
			}
			// Run the attempt outside the select: a send expression inside a
			// select case is evaluated even when another case wins, which
			// would issue a stray extra delete after the caller is gone.
			err := a.DeleteGroup(name, wait)
			select {
			case <-ctx.Done():
				return
			case ch <- err:
			}
			// Pause between attempts, but wake immediately on cancellation.
			timer := time.NewTimer(sleep)
			select {
			case <-ctx.Done():
				timer.Stop()
				return
			case <-timer.C:
			}
		}
	}()
	for {
		select {
		case mostRecentDeleteGroupWithRetryError = <-ch:
			if mostRecentDeleteGroupWithRetryError == nil {
				return nil
			}
		case <-ctx.Done():
			// %v keeps the message readable when no attempt finished in time.
			return errors.Errorf("DeleteGroupWithRetry timed out: %v\n", mostRecentDeleteGroupWithRetryError)
		}
	}
}
// CreateDeployment will deploy a cluster to a given resource group using the template and parameters on disk
func (a *Account) CreateDeployment(name string, e *engine.Engine) error {
d := Deployment{
@ -228,6 +340,34 @@ func (a *Account) CreateDeployment(name string, e *engine.Engine) error {
return nil
}
// CreateDeploymentWithRetry invokes CreateDeployment(name, e) repeatedly until
// it succeeds or timeout elapses.
//
// sleep is the pause between a failed attempt and the next one; timeout bounds
// the whole call. It returns nil as soon as one attempt succeeds; otherwise,
// once the timeout expires, it returns an error carrying the most recent
// CreateDeployment failure.
func (a *Account) CreateDeploymentWithRetry(name string, e *engine.Engine, sleep, timeout time.Duration) error {
	ctx, cancel := context.WithTimeout(context.Background(), timeout)
	defer cancel()
	ch := make(chan error)
	var mostRecentCreateDeploymentWithRetryError error
	go func() {
		for {
			// Never start an attempt once the caller has returned or timed out.
			if ctx.Err() != nil {
				return
			}
			// Run the attempt outside the select: a send expression inside a
			// select case is evaluated even when another case wins, which
			// would launch a stray extra deployment after the caller is gone.
			err := a.CreateDeployment(name, e)
			select {
			case <-ctx.Done():
				return
			case ch <- err:
			}
			// Pause between attempts, but wake immediately on cancellation.
			timer := time.NewTimer(sleep)
			select {
			case <-ctx.Done():
				timer.Stop()
				return
			case <-timer.C:
			}
		}
	}()
	for {
		select {
		case mostRecentCreateDeploymentWithRetryError = <-ch:
			if mostRecentCreateDeploymentWithRetryError == nil {
				return nil
			}
		case <-ctx.Done():
			// %v keeps the message readable when no attempt finished in time.
			return errors.Errorf("CreateDeploymentWithRetry timed out: %v\n", mostRecentCreateDeploymentWithRetryError)
		}
	}
}
// CreateVnet will create a vnet in a resource group
func (a *Account) CreateVnet(vnet, addressPrefixes string) error {
var cmd *exec.Cmd
@ -247,6 +387,34 @@ func (a *Account) CreateVnet(vnet, addressPrefixes string) error {
return nil
}
// CreateVnetWithRetry invokes CreateVnet(vnet, addressPrefixes) repeatedly
// until it succeeds or timeout elapses.
//
// sleep is the pause between a failed attempt and the next one; timeout bounds
// the whole call. It returns nil as soon as one attempt succeeds; otherwise,
// once the timeout expires, it returns an error carrying the most recent
// CreateVnet failure.
func (a *Account) CreateVnetWithRetry(vnet, addressPrefixes string, sleep, timeout time.Duration) error {
	ctx, cancel := context.WithTimeout(context.Background(), timeout)
	defer cancel()
	ch := make(chan error)
	var mostRecentCreateVnetWithRetryError error
	go func() {
		for {
			// Never start an attempt once the caller has returned or timed out.
			if ctx.Err() != nil {
				return
			}
			// Run the attempt outside the select: a send expression inside a
			// select case is evaluated even when another case wins, which
			// would fire a stray extra CreateVnet after the caller is gone.
			err := a.CreateVnet(vnet, addressPrefixes)
			select {
			case <-ctx.Done():
				return
			case ch <- err:
			}
			// Pause between attempts, but wake immediately on cancellation.
			timer := time.NewTimer(sleep)
			select {
			case <-ctx.Done():
				timer.Stop()
				return
			case <-timer.C:
			}
		}
	}()
	for {
		select {
		case mostRecentCreateVnetWithRetryError = <-ch:
			if mostRecentCreateVnetWithRetryError == nil {
				return nil
			}
		case <-ctx.Done():
			// %v keeps the message readable when no attempt finished in time.
			return errors.Errorf("CreateVnetWithRetry timed out: %v\n", mostRecentCreateVnetWithRetryError)
		}
	}
}
// ListRGRouteTableResult defines a struct for making a multi-value channel result type
type ListRGRouteTableResult struct {
routeTables []network.RouteTable
@ -356,6 +524,34 @@ func (a *Account) CreateSubnet(vnet, subnetName, subnetPrefix string) error {
return nil
}
// CreateSubnetWithRetry invokes CreateSubnet(vnet, subnetName, subnetPrefix)
// repeatedly until it succeeds or timeout elapses.
//
// sleep is the pause between a failed attempt and the next one; timeout bounds
// the whole call. It returns nil as soon as one attempt succeeds; otherwise,
// once the timeout expires, it returns an error carrying the most recent
// CreateSubnet failure.
func (a *Account) CreateSubnetWithRetry(vnet, subnetName, subnetPrefix string, sleep, timeout time.Duration) error {
	ctx, cancel := context.WithTimeout(context.Background(), timeout)
	defer cancel()
	ch := make(chan error)
	var mostRecentCreateSubnetWithRetryError error
	go func() {
		for {
			// Never start an attempt once the caller has returned or timed out.
			if ctx.Err() != nil {
				return
			}
			// Run the attempt outside the select: a send expression inside a
			// select case is evaluated even when another case wins, which
			// would fire a stray extra CreateSubnet after the caller is gone.
			err := a.CreateSubnet(vnet, subnetName, subnetPrefix)
			select {
			case <-ctx.Done():
				return
			case ch <- err:
			}
			// Pause between attempts, but wake immediately on cancellation.
			timer := time.NewTimer(sleep)
			select {
			case <-ctx.Done():
				timer.Stop()
				return
			case <-timer.C:
			}
		}
	}()
	for {
		select {
		case mostRecentCreateSubnetWithRetryError = <-ch:
			if mostRecentCreateSubnetWithRetryError == nil {
				return nil
			}
		case <-ctx.Done():
			// %v keeps the message readable when no attempt finished in time.
			return errors.Errorf("CreateSubnetWithRetry timed out: %v\n", mostRecentCreateSubnetWithRetryError)
		}
	}
}
// RouteTable holds information from running az network route-table list
type RouteTable struct {
ID string `json:"id"`
@ -365,36 +561,19 @@ type RouteTable struct {
ResourceGroup string `json:"resourceGroup"`
}
// UpdateRouteTables is used to updated a vnet with the appropriate route tables
func (a *Account) UpdateRouteTables(subnet, vnet string) error {
var cmd *exec.Cmd
if a.TimeoutCommands {
cmd = exec.Command("timeout", "60", "az", "network", "route-table", "list", "-g", a.ResourceGroup.Name)
} else {
cmd = exec.Command("az", "network", "route-table", "list", "-g", a.ResourceGroup.Name)
}
out, err := cmd.CombinedOutput()
if err != nil {
log.Printf("Error while trying to get route table list!\n Output:%s\n", out)
return err
}
rts := []RouteTable{}
json.Unmarshal(out, &rts)
// GetHostsResult is a return struct for GetHostsAsync
type GetHostsResult struct {
Vms []VM
Err error
}
if a.TimeoutCommands {
cmd = exec.Command("timeout", "60", "az", "network", "vnet", "subnet", "update",
"-n", subnet, "-g", a.ResourceGroup.Name, "--vnet-name", vnet, "--route-table", rts[0].Name)
} else {
cmd = exec.Command("az", "network", "vnet", "subnet", "update",
"-n", subnet, "-g", a.ResourceGroup.Name, "--vnet-name", vnet, "--route-table", rts[0].Name)
// GetHostsAsync wraps GetHosts with a struct response for goroutine + channel usage
func (a *Account) GetHostsAsync(name string) GetHostsResult {
vms, err := a.GetHosts(name)
return GetHostsResult{
Vms: vms,
Err: err,
}
util.PrintCommand(cmd)
out, err = cmd.CombinedOutput()
if err != nil {
log.Printf("Error while trying to update vnet route tables:%s\n", out)
return err
}
return nil
}
// GetHosts will get a list of vms in the resource group
@ -427,6 +606,37 @@ func (a *Account) GetHosts(name string) ([]VM, error) {
return v, nil
}
// GetHostsWithRetry invokes GetHostsAsync(name) repeatedly until it reports no
// error or timeout elapses.
//
// sleep is the pause between a failed attempt and the next one; timeout bounds
// the whole call. On the first successful attempt it returns that attempt's
// VMs and a nil error; otherwise, once the timeout expires, it returns nil VMs
// and an error carrying the most recent failure.
func (a *Account) GetHostsWithRetry(name string, sleep, timeout time.Duration) ([]VM, error) {
	ctx, cancel := context.WithTimeout(context.Background(), timeout)
	defer cancel()
	ch := make(chan GetHostsResult)
	var mostRecentGetHostsWithRetryError error
	go func() {
		for {
			// Never start an attempt once the caller has returned or timed out.
			if ctx.Err() != nil {
				return
			}
			// Run the attempt outside the select: a send expression inside a
			// select case is evaluated even when another case wins, which
			// would fire a stray extra query after the caller is gone.
			result := a.GetHostsAsync(name)
			select {
			case <-ctx.Done():
				return
			case ch <- result:
			}
			// Pause between attempts, but wake immediately on cancellation.
			timer := time.NewTimer(sleep)
			select {
			case <-ctx.Done():
				timer.Stop()
				return
			case <-timer.C:
			}
		}
	}()
	for {
		select {
		case result := <-ch:
			mostRecentGetHostsWithRetryError = result.Err
			if mostRecentGetHostsWithRetryError == nil {
				return result.Vms, nil
			}
		case <-ctx.Done():
			// %v keeps the message readable when no attempt finished in time.
			return nil, errors.Errorf("GetHostsWithRetry timed out: %v\n", mostRecentGetHostsWithRetryError)
		}
	}
}
// SetResourceGroup will set the account resource group
func (a *Account) SetResourceGroup(name string) error {
if a.ResourceGroup.Name != "" {
@ -458,6 +668,34 @@ func (a *Account) SetResourceGroup(name string) error {
return nil
}
// SetResourceGroupWithRetry invokes SetResourceGroup(name) repeatedly until it
// succeeds or timeout elapses.
//
// sleep is the pause between a failed attempt and the next one; timeout bounds
// the whole call. It returns nil as soon as one attempt succeeds; otherwise,
// once the timeout expires, it returns an error carrying the most recent
// SetResourceGroup failure.
func (a *Account) SetResourceGroupWithRetry(name string, sleep, timeout time.Duration) error {
	ctx, cancel := context.WithTimeout(context.Background(), timeout)
	defer cancel()
	ch := make(chan error)
	var mostRecentSetResourceGroupWithRetryError error
	go func() {
		for {
			// Never start an attempt once the caller has returned or timed out.
			if ctx.Err() != nil {
				return
			}
			// Run the attempt outside the select: a send expression inside a
			// select case is evaluated even when another case wins, which
			// would fire a stray extra SetResourceGroup after the caller is gone.
			err := a.SetResourceGroup(name)
			select {
			case <-ctx.Done():
				return
			case ch <- err:
			}
			// Pause between attempts, but wake immediately on cancellation.
			timer := time.NewTimer(sleep)
			select {
			case <-ctx.Done():
				timer.Stop()
				return
			case <-timer.C:
			}
		}
	}()
	for {
		select {
		case mostRecentSetResourceGroupWithRetryError = <-ch:
			if mostRecentSetResourceGroupWithRetryError == nil {
				return nil
			}
		case <-ctx.Done():
			// %v keeps the message readable when no attempt finished in time.
			return errors.Errorf("SetResourceGroupWithRetry timed out: %v\n", mostRecentSetResourceGroupWithRetryError)
		}
	}
}
// IsClusterExpired will return true if a deployment was created more than d ago, or if timestamp is not found
func (a *Account) IsClusterExpired(d time.Duration) bool {
tag, err := strconv.ParseInt(a.ResourceGroup.Tags["now"], 10, 64)

Просмотреть файл

@ -59,12 +59,12 @@ func main() {
log.Fatalf("Error while trying to setup azure account: %s\n", err)
}
err := acct.Login()
err := acct.LoginWithRetry(3*time.Second, cfg.Timeout)
if err != nil {
log.Fatalf("Error while trying to login to azure account! %s\n", err)
}
err = acct.SetSubscription()
err = acct.SetSubscriptionWithRetry(3*time.Second, cfg.Timeout)
if err != nil {
log.Fatal("Error while trying to set azure subscription!")
}
@ -95,7 +95,7 @@ func main() {
}
provision := true
rg := cfg.SoakClusterName
err = acct.SetResourceGroup(rg)
err = acct.SetResourceGroupWithRetry(rg, 3*time.Second, cfg.Timeout)
if err != nil {
log.Printf("Error while trying to set RG:%s\n", err)
} else {
@ -110,7 +110,7 @@ func main() {
if provision || cfg.ForceDeploy {
log.Printf("Soak cluster %s does not exist or has expired\n", rg)
log.Printf("Deleting Resource Group:%s\n", rg)
acct.DeleteGroup(rg, true)
acct.DeleteGroupWithRetry(rg, true, 3*time.Second, cfg.Timeout)
log.Printf("Deleting Storage files:%s\n", rg)
sa.DeleteFiles(cfg.SoakClusterName)
cfg.Name = ""
@ -120,7 +120,7 @@ func main() {
if err != nil {
log.Printf("Error while trying to download _output dir: %s, will provision a new cluster.\n", err)
log.Printf("Deleting Resource Group:%s\n", rg)
acct.DeleteGroup(rg, true)
acct.DeleteGroupWithRetry(rg, true, 3*time.Second, cfg.Timeout)
log.Printf("Deleting Storage files:%s\n", rg)
sa.DeleteFiles(cfg.SoakClusterName)
cfg.Name = ""
@ -250,7 +250,7 @@ func teardown() {
if cfg.CleanUpOnExit {
for _, rg := range rgs {
log.Printf("Deleting Group:%s\n", rg)
acct.DeleteGroup(rg, false)
acct.DeleteGroupWithRetry(rg, false, 3*time.Second, cfg.Timeout)
}
}
}

Просмотреть файл

@ -118,7 +118,7 @@ func (cli *CLIProvisioner) provision() error {
os.Setenv("DNS_PREFIX", cli.Config.Name)
err := cli.Account.CreateGroup(cli.Config.Name, cli.Config.Location)
err := cli.Account.CreateGroupWithRetry(cli.Config.Name, cli.Config.Location, 3*time.Second, cli.Config.Timeout)
if err != nil {
return errors.Wrap(err, "Error while trying to create resource group")
}
@ -142,16 +142,16 @@ func (cli *CLIProvisioner) provision() error {
if cli.CreateVNET {
if cli.MasterVMSS {
agentSubnetName := fmt.Sprintf("%sCustomSubnetAgent", cli.Config.Name)
err = cli.Account.CreateVnet(vnetName, "10.239.0.0/16")
err = cli.Account.CreateVnetWithRetry(vnetName, "10.239.0.0/16", 3*time.Second, cli.Config.Timeout)
if err != nil {
return errors.Errorf("Error trying to create vnet:%s", err.Error())
}
err = cli.Account.CreateSubnet(vnetName, masterSubnetName, "10.239.0.0/17")
err = cli.Account.CreateSubnetWithRetry(vnetName, masterSubnetName, "10.239.0.0/17", 3*time.Second, cli.Config.Timeout)
if err != nil {
return errors.Errorf("Error trying to create subnet:%s", err.Error())
}
subnets = append(subnets, masterSubnetName)
err = cli.Account.CreateSubnet(vnetName, agentSubnetName, "10.239.128.0/17")
err = cli.Account.CreateSubnetWithRetry(vnetName, agentSubnetName, "10.239.128.0/17", 3*time.Second, cli.Config.Timeout)
if err != nil {
return errors.Errorf("Error trying to create subnet in subnet:%s", err.Error())
}
@ -159,18 +159,18 @@ func (cli *CLIProvisioner) provision() error {
agentSubnetID = fmt.Sprintf("/subscriptions/%s/resourceGroups/%s/providers/Microsoft.Network/virtualNetworks/%s/subnets/%s", cli.Account.SubscriptionID, cli.Account.ResourceGroup.Name, vnetName, agentSubnetName)
} else {
err = cli.Account.CreateVnet(vnetName, "10.239.0.0/16")
err = cli.Account.CreateVnetWithRetry(vnetName, "10.239.0.0/16", 3*time.Second, cli.Config.Timeout)
if err != nil {
return errors.Errorf("Error trying to create vnet:%s", err.Error())
}
err = cli.Account.CreateSubnet(vnetName, masterSubnetName, "10.239.255.0/24")
err = cli.Account.CreateSubnetWithRetry(vnetName, masterSubnetName, "10.239.255.0/24", 3*time.Second, cli.Config.Timeout)
if err != nil {
return errors.Errorf("Error trying to create subnet:%s", err.Error())
}
subnets = append(subnets, masterSubnetName)
for i, pool := range cs.ContainerService.Properties.AgentPoolProfiles {
subnetName := fmt.Sprintf("%sCustomSubnet", pool.Name)
err = cli.Account.CreateSubnet(vnetName, subnetName, fmt.Sprintf("10.239.%d.0/22", i*4))
err = cli.Account.CreateSubnetWithRetry(vnetName, subnetName, fmt.Sprintf("10.239.%d.0/22", i*4), 3*time.Second, cli.Config.Timeout)
if err != nil {
return errors.Errorf("Error trying to create subnet:%s", err.Error())
}
@ -220,7 +220,7 @@ func (cli *CLIProvisioner) provision() error {
if cli.Config.IsKubernetes() {
// Store the hosts for future introspection
hosts, err := cli.Account.GetHosts(cli.Config.Name)
hosts, err := cli.Account.GetHostsWithRetry(cli.Config.Name, 3*time.Second, cli.Config.Timeout)
if err != nil {
return errors.Wrap(err, "GetHosts:%s")
}
@ -277,7 +277,7 @@ func (cli *CLIProvisioner) generateAndDeploy() error {
//if we use Generate, then we need to call CreateDeployment
if !cli.Config.UseDeployCommand {
err = cli.Account.CreateDeployment(cli.Config.Name, cli.Engine)
err = cli.Account.CreateDeploymentWithRetry(cli.Config.Name, cli.Engine, 30*time.Second, 60*time.Minute)
if err != nil {
return errors.Wrap(err, "Error while trying to create deployment")
}