Skip to content

Commit 2579b61

Browse files
committed
f
1 parent 96f4bca commit 2579b61

2 files changed

Lines changed: 93 additions & 46 deletions

File tree

e2e/aks_model.go

Lines changed: 48 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -402,19 +402,28 @@ func ensureFirewallRouteTable(
402402
rg := *clusterModel.Properties.NodeResourceGroup
403403
routeTableName := "abe2e-fw-rt"
404404
toolkit.Logf(ctx, "AKS subnet has no route table; creating dedicated firewall route table %q", routeTableName)
405-
poller, err := config.Azure.RouteTables.BeginCreateOrUpdate(ctx, rg, routeTableName, armnetwork.RouteTable{
406-
Location: clusterModel.Location,
407-
}, nil)
408-
if err != nil {
409-
return "", fmt.Errorf("failed to start creating firewall route table %q: %w", routeTableName, err)
410-
}
411-
routeTableResp, err := poller.PollUntilDone(ctx, config.DefaultPollUntilDoneOptions)
405+
406+
var routeTableID *string
407+
err := retryOn409(ctx, fmt.Sprintf("creating route table %s", routeTableName), func() error {
408+
poller, err := config.Azure.RouteTables.BeginCreateOrUpdate(ctx, rg, routeTableName, armnetwork.RouteTable{
409+
Location: clusterModel.Location,
410+
}, nil)
411+
if err != nil {
412+
return fmt.Errorf("failed to start creating firewall route table %q: %w", routeTableName, err)
413+
}
414+
routeTableResp, err := poller.PollUntilDone(ctx, config.DefaultPollUntilDoneOptions)
415+
if err != nil {
416+
return fmt.Errorf("failed to create firewall route table %q: %w", routeTableName, err)
417+
}
418+
routeTableID = routeTableResp.ID
419+
return nil
420+
})
412421
if err != nil {
413-
return "", fmt.Errorf("failed to create firewall route table %q: %w", routeTableName, err)
422+
return "", err
414423
}
415424

416425
aksSubnet.Properties.RouteTable = &armnetwork.RouteTable{
417-
ID: routeTableResp.ID,
426+
ID: routeTableID,
418427
}
419428
if err := updateSubnet(ctx, clusterModel, aksSubnet, vnet); err != nil {
420429
return "", fmt.Errorf("failed to associate firewall route table %q with AKS subnet: %w", routeTableName, err)
@@ -777,23 +786,32 @@ func createPrivateEndpoint(ctx context.Context, nodeResourceGroup, privateEndpoi
777786
CustomDNSConfigs: []*armnetwork.CustomDNSConfigPropertiesFormat{},
778787
},
779788
}
780-
poller, err := config.Azure.PrivateEndpointClient.BeginCreateOrUpdate(
781-
ctx,
782-
nodeResourceGroup,
783-
privateEndpointName,
784-
peParams,
785-
nil,
786-
)
787-
if err != nil {
788-
return nil, fmt.Errorf("failed to create private endpoint in BeginCreateOrUpdate: %w", err)
789-
}
790-
resp, err := poller.PollUntilDone(ctx, nil)
789+
790+
var result armnetwork.PrivateEndpoint
791+
err = retryOn409(ctx, fmt.Sprintf("creating private endpoint %s", privateEndpointName), func() error {
792+
poller, err := config.Azure.PrivateEndpointClient.BeginCreateOrUpdate(
793+
ctx,
794+
nodeResourceGroup,
795+
privateEndpointName,
796+
peParams,
797+
nil,
798+
)
799+
if err != nil {
800+
return fmt.Errorf("failed to create private endpoint in BeginCreateOrUpdate: %w", err)
801+
}
802+
resp, err := poller.PollUntilDone(ctx, nil)
803+
if err != nil {
804+
return fmt.Errorf("failed to create private endpoint in polling: %w", err)
805+
}
806+
result = resp.PrivateEndpoint
807+
return nil
808+
})
791809
if err != nil {
792-
return nil, fmt.Errorf("failed to create private endpoint in polling: %w", err)
810+
return nil, err
793811
}
794812

795-
toolkit.Logf(ctx, "Private Endpoint created or updated with ID: %s", *resp.ID)
796-
return &resp.PrivateEndpoint, nil
813+
toolkit.Logf(ctx, "Private Endpoint created or updated with ID: %s", *result.ID)
814+
return &result, nil
797815
}
798816

799817
func createPrivateZone(ctx context.Context, nodeResourceGroup, privateZoneName string) (*armprivatedns.PrivateZone, error) {
@@ -1048,13 +1066,12 @@ func createNetworkIsolatedSecurityGroup(ctx context.Context, cluster *armcontain
10481066
}
10491067

10501068
func updateSubnet(ctx context.Context, cluster *armcontainerservice.ManagedCluster, subnetParameters armnetwork.Subnet, vnet VNet) error {
1051-
poller, err := config.Azure.Subnet.BeginCreateOrUpdate(ctx, vnet.resourceGroup, vnet.name, vnet.subnetName, subnetParameters, nil)
1052-
if err != nil {
1053-
return err
1054-
}
1055-
_, err = poller.PollUntilDone(ctx, config.DefaultPollUntilDoneOptions)
1056-
if err != nil {
1069+
return retryOn409(ctx, fmt.Sprintf("updating subnet %s", vnet.subnetName), func() error {
1070+
poller, err := config.Azure.Subnet.BeginCreateOrUpdate(ctx, vnet.resourceGroup, vnet.name, vnet.subnetName, subnetParameters, nil)
1071+
if err != nil {
1072+
return err
1073+
}
1074+
_, err = poller.PollUntilDone(ctx, config.DefaultPollUntilDoneOptions)
10571075
return err
1058-
}
1059-
return nil
1076+
})
10601077
}

e2e/shared_infra.go

Lines changed: 45 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -5,8 +5,10 @@ import (
55
"crypto/sha256"
66
"errors"
77
"fmt"
8+
"math/rand"
89
"net/http"
910
"strings"
11+
"time"
1012

1113
"github.com/Azure/agentbaker/e2e/config"
1214
"github.com/Azure/agentbaker/e2e/toolkit"
@@ -154,20 +156,22 @@ func ensureSubnet(ctx context.Context, rg, vnetName, subnetName, cidr string) er
154156
return fmt.Errorf("checking subnet %s: %w", subnetName, err)
155157
}
156158

157-
toolkit.Logf(ctx, "creating subnet %s (%s) in VNet %s", subnetName, cidr, vnetName)
158-
poller, err := config.Azure.Subnet.BeginCreateOrUpdate(ctx, rg, vnetName, subnetName, armnetwork.Subnet{
159-
Properties: &armnetwork.SubnetPropertiesFormat{
160-
AddressPrefix: to.Ptr(cidr),
161-
},
162-
}, nil)
163-
if err != nil {
164-
return fmt.Errorf("creating subnet %s: %w", subnetName, err)
165-
}
166-
_, err = poller.PollUntilDone(ctx, config.DefaultPollUntilDoneOptions)
167-
if err != nil {
168-
return fmt.Errorf("waiting for subnet %s: %w", subnetName, err)
169-
}
170-
return nil
159+
return retryOn409(ctx, fmt.Sprintf("creating subnet %s", subnetName), func() error {
160+
toolkit.Logf(ctx, "creating subnet %s (%s) in VNet %s", subnetName, cidr, vnetName)
161+
poller, err := config.Azure.Subnet.BeginCreateOrUpdate(ctx, rg, vnetName, subnetName, armnetwork.Subnet{
162+
Properties: &armnetwork.SubnetPropertiesFormat{
163+
AddressPrefix: to.Ptr(cidr),
164+
},
165+
}, nil)
166+
if err != nil {
167+
return fmt.Errorf("creating subnet %s: %w", subnetName, err)
168+
}
169+
_, err = poller.PollUntilDone(ctx, config.DefaultPollUntilDoneOptions)
170+
if err != nil {
171+
return fmt.Errorf("waiting for subnet %s: %w", subnetName, err)
172+
}
173+
return nil
174+
})
171175
}
172176

173177
func ensureSharedBastion(ctx context.Context, rg, location string) (string, error) {
@@ -478,7 +482,33 @@ func vnetFromSubnetID(ctx context.Context, subnetID string) (VNet, error) {
478482
}, nil
479483
}
480484

481-
// configureSharedVNet sets up the shared infrastructure and configures the cluster
485+
// retryOn409 retries an Azure operation that fails with 409 Conflict due to
486+
// concurrent writes on the same resource (e.g., VNet subnet creates).
487+
func retryOn409(ctx context.Context, operation string, fn func() error) error {
488+
maxRetries := 10
489+
for attempt := 0; attempt < maxRetries; attempt++ {
490+
err := fn()
491+
if err == nil {
492+
return nil
493+
}
494+
var azErr *azcore.ResponseError
495+
if !errors.As(err, &azErr) || azErr.StatusCode != http.StatusConflict {
496+
return err
497+
}
498+
if attempt == maxRetries-1 {
499+
return err
500+
}
501+
// jittered backoff: 2-8s
502+
backoff := time.Duration(2+rand.Intn(6)) * time.Second
503+
toolkit.Logf(ctx, "%s: 409 conflict (attempt %d/%d), retrying in %s...", operation, attempt+1, maxRetries, backoff)
504+
select {
505+
case <-time.After(backoff):
506+
case <-ctx.Done():
507+
return ctx.Err()
508+
}
509+
}
510+
return fmt.Errorf("%s: exhausted retries", operation)
511+
}
482512
// configureSharedVNet sets up the shared infrastructure and configures the cluster
// model to use the shared user-assigned identity. The subnet is created later in
483513
// prepareCluster after the cluster name hash is computed, with an auto-allocated CIDR.
484514
func configureSharedVNet(ctx context.Context, model *armcontainerservice.ManagedCluster, location string) error {

0 commit comments

Comments
 (0)