Skip to content

Commit 6d9592b

Browse files
authored
Merge pull request #6059 from nojnhuh/vmss-bootstrap-hash
[release-1.21] Eliminate unnecessary periodic VMSS updates with new bootstrap data
2 parents c0dec5f + cdb47d2 commit 6d9592b

File tree

6 files changed

+66
-214
lines changed

6 files changed

+66
-214
lines changed

azure/const.go

Lines changed: 0 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -46,10 +46,4 @@ const (
4646
// See https://kubernetes.io/docs/concepts/overview/working-with-objects/annotations/
4747
// for annotation formatting rules.
4848
SecurityRuleLastAppliedAnnotation = "sigs.k8s.io/cluster-api-provider-azure-last-applied-security-rules"
49-
50-
// CustomDataHashAnnotation is the key for the machine object annotation
51-
// which tracks the hash of the custom data.
52-
// See https://kubernetes.io/docs/concepts/overview/working-with-objects/annotations/
53-
// for annotation formatting rules.
54-
CustomDataHashAnnotation = "sigs.k8s.io/cluster-api-provider-azure-vmss-custom-data-hash"
5549
)

azure/scope/machinepool.go

Lines changed: 4 additions & 51 deletions
Original file line numberDiff line numberDiff line change
@@ -18,11 +18,9 @@ package scope
1818

1919
import (
2020
"context"
21-
"crypto/sha256"
2221
"encoding/base64"
2322
"encoding/json"
2423
"fmt"
25-
"io"
2624
"strings"
2725

2826
"github.com/Azure/azure-sdk-for-go/sdk/resourcemanager/authorization/armauthorization/v2"
@@ -91,11 +89,10 @@ type (
9189

9290
// MachinePoolCache stores common machine pool information so we don't have to hit the API multiple times within the same reconcile loop.
9391
MachinePoolCache struct {
94-
BootstrapData string
95-
HasBootstrapDataChanges bool
96-
VMImage *infrav1.Image
97-
VMSKU resourceskus.SKU
98-
MaxSurge int
92+
BootstrapData string
93+
VMImage *infrav1.Image
94+
VMSKU resourceskus.SKU
95+
MaxSurge int
9996
}
10097
)
10198

@@ -148,11 +145,6 @@ func (m *MachinePoolScope) InitMachinePoolCache(ctx context.Context) error {
148145
return err
149146
}
150147

151-
m.cache.HasBootstrapDataChanges, err = m.HasBootstrapDataChanges(ctx)
152-
if err != nil {
153-
return err
154-
}
155-
156148
m.cache.VMImage, err = m.GetVMImage(ctx)
157149
if err != nil {
158150
return err
@@ -226,8 +218,6 @@ func (m *MachinePoolScope) ScaleSetSpec(ctx context.Context) azure.ResourceSpecG
226218
}
227219

228220
if m.cache != nil {
229-
spec.ShouldPatchCustomData = m.cache.HasBootstrapDataChanges
230-
log.V(4).Info("has bootstrap data changed?", "shouldPatchCustomData", spec.ShouldPatchCustomData)
231221
spec.VMSSExtensionSpecs = m.VMSSExtensionSpecs()
232222
spec.SKU = m.cache.VMSKU
233223
spec.VMImage = m.cache.VMImage
@@ -708,10 +698,6 @@ func (m *MachinePoolScope) Close(ctx context.Context) error {
708698
if err := m.updateReplicasAndProviderIDs(ctx); err != nil {
709699
return errors.Wrap(err, "failed to update replicas and providerIDs")
710700
}
711-
if err := m.updateCustomDataHash(ctx); err != nil {
712-
// ignore errors to calculating the custom data hash since it's not absolutely crucial.
713-
log.V(4).Error(err, "unable to update custom data hash, ignoring.")
714-
}
715701
}
716702

717703
if err := m.PatchObject(ctx); err != nil {
@@ -745,39 +731,6 @@ func (m *MachinePoolScope) GetBootstrapData(ctx context.Context) (string, error)
745731
return base64.StdEncoding.EncodeToString(value), nil
746732
}
747733

748-
// calculateBootstrapDataHash calculates the sha256 hash of the bootstrap data.
749-
func (m *MachinePoolScope) calculateBootstrapDataHash(_ context.Context) (string, error) {
750-
if m.cache == nil {
751-
return "", fmt.Errorf("machinepool cache is nil")
752-
}
753-
bootstrapData := m.cache.BootstrapData
754-
h := sha256.New()
755-
n, err := io.WriteString(h, bootstrapData)
756-
if err != nil || n == 0 {
757-
return "", fmt.Errorf("unable to write custom data (bytes written: %q): %w", n, err)
758-
}
759-
return fmt.Sprintf("%x", h.Sum(nil)), nil
760-
}
761-
762-
// HasBootstrapDataChanges calculates the sha256 hash of the bootstrap data and compares it with the saved hash in AzureMachinePool.Status.
763-
func (m *MachinePoolScope) HasBootstrapDataChanges(ctx context.Context) (bool, error) {
764-
newHash, err := m.calculateBootstrapDataHash(ctx)
765-
if err != nil {
766-
return false, err
767-
}
768-
return m.AzureMachinePool.GetAnnotations()[azure.CustomDataHashAnnotation] != newHash, nil
769-
}
770-
771-
// updateCustomDataHash calculates the sha256 hash of the bootstrap data and saves it in AzureMachinePool.Status.
772-
func (m *MachinePoolScope) updateCustomDataHash(ctx context.Context) error {
773-
newHash, err := m.calculateBootstrapDataHash(ctx)
774-
if err != nil {
775-
return err
776-
}
777-
m.SetAnnotation(azure.CustomDataHashAnnotation, newHash)
778-
return nil
779-
}
780-
781734
// GetVMImage picks an image from the AzureMachinePool configuration, or uses a default one.
782735
func (m *MachinePoolScope) GetVMImage(ctx context.Context) (*infrav1.Image, error) {
783736
ctx, log, done := tele.StartSpanWithLogger(ctx, "scope.MachinePoolScope.GetVMImage")

azure/scope/machinepool_test.go

Lines changed: 0 additions & 149 deletions
Original file line numberDiff line numberDiff line change
@@ -18,17 +18,13 @@ package scope
1818

1919
import (
2020
"context"
21-
"crypto/sha256"
22-
"encoding/base64"
2321
"fmt"
24-
"io"
2522
"reflect"
2623
"testing"
2724
"time"
2825

2926
"github.com/Azure/azure-sdk-for-go/sdk/azidentity"
3027
"github.com/Azure/azure-sdk-for-go/sdk/resourcemanager/authorization/armauthorization/v2"
31-
"github.com/Azure/azure-sdk-for-go/sdk/resourcemanager/compute/armcompute/v5"
3228
azureautorest "github.com/Azure/go-autorest/autorest/azure"
3329
"github.com/Azure/go-autorest/autorest/azure/auth"
3430
. "github.com/onsi/gomega"
@@ -1750,148 +1746,3 @@ func TestMachinePoolScope_setProvisioningStateAndConditions(t *testing.T) {
17501746
})
17511747
}
17521748
}
1753-
1754-
func TestBootstrapDataChanges(t *testing.T) {
1755-
ctx, cancel := context.WithCancel(t.Context())
1756-
defer cancel()
1757-
scheme := runtime.NewScheme()
1758-
_ = clusterv1.AddToScheme(scheme)
1759-
_ = infrav1.AddToScheme(scheme)
1760-
_ = infrav1exp.AddToScheme(scheme)
1761-
_ = corev1.AddToScheme(scheme)
1762-
1763-
var (
1764-
g = NewWithT(t)
1765-
mockCtrl = gomock.NewController(t)
1766-
cb = fake.NewClientBuilder().WithScheme(scheme)
1767-
cluster = &clusterv1.Cluster{
1768-
ObjectMeta: metav1.ObjectMeta{
1769-
Name: "cluster1",
1770-
Namespace: "default",
1771-
},
1772-
Spec: clusterv1.ClusterSpec{
1773-
InfrastructureRef: &corev1.ObjectReference{
1774-
Name: "azCluster1",
1775-
},
1776-
},
1777-
Status: clusterv1.ClusterStatus{
1778-
InfrastructureReady: true,
1779-
},
1780-
}
1781-
azureCluster = &infrav1.AzureCluster{
1782-
ObjectMeta: metav1.ObjectMeta{
1783-
Name: "azCluster1",
1784-
Namespace: "default",
1785-
},
1786-
Spec: infrav1.AzureClusterSpec{
1787-
AzureClusterClassSpec: infrav1.AzureClusterClassSpec{
1788-
Location: "test",
1789-
},
1790-
},
1791-
}
1792-
mp = &expv1.MachinePool{
1793-
ObjectMeta: metav1.ObjectMeta{
1794-
Name: "mp1",
1795-
Namespace: "default",
1796-
OwnerReferences: []metav1.OwnerReference{
1797-
{
1798-
Name: "cluster1",
1799-
Kind: "Cluster",
1800-
APIVersion: clusterv1.GroupVersion.String(),
1801-
},
1802-
},
1803-
},
1804-
Spec: expv1.MachinePoolSpec{
1805-
Template: clusterv1.MachineTemplateSpec{
1806-
Spec: clusterv1.MachineSpec{
1807-
Bootstrap: clusterv1.Bootstrap{
1808-
DataSecretName: ptr.To("mp-secret"),
1809-
},
1810-
Version: ptr.To("v1.31.0"),
1811-
},
1812-
},
1813-
},
1814-
}
1815-
bootstrapData = "test"
1816-
bootstrapDataHash = sha256Hash(base64.StdEncoding.EncodeToString([]byte(bootstrapData)))
1817-
bootstrapSecret = corev1.Secret{
1818-
ObjectMeta: metav1.ObjectMeta{
1819-
Name: "mp-secret",
1820-
Namespace: "default",
1821-
},
1822-
Data: map[string][]byte{"value": []byte(bootstrapData)},
1823-
}
1824-
amp = &infrav1exp.AzureMachinePool{
1825-
ObjectMeta: metav1.ObjectMeta{
1826-
Name: "amp1",
1827-
Namespace: "default",
1828-
OwnerReferences: []metav1.OwnerReference{
1829-
{
1830-
Name: "mp1",
1831-
Kind: "MachinePool",
1832-
APIVersion: expv1.GroupVersion.String(),
1833-
},
1834-
},
1835-
Annotations: map[string]string{
1836-
azure.CustomDataHashAnnotation: fmt.Sprintf("%x", bootstrapDataHash),
1837-
},
1838-
},
1839-
Spec: infrav1exp.AzureMachinePoolSpec{
1840-
Template: infrav1exp.AzureMachinePoolMachineTemplate{
1841-
Image: &infrav1.Image{},
1842-
NetworkInterfaces: []infrav1.NetworkInterface{
1843-
{
1844-
SubnetName: "test",
1845-
},
1846-
},
1847-
VMSize: "VM_SIZE",
1848-
},
1849-
},
1850-
}
1851-
vmssState = &azure.VMSS{}
1852-
)
1853-
defer mockCtrl.Finish()
1854-
1855-
s := &MachinePoolScope{
1856-
client: cb.
1857-
WithObjects(&bootstrapSecret).
1858-
Build(),
1859-
ClusterScoper: &ClusterScope{
1860-
Cluster: cluster,
1861-
AzureCluster: azureCluster,
1862-
},
1863-
skuCache: resourceskus.NewStaticCache([]armcompute.ResourceSKU{
1864-
{
1865-
Name: ptr.To("VM_SIZE"),
1866-
},
1867-
}, "test"),
1868-
MachinePool: mp,
1869-
AzureMachinePool: amp,
1870-
vmssState: vmssState,
1871-
}
1872-
1873-
g.Expect(s.InitMachinePoolCache(ctx)).NotTo(HaveOccurred())
1874-
1875-
spec := s.ScaleSetSpec(ctx)
1876-
sSpec := spec.(*scalesets.ScaleSetSpec)
1877-
g.Expect(sSpec.ShouldPatchCustomData).To(BeFalse())
1878-
1879-
amp.Annotations[azure.CustomDataHashAnnotation] = "old"
1880-
1881-
// reset cache to be able to build up the cache again
1882-
s.cache = nil
1883-
g.Expect(s.InitMachinePoolCache(ctx)).NotTo(HaveOccurred())
1884-
1885-
spec = s.ScaleSetSpec(ctx)
1886-
sSpec = spec.(*scalesets.ScaleSetSpec)
1887-
g.Expect(sSpec.ShouldPatchCustomData).To(BeTrue())
1888-
}
1889-
1890-
func sha256Hash(text string) []byte {
1891-
h := sha256.New()
1892-
_, err := io.WriteString(h, text)
1893-
if err != nil {
1894-
panic(err)
1895-
}
1896-
return h.Sum(nil)
1897-
}

azure/services/scalesets/scalesets_test.go

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -813,12 +813,14 @@ func newDefaultWindowsVMSS() armcompute.VirtualMachineScaleSet {
813813

814814
func newDefaultVMSS(vmSize string) armcompute.VirtualMachineScaleSet {
815815
dataDisk := fetchDataDiskBasedOnSize(vmSize)
816+
bootstrapData := "fake-bootstrap-data"
816817
return armcompute.VirtualMachineScaleSet{
817818
Location: ptr.To("test-location"),
818819
Tags: map[string]*string{
819820
"Name": ptr.To(defaultVMSSName),
820821
"sigs.k8s.io_cluster-api-provider-azure_cluster_my-cluster": ptr.To("owned"),
821822
"sigs.k8s.io_cluster-api-provider-azure_role": ptr.To("node"),
823+
customDataHashTagName: ptr.To(mustCalculateBootstrapDataHash(bootstrapData)),
822824
},
823825
SKU: &armcompute.SKU{
824826
Name: ptr.To(vmSize),
@@ -837,7 +839,7 @@ func newDefaultVMSS(vmSize string) armcompute.VirtualMachineScaleSet {
837839
OSProfile: &armcompute.VirtualMachineScaleSetOSProfile{
838840
ComputerNamePrefix: ptr.To(defaultVMSSName),
839841
AdminUsername: ptr.To(azure.DefaultUserName),
840-
CustomData: ptr.To("fake-bootstrap-data"),
842+
CustomData: ptr.To(bootstrapData),
841843
LinuxConfiguration: &armcompute.LinuxConfiguration{
842844
SSH: &armcompute.SSHConfiguration{
843845
PublicKeys: []*armcompute.SSHPublicKey{

azure/services/scalesets/spec.go

Lines changed: 31 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -18,8 +18,10 @@ package scalesets
1818

1919
import (
2020
"context"
21+
"crypto/sha256"
2122
"encoding/base64"
2223
"fmt"
24+
"io"
2325
"strconv"
2426

2527
"github.com/Azure/azure-sdk-for-go/sdk/resourcemanager/compute/armcompute/v5"
@@ -34,6 +36,8 @@ import (
3436
"sigs.k8s.io/cluster-api-provider-azure/util/tele"
3537
)
3638

39+
const customDataHashTagName = "capz-custom-data-hash"
40+
3741
// ScaleSetSpec defines the specification for a Scale Set.
3842
type ScaleSetSpec struct {
3943
Name string
@@ -70,7 +74,6 @@ type ScaleSetSpec struct {
7074
VMSSInstances []armcompute.VirtualMachineScaleSetVM
7175
MaxSurge int
7276
ClusterName string
73-
ShouldPatchCustomData bool
7477
HasReplicasExternallyManaged bool
7578
AdditionalTags infrav1.Tags
7679
PlatformFaultDomainCount *int32
@@ -129,19 +132,22 @@ func (s *ScaleSetSpec) existingParameters(ctx context.Context, existing interfac
129132
}
130133

131134
// If there are no model changes and no increase in the replica count, do not update the VMSS.
132-
// Decreases in replica count is handled by deleting AzureMachinePoolMachine instances in the MachinePoolScope
133-
if *vmss.SKU.Capacity <= existingInfraVMSS.Capacity && !hasModelChanges && !s.ShouldPatchCustomData {
135+
// Decreases in replica count is handled by deleting AzureMachinePoolMachine instances in the MachinePoolScope.
136+
// Bootstrap data is allowed to get stale here and will be updated alongside changes to the model or
137+
// replica count which require fresh bootstrap data.
138+
if *vmss.SKU.Capacity <= existingInfraVMSS.Capacity && !hasModelChanges {
134139
// up to date, nothing to do
135140
return nil, nil
136141
}
137142

138143
// if there are no model changes and no change in custom data, remove VirtualMachineProfile to avoid unnecessary VMSS model
139144
// updates.
140-
if !hasModelChanges && !s.ShouldPatchCustomData {
141-
log.V(4).Info("removing virtual machine profile from parameters", "hasModelChanges", hasModelChanges, "shouldPatchCustomData", s.ShouldPatchCustomData)
145+
shouldPatchCustomData := ptr.Deref(existingVMSS.Tags[customDataHashTagName], "") != ptr.Deref(vmss.Tags[customDataHashTagName], "")
146+
if !hasModelChanges && !shouldPatchCustomData {
147+
log.V(4).Info("removing virtual machine profile from parameters", "hasModelChanges", hasModelChanges, "shouldPatchCustomData", shouldPatchCustomData)
142148
vmss.Properties.VirtualMachineProfile = nil
143149
} else {
144-
log.V(4).Info("has changes, not removing virtual machine profile from parameters", "hasModelChanges", hasModelChanges, "shouldPatchCustomData", s.ShouldPatchCustomData)
150+
log.V(4).Info("has changes, not removing virtual machine profile from parameters", "hasModelChanges", hasModelChanges, "shouldPatchCustomData", shouldPatchCustomData)
145151
}
146152

147153
return vmss, nil
@@ -293,6 +299,15 @@ func (s *ScaleSetSpec) Parameters(ctx context.Context, existing interface{}) (pa
293299
})
294300

295301
vmss.Tags = converters.TagsToMap(tags)
302+
303+
// Custom data is not returned in GET responses, so we store a hash to detect when it changes. This allows
304+
// CAPZ to update the VMSS model only when necessary.
305+
customDataHash, err := calculateBootstrapDataHash(s.BootstrapData)
306+
if err != nil {
307+
return armcompute.VirtualMachineScaleSet{}, err
308+
}
309+
vmss.Tags[customDataHashTagName] = ptr.To(customDataHash)
310+
296311
return vmss, nil
297312
}
298313

@@ -547,3 +562,13 @@ func (s *ScaleSetSpec) getSecurityProfile() (*armcompute.SecurityProfile, error)
547562
EncryptionAtHost: ptr.To(*s.SecurityProfile.EncryptionAtHost),
548563
}, nil
549564
}
565+
566+
// calculateBootstrapDataHash calculates the sha256 hash of the bootstrap data.
567+
func calculateBootstrapDataHash(bootstrapData string) (string, error) {
568+
h := sha256.New()
569+
n, err := io.WriteString(h, bootstrapData)
570+
if err != nil || n == 0 {
571+
return "", fmt.Errorf("unable to write custom data (bytes written: %q): %w", n, err)
572+
}
573+
return fmt.Sprintf("%x", h.Sum(nil)), nil
574+
}

0 commit comments

Comments
 (0)