diff --git a/01-prerequisites.md b/01-prerequisites.md index f6125fd7..c2607962 100644 --- a/01-prerequisites.md +++ b/01-prerequisites.md @@ -32,11 +32,14 @@ This is the starting point for the instructions on deploying the [AKS baseline r 1. [Register the ImageCleaner (Earser) preview feature = `EnableImageCleanerPreview`](https://learn.microsoft.com/azure/aks/image-cleaner#prerequisites) + 1. [Register the Trusted Access preview feature = `TrustedAccessPreview`](https://learn.microsoft.com/azure/backup/azure-kubernetes-service-cluster-manage-backups#register-the-trusted-access) to support AKS Backup. + ```bash az feature register --namespace "Microsoft.ContainerService" -n "EnableImageCleanerPreview" + az feature register --namespace "Microsoft.ContainerService" -n "TrustedAccessPreview" # Keep running until all say "Registered." (This may take up to 20 minutes.) - az feature list -o table --query "[?name=='Microsoft.ContainerService/EnableImageCleanerPreview'].{Name:name,State:properties.state}" + az feature list -o table --query "[?name=='Microsoft.ContainerService/EnableImageCleanerPreview' || name=='Microsoft.ContainerService/TrustedAccessPreview'].{Name:name,State:properties.state}" # When all say "Registered" then re-register the AKS resource provider az provider register --namespace Microsoft.ContainerService diff --git a/05-bootstrap-prep.md b/05-bootstrap-prep.md index 0bee73a7..70f0327d 100644 --- a/05-bootstrap-prep.md +++ b/05-bootstrap-prep.md @@ -18,7 +18,8 @@ We'll be bootstrapping this cluster with the Flux GitOps agent as installed as a ### Additional resources -In addition to ACR being deployed to support bootstrapping, this is where any other resources that are considered not tied to the lifecycle of an individual cluster is deployed. ACR is one example as talked about above. Another example could be an AKS Backup Vault and backup artifacts storage account which likely would exist prior to and after any individual AKS cluster's existance. 
When designing your pipelines, ensure to isolate components by their lifecycle watch for singletons in an architecture. These are typically resources like regional logging sinks, supporting global routing infrastructure, etc. This is in contrast to potentially transiently/replaceable components, like the AKS cluster itself. _This implemention does not represent a complete seperation of stamp vs regional resources, but is fairly close. Deviations are strickly for ease of deployment in this walkthrough instead of as examples of guidance._ +In addition to ACR being deployed to support bootstrapping, this is where any other resources that are considered not tied to the lifecycle of an individual cluster are deployed. ACR is one example, as talked about above. Another example in this implementation is the AKS Backup Vault and backup artifacts storage account, which likely would exist prior to and after any individual AKS cluster's existence. When designing your pipelines, ensure you isolate components by their lifecycle, and watch for singletons in an architecture. These are typically resources like regional logging sinks, supporting global routing infrastructure, etc. This is in contrast to potentially transient/replaceable components, like the AKS cluster itself. _This implementation does not represent a complete separation of stamp vs regional resources, but is fairly close. Deviations are strictly for ease of deployment in this walkthrough instead of as examples of guidance._ + ## Steps 1. Create the AKS cluster resource group. diff --git a/06-aks-cluster.md b/06-aks-cluster.md index af7fd939..fa303afb 100644 --- a/06-aks-cluster.md +++ b/06-aks-cluster.md @@ -20,7 +20,7 @@ Now that your [ACR instance is deployed and ready to support cluster bootstrappi :exclamation: By default, this deployment will allow unrestricted access to your cluster's API Server. You can limit access to the API Server to a set of well-known IP addresses (i.,e.
a jump box subnet (connected to by Azure Bastion), build agents, or any other networks you'll administer the cluster from) by setting the `clusterAuthorizedIPRanges` parameter in all deployment options. This setting will also impact traffic originating from within the cluster trying to use the API server, so you will also need to include _all_ of the public IPs used by your egress Azure Firewall. For more information, see [Secure access to the API server using authorized IP address ranges](https://learn.microsoft.com/azure/aks/api-server-authorized-ip-ranges#create-an-aks-cluster-with-api-server-authorized-ip-ranges-enabled). ```bash - # [This takes about 18 minutes.] + # [This takes about 25 minutes.] az deployment group create -g rg-bu0001a0008 -f cluster-stamp.bicep -p targetVnetResourceId=${RESOURCEID_VNET_CLUSTERSPOKE_AKS_BASELINE} clusterAdminMicrosoftEntraGroupObjectId=${MEIDOBJECTID_GROUP_CLUSTERADMIN_AKS_BASELINE} a0008NamespaceReaderMicrosoftEntraGroupObjectId=${MEIDOBJECTID_GROUP_A0008_READER_AKS_BASELINE} k8sControlPlaneAuthorizationTenantId=${TENANTID_K8SRBAC_AKS_BASELINE} appGatewayListenerCertificate=${APP_GATEWAY_LISTENER_CERTIFICATE_AKS_BASELINE} aksIngressControllerCertificate=${AKS_INGRESS_CONTROLLER_CERTIFICATE_BASE64_AKS_BASELINE} domainName=${DOMAIN_NAME_AKS_BASELINE} gitOpsBootstrappingRepoHttpsUrl=${GITOPS_REPOURL} gitOpsBootstrappingRepoBranch=${GITOPS_CURRENT_BRANCH_NAME} location=eastus2 ``` diff --git a/acr-stamp.bicep b/acr-stamp.bicep index 83838a7f..c04e6a5d 100644 --- a/acr-stamp.bicep +++ b/acr-stamp.bicep @@ -290,6 +290,222 @@ resource privateEndpointAcrToVnet 'Microsoft.Network/privateEndpoints@2022-09-01 } } +// Supports configuring the AKS Backup extension. 
+resource bvAksBackupVault 'Microsoft.DataProtection/backupVaults@2023-01-01' = { + name: 'bvAksBackupVault' + location: location + identity: { + type: 'SystemAssigned' + } + properties: { + storageSettings: [ + { + datastoreType: 'VaultStore' + type: 'ZoneRedundant' + } + ] + securitySettings: { + immutabilitySettings: { + state: 'Disabled' + } + softDeleteSettings: { + state: 'On' + retentionDurationInDays: 14 + } + } + featureSettings: {} + } + + // Example Kubernetes backup policy: incremental backups daily at UTC midnight, kept for seven days (P7D) in the operational store. Configure policy as needed. + resource aksPolicy 'backupPolicies' = { + name: 'bp-aks-default-daily' + properties: { + objectType: 'BackupPolicy' + datasourceTypes: [ + 'Microsoft.ContainerService/managedClusters' + ] + policyRules: [ + { + objectType: 'AzureBackupRule' + name: 'BackupDaily' + backupParameters: { + objectType: 'AzureBackupParams' + backupType: 'Incremental' + } + dataStore: { + objectType: 'DataStoreInfoBase' + dataStoreType: 'OperationalStore' + } + trigger: { + objectType: 'ScheduleBasedTriggerContext' + schedule: { + timeZone: 'UTC' + repeatingTimeIntervals: [ + 'R/2023-01-01T00:00:00+00:00/P1D' + ] + } + taggingCriteria: [ + { + tagInfo: { + tagName: 'Default' + } + taggingPriority: 99 + isDefault: true + } + ] + } + } + { + objectType: 'AzureRetentionRule' + name: 'Default' + isDefault: true + lifecycles: [ + { + deleteAfter: { + objectType: 'AbsoluteDeleteOption' + duration: 'P7D' + } + targetDataStoreCopySettings: [] + sourceDataStore: { + dataStoreType: 'OperationalStore' + objectType: 'DataStoreInfoBase' + } + } + ] + } + ] + } + } +} + +// This stores AKS Backup content, to be used by all clusters. Shared key access and public network access are disabled, and the network ACLs deny by default. +resource storageAksBackups 'Microsoft.Storage/storageAccounts@2022-09-01' = { + name: 'stbackup${subRgUniqueString}' + location: location + sku: { + name: 'Standard_GRS' + } + kind: 'StorageV2' + properties: { + allowSharedKeyAccess: false + defaultToOAuthAuthentication: true + publicNetworkAccess: 'Disabled' +
allowCrossTenantReplication: false + allowBlobPublicAccess: false + minimumTlsVersion: 'TLS1_2' + isHnsEnabled: false + isLocalUserEnabled: false + isSftpEnabled: false + routingPreference: { + publishInternetEndpoints: true + publishMicrosoftEndpoints: true + routingChoice: 'MicrosoftRouting' + } + networkAcls: { + bypass: 'None' + virtualNetworkRules: [] + ipRules: [] + defaultAction: 'Deny' + } + encryption: { + keySource: 'Microsoft.Storage' + services: { + file: { + keyType: 'Account' + enabled: true + } + blob: { + keyType: 'Account' + enabled: true + } + } + } + supportsHttpsTrafficOnly: true + accessTier: 'Hot' + } + + resource blobservice 'blobServices' = { + name: 'default' + } +} + +// Private DNS zone for the blob endpoint of our AKS Backup storage account +resource dnsPrivateZoneBlob 'Microsoft.Network/privateDnsZones@2020-06-01' = { + name: 'privatelink.blob.core.windows.net' + location: 'global' + properties: {} + + + // Links this private DNS zone to the cluster spoke virtual network (auto-registration is off). + resource vnetlnk 'virtualNetworkLinks' = { + name: 'to_${spokeVirtualNetwork.name}' + location: 'global' + properties: { + virtualNetwork: { + id: spokeVirtualNetwork.id + } + registrationEnabled: false + } + } +} + +resource storageAksBackups_diagnosticsSettings 'Microsoft.Insights/diagnosticSettings@2021-05-01-preview' = { + name: 'default' + scope: storageAksBackups::blobservice + properties: { + workspaceId: laAks.id + logs: [ + { + category: 'StorageRead' + enabled: true + } + { + category: 'StorageWrite' + enabled: true + } + { + category: 'StorageDelete' + enabled: true + } + ] + } +} + +resource peAksBackupStorage 'Microsoft.Network/privateEndpoints@2022-07-01' = { + name: 'pe-${storageAksBackups.name}' + location: location + properties: { + subnet: { + id: spokeVirtualNetwork::snetPrivateLinkEndpoints.id + } + privateLinkServiceConnections: [ + { + name: 'to_${spokeVirtualNetwork.name}' + properties: { + privateLinkServiceId: storageAksBackups.id + groupIds: [ + 'blob' + ] + } +
} + ] + } + + resource pdnszg 'privateDnsZoneGroups' = { + name: 'default' + properties: { + privateDnsZoneConfigs: [ + { + name: 'privatelink-blob-core-windows-net' + properties: { + privateDnsZoneId: dnsPrivateZoneBlob.id + } + } + ] + } + } +} + /*** OUTPUTS ***/ output containerRegistryName string = acrAks.name diff --git a/cluster-stamp.bicep b/cluster-stamp.bicep index 6b316a93..123c768f 100644 --- a/cluster-stamp.bicep +++ b/cluster-stamp.bicep @@ -172,6 +172,30 @@ resource keyVaultSecretsUserRole 'Microsoft.Authorization/roleDefinitions@2018-0 scope: subscription() } +// Built-in Azure RBAC "Storage Blob Data Contributor" role that is applied to the AKS backup managed identity to allow it to write data to storage. +resource storageBlobDataContributorRole 'Microsoft.Authorization/roleDefinitions@2018-01-01-preview' existing = { + name: 'ba92f5b4-2d11-453d-a403-e96b0029c9fe' + scope: subscription() +} + +// Built-in Azure RBAC "Storage Account Contributor" role that is applied to the AKS backup managed identity at the backup storage account scope. +resource storageAccountContributorRole 'Microsoft.Authorization/roleDefinitions@2018-01-01-preview' existing = { + name: '17d1049b-9a84-46fb-8f53-869881c3d3ab' + scope: subscription() +} + +// Built-in Azure RBAC "Reader" role. Used by Backup Vault to see the AKS cluster.
+resource readerRole 'Microsoft.Authorization/roleDefinitions@2022-04-01' existing = { + name: 'acdd72a7-3385-48ef-bd42-f606fba81ae7' + scope: subscription() +} + +// Built-in Azure RBAC "Disk Snapshot Contributor" role. Assigned to the cluster's control plane identity (for AKS Backup). +resource diskSnapshotContributorRole 'Microsoft.Authorization/roleDefinitions@2022-04-01' existing = { + name: '7efff54f-a5b4-42b5-a1c5-5411624893ce' + scope: subscription() +} + /*** EXISTING RESOURCE GROUP RESOURCES ***/ // Useful to think of these as resources that are not tied to the lifecycle of any individual @@ -191,6 +215,26 @@ resource la 'Microsoft.OperationalInsights/workspaces@2021-12-01-preview' existi name: 'la-${clusterName}' } +// Backup Vault (for AKS Backup) and its daily backup policy, both expected to already exist in this resource group +resource bvAksBackupVault 'Microsoft.DataProtection/backupVaults@2023-01-01' existing = { + scope: resourceGroup() + name: 'bvAksBackupVault' + + resource defaultPolicy 'backupPolicies' existing = { + name: 'bp-aks-default-daily' + } +} + +// Storage Account for backups; its default blob service is the parent of this cluster's backup container +resource storageAksBackups 'Microsoft.Storage/storageAccounts@2022-09-01' existing = { + scope: resourceGroup() + name: 'stbackup${subRgUniqueString}' + + resource blobService 'blobServices' existing = { + name: 'default' + } +} + // Kubernetes namespace: a0008 -- this doesn't technically exist prior to deployment, but is required as a resource reference later in the template // to support Azure RBAC-managed API Server access, scoped to the namespace level. #disable-next-line BCP081 // this namespaces child type doesn't have a defined bicep type yet.
@@ -977,6 +1021,7 @@ resource paAKSLinuxRestrictive 'Microsoft.Authorization/policyAssignments@2021-0 'gatekeeper-system' 'azure-arc' 'flux-system' + 'dataprotection-microsoft' // Known violations // K8sAzureAllowedSeccomp @@ -1067,6 +1112,7 @@ resource paRoRootFilesystem 'Microsoft.Authorization/policyAssignments@2021-06-0 'gatekeeper-system' 'azure-arc' 'flux-system' + 'dataprotection-microsoft' ] } excludedContainers: { @@ -1106,6 +1152,7 @@ resource paEnforceResourceLimits 'Microsoft.Authorization/policyAssignments@2021 'gatekeeper-system' 'azure-arc' 'flux-system' + 'dataprotection-microsoft' ] } effect: { @@ -1135,6 +1182,7 @@ resource paEnforceImageSource 'Microsoft.Authorization/policyAssignments@2021-06 'kube-system' 'gatekeeper-system' 'azure-arc' + 'dataprotection-microsoft' ] } effect: { @@ -1160,6 +1208,7 @@ resource paAllowedHostPaths 'Microsoft.Authorization/policyAssignments@2021-06-0 'gatekeeper-system' 'azure-arc' 'flux-system' + 'dataprotection-microsoft' ] } allowedHostPaths: { @@ -1190,6 +1239,7 @@ resource paAllowedExternalIPs 'Microsoft.Authorization/policyAssignments@2021-06 'kube-system' 'gatekeeper-system' 'azure-arc' + 'dataprotection-microsoft' ] } allowedExternalIPs: { @@ -1219,6 +1269,7 @@ resource paDisallowEndpointEditPermissions 'Microsoft.Authorization/policyAssign 'kube-system' 'gatekeeper-system' 'azure-arc' + 'dataprotection-microsoft' ] } effect: { @@ -1244,6 +1295,7 @@ resource paDisallowNamespaceUsage 'Microsoft.Authorization/policyAssignments@202 'kube-system' 'gatekeeper-system' 'azure-arc' + 'dataprotection-microsoft' ] } namespaces: { @@ -1820,13 +1872,13 @@ resource mc 'Microsoft.ContainerService/managedClusters@2023-02-02-preview' = { enabled: false // Azure Blobs } diskCSIDriver: { - enabled: false // Azure Disk + enabled: true // Azure Disk (CSI driver; turned on with the snapshot controller as part of adding AKS Backup) } fileCSIDriver: { enabled: false // Azure Files } snapshotController: { - enabled: false // CSI Snapshotter: https://github.com/kubernetes-csi/external-snapshotter + enabled: true
// CSI Snapshotter: https://github.com/kubernetes-csi/external-snapshotter } } workloadAutoScalerProfile: { @@ -1881,6 +1933,7 @@ resource mc 'Microsoft.ContainerService/managedClusters@2023-02-02-preview' = { sci ndEnsureClusterIdentityHasRbacToSelfManagedResources + mcDiskSnapshotSupport_roleAssignment // Azure Policy for Kubernetes policies that we'd want in place before pods start showing up // in the cluster. The are not technically a dependency from the resource provider perspective, @@ -1911,10 +1964,26 @@ resource mc 'Microsoft.ContainerService/managedClusters@2023-02-02-preview' = { paRbacEnabled paManagedIdentitiesEnabled + // Logical dependency, our backup destination should exist before cluster creation, as the cluster will be + // bootstrapped with backup configured. + backupContainer + peKv kvPodMiIngressControllerKeyVaultReader_roleAssignment kvPodMiIngressControllerSecretsUserRole_roleAssignment ] + + // Trusted access role binding: grants our Backup Vault the 'backup-operator' role on this cluster to support + // AKS Backup + resource trustedAccess 'trustedAccessRoleBindings' = { + name: 'ta-aks-backup' + properties: { + roles: [ + 'Microsoft.DataProtection/backupVaults/backup-operator' + ] + sourceResourceId: bvAksBackupVault.id + } + } } resource acrKubeletAcrPullRole_roleAssignment 'Microsoft.Authorization/roleAssignments@2020-10-01-preview' = { @@ -2082,6 +2151,150 @@ resource mc_fluxConfiguration 'Microsoft.KubernetesConfiguration/fluxConfigurati ] } +// New storage container in the existing storage account specifically for this cluster. +// All clusters could back up to a single container or you can follow a container-per-cluster +// model like shown here. +resource backupContainer 'Microsoft.Storage/storageAccounts/blobServices/containers@2022-09-01' = { + parent: storageAksBackups::blobService + name: toLower('backup-${clusterName}') + properties: { + publicAccess: 'None' + } +} + +// Ensures that data protection (AKS Backup) is installed.
+resource mc_dataProtectionExtension 'Microsoft.KubernetesConfiguration/extensions@2022-11-01' = { + scope: mc + name: 'azure-aks-backup' + properties: { + extensionType: 'microsoft.dataprotection.kubernetes' + autoUpgradeMinorVersion: true + aksAssignedIdentity: { + type: 'SystemAssigned' + } + releaseTrain: 'Stable' + scope: { + cluster: { + releaseNamespace: 'dataprotection-microsoft' + } + } + configurationSettings: { + 'configuration.backupStorageLocation.config.subscriptionId': split(storageAksBackups.id, '/')[2] + 'configuration.backupStorageLocation.config.resourceGroup': split(storageAksBackups.id, '/')[4] + 'configuration.backupStorageLocation.config.storageAccount': storageAksBackups.name + 'configuration.backupStorageLocation.bucket': backupContainer.name + 'configuration.backupStorageLocation.prefix': '' + 'configuration.volumeSnapshotLocation.config.resourceGroup': resourceGroup().id // Using the cluster resource group, if you use another RG, RBAC needs to be adjusted. NOTE(review): this passes the full resource ID, whereas backupStorageLocation.config.resourceGroup above is given the resource group name -- confirm which form the extension expects. + 'configuration.volumeSnapshotLocation.config.incremental': 'false' + 'credentials.tenantId': subscription().tenantId + } + configurationProtectedSettings: {} + } +} + +// The cluster's control plane identity needs Disk Snapshot Contributor on this resource group for AKS backup snapshots +resource mcDiskSnapshotSupport_roleAssignment 'Microsoft.Authorization/roleAssignments@2022-04-01' = { + name: guid(resourceGroup().id, diskSnapshotContributorRole.id, miClusterControlPlane.id) + properties: { + roleDefinitionId: diskSnapshotContributorRole.id + principalId: miClusterControlPlane.properties.principalId + principalType: 'ServicePrincipal' + } +} + +// Grant the Data Protection extension write access to this cluster's backup container in the storage account.
+resource dataProtetionExtensionStorageContainer_roleAssignment 'Microsoft.Authorization/roleAssignments@2020-10-01-preview' = { + scope: backupContainer + name: guid(backupContainer.id, 'mi-dataProtection-extension', storageBlobDataContributorRole.id) + properties: { + roleDefinitionId: storageBlobDataContributorRole.id + principalId: mc_dataProtectionExtension.properties.aksAssignedIdentity.principalId + principalType: 'ServicePrincipal' + } +} + +// Grant the Data Protection extension Storage Account Contributor access to the backup storage account. +resource dataProtetionExtensionStorage_roleAssignment 'Microsoft.Authorization/roleAssignments@2020-10-01-preview' = { + scope: storageAksBackups + name: guid(backupContainer.id, mc_dataProtectionExtension.id, storageAccountContributorRole.id) + properties: { + roleDefinitionId: storageAccountContributorRole.id + principalId: mc_dataProtectionExtension.properties.aksAssignedIdentity.principalId + principalType: 'ServicePrincipal' + } +} + +// Enable daily backups +// All current and future namespaces (no namespace filters); Secrets are excluded and volumes are not snapshotted. +// NOTE(review): no explicit dependsOn on the extension, trusted access binding, or role assignments -- confirm deployment ordering. +resource backupInstance 'Microsoft.DataProtection/backupVaults/backupInstances@2023-01-01' = { + parent: bvAksBackupVault + name: 'bi-${mc.name}' + properties: { + friendlyName: 'bi-${clusterName}' + objectType: 'BackupInstance' + dataSourceSetInfo: { + objectType: 'DatasourceSet' + resourceID: mc.id + resourceType: 'Microsoft.ContainerService/managedClusters' + resourceLocation: location + resourceName: mc.name + resourceUri: mc.id + datasourceType: 'Microsoft.ContainerService/managedClusters' + } + dataSourceInfo: { + objectType: 'Datasource' + resourceID: mc.id + resourceType: 'Microsoft.ContainerService/managedClusters' + resourceLocation: location + resourceName: mc.name + resourceUri: mc.id + datasourceType: 'Microsoft.ContainerService/managedClusters' + } + policyInfo: { + policyId: bvAksBackupVault::defaultPolicy.id + policyParameters: { + dataStoreParametersList: [ + { + objectType:
'AzureOperationalStoreParameters' + dataStoreType: 'OperationalStore' + resourceGroupId: resourceGroup().id + } + ] + backupDatasourceParametersList: [ + { + objectType: 'KubernetesClusterBackupDatasourceParameters' + includeClusterScopeResources: true + snapshotVolumes: false + labelSelectors: null + includedNamespaces: null + excludedNamespaces: null + includedResourceTypes: null + excludedResourceTypes: [ + 'v1/Secret' + ] + } + ] + } + } + datasourceAuthCredentials: null + validationType: null + } +} + + +// Allows the backup vault's identity to see the cluster and the snapshots +// Details: https://learn.microsoft.com/azure/backup/azure-kubernetes-service-cluster-backup-concept#required-roles-and-permissions +resource backupVaultReadClusterAndSnapshots_roleAssignment 'Microsoft.Authorization/roleAssignments@2022-04-01' = { + scope: resourceGroup() // Covers both the cluster and the snapshot resource group + name: guid(bvAksBackupVault.id, mc_dataProtectionExtension.id, readerRole.id) + properties: { + roleDefinitionId: readerRole.id + principalId: bvAksBackupVault.identity.principalId + principalType: 'ServicePrincipal' + } +} + module ndEnsureClusterUserAssignedHasRbacToManageVMSS 'nested_EnsureClusterUserAssignedHasRbacToManageVMSS.bicep' = { name: 'EnsureClusterUserAssignedHasRbacToManageVMSS' scope: nodeResourceGroup diff --git a/networking/hub-regionA.bicep b/networking/hub-regionA.bicep index ec733581..85377dd3 100644 --- a/networking/hub-regionA.bicep +++ b/networking/hub-regionA.bicep @@ -668,6 +668,68 @@ resource fwPolicy 'Microsoft.Network/firewallPolicies@2021-05-01' = { } ] } + { + ruleCollectionType: 'FirewallPolicyFilterRuleCollection' + name: 'AksBackup-Traffic' + priority: 350 + action: { + type: 'Allow' + } + rules: [ + { + ruleType: 'ApplicationRule' + name: 'container-origin' + description: 'Supports pulling AKS backup images.'
+ protocols: [ + { + protocolType: 'Https' + port: 443 + } + ] + fqdnTags: [] + webCategories: [] + targetFqdns: [ + 'mcr.microsoft.com' + 'kubernetesbcdrextensionacr.azurecr.io' + 'pipelineagent.azurecr.io' + 'search.maven.org' + ] + targetUrls: [] + destinationAddresses: [] + terminateTLS: false + sourceAddresses: [] + sourceIpGroups: [ + ipgNodepoolSubnet.id + ] + } + { + ruleType: 'ApplicationRule' + name: 'cert-requirements' + description: 'Supports cert validation required by the AKS backup agent.' + protocols: [ + { + protocolType: 'Http' + port: 80 + } + ] + fqdnTags: [] + webCategories: [] + targetFqdns: [ + 'oneocsp.microsoft.com' + 'ocsp.digicert.com' + 'crl3.digicert.com' + 'www.microsoft.com' + ] + targetUrls: [] + destinationAddresses: [] + terminateTLS: false + sourceAddresses: [] + sourceIpGroups: [ + ipgNodepoolSubnet.id + ] + } + ] + } ] } }