diff --git a/cluster/expected/validator-runbook/expected.json b/cluster/expected/validator-runbook/expected.json index 39d7faa874..ff29a60bbe 100644 --- a/cluster/expected/validator-runbook/expected.json +++ b/cluster/expected/validator-runbook/expected.json @@ -564,6 +564,11 @@ "validatorApiUrl": "http://validator-app:5003" }, "imageRepo": "us-central1-docker.pkg.dev/da-cn-shared/ghcr/digital-asset/decentralized-canton-sync-dev/docker", + "pvc": { + "name": "party-allocator-keys", + "size": "100Gi", + "storageClassName": "standard-rwo" + }, "tolerations": [ { "effect": "NoSchedule", diff --git a/cluster/helm/splice-party-allocator/templates/party-allocator.yaml b/cluster/helm/splice-party-allocator/templates/party-allocator.yaml index 8ed9ee66b5..13075336cd 100644 --- a/cluster/helm/splice-party-allocator/templates/party-allocator.yaml +++ b/cluster/helm/splice-party-allocator/templates/party-allocator.yaml @@ -54,7 +54,7 @@ spec: apiVersion: v1 kind: PersistentVolumeClaim metadata: - name: party-allocator-keys + name: {{ .Values.pvc.name }} namespace: {{ $.Release.Namespace }} annotations: helm.sh/resource-policy: keep @@ -63,8 +63,12 @@ spec: - ReadWriteOnce resources: requests: - storage: 20G + storage: {{ .Values.pvc.size }} storageClassName: {{ .Values.pvc.volumeStorageClass }} + {{- with .Values.pvc.dataSource }} + dataSource: + {{- toYaml . | nindent 4 }} + {{- end }} --- apiVersion: v1 kind: Service @@ -77,8 +81,8 @@ spec: selector: app: {{ .Release.Name }} ports: - - name: metrics - port: 10013 + - name: metrics + port: 10013 --- apiVersion: monitoring.coreos.com/v1 kind: ServiceMonitor @@ -89,11 +93,11 @@ metadata: namespace: {{ .Release.Namespace }} spec: endpoints: - - port: metrics - interval: {{ .Values.metrics.interval }} + - port: metrics + interval: {{ .Values.metrics.interval }} selector: matchLabels: app: {{ .Release.Name }} namespaceSelector: matchNames: - - {{ .Release.Namespace }} + - {{ .Release.Namespace }} diff --git a/cluster/helm/splice-party-allocator/values-template.yaml b/cluster/helm/splice-party-allocator/values-template.yaml index b15306b21f..d307aaaac4 100644 --- a/cluster/helm/splice-party-allocator/values-template.yaml +++ b/cluster/helm/splice-party-allocator/values-template.yaml @@ -4,7 +4,9 @@ imageRepo: "ghcr.io/digital-asset/decentralized-canton-sync/docker" pvc: + name: party-allocator-keys volumeStorageClass: standard-rwo + size: 20Gi metrics: release: prometheus-grafana-monitoring diff --git a/cluster/pulumi/common-sv/src/synchronizer/cometbft.ts b/cluster/pulumi/common-sv/src/synchronizer/cometbft.ts index 96ab189411..74c955c68f 100644 --- a/cluster/pulumi/common-sv/src/synchronizer/cometbft.ts +++ b/cluster/pulumi/common-sv/src/synchronizer/cometbft.ts @@ -9,6 +9,7 @@ import { CLUSTER_HOSTNAME, clusterSmallDisk, config, + createVolumeSnapshot, DomainMigrationIndex, ExactNamespace, InstalledHelmChart, @@ -23,7 +24,6 @@ import { } from '@lfdecentralizedtrust/splice-pulumi-common'; import { CnChartVersion } from '@lfdecentralizedtrust/splice-pulumi-common/src/artifacts'; import { hyperdiskSupportConfig } from '@lfdecentralizedtrust/splice-pulumi-common/src/config/hyperdiskSupportConfig'; -import { CustomResource } from '@pulumi/kubernetes/apiextensions'; import { jsonStringify, Output } from '@pulumi/pulumi'; import { svsConfig } from '../config'; @@ -112,30 +112,15 @@ export function installCometBftNode( volumeStorageClass: standardStorageClassName, }; if (hyperdiskSupportConfig.hyperdiskSupport.migrating) { - const pvcSnapshot = new CustomResource( - `cometbft-${xns.logicalName}-migration-${migrationId}-snapshot`, - { - apiVersion: 'snapshot.storage.k8s.io/v1', - kind: 'VolumeSnapshot', - metadata: { - name: `cometbft-migration-${migrationId}-pd-snapshot`, - namespace: xns.logicalName, - }, - spec: { - volumeSnapshotClassName: 'dev-vsc', - source: { - persistentVolumeClaimName: `global-domain-${migrationId}-cometbft-cometbft-data`, - }, - }, - } - ); + const { dataSource } = createVolumeSnapshot({ + resourceName: `cometbft-${xns.logicalName}-migration-${migrationId}-snapshot`, + snapshotName: `cometbft-migration-${migrationId}-pd-snapshot`, + namespace: xns.logicalName, + pvcName: `global-domain-${migrationId}-cometbft-cometbft-data`, + }); hyperdiskDbValues = { ...hyperdiskDbValues, - dataSource: { - kind: 'VolumeSnapshot', - name: pvcSnapshot.metadata.name, - apiGroup: 'snapshot.storage.k8s.io', - }, + dataSource, }; } } diff --git a/cluster/pulumi/common-validator/src/config.ts b/cluster/pulumi/common-validator/src/config.ts index fb403c34db..aa18329c7f 100644 --- a/cluster/pulumi/common-validator/src/config.ts +++ b/cluster/pulumi/common-validator/src/config.ts @@ -111,6 +111,7 @@ export const PartyAllocatorConfigSchema = z.object({ maxParties: z.number().default(1000000), preapprovalRetries: z.number().default(120), preapprovalRetryDelayMs: z.number().default(1000), + pvcSize: z.string().default('100Gi'), }); export type PartyAllocatorConfig = z.infer; diff --git a/cluster/pulumi/common/src/config/hyperdiskSupportConfig.ts b/cluster/pulumi/common/src/config/hyperdiskSupportConfig.ts index 18d2f21354..f127b8a531 100644 --- a/cluster/pulumi/common/src/config/hyperdiskSupportConfig.ts +++ b/cluster/pulumi/common/src/config/hyperdiskSupportConfig.ts @@ -8,7 +8,9 @@ const HyperdiskSupportConfigSchema = z.object({ hyperdiskSupport: z .object({ enabled: z.boolean().default(false), + enabledForInfra: z.boolean().default(false), migrating: z.boolean().default(false), + migratingInfra: z.boolean().default(false), }) .default({}), }); diff --git a/cluster/pulumi/common/src/index.ts b/cluster/pulumi/common/src/index.ts index 677551b158..3d55073cba 100644 --- a/cluster/pulumi/common/src/index.ts +++ b/cluster/pulumi/common/src/index.ts @@ -30,3 +30,4 @@ export * from './config/networkWideConfig'; export * from './ratelimit'; export * from './config/config'; export * from './storage/storageClass'; +export * from './storage/volumeSnapshot'; diff --git a/cluster/pulumi/common/src/postgres.ts b/cluster/pulumi/common/src/postgres.ts index 01f5b86499..261e59f1d6 100644 --- a/cluster/pulumi/common/src/postgres.ts +++ b/cluster/pulumi/common/src/postgres.ts @@ -4,7 +4,6 @@ import * as gcp from '@pulumi/gcp'; import * as pulumi from '@pulumi/pulumi'; import * as random from '@pulumi/random'; import * as _ from 'lodash'; -import { CustomResource } from '@pulumi/kubernetes/apiextensions'; import { Resource } from '@pulumi/pulumi'; import { CnChartVersion } from './artifacts'; @@ -18,6 +17,7 @@ import { } from './helm'; import { installPostgresPasswordSecret } from './secrets'; import { standardStorageClassName } from './storage/storageClass'; +import { createVolumeSnapshot } from './storage/volumeSnapshot'; import { ChartValues, CLUSTER_BASENAME, ExactNamespace, GCP_ZONE } from './utils'; const project = gcp.organizations.getProjectOutput({}); @@ -227,34 +227,22 @@ export class SplicePostgres extends pulumi.ComponentResource implements Postgres // an initial database named cantonnet is created automatically (configured in the Helm chart). const smallDiskSize = clusterSmallDisk ? '240Gi' : undefined; - const supportsHyperdisk = - hyperdiskSupportConfig.hyperdiskSupport.enabled && !useInfraAffinityAndTolerations; + const supportsHyperdisk = useInfraAffinityAndTolerations + ? hyperdiskSupportConfig.hyperdiskSupport.enabledForInfra + : hyperdiskSupportConfig.hyperdiskSupport.enabled; + const migratingToHyperdisk = useInfraAffinityAndTolerations + ? hyperdiskSupportConfig.hyperdiskSupport.migratingInfra + : hyperdiskSupportConfig.hyperdiskSupport.migrating; + let hyperdiskMigrationValues = {}; - if (supportsHyperdisk && hyperdiskSupportConfig.hyperdiskSupport.migrating) { - const pvcSnapshot = new CustomResource( - `pg-data-${xns.logicalName}-${instanceName}-snapshot`, - { - apiVersion: 'snapshot.storage.k8s.io/v1', - kind: 'VolumeSnapshot', - metadata: { - name: `pg-data-${instanceName}-snapshot`, - namespace: xns.logicalName, - }, - spec: { - volumeSnapshotClassName: 'dev-vsc', - source: { - persistentVolumeClaimName: `pg-data-${instanceName}-0`, - }, - }, - } - ); - hyperdiskMigrationValues = { - dataSource: { - kind: 'VolumeSnapshot', - name: pvcSnapshot.metadata.name, - apiGroup: 'snapshot.storage.k8s.io', - }, - }; + if (supportsHyperdisk && migratingToHyperdisk) { + const { dataSource } = createVolumeSnapshot({ + resourceName: `pg-data-${xns.logicalName}-${instanceName}-snapshot`, + snapshotName: `pg-data-${instanceName}-snapshot`, + namespace: xns.logicalName, + pvcName: `pg-data-${instanceName}-0`, + }); + hyperdiskMigrationValues = { dataSource }; } const pg = installSpliceHelmChart( xns, @@ -284,7 +272,7 @@ export class SplicePostgres extends pulumi.ComponentResource implements Postgres ...((supportsHyperdisk && // during the migration we first delete the stateful set, which keeps the old pvcs (stateful sets always keep the pvcs), and then recreate with the new pvcs // the stateful sets are immutable so they need to be recreated to force the change of the pvcs - hyperdiskSupportConfig.hyperdiskSupport.migrating) || + migratingToHyperdisk) || spliceConfig.pulumiProjectConfig.replacePostgresStatefulSetOnChanges ? { replaceOnChanges: ['*'], diff --git a/cluster/pulumi/common/src/storage/storageClass.ts b/cluster/pulumi/common/src/storage/storageClass.ts index f843ea3561..1d22489a24 100644 --- a/cluster/pulumi/common/src/storage/storageClass.ts +++ b/cluster/pulumi/common/src/storage/storageClass.ts @@ -6,4 +6,11 @@ export const standardStorageClassName = hyperdiskSupportConfig.hyperdiskSupport. ? 'hyperdisk-standard-rwo' : 'standard-rwo'; +export const infraStandardStorageClassName = hyperdiskSupportConfig.hyperdiskSupport.enabled + ? 'hyperdisk-standard-rwo' + : 'standard-rwo'; + +export const infraPremiumStorageClassName = hyperdiskSupportConfig.hyperdiskSupport.enabled + ? 'hyperdisk-balanced-rwo' + : 'premium-rwo'; export const pvcSuffix = hyperdiskSupportConfig.hyperdiskSupport.enabled ? 'hd-pvc' : 'pvc'; diff --git a/cluster/pulumi/common/src/storage/volumeSnapshot.ts b/cluster/pulumi/common/src/storage/volumeSnapshot.ts new file mode 100644 index 0000000000..4778d63f8e --- /dev/null +++ b/cluster/pulumi/common/src/storage/volumeSnapshot.ts @@ -0,0 +1,47 @@ +// Copyright (c) 2024 Digital Asset (Switzerland) GmbH and/or its affiliates. All rights reserved. +// SPDX-License-Identifier: Apache-2.0 +import { CustomResource } from '@pulumi/kubernetes/apiextensions'; +import { Output } from '@pulumi/pulumi'; + +export interface VolumeSnapshotOptions { + resourceName: string; + snapshotName: string; + namespace: string; + pvcName: string; + volumeSnapshotClassName?: string; +} +export function createVolumeSnapshot(options: VolumeSnapshotOptions): { + snapshot: CustomResource; + dataSource: { kind: string; name: Output; apiGroup: string }; +} { + const { + resourceName, + snapshotName, + namespace, + pvcName, + volumeSnapshotClassName = 'dev-vsc', + } = options; + + const snapshot = new CustomResource(resourceName, { + apiVersion: 'snapshot.storage.k8s.io/v1', + kind: 'VolumeSnapshot', + metadata: { + name: snapshotName, + namespace: namespace, + }, + spec: { + volumeSnapshotClassName: volumeSnapshotClassName, + source: { + persistentVolumeClaimName: pvcName, + }, + }, + }); + + const dataSource = { + kind: 'VolumeSnapshot', + name: snapshot.metadata.name, + apiGroup: 'snapshot.storage.k8s.io', + }; + + return { snapshot, dataSource }; +} diff --git a/cluster/pulumi/infra/src/observability.ts b/cluster/pulumi/infra/src/observability.ts index 5900666b40..125da80c7d 100644 --- a/cluster/pulumi/infra/src/observability.ts +++ b/cluster/pulumi/infra/src/observability.ts @@ -11,11 +11,14 @@ import { CLUSTER_NAME, clusterProdLike, commandScriptPath, + createVolumeSnapshot, ExactNamespace, GCP_PROJECT, GrafanaKeys, HELM_MAX_HISTORY_SIZE, infraAffinityAndTolerations, + infraPremiumStorageClassName, + infraStandardStorageClassName, isMainNet, loadTesterConfig, ObservabilityReleaseName, @@ -32,6 +35,7 @@ import { local } from '@pulumi/command'; import { getSecretVersionOutput } from '@pulumi/gcp/secretmanager/getSecretVersion'; import { Input } from '@pulumi/pulumi'; +import { hyperdiskSupportConfig } from '../../common/src/config/hyperdiskSupportConfig'; import { clusterIsResetPeriodically, enableAlertEmailToSupportTeam, @@ -91,9 +95,9 @@ const grafanaExternalUrl = `https://grafana.${CLUSTER_HOSTNAME}`; const alertManagerExternalUrl = `https://alertmanager.${CLUSTER_HOSTNAME}`; const prometheusExternalUrl = `https://prometheus.${CLUSTER_HOSTNAME}`; const shouldIgnoreNoDataOrDataSourceError = clusterIsResetPeriodically; +const namespaceName = 'observability'; export function configureObservability(dependsOn: pulumi.Resource[] = []): pulumi.Resource { - const namespaceName = 'observability'; const namespace = new k8s.core.v1.Namespace( namespaceName, { @@ -116,6 +120,7 @@ export function configureObservability(dependsOn: pulumi.Resource[] = []): pulum const prometheusStackCrdVersion = '0.85.0'; const postgres = installPostgres({ ns: namespace, logicalName: namespaceName }); const adminPassword = grafanaKeysFromSecret().adminPassword; + const migrationSnapshots = getVolumeSnapshotsForHyperdiskMigration(); const prometheusStack = new k8s.helm.v3.Release( 'observability-metrics', { @@ -205,13 +210,14 @@ export function configureObservability(dependsOn: pulumi.Resource[] = []): pulum storage: { volumeClaimTemplate: { spec: { - storageClassName: 'standard-rwo', + storageClassName: infraStandardStorageClassName, accessModes: ['ReadWriteOnce'], resources: { requests: { storage: '5Gi', }, }, + ...(migrationSnapshots.alertManager ? migrationSnapshots.alertManager : {}), }, }, }, @@ -266,13 +272,14 @@ export function configureObservability(dependsOn: pulumi.Resource[] = []): pulum storageSpec: { volumeClaimTemplate: { spec: { - storageClassName: 'premium-rwo', + storageClassName: infraPremiumStorageClassName, accessModes: ['ReadWriteOnce'], resources: { requests: { storage: infraConfig.prometheus.storageSize, }, }, + ...(migrationSnapshots.prometheus ? migrationSnapshots.prometheus : {}), }, }, }, @@ -398,7 +405,7 @@ export function configureObservability(dependsOn: pulumi.Resource[] = []): pulum type: 'Recreate', }, persistence: { - enabled: true, + enabled: !hyperdiskSupportConfig.hyperdiskSupport.migratingInfra, type: 'pvc', accessModes: ['ReadWriteOnce'], size: '5Gi', @@ -1065,3 +1072,30 @@ function installPostgres(namespace: ExactNamespace): SplicePostgres { true // useInfraAffinityAndTolerations ); } + +function getVolumeSnapshotsForHyperdiskMigration() { + if (hyperdiskSupportConfig.hyperdiskSupport.migratingInfra) { + const { dataSource: prometheusDataSource } = createVolumeSnapshot({ + resourceName: `prometheus-hd-migration-snapshot`, + snapshotName: `prometheus-migration-snapshot`, + namespace: namespaceName, + pvcName: `prometheus-prometheus-prometheus-db-prometheus-prometheus-prometheus-0`, + }); + const { dataSource: alertManagerDataSource } = createVolumeSnapshot({ + resourceName: `alertmanager-hd-migration-snapshot`, + snapshotName: `alertmanager-migration-snapshot`, + namespace: namespaceName, + pvcName: `alertmanager-prometheus-alertmanager-db-alertmanager-prometheus-alertmanager-0`, + }); + return { + prometheus: { + dataSource: prometheusDataSource, + }, + alertManager: { + dataSource: alertManagerDataSource, + }, + }; + } else { + return {}; + } +} diff --git a/cluster/pulumi/multi-validator/src/postgres.ts b/cluster/pulumi/multi-validator/src/postgres.ts index 7d8b63ad89..66e56738e6 100644 --- a/cluster/pulumi/multi-validator/src/postgres.ts +++ b/cluster/pulumi/multi-validator/src/postgres.ts @@ -12,8 +12,8 @@ import { installSpliceRunbookHelmChart, spliceConfig, standardStorageClassName, + createVolumeSnapshot, } from '@lfdecentralizedtrust/splice-pulumi-common'; -import { CustomResource } from '@pulumi/kubernetes/apiextensions'; import { hyperdiskSupportConfig } from '../../common/src/config/hyperdiskSupportConfig'; import { multiValidatorConfig } from './config'; @@ -41,27 +41,13 @@ export function installPostgres( hyperdiskSupportConfig.hyperdiskSupport.enabled && hyperdiskSupportConfig.hyperdiskSupport.migrating ) { - const pvcSnapshot = new CustomResource(`pg-data-${xns.logicalName}-${name}-snapshot`, { - apiVersion: 'snapshot.storage.k8s.io/v1', - kind: 'VolumeSnapshot', - metadata: { - name: `pg-data-${name}-snapshot`, - namespace: xns.logicalName, - }, - spec: { - volumeSnapshotClassName: 'dev-vsc', - source: { - persistentVolumeClaimName: `pg-data-${name}-0`, - }, - }, + const { dataSource } = createVolumeSnapshot({ + resourceName: `pg-data-${xns.logicalName}-${name}-snapshot`, + snapshotName: `pg-data-${name}-snapshot`, + namespace: xns.logicalName, + pvcName: `pg-data-${name}-0`, }); - hyperdiskMigrationValues = { - dataSource: { - kind: 'VolumeSnapshot', - name: pvcSnapshot.metadata.name, - apiGroup: 'snapshot.storage.k8s.io', - }, - }; + hyperdiskMigrationValues = { dataSource }; } return installSpliceRunbookHelmChart( xns, diff --git a/cluster/pulumi/validator-runbook/src/partyAllocator.ts b/cluster/pulumi/validator-runbook/src/partyAllocator.ts index eb69787f9f..cd3f12d6e0 100644 --- a/cluster/pulumi/validator-runbook/src/partyAllocator.ts +++ b/cluster/pulumi/validator-runbook/src/partyAllocator.ts @@ -4,18 +4,34 @@ import * as pulumi from '@pulumi/pulumi'; import { activeVersion, CnInput, + createVolumeSnapshot, DecentralizedSynchronizerUpgradeConfig, ExactNamespace, InstalledHelmChart, installSpliceHelmChart, + standardStorageClassName, } from '@lfdecentralizedtrust/splice-pulumi-common'; import { PartyAllocatorConfig } from '@lfdecentralizedtrust/splice-pulumi-common-validator'; +import { hyperdiskSupportConfig } from '../../common/src/config/hyperdiskSupportConfig'; + export function installPartyAllocator( xns: ExactNamespace, config: PartyAllocatorConfig, dependsOn: CnInput[] ): InstalledHelmChart { + const dataSource = + hyperdiskSupportConfig.hyperdiskSupport.enabled && + hyperdiskSupportConfig.hyperdiskSupport.migrating + ? { + dataSource: createVolumeSnapshot({ + resourceName: `party-allocator-keys-migration-snapshot`, + snapshotName: `party-allocator-keys-snapshot`, + namespace: xns.logicalName, + pvcName: `party-allocator-keys`, + }).dataSource, + } + : {}; return installSpliceHelmChart( xns, 'party-allocator', @@ -33,6 +49,14 @@ export function installPartyAllocator( preapprovalRetries: config.preapprovalRetries, preapprovalRetryDelayMs: config.preapprovalRetryDelayMs, }, + pvc: { + size: config.pvcSize, + storageClassName: standardStorageClassName, + name: hyperdiskSupportConfig.hyperdiskSupport.enabled + ? 'party-allocator-keys-hd-pvc' + : 'party-allocator-keys', + ...dataSource, + }, }, activeVersion, { dependsOn }