Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions cluster/expected/validator-runbook/expected.json
Original file line number Diff line number Diff line change
Expand Up @@ -564,6 +564,11 @@
"validatorApiUrl": "http://validator-app:5003"
},
"imageRepo": "us-central1-docker.pkg.dev/da-cn-shared/ghcr/digital-asset/decentralized-canton-sync-dev/docker",
"pvc": {
"name": "party-allocator-keys",
"size": "100Gi",
"storageClassName": "standard-rwo"
},
"tolerations": [
{
"effect": "NoSchedule",
Expand Down
18 changes: 11 additions & 7 deletions cluster/helm/splice-party-allocator/templates/party-allocator.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,7 @@ spec:
apiVersion: v1
kind: PersistentVolumeClaim
metadata:
name: party-allocator-keys
name: {{ .Values.pvc.name }}
namespace: {{ $.Release.Namespace }}
annotations:
helm.sh/resource-policy: keep
Expand All @@ -63,8 +63,12 @@ spec:
- ReadWriteOnce
resources:
requests:
storage: 20G
storage: {{ .Values.pvc.size }}
storageClassName: {{ .Values.pvc.volumeStorageClass }}
{{- with .Values.pvc.dataSource }}
dataSource:
{{- toYaml . | nindent 4 }}
{{- end }}
---
apiVersion: v1
kind: Service
Expand All @@ -77,8 +81,8 @@ spec:
selector:
app: {{ .Release.Name }}
ports:
- name: metrics
port: 10013
- name: metrics
port: 10013
---
apiVersion: monitoring.coreos.com/v1
kind: ServiceMonitor
Expand All @@ -89,11 +93,11 @@ metadata:
namespace: {{ .Release.Namespace }}
spec:
endpoints:
- port: metrics
interval: {{ .Values.metrics.interval }}
- port: metrics
interval: {{ .Values.metrics.interval }}
selector:
matchLabels:
app: {{ .Release.Name }}
namespaceSelector:
matchNames:
- {{ .Release.Namespace }}
- {{ .Release.Namespace }}
2 changes: 2 additions & 0 deletions cluster/helm/splice-party-allocator/values-template.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,9 @@
imageRepo: "ghcr.io/digital-asset/decentralized-canton-sync/docker"

pvc:
name: party-allocator-keys
volumeStorageClass: standard-rwo
size: 20Gi

metrics:
release: prometheus-grafana-monitoring
Expand Down
31 changes: 8 additions & 23 deletions cluster/pulumi/common-sv/src/synchronizer/cometbft.ts
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ import {
CLUSTER_HOSTNAME,
clusterSmallDisk,
config,
createVolumeSnapshot,
DomainMigrationIndex,
ExactNamespace,
InstalledHelmChart,
Expand All @@ -23,7 +24,6 @@ import {
} from '@lfdecentralizedtrust/splice-pulumi-common';
import { CnChartVersion } from '@lfdecentralizedtrust/splice-pulumi-common/src/artifacts';
import { hyperdiskSupportConfig } from '@lfdecentralizedtrust/splice-pulumi-common/src/config/hyperdiskSupportConfig';
import { CustomResource } from '@pulumi/kubernetes/apiextensions';
import { jsonStringify, Output } from '@pulumi/pulumi';

import { svsConfig } from '../config';
Expand Down Expand Up @@ -112,30 +112,15 @@ export function installCometBftNode(
volumeStorageClass: standardStorageClassName,
};
if (hyperdiskSupportConfig.hyperdiskSupport.migrating) {
const pvcSnapshot = new CustomResource(
`cometbft-${xns.logicalName}-migration-${migrationId}-snapshot`,
{
apiVersion: 'snapshot.storage.k8s.io/v1',
kind: 'VolumeSnapshot',
metadata: {
name: `cometbft-migration-${migrationId}-pd-snapshot`,
namespace: xns.logicalName,
},
spec: {
volumeSnapshotClassName: 'dev-vsc',
source: {
persistentVolumeClaimName: `global-domain-${migrationId}-cometbft-cometbft-data`,
},
},
}
);
const { dataSource } = createVolumeSnapshot({
resourceName: `cometbft-${xns.logicalName}-migration-${migrationId}-snapshot`,
snapshotName: `cometbft-migration-${migrationId}-pd-snapshot`,
namespace: xns.logicalName,
pvcName: `global-domain-${migrationId}-cometbft-cometbft-data`,
});
hyperdiskDbValues = {
...hyperdiskDbValues,
dataSource: {
kind: 'VolumeSnapshot',
name: pvcSnapshot.metadata.name,
apiGroup: 'snapshot.storage.k8s.io',
},
dataSource,
};
}
}
Expand Down
1 change: 1 addition & 0 deletions cluster/pulumi/common-validator/src/config.ts
Original file line number Diff line number Diff line change
Expand Up @@ -111,6 +111,7 @@ export const PartyAllocatorConfigSchema = z.object({
maxParties: z.number().default(1000000),
preapprovalRetries: z.number().default(120),
preapprovalRetryDelayMs: z.number().default(1000),
pvcSize: z.string().default('100Gi'),
});
export type PartyAllocatorConfig = z.infer<typeof PartyAllocatorConfigSchema>;

Expand Down
2 changes: 2 additions & 0 deletions cluster/pulumi/common/src/config/hyperdiskSupportConfig.ts
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,9 @@ const HyperdiskSupportConfigSchema = z.object({
hyperdiskSupport: z
.object({
enabled: z.boolean().default(false),
enabledForInfra: z.boolean().default(false),
migrating: z.boolean().default(false),
migratingInfra: z.boolean().default(false),
})
.default({}),
});
Expand Down
1 change: 1 addition & 0 deletions cluster/pulumi/common/src/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -30,3 +30,4 @@ export * from './config/networkWideConfig';
export * from './ratelimit';
export * from './config/config';
export * from './storage/storageClass';
export * from './storage/volumeSnapshot';
46 changes: 17 additions & 29 deletions cluster/pulumi/common/src/postgres.ts
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,6 @@ import * as gcp from '@pulumi/gcp';
import * as pulumi from '@pulumi/pulumi';
import * as random from '@pulumi/random';
import * as _ from 'lodash';
import { CustomResource } from '@pulumi/kubernetes/apiextensions';
import { Resource } from '@pulumi/pulumi';

import { CnChartVersion } from './artifacts';
Expand All @@ -18,6 +17,7 @@ import {
} from './helm';
import { installPostgresPasswordSecret } from './secrets';
import { standardStorageClassName } from './storage/storageClass';
import { createVolumeSnapshot } from './storage/volumeSnapshot';
import { ChartValues, CLUSTER_BASENAME, ExactNamespace, GCP_ZONE } from './utils';

const project = gcp.organizations.getProjectOutput({});
Expand Down Expand Up @@ -227,34 +227,22 @@ export class SplicePostgres extends pulumi.ComponentResource implements Postgres

// an initial database named cantonnet is created automatically (configured in the Helm chart).
const smallDiskSize = clusterSmallDisk ? '240Gi' : undefined;
const supportsHyperdisk =
hyperdiskSupportConfig.hyperdiskSupport.enabled && !useInfraAffinityAndTolerations;
const supportsHyperdisk = useInfraAffinityAndTolerations
? hyperdiskSupportConfig.hyperdiskSupport.enabledForInfra
: hyperdiskSupportConfig.hyperdiskSupport.enabled;
const migratingToHyperdisk = useInfraAffinityAndTolerations
? hyperdiskSupportConfig.hyperdiskSupport.migratingInfra
: hyperdiskSupportConfig.hyperdiskSupport.migrating;

let hyperdiskMigrationValues = {};
if (supportsHyperdisk && hyperdiskSupportConfig.hyperdiskSupport.migrating) {
const pvcSnapshot = new CustomResource(
`pg-data-${xns.logicalName}-${instanceName}-snapshot`,
{
apiVersion: 'snapshot.storage.k8s.io/v1',
kind: 'VolumeSnapshot',
metadata: {
name: `pg-data-${instanceName}-snapshot`,
namespace: xns.logicalName,
},
spec: {
volumeSnapshotClassName: 'dev-vsc',
source: {
persistentVolumeClaimName: `pg-data-${instanceName}-0`,
},
},
}
);
hyperdiskMigrationValues = {
dataSource: {
kind: 'VolumeSnapshot',
name: pvcSnapshot.metadata.name,
apiGroup: 'snapshot.storage.k8s.io',
},
};
if (supportsHyperdisk && migratingToHyperdisk) {
const { dataSource } = createVolumeSnapshot({
resourceName: `pg-data-${xns.logicalName}-${instanceName}-snapshot`,
snapshotName: `pg-data-${instanceName}-snapshot`,
namespace: xns.logicalName,
pvcName: `pg-data-${instanceName}-0`,
});
hyperdiskMigrationValues = { dataSource };
}
const pg = installSpliceHelmChart(
xns,
Expand Down Expand Up @@ -284,7 +272,7 @@ export class SplicePostgres extends pulumi.ComponentResource implements Postgres
...((supportsHyperdisk &&
// during the migration we first delete the stateful set, which keeps the old pvcs (stateful sets always keep the pvcs), and then recreate with the new pvcs
// the stateful sets are immutable so they need to be recreated to force the change of the pvcs
hyperdiskSupportConfig.hyperdiskSupport.migrating) ||
migratingToHyperdisk) ||
spliceConfig.pulumiProjectConfig.replacePostgresStatefulSetOnChanges
? {
replaceOnChanges: ['*'],
Expand Down
7 changes: 7 additions & 0 deletions cluster/pulumi/common/src/storage/storageClass.ts
Original file line number Diff line number Diff line change
Expand Up @@ -6,4 +6,11 @@ export const standardStorageClassName = hyperdiskSupportConfig.hyperdiskSupport.
? 'hyperdisk-standard-rwo'
: 'standard-rwo';

// Storage classes for infra workloads (e.g. the observability stack).
// They are gated on the infra-specific `enabledForInfra` flag rather than `enabled`, so that
// turning hyperdisk on for application workloads does not implicitly switch the infra PVCs
// as well; this matches how SplicePostgres picks its flag for infra instances.
export const infraStandardStorageClassName = hyperdiskSupportConfig.hyperdiskSupport.enabledForInfra
  ? 'hyperdisk-standard-rwo'
  : 'standard-rwo';

export const infraPremiumStorageClassName = hyperdiskSupportConfig.hyperdiskSupport.enabledForInfra
  ? 'hyperdisk-balanced-rwo'
  : 'premium-rwo';
export const pvcSuffix = hyperdiskSupportConfig.hyperdiskSupport.enabled ? 'hd-pvc' : 'pvc';
47 changes: 47 additions & 0 deletions cluster/pulumi/common/src/storage/volumeSnapshot.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
// Copyright (c) 2024 Digital Asset (Switzerland) GmbH and/or its affiliates. All rights reserved.
// SPDX-License-Identifier: Apache-2.0
import { CustomResource } from '@pulumi/kubernetes/apiextensions';
import { Output } from '@pulumi/pulumi';

/** Inputs accepted by `createVolumeSnapshot`. */
export interface VolumeSnapshotOptions {
/** Pulumi resource name given to the VolumeSnapshot custom resource. */
resourceName: string;
/** Value written to `metadata.name` of the VolumeSnapshot. */
snapshotName: string;
/** Value written to `metadata.namespace` of the VolumeSnapshot. */
namespace: string;
/** Name of the PersistentVolumeClaim used as the snapshot source. */
pvcName: string;
/** VolumeSnapshotClass to use; `createVolumeSnapshot` falls back to 'dev-vsc' when omitted. */
volumeSnapshotClassName?: string;
}
/**
 * Declares a `snapshot.storage.k8s.io/v1` VolumeSnapshot taken from an existing PVC and
 * builds the `dataSource` object that refers to that snapshot by name.
 *
 * @param options - names of the Pulumi resource, the snapshot, its namespace and the source
 *   PVC; `volumeSnapshotClassName` is optional and defaults to 'dev-vsc'.
 * @returns the created custom resource together with a `dataSource` reference
 *   (kind / name / apiGroup) pointing at it.
 */
export function createVolumeSnapshot(options: VolumeSnapshotOptions): {
  snapshot: CustomResource;
  dataSource: { kind: string; name: Output<string>; apiGroup: string };
} {
  // Only the snapshot class has a fallback; every other option is required.
  const { volumeSnapshotClassName = 'dev-vsc' } = options;

  const snapshot = new CustomResource(options.resourceName, {
    apiVersion: 'snapshot.storage.k8s.io/v1',
    kind: 'VolumeSnapshot',
    metadata: {
      name: options.snapshotName,
      namespace: options.namespace,
    },
    spec: {
      volumeSnapshotClassName,
      source: {
        persistentVolumeClaimName: options.pvcName,
      },
    },
  });

  return {
    snapshot,
    // The name is read back from the resource's metadata output rather than from the options.
    dataSource: {
      kind: 'VolumeSnapshot',
      name: snapshot.metadata.name,
      apiGroup: 'snapshot.storage.k8s.io',
    },
  };
}
42 changes: 38 additions & 4 deletions cluster/pulumi/infra/src/observability.ts
Original file line number Diff line number Diff line change
Expand Up @@ -11,11 +11,14 @@ import {
CLUSTER_NAME,
clusterProdLike,
commandScriptPath,
createVolumeSnapshot,
ExactNamespace,
GCP_PROJECT,
GrafanaKeys,
HELM_MAX_HISTORY_SIZE,
infraAffinityAndTolerations,
infraPremiumStorageClassName,
infraStandardStorageClassName,
isMainNet,
loadTesterConfig,
ObservabilityReleaseName,
Expand All @@ -32,6 +35,7 @@ import { local } from '@pulumi/command';
import { getSecretVersionOutput } from '@pulumi/gcp/secretmanager/getSecretVersion';
import { Input } from '@pulumi/pulumi';

import { hyperdiskSupportConfig } from '../../common/src/config/hyperdiskSupportConfig';
import {
clusterIsResetPeriodically,
enableAlertEmailToSupportTeam,
Expand Down Expand Up @@ -91,9 +95,9 @@ const grafanaExternalUrl = `https://grafana.${CLUSTER_HOSTNAME}`;
const alertManagerExternalUrl = `https://alertmanager.${CLUSTER_HOSTNAME}`;
const prometheusExternalUrl = `https://prometheus.${CLUSTER_HOSTNAME}`;
const shouldIgnoreNoDataOrDataSourceError = clusterIsResetPeriodically;
const namespaceName = 'observability';

export function configureObservability(dependsOn: pulumi.Resource[] = []): pulumi.Resource {
const namespaceName = 'observability';
const namespace = new k8s.core.v1.Namespace(
namespaceName,
{
Expand All @@ -116,6 +120,7 @@ export function configureObservability(dependsOn: pulumi.Resource[] = []): pulum
const prometheusStackCrdVersion = '0.85.0';
const postgres = installPostgres({ ns: namespace, logicalName: namespaceName });
const adminPassword = grafanaKeysFromSecret().adminPassword;
const migrationSnapshots = getVolumeSnapshotsForHyperdiskMigration();
const prometheusStack = new k8s.helm.v3.Release(
'observability-metrics',
{
Expand Down Expand Up @@ -205,13 +210,14 @@ export function configureObservability(dependsOn: pulumi.Resource[] = []): pulum
storage: {
volumeClaimTemplate: {
spec: {
storageClassName: 'standard-rwo',
storageClassName: infraStandardStorageClassName,
accessModes: ['ReadWriteOnce'],
resources: {
requests: {
storage: '5Gi',
},
},
...(migrationSnapshots.alertManager ? migrationSnapshots.alertManager : {}),
},
},
},
Expand Down Expand Up @@ -266,13 +272,14 @@ export function configureObservability(dependsOn: pulumi.Resource[] = []): pulum
storageSpec: {
volumeClaimTemplate: {
spec: {
storageClassName: 'premium-rwo',
storageClassName: infraPremiumStorageClassName,
accessModes: ['ReadWriteOnce'],
resources: {
requests: {
storage: infraConfig.prometheus.storageSize,
},
},
...(migrationSnapshots.prometheus ? migrationSnapshots.prometheus : {}),
},
},
},
Expand Down Expand Up @@ -398,7 +405,7 @@ export function configureObservability(dependsOn: pulumi.Resource[] = []): pulum
type: 'Recreate',
},
persistence: {
enabled: true,
enabled: !hyperdiskSupportConfig.hyperdiskSupport.migratingInfra,
type: 'pvc',
accessModes: ['ReadWriteOnce'],
size: '5Gi',
Expand Down Expand Up @@ -1065,3 +1072,30 @@ function installPostgres(namespace: ExactNamespace): SplicePostgres {
true // useInfraAffinityAndTolerations
);
}

/**
 * Creates volume snapshots of the Prometheus and Alertmanager PVCs while the infra hyperdisk
 * migration flag (`migratingInfra`) is set.
 *
 * @returns an object with `prometheus` and `alertManager` entries, each holding the
 *   `dataSource` of its snapshot; an empty object when `migratingInfra` is not set.
 */
function getVolumeSnapshotsForHyperdiskMigration() {
  // Not migrating: create no snapshots and hand back nothing to merge.
  if (!hyperdiskSupportConfig.hyperdiskSupport.migratingInfra) {
    return {};
  }

  const prometheusSnapshot = createVolumeSnapshot({
    resourceName: 'prometheus-hd-migration-snapshot',
    snapshotName: 'prometheus-migration-snapshot',
    namespace: namespaceName,
    pvcName: 'prometheus-prometheus-prometheus-db-prometheus-prometheus-prometheus-0',
  });
  const alertManagerSnapshot = createVolumeSnapshot({
    resourceName: 'alertmanager-hd-migration-snapshot',
    snapshotName: 'alertmanager-migration-snapshot',
    namespace: namespaceName,
    pvcName: 'alertmanager-prometheus-alertmanager-db-alertmanager-prometheus-alertmanager-0',
  });

  return {
    prometheus: { dataSource: prometheusSnapshot.dataSource },
    alertManager: { dataSource: alertManagerSnapshot.dataSource },
  };
}
Loading