Skip to content

Commit 9953c68

Browse files
committed
MGMT-23352: Fix disconnected deployment with dev-scripts

Fix disconnected deployment, enclave DNS/config, and Quay LVMS support.

Disconnected and enclave:
- Add BASE_DOMAIN=lab in dev-scripts config so CLUSTER_DOMAIN is enclave-test.lab
- Add mirror -> LZ IP to virsh network DNS after provisioning (mirror.<domain> resolution)
- Fix pullSecretPath to config/pull-secret.json (was .config/) for install-config.yaml
- Quay backend storage_path: /var/quaydata -> /datastorage/registry
- Increase VM extra disk size to 120G (configure_devscripts.sh)

Quay operator:
- Add quay_lvms.yaml: LVMS PVC, QuayRegistry, and deployment patch for registry storage
- Split tasks.yaml: ODF backend (existing QuayRegistry) vs LVMS (include quay_lvms.yaml)
- ODF branch: HPA managed true, task conditional on blockStorageBackend == 'odf'

Operator tasks (cluster API):
- acm_cis.yaml: Add KUBECONFIG to ClusterImageSet tasks (Get current, Delete, Create)
- clair_disconnected.yaml: Add KUBECONFIG to Clair tasks (Get Clair Pod, Execute Import in pod)
1 parent 997e4ce commit 9953c68

12 files changed

Lines changed: 159 additions & 18 deletions

defaults/operators.yaml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -112,9 +112,9 @@ operators:
112112

113113
# AAP is a dependency for osac-operator
114114
- name: ansible-automation-platform-operator
115-
defaultChannel: stable-2.5-cluster-scoped
115+
defaultChannel: stable-2.6-cluster-scoped
116116
channels:
117-
- name: stable-2.5-cluster-scoped
117+
- name: stable-2.6-cluster-scoped
118118
namespace: ansible-aap
119119
source: cs-redhat-operator-index-v4-20
120120
disableTargetNamespace: true

operators/advanced-cluster-management/acm_cis.yaml

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,8 @@
22
# ACM CIS tasks: ClusterImageSet reconciliation
33

44
- name: Get current ClusterImageSet
5+
environment:
6+
KUBECONFIG: "{{ workingDir }}/ocp-cluster/auth/kubeconfig"
57
kubernetes.core.k8s_info:
68
api_version: hive.openshift.io/v1
79
kind: ClusterImageSet
@@ -12,6 +14,8 @@
1214
r_desired_image_set: "{{ openshift_versions | map(attribute='version') | map('regex_replace', '^(.*)$', 'openshift-v\\1-disconnected') | list }}"
1315

1416
- name: Delete existing ClusterImageSet
17+
environment:
18+
KUBECONFIG: "{{ workingDir }}/ocp-cluster/auth/kubeconfig"
1519
kubernetes.core.k8s:
1620
state: absent
1721
definition:
@@ -27,6 +31,8 @@
2731
loop_var: imageset
2832

2933
- name: Create ClusterImageSet
34+
environment:
35+
KUBECONFIG: "{{ workingDir }}/ocp-cluster/auth/kubeconfig"
3036
kubernetes.core.k8s:
3137
state: present
3238
definition:

operators/quay-operator/clair_disconnected.yaml

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,6 @@
11
- name: Get Clair Pod object from quay-enterprise
2+
environment:
3+
KUBECONFIG: "{{ workingDir }}/ocp-cluster/auth/kubeconfig"
24
kubernetes.core.k8s_info:
35
kind: Pod
46
namespace: quay-enterprise
@@ -60,6 +62,8 @@
6062
dest: "/var/www/html/clair/updates.json.gz"
6163

6264
- name: Get Clair Pod object from quay-enterprise
65+
environment:
66+
KUBECONFIG: "{{ workingDir }}/ocp-cluster/auth/kubeconfig"
6367
kubernetes.core.k8s_info:
6468
kind: Pod
6569
namespace: quay-enterprise
@@ -73,6 +77,8 @@
7377
failed_when: clair_pod_list.resources | length == 0
7478

7579
- name: Execute Import inside the Clair Pod
80+
environment:
81+
KUBECONFIG: "{{ workingDir }}/ocp-cluster/auth/kubeconfig"
7682
ansible.builtin.shell: |
7783
{{ workingDir }}/bin/oc exec -n quay-enterprise {{ clair_pod.metadata.name }} -- \
7884
/bin/sh -c "

operators/quay-operator/quay_disconnected.yaml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -33,8 +33,8 @@
3333
docker://registry-quay-quay-enterprise.apps.{{ clusterName }}.{{ baseDomain }} \
3434
--dest-tls-verify=false \
3535
--src-tls-verify=false \
36-
--parallel-images 10 \
37-
--parallel-layers 10 \
36+
--parallel-images {{ 4 if quayBackend == 'LocalStorage' else 10 }} \
37+
--parallel-layers {{ 1 if quayBackend == 'LocalStorage' else 10 }} \
3838
--retry-times 10 \
3939
--retry-delay 0 \
4040
--image-timeout 40m0s \
Lines changed: 99 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,99 @@
1+
---
2+
# LVMS-specific tasks for Quay: PVC, QuayRegistry, and deployment patch to mount registry storage.
3+
# Included from tasks.yaml only when blockStorageBackend == 'lvms'.
4+
5+
- name: Create LVMS PVC for Quay Storage
6+
kubernetes.core.k8s:
7+
state: present
8+
definition:
9+
apiVersion: v1
10+
kind: PersistentVolumeClaim
11+
metadata:
12+
name: quay-storage-pvc
13+
namespace: quay-enterprise
14+
spec:
15+
accessModes:
16+
- ReadWriteOnce
17+
resources:
18+
requests:
19+
storage: 300Gi
20+
storageClassName: lvms-vg1
21+
22+
- name: Ensure QuayRegistry is present (LVMS Backend)
23+
kubernetes.core.k8s:
24+
state: present
25+
definition:
26+
apiVersion: quay.redhat.com/v1
27+
kind: QuayRegistry
28+
metadata:
29+
name: registry
30+
namespace: quay-enterprise
31+
spec:
32+
configBundleSecret: quay-config
33+
components:
34+
- kind: objectstorage
35+
managed: false
36+
- kind: quay
37+
managed: true
38+
overrides:
39+
replicas: 1
40+
resources:
41+
limits:
42+
cpu: "4"
43+
memory: 12Gi
44+
requests:
45+
cpu: "{{ '2' if (disconnected | default(true)) else '1' }}"
46+
memory: "{{ '6Gi' if (disconnected | default(true)) else '4Gi' }}"
47+
- kind: horizontalpodautoscaler
48+
managed: false
49+
retries: 1
50+
delay: 10
51+
register: r_quay_lvms
52+
until: r_quay_lvms is succeeded
53+
54+
- name: Wait for Quay app deployment to exist (LVMS)
55+
kubernetes.core.k8s_info:
56+
api_version: apps/v1
57+
kind: Deployment
58+
namespace: quay-enterprise
59+
name: registry-quay-app
60+
register: quay_app_deployment
61+
retries: 30
62+
delay: 10
63+
until: quay_app_deployment.resources | length > 0
64+
65+
- name: Set quay container index for LVMS patch (first container is the app)
66+
ansible.builtin.set_fact:
67+
quay_container_index: 0
68+
when: quay_app_deployment.resources | default([]) | length > 0
69+
70+
- name: Check if Quay app deployment already has registry-storage volume (LVMS)
71+
ansible.builtin.set_fact:
72+
quay_registry_storage_already_patched: "{{ quay_app_deployment.resources[0].spec.template.spec.volumes | selectattr('name', 'equalto', 'registry-storage') | list | length > 0 }}"
73+
when: quay_app_deployment.resources | default([]) | length > 0
74+
75+
- name: Patch Quay app deployment to mount registry storage PVC (LVMS)
76+
environment:
77+
KUBECONFIG: "{{ workingDir }}/ocp-cluster/auth/kubeconfig"
78+
ansible.builtin.shell: |
79+
oc patch deployment registry-quay-app -n quay-enterprise --type=json -p='{{ quay_lvms_patch | to_json }}'
80+
args:
81+
executable: /bin/bash
82+
vars:
83+
quay_lvms_patch:
84+
- op: add
85+
path: /spec/template/spec/volumes/-
86+
value:
87+
name: registry-storage
88+
persistentVolumeClaim:
89+
claimName: quay-storage-pvc
90+
- op: add
91+
path: /spec/template/spec/containers/{{ quay_container_index }}/volumeMounts/-
92+
value:
93+
name: registry-storage
94+
mountPath: /datastorage/registry
95+
when:
96+
- quay_app_deployment.resources | default([]) | length > 0
97+
- quay_container_index is defined
98+
- not (quay_registry_storage_already_patched | default(false))
99+
register: quay_patch_result

operators/quay-operator/tasks.yaml

Lines changed: 9 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,9 @@
2626
| combine(quayBackendRGWDefaults)
2727
| combine(quayBackendRGWConfiguration))
2828
if quayBackend == 'RadosGWStorage'
29+
else (quayBackendDefaults
30+
| combine(quayBackendLocalStorageConfiguration))
31+
if quayBackend == 'LocalStorage'
2932
else quayBackendDefaults)
3033
| to_yaml }}
3134
DISTRIBUTED_STORAGE_DEFAULT_LOCATIONS:
@@ -57,7 +60,7 @@
5760
matcher:
5861
disable_updaters: true
5962
60-
- name: Ensure QuayRegistry is present
63+
- name: Ensure QuayRegistry is present (ODF Backend)
6164
kubernetes.core.k8s:
6265
state: present
6366
definition:
@@ -70,7 +73,7 @@
7073
configBundleSecret: quay-config
7174
components:
7275
- kind: horizontalpodautoscaler
73-
managed: "{{ true if blockStorageBackend == 'odf' else false }}"
76+
managed: true
7477
- kind: quay
7578
managed: true
7679
overrides:
@@ -83,11 +86,15 @@
8386
memory: "{{ '16Gi' if (disconnected | default(true)) else '2Gi' }}"
8487
- kind: objectstorage
8588
managed: false
89+
when: blockStorageBackend == 'odf'
8690
retries: 60
8791
delay: 10
8892
register: r_quay
8993
until: r_quay is succeeded
9094

95+
- name: Run LVMS-specific Quay tasks (PVC, QuayRegistry, deployment patch)
96+
ansible.builtin.include_tasks: quay_lvms.yaml
97+
when: blockStorageBackend == 'lvms'
9198

9299
- name: Get router pod IP for Quay route access
93100
kubernetes.core.k8s_info:

playbooks/tasks/configure_hardware_ironic_setup.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -37,7 +37,7 @@
3737
ansible.builtin.wait_for:
3838
port: "{{ http_server_port }}"
3939
host: localhost
40-
timeout: 10
40+
timeout: 30
4141

4242
- name: Verify ISO is accessible via temporary HTTP server
4343
ansible.builtin.uri:

scripts/configure_devscripts.sh

Lines changed: 15 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -59,6 +59,16 @@ echo "Creating dev-scripts configuration at: $CONFIG_FILE"
5959
ENCLAVE_NUM_MASTERS="${ENCLAVE_NUM_MASTERS:-3}"
6060
ENCLAVE_NUM_LANDINGZONE="${ENCLAVE_NUM_LANDINGZONE:-1}"
6161

62+
# VM extra disk size: 200G for disconnected (mirroring), 60G for connected
63+
DEPLOYMENT_MODE="${ENCLAVE_DEPLOYMENT_MODE:-disconnected}"
64+
if [ "$DEPLOYMENT_MODE" = "connected" ]; then
65+
VM_EXTRADISKS_SIZE_VAL="60G"
66+
MASTER_MEMORY_VAL=24576 # 24 GB for connected
67+
else
68+
VM_EXTRADISKS_SIZE_VAL="200G"
69+
MASTER_MEMORY_VAL=32768 # 32 GB for disconnected
70+
fi
71+
6272
# Create configuration file
6373
cat > "$CONFIG_FILE" <<EOF
6474
#!/bin/bash
@@ -92,14 +102,14 @@ export NUM_EXTRA_WORKERS=0
92102
# =============================================================================
93103
94104
# Master VMs need resources for OpenShift control plane nodes
95-
export MASTER_MEMORY=24576 # 24 GB RAM
105+
export MASTER_MEMORY=${MASTER_MEMORY_VAL} # 24 GB (connected) or 32 GB (disconnected)
96106
export MASTER_DISK=120 # 120 GB disk
97107
export MASTER_VCPU=12 # 12 vCPUs
98108
99109
# Extra disks for storage (used by LVMS for PersistentVolumes)
100110
export VM_EXTRADISKS=true
101111
export VM_EXTRADISKS_LIST="vdb"
102-
export VM_EXTRADISKS_SIZE="60G"
112+
export VM_EXTRADISKS_SIZE="${VM_EXTRADISKS_SIZE_VAL}"
103113
104114
# =============================================================================
105115
# Landing Zone VM Specs
@@ -147,6 +157,9 @@ export CLUSTER_NAME="${ENCLAVE_CLUSTER_NAME}"
147157
# Cluster domain (for DNS)
148158
export CLUSTER_DOMAIN="${ENCLAVE_CLUSTER_NAME}.lab"
149159
160+
# Base domain (so dev-scripts uses .lab not test.metalkube.org for CLUSTER_DOMAIN)
161+
export BASE_DOMAIN="lab"
162+
150163
# Working directory (where VMs and configs are stored)
151164
export WORKING_DIR="/opt/dev-scripts"
152165

scripts/generate_enclave_vars.sh

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -164,6 +164,8 @@ quayPassword: SuperPrivate123!
164164
# Option 1: External S3/RadosGW storage (RECOMMENDED for production)
165165
# Option 2: Local storage (NOT recommended for production)
166166
quayBackend: LocalStorage
167+
quayBackendLocalStorageConfiguration:
168+
storage_path: /datastorage/registry
167169
168170
# ============================================================================
169171
# Storage Backend
@@ -189,7 +191,7 @@ lvmsConfig: {}
189191
# Pull secret will be read from pullSecretPath
190192
pullSecret:
191193
auths: {}
192-
pullSecretPath: "{{ workingDir }}/.config/pull-secret.json"
194+
pullSecretPath: "{{ workingDir }}/config/pull-secret.json"
193195
194196
# SSH public key path for cluster nodes
195197
sshPubPath: "{{ workingDir }}/.ssh/id_rsa.pub"
@@ -289,7 +291,7 @@ info "Generated configuration uses:"
289291
info " - Worker IPs: ${WORKER_IP_START}-${WORKER_IP_END} (will be assigned during deployment)"
290292
info " - Storage: LVMS with /dev/vda root disk"
291293
info " - Registry: LocalStorage"
292-
info " - Pull secret: Will use ~/.config/pull-secret.json on Landing Zone"
294+
info " - Pull secret: Will use ~/config/pull-secret.json on Landing Zone"
293295
info " - SSL certificates: Empty (self-signed will be generated)"
294296
echo ""
295297
info "Review config/global.yaml and config/certificates.yaml and adjust if needed before running Enclave Lab"

scripts/install_enclave.sh

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -291,8 +291,8 @@ if [ "$PULL_SECRET_FOUND" = true ]; then
291291
info " Validated pull secret contains registry.redhat.io credentials"
292292

293293
# Copy to Landing Zone
294-
ssh $SSH_OPTS "$LZ_SSH" "mkdir -p ${LZ_ROOT_DIR}/.config"
295-
scp $SSH_OPTS "$PULL_SECRET_SOURCE" "${LZ_SSH}:${LZ_ROOT_DIR}/.config/pull-secret.json"
294+
ssh $SSH_OPTS "$LZ_SSH" "mkdir -p ${LZ_ROOT_DIR}/config"
295+
scp $SSH_OPTS "$PULL_SECRET_SOURCE" "${LZ_SSH}:${LZ_ROOT_DIR}/config/pull-secret.json"
296296

297297
success "Pull secret copied to Landing Zone"
298298

@@ -307,7 +307,7 @@ import yaml
307307
import json
308308
309309
# Read the pull secret
310-
with open('/home/cloud-user/.config/pull-secret.json', 'r') as f:
310+
with open('/home/cloud-user/config/pull-secret.json', 'r') as f:
311311
pull_secret = json.load(f)
312312
313313
# Read config/global.yaml

0 commit comments

Comments (0)