forked from Azure/sap-automation
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy path5.8.4.0-clusterPrep-ScaleOut-RedHat.yml
More file actions
270 lines (235 loc) · 16.5 KB
/
5.8.4.0-clusterPrep-ScaleOut-RedHat.yml
File metadata and controls
270 lines (235 loc) · 16.5 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT License.
---
# RHEL Clustering - Deploy HANA cluster resources
# Azure ref: https://docs.microsoft.com/en-us/azure/virtual-machines/workloads/sap/sap-hana-high-availability-rhel
# @TODO Subscribe to subscriptions/repos if required
# This code assumes the deployment is using RHEL SAP image
# SAP HANA Cluster resources prep for ANF
# https://learn.microsoft.com/en-us/azure/sap/workloads/sap-hana-high-availability-netapp-files-red-hat
# +------------------------------------4--------------------------------------*/
# To avoid duplicating code: the Pacemaker configuration is identical for AFS and ANF apart from the NFS mount options, so the mount options are parameterized here.
# Additional mount options (e.g. nodev,nosuid) are derived from afs_mnt_options
# by computing the difference against afs_mnt_options_default and appending them.
- name: "5.8 HANA Pacemaker Scaleout - NFS Compatibility - set mount options based on NFS source"
  ansible.builtin.set_fact:
    # ANF gets a fixed option string. Any other provider starts from the AFS
    # base options and appends every entry of afs_mnt_options that is not
    # already present in afs_mnt_options_default.
    nfs_mount_options: "{% if NFS_provider == 'ANF' %}defaults,rw,hard,rsize=262144,wsize=262144,proto=tcp,noatime,_netdev,nfsvers=4.1,lock,sec=sys{% else %}noresvport,defaults,rw,hard,proto=tcp,noatime,nfsvers=4.1,lock{% set extras = afs_mnt_options.split(',') | difference(afs_mnt_options_default.split(',')) %}{% if extras %},{{ extras | join(',') }}{% endif %}{% endif %}"
    # '/shared' for ANF, an empty string for every other provider.
    directory_path: "{{ '/shared' if NFS_provider == 'ANF' else '' }}"
# Ref : https://learn.microsoft.com/en-us/azure/sap/workloads/sap-hana-high-availability-scale-out-hsr-rhel?tabs=lb-portal#create-file-system-resources
# Moves /hana/shared from /etc/fstab to Pacemaker-managed Filesystem clones
# (one clone per HSR site), pins each clone to its site through node
# attributes and location rules, then restarts HANA.
# Runs only for HA scale-out deployments on ANF/AFS with two shared mountpoints.
# NOTE(review): task nesting was reconstructed from the task names; confirm
# against the upstream layout.
- name: "5.8 HANA Pacemaker Scaleout - Configure the ANF/AFS file system resources"
  when:
    - database_high_availability
    - database_scale_out
    - NFS_provider in ["ANF","AFS"]
    - hana_shared_mountpoint is defined
    - hana_shared_mountpoint | length > 1
  block:
    # Notes :
    # 1. keep cluster out of maintenance mode
    # 2. Stopping HANA is fine, but do not kill any sap services locking /hana/shared especially if you have configured replication between sites.
    # 3. mask the /hana/shared in /etc/fstab and configure filesystem role in pacemaker. Let pacemaker handle share mount.
    # 4. Do not kill existing processes and attempt to unmount /hana/shared. Bad things will happen.
    # 5. No Seriously !! Terrible things will happen and you will have a hard time repairing the deployment

    # Best effort: a non-zero return code from sapcontrol is not treated as a failure.
    - name: "5.8 HANA Pacemaker Scaleout - Execute HANA StopSystem on both sites"
      become_user: "{{ db_sid | lower }}adm"
      become: true
      ansible.builtin.command: "{{ sapcontrol_command }} -function StopSystem"
      failed_when: false
      changed_when: false
      register: hana_system_stopped
      when: ansible_hostname in [ primary_instance_name , secondary_instance_name ]

    - name: "5.8 HANA Pacemaker Scaleout - Wait 2 minutes for SAP system to stop"
      ansible.builtin.wait_for:
        timeout: 120

    # state=absent removes the matching lines (they are not commented out).
    # The mount source is regex-escaped so that dots in an IP address or FQDN
    # match literally instead of acting as wildcards.
    - name: "5.8 HANA Pacemaker Scaleout - Comment out the mountpoint from '/etc/fstab' file"
      ansible.builtin.lineinfile:
        path: /etc/fstab
        regexp: "^{{ item | regex_escape }}"
        state: absent
      loop: "{{ hana_shared_mountpoint[:2] }}"

    # All pcs configuration is issued once, from the primary instance.
    - name: "5.8 HANA Pacemaker Scaleout - Configure pacemaker hana shared filesystem resources on {{ primary_instance_name }}"
      when: ansible_hostname == primary_instance_name
      become: true
      become_user: root
      block:
        # Resources are created disabled and only enabled after the location
        # constraints exist. The folded scalar already joins the lines with
        # spaces, so no shell line continuations are needed.
        - name: "5.8 HANA Pacemaker Scaleout - Configure NFS filesystem resource in Pacemaker for HSR sites"
          ansible.builtin.shell: >
            pcs resource create {{ item.fs_name }} --disabled ocf:heartbeat:Filesystem
            device="{{ item.fs_mount }}" directory="{{ item.fs_dir }}" fstype="nfs"
            options="{{ nfs_mount_options }}"
            op monitor interval=20s on-fail=fence timeout=120s OCF_CHECK_LEVEL=20
            op start interval=0 timeout=120 op stop interval=0 timeout=120
            clone meta clone-node-max=1 interleave=true
          register: nfs_mount_sites
          failed_when: false
          ignore_errors: true
          loop:
            - fs_name: 'fs_hana_shared_s1'
              fs_mount: '{{ hana_shared_mountpoint[0] }}{{ directory_path }}'
              fs_dir: '/hana/shared'
            - fs_name: 'fs_hana_shared_s2'
              fs_mount: '{{ hana_shared_mountpoint[1] }}{{ directory_path }}'
              fs_dir: '/hana/shared'
          loop_control:
            loop_var: item

        # Keep only real failures: non-zero rc whose stderr does not report
        # that the resource already exists.
        - name: "5.8 HANA Pacemaker Scaleout - Check if NFS hana mounts did not error on {{ primary_instance_name }}"
          ansible.builtin.set_fact:
            chk_nfs_mount_sites: "{{ nfs_mount_sites.results | selectattr('rc', 'ne', 0) | rejectattr('stderr', 'search', 'already exists') | default([]) | list }}"

        - name: "5.8 HANA Pacemaker Scaleout - Check if NFS hana mounts did not error on {{ primary_instance_name }}"
          ansible.builtin.debug:
            msg:
              - "nfs_mount_sites_results: {{ nfs_mount_sites.results }}"
              - "chk_nfs_mount_sites: {{ chk_nfs_mount_sites }}"

        - name: "5.8 HANA Pacemaker Scaleout - Fail when NFS hana mounts errored on {{ primary_instance_name }}"
          ansible.builtin.fail:
            msg: "Failed to create NFS hana mounts on {{ primary_instance_name }}"
          when:
            - chk_nfs_mount_sites | length > 0

        # Hosts at even positions of the play host list are tagged S1, hosts
        # at odd positions S2. Failures here are ignored.
        - name: "5.8 HANA Pacemaker Scaleout - Configure node attributes for primary site on {{ primary_instance_name }}"
          ansible.builtin.shell: >
            pcs node attribute {{ item }} NFS_{{ db_sid | upper }}_SITE=S1
          register: node_nfs_attribute_site1
          failed_when: false
          ignore_errors: true
          loop: "{{ ansible_play_hosts_all[0::2] }}"

        - name: "5.8 HANA Pacemaker Scaleout - Configure node attributes for secondary site on {{ primary_instance_name }}"
          ansible.builtin.shell: >
            pcs node attribute {{ item }} NFS_{{ db_sid | upper }}_SITE=S2
          register: node_nfs_attribute_site2
          failed_when: false
          ignore_errors: true
          loop: "{{ ansible_play_hosts_all[1::2] }}"

        # Each clone is banned (-INFINITY) from nodes whose site attribute
        # differs from the clone's own site code.
        - name: "5.8 HANA Pacemaker Scaleout - Configure location constraint for filesystem resource clone on {{ primary_instance_name }}"
          ansible.builtin.shell: >
            pcs constraint location {{ item.clone_name }} rule resource-discovery=never score=-INFINITY NFS_{{ db_sid | upper }}_SITE ne {{ item.site_code }}
          register: location_nfs_attribute_sites
          failed_when: false
          ignore_errors: true
          loop:
            - clone_name: 'fs_hana_shared_s1-clone'
              site_code: 'S1'
            - clone_name: 'fs_hana_shared_s2-clone'
              site_code: 'S2'

        - name: "5.8 HANA Pacemaker Scaleout - Check if NFS hana mounts constraints did not error on {{ primary_instance_name }}"
          ansible.builtin.set_fact:
            chk_location_nfs_attribute_sites: "{{ location_nfs_attribute_sites.results | selectattr('rc', 'ne', 0) | rejectattr('stderr', 'search', 'already exists') | default([]) | list }}"

        # Reports the constraint failure specifically, so it is not confused
        # with a failure to create the filesystem resources.
        - name: "5.8 HANA Pacemaker Scaleout - Fail when NFS hana mounts location constraints errored on {{ primary_instance_name }}"
          ansible.builtin.fail:
            msg: "Failed to create location constraints for NFS hana mounts on {{ primary_instance_name }}"
          when:
            - chk_location_nfs_attribute_sites | length > 0

        # Enable the clones only when both the resources and their location
        # constraints were created without real errors.
        - name: "5.8 HANA Pacemaker Scaleout - Activate filesystem resource on {{ primary_instance_name }}"
          ansible.builtin.shell: >
            pcs resource enable {{ item.fs_name }}
          register: activate_nfs_mount_sites
          failed_when: false
          ignore_errors: true
          loop:
            - fs_name: 'fs_hana_shared_s1'
            - fs_name: 'fs_hana_shared_s2'
          when:
            - chk_location_nfs_attribute_sites | length == 0
            - chk_nfs_mount_sites | length == 0

        - name: "5.8 HANA Pacemaker Scaleout - Configure pacemaker attribute resource on {{ primary_instance_name }}"
          ansible.builtin.shell: >
            pcs resource create {{ item.res_name }} ocf:pacemaker:attribute active_value=true
            inactive_value=false name={{ item.res_name }}
            clone meta clone-node-max=1 interleave=true
          register: attribute_hana_nfs_sites
          failed_when: false
          ignore_errors: true
          loop:
            - res_name: 'hana_nfs_s1_active'
            - res_name: 'hana_nfs_s2_active'

        # The attribute clone of a site starts only after that site's
        # filesystem clone.
        - name: "5.8 HANA Pacemaker Scaleout - Create constraints for pacemaker attribute resource on {{ primary_instance_name }}"
          ansible.builtin.shell: >
            pcs constraint order {{ item.fs_clone }} then {{ item.res_clone }}
          register: loc_attribute_hana_nfs_sites
          failed_when: false
          ignore_errors: true
          loop:
            - fs_clone: 'fs_hana_shared_s1-clone'
              res_clone: 'hana_nfs_s1_active-clone'
            - fs_clone: 'fs_hana_shared_s2-clone'
              res_clone: 'hana_nfs_s2_active-clone'

    - name: "5.8 HANA Pacemaker Scaleout - Wait for /hana/shared to become available on all participating nodes"
      block:
        - name: Wait for /hana/shared to be mounted
          ansible.builtin.wait_for:
            path: /hana/shared
            state: present
            timeout: 300

        # rc is only recorded here; the next task decides whether to fail.
        - name: "5.8 HANA Pacemaker Scaleout - Check if /hana/shared is mounted"
          ansible.builtin.shell: >
            mountpoint -q /hana/shared
          register: hana_shared_mounted
          changed_when: false
          failed_when: false

        - name: "5.8 HANA Pacemaker Scaleout - Fail if /hana/shared is not mounted"
          ansible.builtin.fail:
            msg: "Critical failure : /hana/shared is not mounted"
          when: hana_shared_mounted.rc > 0

    - name: "5.8 HANA Pacemaker Scaleout - Start HANA on both nodes"
      ansible.builtin.include_tasks: ../../../roles-misc/0.4-helpers/tasks/04.01-start_hana.yml
      when:
        - ansible_hostname in [ primary_instance_name , secondary_instance_name ]

    # # Note: We need to manually start HANA on all participating nodes via HDB script.
    # - name: Start HANA database on each participating node
    #   become_user: "{{ db_sid | lower }}adm"
    #   args:
    #     chdir: "/usr/sap/{{ db_sid | upper }}/HDB{{ db_instance_number }}"
    #   become: true
    #   ansible.builtin.shell: >
    #     source /usr/sap/{{ db_sid | upper }}/home/.sapenv.sh &&
    #     HDB start
    #   failed_when: hdb_start.rc > 0
    #   changed_when: false
    #   register: hdb_start
    # - name: Start HANA System on both sites
    #   become_user: "{{ db_sid | lower }}adm"
    #   become: true
    #   ansible.builtin.command: "{{ sapcontrol_command }} -function StartSystem"
    #   failed_when: hana_system_started.rc > 0
    #   changed_when: false
    #   register: hana_system_started
    #   when:
    #     - ansible_hostname in [ primary_instance_name , secondary_instance_name ]

    - name: "5.8 HANA Pacemaker Scaleout - Wait for SAP system to stabilize (debug)"
      ansible.builtin.debug:
        msg: "5.8 HANA Pacemaker Scaleout - Wait {{ hana_scaleout_wait_for_start_in_sec }} seconds for SAP system to stabilize"

    - name: "5.8 HANA Pacemaker Scaleout - Wait for SAP system to stabilize"
      ansible.builtin.wait_for:
        timeout: "{{ hana_scaleout_wait_for_start_in_sec }}"
# End of HANA filesystem clustering resources
# Ref : https://access.redhat.com/articles/3004101 - 4.3 Configure general cluster properties
# Sets the cluster-wide resource defaults. Issued once, as root, from the
# primary instance only.
- name: "5.8 HANA Pacemaker Scaleout - Configure general cluster properties"
  when:
    - ansible_hostname == primary_instance_name
  become_user: root
  become: true
  block:
    # A non-zero return code from pcs fails the task.
    - name: "5.8 HANA Pacemaker Scaleout - Set resource stickiness value to 1000"
      ansible.builtin.shell: >
        pcs resource defaults update resource-stickiness=1000
      register: res_stickiness
      changed_when: false
      failed_when: res_stickiness.rc > 0

    # A non-zero return code from pcs fails the task.
    - name: "5.8 HANA Pacemaker Scaleout - Set migration threshold value to 5000"
      ansible.builtin.shell: >
        pcs resource defaults update migration-threshold=5000
      register: mig_threshold
      changed_when: false
      failed_when: mig_threshold.rc > 0
# ⠀⠀⠀⠀⠀⠀⣠⡤⠶⠒⢛⢻⠛⠛⠛⠛⠛⠛⢿⣛⡓⠶⢦⣤⠀⠀⠀⠀⠀⠀
# ⠀⠀⠀⠀⠀⡴⡫⠒⠊⠁⠀⣸⠀⠀⠀⠀⠀⠀⢹⠀⠀⠁⠒⡏⢳⡄⠀⠀⠀⠀
# ⠀⠀⠀⢀⡾⡑⡇⡀⠀⠀⠀⡷⠀⠀⠤⠤⠀⠀⢸⠀⠀⠀⠀⡇⡳⢻⡄⠀⠀⠀
# ⠀⠀⢀⡾⢱⠔⠁⡇⠀⠀⠀⣇⠀⣀⣀⣀⣀⣀⣀⡇⠀⠀⠀⠃⠱⣵⢻⡄⠀⠀
# ⠀⠀⡾⠁⢀⡀⢤⠗⠒⠒⢺⢳⠤⠶⠶⠶⠶⠶⢖⣷⠒⠒⠒⡦⢄⡀⠀⢹⡄⠀
# ⠀⢸⡇⠐⣁⠤⠬⠤⠤⣤⣼⣷⣵⣶⣶⣶⣶⣶⣽⣿⢤⣤⠤⠷⠤⢄⡁⠘⣇⠀
# ⠀⠘⣧⣞⣁⣀⡮⡯⡿⠛⠛⠫⠿⠭⠭⠭⠭⠽⠿⠛⠛⢻⠽⡿⣄⣀⣑⣦⠏⠀
# ⠀⠀⣿⢠⣿⠃⠀⠈⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⡍⠁⠈⣿⡆⢸⠀⠀
# ⠀⠀⣿⢸⣿⡀⠀⠀⠑⠄⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⢀⠔⠀⠀⠀⣿⣇⢸⡀⠀
# ⢀⣴⡏⠥⠹⢇⠀⠀⠀⠀⠑⠄⠀⠀⠀⠀⠀⠀⠤⠂⠁⠀⠀⠀⢠⠟⠥⠹⣧⡀
# ⣿⡼⢶⡒⠲⡚⠳⣄⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⢀⡴⠛⡲⠒⣲⠷⢹
# ⣿⣗⡚⠨⠑⢌⠢⡘⠷⣤⣀⣴⣾⠻⠟⠛⠻⡻⣶⣄⣠⡴⠏⡠⢊⠔⡡⢺⢚⢸
# ⠙⢯⣗⣀⡀⠀⠑⠂⠥⢂⠭⣛⢵⣖⣒⣒⢲⡦⢟⠭⣕⠪⠅⠊⠀⠀⣁⣘⣯⠞
# ⠀⠀⢿⡑⢬⣑⢄⠀⠀⠀⠈⡟⡞⣯⣷⣢⢿⣇⡗⡏⠀⠀⠀⢀⢴⡁⢐⣹⠀⠀
# ⠀⠀⠀⠙⢻⡈⠳⣗⢄⣼⠓⣟⡏⣄⠩⠭⢡⡊⣗⡗⣷⣄⣴⡹⠋⡾⠛⠁⠀⠀
# ⠀⠀⠀⠀⠀⠳⣤⡈⣷⣿⣆⢸⡇⠛⠀⠀⠘⠃⣿⢀⣿⣷⢋⣠⠾⠃⠀⠀⠀⠀
# ⠀⠀⠀⠀⠀⠀⠀⠉⠻⠦⣭⣽⠇⠿⠀⠀⠸⠃⣯⣭⡥⠞⠋⠁⠀⠀⠀⠀⠀⠀
# ⢀⢀⣀⣀⣀⣀⣀⣀⣀⣀⣈⣳⣵⣶⣶⣷⣶⣾⡵⣋⣀⣀⣀⣀⢀⠀⠀⠀⠀⠀
# ⣿⣟⣟⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣯⣽⣽⣯⣿