37
37
cat /mnt/disks/certs/server/{{.Values.tls.certFilename | default "tls.key"}} /mnt/disks/certs/server/{{.Values.tls.certKeyFilename | default "tls.crt"}} > /dev/shm/server.cert
38
38
export tls_servercertificate_filepath="/dev/shm/server.cert"
39
39
{{- end }}
40
+ # Deal with the fact we cannot accept "-" in router names
41
+ export routername=$(echo $(hostname) | sed 's/-//g')
40
42
{{- if .Values.solace.redundancy }}
41
- # [TODO] KBARR not using correct method of finding ordinal until we bump min Kubernetes release above 1.8.1
42
- # https://github.com/kubernetes/kubernetes/issues/40651
43
- # node_ordinal=$(STATEFULSET_ORDINAL)
44
43
IFS='-' read -ra host_array <<< $(hostname)
45
44
node_ordinal=${host_array[-1]}
46
45
if [[ ! -z `echo $STATEFULSET_NAMESPACE` ]]; then
49
48
namespace=default
50
49
fi
51
50
service={{ template "solace.fullname" . }}
52
- # Deal with the fact we cannot accept "-" in routre names
53
51
service_name=$(echo ${service} | sed 's/-//g')
54
- export routername=$(echo $(hostname) | sed 's/-//g')
55
52
export redundancy_enable=yes
56
53
export configsync_enable=yes
57
54
export redundancy_authentication_presharedkey_key=`cat /mnt/disks/secrets/username_admin_password | awk '{x=$0;for(i=length;i<51;i++)x=x "0";}END{print x}' | base64` # Right-pad with 0s to 50 length
92
89
loop_guard=60
93
90
pause=10
94
91
count=0
92
+ # Wait for Solace Management API
95
93
while [ ${count} -lt ${loop_guard} ]; do
96
94
if /mnt/disks/solace/semp_query.sh -n admin -p ${password} -u http://localhost:8080 -t ; then
97
95
break
@@ -131,6 +129,7 @@ data:
131
129
resync_step_required=""
132
130
role=""
133
131
count=0
132
+ # Determine node's primary or backup role
134
133
while [ ${count} -lt ${loop_guard} ]; do
135
134
role_results=`/mnt/disks/solace/semp_query.sh -n admin -p ${password} -u http://localhost:8080 \
136
135
-q "<rpc><show><redundancy><detail/></redundancy></show></rpc>" \
@@ -147,16 +146,16 @@ data:
147
146
;;
148
147
esac
149
148
((count++))
150
- echo "`date` INFO : ${APP}-Waited ${run_time} seconds, got ${role_results} for this node's active-standby role"
149
+ echo "`date` INFO : ${APP}-Waited ${run_time} seconds, got ${role_results} for this node's primary or backup role"
151
150
sleep ${pause}
152
151
done
153
152
if [ ${count} -eq ${loop_guard} ]; then
154
- echo "`date` ERROR : ${APP}-Could not determine this node's active-standby role" >&2
153
+ echo "`date` ERROR : ${APP}-Could not determine this node's primary or backup role" >&2
155
154
exit 1
156
155
fi
157
- # Determine local activity
156
+ echo "`date` INFO : ${APP}-Management API is up, determined that this node's role is: ${role}"
157
+ # Determine activity (local or mate active)
158
158
count=0
159
- echo "`date` INFO : ${APP}-Management API is up, determined that this node's active-standby role is: ${role}"
160
159
while [ ${count} -lt ${loop_guard} ]; do
161
160
online_results=`/mnt/disks/solace/semp_query.sh -n admin -p ${password} -u http://localhost:8080 \
162
161
-q "<rpc><show><redundancy><detail/></redundancy></show></rpc>" \
@@ -172,7 +171,7 @@ data:
172
171
echo "`date` INFO : ${APP}-Broker initial startup detected. This node will assert config-sync configuration over its mate"
173
172
resync_step_required="true"
174
173
else
175
- echo "`date` WARN : ${APP}-Unexpected state: this is not an initial startup of the broker and this node reports Local Active. Normally expected nodes are Mate Active after restart "
174
+ echo "`date` WARN : ${APP}-Unexpected state: this is not an initial startup of the broker and this node reports Local Active. Possibly a redeploy? "
176
175
fi
177
176
break
178
177
;;
@@ -182,15 +181,16 @@ data:
182
181
;;
183
182
esac
184
183
((count++))
185
- echo "`date` INFO : ${APP}-Waited ${run_time} seconds, Local activity state is: ${local_activity}"
184
+ echo "`date` INFO : ${APP}-Waited ${run_time} seconds, node activity state is: ${local_activity}"
186
185
sleep ${pause}
187
186
done
188
187
if [ ${count} -eq ${loop_guard} ]; then
189
- echo "`date` ERROR : ${APP}-Local activity state never become Local Active or Mate Active" >&2
188
+ echo "`date` ERROR : ${APP}-Node activity state never become Local Active or Mate Active" >&2
190
189
exit 1
191
190
fi
192
- # If we need to assert leader, then we need to wait for mate to reconcile
191
+ # If we need to assert leader, then first wait for mate to report Standby state
193
192
if [ "${resync_step_required}" = "true" ]; then
193
+ # This branch is AD-active only
194
194
count=0
195
195
echo "`date` INFO : ${APP}-Waiting for mate activity state to be 'Standby'"
196
196
while [ ${count} -lt ${loop_guard} ]; do
@@ -214,7 +214,7 @@ data:
214
214
exit 1
215
215
fi
216
216
fi # if assert-leader
217
- # Ensure Config-sync connection state is Connected before proceeding
217
+ # Ensure Config-sync connection state is Connected for both primary and backup before proceeding
218
218
count=0
219
219
echo "`date` INFO : ${APP}-Waiting for config-sync connected"
220
220
while [ ${count} -lt ${loop_guard} ]; do
@@ -239,11 +239,12 @@ data:
239
239
fi
240
240
# Now can issue assert-leader command
241
241
if [ "${resync_step_required}" = "true" ]; then
242
- echo "`date` INFO : ${APP}-Initiating assert-leader"
243
- /mnt/disks/solace/semp_query.sh -n admin -p ${password} -u http://localhost:8080 \
244
- -q "<rpc><admin><config-sync><assert-leader><router/></assert-leader></config-sync></admin></rpc>"
245
- /mnt/disks/solace/semp_query.sh -n admin -p ${password} -u http://localhost:8080 \
246
- -q "<rpc><admin><config-sync><assert-leader><vpn-name>*</vpn-name></assert-leader></config-sync></admin></rpc>"
242
+ # This branch is AD-active only
243
+ echo "`date` INFO : ${APP}-Initiating assert-leader"
244
+ /mnt/disks/solace/semp_query.sh -n admin -p ${password} -u http://localhost:8080 \
245
+ -q "<rpc><admin><config-sync><assert-leader><router/></assert-leader></config-sync></admin></rpc>"
246
+ /mnt/disks/solace/semp_query.sh -n admin -p ${password} -u http://localhost:8080 \
247
+ -q "<rpc><admin><config-sync><assert-leader><vpn-name>*</vpn-name></assert-leader></config-sync></admin></rpc>"
247
248
fi
248
249
# Wait for config-sync results
249
250
count=0
@@ -263,7 +264,7 @@ data:
263
264
((count++))
264
265
echo "`date` INFO : ${APP}-Waited ${run_time} seconds, Config-sync is: ${confsyncstatus_results}, not yet Up"
265
266
266
- # Additional check to confirm config-sync
267
+ # Additional checks to confirm config-sync (even if reported globally as not Up, it may still be up between the local primary and backup in a DR setup)
267
268
echo "`date` INFO : ${APP}-Checking Config-sync Setup. Starting additional checks to confirm config-sync locally..."
268
269
messagevpn_result=`/mnt/disks/solace/semp_query.sh -n admin -p ${password} -u http://localhost:8080 \
269
270
-q "<rpc><show><config-sync><database/><detail/></config-sync></show></rpc>" \
@@ -378,36 +379,15 @@ data:
378
379
IFS='-' read -ra host_array <<< $(hostname)
379
380
node_ordinal=${host_array[-1]}
380
381
password=`cat /mnt/disks/secrets/username_admin_password`
381
-
382
- # For update (includes SolOS upgrade) purposes, additional checks are required for readiness state when the pod has been started
383
- # This is an update if the LASTVERSION_FILE with K8s controller-revision-hash exists and contents differ from current value
384
- LASTVERSION_FILE=/var/lib/solace/var/lastConfigRevisionBeforeReboot
385
- if [ ! -f ${LASTVERSION_FILE} ] || [[ $(cat ${LASTVERSION_FILE}) != $(get_label "controller-revision-hash") ]] ; then
386
- echo "`date` INFO : ${APP}-Initial startup or Upgrade detected, running additional checks..."
387
- # Check redundancy
388
- echo "`date` INFO : ${APP}-Running checks. Redundancy state check started..."
389
- results=`/mnt/disks/solace/semp_query.sh -n admin -p ${password} -u http://localhost:8080 \
390
- -q "<rpc><show><redundancy/></show></rpc>" \
391
- -v "/rpc-reply/rpc/show/redundancy/redundancy-status"`
392
- redundancystatus_results=`echo ${results} | xmllint -xpath "string(returnInfo/valueSearchResult)" -`
393
- if [ "${redundancystatus_results}" != "Up" ]; then
394
- echo "`date` INFO : ${APP}-Redundancy state is not yet up."
395
- rm -f ${FINAL_ACTIVITY_LOGGED_TRACKING_FILE}; exit 1
396
- fi
397
-
398
- fi
399
- # Record current version in LASTVERSION_FILE
400
- echo $(get_label "controller-revision-hash") > ${LASTVERSION_FILE}
401
382
# For monitor node just check for redundancy; active label will never be set
402
383
if [ "${node_ordinal}" = "2" ]; then
403
384
# Check redundancy
404
- echo "`date` INFO : ${APP}-Running checks. Redundancy state check started..."
405
385
results=`/mnt/disks/solace/semp_query.sh -n admin -p ${password} -u http://localhost:8080 \
406
386
-q "<rpc><show><redundancy/></show></rpc>" \
407
387
-v "/rpc-reply/rpc/show/redundancy/redundancy-status"`
408
388
redundancystatus_results=`echo ${results} | xmllint -xpath "string(returnInfo/valueSearchResult)" -`
409
389
if [ "${redundancystatus_results}" != "Up" ]; then
410
- echo "`date` INFO : ${APP}-Redundancy state is not yet up."
390
+ echo "`date` INFO : ${APP}-Waiting for redundancy up, redundancy state is not yet up."
411
391
rm -f ${FINAL_ACTIVITY_LOGGED_TRACKING_FILE}; exit 1
412
392
fi
413
393
if [ ! -f ${FINAL_ACTIVITY_LOGGED_TRACKING_FILE} ]; then
@@ -418,6 +398,7 @@ data:
418
398
fi
419
399
exit 0
420
400
fi # End Monitor Node
401
+ # From here on, only message routing nodes are handled.
421
402
# For Primary or Backup nodes set both service readiness (active label) and k8s readiness (exit return value)
422
403
health_result=`curl -s -o /dev/null -w "%{http_code}" http://localhost:5550/health-check/guaranteed-active`
423
404
case "${health_result}" in
@@ -467,54 +448,52 @@ data:
467
448
echo "`date` INFO : ${APP}-Running checks.Redundancy state is not yet up."
468
449
rm -f ${FINAL_ACTIVITY_LOGGED_TRACKING_FILE}; exit 1
469
450
fi
470
- # Additionally check config-sync status for non-monitoring nodes
471
- if [ "${node_ordinal}" != "2" ]; then
472
- results=`/mnt/disks/solace/semp_query.sh -n admin -p ${password} -u http://localhost:8080 \
473
- -q "<rpc><show><config-sync></config-sync></show></rpc>" \
474
- -v "/rpc-reply/rpc/show/config-sync/status/oper-status"`
475
- confsyncstatus_results=`echo ${results} | xmllint -xpath "string(returnInfo/valueSearchResult)" -`
476
- if [ "${confsyncstatus_results}" != "Up" ]; then
451
+ # Check config-sync status
452
+ results=`/mnt/disks/solace/semp_query.sh -n admin -p ${password} -u http://localhost:8080 \
453
+ -q "<rpc><show><config-sync></config-sync></show></rpc>" \
454
+ -v "/rpc-reply/rpc/show/config-sync/status/oper-status"`
455
+ confsyncstatus_results=`echo ${results} | xmllint -xpath "string(returnInfo/valueSearchResult)" -`
456
+ if [ "${confsyncstatus_results}" != "Up" ]; then
477
457
478
- # Additional check to confirm config-sync
479
- echo "`date` INFO : ${APP}-Checking Config-sync Setup. Starting additional checks to confirm config-sync locally..."
458
+ # Additional check to confirm config-sync
459
+ echo "`date` INFO : ${APP}-Checking Config-sync Setup. Starting additional checks to confirm config-sync locally..."
480
460
481
- messagevpn_result=`/mnt/disks/solace/semp_query.sh -n admin -p ${password} -u http://localhost:8080 \
482
- -q "<rpc><show><config-sync><database/><detail/></config-sync></show></rpc>" \
483
- -v "count(/rpc-reply/rpc/show/config-sync/database/local/tables/table)"`
484
- messagevpn_total=`echo ${messagevpn_result} | xmllint -xpath "string(returnInfo/valueSearchResult)" -`
461
+ messagevpn_result=`/mnt/disks/solace/semp_query.sh -n admin -p ${password} -u http://localhost:8080 \
462
+ -q "<rpc><show><config-sync><database/><detail/></config-sync></show></rpc>" \
463
+ -v "count(/rpc-reply/rpc/show/config-sync/database/local/tables/table)"`
464
+ messagevpn_total=`echo ${messagevpn_result} | xmllint -xpath "string(returnInfo/valueSearchResult)" -`
485
465
486
- # Count message_vpns in-sync and compare with total
487
- localmessagevpn_result=`/mnt/disks/solace/semp_query.sh -n admin -p ${password} -u http://localhost:8080 \
488
- -q "<rpc><show><config-sync><database/></config-sync></show></rpc>" \
489
- -v "count(//table[sync-state='In-Sync'])"`
490
- local_messagevpn_total_insync=`echo ${localmessagevpn_result} | xmllint -xpath "string(returnInfo/valueSearchResult)" -`
491
- if [ "$messagevpn_total" -ne "$local_messagevpn_total_insync" ]; then
492
- echo "`date` INFO : ${APP}-Config-sync state is not in-sync locally."
493
- rm -f ${FINAL_ACTIVITY_LOGGED_TRACKING_FILE}; exit 1
494
- fi
466
+ # Count message_vpns in-sync and compare with total
467
+ localmessagevpn_result=`/mnt/disks/solace/semp_query.sh -n admin -p ${password} -u http://localhost:8080 \
468
+ -q "<rpc><show><config-sync><database/></config-sync></show></rpc>" \
469
+ -v "count(//table[sync-state='In-Sync'])"`
470
+ local_messagevpn_total_insync=`echo ${localmessagevpn_result} | xmllint -xpath "string(returnInfo/valueSearchResult)" -`
471
+ if [ "$messagevpn_total" -ne "$local_messagevpn_total_insync" ]; then
472
+ echo "`date` INFO : ${APP}-Config-sync state is not in-sync locally."
473
+ rm -f ${FINAL_ACTIVITY_LOGGED_TRACKING_FILE}; exit 1
474
+ fi
495
475
496
- echo "`date` INFO : ${APP}-Checking Config-sync Setup. Remote config-sync state check starting..."
497
- vpnremotehamate_result=$(get_router_remote_config_state "name")
476
+ echo "`date` INFO : ${APP}-Checking Config-sync Setup. Remote config-sync state check starting..."
477
+ vpnremotehamate_result=$(get_router_remote_config_state "name")
498
478
499
- remote_messagevpn_result=`/mnt/disks/solace/semp_query.sh -n admin -p ${password} -u http://localhost:8080 \
500
- -q "<rpc><show><config-sync><database/><remote/></config-sync></show></rpc>" \
501
- -v "count(//table/source-router[name='$vpnremotehamate_result'])"`
502
- remote_messagevpn_total=`echo ${remote_messagevpn_result} | xmllint -xpath "string(returnInfo/valueSearchResult)" -`
479
+ remote_messagevpn_result=`/mnt/disks/solace/semp_query.sh -n admin -p ${password} -u http://localhost:8080 \
480
+ -q "<rpc><show><config-sync><database/><remote/></config-sync></show></rpc>" \
481
+ -v "count(//table/source-router[name='$vpnremotehamate_result'])"`
482
+ remote_messagevpn_total=`echo ${remote_messagevpn_result} | xmllint -xpath "string(returnInfo/valueSearchResult)" -`
503
483
504
- # Count message_vpns in-sync, not stale and compare with total
505
- remotemessagevpn_result=`/mnt/disks/solace/semp_query.sh -n admin -p ${password} -u http://localhost:8080 \
506
- -q "<rpc><show><config-sync><database/><remote/></config-sync></show></rpc>" \
507
- -v "count(//table/source-router[name='$vpnremotehamate_result' and sync-state='In-Sync' and stale='No'])"`
508
- remote_messagevpn_total_insync=`echo ${remotemessagevpn_result} | xmllint -xpath "string(returnInfo/valueSearchResult)" -`
509
- if [ "$remote_messagevpn_total" -ne "$remote_messagevpn_total_insync" ]; then
510
- echo "`date` INFO : ${APP}-Config-sync state is not in-sync for remote."
511
- rm -f ${FINAL_ACTIVITY_LOGGED_TRACKING_FILE}; exit 1
512
- fi
484
+ # Count message_vpns in-sync, not stale and compare with total
485
+ remotemessagevpn_result=`/mnt/disks/solace/semp_query.sh -n admin -p ${password} -u http://localhost:8080 \
486
+ -q "<rpc><show><config-sync><database/><remote/></config-sync></show></rpc>" \
487
+ -v "count(//table/source-router[name='$vpnremotehamate_result' and sync-state='In-Sync' and stale='No'])"`
488
+ remote_messagevpn_total_insync=`echo ${remotemessagevpn_result} | xmllint -xpath "string(returnInfo/valueSearchResult)" -`
489
+ if [ "$remote_messagevpn_total" -ne "$remote_messagevpn_total_insync" ]; then
490
+ echo "`date` INFO : ${APP}-Config-sync state is not in-sync for remote."
491
+ rm -f ${FINAL_ACTIVITY_LOGGED_TRACKING_FILE}; exit 1
513
492
fi
514
493
fi
515
494
# Pass readiness check
516
495
if [ ! -f ${FINAL_ACTIVITY_LOGGED_TRACKING_FILE} ]; then
517
- echo "`date` INFO : ${APP}-Redundancy is up and node is mate Active"
496
+ echo "`date` INFO : ${APP}-Redundancy is up and node is Mate Active"
518
497
touch ${FINAL_ACTIVITY_LOGGED_TRACKING_FILE}
519
498
echo "`date` INFO : ${APP}-Server status check complete for this broker node"
520
499
exit 1
0 commit comments