@@ -15,28 +15,37 @@ DB_NAME=$(echo "$DB_URL" | sed -n 's|.*/\([^?]*\).*|\1|p')
1515echo " DB Host: $DB_HOST "
1616echo " DB Name: $DB_NAME "
1717
18- # 1. Wait for DNS resolution (Aurora DNS can take a minute to propagate)
18+ # 1. Wait for DNS resolution (Aurora DNS can take a minute to propagate).
19+ # Hard-fail if it never resolves: continuing past this point with a missing
20+ # host means Hasura starts up, fails its DB connection, exits, ECS restarts,
21+ # and the deployment circuit breaker eventually trips and rolls back the
22+ # whole stack with no useful error. Better to have the container exit fast
23+ # and ECS restart it (re-resolving DNS each time) than to bleed boot time.
1924echo " [1/3] Waiting for DNS resolution of $DB_HOST ..."
20- for i in $( seq 1 60 ) ; do
25+ for i in $( seq 1 120 ) ; do
2126 if nslookup " $DB_HOST " > /dev/null 2>&1 ; then
2227 echo " DNS resolved."
2328 break
2429 fi
25- if [ $i -eq 60 ]; then
26- echo " WARNING: DNS resolution timed out after 5 minutes. Continuing anyway..."
30+ if [ $i -eq 120 ]; then
31+ echo " ERROR: DNS resolution failed after 10 minutes; exiting so ECS can retry."
32+ exit 1
2733 fi
2834 sleep 5
2935done
3036
31- # 2. Wait for PostgreSQL to accept connections
37+ # 2. Wait for PostgreSQL to accept connections. Same hard-fail rationale as
38+ # above — proceeding when PG is unreachable just guarantees a Hasura crash
39+ # loop and a circuit-breaker rollback.
3240echo " [2/3] Waiting for PostgreSQL to be ready..."
33- for i in $( seq 1 60 ) ; do
41+ for i in $( seq 1 120 ) ; do
3442 if PGPASSWORD=" $DB_PASSWORD " pg_isready -h " $DB_HOST " -p " $DB_PORT " -U " $DB_USER " > /dev/null 2>&1 ; then
3543 echo " PostgreSQL ready."
3644 break
3745 fi
38- if [ $i -eq 60 ]; then
39- echo " WARNING: PostgreSQL not ready after 5 minutes. Continuing anyway..."
46+ if [ $i -eq 120 ]; then
47+ echo " ERROR: PostgreSQL not ready after 10 minutes; exiting so ECS can retry."
48+ exit 1
4049 fi
4150 sleep 5
4251done
0 commit comments