@@ -59,107 +59,22 @@ jobs:
5959 echo "$SSH_KEY" > ~/.ssh/id_rsa_arm
6060 chmod 600 ~/.ssh/id_rsa_arm
6161 echo "$SSH_CONFIG" | sed "s/SSH_IP/$SSH_IP/g" > ~/.ssh/config
62- # Enhanced keepalive and timeout settings
62+ # Enhanced keepalive and timeout settings for long-running builds
6363 cat >> ~/.ssh/config << 'EOF'
64- ServerAliveInterval 30
65- ServerAliveCountMax 3
64+ ServerAliveInterval 15
65+ ServerAliveCountMax 20
6666 TCPKeepAlive yes
67- ConnectTimeout 10
68- ConnectionAttempts 1
67+ ConnectTimeout 30
68+ ConnectionAttempts 3
69+ ControlMaster auto
70+ ControlPath ~/.ssh/control-%C
71+ ControlPersist 1h
72+ StrictHostKeyChecking no
6973 EOF
7074 env :
7175 SSH_KEY : ${{ secrets.ARM_SSH_KEY }}
7276 SSH_IP : ${{ secrets.ARM_SSH_IP }}
7377 SSH_CONFIG : ${{ secrets.ARM_SSH_CONFIG }}
# Step: for 'arm' cache suffixes, retry (up to 10 attempts) a TCP probe of port 22 plus an SSH command that checks Docker and outbound network on the remote host; exit 1 if every attempt or the final re-check fails.
74- - name : Check ARM SSH connectivity
75- if : startsWith(inputs.CACHE_SUFFIX, 'arm')
76- run : |
77- echo "=== Starting enhanced SSH connectivity check ==="
78- SSH_HOST="root@$SSH_IP"
79- MAX_RETRIES=10
80- RETRY_COUNT=0
81- SUCCESS=false
82-
83- # SSH options shared by every ssh call below: 10s connect timeout, a single connection attempt, keepalives, and BatchMode=yes so ssh never waits on an interactive prompt
84- SSH_OPTS="-o StrictHostKeyChecking=no \
85- -o ConnectTimeout=10 \
86- -o ConnectionAttempts=1 \
87- -o ServerAliveInterval=30 \
88- -o ServerAliveCountMax=3 \
89- -o TCPKeepAlive=yes \
90- -o BatchMode=yes \
91- -i ~/.ssh/id_rsa_arm"
92-
93- while [ $RETRY_COUNT -lt $MAX_RETRIES ]; do
94- RETRY_COUNT=$((RETRY_COUNT + 1))
95- echo "----------------------------------------"
96- echo "Attempt $RETRY_COUNT of $MAX_RETRIES"
97- echo "Timestamp: $(date '+%Y-%m-%d %H:%M:%S')"
98-
99- # Probe TCP port 22 through bash's /dev/tcp redirection (5s timeout) before attempting SSH
100- echo "β Checking TCP port 22..."
101- if timeout 5 bash -c "cat < /dev/null > /dev/tcp/$SSH_IP/22" 2>/dev/null; then
102- echo "β Port 22 is open"
103-
104- # Over SSH, run an &&-chain: docker info must succeed, then a single ping to 9.9.9.9, then print uptime and memory usage
105- echo "β Testing SSH connection..."
106- if ssh $SSH_OPTS $SSH_HOST "echo 'β SSH connection established' && \
107- echo 'β Checking Docker...' && \
108- docker info > /dev/null 2>&1 && \
109- echo 'β Docker is running' && \
110- echo 'β Checking network...' && \
111- ping -c 1 9.9.9.9 > /dev/null 2>&1 && \
112- echo 'β Network connectivity verified (Quad9)' && \
113- echo 'β System info:' && \
114- uptime && \
115- free -h | head -n 2" 2>&1; then
116- echo "βββ All connectivity checks passed! βββ"
117- SUCCESS=true
118- break
119- else
120- echo "β SSH command execution failed"
121- fi
122- else
123- echo "β Port 22 is not reachable"
124- fi
125-
126- # Calculate backoff delay (linear, 5s per attempt, capped at 30s: 5, 10, 15, 20, 25, 30, 30, ...); skipped after the last attempt
127- if [ $RETRY_COUNT -lt $MAX_RETRIES ]; then
128- DELAY=$((5 * RETRY_COUNT))
129- [ $DELAY -gt 30 ] && DELAY=30
130- echo "β³ Waiting ${DELAY}s before retry..."
131- sleep $DELAY
132- fi
133- done
134-
135- if [ "$SUCCESS" = false ]; then
136- echo "========================================="
137- echo "βββ FATAL: All $MAX_RETRIES connection attempts failed βββ"
138- echo "This likely indicates:"
139- echo " - ARM node is not accessible"
140- echo " - Network connectivity issues"
141- echo " - SSH service not running"
142- echo " - Docker service not running"
143- echo "========================================="
144- exit 1
145- fi
146-
147- # Pause 10s before the final verification
148- echo "========================================="
149- echo "β³ Allowing connection to stabilize (10s)..."
150- sleep 10
151-
152- # Final verification: SSH must still connect and `docker ps` must succeed, otherwise the step exits 1
153- echo "β Final connectivity verification..."
154- if ssh $SSH_OPTS $SSH_HOST "echo 'β Final check: SSH OK' && docker ps > /dev/null 2>&1 && echo 'β Final check: Docker OK'"; then
155- echo "βββ SSH connectivity fully stable and ready βββ"
156- echo "========================================="
157- else
158- echo "βββ FATAL: Final verification failed βββ"
159- exit 1
160- fi
161- env :
162- SSH_IP : ${{ secrets.ARM_SSH_IP }}
16378 - name : Setup Buildx
16479 uses : docker/setup-buildx-action@e468171a9de216ec08956ac3ada2f0791b6bd435 # v3.11.1
16580 if : startsWith(inputs.CACHE_SUFFIX, 'arm') == false
0 commit comments