@@ -156,6 +156,76 @@ static void xnet_set_no_port(SOCKET sock)
156
156
#define xnet_set_no_port (sock )
157
157
#endif
158
158
159
+ int
160
+ xnet_disable_keepalive (struct xnet_ep * ep )
161
+ {
162
+ int optval = 0 ;
163
+ int ret ;
164
+
165
+ ret = setsockopt (ep -> bsock .sock , SOL_SOCKET , SO_KEEPALIVE , (char * )& optval ,
166
+ sizeof (optval ));
167
+ if (ret ) {
168
+ FI_WARN (& xnet_prov , FI_LOG_EP_CTRL , "set SO_KEEPALIVE failed %d" , ret );
169
+ return - ofi_sockerr ();
170
+ }
171
+
172
+ FI_INFO (& xnet_prov , FI_LOG_EP_CTRL , "ep %p KEEPALIVE is disabled.\n" , ep );
173
+ return ret ;
174
+ }
175
+
176
+ static int
177
+ xnet_enable_keepalive (struct xnet_ep * ep )
178
+ {
179
+ int optval = 1 ;
180
+ int idle_time = 5 ;
181
+ int keep_intvl = 2 ;
182
+ int keep_cnt = 2 ;
183
+ int ret ;
184
+
185
+ ret = setsockopt (ep -> bsock .sock , SOL_SOCKET , SO_KEEPALIVE , (const void * )& optval ,
186
+ sizeof (optval ));
187
+ if (ret ) {
188
+ FI_WARN (& xnet_prov , FI_LOG_EP_CTRL , "set SO_KEEPALIVE failed %d" , ret );
189
+ return - ofi_sockerr ();
190
+ }
191
+
192
+ ret = setsockopt (ep -> bsock .sock , IPPROTO_TCP , TCP_KEEPIDLE , (const void * )& idle_time ,
193
+ sizeof (idle_time ));
194
+ if (ret ) {
195
+ ret = - ofi_sockerr ();
196
+ FI_WARN (& xnet_prov , FI_LOG_EP_CTRL , "set TCP_KEEPIDLE failed %d" , ret );
197
+ goto out ;
198
+ }
199
+
200
+ ret = setsockopt (ep -> bsock .sock , IPPROTO_TCP , TCP_KEEPINTVL , (const void * )& keep_intvl ,
201
+ sizeof (keep_intvl ));
202
+ if (ret ) {
203
+ ret = - ofi_sockerr ();
204
+ FI_WARN (& xnet_prov , FI_LOG_EP_CTRL , "set TCP_KEEPINTVL failed %d" , ret );
205
+ goto out ;
206
+ }
207
+
208
+ ret = setsockopt (ep -> bsock .sock , IPPROTO_TCP , TCP_KEEPCNT , (const void * )& keep_cnt ,
209
+ sizeof (keep_cnt ));
210
+ if (ret ) {
211
+ ret = - ofi_sockerr ();
212
+ FI_WARN (& xnet_prov , FI_LOG_EP_CTRL , "set SO_KEEPALIVE failed %d" , ret );
213
+ goto out ;
214
+ }
215
+
216
+ FI_INFO (& xnet_prov , FI_LOG_EP_CTRL , "%p KEEPALIVE idle %d intvl %d cnt %d\n" ,
217
+ ep , idle_time , keep_intvl , keep_cnt );
218
+
219
+ out :
220
+ if (ret ) {
221
+ FI_WARN (& xnet_prov , FI_LOG_EP_CTRL , "%p KEEPALIVE set keepalive failed %d\n" ,
222
+ ep , ret );
223
+ xnet_disable_keepalive (ep );
224
+ }
225
+
226
+ return ret ;
227
+ }
228
+
159
229
int xnet_setup_socket (SOCKET sock , struct fi_info * info )
160
230
{
161
231
int ret , optval = 1 ;
@@ -294,6 +364,16 @@ xnet_ep_accept(struct fid_ep *ep_fid, const void *param, size_t paramlen)
294
364
ep -> cm_msg -> hdr .seg_size = htons ((uint16_t ) paramlen );
295
365
}
296
366
367
+ /* Enable keepalive to make sure the socket status can be reset in time
368
+ * if the remote peer is restarted after it gets the connreq but before it replies.
369
+ */
370
+ ret = xnet_enable_keepalive (ep );
371
+ if (ret ) {
372
+ FI_WARN (& xnet_prov , FI_LOG_EP_CTRL , "%p set tcp keepalive failure:%d\n" ,
373
+ ep , ret );
374
+ return ofi_sockerr () ? - ofi_sockerr () : - FI_EINVAL ;
375
+ }
376
+
297
377
ret = xnet_send_cm_msg (ep );
298
378
if (ret )
299
379
return ret ;
0 commit comments