Skip to content

Commit 1f236dc

Browse files
wangdi1j-xiong
authored and committed
prov/tcp: Enable keepalive during CM exchange
Enable TCP keepalive during connreq processing. Otherwise, if the remote peer is restarted between receiving the connreq and replying, the endpoint may hang waiting for the connreq reply: the ep state (req_done) does not allow sending new requests, and the socket state is never reset, so progress cannot detect the disconnection. Signed-off-by: Di Wang <[email protected]>
1 parent efce9fc commit 1f236dc

File tree

8 files changed

+87
-1
lines changed

8 files changed

+87
-1
lines changed

include/freebsd/osd.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -49,6 +49,7 @@
4949
#define HOST_NAME_MAX 128
5050
#define SOL_TCP IPPROTO_TCP
5151

52+
/* TCP-level socket option the TCP provider uses to set the keepalive idle time. */
#define OFI_KEEPALIVE TCP_KEEPIDLE
5253
typedef cpuset_t cpu_set_t;
5354

5455
static inline int ofi_shm_remap(struct util_shm *shm, size_t newsize, void **mapped)

include/linux/osd.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -228,4 +228,5 @@ ofi_recvv_socket(SOCKET fd, const struct iovec *iov, size_t cnt, int flags)
228228
return ofi_recvmsg_tcp(fd, &msg, flags);
229229
}
230230

231+
/* TCP-level socket option the TCP provider uses to set the keepalive idle time. */
#define OFI_KEEPALIVE TCP_KEEPIDLE
231232
#endif /* _LINUX_OSD_H_ */

include/osx/osd.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -173,6 +173,7 @@ ssize_t ofi_readv_socket(SOCKET fd, const struct iovec *iovec, size_t iov_cnt);
173173
ssize_t ofi_sendmsg_tcp(SOCKET fd, const struct msghdr *msg, int flags);
174174
ssize_t ofi_recvmsg_tcp(SOCKET fd, struct msghdr *msg, int flags);
175175

176+
/*
 * TCP-level socket option the TCP provider uses to set the keepalive idle
 * time. On this platform the option is mapped to TCP_KEEPALIVE rather than
 * TCP_KEEPIDLE.
 */
#define OFI_KEEPALIVE TCP_KEEPALIVE
176177
/*
177178
* pthread_spinlock is not available on Mac OS X, the following code
178179
* used os_unfair_lock to implement pthread_spinlock.

include/unix/osd.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -70,6 +70,8 @@
7070
#define OFI_UNUSED UNREFERENCED_PARAMETER
7171
#endif
7272

73+
/* TCP-level socket option the TCP provider uses to set the keepalive idle time. */
#define OFI_KEEPALIVE TCP_KEEPIDLE
74+
7375
#define OFI_SOCK_TRY_SND_RCV_AGAIN(err) \
7476
(((err) == EAGAIN) || \
7577
((err) == EWOULDBLOCK))

include/windows/osd.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1115,7 +1115,7 @@ static inline int ofi_set_thread_affinity(const char *s)
11151115
return -FI_ENOSYS;
11161116
}
11171117

1118-
1118+
/* TCP-level socket option the TCP provider uses to set the keepalive idle time. */
#define OFI_KEEPALIVE TCP_KEEPIDLE
11191119
#if defined(_M_X64) || defined(_M_AMD64)
11201120

11211121
#include <intrin.h>

prov/tcp/src/xnet.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -790,4 +790,5 @@ int xnet_rdm_ops_open(struct fid *fid, const char *name,
790790
FI_WARN(&xnet_prov, subsystem, log_str "%s (%d)\n", \
791791
fi_strerror((int) -(err)), (int) err)
792792

793+
/*
 * Clear SO_KEEPALIVE on the endpoint's socket (ep->bsock.sock).
 * Failure is only logged; nothing is reported back to the caller.
 */
void xnet_disable_keepalive(struct xnet_ep *ep);
793794
#endif //_XNET_H_

prov/tcp/src/xnet_cm.c

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -178,6 +178,7 @@ void xnet_req_done(struct xnet_ep *ep)
178178
FI_DBG(&xnet_prov, FI_LOG_EP_CTRL, "connect request done\n");
179179
assert(xnet_progress_locked(xnet_ep2_progress(ep)));
180180

181+
xnet_disable_keepalive(ep);
181182
ret = xnet_recv_cm_msg(ep->bsock.sock, ep->cm_msg);
182183
if (ret == 0)
183184
ret = xnet_handle_cm_msg(ep->bsock.sock, ep->cm_msg, ofi_ctrl_connresp);

prov/tcp/src/xnet_ep.c

Lines changed: 79 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -156,6 +156,75 @@ static void xnet_set_no_port(SOCKET sock)
156156
#define xnet_set_no_port(sock)
157157
#endif
158158

159+
void
160+
xnet_disable_keepalive(struct xnet_ep *ep)
161+
{
162+
int optval = 0;
163+
int ret;
164+
165+
ret = setsockopt(ep->bsock.sock, SOL_SOCKET, SO_KEEPALIVE, (char *)&optval,
166+
sizeof(optval));
167+
if (ret) {
168+
FI_WARN(&xnet_prov, FI_LOG_EP_CTRL, "set SO_KEEPALIVE failed %d", ret);
169+
return;
170+
}
171+
172+
FI_INFO(&xnet_prov, FI_LOG_EP_CTRL, "ep %p KEEPALIVE is disabled.\n", ep);
173+
}
174+
175+
static int
176+
xnet_enable_keepalive(struct xnet_ep *ep)
177+
{
178+
int optval = 1;
179+
int idle_time = 5;
180+
int keep_intvl = 2;
181+
int keep_cnt = 2;
182+
int ret;
183+
184+
ret = setsockopt(ep->bsock.sock, SOL_SOCKET, SO_KEEPALIVE, (const void *)&optval,
185+
sizeof(optval));
186+
if (ret) {
187+
FI_WARN(&xnet_prov, FI_LOG_EP_CTRL, "set SO_KEEPALIVE failed %d", ret);
188+
return -ofi_sockerr();
189+
}
190+
191+
ret = setsockopt(ep->bsock.sock, IPPROTO_TCP, OFI_KEEPALIVE, (const void *)&idle_time,
192+
sizeof(idle_time));
193+
if (ret) {
194+
ret = -ofi_sockerr();
195+
FI_WARN(&xnet_prov, FI_LOG_EP_CTRL, "set TCP_KEEPIDLE failed %d", ret);
196+
goto out;
197+
}
198+
199+
ret = setsockopt(ep->bsock.sock, IPPROTO_TCP, TCP_KEEPINTVL, (const void *)&keep_intvl,
200+
sizeof(keep_intvl));
201+
if (ret) {
202+
ret = -ofi_sockerr();
203+
FI_WARN(&xnet_prov, FI_LOG_EP_CTRL, "set TCP_KEEPINTVL failed %d", ret);
204+
goto out;
205+
}
206+
207+
ret = setsockopt(ep->bsock.sock, IPPROTO_TCP, TCP_KEEPCNT, (const void *)&keep_cnt,
208+
sizeof(keep_cnt));
209+
if (ret) {
210+
ret = -ofi_sockerr();
211+
FI_WARN(&xnet_prov, FI_LOG_EP_CTRL, "set SO_KEEPALIVE failed %d", ret);
212+
goto out;
213+
}
214+
215+
FI_INFO(&xnet_prov, FI_LOG_EP_CTRL, "%p KEEPALIVE idle %d intvl %d cnt %d\n",
216+
ep, idle_time, keep_intvl, keep_cnt);
217+
218+
out:
219+
if (ret) {
220+
FI_WARN(&xnet_prov, FI_LOG_EP_CTRL, "%p KEEPALIVE set keepalive failed %d\n",
221+
ep, ret);
222+
xnet_disable_keepalive(ep);
223+
}
224+
225+
return ret;
226+
}
227+
159228
int xnet_setup_socket(SOCKET sock, struct fi_info *info)
160229
{
161230
int ret, optval = 1;
@@ -294,6 +363,16 @@ xnet_ep_accept(struct fid_ep *ep_fid, const void *param, size_t paramlen)
294363
ep->cm_msg->hdr.seg_size = htons((uint16_t) paramlen);
295364
}
296365

366+
/* Enable keepalive to make sure the socket status can be reset in time
367+
* if the remote peer is restarted after it gets the connreq but before it replies.
368+
*/
369+
ret = xnet_enable_keepalive(ep);
370+
if (ret) {
371+
FI_WARN(&xnet_prov, FI_LOG_EP_CTRL, "%p set tcp keepalive failure:%d\n",
372+
ep, ret);
373+
return ret;
374+
}
375+
297376
ret = xnet_send_cm_msg(ep);
298377
if (ret)
299378
return ret;

0 commit comments

Comments
 (0)