Skip to content

Enable TCP keepalive during connreq processing, otherwise #11058

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
May 30, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions include/freebsd/osd.h
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,7 @@
#define HOST_NAME_MAX 128
#define SOL_TCP IPPROTO_TCP

/* TCP-level socket option the tcp provider passes to setsockopt() to set the
 * keepalive idle time on a connection.
 */
#define OFI_KEEPALIVE TCP_KEEPIDLE
typedef cpuset_t cpu_set_t;

static inline int ofi_shm_remap(struct util_shm *shm, size_t newsize, void **mapped)
Expand Down
1 change: 1 addition & 0 deletions include/linux/osd.h
Original file line number Diff line number Diff line change
Expand Up @@ -228,4 +228,5 @@ ofi_recvv_socket(SOCKET fd, const struct iovec *iov, size_t cnt, int flags)
return ofi_recvmsg_tcp(fd, &msg, flags);
}

/* TCP-level socket option the tcp provider passes to setsockopt() to set the
 * keepalive idle time on a connection.
 */
#define OFI_KEEPALIVE TCP_KEEPIDLE
#endif /* _LINUX_OSD_H_ */
1 change: 1 addition & 0 deletions include/osx/osd.h
Original file line number Diff line number Diff line change
Expand Up @@ -173,6 +173,7 @@ ssize_t ofi_readv_socket(SOCKET fd, const struct iovec *iovec, size_t iov_cnt);
ssize_t ofi_sendmsg_tcp(SOCKET fd, const struct msghdr *msg, int flags);
ssize_t ofi_recvmsg_tcp(SOCKET fd, struct msghdr *msg, int flags);

/* TCP-level socket option the tcp provider passes to setsockopt() to set the
 * keepalive idle time. This header maps it to TCP_KEEPALIVE, whereas the
 * other osd.h headers map it to TCP_KEEPIDLE.
 * NOTE(review): presumably TCP_KEEPALIVE is the macOS counterpart of
 * TCP_KEEPIDLE -- confirm against the macOS tcp(4) man page.
 */
#define OFI_KEEPALIVE TCP_KEEPALIVE
/*
* pthread_spinlock is not available on Mac OS X, the following code
* used os_unfair_lock to implement pthread_spinlock.
Expand Down
2 changes: 2 additions & 0 deletions include/unix/osd.h
Original file line number Diff line number Diff line change
Expand Up @@ -70,6 +70,8 @@
#define OFI_UNUSED UNREFERENCED_PARAMETER
#endif

/* TCP-level socket option the tcp provider passes to setsockopt() to set the
 * keepalive idle time.
 *
 * Only a fallback: a platform header may already have selected a different
 * option name (include/osx/osd.h uses TCP_KEEPALIVE). If that header is part
 * of the same translation unit, its choice must not be overridden by a
 * conflicting redefinition here.
 */
#ifndef OFI_KEEPALIVE
#define OFI_KEEPALIVE TCP_KEEPIDLE
#endif

/* Non-zero when a socket error code means "no progress possible right now,
 * retry later" (EAGAIN or EWOULDBLOCK). The argument may be evaluated twice.
 */
#define OFI_SOCK_TRY_SND_RCV_AGAIN(err) \
	((err) == EAGAIN || (err) == EWOULDBLOCK)
Expand Down
2 changes: 1 addition & 1 deletion include/windows/osd.h
Original file line number Diff line number Diff line change
Expand Up @@ -1115,7 +1115,7 @@ static inline int ofi_set_thread_affinity(const char *s)
return -FI_ENOSYS;
}


/* TCP-level socket option the tcp provider passes to setsockopt() to set the
 * keepalive idle time.
 * NOTE(review): TCP_KEEPIDLE is presumably only provided by recent Windows
 * SDK headers -- confirm the minimum supported SDK defines it.
 */
#define OFI_KEEPALIVE TCP_KEEPIDLE
#if defined(_M_X64) || defined(_M_AMD64)

#include <intrin.h>
Expand Down
1 change: 1 addition & 0 deletions prov/tcp/src/xnet.h
Original file line number Diff line number Diff line change
Expand Up @@ -781,4 +781,5 @@ int xnet_rdm_ops_open(struct fid *fid, const char *name,
FI_WARN(&xnet_prov, subsystem, log_str "%s (%d)\n", \
fi_strerror((int) -(err)), (int) err)

/* Turn SO_KEEPALIVE off on the endpoint's socket (ep->bsock.sock).
 * Returns nothing; a setsockopt() failure is only logged.
 */
void xnet_disable_keepalive(struct xnet_ep *ep);
#endif //_XNET_H_
1 change: 1 addition & 0 deletions prov/tcp/src/xnet_cm.c
Original file line number Diff line number Diff line change
Expand Up @@ -186,6 +186,7 @@ void xnet_req_done(struct xnet_ep *ep)
FI_DBG(&xnet_prov, FI_LOG_EP_CTRL, "connect request done\n");
assert(xnet_progress_locked(xnet_ep2_progress(ep)));

xnet_disable_keepalive(ep);
ret = xnet_recv_cm_msg(ep->bsock.sock, ep->cm_msg);
if (ret == 0)
ret = xnet_handle_cm_msg(ep->bsock.sock, ep->cm_msg, ofi_ctrl_connresp);
Expand Down
79 changes: 79 additions & 0 deletions prov/tcp/src/xnet_ep.c
Original file line number Diff line number Diff line change
Expand Up @@ -156,6 +156,75 @@ static void xnet_set_no_port(SOCKET sock)
#define xnet_set_no_port(sock)
#endif

/*
 * Turn SO_KEEPALIVE off on the endpoint's socket.
 *
 * Best effort: the function returns nothing, so a setsockopt() failure is
 * logged and otherwise ignored.
 *
 * @param ep  endpoint whose socket (ep->bsock.sock) is modified
 */
void
xnet_disable_keepalive(struct xnet_ep *ep)
{
	int optval = 0;
	int ret;

	ret = setsockopt(ep->bsock.sock, SOL_SOCKET, SO_KEEPALIVE,
			 (char *) &optval, sizeof(optval));
	if (ret) {
		/* setsockopt() itself only reports -1; fetch the actual socket
		 * error before any other call can overwrite it.
		 */
		ret = -ofi_sockerr();
		FI_WARN(&xnet_prov, FI_LOG_EP_CTRL,
			"ep %p set SO_KEEPALIVE failed %d\n", ep, ret);
		return;
	}

	FI_INFO(&xnet_prov, FI_LOG_EP_CTRL, "ep %p KEEPALIVE is disabled.\n", ep);
}

/*
 * Enable TCP keepalive on the endpoint's socket with short, fixed timings.
 *
 * Sets SO_KEEPALIVE and then the idle time (OFI_KEEPALIVE), probe interval
 * (TCP_KEEPINTVL) and probe count (TCP_KEEPCNT). If any of the three tuning
 * options cannot be set, keepalive is switched off again so the socket is not
 * left with a half-applied configuration.
 *
 * Design note (from review): making these values configurable through
 * environment variables was suggested. They are kept fixed for now because
 * keepalive is only meant to be active during the CM exchange. Applications
 * that need finer control can obtain the socket through fi_control() with
 * FI_GET_FD.
 *
 * @param ep  endpoint whose socket (ep->bsock.sock) is configured
 * @return 0 on success, negative socket error code on failure
 */
static int
xnet_enable_keepalive(struct xnet_ep *ep)
{
	int optval = 1;
	int idle_time = 5;	/* seconds of idle time before the first probe */
	int keep_intvl = 2;	/* seconds between probes */
	int keep_cnt = 2;	/* unanswered probes before the connection is dropped */
	int ret;

	ret = setsockopt(ep->bsock.sock, SOL_SOCKET, SO_KEEPALIVE,
			 (const void *) &optval, sizeof(optval));
	if (ret) {
		/* Capture the socket error before logging can disturb it. */
		ret = -ofi_sockerr();
		FI_WARN(&xnet_prov, FI_LOG_EP_CTRL,
			"set SO_KEEPALIVE failed %d\n", ret);
		/* Nothing was enabled, so there is nothing to undo. */
		return ret;
	}

	ret = setsockopt(ep->bsock.sock, IPPROTO_TCP, OFI_KEEPALIVE,
			 (const void *) &idle_time, sizeof(idle_time));
	if (ret) {
		ret = -ofi_sockerr();
		FI_WARN(&xnet_prov, FI_LOG_EP_CTRL,
			"set TCP_KEEPIDLE failed %d\n", ret);
		goto out;
	}

	ret = setsockopt(ep->bsock.sock, IPPROTO_TCP, TCP_KEEPINTVL,
			 (const void *) &keep_intvl, sizeof(keep_intvl));
	if (ret) {
		ret = -ofi_sockerr();
		FI_WARN(&xnet_prov, FI_LOG_EP_CTRL,
			"set TCP_KEEPINTVL failed %d\n", ret);
		goto out;
	}

	ret = setsockopt(ep->bsock.sock, IPPROTO_TCP, TCP_KEEPCNT,
			 (const void *) &keep_cnt, sizeof(keep_cnt));
	if (ret) {
		ret = -ofi_sockerr();
		FI_WARN(&xnet_prov, FI_LOG_EP_CTRL,
			"set TCP_KEEPCNT failed %d\n", ret);
		goto out;
	}

	FI_INFO(&xnet_prov, FI_LOG_EP_CTRL, "%p KEEPALIVE idle %d intvl %d cnt %d\n",
		ep, idle_time, keep_intvl, keep_cnt);

out:
	if (ret) {
		FI_WARN(&xnet_prov, FI_LOG_EP_CTRL, "%p KEEPALIVE set keepalive failed %d\n",
			ep, ret);
		/* Roll back SO_KEEPALIVE so a partial setup is not left active. */
		xnet_disable_keepalive(ep);
	}

	return ret;
}

int xnet_setup_socket(SOCKET sock, struct fi_info *info)
{
int ret, optval = 1;
Expand Down Expand Up @@ -294,6 +363,16 @@ xnet_ep_accept(struct fid_ep *ep_fid, const void *param, size_t paramlen)
ep->cm_msg->hdr.seg_size = htons((uint16_t) paramlen);
}

/* Enable keepalive so that the socket state is reset promptly if the
 * remote peer restarts after receiving the connreq but before replying.
 */
ret = xnet_enable_keepalive(ep);
if (ret) {
FI_WARN(&xnet_prov, FI_LOG_EP_CTRL, "%p set tcp keepalive failure:%d\n",
ep, ret);
return ret;
}

ret = xnet_send_cm_msg(ep);
if (ret)
return ret;
Expand Down