-
Notifications
You must be signed in to change notification settings - Fork 417
Enable TCP keepalive during connreq processing, otherwise #11058
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -156,6 +156,75 @@ static void xnet_set_no_port(SOCKET sock) | |
#define xnet_set_no_port(sock) | ||
#endif | ||
|
||
void | ||
xnet_disable_keepalive(struct xnet_ep *ep) | ||
{ | ||
int optval = 0; | ||
int ret; | ||
|
||
ret = setsockopt(ep->bsock.sock, SOL_SOCKET, SO_KEEPALIVE, (char *)&optval, | ||
sizeof(optval)); | ||
if (ret) { | ||
FI_WARN(&xnet_prov, FI_LOG_EP_CTRL, "set SO_KEEPALIVE failed %d", ret); | ||
return; | ||
} | ||
|
||
FI_INFO(&xnet_prov, FI_LOG_EP_CTRL, "ep %p KEEPALIVE is disabled.\n", ep); | ||
} | ||
wangdi1 marked this conversation as resolved.
Show resolved
Hide resolved
|
||
|
||
static int | ||
xnet_enable_keepalive(struct xnet_ep *ep) | ||
{ | ||
int optval = 1; | ||
int idle_time = 5; | ||
int keep_intvl = 2; | ||
int keep_cnt = 2; | ||
int ret; | ||
Comment on lines
+178
to
+182
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Would it be possible to make these parameters configurable via environment variables? I implemented a similar feature for the socket provider about 8 years ago in this commit. Also worth noting: the TCP provider now supports the There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
I thought about this, though this will only be used during CM exchange, maybe using fixed value is good enough for now. Though I may miss some use cases. |
||
|
||
ret = setsockopt(ep->bsock.sock, SOL_SOCKET, SO_KEEPALIVE, (const void *)&optval, | ||
sizeof(optval)); | ||
if (ret) { | ||
FI_WARN(&xnet_prov, FI_LOG_EP_CTRL, "set SO_KEEPALIVE failed %d", ret); | ||
return -ofi_sockerr(); | ||
} | ||
|
||
ret = setsockopt(ep->bsock.sock, IPPROTO_TCP, OFI_KEEPALIVE, (const void *)&idle_time, | ||
sizeof(idle_time)); | ||
if (ret) { | ||
ret = -ofi_sockerr(); | ||
FI_WARN(&xnet_prov, FI_LOG_EP_CTRL, "set TCP_KEEPIDLE failed %d", ret); | ||
goto out; | ||
} | ||
|
||
ret = setsockopt(ep->bsock.sock, IPPROTO_TCP, TCP_KEEPINTVL, (const void *)&keep_intvl, | ||
sizeof(keep_intvl)); | ||
if (ret) { | ||
ret = -ofi_sockerr(); | ||
FI_WARN(&xnet_prov, FI_LOG_EP_CTRL, "set TCP_KEEPINTVL failed %d", ret); | ||
goto out; | ||
} | ||
|
||
ret = setsockopt(ep->bsock.sock, IPPROTO_TCP, TCP_KEEPCNT, (const void *)&keep_cnt, | ||
sizeof(keep_cnt)); | ||
if (ret) { | ||
ret = -ofi_sockerr(); | ||
FI_WARN(&xnet_prov, FI_LOG_EP_CTRL, "set SO_KEEPALIVE failed %d", ret); | ||
goto out; | ||
} | ||
|
||
FI_INFO(&xnet_prov, FI_LOG_EP_CTRL, "%p KEEPALIVE idle %d intvl %d cnt %d\n", | ||
ep, idle_time, keep_intvl, keep_cnt); | ||
|
||
out: | ||
if (ret) { | ||
FI_WARN(&xnet_prov, FI_LOG_EP_CTRL, "%p KEEPALIVE set keepalive failed %d\n", | ||
ep, ret); | ||
xnet_disable_keepalive(ep); | ||
} | ||
|
||
return ret; | ||
} | ||
|
||
int xnet_setup_socket(SOCKET sock, struct fi_info *info) | ||
{ | ||
int ret, optval = 1; | ||
|
@@ -294,6 +363,16 @@ xnet_ep_accept(struct fid_ep *ep_fid, const void *param, size_t paramlen) | |
ep->cm_msg->hdr.seg_size = htons((uint16_t) paramlen); | ||
} | ||
|
||
/* Enable keepalive so that the socket state is reset promptly if the | ||
* remote peer restarts after receiving the connreq but before replying. | ||
*/ | ||
ret = xnet_enable_keepalive(ep); | ||
if (ret) { | ||
FI_WARN(&xnet_prov, FI_LOG_EP_CTRL, "%p set tcp keepalive failure:%d\n", | ||
ep, ret); | ||
return ret; | ||
} | ||
|
||
ret = xnet_send_cm_msg(ep); | ||
if (ret) | ||
return ret; | ||
|
Uh oh!
There was an error while loading. Please reload this page.