Skip to content

Commit 14a6371

Browse files
committed
git push --force batsheva rsocket_upstream_epolllibrdmacm: Add support for epoll_wait
Implement repoll_wait as the rsocket equivalent of epoll_wait. Snapshots the fd array under the lock, calls rpoll without holding it, then collects events back under the lock. Wakes the monitor thread on timeout so it can resume background polling. Future option: the monitor thread can be disabled entirely for a threadless mode where repoll_wait calls rpoll directly each time. Signed-off-by: Batsheva Black <bblack@nvidia.com>
1 parent e2145cc commit 14a6371

7 files changed

Lines changed: 103 additions & 1 deletion

File tree

debian/librdmacm1.symbols

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@ librdmacm.so.1 librdmacm1 #MINVER#
99
raccept@RDMACM_1.0 1.0.16
1010
repoll_create@RDMACM_1.5 61
1111
repoll_ctl@RDMACM_1.5 61
12+
repoll_wait@RDMACM_1.5 61
1213
rbind@RDMACM_1.0 1.0.16
1314
rclose@RDMACM_1.0 1.0.16
1415
rconnect@RDMACM_1.0 1.0.16

librdmacm/librdmacm.map

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -99,4 +99,5 @@ RDMACM_1.5 {
9999
global:
100100
repoll_create;
101101
repoll_ctl;
102+
repoll_wait;
102103
} RDMACM_1.4;

librdmacm/librspreload.map

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -36,5 +36,6 @@
3636
epoll_create;
3737
epoll_create1;
3838
epoll_ctl;
39+
epoll_wait;
3940
local: *;
4041
};

librdmacm/man/rsocket.7.in

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -31,7 +31,7 @@ rgetpeername, rgetsockname
3131
.P
3232
rsetsockopt, rgetsockopt, rfcntl
3333
.P
34-
repoll_create, repoll_ctl
34+
repoll_create, repoll_ctl, repoll_wait
3535
.P
3636
Functions take the same parameters as that used for sockets. The
3737
follow capabilities and flags are supported at this time:
@@ -153,6 +153,8 @@ wake_up_interval - maximum number of milliseconds to block in poll.
153153
This value is used to safe guard against potential application hangs
154154
in rpoll().
155155
.P
156+
max_events - maximum number of events for the epoll thread to handle for an epoll_instance.
157+
.P
156158
All configuration files should contain a single integer value. Values may
157159
be set by issuing a command similar to the following example.
158160
.P

librdmacm/preload.c

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1364,3 +1364,8 @@ int epoll_ctl(int epfd, int op, int fd, struct epoll_event *event)
13641364

13651365
return repoll_ctl(epfd, op, internal_fd, event);
13661366
}
1367+
1368+
int epoll_wait(int epfd, struct epoll_event *events, int maxevents, int timeout)
1369+
{
1370+
return repoll_wait(epfd, events, maxevents, timeout);
1371+
}

librdmacm/rsocket.c

Lines changed: 90 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5170,3 +5170,93 @@ int repoll_ctl(int epfd, int op, int fd, struct epoll_event *event)
51705170
pthread_mutex_unlock(&ri->lock);
51715171
return ret;
51725172
}
5173+
5174+
static uint32_t poll_to_epoll_events(uint32_t poll_revents)
5175+
{
5176+
uint32_t epoll_events = 0;
5177+
5178+
if (poll_revents & POLLIN)
5179+
epoll_events |= EPOLLIN;
5180+
if (poll_revents & POLLOUT)
5181+
epoll_events |= EPOLLOUT;
5182+
if (poll_revents & POLLERR)
5183+
epoll_events |= EPOLLERR;
5184+
if (poll_revents & POLLHUP)
5185+
epoll_events |= EPOLLHUP;
5186+
return epoll_events;
5187+
}
5188+
5189+
static void repoll_wake_monitor(struct repoll_info *ri)
5190+
{
5191+
atomic_store(&ri->monitor_state, REPOLL_MONITOR_RUNNING);
5192+
pthread_cond_signal(&ri->monitor_wake);
5193+
}
5194+
5195+
static int repoll_collect_events(struct repoll_info *ri,
5196+
struct pollfd *polled_fds, int polled_nfds,
5197+
struct epoll_event *events, int maxevents)
5198+
{
5199+
int j = 0;
5200+
5201+
for (int i = 0; j < maxevents && i < polled_nfds; ++i) {
5202+
int slot = i + 1;
5203+
5204+
if (polled_fds[i].revents && slot < ri->slot_count &&
5205+
ri->fds[slot].fd == polled_fds[i].fd) {
5206+
memcpy(&events[j].data, &ri->user_data[slot],
5207+
sizeof(events->data));
5208+
events[j].events =
5209+
poll_to_epoll_events(polled_fds[i].revents);
5210+
++j;
5211+
}
5212+
}
5213+
return j;
5214+
}
5215+
5216+
/* Future option: disable the monitor thread for a threadless mode
5217+
* where repoll_wait calls rpoll directly.
5218+
*/
5219+
5220+
int repoll_wait(int epfd, struct epoll_event *events, int maxevents,
5221+
int timeout)
5222+
{
5223+
struct repoll_info *ri;
5224+
struct pollfd *local_fds;
5225+
int nfds, ret;
5226+
5227+
if (epfd < 0)
5228+
return EBADF;
5229+
5230+
ri = idm_lookup(&repoll_idm, epfd);
5231+
if (!ri)
5232+
return EINVAL;
5233+
5234+
pthread_mutex_lock(&ri->lock);
5235+
nfds = ri->slot_count - 1;
5236+
if (nfds <= 0) {
5237+
pthread_mutex_unlock(&ri->lock);
5238+
return 0;
5239+
}
5240+
local_fds = malloc(sizeof(*local_fds) * nfds);
5241+
if (!local_fds) {
5242+
pthread_mutex_unlock(&ri->lock);
5243+
return ENOMEM;
5244+
}
5245+
memcpy(local_fds, ri->fds + 1, sizeof(*local_fds) * nfds);
5246+
pthread_mutex_unlock(&ri->lock);
5247+
5248+
ret = rpoll(local_fds, nfds, timeout);
5249+
5250+
if (ret > 0) {
5251+
pthread_mutex_lock(&ri->lock);
5252+
ret = repoll_collect_events(ri, local_fds, nfds, events,
5253+
maxevents);
5254+
pthread_mutex_unlock(&ri->lock);
5255+
} else if (ret == 0) {
5256+
if (atomic_load(&ri->monitor_state) == REPOLL_MONITOR_SLEEPING)
5257+
repoll_wake_monitor(ri);
5258+
}
5259+
5260+
free(local_fds);
5261+
return ret;
5262+
}

librdmacm/rsocket.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -95,6 +95,8 @@ size_t riowrite(int socket, const void *buf, size_t count, off_t offset, int fla
9595

9696
int repoll_create(int flags);
9797
int repoll_ctl(int epfd, int op, int fd, struct epoll_event *event);
98+
int repoll_wait(int epfd, struct epoll_event *events, int maxevents,
99+
int timeout);
98100

99101
#ifdef __cplusplus
100102
}

0 commit comments

Comments
 (0)