@@ -118,23 +118,24 @@ inline void sockinfo_udp::reuse_buffer(mem_buf_desc_t *buff)
118118 }
119119}
120120
121- inline int sockinfo_udp::poll_os ()
121+ int sockinfo_udp::poll_and_arm_os ()
122122{
123- int ret;
124123 uint64_t pending_data = 0 ;
125124
126- m_rx_udp_poll_os_ratio_counter = 0 ;
127- ret = orig_os_api.ioctl (m_fd, FIONREAD, &pending_data);
128- if (unlikely (ret == -1 )) {
125+ int ret = orig_os_api.ioctl (m_fd, FIONREAD, &pending_data);
126+ if (pending_data > 0 ) {
127+ return pending_data;
128+ }
129+
130+ // No data is available
131+ unset_immediate_os_sample ();
132+
133+ if (ret < 0 ) {
129134 m_p_socket_stats->counters .n_rx_os_errors ++;
130135 si_udp_logdbg (" orig_os_api.ioctl returned with error in polling loop (errno=%d %m)" , errno);
131- return -1 ;
132- }
133- if (pending_data > 0 ) {
134- m_p_socket_stats->counters .n_rx_poll_os_hit ++;
135- return 1 ;
136136 }
137- return 0 ;
137+
138+ return ret;
138139}
139140
140141inline int sockinfo_udp::rx_wait (bool blocking)
@@ -145,31 +146,26 @@ inline int sockinfo_udp::rx_wait(bool blocking)
145146 epoll_event rx_epfd_events[SI_RX_EPFD_EVENT_MAX];
146147 uint64_t poll_sn;
147148
148- m_loops_timer.start ();
149+ m_loops_timer.start ();
149150
150151 while (loops_to_go) {
151152
152153 // Multi-thread polling support - let other threads have a go on this CPU
153- if ((m_n_sysvar_rx_poll_yield_loops > 0 ) && ((loops % m_n_sysvar_rx_poll_yield_loops) == (m_n_sysvar_rx_poll_yield_loops - 1 ))) {
154+ if ((m_n_sysvar_rx_poll_yield_loops > 0 ) && ((loops++ % m_n_sysvar_rx_poll_yield_loops) == (m_n_sysvar_rx_poll_yield_loops - 1 ))) {
154155 sched_yield ();
155156 }
156157
157- // Poll socket for OS ready packets... (at a ratio of the offloaded sockets as defined in m_n_sysvar_rx_udp_poll_os_ratio)
158- if ((m_n_sysvar_rx_udp_poll_os_ratio > 0 ) && (m_rx_udp_poll_os_ratio_counter >= m_n_sysvar_rx_udp_poll_os_ratio)) {
159- ret = poll_os ();
160- if ((ret == -1 ) || (ret == 1 )) {
161- return ret;
162- }
158+ if (m_b_os_data_available) {
159+ // OS data might be available
160+ return 1 ;
163161 }
164162
165163 // Poll cq for offloaded ready packets ...
166- m_rx_udp_poll_os_ratio_counter++;
167164 if (is_readable (&poll_sn)) {
168165 m_p_socket_stats->counters .n_rx_poll_hit ++;
169166 return 0 ;
170167 }
171168
172- loops++;
173169 if (!blocking || m_n_sysvar_rx_poll_num != -1 ) {
174170 loops_to_go--;
175171 }
@@ -287,7 +283,8 @@ inline int sockinfo_udp::rx_wait(bool blocking)
287283
288284 // Check if OS fd is ready for reading
289285 if (fd == m_fd) {
290- m_rx_udp_poll_os_ratio_counter = 0 ;
286+ // OS data might be available
287+ set_immediate_os_sample ();
291288 return 1 ;
292289 }
293290
@@ -377,7 +374,6 @@ sockinfo_udp::sockinfo_udp(int fd) throw (vma_exception) :
377374 ,m_b_mc_tx_loop(safe_mce_sys().tx_mc_loopback_default) // default value is 'true'. User can change this with config parameter SYS_VAR_TX_MC_LOOPBACK
378375 ,m_n_mc_ttl(DEFAULT_MC_TTL)
379376 ,m_loops_to_go(safe_mce_sys().rx_poll_num_init) // Start up with a init polling loops value
380- ,m_rx_udp_poll_os_ratio_counter(0 )
381377 ,m_sock_offload(true )
382378 ,m_mc_num_grp_with_src_filter(0 )
383379 ,m_port_map_lock(" sockinfo_udp::m_ports_map_lock" )
@@ -388,14 +384,14 @@ sockinfo_udp::sockinfo_udp(int fd) throw (vma_exception) :
388384 ,m_b_rcvtstampns(false )
389385 ,m_n_tsing_flags(0 )
390386 ,m_n_sysvar_rx_poll_yield_loops(safe_mce_sys().rx_poll_yield_loops)
391- ,m_n_sysvar_rx_udp_poll_os_ratio(safe_mce_sys().rx_udp_poll_os_ratio)
392387 ,m_n_sysvar_rx_ready_byte_min_limit(safe_mce_sys().rx_ready_byte_min_limit)
393388 ,m_n_sysvar_rx_cq_drain_rate_nsec(safe_mce_sys().rx_cq_drain_rate_nsec)
394389 ,m_n_sysvar_rx_delta_tsc_between_cq_polls(safe_mce_sys().rx_delta_tsc_between_cq_polls)
395390 ,m_reuseaddr(false )
396391 ,m_sockopt_mapped(false )
397392 ,m_is_connected(false )
398393 ,m_multicast(false )
394+ ,m_b_os_data_available(false )
399395{
400396 si_udp_logfunc (" " );
401397
@@ -417,17 +413,17 @@ sockinfo_udp::sockinfo_udp(int fd) throw (vma_exception) :
417413 rx_ready_byte_count_limit_update (n_so_rcvbuf_bytes);
418414
419415 epoll_event ev = {0 , {0 }};
420-
421416 ev.events = EPOLLIN;
422-
423- // Add the user's orig fd to the rx epfd handle
424- ev.data .fd = m_fd;
417+ ev.data .fd = m_fd; // Add the user's orig fd to the rx epfd handle
425418
426419 BULLSEYE_EXCLUDE_BLOCK_START
427420 if (unlikely (orig_os_api.epoll_ctl (m_rx_epfd, EPOLL_CTL_ADD, ev.data .fd , &ev)))
428421 si_udp_logpanic (" failed to add user's fd to internal epfd errno=%d (%m)" , errno);
429422 BULLSEYE_EXCLUDE_BLOCK_END
430423
424+ // Register this socket to read nonoffloaded data
425+ g_p_event_handler_manager->update_epfd (m_fd, EPOLL_CTL_ADD, EPOLLIN | EPOLLPRI | EPOLLONESHOT);
426+
431427 si_udp_logfunc (" done" );
432428}
433429
@@ -1415,30 +1411,25 @@ ssize_t sockinfo_udp::rx(const rx_call_t call_type, iovec* p_iov,ssize_t sz_iov,
14151411
14161412 return_reuse_buffers_postponed ();
14171413
1418- // Drop lock to not starve other threads
1419- m_lock_rcv.unlock ();
1420-
1421- // Poll socket for OS ready packets... (at a ratio of the offloaded sockets as defined in m_n_sysvar_rx_udp_poll_os_ratio)
1422- if ((m_n_sysvar_rx_udp_poll_os_ratio > 0 ) && (m_rx_udp_poll_os_ratio_counter >= m_n_sysvar_rx_udp_poll_os_ratio)) {
1423- ret = poll_os ();
1424- if (ret == -1 ) {
1425- /* coverity[double_lock] TODO: RM#1049980 */
1426- m_lock_rcv.lock ();
1427- goto out;
1428- }
1429- if (ret == 1 ) {
1430- /* coverity[double_lock] TODO: RM#1049980 */
1431- m_lock_rcv.lock ();
1414+ // Check for nonoffloaded data if m_b_os_data_available is true
1415+ if (m_b_os_data_available) {
1416+ ret = poll_and_arm_os ();
1417+ if (ret > 0 ) {
14321418 goto os;
14331419 }
1420+ if (unlikely (ret < 0 )) {
1421+ goto out;
1422+ }
14341423 }
14351424
1425+ // Drop lock to not starve other threads
1426+ m_lock_rcv.unlock ();
1427+
14361428 // First check if we have a packet in the ready list
14371429 if ((m_n_rx_pkt_ready_list_count > 0 && m_n_sysvar_rx_cq_drain_rate_nsec == MCE_RX_CQ_DRAIN_RATE_DISABLED)
14381430 || is_readable (&poll_sn)) {
14391431 /* coverity[double_lock] TODO: RM#1049980 */
14401432 m_lock_rcv.lock ();
1441- m_rx_udp_poll_os_ratio_counter++;
14421433 if (m_n_rx_pkt_ready_list_count > 0 ) {
14431434 // Found a ready packet in the list
14441435 if (__msg) handle_cmsg (__msg);
@@ -1464,24 +1455,33 @@ ssize_t sockinfo_udp::rx(const rx_call_t call_type, iovec* p_iov,ssize_t sz_iov,
14641455 if (__msg) handle_cmsg (__msg);
14651456 ret = dequeue_packet (p_iov, sz_iov, (sockaddr_in *)__from, __fromlen, in_flags, &out_flags);
14661457 goto out;
1467- } else {
1468- m_lock_rcv.unlock ();
1469- goto wait;
14701458 }
1459+ m_lock_rcv.unlock ();
1460+ goto wait;
14711461 }
1472- else if (unlikely (rx_wait_ret < 0 )) {
1462+
1463+ if (unlikely (rx_wait_ret < 0 )) {
14731464 // Got < 0, means an error occurred
14741465 ret = rx_wait_ret;
14751466 goto out;
1476- } // else - packet in OS
1467+ }
1468+
1469+ // Else, check for nonoffloaded data - rx_wait() returned 1.
1470+ ret = poll_and_arm_os ();
1471+ if (ret == 0 ) {
1472+ m_lock_rcv.unlock ();
1473+ goto wait;
1474+ }
1475+ if (unlikely (ret < 0 )) {
1476+ goto out;
1477+ }
14771478
14781479 /*
14791480 * If we got here, either the socket is not offloaded or rx_wait() returned 1.
14801481 */
14811482os:
14821483 if (in_flags & MSG_VMA_ZCOPY_FORCE) {
14831484 // Enable the next non-blocked read to check the OS
1484- m_rx_udp_poll_os_ratio_counter = m_n_sysvar_rx_udp_poll_os_ratio;
14851485 errno = EIO;
14861486 ret = -1 ;
14871487 goto out;
@@ -1495,10 +1495,9 @@ ssize_t sockinfo_udp::rx(const rx_call_t call_type, iovec* p_iov,ssize_t sz_iov,
14951495 ret = socket_fd_api::rx_os (call_type, p_iov, sz_iov, in_flags, __from, __fromlen, __msg);
14961496 *p_flags = in_flags;
14971497 save_stats_rx_os (ret);
1498- if (ret > 0 ) {
1499- // This will cause the next non-blocked read to check the OS again.
1500- // We do this only after a successful read.
1501- m_rx_udp_poll_os_ratio_counter = m_n_sysvar_rx_udp_poll_os_ratio;
1498+ if (ret <= 0 ) {
1499+ // Do not poll the os fd on the next rx() call.
1500+ unset_immediate_os_sample ();
15021501 }
15031502
15041503out:
@@ -1633,16 +1632,16 @@ void sockinfo_udp::handle_cmsg(struct msghdr * msg)
16331632 cm_state.mhdr ->msg_controllen = cm_state.cmsg_bytes_consumed ;
16341633}
16351634
1636- // This function is relevant only for non-blocking socket
16371635void sockinfo_udp::set_immediate_os_sample ()
16381636{
1639- m_rx_udp_poll_os_ratio_counter = m_n_sysvar_rx_udp_poll_os_ratio ;
1637+ m_b_os_data_available = true ;
16401638}
16411639
1642- // This function is relevant only for non-blocking socket
16431640void sockinfo_udp::unset_immediate_os_sample ()
16441641{
1645- m_rx_udp_poll_os_ratio_counter = 0 ;
1642+ // Reassign EPOLLIN event
1643+ m_b_os_data_available = false ;
1644+ g_p_event_handler_manager->update_epfd (m_fd, EPOLL_CTL_MOD, EPOLLIN | EPOLLPRI | EPOLLONESHOT);
16461645}
16471646
16481647bool sockinfo_udp::is_readable (uint64_t *p_poll_sn, fd_array_t * p_fd_ready_array)
@@ -1952,7 +1951,6 @@ int sockinfo_udp::rx_verify_available_data()
19521951 if (ret >= 0 ) {
19531952 // This will cause the next non-blocked read to check the OS again.
19541953 // We do this only after a successful read.
1955- m_rx_udp_poll_os_ratio_counter = m_n_sysvar_rx_udp_poll_os_ratio;
19561954 ret = pending_data;
19571955 }
19581956 }
@@ -2297,12 +2295,12 @@ void sockinfo_udp::rx_add_ring_cb(flow_tuple_with_local_if &flow_key, ring* p_ri
22972295 si_udp_logdbg (" " );
22982296 sockinfo::rx_add_ring_cb (flow_key, p_ring, is_migration);
22992297
2300- // Now that we got at least 1 CQ attached enable the skip os mechanism .
2301- m_rx_udp_poll_os_ratio_counter = m_n_sysvar_rx_udp_poll_os_ratio ;
2298+ // Now that we got at least 1 CQ attached enable poll os for available data .
2299+ set_immediate_os_sample () ;
23022300
23032301 // Now that we got at least 1 CQ attached start polling the CQs
23042302 if (m_b_blocking) {
2305- m_loops_to_go = m_n_sysvar_rx_poll_num;
2303+ m_loops_to_go = m_n_sysvar_rx_poll_num;
23062304 }
23072305 else {
23082306 m_loops_to_go = 1 ; // Force single CQ poll in case of non-blocking socket
@@ -2742,24 +2740,30 @@ size_t sockinfo_udp::handle_msg_trunc(size_t total_rx, size_t payload_size, int
27422740 return total_rx;
27432741}
27442742
2745- mem_buf_desc_t * sockinfo_udp::get_front_m_rx_pkt_ready_list (){
2743+ mem_buf_desc_t * sockinfo_udp::get_front_m_rx_pkt_ready_list ()
2744+ {
27462745 return m_rx_pkt_ready_list.front ();
27472746}
27482747
2749- size_t sockinfo_udp::get_size_m_rx_pkt_ready_list (){
2748+ size_t sockinfo_udp::get_size_m_rx_pkt_ready_list ()
2749+ {
27502750 return m_rx_pkt_ready_list.size ();
27512751}
27522752
2753- void sockinfo_udp::pop_front_m_rx_pkt_ready_list (){
2753+ void sockinfo_udp::pop_front_m_rx_pkt_ready_list ()
2754+ {
27542755 m_rx_pkt_ready_list.pop_front ();
27552756}
27562757
2757- void sockinfo_udp::push_back_m_rx_pkt_ready_list (mem_buf_desc_t * buff){
2758+ void sockinfo_udp::push_back_m_rx_pkt_ready_list (mem_buf_desc_t * buff)
2759+ {
27582760 m_rx_pkt_ready_list.push_back (buff);
27592761}
27602762
2761- bool sockinfo_udp::prepare_to_close (bool process_shutdown) {
2763+ bool sockinfo_udp::prepare_to_close (bool process_shutdown)
2764+ {
27622765 m_lock_rcv.lock ();
2766+ g_p_event_handler_manager->update_epfd (m_fd, EPOLL_CTL_DEL, EPOLLIN | EPOLLPRI | EPOLLONESHOT);
27632767 do_wakeup ();
27642768 m_lock_rcv.unlock ();
27652769 NOT_IN_USE (process_shutdown);
0 commit comments