Skip to content

Commit a97400c

Browse files
z-wenlintuhaihe
authored and committed
Add hint message for MTU settings when IC reports ERROR "Failed to send packet" (#17164)
* Add hint message for MTU settings when IC reports ERROR "Failed to send packet".
1 parent 1e5ffff commit a97400c

File tree

4 files changed

+50
-4
lines changed

4 files changed

+50
-4
lines changed

contrib/interconnect/udp/ic_faultinjection.h

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -83,6 +83,7 @@ typedef enum {
8383
FINC_OS_NET_INTERFACE = 19,
8484
FINC_OS_MEM_INTERFACE = 20,
8585
FINC_OS_CREATE_THREAD = 21,
86+
FINC_PKT_TOO_LONG = 22,
8687

8788
/* These are used to inject network faults. */
8889
FINC_NET_RECV_ERROR = 23,
@@ -301,6 +302,13 @@ testmode_sendto(const char *caller_name, int socket, const void *buffer,
301302
errno = EFAULT;
302303
return -1;
303304

305+
case FINC_PKT_TOO_LONG:
306+
if (!FINC_HAS_FAULT(fault_type) || !is_pkt)
307+
break;
308+
write_log("inject fault to sendto: FINC_PKT_TOO_LONG");
309+
errno = EMSGSIZE;
310+
return -1;
311+
304312
default:
305313
break;
306314
}

contrib/interconnect/udp/ic_udpifc.c

Lines changed: 30 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -2246,10 +2246,23 @@ sendControlMessage(icpkthdr *pkt, int fd, struct sockaddr *addr, socklen_t peerL
22462246
if (gp_interconnect_full_crc)
22472247
addCRC(pkt);
22482248

2249-
char errDetail[100];
2250-
snprintf(errDetail, sizeof(errDetail), "Send control message: got error with seq %u", pkt->seq);
2251-
/* Retry for infinite times since we have no retransmit mechanism for control message */
2252-
n = sendtoWithRetry(fd, (const char *) pkt, pkt->len, 0, addr, peerLen, -1, errDetail);
2249+
/* retry 10 times for sending control message */
2250+
int counter = 0;
2251+
while (counter < 10)
2252+
{
2253+
counter++;
2254+
n = sendto(fd, (const char *) pkt, pkt->len, 0, addr, peerLen);
2255+
if (n < 0)
2256+
{
2257+
if (errno == EINTR || errno == EAGAIN || errno == EWOULDBLOCK)
2258+
continue;
2259+
else {
2260+
write_log("sendcontrolmessage: got errno %d", errno);
2261+
return;
2262+
}
2263+
}
2264+
break;
2265+
}
22532266
if (n < pkt->len)
22542267
write_log("sendcontrolmessage: got error %d errno %d seq %d", n, errno, pkt->seq);
22552268
}
@@ -5392,6 +5405,19 @@ sendtoWithRetry(int socket, const void *message, size_t length,
53925405
return n;
53935406
}
53945407

5408+
/*
5409+
* If the OS can detect an MTU issue on the host network interfaces, we
5410+
* would get EMSGSIZE here. So, bail with a HINT about checking MTU.
5411+
*/
5412+
if (errno == EMSGSIZE)
5413+
{
5414+
ereport(ERROR, (errcode(ERRCODE_GP_INTERCONNECTION_ERROR),
5415+
errmsg("Interconnect error writing an outgoing packet: %m"),
5416+
errdetail("error during sendto() call (error:%d).\n"
5417+
"%s", save_errno, errDetail),
5418+
errhint("check if interface MTU is equal across the cluster and lower than gp_max_packet_size")));
5419+
}
5420+
53955421
ereport(ERROR, (errcode(ERRCODE_GP_INTERCONNECTION_ERROR),
53965422
errmsg("Interconnect error writing an outgoing packet: %m"),
53975423
errdetail("error during sendto() call (error:%d).\n"

src/test/regress/expected/icudp/icudp_full.out

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -544,6 +544,14 @@ SELECT system_call_fault_injection_test();
544544

545545
(1 row)
546546

547+
-- inject faults for errMsgSize when packet is too long.
548+
SET gp_udpic_fault_inject_bitmap = 4194304;
549+
SELECT system_call_fault_injection_test();
550+
system_call_fault_injection_test
551+
----------------------------------
552+
553+
(1 row)
554+
547555
-- disable ipv6 may increase the code coverage.
548556
SET gp_udpic_network_disable_ipv6 = 1;
549557
SELECT system_call_fault_injection_test();

src/test/regress/sql/icudp/icudp_full.sql

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -276,6 +276,10 @@ $$;
276276
SET gp_udpic_fault_inject_bitmap = 524288;
277277
SELECT system_call_fault_injection_test();
278278

279+
-- inject faults for errMsgSize when packet is too long.
280+
SET gp_udpic_fault_inject_bitmap = 4194304;
281+
SELECT system_call_fault_injection_test();
282+
279283
-- disable ipv6 may increase the code coverage.
280284
SET gp_udpic_network_disable_ipv6 = 1;
281285
SELECT system_call_fault_injection_test();

0 commit comments

Comments
 (0)