2525#include <ucs/time/time.h>
2626#include <ucs/sys/netlink.h>
2727#include <ucs/sys/sock.h>
28+ #include <net/if.h>
2829#include <string.h>
2930#include <stdlib.h>
3031#include <poll.h>
@@ -699,14 +700,14 @@ uct_ib_iface_roce_is_routable(uct_ib_iface_t *iface, uint8_t gid_index,
699700{
700701 uct_ib_device_t * dev = uct_ib_iface_device (iface );
701702 uint8_t port_num = iface -> config .port_num ;
703+ char ndev_ifname [IFNAMSIZ ], lo_ifname [IFNAMSIZ ];
702704 char remote_str [128 ];
703705 unsigned ndev_index , lo_ndev_index ;
704706
705707 if (uct_ib_device_get_roce_ndev_index (dev , port_num , gid_index ,
706708 & ndev_index ) != UCS_OK ) {
707709 uct_iface_fill_info_str_buf (params ,
708- "iface index is not found for "
709- UCT_IB_IFACE_FMT ", gid index %u" ,
710+ "no ndev for " UCT_IB_IFACE_FMT " gid[%u]" ,
710711 UCT_IB_IFACE_ARG (iface ), gid_index );
711712 return 0 ;
712713 }
@@ -720,23 +721,23 @@ uct_ib_iface_roce_is_routable(uct_ib_iface_t *iface, uint8_t gid_index,
720721 * because it may be used for routing when an interface with a VRF
721722 * is configured and a RoCE IP interface uses this VRF table for
722723 * routing. */
723- if ((uct_ib_iface_get_loopback_ndev_index (& lo_ndev_index ) == UCS_OK ) &&
724+ if ((ucs_get_loopback_ndev_index (& lo_ndev_index ) == UCS_OK ) &&
724725 ucs_netlink_route_exists (lo_ndev_index , sa_remote , NULL )) {
725726 ucs_trace (UCT_IB_IFACE_FMT ": found specific route via loopback to %s" ,
726727 UCT_IB_IFACE_ARG (iface ),
727728 ucs_sockaddr_str (sa_remote , remote_str , sizeof (remote_str )));
728729 return 1 ;
729730 }
730731
731- uct_iface_fill_info_str_buf (params ,
732- "remote address %s is not routable "
733- "neither by interface " UCT_IB_IFACE_FMT
734- " (ifname_index=%u) nor by loopback "
735- "interface (ifname_index=%u)" ,
736- ucs_sockaddr_str ( sa_remote , remote_str ,
737- sizeof ( remote_str )) ,
738- UCT_IB_IFACE_ARG ( iface ), ndev_index ,
739- lo_ndev_index );
732+ uct_iface_fill_info_str_buf (
733+ params , "no route to %s from %s (idx %u) or %s (idx %u)" ,
734+ ucs_sockaddr_str ( sa_remote , remote_str , sizeof ( remote_str )),
735+ ucs_ndev_index_to_ifname ( ndev_index , ndev_ifname ,
736+ sizeof ( ndev_ifname )) ,
737+ ndev_index ,
738+ ucs_ndev_index_to_ifname ( lo_ndev_index , lo_ifname ,
739+ sizeof ( lo_ifname )) ,
740+ lo_ndev_index );
740741 return 0 ;
741742}
742743
@@ -762,11 +763,9 @@ uct_ib_iface_roce_is_local_subnet(int prefix_bits,
762763
763764 if (!matched ) {
764765 uct_iface_fill_info_str_buf (
765- params ,
766- "IP addresses do not match with a %u-bit prefix. local IP"
767- " is %s, remote IP is %s" ,
768- prefix_bits , ucs_sockaddr_str (sa_local , local_str , 128 ),
769- ucs_sockaddr_str (sa_remote , remote_str , 128 ));
766+ params , "subnet local %s/%u remote %s/%u" ,
767+ ucs_sockaddr_str (sa_local , local_str , 128 ), prefix_bits ,
768+ ucs_sockaddr_str (sa_remote , remote_str , 128 ), prefix_bits );
770769 }
771770
772771 return matched ;
@@ -780,7 +779,8 @@ uct_ib_iface_roce_is_reachable(uct_ib_iface_t *iface,
780779 uct_ib_device_gid_info_t local_gid_info = iface -> gid_info ;
781780 sa_family_t local_ib_addr_af = local_gid_info .roce_info .addr_family ;
782781 uct_ib_roce_version_t local_roce_ver = local_gid_info .roce_info .ver ;
783- uint8_t remote_ib_addr_flags = remote_ib_addr -> flags ;
782+ const union ibv_gid * remote_gid = (union ibv_gid * )(remote_ib_addr + 1 );
783+ uint8_t remote_ib_addr_flags = remote_ib_addr -> flags ;
784784 struct sockaddr_storage sa_local , sa_remote ;
785785 uct_ib_roce_version_t remote_roce_ver ;
786786 sa_family_t remote_ib_addr_af ;
@@ -799,16 +799,14 @@ uct_ib_iface_roce_is_reachable(uct_ib_iface_t *iface,
799799 ucs_assert (local_roce_ver != UCT_IB_DEVICE_ROCE_ANY );
800800
801801 if (local_roce_ver != remote_roce_ver ) {
802- uct_iface_fill_info_str_buf (
803- params ,
804- "different RoCE versions detected. local %s (gid=%s) "
805- "remote %s (gid=%s)" ,
806- uct_ib_roce_version_str (local_roce_ver ),
807- uct_ib_gid_str (& local_gid_info .gid , local_str ,
808- sizeof (local_str )),
809- uct_ib_roce_version_str (remote_roce_ver ),
810- uct_ib_gid_str ((union ibv_gid * )(remote_ib_addr + 1 ), remote_str ,
811- sizeof (remote_str )));
802+ uct_iface_fill_info_str_buf (params , "local %s/%s remote %s/%s" ,
803+ uct_ib_gid_str (& local_gid_info .gid ,
804+ local_str ,
805+ sizeof (local_str )),
806+ uct_ib_roce_version_str (local_roce_ver ),
807+ uct_ib_gid_str (remote_gid , remote_str ,
808+ sizeof (remote_str )),
809+ uct_ib_roce_version_str (remote_roce_ver ));
812810 return 0 ;
813811 }
814812
@@ -819,25 +817,27 @@ uct_ib_iface_roce_is_reachable(uct_ib_iface_t *iface,
819817 remote_ib_addr_af = uct_ib_address_flags_get_roce_af (remote_ib_addr_flags );
820818 if (local_ib_addr_af != remote_ib_addr_af ) {
821819 uct_iface_fill_info_str_buf (
822- params , "different IP versions, local %s vs remote %s\n " ,
823- local_ib_addr_af == AF_INET ? "IPv4" : "IPv6" ,
824- remote_ib_addr_af == AF_INET ? "IPv4" : "IPv6" );
820+ params , "local %s remote %s" ,
821+ ucs_sockaddr_address_family_str ( local_ib_addr_af ) ,
822+ ucs_sockaddr_address_family_str ( remote_ib_addr_af ) );
825823 return 0 ;
826824 }
827825
828826 if ((uct_ib_device_roce_gid_to_sockaddr (local_ib_addr_af ,
829827 & local_gid_info .gid ,
830828 & sa_local ) != UCS_OK )) {
831- uct_iface_fill_info_str_buf (
832- params , "couldn't convert local RoCE address to socket address" );
829+ uct_iface_fill_info_str_buf (params , "invalid local GID %s" ,
830+ uct_ib_gid_str (& local_gid_info .gid ,
831+ local_str ,
832+ sizeof (local_str )));
833833 return 0 ;
834834 }
835835
836- if (uct_ib_device_roce_gid_to_sockaddr (remote_ib_addr_af ,
837- remote_ib_addr + 1 ,
836+ if (uct_ib_device_roce_gid_to_sockaddr (remote_ib_addr_af , remote_gid ,
838837 & sa_remote ) != UCS_OK ) {
839- uct_iface_fill_info_str_buf (
840- params , "couldn't convert remote RoCE address to socket address" );
838+ uct_iface_fill_info_str_buf (params , "invalid remote GID %s" ,
839+ uct_ib_gid_str (remote_gid , remote_str ,
840+ sizeof (remote_str )));
841841 return 0 ;
842842 }
843843
@@ -911,22 +911,18 @@ static int uct_ib_iface_dev_addr_is_reachable(
911911 return 0 ;
912912 }
913913
914- /* at least one PKEY has to be with full membership */
915- if (!((params .pkey | iface -> pkey ) & UCT_IB_PKEY_MEMBERSHIP_MASK )) {
916- uct_iface_fill_info_str_buf (
917- is_reachable_params ,
918- "both local and remote pkeys (0x%x, 0x%x) "
919- "have partial membership" ,
920- iface -> pkey , params .pkey );
914+ /* PKEY values have to be equal */
915+ if ((params .pkey ^ iface -> pkey ) & UCT_IB_PKEY_PARTITION_MASK ) {
916+ uct_iface_fill_info_str_buf (is_reachable_params ,
917+ "pkey local 0x%x remote 0x%x" , iface -> pkey ,
918+ params .pkey );
921919 return 0 ;
922920 }
923921
924- /* PKEY values have to be equal */
925- if ((params .pkey ^ iface -> pkey ) & UCT_IB_PKEY_PARTITION_MASK ) {
926- uct_iface_fill_info_str_buf (
927- is_reachable_params ,
928- "local pkey 0x%x differs from remote pkey 0x%x" ,
929- iface -> pkey , params .pkey );
922+ /* At least one PKEY has to be with full membership */
923+ if (!((params .pkey | iface -> pkey ) & UCT_IB_PKEY_MEMBERSHIP_MASK )) {
924+ uct_iface_fill_info_str_buf (is_reachable_params ,
925+ "partial member pkey 0x%x" , params .pkey );
930926 return 0 ;
931927 }
932928
@@ -948,8 +944,7 @@ static int uct_ib_iface_dev_addr_is_reachable(
948944
949945 uct_iface_fill_info_str_buf (
950946 is_reachable_params ,
951- "different subnet prefix 0x%" PRIx64 "/0x%" PRIx64
952- " and FLID is %s" ,
947+ "IB subnet local %" PRIx64 " remote %" PRIx64 " FLID %s" ,
953948 be64toh (iface -> gid_info .gid .global .subnet_prefix ),
954949 be64toh (params .gid .global .subnet_prefix ), flid_info_str );
955950 return 0 ;
@@ -962,11 +957,10 @@ static int uct_ib_iface_dev_addr_is_reachable(
962957 } else {
963958 /* local and remote have different link layers and therefore are unreachable */
964959 uct_iface_fill_info_str_buf (
965- is_reachable_params ,
966- "link layers differ %s (local) vs %s (remote)" ,
967- is_local_eth ? "RoCE" : "IB" ,
968- ib_addr -> flags & UCT_IB_ADDRESS_FLAG_LINK_LAYER_ETH ?
969- "RoCE" : "IB" );
960+ is_reachable_params , "local %s remote %s" ,
961+ is_local_eth ? "RoCE" : "IB" ,
962+ (ib_addr -> flags & UCT_IB_ADDRESS_FLAG_LINK_LAYER_ETH ) ? "RoCE" :
963+ "IB" );
970964 return 0 ;
971965 }
972966}
0 commit comments