diff --git a/go/vt/vttablet/tabletserver/throttle/base/metric_result.go b/go/vt/vttablet/tabletserver/throttle/base/metric_result.go index 0fa48fe3240..b3abab5cb53 100644 --- a/go/vt/vttablet/tabletserver/throttle/base/metric_result.go +++ b/go/vt/vttablet/tabletserver/throttle/base/metric_result.go @@ -19,6 +19,9 @@ package base import ( "errors" "net" + + "google.golang.org/grpc/codes" + "google.golang.org/grpc/status" ) // MetricResult is what we expect our probes to return. This can be a numeric result, or @@ -58,6 +61,11 @@ func IsDialTCPError(err error) bool { if err == nil { return false } + + if s, ok := status.FromError(err); ok { + return s.Code() == codes.Unavailable || s.Code() == codes.DeadlineExceeded + } + switch err := err.(type) { case *net.OpError: return err.Op == "dial" && err.Net == "tcp" diff --git a/go/vt/vttablet/tabletserver/throttle/throttler.go b/go/vt/vttablet/tabletserver/throttle/throttler.go index aa681cbd496..ef8518e02ab 100644 --- a/go/vt/vttablet/tabletserver/throttle/throttler.go +++ b/go/vt/vttablet/tabletserver/throttle/throttler.go @@ -904,7 +904,7 @@ func (throttler *Throttler) generateTabletProbeFunction(scope base.Scope, probe req := &tabletmanagerdatapb.CheckThrottlerRequest{} // We leave AppName empty; it will default to VitessName anyway, and we can save some proto space resp, gRPCErr := tmClient.CheckThrottler(ctx, probe.Tablet, req) if gRPCErr != nil { - return metricsWithError(fmt.Errorf("gRPC error accessing tablet %v. Err=%v", probe.Alias, gRPCErr)) + return metricsWithError(fmt.Errorf("gRPC error accessing tablet %v. Err=%w", probe.Alias, gRPCErr)) } throttleMetric.Value = resp.Value if resp.ResponseCode == tabletmanagerdatapb.CheckThrottlerResponseCode_INTERNAL_ERROR { diff --git a/go/vt/vttablet/tabletserver/throttle/throttler_test.go b/go/vt/vttablet/tabletserver/throttle/throttler_test.go index 8d62f6605e0..fe5a32ab808 100644 --- a/go/vt/vttablet/tabletserver/throttle/throttler_test.go +++ b/go/vt/vttablet/tabletserver/throttle/throttler_test.go @@ -30,9 +30,15 @@ import ( "github.com/stretchr/testify/require" "golang.org/x/exp/maps" + "google.golang.org/grpc" + "google.golang.org/grpc/credentials/insecure" + "vitess.io/vitess/go/protoutil" + + "vitess.io/vitess/go/vt/grpcclient" "vitess.io/vitess/go/vt/topo" "vitess.io/vitess/go/vt/vtenv" + "vitess.io/vitess/go/vt/vttablet/grpctmclient" "vitess.io/vitess/go/vt/vttablet/tabletserver/connpool" "vitess.io/vitess/go/vt/vttablet/tabletserver/tabletenv" "vitess.io/vitess/go/vt/vttablet/tabletserver/throttle/base" @@ -837,6 +843,113 @@ func TestApplyThrottlerConfigAppCheckedMetrics(t *testing.T) { }) } +func TestIsDialTCPError(t *testing.T) { + // Verify that IsDialTCPError actually recognizes grpc dial errors + cc, err := grpcclient.DialContext(t.Context(), ":0", true, grpc.WithTransportCredentials(insecure.NewCredentials())) + require.NoError(t, err) + defer cc.Close() + + err = cc.Invoke(context.Background(), "/Fail", nil, nil) + + require.True(t, base.IsDialTCPError(err)) + require.True(t, base.IsDialTCPError(fmt.Errorf("wrapped: %w", err))) + + nonDialErr := fmt.Errorf("rpc error: code = NotFound desc = method not found") + require.False(t, base.IsDialTCPError(nonDialErr)) +} + +func TestProbeWithUnavailableHost(t *testing.T) { + throttler := Throttler{ + throttledApps: cache.New(cache.NoExpiration, 0), + heartbeatWriter: &FakeHeartbeatWriter{}, + } + + alias := &topodatapb.TabletAlias{ + Cell: "cell1", + Uid: 100, + } + + // The hostname used here is not routable, so the connection will fail. + tablet := &topo.TabletInfo{ + Tablet: &topodatapb.Tablet{ + Alias: alias, + Hostname: "192.0.2.0", + MysqlHostname: "192.0.2.0", + MysqlPort: 3306, + PortMap: map[string]int32{"grpc": 5000}, + Type: topodatapb.TabletType_PRIMARY, + }, + } + + probe := &base.Probe{ + Alias: "cell1-100", + Tablet: tablet.Tablet, + CacheMillis: 100, + } + + tmClient := grpctmclient.NewClient() + + probeFunc := throttler.generateTabletProbeFunction(base.ShardScope, probe) + + metrics := probeFunc(t.Context(), tmClient) + require.True(t, base.IsDialTCPError(metrics["custom"].Err)) + + tabletResultsMap := base.TabletResultMap{ + "cell1-100": base.MetricResultMap{ + "custom": metrics["custom"], + }, + } + + worstMetric := base.AggregateTabletMetricResults("custom", tabletResultsMap, 0, true, 0.0) + require.Equal(t, base.NoHostsMetricResult, worstMetric) +} + +func TestProbeWithEmptyHostAndPort(t *testing.T) { + throttler := Throttler{ + throttledApps: cache.New(cache.NoExpiration, 0), + heartbeatWriter: &FakeHeartbeatWriter{}, + } + + alias := &topodatapb.TabletAlias{ + Cell: "cell1", + Uid: 100, + } + + // The hostname used here is not routable, so the connection will fail. + tablet := &topo.TabletInfo{ + Tablet: &topodatapb.Tablet{ + Alias: alias, + Hostname: "", + MysqlHostname: "192.0.2.0", + MysqlPort: 3306, + PortMap: map[string]int32{"grpc": 0}, + Type: topodatapb.TabletType_PRIMARY, + }, + } + + probe := &base.Probe{ + Alias: "cell1-100", + Tablet: tablet.Tablet, + CacheMillis: 100, + } + + tmClient := grpctmclient.NewClient() + + probeFunc := throttler.generateTabletProbeFunction(base.ShardScope, probe) + + metrics := probeFunc(t.Context(), tmClient) + require.True(t, base.IsDialTCPError(metrics["custom"].Err)) + + tabletResultsMap := base.TabletResultMap{ + "cell1-100": base.MetricResultMap{ + "custom": metrics["custom"], + }, + } + + worstMetric := base.AggregateTabletMetricResults("custom", tabletResultsMap, 0, true, 0.0) + require.Equal(t, base.NoHostsMetricResult, worstMetric) +} + func TestIsAppThrottled(t *testing.T) { plusOneHour := time.Now().Add(time.Hour) throttler := Throttler{