Skip to content

Commit b53c615

Browse files
owenmonnfindinpath
authored andcommitted
Apply backoff when metastore connection fails
When createMetastoreClient fails to establish a connection, the failure was only recorded as the last exception without updating the backoff state, unlike API-level failures which already back off. As a result the unreachable metastore was never penalized in the candidate ordering and kept being tried first on every call, producing repeated connection attempts to it instead of preferring a healthy fallback. Invoke backoff.fail() on connection failure as well. Co-authored-by: Marius Grama <findinpath@gmail.com>
1 parent 234836b commit b53c615

2 files changed

Lines changed: 55 additions & 0 deletions

File tree

plugin/trino-hive/src/main/java/io/trino/plugin/hive/metastore/thrift/StaticTokenAwareMetastoreClientFactory.java

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -103,6 +103,7 @@ public ThriftMetastoreClient createMetastoreClient(Optional<String> delegationTo
103103
}
104104
catch (TException e) {
105105
lastException = e;
106+
backoff.fail();
106107
}
107108
}
108109

plugin/trino-hive/src/test/java/io/trino/plugin/hive/metastore/thrift/TestStaticTokenAwareMetastoreClientFactory.java

Lines changed: 54 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,9 +19,12 @@
1919
import io.airlift.testing.TestingTicker;
2020
import io.trino.hive.thrift.metastore.Table;
2121
import org.apache.thrift.TException;
22+
import org.apache.thrift.transport.TTransportException;
2223
import org.junit.jupiter.api.Test;
2324

2425
import java.net.SocketTimeoutException;
26+
import java.net.URI;
27+
import java.util.HashMap;
2528
import java.util.Map;
2629
import java.util.Optional;
2730

@@ -183,6 +186,27 @@ public void testReturnsToDefaultClientAfterErrorOnFallback()
183186
assertEqualHiveClient(metastoreClient3, DEFAULT_CLIENT);
184187
}
185188

189+
@Test
190+
public void testBackoffAppliedOnConnectionFailure()
191+
throws TException
192+
{
193+
ConnectionCountingThriftMetastoreClientFactory clientFactory = new ConnectionCountingThriftMetastoreClientFactory(
194+
ImmutableMap.of(DEFAULT_URI, Optional.empty(), FALLBACK_URI, Optional.of(FALLBACK_CLIENT)));
195+
StaticMetastoreConfig config = new StaticMetastoreConfig().setMetastoreUris(ImmutableList.of(DEFAULT_URI, FALLBACK_URI));
196+
StaticTokenAwareMetastoreClientFactory metastoreClientFactory = new StaticTokenAwareMetastoreClientFactory(
197+
config, new ThriftMetastoreAuthenticationConfig(), clientFactory, new TestingTicker());
198+
199+
assertEqualHiveClient(metastoreClientFactory.createMetastoreClient(Optional.empty()), FALLBACK_CLIENT);
200+
assertThat(clientFactory.connectionAttempts()).containsExactlyInAnyOrderEntriesOf(ImmutableMap.of(
201+
URI.create(DEFAULT_URI), 1,
202+
URI.create(FALLBACK_URI), 1));
203+
clientFactory.resetConnectionAttempts();
204+
assertEqualHiveClient(metastoreClientFactory.createMetastoreClient(Optional.empty()), FALLBACK_CLIENT);
205+
// The previous connection failure on DEFAULT_URI is not attempted first again on the second call
206+
assertThat(clientFactory.connectionAttempts()).containsExactlyInAnyOrderEntriesOf(ImmutableMap.of(
207+
URI.create(FALLBACK_URI), 1));
208+
}
209+
186210
private static void assertGetTableException(ThriftMetastoreClient client)
187211
{
188212
assertThatThrownBy(() -> client.getTable("foo", "bar"))
@@ -230,4 +254,34 @@ private static void assertEqualHiveClient(ThriftMetastoreClient actual, ThriftMe
230254
}
231255
assertThat(actual).isEqualTo(expected);
232256
}
257+
258+
private static class ConnectionCountingThriftMetastoreClientFactory
259+
implements ThriftMetastoreClientFactory
260+
{
261+
private final ThriftMetastoreClientFactory delegate;
262+
private final Map<URI, Integer> attempts = new HashMap<>();
263+
264+
public ConnectionCountingThriftMetastoreClientFactory(Map<String, Optional<ThriftMetastoreClient>> clients)
265+
{
266+
this.delegate = new MockThriftMetastoreClientFactory(clients);
267+
}
268+
269+
@Override
270+
public ThriftMetastoreClient create(URI uri, Optional<String> delegationToken)
271+
throws TTransportException
272+
{
273+
attempts.merge(uri, 1, Integer::sum);
274+
return delegate.create(uri, delegationToken);
275+
}
276+
277+
public void resetConnectionAttempts()
278+
{
279+
attempts.clear();
280+
}
281+
282+
public Map<URI, Integer> connectionAttempts()
283+
{
284+
return ImmutableMap.copyOf(attempts);
285+
}
286+
}
233287
}

0 commit comments

Comments
 (0)