Skip to content

Commit b401608

Browse files
pkoenig10 authored and bulldozer-bot[bot] committed
[improvement] Improve exceptions and logging when retrying (#941)
## Before this PR

- Request failures were logged at the `debug` level
- Nothing was logged when handling a `RetryOther` QoS response
- The message "Rescheduling call after backoff" was logged when retrying requests for different reasons
- `IOException` was used to indicate failures

## After this PR

- Request failures are logged at the `info` level
- Log when handling any QoS response
- Use unique, descriptive messages when retrying requests
- Use `SafeIoException` to indicate failures

Logging request failures at the `info` level is the primary motivation of this PR. Without this there is no indication when requests fail.

cc @pnepywoda
1 parent d3d824f commit b401608

File tree

3 files changed

+117
-70
lines changed

3 files changed

+117
-70
lines changed

okhttp-clients/build.gradle

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@ dependencies {
2020

2121
testImplementation project(":conjure-java-jersey-server")
2222
testImplementation project(":keystores")
23+
testImplementation 'com.palantir.safe-logging:preconditions-assertj'
2324
testImplementation "com.squareup.okhttp3:mockwebserver"
2425
testImplementation "javax.ws.rs:javax.ws.rs-api"
2526
testImplementation "junit:junit"

okhttp-clients/src/main/java/com/palantir/conjure/java/okhttp/RemotingOkHttpCall.java

Lines changed: 82 additions & 46 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,8 @@
2626
import com.palantir.conjure.java.api.errors.RemoteException;
2727
import com.palantir.conjure.java.client.config.ClientConfiguration;
2828
import com.palantir.logsafe.SafeArg;
29+
import com.palantir.logsafe.UnsafeArg;
30+
import com.palantir.logsafe.exceptions.SafeIoException;
2931
import java.io.IOException;
3032
import java.io.InterruptedIOException;
3133
import java.time.Duration;
@@ -175,25 +177,35 @@ public void onFailure(Call call, IOException exception) {
175177
// Fail call if backoffs are exhausted or if no retry URL can be determined.
176178
Optional<Duration> backoff = backoffStrategy.nextBackoff();
177179
if (!backoff.isPresent()) {
178-
callback.onFailure(call, new IOException("Failed to complete the request due to an "
179-
+ "IOException", exception));
180+
callback.onFailure(call, new SafeIoException(
181+
"Failed to complete the request due to an IOException",
182+
exception,
183+
UnsafeArg.of("requestUrl", call.request().url().toString())));
180184
return;
181185
}
186+
182187
Optional<HttpUrl> redirectTo = urls.redirectToNext(request().url());
183188
if (!redirectTo.isPresent()) {
184-
callback.onFailure(call, new IOException("Failed to determine valid failover URL for '"
185-
+ request().url() + "' and base URLs " + urls.getBaseUrls()));
189+
callback.onFailure(call, new SafeIoException(
190+
"Failed to determine valid failover URL",
191+
exception,
192+
UnsafeArg.of("requestUrl", call.request().url().toString()),
193+
UnsafeArg.of("baseUrls", urls.getBaseUrls())));
186194
return;
187195
}
188196

197+
log.info("Retrying call after failure",
198+
SafeArg.of("backoffMillis", backoff.get().toMillis()),
199+
UnsafeArg.of("redirectToUrl", redirectTo.get()),
200+
exception);
189201
Request redirectedRequest = request().newBuilder()
190202
.url(redirectTo.get())
191203
.build();
192204
RemotingOkHttpCall retryCall =
193205
client.newCallWithMutableState(redirectedRequest, backoffStrategy, maxNumRelocations - 1);
194-
log.debug("Rescheduling call after backoff", SafeArg.of("backoffMillis", backoff.get().toMillis()),
195-
exception);
196-
scheduleExecution(() -> retryCall.enqueue(callback), backoff.get());
206+
scheduleExecution(
207+
() -> retryCall.enqueue(callback),
208+
backoff.get());
197209
}
198210

199211
@Override
@@ -258,65 +270,89 @@ private QosException.Visitor<Void> createQosVisitor(Callback callback, Call call
258270
public Void visit(QosException.Throttle exception) {
259271
Optional<Duration> nonAdvertizedBackoff = backoffStrategy.nextBackoff();
260272
if (!nonAdvertizedBackoff.isPresent()) {
261-
callback.onFailure(call, new IOException("Failed to reschedule call since "
262-
+ "the number of configured backoffs are exhausted", exception));
273+
callback.onFailure(call, new SafeIoException(
274+
"Failed to complete the request due to QosException.Throttle",
275+
exception,
276+
UnsafeArg.of("requestUrl", call.request().url().toString())));
263277
return null;
264278
}
265279

266280
Duration backoff = exception.getRetryAfter().orElse(nonAdvertizedBackoff.get());
267-
log.debug("Rescheduling call after backoff", SafeArg.of("backoffMillis", backoff.toMillis()),
281+
log.debug("Rescheduling call after receiving QosException.Throttle",
282+
SafeArg.of("backoffMillis", backoff.toMillis()),
268283
exception);
269-
scheduleExecution(() -> doClone().enqueue(callback), backoff);
284+
scheduleExecution(
285+
() -> doClone().enqueue(callback),
286+
backoff);
270287
return null;
271288
}
272289

273290
@Override
274291
public Void visit(QosException.RetryOther exception) {
275292
if (maxNumRelocations <= 0) {
276-
callback.onFailure(call, new IOException("Exceeded the maximum number of allowed redirects for "
277-
+ "initial URL: " + call.request().url()));
278-
} else {
279-
// Redirect to the URL specified by the exception.
280-
Optional<HttpUrl> redirectTo = urls.redirectTo(request().url(),
281-
exception.getRedirectTo().toString());
282-
if (!redirectTo.isPresent()) {
283-
callback.onFailure(call, new IOException("Failed to determine valid redirect URL for '"
284-
+ exception.getRedirectTo() + "' and base URLs " + urls.getBaseUrls()));
285-
} else {
286-
Request redirectedRequest = request().newBuilder()
287-
.url(redirectTo.get())
288-
.build();
289-
client.newCallWithMutableState(redirectedRequest, backoffStrategy, maxNumRelocations - 1)
290-
.enqueue(callback);
291-
}
293+
callback.onFailure(call, new SafeIoException(
294+
"Exceeded the maximum number of allowed redirects",
295+
exception,
296+
UnsafeArg.of("requestUrl", call.request().url().toString())));
297+
return null;
298+
}
299+
300+
// Redirect to the URL specified by the exception.
301+
Optional<HttpUrl> redirectTo = urls.redirectTo(request().url(), exception.getRedirectTo().toString());
302+
if (!redirectTo.isPresent()) {
303+
callback.onFailure(call, new SafeIoException(
304+
"Failed to determine valid redirect URL after receiving QosException.RetryOther",
305+
exception,
306+
UnsafeArg.of("requestUrl", call.request().url().toString()),
307+
UnsafeArg.of("redirectToUrl", exception.getRedirectTo().toString()),
308+
UnsafeArg.of("baseUrls", urls.getBaseUrls())));
309+
return null;
292310
}
311+
312+
log.debug("Retrying call after receiving QosException.RetryOther",
313+
UnsafeArg.of("requestUrl", call.request().url()),
314+
UnsafeArg.of("redirectToUrl", redirectTo.get()),
315+
exception);
316+
Request redirectedRequest = request().newBuilder()
317+
.url(redirectTo.get())
318+
.build();
319+
client.newCallWithMutableState(redirectedRequest, backoffStrategy, maxNumRelocations - 1)
320+
.enqueue(callback);
293321
return null;
294322
}
295323

296324
@Override
297325
public Void visit(QosException.Unavailable exception) {
298326
Optional<Duration> backoff = backoffStrategy.nextBackoff();
299327
if (!backoff.isPresent()) {
300-
log.debug("Max number of retries exceeded, failing call");
301-
callback.onFailure(call,
302-
new IOException("Failed to complete the request due to a "
303-
+ "server-side QoS condition: 503", exception));
304-
} else {
305-
log.debug("Rescheduling call after backoff",
306-
SafeArg.of("backoffMillis", backoff.get().toMillis()), exception);
307-
// Redirect to the "next" URL, whichever that may be, after backing off.
308-
Optional<HttpUrl> redirectTo = urls.redirectToNext(request().url());
309-
if (!redirectTo.isPresent()) {
310-
callback.onFailure(call, new IOException("Failed to determine valid redirect URL for base "
311-
+ "URLs " + urls.getBaseUrls()));
312-
} else {
313-
Request redirectedRequest = request().newBuilder()
314-
.url(redirectTo.get())
315-
.build();
316-
scheduleExecution(() -> client.newCallWithMutableState(redirectedRequest, backoffStrategy,
317-
maxNumRelocations).enqueue(callback), backoff.get());
318-
}
328+
callback.onFailure(call, new SafeIoException(
329+
"Failed to complete the request due to QosException.Unavailable",
330+
exception,
331+
UnsafeArg.of("requestUrl", call.request().url().toString())));
332+
return null;
333+
}
334+
335+
// Redirect to the "next" URL, whichever that may be, after backing off.
336+
Optional<HttpUrl> redirectTo = urls.redirectToNext(request().url());
337+
if (!redirectTo.isPresent()) {
338+
callback.onFailure(call, new SafeIoException(
339+
"Failed to determine valid redirect URL after receiving QosException.Unavailable",
340+
UnsafeArg.of("requestUrl", call.request().url().toString()),
341+
UnsafeArg.of("baseUrls", urls.getBaseUrls())));
342+
return null;
319343
}
344+
345+
log.debug("Retrying call after receiving QosException.Unavailable",
346+
SafeArg.of("backoffMillis", backoff.get().toMillis()),
347+
UnsafeArg.of("redirectToUrl", redirectTo.get()),
348+
exception);
349+
Request redirectedRequest = request().newBuilder()
350+
.url(redirectTo.get())
351+
.build();
352+
scheduleExecution(
353+
() -> client.newCallWithMutableState(redirectedRequest, backoffStrategy, maxNumRelocations)
354+
.enqueue(callback),
355+
backoff.get());
320356
return null;
321357
}
322358
};

okhttp-clients/src/test/java/com/palantir/conjure/java/okhttp/OkHttpClientsTest.java

Lines changed: 34 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@
1616

1717
package com.palantir.conjure.java.okhttp;
1818

19+
import static com.palantir.logsafe.testing.Assertions.assertThatLoggableExceptionThrownBy;
1920
import static org.assertj.core.api.Assertions.assertThat;
2021
import static org.assertj.core.api.Assertions.assertThatExceptionOfType;
2122
import static org.assertj.core.api.Assertions.assertThatThrownBy;
@@ -31,6 +32,7 @@
3132
import com.palantir.conjure.java.api.errors.SerializableError;
3233
import com.palantir.conjure.java.client.config.ClientConfiguration;
3334
import com.palantir.conjure.java.client.config.ClientConfigurations;
35+
import com.palantir.logsafe.UnsafeArg;
3436
import com.palantir.logsafe.exceptions.SafeIoException;
3537
import java.io.IOException;
3638
import java.nio.file.Paths;
@@ -228,17 +230,19 @@ public void handlesUnavailable_obeysMaxNumRetriesAndEventuallyPropagatesQosExcep
228230

229231
server.enqueue(new MockResponse().setResponseCode(503));
230232
call = createRetryingClient(0).newCall(new Request.Builder().url(url).build());
231-
assertThatThrownBy(call::execute)
232-
.isInstanceOf(IOException.class)
233-
.hasMessage("Failed to complete the request due to a server-side QoS condition: 503");
233+
assertThatLoggableExceptionThrownBy(call::execute)
234+
.isInstanceOf(SafeIoException.class)
235+
.hasLogMessage("Failed to complete the request due to QosException.Unavailable")
236+
.hasArgs(UnsafeArg.of("requestUrl", url + "/"));
234237

235238
server.enqueue(new MockResponse().setResponseCode(503));
236239
server.enqueue(new MockResponse().setResponseCode(503));
237240
server.enqueue(new MockResponse().setResponseCode(503));
238241
call = createRetryingClient(2).newCall(new Request.Builder().url(url).build());
239-
assertThatThrownBy(call::execute)
240-
.isInstanceOf(IOException.class)
241-
.hasMessage("Failed to complete the request due to a server-side QoS condition: 503");
242+
assertThatLoggableExceptionThrownBy(call::execute)
243+
.isInstanceOf(SafeIoException.class)
244+
.hasLogMessage("Failed to complete the request due to QosException.Unavailable")
245+
.hasArgs(UnsafeArg.of("requestUrl", url + "/"));
242246

243247
assertThat(server.getRequestCount()).isEqualTo(4 /* original plus two retries */);
244248
}
@@ -260,17 +264,19 @@ public void handlesThrottle_obeysMaxNumRetriesAndEventuallyPropagatesQosExceptio
260264

261265
server.enqueue(new MockResponse().setResponseCode(429));
262266
call = createRetryingClient(0).newCall(new Request.Builder().url(url).build());
263-
assertThatThrownBy(call::execute)
264-
.isInstanceOf(IOException.class)
265-
.hasMessage("Failed to reschedule call since the number of configured backoffs are exhausted");
267+
assertThatLoggableExceptionThrownBy(call::execute)
268+
.isInstanceOf(SafeIoException.class)
269+
.hasLogMessage("Failed to complete the request due to QosException.Throttle")
270+
.hasArgs(UnsafeArg.of("requestUrl", url + "/"));
266271

267272
server.enqueue(new MockResponse().setResponseCode(429));
268273
server.enqueue(new MockResponse().setResponseCode(429));
269274
server.enqueue(new MockResponse().setResponseCode(429));
270275
call = createRetryingClient(2).newCall(new Request.Builder().url(url).build());
271-
assertThatThrownBy(call::execute)
272-
.isInstanceOf(IOException.class)
273-
.hasMessage("Failed to reschedule call since the number of configured backoffs are exhausted");
276+
assertThatLoggableExceptionThrownBy(call::execute)
277+
.isInstanceOf(SafeIoException.class)
278+
.hasLogMessage("Failed to complete the request due to QosException.Throttle")
279+
.hasArgs(UnsafeArg.of("requestUrl", url + "/"));
274280

275281
assertThat(server.getRequestCount()).isEqualTo(4 /* original plus two retries */);
276282
}
@@ -281,9 +287,10 @@ public void handlesThrottle_obeysMaxNumRetriesEvenWhenRetryAfterHeaderIsGiven()
281287
server.enqueue(new MockResponse().setResponseCode(429).addHeader(HttpHeaders.RETRY_AFTER, "0"));
282288
server.enqueue(new MockResponse().setResponseCode(429).addHeader(HttpHeaders.RETRY_AFTER, "0"));
283289
Call call = createRetryingClient(2).newCall(new Request.Builder().url(url).build());
284-
assertThatThrownBy(call::execute)
285-
.isInstanceOf(IOException.class)
286-
.hasMessage("Failed to reschedule call since the number of configured backoffs are exhausted");
290+
assertThatLoggableExceptionThrownBy(call::execute)
291+
.isInstanceOf(SafeIoException.class)
292+
.hasLogMessage("Failed to complete the request due to QosException.Throttle")
293+
.hasArgs(UnsafeArg.of("requestUrl", url + "/"));
287294
assertThat(server.getRequestCount()).isEqualTo(3 /* original plus two retries */);
288295
}
289296

@@ -317,9 +324,10 @@ public void handlesThrottle_usesConfiguredBackoffWhenResponseDoesNotAdvertiseBac
317324
// no backoff advertised, configured no retry: fails
318325
server.enqueue(new MockResponse().setResponseCode(429).setBody("foo"));
319326
call = createRetryingClient(0).newCall(new Request.Builder().url(url).build());
320-
assertThatThrownBy(call::execute)
321-
.isInstanceOf(IOException.class)
322-
.hasMessage("Failed to reschedule call since the number of configured backoffs are exhausted");
327+
assertThatLoggableExceptionThrownBy(call::execute)
328+
.isInstanceOf(SafeIoException.class)
329+
.hasLogMessage("Failed to complete the request due to QosException.Throttle")
330+
.hasArgs(UnsafeArg.of("requestUrl", url + "/"));
323331
}
324332

325333
@Test
@@ -372,9 +380,10 @@ public void handlesRetryOther_doesNotRedirectInfinitelyOften() throws Exception
372380
}
373381

374382
Call call = createRetryingClient(1).newCall(new Request.Builder().url(url).build());
375-
assertThatThrownBy(call::execute)
376-
.isInstanceOf(IOException.class)
377-
.hasMessage("Exceeded the maximum number of allowed redirects for initial URL: %s/", url);
383+
assertThatLoggableExceptionThrownBy(call::execute)
384+
.isInstanceOf(SafeIoException.class)
385+
.hasLogMessage("Exceeded the maximum number of allowed redirects")
386+
.hasArgs(UnsafeArg.of("requestUrl", url + "/"));
378387
assertThat(server.getRequestCount()).isEqualTo(21);
379388
}
380389

@@ -447,9 +456,10 @@ public void handlesIoExceptions_obeysMaxNumRetries() throws Exception {
447456

448457
OkHttpClient client = createRetryingClient(1, url, url2, url3);
449458
Call call = client.newCall(new Request.Builder().url(url + "/foo?bar").build());
450-
assertThatThrownBy(call::execute)
451-
.isInstanceOf(IOException.class)
452-
.hasMessage("Failed to complete the request due to an IOException");
459+
assertThatLoggableExceptionThrownBy(call::execute)
460+
.isInstanceOf(SafeIoException.class)
461+
.hasLogMessage("Failed to complete the request due to an IOException")
462+
.hasArgs(UnsafeArg.of("requestUrl", url2 + "/foo?bar"));
453463

454464
assertThat(server3.getRequestCount()).isEqualTo(0);
455465
}

0 commit comments

Comments
 (0)