Skip to content

Commit 429c9ad

Browse files
committed
Sleep and retry on 503 when downloading files
1 parent ed9447d commit 429c9ad

File tree

1 file changed

+11
-1
lines changed

1 file changed

+11
-1
lines changed

src/main/java/org/dstadler/commoncrawl/Utils.java

+11 -1
Original file line number | Diff line number | Diff line change
@@ -152,7 +152,17 @@ public static File downloadFileFromCommonCrawl(CloseableHttpClient httpClient, S
152 152
} catch (IOException e) {
153 153
// retry once for HTTP 500 that we see sometimes
154 154
if(e.getMessage().contains("HTTP StatusCode 500")) {
155-
downloadFileFromCommonCrawl(httpClient, url, header, useWARC, destFile);
155+
downloadFileFromCommonCrawl(httpClient, url, header, useWARC, destFile);
156+
} else if(e.getMessage().contains("HTTP StatusCode 503")) {
157+
log.info("Sleeping 120 seconds before retrying to reduce request rate");
158+
159+
try {
160+
Thread.sleep(120_000);
161+
} catch (InterruptedException ex) {
162+
throw new RuntimeException(ex);
163+
}
164+
165+
downloadFileFromCommonCrawl(httpClient, url, header, useWARC, destFile);
156 166
} else {
157 167
throw e;
158 168
}

0 commit comments

Comments
 (0)