Skip to content

Commit 0a2caa0

Browse files
authored
Queue improvements (#695)
* Introduce new waiting state * Display the state in the progress bar (and drop the ETA) * Record the last response/request when there's an error * Ignore max retries * Show retries in progress bar, and document it
1 parent a11740f commit 0a2caa0

File tree

3 files changed

+67
-44
lines changed

3 files changed

+67
-44
lines changed

R/req-perform-parallel.R

+49-26
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,12 @@
55
#' parallel. Never use it without [req_throttle()]; otherwise it's too easy to
66
#' pummel a server with a very large number of simultaneous requests.
77
#'
8+
#' While running, you'll get a progress bar that looks like:
9+
#' `[working] (1 + 4) -> 5 -> 5`. The string tells you the current status of
10+
#' the queue (e.g. working, waiting, errored, finishing) followed by (the
11+
#' number of pending requests + pending retried requests) -> the number of
12+
#' active requests -> the number of complete requests.
13+
#'
814
#' ## Limitations
915
#'
1016
#' The main limitation of `req_perform_parallel()` is that it assumes applies
@@ -16,6 +22,10 @@
1622
#' these limitations, but it's enough work that I'm unlikely to do it unless
1723
#' I know that people would find it useful: so please let me know!
1824
#'
25+
#' Additionally, it does not respect the `max_tries` argument to `req_retry()`
26+
#' because if you have five requests in flight and the first one gets rate
27+
#' limited, it's likely that all the others do too.
28+
#'
1929
#' @inherit req_perform_sequential params return
2030
#' @param pool `r lifecycle::badge("deprecated")`. No longer supported;
2131
#' to control the maximum number of concurrent requests, set `max_active`.
@@ -89,9 +99,12 @@ req_perform_parallel <- function(
8999
)
90100

91101
if (on_error == "stop") {
92-
errors <- keep(queue$resps, is_error)
93-
if (length(errors) > 0) {
94-
cnd_signal(errors[[1]])
102+
is_error <- map_lgl(queue$resps, is_error)
103+
if (any(is_error)) {
104+
i <- which(is_error)[[1]]
105+
the$last_response <- queue$resps[[i]]$resp %||% queue$resps[[i]]
106+
the$last_request <- queue$reqs[[i]]
107+
cnd_signal(queue$resps[[i]])
95108
}
96109
}
97110

@@ -103,14 +116,15 @@ RequestQueue <- R6::R6Class(
103116
public = list(
104117
pool = NULL,
105118
rate_limit_deadline = 0,
119+
token_deadline = 0,
106120
max_active = NULL,
107121

108122
# Overall status for the queue
109123
queue_status = NULL,
110-
deadline = Inf,
111124
n_pending = 0,
112125
n_active = 0,
113126
n_complete = 0,
127+
n_retries = 0,
114128
on_error = "stop",
115129
progress = NULL,
116130

@@ -122,7 +136,7 @@ RequestQueue <- R6::R6Class(
122136
tries = integer(),
123137

124138
# Requests that have failed due to OAuth expiration; used to ensure that we
125-
# don't retry repeatedly, but still allow all active requests to retry one
139+
# don't retry repeatedly, but still allow all active requests to retry once
126140
oauth_failed = integer(),
127141

128142
initialize = function(
@@ -139,8 +153,9 @@ RequestQueue <- R6::R6Class(
139153
self$progress <- cli::cli_progress_bar(
140154
total = n,
141155
format = paste0(
142-
"{self$n_pending} -> {self$n_active} -> {self$n_complete} | ",
143-
"{cli::pb_bar} {cli::pb_percent} | ETA: {cli::pb_eta}"
156+
"[{self$queue_status}] ",
157+
"({self$n_pending} + {self$n_retried}) -> {self$n_active} -> {self$n_complete} | ",
158+
"{cli::pb_bar} {cli::pb_percent}"
144159
),
145160
.envir = error_call
146161
)
@@ -195,19 +210,34 @@ RequestQueue <- R6::R6Class(
195210
process1 = function(deadline = Inf) {
196211
if (self$queue_status == "done") {
197212
FALSE
213+
} else if (self$queue_status == "waiting") {
214+
request_deadline <- max(self$token_deadline, self$rate_limit_deadline)
215+
if (request_deadline <= deadline) {
216+
# Assume we're done waiting; done_failure() will reset if needed
217+
self$queue_status <- "working"
218+
pool_wait_for_deadline(self$pool, request_deadline)
219+
NULL
220+
} else {
221+
pool_wait_for_deadline(self$pool, deadline)
222+
TRUE
223+
}
198224
} else if (self$queue_status == "working") {
199225
if (self$n_pending == 0) {
200226
self$queue_status <- "finishing"
201227
} else if (self$n_active < self$max_active) {
202-
self$submit_next(deadline)
228+
if (!self$submit_next(deadline)) {
229+
self$queue_status <- "waiting"
230+
}
203231
} else {
204232
pool_wait_for_one(self$pool, deadline)
205233
}
206234
NULL
207235
} else if (self$queue_status == "finishing") {
208236
pool_wait_for_one(self$pool, deadline)
209237

210-
if (self$n_pending > 0) {
238+
if (self$rate_limit_deadline > unix_time()) {
239+
self$queue_status <- "waiting"
240+
} else if (self$n_pending > 0) {
211241
# we had to retry
212242
self$queue_status <- "working"
213243
} else if (self$n_active > 0) {
@@ -228,23 +258,12 @@ RequestQueue <- R6::R6Class(
228258
submit_next = function(deadline) {
229259
next_i <- which(self$status == "pending")[[1]]
230260

231-
# Need to wait for a token from the bucket AND for any rate limits.
232-
# The ordering is important here because requests will complete
233-
# while we wait and that might change the rate_limit_deadline
234-
token_deadline <- throttle_deadline(self$reqs[[next_i]])
235-
pool_wait_for_deadline(self$pool, min(token_deadline, deadline))
236-
if (token_deadline >= deadline) {
261+
self$token_deadline <- throttle_deadline(self$reqs[[next_i]])
262+
if (self$token_deadline > unix_time()) {
237263
throttle_return_token(self$reqs[[next_i]])
238-
return()
264+
return(FALSE)
239265
}
240266

241-
while (unix_time() < self$rate_limit_deadline) {
242-
pool_wait_for_deadline(self$pool, min(self$rate_limit_deadline, deadline))
243-
if (self$rate_limit_deadline >= deadline) {
244-
throttle_return_token(self$reqs[[next_i]])
245-
return()
246-
}
247-
}
248267
self$submit(next_i)
249268
},
250269

@@ -256,6 +275,7 @@ RequestQueue <- R6::R6Class(
256275
self$tries[[i]] <- self$tries[[i]] + 1
257276

258277
self$pooled_reqs[[i]]$submit(self$pool)
278+
TRUE
259279
},
260280

261281
done_success = function(i, resp) {
@@ -280,11 +300,11 @@ RequestQueue <- R6::R6Class(
280300
tries <- self$tries[[i]]
281301

282302
if (retry_is_transient(req, resp) && self$can_retry(i)) {
283-
# Do we need to somehow expose this to the user? Because if they're
284-
# hitting it a bunch, it's a sign that the throttling is too low
285303
delay <- retry_after(req, resp, tries)
286304
self$rate_limit_deadline <- unix_time() + delay
305+
287306
self$set_status(i, "pending")
307+
self$n_retries <- self$n_retries + 1
288308
} else if (resp_is_invalid_oauth_token(req, resp) && self$can_reauth(i)) {
289309
# This isn't quite right, because if there are (e.g.) four requests in
290310
# the queue and the first one fails, we'll clear the cache for all four,
@@ -293,6 +313,7 @@ RequestQueue <- R6::R6Class(
293313
self$oauth_failed <- c(self$oauth_failed, i)
294314
req_auth_clear_cache(self$reqs[[i]])
295315
self$set_status(i, "pending")
316+
self$n_retries <- self$n_retries + 1
296317
} else {
297318
self$set_status(i, "complete")
298319
if (self$on_error != "continue") {
@@ -322,7 +343,8 @@ RequestQueue <- R6::R6Class(
322343
},
323344

324345
can_retry = function(i) {
325-
self$tries[[i]] < retry_max_tries(self$reqs[[i]])
346+
TRUE
347+
# self$tries[[i]] < retry_max_tries(self$reqs[[i]])
326348
},
327349
can_reauth = function(i) {
328350
!i %in% self$oauth_failed
@@ -347,6 +369,7 @@ pool_wait_for_deadline <- function(pool, deadline) {
347369
# pool might finish early; we still want to wait out the full time
348370
remaining <- timeout - (unix_time() - now)
349371
if (remaining > 0) {
372+
# cat("Sleeping for ", remaining, " seconds\n", sep = "")
350373
Sys.sleep(remaining)
351374
}
352375

man/req_perform_parallel.Rd

+10
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

tests/testthat/test-req-perform-parallel.R

+8-18
Original file line numberDiff line numberDiff line change
@@ -141,22 +141,6 @@ test_that("req_perform_parallel respects http_error() body message", {
141141
expect_snapshot(req_perform_parallel(reqs), error = TRUE)
142142
})
143143

144-
test_that("respects max retries", {
145-
req <- local_app_request(function(req, res) {
146-
i <- res$app$locals$i %||% 1
147-
res$
148-
set_status(429)$
149-
set_header("retry-after", 0)$
150-
send_json(list(status = "waiting"), auto_unbox = TRUE)
151-
})
152-
req <- req_retry(req, max_tries = 3)
153-
queue <- RequestQueue$new(list(req), progress = FALSE)
154-
155-
queue$process()
156-
expect_s3_class(queue$resps[[1]], "httr2_http_429")
157-
expect_equal(queue$tries[1], 3)
158-
})
159-
160144
test_that("requests are throttled", {
161145
withr::defer(throttle_reset())
162146

@@ -252,7 +236,7 @@ test_that("can retry a transient error", {
252236

253237
# Now we process the request and capture the retry
254238
expect_null(queue$process1())
255-
expect_equal(queue$queue_status, "working")
239+
expect_equal(queue$queue_status, "waiting")
256240
expect_equal(queue$rate_limit_deadline, mock_time + 2)
257241
expect_equal(queue$n_pending, 1)
258242
expect_s3_class(queue$resps[[1]], "httr2_http_429")
@@ -263,6 +247,10 @@ test_that("can retry a transient error", {
263247
expect_equal(queue$queue_status, "working")
264248
expect_equal(mock_time, 3)
265249

250+
# Now we go back to working
251+
expect_null(queue$process1())
252+
expect_equal(queue$queue_status, "working")
253+
266254
# Then resume finishing again
267255
expect_null(queue$process1())
268256
expect_equal(queue$queue_status, "finishing")
@@ -290,7 +278,9 @@ test_that("throttling is limited by deadline", {
290278

291279
# Check time only advances by one second, and token is returned to bucket
292280
local_mocked_bindings(throttle_deadline = function(...) mock_time + 2)
293-
queue$submit_next(1)
281+
queue$process1(1)
282+
expect_equal(queue$queue_status, "waiting")
283+
queue$process1(1)
294284
expect_equal(mock_time, 1)
295285
expect_equal(the$throttle[["test"]]$tokens, 1)
296286

0 commit comments

Comments
 (0)