5
5
# ' parallel. Never use it without [req_throttle()]; otherwise it's too easy to
6
6
# ' pummel a server with a very large number of simultaneous requests.
7
7
# '
8
+ # ' While running, you'll get a progress bar that looks like:
9
+ # ' `[working] (1 + 4) -> 5 -> 5`. The string tells you the current status of
10
+ # ' the queue (e.g. working, waiting, errored, finishing) followed by (the
11
+ # ' number of pending requests + pending retried requests) -> the number of
12
+ # ' active requests -> the number of complete requests.
13
+ # '
8
14
# ' ## Limitations
9
15
# '
10
16
# ' The main limitation of `req_perform_parallel()` is that it assumes applies
16
22
# ' these limitation, but it's enough work that I'm unlikely to do it unless
17
23
# ' I know that people would fine it useful: so please let me know!
18
24
# '
25
+ # ' Additionally, it does not respect the `max_tries` argument to `req_retry()`
26
+ # ' because if you have five requests in flight and the first one gets rate
27
+ # ' limited, it's likely that all the others do too.
28
+ # '
19
29
# ' @inherit req_perform_sequential params return
20
30
# ' @param pool `r lifecycle::badge("deprecated")`. No longer supported;
21
31
# ' to control the maximum number of concurrent requests, set `max_active`.
@@ -89,9 +99,12 @@ req_perform_parallel <- function(
89
99
)
90
100
91
101
if (on_error == " stop" ) {
92
- errors <- keep(queue $ resps , is_error )
93
- if (length(errors ) > 0 ) {
94
- cnd_signal(errors [[1 ]])
102
+ is_error <- map_lgl(queue $ resps , is_error )
103
+ if (any(is_error )) {
104
+ i <- which(is_error )[[1 ]]
105
+ the $ last_response <- queue $ resps [[i ]]$ resp %|| % queue $ resps [[i ]]
106
+ the $ last_request <- queue $ reqs [[i ]]
107
+ cnd_signal(queue $ resps [[i ]])
95
108
}
96
109
}
97
110
@@ -103,14 +116,15 @@ RequestQueue <- R6::R6Class(
103
116
public = list (
104
117
pool = NULL ,
105
118
rate_limit_deadline = 0 ,
119
+ token_deadline = 0 ,
106
120
max_active = NULL ,
107
121
108
122
# Overall status for the queue
109
123
queue_status = NULL ,
110
- deadline = Inf ,
111
124
n_pending = 0 ,
112
125
n_active = 0 ,
113
126
n_complete = 0 ,
127
+ n_retries = 0 ,
114
128
on_error = " stop" ,
115
129
progress = NULL ,
116
130
@@ -122,7 +136,7 @@ RequestQueue <- R6::R6Class(
122
136
tries = integer(),
123
137
124
138
# Requests that have failed due to OAuth expiration; used to ensure that we
125
- # don't retry repeatedly, but still allow all active requests to retry one
139
+ # don't retry repeatedly, but still allow all active requests to retry once
126
140
oauth_failed = integer(),
127
141
128
142
initialize = function (
@@ -139,8 +153,9 @@ RequestQueue <- R6::R6Class(
139
153
self $ progress <- cli :: cli_progress_bar(
140
154
total = n ,
141
155
format = paste0(
142
- " {self$n_pending} -> {self$n_active} -> {self$n_complete} | " ,
143
- " {cli::pb_bar} {cli::pb_percent} | ETA: {cli::pb_eta}"
156
+ " [{self$queue_status}] " ,
157
+ " ({self$n_pending} + {self$n_retried}) -> {self$n_active} -> {self$n_complete} | " ,
158
+ " {cli::pb_bar} {cli::pb_percent}"
144
159
),
145
160
.envir = error_call
146
161
)
@@ -195,19 +210,34 @@ RequestQueue <- R6::R6Class(
195
210
process1 = function (deadline = Inf ) {
196
211
if (self $ queue_status == " done" ) {
197
212
FALSE
213
+ } else if (self $ queue_status == " waiting" ) {
214
+ request_deadline <- max(self $ token_deadline , self $ rate_limit_deadline )
215
+ if (request_deadline < = deadline ) {
216
+ # Assume we're done waiting; done_failure() will reset if needed
217
+ self $ queue_status <- " working"
218
+ pool_wait_for_deadline(self $ pool , request_deadline )
219
+ NULL
220
+ } else {
221
+ pool_wait_for_deadline(self $ pool , deadline )
222
+ TRUE
223
+ }
198
224
} else if (self $ queue_status == " working" ) {
199
225
if (self $ n_pending == 0 ) {
200
226
self $ queue_status <- " finishing"
201
227
} else if (self $ n_active < self $ max_active ) {
202
- self $ submit_next(deadline )
228
+ if (! self $ submit_next(deadline )) {
229
+ self $ queue_status <- " waiting"
230
+ }
203
231
} else {
204
232
pool_wait_for_one(self $ pool , deadline )
205
233
}
206
234
NULL
207
235
} else if (self $ queue_status == " finishing" ) {
208
236
pool_wait_for_one(self $ pool , deadline )
209
237
210
- if (self $ n_pending > 0 ) {
238
+ if (self $ rate_limit_deadline > unix_time()) {
239
+ self $ queue_status <- " waiting"
240
+ } else if (self $ n_pending > 0 ) {
211
241
# we had to retry
212
242
self $ queue_status <- " working"
213
243
} else if (self $ n_active > 0 ) {
@@ -228,23 +258,12 @@ RequestQueue <- R6::R6Class(
228
258
submit_next = function (deadline ) {
229
259
next_i <- which(self $ status == " pending" )[[1 ]]
230
260
231
- # Need to wait for a token from the bucket AND for any rate limits.
232
- # The ordering is important here because requests will complete
233
- # while we wait and that might change the rate_limit_deadline
234
- token_deadline <- throttle_deadline(self $ reqs [[next_i ]])
235
- pool_wait_for_deadline(self $ pool , min(token_deadline , deadline ))
236
- if (token_deadline > = deadline ) {
261
+ self $ token_deadline <- throttle_deadline(self $ reqs [[next_i ]])
262
+ if (self $ token_deadline > unix_time()) {
237
263
throttle_return_token(self $ reqs [[next_i ]])
238
- return ()
264
+ return (FALSE )
239
265
}
240
266
241
- while (unix_time() < self $ rate_limit_deadline ) {
242
- pool_wait_for_deadline(self $ pool , min(self $ rate_limit_deadline , deadline ))
243
- if (self $ rate_limit_deadline > = deadline ) {
244
- throttle_return_token(self $ reqs [[next_i ]])
245
- return ()
246
- }
247
- }
248
267
self $ submit(next_i )
249
268
},
250
269
@@ -256,6 +275,7 @@ RequestQueue <- R6::R6Class(
256
275
self $ tries [[i ]] <- self $ tries [[i ]] + 1
257
276
258
277
self $ pooled_reqs [[i ]]$ submit(self $ pool )
278
+ TRUE
259
279
},
260
280
261
281
done_success = function (i , resp ) {
@@ -280,11 +300,11 @@ RequestQueue <- R6::R6Class(
280
300
tries <- self $ tries [[i ]]
281
301
282
302
if (retry_is_transient(req , resp ) && self $ can_retry(i )) {
283
- # Do we need to somehow expose this to the user? Because if they're
284
- # hitting it a bunch, it's a sign that the throttling is too low
285
303
delay <- retry_after(req , resp , tries )
286
304
self $ rate_limit_deadline <- unix_time() + delay
305
+
287
306
self $ set_status(i , " pending" )
307
+ self $ n_retries <- self $ n_retries + 1
288
308
} else if (resp_is_invalid_oauth_token(req , resp ) && self $ can_reauth(i )) {
289
309
# This isn't quite right, because if there are (e.g.) four requests in
290
310
# the queue and the first one fails, we'll clear the cache for all four,
@@ -293,6 +313,7 @@ RequestQueue <- R6::R6Class(
293
313
self $ oauth_failed <- c(self $ oauth_failed , i )
294
314
req_auth_clear_cache(self $ reqs [[i ]])
295
315
self $ set_status(i , " pending" )
316
+ self $ n_retries <- self $ n_retries + 1
296
317
} else {
297
318
self $ set_status(i , " complete" )
298
319
if (self $ on_error != " continue" ) {
@@ -322,7 +343,8 @@ RequestQueue <- R6::R6Class(
322
343
},
323
344
324
345
can_retry = function (i ) {
325
- self $ tries [[i ]] < retry_max_tries(self $ reqs [[i ]])
346
+ TRUE
347
+ # self$tries[[i]] < retry_max_tries(self$reqs[[i]])
326
348
},
327
349
can_reauth = function (i ) {
328
350
! i %in% self $ oauth_failed
@@ -347,6 +369,7 @@ pool_wait_for_deadline <- function(pool, deadline) {
347
369
# pool might finish early; we still want to wait out the full time
348
370
remaining <- timeout - (unix_time() - now )
349
371
if (remaining > 0 ) {
372
+ # cat("Sleeping for ", remaining, " seconds\n", sep = "")
350
373
Sys.sleep(remaining )
351
374
}
352
375
0 commit comments