Skip to content

Commit e7082e3

Browse files
Merge pull request #380 from JLG-WOCFR-DEV/codex/refactor-scanner-classes-and-update-tests
Refactor ScanQueue responsibilities into service classes
2 parents 69f3289 + 5598a47 commit e7082e3

File tree

10 files changed

+1219
-746
lines changed

10 files changed

+1219
-746
lines changed
Lines changed: 348 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,348 @@
1+
<?php
2+
3+
namespace JLG\BrokenLinks\Scanner;
4+
5+
use Requests_Exception;
6+
use Requests_Response;
7+
use WP_Error;
8+
9+
/**
 * Runs queued link checks in batches through a parallel HTTP dispatcher.
 *
 * Jobs are queued with enqueue(). As soon as `concurrency` jobs are pending
 * (or when drain() is called) a batch is sent as parallel HEAD requests; jobs
 * whose HEAD result calls for it are then retried together as parallel GET
 * requests. Each job's callback receives the final response.
 */
class ParallelRequestDispatcher
{
    /**
     * HEAD status codes that trigger a GET retry. Outside "precise" mode they
     * are also reported to the callback as "HEAD request disallowed".
     */
    private const HEAD_REJECTED_STATUSES = [403, 405, 501];

    /** @var HttpClientInterface */
    private $client;

    /** @var int Number of jobs sent per batch (always >= 1). */
    private $concurrency;

    /** @var callable Receives an array of request specs keyed by request id and returns responses under the same keys. */
    private $dispatcher;

    /** @var int Minimum pause, in milliseconds, between the last processed response and the next batch (0 disables it). */
    private $linkDelayMs;

    /** @var float microtime(true) of the most recently processed response; 0.0 until the first one. */
    private $lastRemoteRequestCompletedAt = 0.0;

    /** @var array<int, array<string, mixed>> Jobs waiting to be batched. */
    private $pending = [];

    /**
     * @param HttpClientInterface $client      Used for sequential fallback requests and to read response codes.
     * @param int|mixed           $concurrency Batch size; cast to int and clamped to at least 1.
     * @param int|mixed           $linkDelayMs Delay in milliseconds; cast to int and clamped to at least 0.
     * @param callable|null       $dispatcher  Custom parallel dispatcher; anything that is not callable
     *                                         is replaced by the default Requests-based dispatcher.
     */
    public function __construct(HttpClientInterface $client, $concurrency, $linkDelayMs, $dispatcher = null)
    {
        $this->client = $client;
        $this->concurrency = max(1, (int) $concurrency);
        $this->linkDelayMs = max(0, (int) $linkDelayMs);
        // Validate up front so a bad value cannot surface later as a "not callable" fatal mid-scan.
        $this->dispatcher = is_callable($dispatcher) ? $dispatcher : self::resolveDispatcher();
    }

    /**
     * Builds an instance whose dispatcher may be overridden through the
     * `blc_parallel_requests_dispatcher` filter.
     *
     * When the filter API is unavailable, or the filter does not return a
     * callable, the default Requests-based dispatcher is used.
     *
     * @param HttpClientInterface $client
     * @param int|mixed           $concurrency
     * @param int|mixed           $linkDelayMs
     */
    public static function fromFilters(HttpClientInterface $client, $concurrency, $linkDelayMs): self
    {
        $dispatcher = null;
        if (function_exists('apply_filters')) {
            $dispatcher = apply_filters('blc_parallel_requests_dispatcher', null);
        }

        if (!is_callable($dispatcher)) {
            $dispatcher = self::resolveDispatcher();
        }

        return new self($client, $concurrency, $linkDelayMs, $dispatcher);
    }

    /**
     * Queues one link check and dispatches a batch if enough jobs are pending.
     *
     * @param mixed    $url               URL to check.
     * @param array    $headArgs          Request arguments for the HEAD attempt.
     * @param array    $getArgs           Request arguments for the GET retry.
     * @param mixed    $scanMethod        'precise' enables the extra GET retries (HEAD errors and temporary statuses).
     * @param array    $temporaryStatuses HEAD status codes (ints, compared strictly) that trigger a GET retry in precise mode.
     * @param callable $callback          Called as ($response, $headRequestDisallowed, $fallbackDueToTemporaryStatus, $usedGetRequest).
     */
    public function enqueue(
        $url,
        array $headArgs,
        array $getArgs,
        $scanMethod,
        array $temporaryStatuses,
        callable $callback
    ): void {
        $this->pending[] = [
            'url' => $url,
            'head_args' => $headArgs,
            'get_args' => $getArgs,
            'scan_method' => $scanMethod,
            'temporary_statuses' => $temporaryStatuses,
            'callback' => $callback,
        ];

        $this->dispatch(false);
    }

    /**
     * Dispatches every pending job, including a final partial batch.
     */
    public function drain(): void
    {
        $this->dispatch(true);
    }

    /**
     * Sends pending jobs in batches of at most `concurrency`.
     *
     * @param bool $force When false, only full batches are sent; when true, the queue is emptied.
     */
    private function dispatch($force): void
    {
        while (!empty($this->pending) && ($force || count($this->pending) >= $this->concurrency)) {
            // array_splice() clamps the length to what is available, so a short final batch needs no special case.
            $batch = array_splice($this->pending, 0, $this->concurrency);
            $this->executeBatch($batch);
        }
    }

    /**
     * Runs one batch: parallel HEAD requests first, then a parallel GET pass
     * for the jobs that need it, invoking each job's callback exactly once.
     *
     * @param array<int, array<string, mixed>> $batch
     */
    private function executeBatch(array $batch): void
    {
        if (empty($batch)) {
            return;
        }

        $headRequests = [];
        foreach ($batch as $index => $job) {
            $headRequests['head-' . $index] = $this->buildRequest($job['url'], 'HEAD', $job['head_args']);
        }

        $headResponses = $this->sendRequests($headRequests);

        $getJobs = [];

        foreach ($batch as $index => $job) {
            $headResponse = $headResponses['head-' . $index]
                ?? new WP_Error('blc_missing_head_response', 'Missing HEAD response.');

            $needsGetFallback = false;
            $fallbackDueToTemporaryStatus = false;
            $headRequestDisallowed = false;

            if ($job['scan_method'] === 'precise') {
                if (is_wp_error($headResponse)) {
                    // Precise mode retries failed HEAD requests with GET.
                    $needsGetFallback = true;
                } else {
                    $headStatus = (int) $this->client->responseCode($headResponse);
                    if (in_array($headStatus, $job['temporary_statuses'], true)) {
                        $needsGetFallback = true;
                        $fallbackDueToTemporaryStatus = true;
                    } elseif (in_array($headStatus, self::HEAD_REJECTED_STATUSES, true)) {
                        // Note: precise mode retries here without raising the "disallowed" flag.
                        $needsGetFallback = true;
                    }
                }
            } elseif (!is_wp_error($headResponse)) {
                $headStatus = (int) $this->client->responseCode($headResponse);
                if (in_array($headStatus, self::HEAD_REJECTED_STATUSES, true)) {
                    $needsGetFallback = true;
                    $headRequestDisallowed = true;
                }
            }

            if ($needsGetFallback) {
                $getJobs[] = [
                    'index' => $index,
                    'job' => $job,
                    'fallback_due_to_temporary' => $fallbackDueToTemporaryStatus,
                    'head_request_disallowed' => $headRequestDisallowed,
                ];
            } else {
                $this->triggerCallback(
                    $job,
                    $headResponse,
                    $headRequestDisallowed,
                    $fallbackDueToTemporaryStatus,
                    false
                );
            }
        }

        if (empty($getJobs)) {
            return;
        }

        $getRequests = [];
        foreach ($getJobs as $entry) {
            $getRequests['get-' . $entry['index']] = $this->buildRequest(
                $entry['job']['url'],
                'GET',
                $entry['job']['get_args']
            );
        }

        $getResponses = $this->sendRequests($getRequests);

        foreach ($getJobs as $entry) {
            $response = $getResponses['get-' . $entry['index']]
                ?? new WP_Error('blc_missing_get_response', 'Missing GET response.');
            $this->triggerCallback(
                $entry['job'],
                $response,
                $entry['head_request_disallowed'],
                $entry['fallback_due_to_temporary'],
                true
            );
        }
    }

    /**
     * Invokes the job's callback with the final response and the flags
     * describing how it was obtained.
     *
     * @param array<string, mixed> $job
     * @param mixed                $response
     * @param bool                 $headRequestDisallowed
     * @param bool                 $fallbackDueToTemporaryStatus
     * @param bool                 $usedGetRequest
     */
    private function triggerCallback(
        array $job,
        $response,
        $headRequestDisallowed,
        $fallbackDueToTemporaryStatus,
        $usedGetRequest
    ): void {
        $callback = $job['callback'];
        $callback($response, $headRequestDisallowed, $fallbackDueToTemporaryStatus, $usedGetRequest);
    }

    /**
     * Translates WordPress-style request arguments into a request spec for
     * the dispatcher (the shape consumed by Requests::request_multiple()).
     *
     * The untouched arguments are kept under 'args' so that sendRequests()
     * can replay the request through the HTTP client if the dispatcher
     * returns nothing for it.
     *
     * @param mixed                $url
     * @param string               $method 'HEAD' or 'GET'.
     * @param array<string, mixed> $args
     * @return array<string, mixed>
     */
    private function buildRequest($url, $method, array $args): array
    {
        $headers = [];
        if (isset($args['user-agent'])) {
            $headers['user-agent'] = (string) $args['user-agent'];
        }

        $options = [];
        if (isset($args['timeout'])) {
            $options['timeout'] = (float) $args['timeout'];
        }
        if (isset($args['redirection'])) {
            $options['redirects'] = (int) $args['redirection'];
            if ($options['redirects'] <= 0) {
                // A redirect limit of 0 with redirect-following still enabled makes Requests
                // fail with "Too many redirects" on the first 3xx instead of returning it.
                // WP_Http::request() disables following in this case; do the same.
                $options['follow_redirects'] = false;
            }
        }
        if (isset($args['limit_response_size'])) {
            $options['max_bytes'] = (int) $args['limit_response_size'];
        }

        return [
            'url' => $url,
            'type' => $method,
            'headers' => $headers,
            'data' => $args['body'] ?? null,
            'options' => $options,
            'args' => $args,
        ];
    }

    /**
     * Sends a set of requests through the dispatcher and returns one
     * normalized response per request key.
     *
     * Requests for which the dispatcher returned no entry (or null) are
     * replayed one by one through the HTTP client.
     *
     * @param array<string, array<string, mixed>> $requests
     * @return array<string, mixed>
     */
    private function sendRequests(array $requests): array
    {
        if (empty($requests)) {
            return [];
        }

        // One wait per batch is enough: the completion timestamp is only updated in the
        // loop below, so repeating the wait per request could never sleep a second time.
        $this->waitForRemoteSlot();

        $dispatcher = $this->dispatcher;
        $responses = $dispatcher($requests);
        if (!is_array($responses)) {
            $responses = [];
        }

        $normalized = [];
        foreach ($requests as $key => $requestSpec) {
            $raw = $responses[$key] ?? null;
            if ($raw === null) {
                $args = isset($requestSpec['args']) && is_array($requestSpec['args']) ? $requestSpec['args'] : [];
                $method = strtoupper((string) ($requestSpec['type'] ?? 'GET'));
                if ($method === 'HEAD') {
                    $raw = $this->client->head($requestSpec['url'], $args);
                } else {
                    $raw = $this->client->get($requestSpec['url'], $args);
                }
            }
            $normalized[$key] = $this->normalizeResponse($raw);
            $this->markRemoteRequestComplete();
        }

        return $normalized;
    }

    /**
     * Converts a dispatcher/client result into the WordPress HTTP API shape.
     *
     * - WP_Error and arrays are returned unchanged.
     * - Requests response objects become ['headers', 'body', 'response' => ['code', 'message']].
     * - Requests exceptions and null become an 'http_request_failed' WP_Error.
     * - Any other value is passed through untouched.
     *
     * @param mixed $raw
     * @return mixed
     */
    private function normalizeResponse($raw)
    {
        if ($raw instanceof WP_Error) {
            return $raw;
        }

        // instanceof never autoloads, so the legacy class name only matches if its alias is
        // already loaded; also accept the namespaced class directly.
        if ($raw instanceof Requests_Response || $raw instanceof \WpOrg\Requests\Response) {
            if (isset($raw->status_text)) {
                $message = $raw->status_text;
            } elseif (function_exists('get_status_header_desc')) {
                // status_text is not guaranteed to exist on the response object;
                // derive the reason phrase from the status code instead.
                $message = \get_status_header_desc((int) $raw->status_code);
            } else {
                $message = '';
            }

            return [
                'headers' => $raw->headers->getAll(),
                'body' => $raw->body,
                'response' => [
                    'code' => $raw->status_code,
                    'message' => $message,
                ],
            ];
        }

        if ($raw instanceof Requests_Exception || $raw instanceof \WpOrg\Requests\Exception) {
            return new WP_Error('http_request_failed', $raw->getMessage(), ['status' => (int) $raw->getCode()]);
        }

        if (is_array($raw)) {
            return $raw;
        }

        if ($raw === null) {
            return new WP_Error('http_request_failed', 'Empty HTTP response.');
        }

        return $raw;
    }

    /**
     * Sleeps until at least `linkDelayMs` has elapsed since the last
     * processed response. Does nothing when the delay is 0 or no response
     * has been processed yet.
     */
    private function waitForRemoteSlot(): void
    {
        if ($this->linkDelayMs <= 0) {
            return;
        }

        $delaySeconds = $this->linkDelayMs / 1000;
        if ($this->lastRemoteRequestCompletedAt > 0) {
            $elapsed = microtime(true) - $this->lastRemoteRequestCompletedAt;
            $remaining = $delaySeconds - $elapsed;
            if ($remaining > 0) {
                usleep((int) round($remaining * 1000000));
            }
        }
    }

    /**
     * Records the current time as the moment the last response was processed.
     */
    private function markRemoteRequestComplete(): void
    {
        $this->lastRemoteRequestCompletedAt = microtime(true);
    }

    /**
     * Returns the default dispatcher, backed by Requests::request_multiple().
     *
     * The closure returns an empty array when given no requests or when the
     * Requests class cannot be loaded; sendRequests() then falls back to the
     * HTTP client for every request.
     */
    private static function resolveDispatcher(): callable
    {
        return static function (array $requests) {
            if (empty($requests)) {
                return [];
            }

            if (!class_exists('Requests')) {
                if (defined('ABSPATH') && defined('WPINC')) {
                    $requests_class_path = trailingslashit(ABSPATH) . WPINC . '/class-requests.php';
                    if (file_exists($requests_class_path)) {
                        require_once $requests_class_path;
                    }
                }
                if (!class_exists('Requests')) {
                    return [];
                }
            }

            return \Requests::request_multiple($requests);
        };
    }
}

0 commit comments

Comments
 (0)