Skip to content

Commit a00828f

Browse files
Merge pull request #384 from JLG-WOCFR-DEV/codex/add-proxy-selection-mechanism-in-remoterequestclient
Add proxy pool management and proxy-aware telemetry
2 parents 9beac87 + f5c6099 commit a00828f

File tree

11 files changed

+1741
-13
lines changed

11 files changed

+1741
-13
lines changed

liens-morts-detector-jlg/includes/Scanner/ProxyPool.php

Lines changed: 845 additions & 0 deletions
Large diffs are not rendered by default.

liens-morts-detector-jlg/includes/Scanner/RemoteRequestClient.php

Lines changed: 177 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -34,7 +34,14 @@ class RemoteRequestClient implements HttpClientInterface
3434
*/
3535
private $lastRequestAt = 0.0;
3636

37-
public function __construct(array $defaultArgs = [], array $retryPlan = [], array $userAgents = [])
37+
/**
38+
* Pool of proxies used to route outgoing requests.
39+
*
40+
* @var ProxyPool|null
41+
*/
42+
private $proxyPool;
43+
44+
public function __construct(array $defaultArgs = [], array $retryPlan = [], array $userAgents = [], ?ProxyPool $proxyPool = null)
3845
{
3946
$defaults = [
4047
'timeout' => 10,
@@ -57,6 +64,12 @@ public function __construct(array $defaultArgs = [], array $retryPlan = [], arra
5764
$this->defaultArgs = array_merge($defaults, $defaultArgs);
5865
$this->retryPlan = array_merge($retryDefaults, $retryPlan);
5966
$this->userAgents = $userAgents !== [] ? array_values(array_filter($userAgents, 'is_string')) : $this->getDefaultUserAgents();
67+
$this->proxyPool = $proxyPool;
68+
}
69+
70+
/**
 * Replace the proxy pool held by this client.
 *
 * @param ProxyPool|null $proxyPool Pool to store; null leaves the client without a pool.
 */
public function setProxyPool(?ProxyPool $proxyPool = null)
{
    $this->proxyPool = $proxyPool;
}
6174

6275
public function head($url, array $args = [])
@@ -94,13 +107,22 @@ protected function requestWithRetries($method, $url, array $args)
94107
$this->enforceRateLimit();
95108

96109
$requestArgs = $this->prepareRequestArguments($args, $attempt);
110+
$proxySelection = $this->acquireProxySelection($url, $requestArgs);
111+
if ($proxySelection !== null && $this->proxyPool instanceof ProxyPool && $this->proxyPool->isEnabled()) {
112+
$requestArgs = $this->proxyPool->injectProxyArguments($url, $requestArgs, $proxySelection);
113+
}
114+
97115
$requestStartedAt = microtime(true);
98116
$lastResponse = $this->dispatchRequest($method, $url, $requestArgs);
99117
$durationMs = (int) round((microtime(true) - $requestStartedAt) * 1000);
100118

119+
$responseCode = $this->extractResponseCode($lastResponse);
101120
$retryAfter = $this->getRetryAfterDelay($lastResponse);
102121
$willRetry = $this->shouldRetry($lastResponse, $attempt, $attempts);
103122

123+
$proxyFailure = $this->shouldMarkProxyFailure($lastResponse, $responseCode);
124+
$proxyOutcomeRecorded = $this->reportProxyOutcome($proxySelection, $lastResponse, $responseCode, $willRetry, $proxyFailure);
125+
104126
$this->recordRequestMetrics(
105127
$method,
106128
$url,
@@ -110,7 +132,11 @@ protected function requestWithRetries($method, $url, array $args)
110132
$durationMs,
111133
$lastResponse,
112134
$willRetry,
113-
$retryAfter
135+
$retryAfter,
136+
$proxySelection,
137+
$proxyOutcomeRecorded,
138+
$responseCode,
139+
$proxyFailure
114140
);
115141

116142
if (!$willRetry) {
@@ -355,7 +381,11 @@ private function recordRequestMetrics(
355381
$durationMs,
356382
$response,
357383
$willRetry,
358-
$retryAfterMs
384+
$retryAfterMs,
385+
$proxySelection,
386+
$proxyOutcomeRecorded,
387+
$responseCode,
388+
$proxyFailure
359389
) {
360390
$metrics = $this->createRequestMetricsPayload(
361391
$method,
@@ -366,7 +396,11 @@ private function recordRequestMetrics(
366396
$durationMs,
367397
$response,
368398
$willRetry,
369-
$retryAfterMs
399+
$retryAfterMs,
400+
$proxySelection,
401+
$proxyOutcomeRecorded,
402+
$responseCode,
403+
$proxyFailure
370404
);
371405

372406
if (function_exists('\\blc_record_remote_request_metrics')) {
@@ -402,7 +436,11 @@ private function createRequestMetricsPayload(
402436
$durationMs,
403437
$response,
404438
$willRetry,
405-
$retryAfterMs
439+
$retryAfterMs,
440+
$proxySelection,
441+
$proxyOutcomeRecorded,
442+
$responseCode,
443+
$proxyFailure
406444
) {
407445
$parsedUrl = function_exists('wp_parse_url') ? \wp_parse_url($url) : parse_url($url);
408446

@@ -429,7 +467,7 @@ private function createRequestMetricsPayload(
429467

430468
$methodLabel = strtoupper((string) $method);
431469
$timestamp = time();
432-
$responseCode = 0;
470+
$responseCode = (int) $responseCode;
433471
$success = false;
434472

435473
$errorCode = '';
@@ -439,7 +477,6 @@ private function createRequestMetricsPayload(
439477
$errorCode = (string) $response->get_error_code();
440478
$errorMessage = $response->get_error_message();
441479
} else {
442-
$responseCode = (int) \wp_remote_retrieve_response_code($response);
443480
if (!$willRetry && $responseCode >= 200 && $responseCode < 400) {
444481
$success = true;
445482
}
@@ -469,9 +506,142 @@ private function createRequestMetricsPayload(
469506
$metrics['user_agent'] = trim($args['user-agent']);
470507
}
471508

509+
if (is_array($proxySelection)) {
510+
$proxyId = isset($proxySelection['id']) ? (string) $proxySelection['id'] : '';
511+
if ($proxyId !== '') {
512+
$metrics['proxy_id'] = $proxyId;
513+
}
514+
515+
if (isset($proxySelection['region']) && is_string($proxySelection['region'])) {
516+
$metrics['proxy_region'] = (string) $proxySelection['region'];
517+
}
518+
519+
if (isset($proxySelection['priority'])) {
520+
$metrics['proxy_priority'] = (int) $proxySelection['priority'];
521+
}
522+
523+
if (isset($proxySelection['url']) && is_string($proxySelection['url'])) {
524+
$metrics['proxy_url'] = $this->sanitizeProxyUrl($proxySelection['url']);
525+
}
526+
527+
$metrics['proxy_has_credentials'] = !empty($proxySelection['credentials']);
528+
$metrics['proxy_failure'] = (bool) $proxyFailure;
529+
$metrics['proxy_outcome_recorded'] = (bool) $proxyOutcomeRecorded;
530+
531+
if ($this->proxyPool instanceof ProxyPool) {
532+
$health = $this->proxyPool->getHealthSnapshot();
533+
if ($proxyId !== '' && isset($health[$proxyId])) {
534+
if (isset($health[$proxyId]['suspended_until'])) {
535+
$metrics['proxy_suspended_until'] = (int) $health[$proxyId]['suspended_until'];
536+
}
537+
538+
if (isset($health[$proxyId]['failure_count'])) {
539+
$metrics['proxy_failure_count'] = (int) $health[$proxyId]['failure_count'];
540+
}
541+
}
542+
}
543+
}
544+
472545
return $metrics;
473546
}
474547

548+
/**
 * Ask the proxy pool for a proxy suited to the given request.
 *
 * The selection context always carries the URL, plus the lowercased host when
 * the URL can be parsed, plus a region when the request arguments name one
 * ('proxy_region' takes precedence over 'blc_proxy_region').
 *
 * @param mixed $url  Target URL of the request.
 * @param array $args Prepared request arguments.
 *
 * @return mixed Null when no enabled pool is attached, otherwise whatever the pool's acquire() returns.
 */
private function acquireProxySelection($url, array $args)
{
    $pool = $this->proxyPool;
    if (!($pool instanceof ProxyPool && $pool->isEnabled())) {
        return null;
    }

    $selectionContext = ['url' => $url];

    if (function_exists('wp_parse_url')) {
        $urlParts = \wp_parse_url($url);
    } else {
        $urlParts = parse_url($url);
    }

    if (is_array($urlParts) && isset($urlParts['host'])) {
        $selectionContext['host'] = strtolower((string) $urlParts['host']);
    }

    // First matching key wins, mirroring the documented precedence.
    foreach (['proxy_region', 'blc_proxy_region'] as $regionKey) {
        if (isset($args[$regionKey]) && is_string($args[$regionKey])) {
            $selectionContext['region'] = $args[$regionKey];
            break;
        }
    }

    return $pool->acquire($selectionContext);
}
568+
569+
/**
 * Read the HTTP status code from a response.
 *
 * @param mixed $response Response array or WP_Error returned by the dispatcher.
 *
 * @return int Status code, or 0 for a WP_Error or when the WordPress helper is unavailable.
 */
private function extractResponseCode($response)
{
    $canReadCode = !($response instanceof WP_Error)
        && function_exists('wp_remote_retrieve_response_code');

    return $canReadCode ? (int) \wp_remote_retrieve_response_code($response) : 0;
}
581+
582+
/**
 * Decide whether a response should be counted as a failure of the proxy.
 *
 * A WP_Error, a zero status code, or one of 407, 502, 503 and 504 is
 * treated as a proxy failure.
 *
 * @param mixed $response     Response array or WP_Error.
 * @param mixed $responseCode Status code previously extracted from the response.
 *
 * @return bool True when the attempt counts against the proxy.
 */
private function shouldMarkProxyFailure($response, $responseCode)
{
    if ($response instanceof WP_Error) {
        return true;
    }

    $status = (int) $responseCode;

    switch ($status) {
        case 0:
        case 407:
        case 502:
        case 503:
        case 504:
            return true;
        default:
            return false;
    }
}
596+
597+
/**
 * Forward the result of an attempt to the proxy pool.
 *
 * A failure is reported whenever $proxyFailure is truthy. A success is
 * reported only for a final (non-retried) attempt with a 2xx/3xx status.
 * Every other case reports nothing.
 *
 * @param mixed $proxySelection Selection returned by the pool, expected to be an array with an 'id'.
 * @param mixed $response       Response of the attempt (not read by this method).
 * @param mixed $responseCode   Status code of the attempt.
 * @param mixed $willRetry      Whether the request is going to be retried.
 * @param mixed $proxyFailure   Whether the attempt counts as a proxy failure.
 *
 * @return mixed False when nothing was reported, otherwise the pool's reportOutcome() result (presumably a bool; confirm in ProxyPool).
 */
private function reportProxyOutcome($proxySelection, $response, $responseCode, $willRetry, $proxyFailure)
{
    $pool = $this->proxyPool;

    $poolUsable = is_array($proxySelection) && $pool instanceof ProxyPool && $pool->isEnabled();
    if (!$poolUsable) {
        return false;
    }

    $selectedId = isset($proxySelection['id']) ? (string) $proxySelection['id'] : '';
    if ($selectedId === '') {
        return false;
    }

    if ($proxyFailure) {
        return $pool->reportOutcome($selectedId, false);
    }

    $status = (int) $responseCode;
    $isFinalSuccess = !$willRetry && $status >= 200 && $status < 400;

    return $isFinalSuccess ? $pool->reportOutcome($selectedId, true) : false;
}
619+
620+
/**
 * Produce a copy of a proxy URL that is safe to store in telemetry.
 *
 * The userinfo part ("user:pass@") is removed before the URL is parsed, so
 * credentials cannot leak when parse_url() rejects the URL or when a
 * scheme-less "user:pass@host:port" value is parsed as scheme + path.
 * The fragment is dropped; scheme, host, port, path and query are kept.
 *
 * @param mixed $proxyUrl Raw proxy URL, possibly containing credentials.
 *
 * @return string Credential-free URL, or '' for a non-string/empty input.
 */
private function sanitizeProxyUrl($proxyUrl)
{
    if (!is_string($proxyUrl) || $proxyUrl === '') {
        return '';
    }

    // Drop everything up to the last "@" of the authority. The greedy class
    // stops at "/", "?" or "#", so an "@" in the path or query is untouched.
    $stripped = preg_replace('~^((?:[a-z][a-z0-9+.\-]*:)?//)?[^/?#]*@~i', '$1', $proxyUrl);
    if (!is_string($stripped)) {
        // PCRE failure: never fall back to the raw, credential-bearing input.
        return '';
    }

    $parts = parse_url($stripped);
    if (!is_array($parts)) {
        // Unparseable, but the userinfo has already been removed.
        return $stripped;
    }

    $scheme = isset($parts['scheme']) ? $parts['scheme'] . '://' : '';
    $host = $parts['host'] ?? '';
    $port = isset($parts['port']) ? ':' . $parts['port'] : '';
    $path = $parts['path'] ?? '';
    $query = isset($parts['query']) ? '?' . $parts['query'] : '';

    // A bare "proxy.example.com" is parsed as a path; treat it as the host.
    if ($host === '' && isset($parts['path']) && strpos($parts['path'], '/') === false) {
        $host = $parts['path'];
        $path = '';
    }

    return $scheme . $host . $port . $path . $query;
}
644+
475645
/**
476646
* Provide a default pool of user agent strings inspired by common browsers.
477647
*

liens-morts-detector-jlg/includes/Scanner/ScanLockManager.php

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -113,7 +113,7 @@ public function acquireOrReschedule(
113113
* @param array<string, mixed> $preflight
114114
* @param string $lock_token
115115
*/
116-
public function deferDuringRestWindow(array $preflight, $lock_token, $debug_mode, array $jobContext): array
116+
public function deferDuringRestWindow(array $preflight, $lock_token, $debug_mode, array $jobContext = []): array
117117
{
118118
$batch = $preflight['batch'];
119119
$is_full_scan = $preflight['is_full_scan'];
@@ -239,7 +239,7 @@ public function deferDuringRestWindow(array $preflight, $lock_token, $debug_mode
239239
* @param array<string, mixed> $preflight
240240
* @param string $lock_token
241241
*/
242-
public function deferForServerLoad(array $preflight, $lock_token, $debug_mode, array $jobContext): array
242+
public function deferForServerLoad(array $preflight, $lock_token, $debug_mode, array $jobContext = []): array
243243
{
244244
if (!function_exists('sys_getloadavg')) {
245245
return [

liens-morts-detector-jlg/includes/blc-scanner.php

Lines changed: 24 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@
44

55

66
require_once __DIR__ . '/Scanner/HttpClientInterface.php';
7+
require_once __DIR__ . '/Scanner/ProxyPool.php';
78
require_once __DIR__ . '/Scanner/RemoteRequestClient.php';
89
require_once __DIR__ . '/Scanner/ImageUrlNormalizer.php';
910
require_once __DIR__ . '/Scanner/ImageNormalizationContext.php';
@@ -796,6 +797,22 @@ function blc_record_remote_request_metrics(array $metrics) {
796797
}
797798

798799
update_option('blc_remote_request_metrics_history', $history, false);
800+
801+
if (isset($metrics['proxy_id']) && is_string($metrics['proxy_id'])) {
802+
$proxyId = function_exists('sanitize_key') ? sanitize_key($metrics['proxy_id']) : strtolower(preg_replace('/[^a-z0-9_\-]/i', '', $metrics['proxy_id']));
803+
if ($proxyId !== '') {
804+
$timestamp = isset($metrics['timestamp']) ? (int) $metrics['timestamp'] : time();
805+
$outcomeRecorded = !empty($metrics['proxy_outcome_recorded']);
806+
807+
if (!$outcomeRecorded) {
808+
if (!empty($metrics['proxy_failure'])) {
809+
blc_proxy_pool_register_outcome($proxyId, false, $timestamp);
810+
} elseif (!empty($metrics['success'])) {
811+
blc_proxy_pool_register_outcome($proxyId, true, $timestamp);
812+
}
813+
}
814+
}
815+
}
799816
}
800817
}
801818

@@ -2525,7 +2542,13 @@ function blc_make_remote_request_client($force_refresh = false) {
25252542
return $cache['instance'];
25262543
}
25272544

2528-
$client = new \JLG\BrokenLinks\Scanner\RemoteRequestClient($default_args, $retry_plan, $user_agents);
2545+
$proxy_pool = blc_get_proxy_pool_instance();
2546+
2547+
$client = new \JLG\BrokenLinks\Scanner\RemoteRequestClient($default_args, $retry_plan, $user_agents, $proxy_pool instanceof \JLG\BrokenLinks\Scanner\ProxyPool ? $proxy_pool : null);
2548+
2549+
if ($proxy_pool instanceof \JLG\BrokenLinks\Scanner\ProxyPool) {
2550+
$client->setProxyPool($proxy_pool);
2551+
}
25292552

25302553
if (function_exists('apply_filters')) {
25312554
$maybe_client = apply_filters('blc_remote_request_client_instance', $client, $default_args, $retry_plan, $user_agents);

0 commit comments

Comments
 (0)