Skip to content

Commit 55a5a5f

Browse files
Merge pull request #221 from JLG-WOCFR-DEV/codex/add-remote-image-analysis-option
Support remote CDN images in scan
2 parents d75e144 + a267d93 commit 55a5a5f

File tree

7 files changed

+410
-62
lines changed

7 files changed

+410
-62
lines changed

README.md

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -4,10 +4,11 @@ Liens Morts Detector est une extension WordPress qui détecte les liens et image
44

55
## Fonctionnalités
66
- Vérification automatique des liens `<a>` grâce à WP‑Cron, et déclenchement manuel de l’analyse des images `<img>` (traitées ensuite en arrière-plan)
7-
- Planification quotidienne, hebdomadaire ou mensuelle
8-
- Tableau de bord listant les liens et images cassés avec statistiques
9-
- Actions rapides pour modifier une URL ou retirer un lien directement depuis la liste
7+
- Planification quotidienne, hebdomadaire ou mensuelle
8+
- Tableau de bord listant les liens et images cassés avec statistiques
9+
- Actions rapides pour modifier une URL ou retirer un lien directement depuis la liste
1010
- Options avancées : exclusion de domaines, plages horaires de repos, mode debug
11+
- Option dédiée pour analyser les images servies depuis un CDN ou un domaine externe sécurisé
1112

1213
## Installation
1314
1. Copier le dossier `liens-morts-detector-jlg` dans `wp-content/plugins/`.
@@ -18,6 +19,7 @@ Liens Morts Detector est une extension WordPress qui détecte les liens et image
1819
- Les liens sont vérifiés automatiquement selon la fréquence choisie, tandis que l’analyse des images doit être lancée manuellement depuis le rapport (le traitement se poursuit ensuite en arrière-plan).
1920
- Les liens ou images détectés comme cassés apparaissent dans une table permettant la modification rapide de l’URL ou la suppression du lien.
2021
- Des réglages avancés permettent d’exclure certains domaines, de limiter l’analyse à des plages horaires et d’activer un mode debug pour le suivi.
22+
- L’analyse des images distantes (CDN, sous-domaines médias) peut être activée dans les réglages. Cette vérification reste basée sur les fichiers présents dans `wp-content/uploads` et peut rallonger la durée du scan ou consommer davantage de quotas côté CDN.
2123

2224
## Hooks disponibles
2325
### `blc_max_load_threshold`

liens-morts-detector-jlg/includes/blc-admin-pages.php

Lines changed: 44 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -302,12 +302,22 @@ function blc_dashboard_images_page() {
302302

303303
global $wpdb;
304304
$table_name = $wpdb->prefix . 'blc_broken_links';
305-
$broken_images_count = (int) $wpdb->get_var(
306-
$wpdb->prepare(
307-
"SELECT COUNT(*) FROM $table_name WHERE type = %s",
308-
'image'
309-
)
310-
);
305+
$image_row_types = blc_get_dataset_row_types('image');
306+
if (count($image_row_types) === 1) {
307+
$broken_images_count = (int) $wpdb->get_var(
308+
$wpdb->prepare(
309+
"SELECT COUNT(*) FROM $table_name WHERE type = %s",
310+
reset($image_row_types)
311+
)
312+
);
313+
} else {
314+
$placeholders = implode(',', array_fill(0, count($image_row_types), '%s'));
315+
$query = $wpdb->prepare(
316+
"SELECT COUNT(*) FROM $table_name WHERE type IN ($placeholders)",
317+
$image_row_types
318+
);
319+
$broken_images_count = (int) $wpdb->get_var($query);
320+
}
311321
$option_size_bytes = blc_get_dataset_storage_footprint_bytes('image');
312322
$last_image_check_time = get_option('blc_last_image_check_time', 0);
313323
$option_size_kb = $option_size_bytes / 1024;
@@ -466,6 +476,9 @@ function blc_settings_page() {
466476
$scan_method = sanitize_text_field($scan_method_raw);
467477
update_option('blc_scan_method', $scan_method);
468478

479+
$remote_image_scan_enabled = isset($_POST['blc_remote_image_scan_enabled']) ? (bool) $_POST['blc_remote_image_scan_enabled'] : false;
480+
update_option('blc_remote_image_scan_enabled', $remote_image_scan_enabled);
481+
469482
$available_status_names = get_post_stati([], 'names');
470483
if (!is_array($available_status_names)) {
471484
$available_status_names = [];
@@ -663,6 +676,7 @@ function blc_settings_page() {
663676
$get_timeout_limits['max']
664677
);
665678
$scan_method = get_option('blc_scan_method', 'precise');
679+
$remote_image_scan_enabled = (bool) get_option('blc_remote_image_scan_enabled', false);
666680
$excluded_domains = get_option('blc_excluded_domains', "x.com\ntwitter.com\nlinkedin.com");
667681
$debug_mode = get_option('blc_debug_mode', false);
668682
$notification_recipients = (string) get_option('blc_notification_recipients', '');
@@ -867,6 +881,30 @@ function blc_settings_page() {
867881
</tr>
868882
</tbody>
869883
</table>
884+
<h2><?php esc_html_e('Images distantes', 'liens-morts-detector-jlg'); ?></h2>
885+
<table class="form-table" role="presentation">
886+
<tbody>
887+
<tr>
888+
<th scope="row"><?php esc_html_e('Analyse des images CDN', 'liens-morts-detector-jlg'); ?></th>
889+
<td>
890+
<fieldset>
891+
<label for="blc_remote_image_scan_enabled">
892+
<input type="checkbox" name="blc_remote_image_scan_enabled" id="blc_remote_image_scan_enabled" <?php checked($remote_image_scan_enabled, true); ?>>
893+
<?php esc_html_e('Vérifier aussi les images servies depuis un domaine ou un CDN distinct.', 'liens-morts-detector-jlg'); ?>
894+
</label>
895+
<p class="description">
896+
<?php
897+
echo wp_kses(
898+
__('Activez cette option si vos images sont délivrées via un CDN ou un sous-domaine dédié. Le plugin s\'appuie toujours sur les fichiers présents dans <code>wp-content/uploads</code> pour détecter les absences. Cette vérification supplémentaire peut rallonger la durée du scan et consommer davantage de quotas côté CDN (latence, limitations de requêtes).', 'liens-morts-detector-jlg'),
899+
['code' => []]
900+
);
901+
?>
902+
</p>
903+
</fieldset>
904+
</td>
905+
</tr>
906+
</tbody>
907+
</table>
870908
<h2><?php esc_html_e('Notifications', 'liens-morts-detector-jlg'); ?></h2>
871909
<table class="form-table" role="presentation">
872910
<tbody>

liens-morts-detector-jlg/includes/blc-scanner.php

Lines changed: 127 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -62,11 +62,25 @@ function blc_maybe_send_scan_summary($dataset_type) {
6262

6363
$table_name = $wpdb->prefix . 'blc_broken_links';
6464
$broken_count = 0;
65+
$row_types = blc_get_dataset_row_types($dataset_type);
66+
if ($row_types === []) {
67+
return;
68+
}
69+
6570
if (method_exists($wpdb, 'prepare') && method_exists($wpdb, 'get_var')) {
66-
$query = $wpdb->prepare(
67-
"SELECT COUNT(*) FROM $table_name WHERE type = %s",
68-
$dataset_type
69-
);
71+
if (count($row_types) === 1) {
72+
$query = $wpdb->prepare(
73+
"SELECT COUNT(*) FROM $table_name WHERE type = %s",
74+
reset($row_types)
75+
);
76+
} else {
77+
$placeholders = implode(',', array_fill(0, count($row_types), '%s'));
78+
$query = $wpdb->prepare(
79+
"SELECT COUNT(*) FROM $table_name WHERE type IN ($placeholders)",
80+
$row_types
81+
);
82+
}
83+
7084
if (is_string($query)) {
7185
$broken_count = (int) $wpdb->get_var($query);
7286
}
@@ -947,15 +961,36 @@ function blc_generate_scan_run_token() {
947961
*
948962
* @return int|\WP_Error Number of rows marked or WP_Error on failure.
949963
*/
950-
function blc_stage_dataset_refresh($table_name, $type, $scan_run_id, ?array $post_ids = null) {
964+
function blc_stage_dataset_refresh($table_name, $types, $scan_run_id, ?array $post_ids = null) {
951965
if (is_array($post_ids) && count($post_ids) === 0) {
952966
return 0;
953967
}
954968

969+
if (!is_array($types)) {
970+
$types = [$types];
971+
}
972+
973+
$types = array_values(array_filter(array_map('strval', $types), static function ($value) {
974+
return $value !== '';
975+
}));
976+
977+
if ($types === []) {
978+
return 0;
979+
}
980+
955981
global $wpdb;
956982

957-
$clauses = ['type = %s'];
958-
$args = [$scan_run_id, $type];
983+
$clauses = [];
984+
$args = [$scan_run_id];
985+
986+
if (count($types) === 1) {
987+
$clauses[] = 'type = %s';
988+
$args[] = $types[0];
989+
} else {
990+
$type_placeholders = implode(',', array_fill(0, count($types), '%s'));
991+
$clauses[] = "type IN ($type_placeholders)";
992+
$args = array_merge($args, $types);
993+
}
959994

960995
if (is_array($post_ids)) {
961996
$post_ids = array_values(array_unique(array_map('intval', $post_ids)));
@@ -998,15 +1033,36 @@ function blc_stage_dataset_refresh($table_name, $type, $scan_run_id, ?array $pos
9981033
*
9991034
* @return int|\WP_Error Number of rows deleted or WP_Error on failure.
10001035
*/
1001-
function blc_commit_dataset_refresh($table_name, $type, $scan_run_id, $dataset_type, ?array $post_ids = null) {
1036+
function blc_commit_dataset_refresh($table_name, $types, $scan_run_id, $dataset_type, ?array $post_ids = null) {
10021037
if (is_array($post_ids) && count($post_ids) === 0) {
10031038
return 0;
10041039
}
10051040

1041+
if (!is_array($types)) {
1042+
$types = [$types];
1043+
}
1044+
1045+
$types = array_values(array_filter(array_map('strval', $types), static function ($value) {
1046+
return $value !== '';
1047+
}));
1048+
1049+
if ($types === []) {
1050+
return 0;
1051+
}
1052+
10061053
global $wpdb;
10071054

1008-
$clauses = ['scan_run_id = %s', 'type = %s'];
1009-
$args = [$scan_run_id, $type];
1055+
$clauses = ['scan_run_id = %s'];
1056+
$args = [$scan_run_id];
1057+
1058+
if (count($types) === 1) {
1059+
$clauses[] = 'type = %s';
1060+
$args[] = $types[0];
1061+
} else {
1062+
$type_placeholders = implode(',', array_fill(0, count($types), '%s'));
1063+
$clauses[] = "type IN ($type_placeholders)";
1064+
$args = array_merge($args, $types);
1065+
}
10101066

10111067
if (is_array($post_ids)) {
10121068
$post_ids = array_values(array_unique(array_map('intval', $post_ids)));
@@ -1056,20 +1112,41 @@ function blc_commit_dataset_refresh($table_name, $type, $scan_run_id, $dataset_t
10561112
/**
10571113
* Clear staging markers so the previous dataset remains available.
10581114
*
1059-
* @param string $table_name Fully qualified table name.
1060-
* @param string $type Dataset type stored in the table.
1061-
* @param string $scan_run_id Marker assigned during staging.
1062-
* @param array<int>|null $post_ids Optional subset of posts to restore.
1115+
* @param string $table_name Fully qualified table name.
1116+
* @param string|string[] $types Dataset type(s) stored in the table.
1117+
* @param string $scan_run_id Marker assigned during staging.
1118+
* @param array<int>|null $post_ids Optional subset of posts to restore.
10631119
*/
1064-
function blc_restore_dataset_refresh($table_name, $type, $scan_run_id, ?array $post_ids = null) {
1120+
function blc_restore_dataset_refresh($table_name, $types, $scan_run_id, ?array $post_ids = null) {
10651121
if (is_array($post_ids) && count($post_ids) === 0) {
10661122
return;
10671123
}
10681124

1125+
if (!is_array($types)) {
1126+
$types = [$types];
1127+
}
1128+
1129+
$types = array_values(array_filter(array_map('strval', $types), static function ($value) {
1130+
return $value !== '';
1131+
}));
1132+
1133+
if ($types === []) {
1134+
return;
1135+
}
1136+
10691137
global $wpdb;
10701138

1071-
$clauses = ['scan_run_id = %s', 'type = %s'];
1072-
$args = [$scan_run_id, $type];
1139+
$clauses = ['scan_run_id = %s'];
1140+
$args = [$scan_run_id];
1141+
1142+
if (count($types) === 1) {
1143+
$clauses[] = 'type = %s';
1144+
$args[] = $types[0];
1145+
} else {
1146+
$type_placeholders = implode(',', array_fill(0, count($types), '%s'));
1147+
$clauses[] = "type IN ($type_placeholders)";
1148+
$args = array_merge($args, $types);
1149+
}
10731150

10741151
if (is_array($post_ids)) {
10751152
$post_ids = array_values(array_unique(array_map('intval', $post_ids)));
@@ -2307,6 +2384,8 @@ function blc_perform_image_check($batch = 0, $is_full_scan = true) { // Une anal
23072384
if ($debug_mode) { error_log("--- Début du scan IMAGES (Lot #$batch) ---"); }
23082385

23092386
$table_name = $wpdb->prefix . 'blc_broken_links';
2387+
$remote_image_scan_enabled = (bool) get_option('blc_remote_image_scan_enabled', false);
2388+
$image_dataset_row_types = blc_get_dataset_row_types('image');
23102389
$batch_delay_s = max(0, (int) get_option('blc_batch_delay', 60));
23112390
$default_lock_timeout = defined('MINUTE_IN_SECONDS') ? 15 * MINUTE_IN_SECONDS : 900;
23122391
$lock_timeout = apply_filters('blc_image_scan_lock_timeout', $default_lock_timeout);
@@ -2417,7 +2496,7 @@ function blc_perform_image_check($batch = 0, $is_full_scan = true) { // Une anal
24172496
$scan_run_token = '';
24182497
if (!empty($post_ids_in_batch)) {
24192498
$scan_run_token = blc_generate_scan_run_token();
2420-
$stage_result = blc_stage_dataset_refresh($table_name, 'image', $scan_run_token, $post_ids_in_batch);
2499+
$stage_result = blc_stage_dataset_refresh($table_name, $image_dataset_row_types, $scan_run_token, $post_ids_in_batch);
24212500
if (is_wp_error($stage_result)) {
24222501
if ($lock_token !== '') {
24232502
blc_release_image_scan_lock($lock_token);
@@ -2509,7 +2588,7 @@ function blc_perform_image_check($batch = 0, $is_full_scan = true) { // Une anal
25092588
if (!$dom instanceof DOMDocument) {
25102589
error_log(sprintf('BLC: DOM creation failed during image scan for post ID %d; restoring staged entries.', $post->ID));
25112590
if ($scan_run_token !== '') {
2512-
blc_restore_dataset_refresh($table_name, 'image', $scan_run_token, [$post->ID]);
2591+
blc_restore_dataset_refresh($table_name, $image_dataset_row_types, $scan_run_token, [$post->ID]);
25132592
}
25142593
continue;
25152594
}
@@ -2533,7 +2612,8 @@ function blc_perform_image_check($batch = 0, $is_full_scan = true) { // Une anal
25332612
$wpdb,
25342613
$post_title_for_storage,
25352614
$register_pending_image_insert,
2536-
$site_host_for_metadata
2615+
$site_host_for_metadata,
2616+
$remote_image_scan_enabled
25372617
) {
25382618
$candidate_url = trim((string) $candidate_url);
25392619
if ($candidate_url === '') {
@@ -2559,10 +2639,26 @@ function blc_perform_image_check($batch = 0, $is_full_scan = true) { // Une anal
25592639

25602640
$hosts_match_site = ($image_host !== '' && $normalized_site_host !== '' && $image_host === $normalized_site_host);
25612641
$hosts_match_upload = ($image_host !== '' && $upload_baseurl_host !== '' && $image_host === $upload_baseurl_host);
2642+
$is_remote_upload_candidate = false;
2643+
25622644
if (!$hosts_match_site && !$hosts_match_upload) {
2563-
return;
2564-
}
2565-
if (!$hosts_match_site && $hosts_match_upload) {
2645+
if (!$remote_image_scan_enabled) {
2646+
if ($debug_mode) {
2647+
error_log(" -> Image distante ignorée (analyse désactivée) : " . $normalized_image_url);
2648+
}
2649+
return;
2650+
}
2651+
2652+
$is_safe_remote_host = blc_is_safe_remote_host($image_host);
2653+
if (!$is_safe_remote_host) {
2654+
if ($debug_mode) {
2655+
error_log(" -> Image ignorée (IP non autorisée) : " . $normalized_image_url);
2656+
}
2657+
return;
2658+
}
2659+
2660+
$is_remote_upload_candidate = true;
2661+
} elseif (!$hosts_match_site && $hosts_match_upload) {
25662662
$is_safe_remote_host = blc_is_safe_remote_host($image_host);
25672663
if (!$is_safe_remote_host) {
25682664
if ($debug_mode) {
@@ -2582,8 +2678,11 @@ function blc_perform_image_check($batch = 0, $is_full_scan = true) { // Une anal
25822678
}
25832679

25842680
$normalized_upload_baseurl_length = strlen($normalized_upload_baseurl);
2681+
if ($normalized_upload_baseurl_length === 0) {
2682+
return;
2683+
}
25852684
if (
2586-
$normalized_upload_baseurl_length === 0 ||
2685+
!$is_remote_upload_candidate &&
25872686
strncasecmp($normalized_image_url, $normalized_upload_baseurl, $normalized_upload_baseurl_length) !== 0
25882687
) {
25892688
return;
@@ -2663,14 +2762,15 @@ function blc_perform_image_check($batch = 0, $is_full_scan = true) { // Une anal
26632762
$metadata = blc_get_url_metadata_for_storage($candidate_url, $normalized_image_url, $site_host_for_metadata);
26642763
$row_bytes = blc_calculate_row_storage_footprint_bytes($url_for_storage, $anchor_for_storage, $post_title_for_storage);
26652764
$checked_at_gmt = current_time('mysql', true);
2765+
$row_type = $is_remote_upload_candidate ? 'remote-image' : 'image';
26662766
$inserted = $wpdb->insert(
26672767
$table_name,
26682768
[
26692769
'url' => $url_for_storage,
26702770
'anchor' => $anchor_for_storage,
26712771
'post_id' => $post->ID,
26722772
'post_title' => $post_title_for_storage,
2673-
'type' => 'image',
2773+
'type' => $row_type,
26742774
'url_host' => $metadata['host'],
26752775
'is_internal' => $metadata['is_internal'],
26762776
'http_status' => null,
@@ -2727,7 +2827,7 @@ function blc_perform_image_check($batch = 0, $is_full_scan = true) { // Une anal
27272827
}
27282828

27292829
if ($scan_run_token !== '') {
2730-
$commit_result = blc_commit_dataset_refresh($table_name, 'image', $scan_run_token, 'image', [$post->ID]);
2830+
$commit_result = blc_commit_dataset_refresh($table_name, $image_dataset_row_types, $scan_run_token, 'image', [$post->ID]);
27312831
if (is_wp_error($commit_result)) {
27322832
$batch_wp_error = $commit_result;
27332833
break;
@@ -2760,7 +2860,7 @@ function blc_perform_image_check($batch = 0, $is_full_scan = true) { // Une anal
27602860
}
27612861

27622862
if ($scan_run_token !== '' && $should_cleanup_pending_images) {
2763-
blc_restore_dataset_refresh($table_name, 'image', $scan_run_token);
2863+
blc_restore_dataset_refresh($table_name, $image_dataset_row_types, $scan_run_token);
27642864
}
27652865

27662866
if ($batch_exception instanceof \Throwable) {

0 commit comments

Comments
 (0)