Skip to content

Commit 2e40550

Browse files
CopilotswissspidyCopilot
authored
Add --skip-duplicates flag to wp media import (#241)
Co-authored-by: swissspidy <841956+swissspidy@users.noreply.github.com>
Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com>
Co-authored-by: copilot-swe-agent[bot] <198982749+Copilot@users.noreply.github.com>
Co-authored-by: Pascal Birchler <pascal.birchler@gmail.com>
Co-authored-by: Pascal Birchler <pascalb@google.com>
1 parent 9f90fd9 commit 2e40550

2 files changed

Lines changed: 217 additions & 14 deletions

File tree

features/media-import.feature

Lines changed: 117 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -364,3 +364,120 @@ Feature: Manage WordPress attachments
364364
"""
365365
/foo/large-image.jpg
366366
"""
367+
368+
Scenario: Skip importing a local file that was already imported
369+
Given download:
370+
| path | url |
371+
| {CACHE_DIR}/large-image.jpg | http://wp-cli.github.io/behat-data/large-image.jpg |
372+
373+
When I run `wp media import {CACHE_DIR}/large-image.jpg --porcelain`
374+
Then save STDOUT as {ATTACHMENT_ID}
375+
And STDOUT should not be empty
376+
377+
When I run `wp media import {CACHE_DIR}/large-image.jpg --skip-duplicates`
378+
Then STDOUT should contain:
379+
"""
380+
Skipped importing file
381+
"""
382+
And STDOUT should contain:
383+
"""
384+
already exists as attachment ID {ATTACHMENT_ID}
385+
"""
386+
And STDOUT should contain:
387+
"""
388+
Success: Imported 0 of 1 items (1 skipped).
389+
"""
390+
And the return code should be 0
391+
392+
Scenario: Skip importing a remote file that was already imported
393+
When I run `wp media import 'http://wp-cli.github.io/behat-data/codeispoetry.png' --porcelain`
394+
Then save STDOUT as {ATTACHMENT_ID}
395+
And STDOUT should not be empty
396+
397+
When I run `wp media import 'http://wp-cli.github.io/behat-data/codeispoetry.png' --skip-duplicates`
398+
Then STDOUT should contain:
399+
"""
400+
Skipped importing file
401+
"""
402+
And STDOUT should contain:
403+
"""
404+
already exists as attachment ID {ATTACHMENT_ID}
405+
"""
406+
And STDOUT should contain:
407+
"""
408+
Success: Imported 0 of 1 items (1 skipped).
409+
"""
410+
And the return code should be 0
411+
412+
Scenario: Import new file while skipping duplicates from a batch
413+
Given download:
414+
| path | url |
415+
| {CACHE_DIR}/large-image.jpg | http://wp-cli.github.io/behat-data/large-image.jpg |
416+
417+
When I run `wp media import {CACHE_DIR}/large-image.jpg`
418+
Then STDOUT should contain:
419+
"""
420+
Success: Imported 1 of 1 items.
421+
"""
422+
423+
When I run `wp media import {CACHE_DIR}/large-image.jpg 'http://wp-cli.github.io/behat-data/codeispoetry.png' --skip-duplicates`
424+
Then STDOUT should contain:
425+
"""
426+
Skipped importing file
427+
"""
428+
And STDOUT should contain:
429+
"""
430+
Success: Imported 1 of 2 items (1 skipped).
431+
"""
432+
And the return code should be 0
433+
434+
@require-wp-5.3
435+
Scenario: Skip importing a file that was stored with a scaled suffix by WP 5.3+
436+
Given download:
437+
| path | url |
438+
| {CACHE_DIR}/large-image.jpg | http://wp-cli.github.io/behat-data/large-image.jpg |
439+
And I run `wp option update uploads_use_yearmonth_folders 0`
440+
441+
When I run `wp media import {CACHE_DIR}/large-image.jpg --porcelain`
442+
Then save STDOUT as {ATTACHMENT_ID}
443+
And STDOUT should not be empty
444+
And the wp-content/uploads/large-image-scaled.jpg file should exist
445+
446+
When I run `wp media import {CACHE_DIR}/large-image.jpg --skip-duplicates`
447+
Then STDOUT should contain:
448+
"""
449+
Skipped importing file
450+
"""
451+
And STDOUT should contain:
452+
"""
453+
already exists as attachment ID {ATTACHMENT_ID}
454+
"""
455+
And STDOUT should contain:
456+
"""
457+
Success: Imported 0 of 1 items (1 skipped).
458+
"""
459+
And the return code should be 0
460+
461+
Scenario: Skip importing a file when a custom --file_name was used in the original import
462+
Given download:
463+
| path | url |
464+
| {CACHE_DIR}/large-image.jpg | http://wp-cli.github.io/behat-data/large-image.jpg |
465+
466+
When I run `wp media import {CACHE_DIR}/large-image.jpg --file_name=custom --porcelain`
467+
Then save STDOUT as {ATTACHMENT_ID}
468+
And STDOUT should not be empty
469+
470+
When I run `wp media import {CACHE_DIR}/large-image.jpg --file_name=custom --skip-duplicates`
471+
Then STDOUT should contain:
472+
"""
473+
Skipped importing file
474+
"""
475+
And STDOUT should contain:
476+
"""
477+
already exists as attachment ID {ATTACHMENT_ID}
478+
"""
479+
And STDOUT should contain:
480+
"""
481+
Success: Imported 0 of 1 items (1 skipped).
482+
"""
483+
And the return code should be 0

src/Media_Command.php

Lines changed: 100 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -422,6 +422,9 @@ public function prune( $args, $assoc_args = array() ) {
422422
* [--featured_image]
423423
* : If set, set the imported image as the Featured Image of the post it is attached to.
424424
*
425+
* [--skip-duplicates]
426+
* : If set, media files that have already been imported will be skipped.
427+
*
425428
* [--porcelain[=<field>]]
426429
* : Output a single field for each imported image. Defaults to attachment ID when used as flag.
427430
* ---
@@ -465,7 +468,7 @@ public function prune( $args, $assoc_args = array() ) {
465468
* Success: Imported 1 of 1 items.
466469
*
467470
* @param string[] $args Positional arguments.
468-
* @param array{post_id?: string, post_name?: string, file_name?: string, title?: string, caption?: string, alt?: string, desc?: string, 'skip-copy'?: bool, 'destination-dir'?: string, 'preserve-filetime'?: bool, featured_image?: bool, porcelain?: bool|string} $assoc_args Associative arguments.
471+
* @param array{post_id?: string, post_name?: string, file_name?: string, title?: string, caption?: string, alt?: string, desc?: string, 'skip-copy'?: bool, 'destination-dir'?: string, 'preserve-filetime'?: bool, featured_image?: bool, 'skip-duplicates'?: bool, porcelain?: bool|string} $assoc_args Associative arguments.
469472
* @return void
470473
*/
471474
public function import( $args, $assoc_args = array() ) {
@@ -518,6 +521,7 @@ public function import( $args, $assoc_args = array() ) {
518521
$number = 0;
519522
$successes = 0;
520523
$errors = 0;
524+
$skips = 0;
521525
foreach ( $args as $file ) {
522526
++$number;
523527
if ( 0 === $number % self::WP_CLEAR_OBJECT_CACHE_INTERVAL ) {
@@ -603,17 +607,53 @@ public function import( $args, $assoc_args = array() ) {
603607
++$errors;
604608
continue;
605609
}
610+
$src_basename = Path::basename( $file );
611+
if ( Utils\get_flag_value( $assoc_args, 'skip-duplicates' ) ) {
612+
$check_basename = $src_basename;
613+
if ( ! empty( $assoc_args['file_name'] ) ) {
614+
$resolved_name = $this->get_image_name( $src_basename, $assoc_args['file_name'] );
615+
if ( ! empty( $resolved_name ) ) {
616+
$check_basename = $resolved_name;
617+
}
618+
}
619+
$existing = $this->find_duplicate_attachment( $check_basename );
620+
if ( false !== $existing ) {
621+
if ( ! $porcelain ) {
622+
WP_CLI::log( "Skipped importing file '$orig_filename'. Reason: already exists as attachment ID $existing." );
623+
}
624+
++$skips;
625+
continue;
626+
}
627+
}
606628
if ( Utils\get_flag_value( $assoc_args, 'skip-copy' ) ) {
607629
$tempfile = $file;
608630
} else {
609631
$tempfile = $this->make_copy( $file );
610632
}
611-
$name = Path::basename( $file );
633+
$name = $src_basename;
612634

613635
if ( Utils\get_flag_value( $assoc_args, 'preserve-filetime' ) ) {
614636
$file_time = @filemtime( $file );
615637
}
616638
} else {
639+
$src_basename = (string) explode( '?', Path::basename( $file ), 2 )[0];
640+
if ( Utils\get_flag_value( $assoc_args, 'skip-duplicates' ) ) {
641+
$check_basename = $src_basename;
642+
if ( ! empty( $assoc_args['file_name'] ) ) {
643+
$resolved_name = $this->get_image_name( $src_basename, $assoc_args['file_name'] );
644+
if ( ! empty( $resolved_name ) ) {
645+
$check_basename = $resolved_name;
646+
}
647+
}
648+
$existing = $this->find_duplicate_attachment( $check_basename );
649+
if ( false !== $existing ) {
650+
if ( ! $porcelain ) {
651+
WP_CLI::log( "Skipped importing file '$orig_filename'. Reason: already exists as attachment ID $existing." );
652+
}
653+
++$skips;
654+
continue;
655+
}
656+
}
617657
$tempfile = download_url( $file );
618658
if ( is_wp_error( $tempfile ) ) {
619659
WP_CLI::warning(
@@ -626,7 +666,7 @@ public function import( $args, $assoc_args = array() ) {
626666
++$errors;
627667
continue;
628668
}
629-
$name = (string) strtok( Path::basename( $file ), '?' );
669+
$name = $src_basename;
630670
}
631671
}
632672

@@ -769,7 +809,7 @@ public function import( $args, $assoc_args = array() ) {
769809

770810
// Report the result of the operation
771811
if ( ! Utils\get_flag_value( $assoc_args, 'porcelain' ) ) {
772-
Utils\report_batch_operation_results( $noun, 'import', count( $args ), $successes, $errors );
812+
Utils\report_batch_operation_results( $noun, 'import', count( $args ), $successes, $errors, Utils\get_flag_value( $assoc_args, 'skip-duplicates' ) ? $skips : null );
773813
} elseif ( $errors ) {
774814
WP_CLI::halt( 1 );
775815
}
@@ -1081,6 +1121,61 @@ private function make_copy( $path ) {
10811121
return $filename;
10821122
}
10831123

1124+
/**
 * Finds an existing attachment whose stored file basename matches the given filename.
 *
 * Searches the `_wp_attached_file` post meta, which stores the path relative to
 * the uploads directory (e.g. '2026/03/image.jpg' or just 'image.jpg'). Also
 * checks for the WP 5.3+ big-image scaled variant (e.g. 'image-scaled.jpg') so
 * that re-importing a large file that was scaled on first import is correctly
 * detected as a duplicate.
 *
 * The meta query is only a coarse pre-filter: a `LIKE` meta comparison is a
 * substring match, so '/image.jpg' would also hit 'dir/image.jpg.webp'. Every
 * candidate is therefore re-checked in PHP against the exact basename before it
 * is reported as a duplicate. When several attachments share the basename across
 * upload subdirectories, the first verified candidate in query order is returned.
 *
 * @param string $basename Filename basename to search for (e.g. 'image.jpg').
 * @return int|false Attachment ID if found, false otherwise.
 */
private function find_duplicate_attachment( $basename ) {
	$basename = (string) $basename;

	// An empty basename (e.g. a URL that is only a host plus query string) would
	// turn the LIKE clause into '%/%' and match every attachment stored in a
	// sub-directory, so there is nothing meaningful to look up.
	if ( '' === $basename ) {
		return false;
	}

	// WP 5.3+ big-image scaling renames 'image.jpg' → 'image-scaled.jpg' and
	// stores the scaled name in _wp_attached_file, so search for both variants.
	$ext             = pathinfo( $basename, PATHINFO_EXTENSION );
	$name            = pathinfo( $basename, PATHINFO_FILENAME );
	// Compare against '' explicitly: a bare truthiness test would drop an extension of '0'.
	$scaled_basename = $name . '-scaled' . ( '' !== $ext ? '.' . $ext : '' );
	$variants        = array( $basename, $scaled_basename );

	// Build OR meta query clauses matching exact basename or sub-directory-prefixed paths.
	$meta_clauses = array( 'relation' => 'OR' );
	foreach ( $variants as $variant ) {
		$meta_clauses[] = array(
			'key'     => '_wp_attached_file',
			'value'   => $variant,
			'compare' => '=',
		);
		$meta_clauses[] = array(
			'key'     => '_wp_attached_file',
			'value'   => '/' . $variant,
			'compare' => 'LIKE',
		);
	}

	// Fetch every candidate rather than just one: the first row may be a
	// substring-only false positive that has to be skipped during verification.
	$candidate_ids = get_posts(
		array(
			'post_type'              => 'attachment',
			'post_status'            => 'any',
			'posts_per_page'         => -1,
			'meta_query'             => $meta_clauses, // phpcs:ignore WordPress.DB.SlowDBQuery.slow_db_query_meta_query
			'fields'                 => 'ids',
			'no_found_rows'          => true,
			'update_post_meta_cache' => false,
			'update_post_term_cache' => false,
		)
	);

	if ( empty( $candidate_ids ) ) {
		return false;
	}

	// The database comparison is usually case-insensitive (collation dependent),
	// so keep that leniency here instead of becoming stricter than the query.
	$to_lower = function_exists( 'mb_strtolower' ) ? 'mb_strtolower' : 'strtolower';
	$wanted   = array_map( $to_lower, $variants );

	foreach ( $candidate_ids as $attachment_id ) {
		$attached_file = get_post_meta( $attachment_id, '_wp_attached_file', true );
		if ( ! is_string( $attached_file ) || '' === $attached_file ) {
			continue;
		}

		// _wp_attached_file uses forward slashes; keep only the final path segment.
		$last_slash      = strrpos( $attached_file, '/' );
		$stored_basename = false === $last_slash ? $attached_file : substr( $attached_file, $last_slash + 1 );

		if ( in_array( $to_lower( $stored_basename ), $wanted, true ) ) {
			return (int) $attachment_id;
		}
	}

	return false;
}
1178+
10841179
/**
10851180
* Returns a human-readable description for one or more image size names.
10861181
*
@@ -1620,16 +1715,7 @@ private function get_intermediate_sizes( $is_pdf, $metadata, $att_id ) {
16201715

16211716
// Adapted from wp_generate_attachment_metadata() in "wp-admin/includes/image.php".
16221717

1623-
if ( function_exists( 'wp_get_additional_image_sizes' ) ) {
1624-
$_wp_additional_image_sizes = wp_get_additional_image_sizes();
1625-
} else {
1626-
// For WP < 4.7.0.
1627-
global $_wp_additional_image_sizes;
1628-
if ( ! $_wp_additional_image_sizes ) {
1629-
// phpcs:ignore WordPress.WP.GlobalVariablesOverride.Prohibited -- Used as a fallback for WordPress version less than 4.7.0 as function wp_get_additional_image_sizes didn't exist then.
1630-
$_wp_additional_image_sizes = array();
1631-
}
1632-
}
1718+
$_wp_additional_image_sizes = wp_get_additional_image_sizes();
16331719

16341720
$sizes = array();
16351721
foreach ( $intermediate_image_sizes as $s ) {

0 commit comments

Comments
 (0)