|
| 1 | +use clap::Parser; |
| 2 | +use rand::seq::SliceRandom; |
| 3 | +use serde::Serialize; |
| 4 | +use std::error::Error; |
| 5 | + |
| 6 | +// AWS SDK for Rust (1.x) |
| 7 | +use aws_config::{load_defaults, BehaviorVersion}; |
| 8 | +use aws_sdk_s3::error::SdkError; |
| 9 | +use aws_sdk_s3::types::Object; |
| 10 | +use aws_sdk_s3::Client; |
| 11 | + |
| 12 | +/// Command-line arguments (all required, no defaults) |
| 13 | +#[derive(Parser, Debug)] |
| 14 | +#[command(author, version, about)] |
| 15 | +struct Args { |
| 16 | + /// Number of pairs to generate |
| 17 | + #[arg(long, required = true)] |
| 18 | + num_pairs: usize, |
| 19 | + |
| 20 | + /// Name of the S3 bucket |
| 21 | + #[arg(long, required = true)] |
| 22 | + bucket: String, |
| 23 | + |
| 24 | + /// Directory (prefix) in the bucket (e.g. "image/") |
| 25 | + #[arg(long, required = true)] |
| 26 | + directory: String, |
| 27 | + |
| 28 | + /// URL prefix to form the final URL (e.g. "https://api.example.com/s3/api/v1/resource?url=s3://") |
| 29 | + #[arg(long, required = true)] |
| 30 | + url_prefix: String, |
| 31 | +} |
| 32 | + |
| 33 | +#[derive(Serialize)] |
| 34 | +struct PairsOutput { |
| 35 | + pairs: Vec<Pair>, |
| 36 | +} |
| 37 | + |
| 38 | +#[derive(Serialize)] |
| 39 | +struct Pair { |
| 40 | + source: String, |
| 41 | + candidate: String, |
| 42 | +} |
| 43 | + |
| 44 | +#[tokio::main] |
| 45 | +async fn main() -> Result<(), Box<dyn Error>> { |
| 46 | + let args = Args::parse(); |
| 47 | + |
| 48 | + let num_pairs = args.num_pairs; |
| 49 | + let bucket_name = &args.bucket; |
| 50 | + let directory_prefix = &args.directory; |
| 51 | + let url_prefix = &args.url_prefix; |
| 52 | + |
| 53 | + let shared_config = load_defaults(BehaviorVersion::latest()).await; |
| 54 | + let s3_client = Client::new(&shared_config); |
| 55 | + |
| 56 | + let resp = s3_client |
| 57 | + .list_objects_v2() |
| 58 | + .bucket(bucket_name) |
| 59 | + .prefix(directory_prefix) |
| 60 | + .send() |
| 61 | + .await; |
| 62 | + |
| 63 | + let output = match resp { |
| 64 | + Ok(o) => o, |
| 65 | + Err(SdkError::ServiceError(e)) => { |
| 66 | + eprintln!("Service error: {:#?}", e); |
| 67 | + return Ok(()); |
| 68 | + } |
| 69 | + Err(e) => { |
| 70 | + eprintln!("Other error listing objects: {:?}", e); |
| 71 | + return Ok(()); |
| 72 | + } |
| 73 | + }; |
| 74 | + |
| 75 | + // Extract all object keys |
| 76 | + let objects: &[Object] = output.contents(); |
| 77 | + let all_keys: Vec<String> = objects |
| 78 | + .iter() |
| 79 | + .filter_map(|obj| obj.key().map(str::to_string)) |
| 80 | + .collect(); |
| 81 | + |
| 82 | + if all_keys.len() < 2 { |
| 83 | + eprintln!( |
| 84 | + "Not enough objects to generate pairs. Found only {} object(s).", |
| 85 | + all_keys.len() |
| 86 | + ); |
| 87 | + return Ok(()); |
| 88 | + } |
| 89 | + |
| 90 | + // Generate all unique pairs (source, candidate) where source != candidate |
| 91 | + let mut all_pairs = Vec::new(); |
| 92 | + for (i, source) in all_keys.iter().enumerate() { |
| 93 | + // check if source is empty |
| 94 | + if source.is_empty() { |
| 95 | + continue; |
| 96 | + } |
| 97 | + for (j, candidate) in all_keys.iter().enumerate() { |
| 98 | + // check if candidate is is_empty |
| 99 | + if candidate.is_empty() { |
| 100 | + continue; |
| 101 | + } |
| 102 | + if i != j { |
| 103 | + all_pairs.push(Pair { |
| 104 | + source: format!("{}{}/{}", url_prefix, bucket_name, source), |
| 105 | + candidate: format!("{}{}/{}", url_prefix, bucket_name, candidate), |
| 106 | + }); |
| 107 | + } |
| 108 | + } |
| 109 | + } |
| 110 | + |
| 111 | + let max_pairs_possible = all_pairs.len(); |
| 112 | + if num_pairs > max_pairs_possible { |
| 113 | + eprintln!( |
| 114 | + "Requested {} pairs, but only {} unique pairs can be generated with {} objects.", |
| 115 | + num_pairs, |
| 116 | + max_pairs_possible, |
| 117 | + all_keys.len() |
| 118 | + ); |
| 119 | + } |
| 120 | + |
| 121 | + // Shuffle and take the requested number of pairs |
| 122 | + let mut rng = rand::thread_rng(); |
| 123 | + all_pairs.shuffle(&mut rng); |
| 124 | + |
| 125 | + let selected_pairs: Vec<Pair> = all_pairs.into_iter().take(num_pairs).collect(); |
| 126 | + |
| 127 | + if selected_pairs.len() < num_pairs { |
| 128 | + eprintln!( |
| 129 | + "Requested {} pairs, but only {} unique pairs could be generated with {} objects.", |
| 130 | + num_pairs, |
| 131 | + selected_pairs.len(), |
| 132 | + all_keys.len() |
| 133 | + ); |
| 134 | + } |
| 135 | + |
| 136 | + // Print JSON output |
| 137 | + let output_json = PairsOutput { |
| 138 | + pairs: selected_pairs, |
| 139 | + }; |
| 140 | + println!("{}", serde_json::to_string_pretty(&output_json)?); |
| 141 | + |
| 142 | + Ok(()) |
| 143 | +} |
0 commit comments