mjc
diff --git a/‎Cargo.toml‎
Lines changed: 5 additions & 1 deletion b/‎Cargo.toml‎
Lines changed: 5 additions & 1 deletion
diff --git a/‎README.md‎
Lines changed: 44 additions & 13 deletions b/‎README.md‎
Lines changed: 44 additions & 13 deletions
diff --git a/‎benches/create_benchmark.rs‎
Lines changed: 204 additions & 0 deletions b/‎benches/create_benchmark.rs‎
Lines changed: 204 additions & 0 deletions
@@ -65,6 +65,10 @@ harness = false
 name = "verify_performance"
 harness = false
 
+[[bench]]
+name = "create_benchmark"
+harness = false
+
 [[bench]]
 name = "md5_throughput"
 harness = false
@@ -82,4 +86,4 @@ debug = true
 strip = false
 lto = true            # Link-time optimization for better performance
 codegen-units = 1     # Single codegen unit for better optimization
-overflow-checks = true  # Keep overflow checks for safety with untrusted network data
+overflow-checks = true  # Keep overflow checks for safety with untrusted network data
@@ -8,7 +8,9 @@ A Rust implementation of PAR2 (Parity Archive) for data recovery and verificatio
 
 ### Performance
 
-par2rs achieves **1.1-2.9x speedup** over par2cmdline through:
+#### Verification/Repair
+
+par2rs achieves **1.1-2.9x speedup** over par2cmdline for verification and repair through:
 - **Optimized I/O patterns** using full slice-size chunks instead of 64KB blocks (eliminates redundant reads)
 - **Parallel Reed-Solomon reconstruction** using Rayon for multi-threaded chunk processing
 - **SIMD-accelerated operations** (PSHUFB on x86_64, NEON on ARM64, portable_simd cross-platform)
@@ -17,7 +19,7 @@ par2rs achieves **1.1-2.9x speedup** over par2cmdline through:
 
 **⚠️ Performance Regression Note:** These results show significantly lower speedups than previous benchmarks (which showed 2-200× improvements). This is considered a **regression** and is under investigation. The current implementation maintains correctness but has lost most of its performance advantages on Linux x86_64.
 
-**Latest benchmark results:**
+**Latest verification/repair benchmark results:**
 
 **Linux x86_64 (AMD Ryzen 9 5950X, 64GB RAM):**
 - 1MB: **1.23x speedup** (0.032s → 0.026s)
@@ -84,7 +86,7 @@ par2 v myfile.par2  # short form
 par2 repair myfile.par2
 par2 r myfile.par2  # short form
 
-# Create recovery files (coming soon)
+# Create recovery files
 par2 create myfile.par2 file1 file2
 par2 c myfile.par2 file1 file2  # short form
 ```
@@ -101,6 +103,17 @@ par2 r -p myfile.par2
 # Use specific number of threads
 par2 v -t 8 myfile.par2
 
+# Create with explicit recovery settings
+par2 c -s65536 -r10 myfile.par2 file1 file2
+
+# Store source names relative to a base path
+par2 c -B /data/archive myfile.par2 /data/archive/file1
+par2 v -B /data/archive myfile.par2
+
+# Scan renamed or relocated data while verifying/repairing
+par2 v myfile.par2 renamed-file
+par2 r myfile.par2 renamed-file
+
 # Disable parallel processing (single-threaded)
 par2 v --no-parallel myfile.par2
 ```
@@ -265,15 +278,29 @@ Verifies the integrity of files using PAR2 archives.
 **Features:**
 - Complete PAR2 set analysis
 - File integrity verification
+- Byte-by-byte block scanning for renamed or displaced data
 - Progress reporting
 - Detailed statistics
 
-### par2create (Planned)
+### par2create
 Creates PAR2 recovery files for data protection.
 
-### par2repair (Planned)
+**Features:**
+- par2cmdline-style create options for block size/count, redundancy, recovery volume layout, recursion, base paths, and quiet/verbose modes
+- PAR2 index and recovery volume generation
+- Reed-Solomon recovery block generation
+- Compatibility coverage against par2cmdline for generated sets
+
+### par2repair
 Repairs corrupted files using PAR2 recovery data.
 
+**Features:**
+- Recovery set loading from main and volume PAR2 files
+- Corrupt or missing file reconstruction
+- Base path support for relocated data files
+- Extra file scanning for renamed or relocated protected files
+- Optional purge of backup and PAR2 files after successful repair
+
 ### split_par2 (Utility)
 Development utility to split PAR2 files into individual packets for analysis.
 
@@ -327,18 +354,18 @@ This implementation follows the PAR2 specification and supports:
 
 ### File Scanning Strategy
 
-`par2rs` uses a **block-aligned sequential scanning** approach that differs from `par2cmdline`'s sliding window scanner:
+`par2rs` uses a **global block scanner** modeled after `par2cmdline`:
 
-- **par2cmdline**: Uses a byte-by-byte sliding window with rolling CRC32 that can find blocks at *any offset* in a file, even if displaced by inserted/deleted data. This is more thorough but slower.
+- **Fast path**: Aligned blocks are checked first for the common case where files are present at their expected paths and offsets.
 
-- **par2rs**: Only checks blocks at their expected aligned positions using sequential reads with large buffers (128MB). This is significantly faster for normal verification but cannot find displaced blocks.
+- **Compatibility path**: When needed, verification and repair scan byte-by-byte with rolling CRC32 to find protected data blocks at displaced offsets or inside extra files passed on the command line.
 
 **Practical Impact:**
-- ✅ **par2rs is faster** for standard verification/repair scenarios (files are either intact or corrupted at known positions)
-- ⚠️ **par2cmdline is more robust** for edge cases like files with prepended data or non-aligned block corruption
-- 🎯 For typical use cases (bit rot, transmission errors, filesystem corruption), both tools will perform equivalently
+- ✅ Intact files still take the fast aligned path.
+- ✅ Renamed or relocated files can be supplied as extra arguments to `verify` or `repair`.
+- ✅ Displaced blocks from inserted or deleted bytes are detected by the byte-scanning path.
 
-This design choice optimizes for the common case where files are either intact or have corruption at expected block boundaries, delivering substantial performance improvements while maintaining correctness for standard PAR2 operations.
+This keeps the common case efficient while matching `par2cmdline` behavior for the recovery cases where data is present but not at the protected filename or expected block offset.
 
 ## Known Issues
 
@@ -347,11 +374,15 @@ This design choice optimizes for the common case where files are either intact o
 ## Roadmap
 
 - [x] **Phase 1**: Complete packet parsing and verification
-- [ ] **Phase 2**: PAR2 file creation (`par2create`)
+- [x] **Phase 2**: PAR2 file creation (`par2create`)
 - [x] **Phase 3**: File repair functionality (`par2repair`)
 - [x] **Phase 4**: SIMD optimizations (PSHUFB, NEON, portable_simd)
 - [ ] **Phase 5**: Runtime SIMD dispatch
 - [ ] **Phase 6**: Advanced features (progress callbacks, custom block sizes)
+- [ ] **Performance**: Investigate the Linux x86_64 verification/repair regression and restore prior benchmark speedups
+- [ ] **Create Optimization**: Merge hashing and recovery generation into a single pass to avoid reading source files twice
+- [ ] **Repair Reliability**: Reproduce and fix the repair hang on small files within large multi-file PAR2 sets
+- [ ] **Benchmarks**: Re-test and refresh macOS Apple Silicon results
 
 ## Documentation
 
 
@@ -0,0 +1,204 @@
+use criterion::{criterion_group, criterion_main, BenchmarkId, Criterion};
+use par2rs::create::{CreateContextBuilder, SilentCreateReporter};
+use std::fs;
+use std::hint::black_box;
+use std::path::PathBuf;
+use tempfile::tempdir;
+
+/// Writes a `size`-byte file named `test.dat` into a fresh temporary directory.
+///
+/// The file contains the byte values `0..=255` repeated in order and cut off
+/// after `size` bytes. The directory handle is returned alongside the path so
+/// the caller controls how long the directory lives.
+///
+/// # Panics
+///
+/// Panics if the temporary directory cannot be created or the file cannot be
+/// written.
+fn create_test_file(size: usize) -> (tempfile::TempDir, PathBuf) {
+    let dir = tempdir().unwrap();
+    let path = dir.path().join("test.dat");
+
+    // Deterministic, non-constant content: a repeating 0..=255 ramp.
+    let contents: Vec<u8> = (0..=u8::MAX).cycle().take(size).collect();
+    fs::write(&path, contents).unwrap();
+
+    (dir, path)
+}
+
+/// Benchmarks PAR2 creation for a single source file at several file sizes.
+///
+/// For every sample the setup closure (not timed) writes a fresh input file.
+/// The timed routine builds a create context with 5% redundancy and runs
+/// `create()`.
+fn bench_par2_creation(c: &mut Criterion) {
+    let mut group = c.benchmark_group("par2_creation");
+
+    // Test different file sizes
+    let sizes = vec![
+        ("1KB", 1024),
+        ("10KB", 10 * 1024),
+        ("100KB", 100 * 1024),
+        ("1MB", 1024 * 1024),
+        ("10MB", 10 * 1024 * 1024),
+    ];
+
+    for (size_name, size) in sizes {
+        group.bench_with_input(
+            BenchmarkId::new("create_file", size_name),
+            &size,
+            |b, &size| {
+                b.iter_batched(
+                    || create_test_file(size),
+                    |(temp_dir, test_file)| {
+                        let par2_file = temp_dir.path().join("test.par2");
+                        let reporter = Box::new(SilentCreateReporter);
+
+                        let mut context = CreateContextBuilder::new()
+                            .output_name(par2_file.to_str().unwrap())
+                            .source_files(vec![test_file])
+                            .redundancy_percentage(5)
+                            .reporter(reporter)
+                            .build()
+                            .unwrap();
+
+                        context.create().unwrap();
+
+                        // Return the directory instead of dropping it here:
+                        // Criterion drops routine outputs after the timer
+                        // stops, so deleting the temp files is not measured
+                        // as creation time.
+                        temp_dir
+                    },
+                    criterion::BatchSize::SmallInput,
+                );
+            },
+        );
+    }
+
+    group.finish();
+}
+
+/// Benchmarks PAR2 creation of a 1MB source file at several redundancy levels.
+///
+/// The setup closure (not timed) writes a fresh 1MB input file per sample. The
+/// timed routine builds a create context with the given redundancy percentage
+/// and runs `create()`.
+fn bench_par2_creation_redundancy(c: &mut Criterion) {
+    let mut group = c.benchmark_group("par2_creation_redundancy");
+
+    // Test different redundancy levels with 1MB file
+    let redundancy_levels = vec![5, 10, 20, 50];
+
+    for redundancy in redundancy_levels {
+        group.bench_with_input(
+            BenchmarkId::new("redundancy", format!("{}%", redundancy)),
+            &redundancy,
+            |b, &redundancy| {
+                b.iter_batched(
+                    || create_test_file(1024 * 1024), // 1MB
+                    |(temp_dir, test_file)| {
+                        let par2_file = temp_dir.path().join("test.par2");
+                        let reporter = Box::new(SilentCreateReporter);
+
+                        let mut context = CreateContextBuilder::new()
+                            .output_name(par2_file.to_str().unwrap())
+                            .source_files(vec![test_file])
+                            .redundancy_percentage(redundancy)
+                            .reporter(reporter)
+                            .build()
+                            .unwrap();
+
+                        context.create().unwrap();
+
+                        // Return the directory instead of dropping it here:
+                        // Criterion drops routine outputs after the timer
+                        // stops, so deleting the temp files is not measured
+                        // as creation time.
+                        temp_dir
+                    },
+                    criterion::BatchSize::SmallInput,
+                );
+            },
+        );
+    }
+
+    group.finish();
+}
+
+/// Benchmarks PAR2 creation over sets of 1, 3, 5 and 10 source files.
+///
+/// The setup closure (not timed) writes `file_count` files of 100KB each into
+/// a fresh temporary directory; each file's byte pattern is offset by its
+/// index so the files differ. The timed routine builds a create context with
+/// 5% redundancy over all files and runs `create()`.
+fn bench_par2_creation_multifile(c: &mut Criterion) {
+    let mut group = c.benchmark_group("par2_creation_multifile");
+
+    // Test different numbers of files (each 100KB)
+    let file_counts = vec![1, 3, 5, 10];
+
+    for file_count in file_counts {
+        group.bench_with_input(
+            BenchmarkId::new("files", format!("{}_files", file_count)),
+            &file_count,
+            |b, &file_count| {
+                b.iter_batched(
+                    || {
+                        let temp_dir = tempdir().unwrap();
+                        let mut files = Vec::new();
+
+                        for i in 0..file_count {
+                            let file_path = temp_dir.path().join(format!("test{}.dat", i));
+                            let pattern = (0..256)
+                                .cycle()
+                                .take(100 * 1024)
+                                .map(|x| (x + i) as u8)
+                                .collect::<Vec<_>>();
+                            fs::write(&file_path, pattern).unwrap();
+                            files.push(file_path);
+                        }
+
+                        (temp_dir, files)
+                    },
+                    |(temp_dir, test_files)| {
+                        let par2_file = temp_dir.path().join("test.par2");
+                        let reporter = Box::new(SilentCreateReporter);
+
+                        let mut context = CreateContextBuilder::new()
+                            .output_name(par2_file.to_str().unwrap())
+                            .source_files(test_files)
+                            .redundancy_percentage(5)
+                            .reporter(reporter)
+                            .build()
+                            .unwrap();
+
+                        context.create().unwrap();
+
+                        // Return the directory instead of dropping it here:
+                        // Criterion drops routine outputs after the timer
+                        // stops, so deleting the temp files is not measured
+                        // as creation time.
+                        temp_dir
+                    },
+                    criterion::BatchSize::SmallInput,
+                );
+            },
+        );
+    }
+
+    group.finish();
+}
+
+/// Benchmarks building a create context and reading its block size for
+/// 1MB, 10MB and 100MB source files.
+///
+/// The setup closure (not timed) writes the input file. The timed routine
+/// builds the context with 10% redundancy and calls `block_size()`; it does
+/// not run `create()`.
+fn bench_block_size_calculation(c: &mut Criterion) {
+    let mut group = c.benchmark_group("block_size_calculation");
+
+    // Test block size calculation performance
+    let sizes = vec![
+        ("1MB", 1024 * 1024),
+        ("10MB", 10 * 1024 * 1024),
+        ("100MB", 100 * 1024 * 1024),
+    ];
+
+    for (size_name, size) in sizes {
+        group.bench_with_input(
+            BenchmarkId::new("calculate", size_name),
+            &size,
+            |b, &size| {
+                b.iter_batched(
+                    || create_test_file(size),
+                    |(temp_dir, test_file)| {
+                        let par2_file = temp_dir.path().join("test.par2");
+                        let reporter = Box::new(SilentCreateReporter);
+
+                        // This will trigger block size calculation
+                        let context = CreateContextBuilder::new()
+                            .output_name(par2_file.to_str().unwrap())
+                            .source_files(vec![test_file])
+                            .redundancy_percentage(10)
+                            .reporter(reporter)
+                            .build()
+                            .unwrap();
+
+                        black_box(context.block_size());
+
+                        // Return the directory instead of dropping it here:
+                        // Criterion drops routine outputs after the timer
+                        // stops, so deleting the (up to 100MB) input file is
+                        // not measured as part of the calculation.
+                        temp_dir
+                    },
+                    criterion::BatchSize::SmallInput,
+                );
+            },
+        );
+    }
+
+    group.finish();
+}
+
+// Collect the four benchmark functions into the `benches` group and generate
+// the benchmark entry point from it.
+criterion_group!(
+    benches,
+    bench_par2_creation,
+    bench_par2_creation_redundancy,
+    bench_par2_creation_multifile,
+    bench_block_size_calculation
+);
+criterion_main!(benches);