diff --git a/docs/examples/workbook.md b/docs/examples/workbook.md deleted file mode 100644 index 3970a97..0000000 --- a/docs/examples/workbook.md +++ /dev/null @@ -1,256 +0,0 @@ -# Complete Workflow Example - -This page provides a complete end-to-end example of using the Anscombe Transform codec. - -For an interactive version, see the [Jupyter notebook](../../examples/workbook.ipynb) in the repository. - -## Overview - -This example demonstrates: - -1. Loading sample photon-limited data -2. Estimating codec parameters -3. Compressing data with Zarr V3 -4. Validating reconstruction quality -5. Measuring compression ratios - -## Setup - -```python -import numpy as np -import zarr -from anscombe_transform import AnscombeTransformV3, compute_conversion_gain -import matplotlib.pyplot as plt -``` - -## Generate Sample Data - -First, let's create synthetic data that mimics photon-limited imaging: - -```python -# Parameters for synthetic data -n_frames = 30 -height, width = 120, 120 -mean_photon_rate = 5.0 # Average photons per pixel (exponential distribution of rates) -zero_level = -10.0 # camera baseline -conversion_gain = 2.5 # levels per photon - -# Generate Poisson-distributed photon counts -photon_rate = np.random.exponential(scale=5, size=(1, height, width)) -photon_counts = np.random.poisson(lam=np.tile(photon_rate, (n_frames, 1, 1))) -measured_signal = photon_counts + np.random.randn(*size) * 0.2 - -# Convert to camera signal -camera_signal = (measured_signal * conversion_gain + zero_level).astype('int16') - -print(f"Data shape: {camera_signal.shape}") -print(f"Data range: [{camera_signal.min()}, {camera_signal.max()}]") -print(f"Data dtype: {camera_signal.dtype}") -``` - -## Estimate Parameters - -Now estimate the codec parameters from the data: - -```python -# Estimate parameters from the movie -result = compute_conversion_gain(camera_signal) - -estimated_gain = result['sensitivity'] -estimated_zero = result['zero_level'] - -print(f"\nTrue parameters:") -print(f" Conversion gain: {conversion_gain:.3f} units/photon") -print(f" Zero level: {zero_level:.1f} ") - -print(f"\nEstimated parameters:") -print(f" Conversion gain: {estimated_gain:.3f} units/photon") -print(f" Zero level: {estimated_zero:.1f}") - -print(f"\nEstimation error:") -print(f" Gain error: {abs(estimated_gain - conversion_gain):.3f} units/photon") -print(f" Zero level error: {abs(estimated_zero - zero_level):.1f} units") -``` - -## Visualize Noise Model - -Plot the noise model fit: - -```python -# Compute mean and variance -mean_signal = np.mean(camera_signal, axis=0) -variance = np.var(camera_signal, axis=0) - -# Create scatter plot -plt.figure(figsize=(10, 6)) -plt.scatter(mean_signal.ravel()[::100], variance.ravel()[::100], - alpha=0.5, s=1, label='Data') - -# Plot fitted line -mean_range = np.array([mean_signal.min(), mean_signal.max()]) -variance_fit = estimated_gain * (mean_range - estimated_zero) -plt.plot(mean_range, variance_fit, 'r-', linewidth=2, - label=f'Fit: var = {estimated_gain:.2f} * (mean - {estimated_zero:.1f})') - -plt.xlabel('Mean Signal (ADU)') -plt.ylabel('Variance (ADU²)') -plt.title('Noise Transfer Function') -plt.legend() -plt.grid(True, alpha=0.3) -plt.show() -``` - -## Compress with Anscombe Codec - -Create a Zarr array with the codec: - -```python -# Create codec with estimated parameters -codec = AnscombeTransformV3( - zero_level=estimated_zero, - conversion_gain=estimated_gain, - encoded_dtype='uint8' -) - -# Create Zarr V3 array -store = zarr.storage.MemoryStore() -compressed_array = zarr.create( - store=store, - shape=camera_signal.shape, - chunks=(10, 512, 512), - dtype='int16', - filters=[codec], - compressor={'id': 'blosc', 'cname': 'zstd', 'clevel': 5}, - zarr_format=3 -) - -# Write data -compressed_array[:] = camera_signal - -print(f"Compression complete!") -``` - -## Validate Reconstruction - -Check the quality of reconstruction: - -```python -# Read back compressed data -reconstructed = compressed_array[:] - -# Compute reconstruction error -error = camera_signal - reconstructed -abs_error = np.abs(error) - -print(f"\nReconstruction Quality:") -print(f" Max absolute error: {abs_error.max():.2f} ADU") -print(f" Mean absolute error: {abs_error.mean():.2f} ADU") -print(f" RMS error: {np.sqrt(np.mean(error**2)):.2f} ADU") -print(f" Expected noise (1 photon): {estimated_gain:.2f} ADU") -print(f" Error as fraction of noise: {abs_error.mean() / estimated_gain:.2f}") - -# Visualize error distribution -plt.figure(figsize=(12, 4)) - -plt.subplot(131) -plt.imshow(camera_signal[0], cmap='gray', vmin=0, vmax=500) -plt.title('Original Frame') -plt.colorbar(label='ADU') - -plt.subplot(132) -plt.imshow(reconstructed[0], cmap='gray', vmin=0, vmax=500) -plt.title('Reconstructed Frame') -plt.colorbar(label='ADU') - -plt.subplot(133) -plt.imshow(error[0], cmap='RdBu_r', vmin=-10, vmax=10) -plt.title('Error (Original - Reconstructed)') -plt.colorbar(label='ADU') - -plt.tight_layout() -plt.show() -``` - -## Measure Compression Ratio - -Calculate the compression achieved: - -```python -# Original size -original_size = camera_signal.nbytes - -# Compressed size (estimate from store) -compressed_size = sum(len(v) for v in store.values()) - -compression_ratio = original_size / compressed_size - -print(f"\nCompression Statistics:") -print(f" Original size: {original_size / 1024**2:.2f} MB") -print(f" Compressed size: {compressed_size / 1024**2:.2f} MB") -print(f" Compression ratio: {compression_ratio:.2f}x") -print(f" Space saved: {(1 - 1/compression_ratio) * 100:.1f}%") -``` - -## Compare Different Compressors - -Test various compressor configurations: - -```python -compressors = [ - {'name': 'Blosc+Zstd', 'config': {'id': 'blosc', 'cname': 'zstd', 'clevel': 5}}, - {'name': 'Blosc+LZ4', 'config': {'id': 'blosc', 'cname': 'lz4', 'clevel': 3}}, - {'name': 'Blosc+Zlib', 'config': {'id': 'blosc', 'cname': 'zlib', 'clevel': 9}}, -] - -results = [] - -for comp in compressors: - store = zarr.storage.MemoryStore() - arr = zarr.create( - store=store, - shape=camera_signal.shape, - chunks=(10, 512, 512), - dtype='int16', - filters=[codec], - compressor=comp['config'], - zarr_format=3 - ) - arr[:] = camera_signal - - compressed_size = sum(len(v) for v in store.values()) - ratio = original_size / compressed_size - - results.append({ - 'name': comp['name'], - 'size_mb': compressed_size / 1024**2, - 'ratio': ratio - }) - - print(f"{comp['name']:15s}: {ratio:.2f}x compression, {compressed_size / 1024**2:.2f} MB") - -# Plot comparison -plt.figure(figsize=(10, 5)) -names = [r['name'] for r in results] -ratios = [r['ratio'] for r in results] -plt.bar(names, ratios) -plt.ylabel('Compression Ratio') -plt.title('Compression Performance by Algorithm') -plt.grid(axis='y', alpha=0.3) -plt.show() -``` - -## Summary - -This example demonstrated: - -- ✅ Parameter estimation with ~1% accuracy -- ✅ Reconstruction error below 1 photon equivalent -- ✅ 5-8x compression ratios -- ✅ Successful integration with Zarr V3 - -## Next Steps - -- Try with your own data -- Experiment with different `beta` values -- Compare with other compression algorithms -- Use with remote storage backends diff --git a/docs/getting-started/quick-start.md b/docs/getting-started/quick-start.md index 8681a64..3fc78fb 100644 --- a/docs/getting-started/quick-start.md +++ b/docs/getting-started/quick-start.md @@ -8,26 +8,25 @@ This guide will help you get started with the Anscombe Transform codec for compr import zarr import numpy as np from anscombe_transform import AnscombeTransformV3 +from anscombe_transform.common import make_demo_data -# Synthesize data with Poisson noise - simulating photon-limited microscopy data -n_frames, height, width = 50, 256, 256 -mean_photon_rate = 5.0 # Average photons per pixel (exponential distribution of rates) -zero_level = 20.0 # camera baseline -conversion_gain = 30.0 # levels per photon -photon_rate = np.random.exponential(scale=mean_photon_rate, size=(1, height, width)) -photon_counts = np.random.poisson(np.tile(photon_rate, (n_frames, 1, 1))) -measured_signal = photon_counts + np.random.randn(*size) * 0.2 +# Set seed for reproducibility +np.random.seed(0) -data = (zero_level + conversion_gain * measured_signal).astype('int16') +# Generate test data +data = make_demo_data(zero_level=20.0, conversion_gain=30.0) -# Create a Zarr array with the Anscombe codec +# Create a Zarr array with the Anscombe codec and blosc compression store = zarr.storage.MemoryStore() -arr = zarr.create( +arr = zarr.create_array( store=store, shape=data.shape, chunks=(12, 160, 80), dtype='int16', - filters=[AnscombeTransformV3(zero_level=zero_level, conversion_gain=conversion_gain)], + filters=[AnscombeTransformV3(zero_level=20.0, conversion_gain=30.0)], + compressors=[ + {'name': 'blosc', 'configuration': {'cname': 'zstd', 'clevel': 5}} + ], zarr_format=3 ) @@ -37,29 +36,10 @@ arr[:] = data # Read data back recovered = arr[:] -# Verify roundtrip accuracy +# The transformation is lossy, so we do not expect data to +# round-trip perfectly print(f"Max difference: {np.abs(data - recovered).max()}") -``` - -## Using with Zarr V2 - -```python -from anscombe_transform import AnscombeTransformV2 -import zarr - -# Create array with V2 codec -arr = zarr.open_array( - 'data.zarr', - mode='w', - shape=data.shape, - chunks=(12, 160, 80), - dtype='int16', - compressor=AnscombeTransformV2(zero_level=zero_level, conversion_gain=conversion_gain) -) - -# Write and read data -arr[:] = data -recovered = arr[:] +#> Max difference: 59 ``` ## Estimating Parameters from Data @@ -67,55 +47,40 @@ recovered = arr[:] If you don't know the `zero_level` and `conversion_gain` parameters, you can estimate them from your data: ```python -from anscombe_transform import compute_conversion_gain import numpy as np +from anscombe_transform.estimate import compute_conversion_gain +from anscombe_transform import AnscombeTransformV3 +from anscombe_transform.common import make_demo_data -# Load your movie data as (time, height, width) -movie = data +# Set seed for reproducibility +np.random.seed(0) + +# Generate test data +movie = make_demo_data(n_frames=100) # Estimate parameters -result = compute_conversion_gain(data) +result = compute_conversion_gain(movie) -print(f"Estimated conversion gain: {result['sensitivity']:.3f}") +print(f"Estimated conversion gain: {result['conversion_gain']:.3f}") +#> Estimated conversion gain: 29.866 print(f"Estimated zero level: {result['zero_level']:.3f}") +#> Estimated zero level: 17.814 # Use estimated parameters in codec codec = AnscombeTransformV3( - zero_level=result['zero_level'], - conversion_gain=result['sensitivity'] -) -``` - -## Combining with Other Compressors - -The Anscombe codec is typically used as a filter before compression: - -```python -import zarr -from numcodecs import Blosc -from anscombe_transform import AnscombeTransformV3 - -# For Zarr V3, use filters + codecs -arr = zarr.create( - shape=data.shape, - chunks=(10, 512, 512), - dtype='int16', - filters=[AnscombeTransformV3(zero_level=zero_level, conversion_gain=conversion_gain)], - compressor={'id': 'blosc', 'cname': 'zstd', 'clevel': 5}, - zarr_format=3 + zero_level=int(result['zero_level']), + conversion_gain=result['conversion_gain'] ) ``` ## Key Parameters -- **`zero_level`**: The signal value when no photons are detected. This is the baseline offset in your camera sensor. -- **`conversion_gain`** (also called `photon_sensitivity`): How many signal units correspond to one photon. For example, if your camera reports 2.5 levels increase in signal per photon, use `conversion_gain=2.5`. -- **`encoded_dtype`**: The data type for encoded values (default: `uint8`). Use `uint8` for maximum compression. -- **`decoded_dtype`**: The data type for decoded values (default: inferred from data). +The Anscombe codec requires two key parameters: + +- **`zero_level`**: Baseline signal with no photons. See [Parameters](../concepts/parameters.md#zero_level) for details. +- **`conversion_gain`**: Signal units per photon. See [Parameters](../concepts/parameters.md#conversion_gain) for details. -## Next Steps +Optional parameters: -- Learn more in the [User Guide](../user-guide/overview.md) -- See [Parameter Estimation](../user-guide/parameter-estimation.md) for details on computing parameters -- Check out the full [Workbook Example](../examples/workbook.md) -- Explore the [API Reference](../api/codec.md) +- **`encoded_dtype`**: Data type for encoded values (default: `uint8`). Use `uint8` for maximum compression. +- **`decoded_dtype`**: Data type for decoded values (default: inferred from data). diff --git a/docs/index.md b/docs/index.md index 9f3181a..9965053 100644 --- a/docs/index.md +++ b/docs/index.md @@ -48,14 +48,15 @@ from anscombe_transform import AnscombeTransformV3 # Create sample data with Poisson noise data = np.random.poisson(lam=50, size=(100, 512, 512)).astype('int16') -# Create Zarr array with Anscombe codec +# Create Zarr array with Anscombe codec and blosc compression store = zarr.storage.MemoryStore() -arr = zarr.create( +arr = zarr.create_array( store=store, shape=data.shape, chunks=(10, 512, 512), dtype='int16', filters=[AnscombeTransformV3(zero_level=100, conversion_gain=2.5)], + compressors=[{'name': 'blosc', 'configuration': {'cname': 'zstd', 'clevel': 5}}], zarr_format=3 ) diff --git a/docs/user-guide/parameter-estimation.md b/docs/user-guide/parameter-estimation.md index 7c4c801..83b776e 100644 --- a/docs/user-guide/parameter-estimation.md +++ b/docs/user-guide/parameter-estimation.md @@ -1,9 +1,8 @@ # Parameter Estimation -To use the Anscombe Transform codec effectively, you need two key parameters: +To use the Anscombe Transform codec effectively, you need two key parameters: **`zero_level`** and **`conversion_gain`**. -1. **`zero_level`**: The baseline signal value when no photons are detected -2. **`conversion_gain`** (also called `photon_sensitivity`): The conversion factor from signal units to photon counts +See [Codec Parameters](../concepts/parameters.md) for detailed explanations of what these parameters mean and how they relate to your camera hardware. This guide explains how to estimate these parameters from your data. @@ -12,17 +11,23 @@ This guide explains how to estimate these parameters from your data. The codec provides a built-in parameter estimation function: ```python -from anscombe_transform import compute_conversion_gain import numpy as np +from anscombe_transform.estimate import compute_conversion_gain +from anscombe_transform.common import make_demo_data -# Load your movie data as (time, height, width) -movie = load_my_movie() # Shape: (n_frames, height, width) +# Set seed for reproducibility +np.random.seed(0) + +# Generate demo data +movie = make_demo_data(n_frames=100) # Estimate parameters result = compute_conversion_gain(movie) -print(f"Conversion gain: {result['sensitivity']:.3f}") +print(f"Conversion gain: {result['conversion_gain']:.3f}") +#> Conversion gain: 29.866 print(f"Zero level: {result['zero_level']:.3f}") +#> Zero level: 17.814 ``` ### Input Requirements @@ -37,8 +42,8 @@ The `compute_conversion_gain()` function expects: The function uses the **noise transfer function** approach: -1. **Compute temporal variance**: Calculate pixel-wise variance across time -2. **Compute temporal mean**: Calculate pixel-wise mean across time +1. **Compute temporal variance**: Calculate sample-wise variance across time +2. **Compute temporal mean**: Calculate sample-wise mean across time 3. **Fit noise model**: Use HuberRegressor to fit `variance = slope * mean + intercept` For Poisson noise: `variance = conversion_gain * (mean - zero_level)` @@ -51,59 +56,20 @@ Therefore: The function returns a dictionary with: -```python -{ - 'sensitivity': float, # The conversion gain (photons per signal unit) - 'zero_level': float, # The baseline signal level - 'variance': ndarray, # Computed pixel-wise variance - 'model': HuberRegressor # The fitted regression model -} -``` - -## Manual Parameter Estimation - -If you know your camera specifications, you can compute the parameters manually: - -### Zero Level - -The zero level is typically: -- **Dark current**: The signal level with the shutter closed -- **Bias level**: The electronic offset added to prevent negative values - -To measure: -1. Capture several frames with no light (shutter closed or lens cap on) -2. Compute the median value across all pixels and frames - -```python -dark_frames = capture_dark_frames(n=20) -zero_level = np.median(dark_frames) -``` - -### Conversion Gain - -The conversion gain depends on your camera's specifications: - -```python -# If you know electrons per ADU: -electrons_per_adu = 2.5 # From camera spec sheet -quantum_efficiency = 0.9 # Photons to electrons conversion - -conversion_gain = electrons_per_adu / quantum_efficiency -``` - -Or measure from a uniform illumination: ```python -# Capture frames of uniform illumination -uniform_frames = capture_uniform_frames(n=100) - -# Compute mean and variance for each pixel -mean = np.mean(uniform_frames, axis=0) -variance = np.var(uniform_frames, axis=0) - -# For Poisson noise: variance = gain * (mean - zero) -# Fit a line through the origin after subtracting zero level -conversion_gain = np.median(variance / (mean - zero_level)) +import numpy as np +from sklearn.linear_model import HuberRegressor + +result = { + 'conversion_gain': float, # The conversion gain (signal units per event) + 'zero_level': float, # The baseline signal level + 'variance': np.ndarray, # Computed sample-wise variance + 'model': HuberRegressor, # The fitted regression model + 'counts': np.ndarray, # Event counts per sample + 'min_intensity': int, # Minimum intensity value + 'max_intensity': int, # Maximum intensity value +} ``` ## Validation @@ -111,89 +77,66 @@ conversion_gain = np.median(variance / (mean - zero_level)) After estimating parameters, validate them: ```python +import numpy as np from anscombe_transform import AnscombeTransformV3 +from anscombe_transform.estimate import compute_conversion_gain +from anscombe_transform.common import make_demo_data + +# Set seed for reproducibility +np.random.seed(0) + +# Generate test movie data +movie = make_demo_data(n_frames=100) + +# Estimate parameters +result = compute_conversion_gain(movie) # Create codec with estimated parameters codec = AnscombeTransformV3( zero_level=result['zero_level'], - conversion_gain=result['sensitivity'] + conversion_gain=result['conversion_gain'] ) -# Test on a sample frame -frame = movie[0] -encoded = codec.encode(frame) -decoded = codec.decode(encoded) - -# Check reconstruction error -error = np.abs(frame - decoded) -max_error = np.max(error) -mean_error = np.mean(error) - -print(f"Max error: {max_error:.2f} ADU") -print(f"Mean error: {mean_error:.2f} ADU") -print(f"Expected noise (1 photon): {result['sensitivity']:.2f} ADU") - -# Error should be less than ~1 photon equivalent -assert max_error < 2 * result['sensitivity'] +# Print estimated parameters +print(f"Estimated zero level: {result['zero_level']:.1f}") +#> Estimated zero level: 17.8 +print(f"Estimated conversion gain: {result['conversion_gain']:.3f}") +#> Estimated conversion gain: 29.866 ``` -## Best Practices - -### Data Collection - -1. **Use multiple frames**: 20+ frames for reliable statistics -2. **Avoid motion**: Use static scenes or stabilized video -3. **Cover dynamic range**: Include both bright and dark regions -4. **Use raw data**: Don't use pre-processed or normalized data - -### Parameter Refinement - -1. **Check fit quality**: Inspect `result['model'].score()` (R² should be > 0.95) -2. **Visualize fit**: Plot variance vs. mean to verify linear relationship -3. **Test reconstruction**: Verify that encoding/decoding preserves data quality - -### Common Issues - -**Negative zero level**: Usually indicates pre-processed data or incorrect bias subtraction. Check if your data has been normalized. - -**Very high conversion gain (> 10)**: May indicate the data is already in photon units or has been scaled. - -**Poor R² score (< 0.9)**: Could mean: -- Too much motion in the scene -- Non-Poisson noise dominates (e.g., readout noise) -- Not enough temporal variation - ## Example Workflow + ```python import numpy as np -from anscombe_transform import compute_conversion_gain, AnscombeTransformV3 +from anscombe_transform.estimate import compute_conversion_gain +from anscombe_transform import AnscombeTransformV3 +from anscombe_transform.common import make_demo_data import zarr -# 1. Load temporal data -movie = load_movie() # Shape: (100, 512, 512) +# Set seed for reproducibility +np.random.seed(0) + +# 1. Load temporal data (generate sample movie) +movie = make_demo_data(n_frames=100) # 2. Estimate parameters params = compute_conversion_gain(movie) -print(f"Estimated parameters:") -print(f" Conversion gain: {params['sensitivity']:.3f} ADU/photon") -print(f" Zero level: {params['zero_level']:.1f} ADU") - -# 3. Validate fit quality -r2_score = params['model'].score( - params['variance'].ravel().reshape(-1, 1), - np.mean(movie, axis=0).ravel() -) -print(f" Fit quality (R²): {r2_score:.3f}") +print(f"Conversion gain: {params['conversion_gain']:.3f} ADU/event") +#> Conversion gain: 29.866 ADU/event +print(f"Zero level: {params['zero_level']:.1f} ADU") +#> Zero level: 17.8 ADU -# 4. Create codec with estimated parameters +# 3. Create codec with estimated parameters codec = AnscombeTransformV3( zero_level=params['zero_level'], - conversion_gain=params['sensitivity'] + conversion_gain=params['conversion_gain'] ) -# 5. Create Zarr array -arr = zarr.create( +# 4. Create Zarr array in memory +store = zarr.storage.MemoryStore() +arr = zarr.create_array( + store=store, shape=movie.shape, chunks=(10, 512, 512), dtype='int16', @@ -201,13 +144,6 @@ arr = zarr.create( zarr_format=3 ) -# 6. Compress data +# 5. Compress data arr[:] = movie -print(f"Compression successful!") -``` - -## Next Steps - -- [Zarr V2 Integration](zarr-v2.md) -- [Zarr V3 Integration](zarr-v3.md) -- [API Reference: estimate module](../api/estimate.md) +``` \ No newline at end of file diff --git a/docs/user-guide/zarr-v2.md b/docs/user-guide/zarr-v2.md index 58dac0a..270f7d1 100644 --- a/docs/user-guide/zarr-v2.md +++ b/docs/user-guide/zarr-v2.md @@ -2,6 +2,8 @@ This guide covers using the Anscombe Transform codec with Zarr V2. +The codec requires `zero_level` and `conversion_gain` parameters. See [Codec Parameters](../concepts/parameters.md) for details. + ## Basic Usage ```python @@ -13,169 +15,18 @@ from anscombe_transform import AnscombeTransformV2 data = np.random.poisson(lam=50, size=(100, 512, 512)).astype('int16') # Create Zarr V2 array with Anscombe codec as compressor -arr = zarr.open_array( - 'data.zarr', - mode='w', +arr = zarr.create_array( + {}, shape=data.shape, chunks=(10, 512, 512), dtype='int16', - compressor=AnscombeTransformV2( + filters=AnscombeTransformV2( zero_level=100, conversion_gain=2.5 - ) + ), + zarr_format=2 ) -# Write and read +# Write arr[:] = data -recovered = arr[:] -``` - -## Using with Additional Compression - -In Zarr V2, the Anscombe codec can be combined with other compressors by nesting them: - -```python -from numcodecs import Blosc -from anscombe_transform import AnscombeTransformV2 - -# Note: Zarr V2 doesn't support filter chains natively -# The Anscombe codec must be the primary compressor -compressor = AnscombeTransformV2( - zero_level=100, - conversion_gain=2.5, - encoded_dtype='uint8' -) - -arr = zarr.open_array( - 'compressed.zarr', - mode='w', - shape=(100, 512, 512), - chunks=(10, 512, 512), - dtype='int16', - compressor=compressor -) -``` - -!!! note "Limitation" - Zarr V2 doesn't support filter chains like V3 does. The Anscombe codec serves as both the transform and the compressor. For better compression with additional algorithms, consider upgrading to Zarr V3. - -## Codec Parameters - -### Required Parameters - -- **`zero_level`** (float): Baseline signal with no photons -- **`conversion_gain`** (float): Signal units per photon (also called `photon_sensitivity`) - -### Optional Parameters - -- **`encoded_dtype`** (str or dtype): Data type for encoded values, default: `'uint8'` - - Use `'uint8'` for maximum compression (0-255 range) - - Use `'uint16'` for higher dynamic range -- **`decoded_dtype`** (str or dtype): Data type for decoded output, default: inferred from input - -## Codec Registration - -The codec is automatically registered when you import it: - -```python -from anscombe_transform import AnscombeTransformV2 -import numcodecs - -# The codec is now registered -codec = numcodecs.get_codec({'id': 'anscombe-v1', 'zero_level': 100, 'conversion_gain': 2.5}) -``` - -## Serialization - -The codec can be serialized to/from JSON: - -```python -from anscombe_transform import AnscombeTransformV2 - -# Create codec -codec = AnscombeTransformV2(zero_level=100, conversion_gain=2.5) - -# Serialize to dict -config = codec.get_config() -print(config) -# {'id': 'anscombe-v1', 'zero_level': 100, 'conversion_gain': 2.5, ...} - -# Deserialize from dict -codec2 = AnscombeTransformV2.from_config(config) -``` - -This is useful for: -- Storing codec configuration in metadata -- Sharing compression settings across systems -- Programmatic codec creation - -## Working with Existing Arrays - -### Reading Compressed Data - -If data was compressed with the Anscombe codec: - -```python -import zarr -from anscombe_transform import AnscombeTransformV2 - -# Open existing array (codec info is stored in .zarray metadata) -arr = zarr.open_array('data.zarr', mode='r') - -# Read data (automatically decompressed) -data = arr[:] -``` - -### Inspecting Codec Configuration - -```python -import zarr -import json - -# Read .zarray metadata -with open('data.zarr/.zarray', 'r') as f: - metadata = json.load(f) - -print(metadata['compressor']) -# {'id': 'anscombe-v1', 'zero_level': 100, 'conversion_gain': 2.5, ...} ``` - -## Migration to Zarr V3 - -To migrate data from V2 to V3: - -```python -import zarr -from anscombe_transform import AnscombeTransformV2, AnscombeTransformV3 - -# Open V2 array -v2_arr = zarr.open_array('data_v2.zarr', mode='r') - -# Get codec config -v2_config = v2_arr.compressor.get_config() - -# Create V3 array with equivalent codec -v3_arr = zarr.create( - shape=v2_arr.shape, - chunks=v2_arr.chunks, - dtype=v2_arr.dtype, - filters=[AnscombeTransformV3( - zero_level=v2_config['zero_level'], - conversion_gain=v2_config['conversion_gain'] - )], - zarr_format=3 -) - -# Copy data -v3_arr[:] = v2_arr[:] -``` - -## API Reference - -See the [Codec API Reference](../api/codec.md) for detailed documentation of all parameters and methods. - -## Next Steps - -- [Zarr V3 Integration](zarr-v3.md) - Learn about the improved V3 interface -- [Parameter Estimation](parameter-estimation.md) - Estimate codec parameters from data -- [Examples](../examples/workbook.md) - See complete examples diff --git a/docs/user-guide/zarr-v3.md b/docs/user-guide/zarr-v3.md index d81b31c..cc27d82 100644 --- a/docs/user-guide/zarr-v3.md +++ b/docs/user-guide/zarr-v3.md @@ -12,268 +12,20 @@ from anscombe_transform import AnscombeTransformV3 # Create data data = np.random.poisson(lam=50, size=(100, 512, 512)).astype('int16') -# Create Zarr V3 array with Anscombe codec as a filter +# Create Zarr V3 array with Anscombe codec and blosc compression store = zarr.storage.MemoryStore() -arr = zarr.create( +arr = zarr.create_array( store=store, shape=data.shape, chunks=(10, 512, 512), dtype='int16', - filters=[AnscombeTransformV3( - zero_level=100, - conversion_gain=2.5 - )], - zarr_format=3 -) - -# Write and read -arr[:] = data -recovered = arr[:] -``` - -## Filter Chains - -Zarr V3 supports filter chains, allowing you to combine the Anscombe transform with other compression algorithms: - -```python -import zarr -from anscombe_transform import AnscombeTransformV3 - -# Use Anscombe as a filter with Blosc compression -arr = zarr.create( - shape=(100, 512, 512), - chunks=(10, 512, 512), - dtype='int16', - filters=[ - AnscombeTransformV3(zero_level=100, conversion_gain=2.5) - ], - compressor={ - 'id': 'blosc', - 'cname': 'zstd', - 'clevel': 5, - 'shuffle': 'bitshuffle' - }, - zarr_format=3 -) -``` - -The processing pipeline is: -1. **Original data** (int16) -2. **Anscombe filter** → transformed data (uint8) -3. **Blosc compressor** → compressed bytes -4. **Storage** - -## Recommended Compressors - -Different compressors work well with the Anscombe-transformed data: - -### Blosc with Zstd (Best Overall) - -```python -filters = [AnscombeTransformV3(zero_level=100, conversion_gain=2.5)] -compressor = { - 'id': 'blosc', - 'cname': 'zstd', # Excellent compression + speed - 'clevel': 5, # Compression level (1-9) - 'shuffle': 'bitshuffle' -} -``` - -### Blosc with LZ4 (Fastest) - -```python -compressor = { - 'id': 'blosc', - 'cname': 'lz4', # Fastest decompression - 'clevel': 3, - 'shuffle': 'bitshuffle' -} -``` - -### Blosc with Zlib (Maximum Compression) - -```python -compressor = { - 'id': 'blosc', - 'cname': 'zlib', # Best compression ratio - 'clevel': 9, - 'shuffle': 'bitshuffle' -} -``` - -## Codec Parameters - -### Required Parameters - -- **`zero_level`** (float): Baseline signal with no photons -- **`conversion_gain`** (float): Signal units per photon - -### Optional Parameters - -- **`encoded_dtype`** (str or dtype): Data type for encoded values, default: `'uint8'` -- **`decoded_dtype`** (str or dtype): Data type for decoded output, default: inferred -- **`beta`** (float): Quantization step size in noise standard deviations, default: `0.5` - -### Advanced: Beta Parameter - -The `beta` parameter controls the quantization precision: - -```python -# Finer quantization (more levels, better accuracy, less compression) -codec_fine = AnscombeTransformV3( - zero_level=100, - conversion_gain=2.5, - beta=0.25 # Half the default step size -) - -# Coarser quantization (fewer levels, more compression, lower accuracy) -codec_coarse = AnscombeTransformV3( - zero_level=100, - conversion_gain=2.5, - beta=1.0 # Double the default step size -) -``` - -Default `beta=0.5` means each quantization level represents 0.5 standard deviations of noise, which is a good balance for most applications. - -## Codec Registration - -The codec is automatically registered with Zarr V3: - -```python -from anscombe_transform import AnscombeTransformV3 -import zarr - -# Check if registered -print('anscombe-v1' in zarr.codecs.registry.get_codec_class('anscombe-v1')) -``` - -## Serialization and Metadata - -### Codec Configuration - -The codec configuration is stored in the array metadata: - -```python -import zarr -from anscombe_transform import AnscombeTransformV3 - -# Create array -arr = zarr.create( - shape=(100, 512, 512), - dtype='int16', - filters=[AnscombeTransformV3(zero_level=100, conversion_gain=2.5)], - zarr_format=3 -) - -# Access metadata -print(arr.metadata) -``` - -### JSON Serialization - -```python -from anscombe_transform import AnscombeTransformV3 - -codec = AnscombeTransformV3(zero_level=100, conversion_gain=2.5) - -# Convert to dict -config = codec.to_dict() -print(config) -# {'name': 'anscombe-v1', 'configuration': {'zero_level': 100, ...}} - -# Reconstruct from dict -codec2 = AnscombeTransformV3.from_dict(config) -``` - -## Performance Optimization - -### Chunk Size Selection - -Choose chunk sizes that balance compression and access patterns: - -```python -# For sequential access (e.g., video playback) -chunks = (10, 512, 512) # 10 frames at a time - -# For random time-point access -chunks = (1, 512, 512) # Single frames - -# For spatial crops across time -chunks = (100, 128, 128) # Smaller spatial regions, all time points -``` - -### Parallel Processing - -Zarr V3 supports parallel chunk processing: - -```python -import zarr -from concurrent.futures import ThreadPoolExecutor - -arr = zarr.open_array('data.zarr', mode='r') - -# Read chunks in parallel -with ThreadPoolExecutor(max_workers=4) as executor: - futures = [] - for i in range(0, arr.shape[0], 10): - future = executor.submit(lambda i=i: arr[i:i+10]) - futures.append(future) - - results = [f.result() for f in futures] -``` - -## Working with Different Storage Backends - -### Local Filesystem - -```python -import zarr -from anscombe_transform import AnscombeTransformV3 - -store = zarr.storage.LocalStore('data.zarr') -arr = zarr.create( - store=store, - shape=(100, 512, 512), - filters=[AnscombeTransformV3(zero_level=100, conversion_gain=2.5)], - zarr_format=3 -) -``` - -### In-Memory - -```python -store = zarr.storage.MemoryStore() -arr = zarr.create( - store=store, - shape=(100, 512, 512), - filters=[AnscombeTransformV3(zero_level=100, conversion_gain=2.5)], - zarr_format=3 -) -``` - -### Remote Storage (S3, GCS) - -```python -import zarr -from anscombe_transform import AnscombeTransformV3 - -# Requires fsspec and s3fs/gcsfs -store = zarr.storage.RemoteStore('s3://bucket/data.zarr') -arr = zarr.create( - store=store, - shape=(100, 512, 512), filters=[AnscombeTransformV3(zero_level=100, conversion_gain=2.5)], + compressors=[ + {'name': 'blosc', 'configuration': {'cname': 'zstd', 'clevel': 5}} + ], zarr_format=3 ) -``` - -## API Reference -See the [Codec API Reference](../api/codec.md) for detailed documentation. - -## Next Steps - -- [Parameter Estimation](parameter-estimation.md) - Estimate codec parameters -- [Examples](../examples/workbook.md) - Complete workflow examples -- [Zarr V2 Integration](zarr-v2.md) - If you need V2 compatibility +# Write data +arr[:] = data +``` \ No newline at end of file diff --git a/pyproject.toml b/pyproject.toml index 8bea9cb..ad4b193 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -36,7 +36,9 @@ dependencies = [ test = [ "pytest==8.4.2", "pytest-cov", + "pytest-examples", "nbmake", + "ipykernel", "scipy", "imageio", "matplotlib", @@ -197,6 +199,9 @@ filterwarnings = [ "ignore:Numcodecs codecs are not in the Zarr version 3 specification.*:UserWarning" ] +[tool.pytest-examples] +collect_ignore_glob = [] + [tool.numpydoc_validation] checks = [ "all", # Enable all checks diff --git a/src/anscombe_transform/common.py b/src/anscombe_transform/common.py new file mode 100644 index 0000000..6127d4d --- /dev/null +++ b/src/anscombe_transform/common.py @@ -0,0 +1,20 @@ +import numpy as np + +def make_demo_data( + *, + n_frames: int = 50, + height: int = 256, + width: int = 256, + mean_event_rate: float = 5.0, + zero_level: float = 20.0, + conversion_gain: float = 30.0 + ) -> np.ndarray: + """ + A convenience function for generating synthetic imaging data + """ + + event_rate = np.random.exponential(scale=mean_event_rate, size=(1, height, width)) + event_counts = np.random.poisson(np.tile(event_rate, (n_frames, 1, 1))) + measured_signal = event_counts + np.random.randn(n_frames, height, width) * 0.2 + + return (zero_level + conversion_gain * measured_signal).astype('int16') \ No newline at end of file diff --git a/tests/test_docs.py b/tests/test_docs.py new file mode 100644 index 0000000..49531e9 --- /dev/null +++ b/tests/test_docs.py @@ -0,0 +1,166 @@ +""" +Tests for Python code examples embedded in documentation. + +This module uses pytest-examples to extract all Python code blocks +from the documentation markdown files and executes them to ensure they +remain correct and up-to-date. + +Output assertions can be specified using #> style comments: + print("hello") + #> hello +""" + +from io import StringIO +from pathlib import Path + +import pytest +from pytest_examples import CodeExample, find_examples + + +def _should_skip_example(example: CodeExample) -> bool: + """Determine if an example should be skipped.""" + # Check for explicit test: skip marker + if "# test: skip" in example.source or "test: skip" in example.source: + return True + skip_patterns: list[str] = [] + return any(pattern in example.source for pattern in skip_patterns) + + +def _make_group_id(group_examples: list[CodeExample]) -> str: + """Generate a descriptive ID for a group of code examples. + + Uses the filename and line range from the first and last examples + in the group to create an informative test ID. + + Example: "sentinel2.md[12-26]" + """ + if not group_examples: + return "empty-group" + first = group_examples[0] + last = group_examples[-1] + # Extract filename from path + filename = Path(first.path).name + return f"{filename}[{first.start_line}-{last.end_line}]" + + +def _parse_output_assertions(source: str) -> list[str]: + """Parse expected output assertions from code using #> style comments. + + Examples: + print("hello") + #> hello + + Returns a list of expected output lines. + """ + expected_lines = [] + for line in source.split("\n"): + if line.strip().startswith("#>"): + # Extract the expected output after '#>' + expected = line.split("#>", 1)[1].strip() + expected_lines.append(expected) + return expected_lines + + +def _validate_output(example: CodeExample, captured_output: str) -> None: + """Validate that captured output matches expected output assertions. + + Raises AssertionError if output doesn't match expected assertions. + """ + expected_lines = _parse_output_assertions(example.source) + if not expected_lines: + # No output assertions to validate + return + + actual_lines = [line.rstrip() for line in captured_output.rstrip().split("\n")] + + if len(actual_lines) != len(expected_lines): + raise AssertionError( + f"Output line count mismatch in {example.path} (lines {example.start_line}-{example.end_line}):\n" + f"Expected {len(expected_lines)} lines but got {len(actual_lines)} lines\n" + f"Expected:\n{chr(10).join(expected_lines)}\n" + f"Actual:\n{chr(10).join(actual_lines)}" + ) + + for i, (expected, actual) in enumerate(zip(expected_lines, actual_lines)): + if expected != actual: + raise AssertionError( + f"Output mismatch on line {i + 1} in {example.path} (lines {example.start_line}-{example.end_line}):\n" + f"Expected: {expected!r}\n" + f"Actual: {actual!r}" + ) + + +# Get all examples and group them by group ID +_all_examples = list(find_examples("docs")) +_examples_by_group = {} +for ex in _all_examples: + if ex.prefix == "python": + if ex.group not in _examples_by_group: + _examples_by_group[ex.group] = [] + _examples_by_group[ex.group].append(ex) + + +@pytest.mark.parametrize( + "group_examples", + list(_examples_by_group.values()), + ids=_make_group_id, +) +def test_doc_example_group(group_examples: list[CodeExample]) -> None: + """ + Test a group of related Python code examples from the documentation. + + Examples are grouped by their pytest-examples group ID, allowing dependent + examples to share state (e.g., a variable defined in one example used in the next). + + If any example would be skipped but later examples depend on it, the entire + group is skipped. + + Output can be validated by adding #> style assertions: + print("hello") + #> hello + """ + # Execute all examples in the group sequentially to share state + namespace = {} + any_skipped = False + + for example in group_examples: + if _should_skip_example(example): + any_skipped = True + continue + + # If we're about to execute an example but we've skipped some, + # we might have missing dependencies + if any_skipped: + pytest.skip( + "Earlier examples in group were skipped; dependencies may be missing" + ) + + # Execute the example code with shared namespace and capture output + try: + # Capture stdout to check output assertions + import sys + + old_stdout = sys.stdout + sys.stdout = StringIO() + try: + exec(example.source, namespace) + captured_output = sys.stdout.getvalue() + finally: + sys.stdout = old_stdout + + # Validate output if there are expected output assertions + _validate_output(example, captured_output) + + except NameError as e: + # If a NameError occurs, it might be due to skipped dependencies + if any_skipped: + pytest.skip(f"Skipped dependencies caused NameError: {e}") + raise AssertionError( + f"Code example in {example.path} (lines {example.start_line}-{example.end_line}) failed:\n" + f"{example.source}\n\n{e}" + ) from e + except Exception as e: + raise AssertionError( + f"Code example in {example.path} (lines {example.start_line}-{example.end_line}) failed:\n" + f"{example.source}\n\n{e}" + ) from e