diff --git a/CITATION.cff b/CITATION.cff new file mode 100644 index 0000000..85e9571 --- /dev/null +++ b/CITATION.cff @@ -0,0 +1,53 @@ +cff-version: 1.2.0 +title: Anscombe Transform Codec +message: If you use this software, please cite it. +version: 1.0.0 +repository-code: https://github.com/datajoint/anscombe-transform +abstract: > + Codec implementation of the Anscombe transform for compressing photon-limited + microscopy, radiography, and astronomy image recordings, published by DataJoint. +authors: + - family-names: Yatsenko + given-names: Dimitri + email: dimitr@datajoint.com + - family-names: Lecoq + given-names: Jerome + email: jeromel@alleninstitute.org + - family-names: Bennett + given-names: Davis + email: davis.v.bennett@gmail.com +contact: + - family-names: Yatsenko + given-names: Dimitri + email: dimitr@datajoint.com +license: MIT +preferred-citation: + type: article + title: Standardized measurements for monitoring and comparing multiphoton microscope systems + authors: + - family-names: Lees + given-names: Robert M. + - family-names: Bianco + given-names: Isaac H. + - family-names: Campbell + given-names: Robert A. A. + - family-names: Orlova + given-names: Natalia + - family-names: Peterka + given-names: Darcy S. + - family-names: Pichler + given-names: Bruno + - family-names: Smith + given-names: Spencer LaVere + - family-names: Yatsenko + given-names: Dimitri + - family-names: Yu + given-names: Che-Hang + - family-names: Packer + given-names: Adam M. 
+ journal: Nature Protocols + volume: '20' + pages: 1-38 + date-published: 2025-03-17 + doi: 10.1038/s41596-024-01120-w + url: https://doi.org/10.1038/s41596-024-01120-w diff --git a/README.md b/README.md index ab3c446..062ab36 100644 --- a/README.md +++ b/README.md @@ -1,15 +1,18 @@ -[![PyPI version](https://badge.fury.io/py/anscombe-transform.svg)](https://badge.fury.io/py/anscombe-transform) ![tests](https://github.com/datajoint/anscombe-transform/actions/workflows/tests.yaml/badge.svg) +[![PyPI version](https://badge.fury.io/py/anscombe-transform.svg)](https://badge.fury.io/py/anscombe-transform) ![tests](https://github.com/datajoint/anscombe-transform/actions/workflows/test.yml/badge.svg) # Anscombe transform -This codec is designed for compressing image recordings with Poisson noise, which are produced by photon-limited modalities such multiphoton microscopy, radiography, and astronomy. +This codec is designed for compressing image recordings with Poisson noise such as in microscopy, radiography, and astronomy. -The codec assumes that the video is linearly encoded with a potential offset (`zero_level`) and that the `photon_sensitivity` (the average increase in intensity per photon) is either already known or can be accurately estimated from the data. +**Status:** This is the official and actively maintained Anscombe transform codec for Zarr/Numcodecs, maintained by DataJoint. +It originated as a fork of [AllenNeuralDynamics/poisson-numcodecs](https://github.com/AllenNeuralDynamics/poisson-numcodecs) and earlier developments at https://github.com/datajoint/compress-multiphoton. It has since diverged significantly. New users should rely on this repository as the canonical source. + +The codec assumes that the video is linearly encoded with a potential offset (`zero_level`) and that the `conversion_gain` (also called `photon_sensitivity`)—the average increase in intensity per photon—is either already known or can be accurately estimated from the data. 
The codec re-quantizes the grayscale efficiently with a square-root-like transformation to equalize the noise variance across the grayscale levels: the [Anscombe Transform](https://en.wikipedia.org/wiki/Anscombe_transform). This results in a smaller number of unique grayscale levels and significant improvements in the compressibility of the data without sacrificing signal accuracy. -To use the codec, one must supply two pieces of information: `zero_level` (the input value corresponding to the absence of light) and `photon_sensitivity` (levels/photon). +To use the codec, one must supply two pieces of information: `zero_level` (the input value corresponding to the absence of light) and `conversion_gain` (levels/photon). The codec is used in Zarr as a filter prior to compression. diff --git a/docs/examples/workbook.md b/docs/examples/workbook.md index 2d918f8..3970a97 100644 --- a/docs/examples/workbook.md +++ b/docs/examples/workbook.md @@ -29,17 +29,19 @@ First, let's create synthetic data that mimics photon-limited imaging: ```python # Parameters for synthetic data -n_frames = 100 -height, width = 512, 512 -mean_photons = 50 # Average photons per pixel -zero_level = 100 # Camera baseline -conversion_gain = 2.5 # ADU per photon +n_frames = 30 +height, width = 120, 120 +mean_photon_rate = 5.0 # Average photons per pixel (exponential distribution of rates) +zero_level = -10.0 # camera baseline +conversion_gain = 2.5 # levels per photon # Generate Poisson-distributed photon counts -photon_counts = np.random.poisson(lam=mean_photons, size=(n_frames, height, width)) +photon_rate = np.random.exponential(scale=mean_photon_rate, size=(1, height, width)) +photon_counts = np.random.poisson(lam=np.tile(photon_rate, (n_frames, 1, 1))) +measured_signal = photon_counts + np.random.randn(n_frames, height, width) * 0.2 -# Convert to camera signal (ADU) -camera_signal = (photon_counts * conversion_gain + zero_level).astype('int16') +# Convert to camera signal +camera_signal = (measured_signal * conversion_gain + 
zero_level).astype('int16') print(f"Data shape: {camera_signal.shape}") print(f"Data range: [{camera_signal.min()}, {camera_signal.max()}]") @@ -58,16 +60,16 @@ estimated_gain = result['sensitivity'] estimated_zero = result['zero_level'] print(f"\nTrue parameters:") -print(f" Conversion gain: {conversion_gain:.3f} ADU/photon") -print(f" Zero level: {zero_level:.1f} ADU") +print(f" Conversion gain: {conversion_gain:.3f} units/photon") +print(f" Zero level: {zero_level:.1f} ") print(f"\nEstimated parameters:") -print(f" Conversion gain: {estimated_gain:.3f} ADU/photon") -print(f" Zero level: {estimated_zero:.1f} ADU") +print(f" Conversion gain: {estimated_gain:.3f} units/photon") +print(f" Zero level: {estimated_zero:.1f}") print(f"\nEstimation error:") -print(f" Gain error: {abs(estimated_gain - conversion_gain):.3f} ADU/photon") -print(f" Zero level error: {abs(estimated_zero - zero_level):.1f} ADU") +print(f" Gain error: {abs(estimated_gain - conversion_gain):.3f} units/photon") +print(f" Zero level error: {abs(estimated_zero - zero_level):.1f} units") ``` ## Visualize Noise Model diff --git a/docs/getting-started/quick-start.md b/docs/getting-started/quick-start.md index ce073c8..8681a64 100644 --- a/docs/getting-started/quick-start.md +++ b/docs/getting-started/quick-start.md @@ -9,18 +9,25 @@ import zarr import numpy as np from anscombe_transform import AnscombeTransformV3 -# Generate sample data with Poisson noise -# Simulating photon-limited imaging data -data = np.random.poisson(lam=50, size=(100, 512, 512)).astype('int16') +# Synthesize data with Poisson noise - simulating photon-limited microscopy data +n_frames, height, width = 50, 256, 256 +mean_photon_rate = 5.0 # Average photons per pixel (exponential distribution of rates) +zero_level = 20.0 # camera baseline +conversion_gain = 30.0 # levels per photon +photon_rate = np.random.exponential(scale=mean_photon_rate, size=(1, height, width)) +photon_counts = np.random.poisson(np.tile(photon_rate, 
(n_frames, 1, 1))) +measured_signal = photon_counts + np.random.randn(n_frames, height, width) * 0.2 + +data = (zero_level + conversion_gain * measured_signal).astype('int16') # Create a Zarr array with the Anscombe codec store = zarr.storage.MemoryStore() arr = zarr.create( store=store, shape=data.shape, - chunks=(10, 512, 512), + chunks=(12, 160, 80), dtype='int16', - filters=[AnscombeTransformV3(zero_level=100, conversion_gain=2.5)], + filters=[AnscombeTransformV3(zero_level=zero_level, conversion_gain=conversion_gain)], zarr_format=3 ) @@ -44,10 +51,10 @@ import zarr arr = zarr.open_array( 'data.zarr', mode='w', - shape=(100, 512, 512), - chunks=(10, 512, 512), + shape=data.shape, + chunks=(12, 160, 80), dtype='int16', - compressor=AnscombeTransformV2(zero_level=100, conversion_gain=2.5) + compressor=AnscombeTransformV2(zero_level=zero_level, conversion_gain=conversion_gain) ) # Write and read data @@ -64,10 +71,10 @@ from anscombe_transform import compute_conversion_gain import numpy as np # Load your movie data as (time, height, width) -movie = np.random.poisson(lam=50, size=(100, 512, 512)) +movie = data # Estimate parameters -result = compute_conversion_gain(movie) +result = compute_conversion_gain(data) print(f"Estimated conversion gain: {result['sensitivity']:.3f}") print(f"Estimated zero level: {result['zero_level']:.3f}") @@ -90,10 +97,10 @@ from anscombe_transform import AnscombeTransformV3 # For Zarr V3, use filters + codecs arr = zarr.create( - shape=(100, 512, 512), + shape=data.shape, chunks=(10, 512, 512), dtype='int16', - filters=[AnscombeTransformV3(zero_level=100, conversion_gain=2.5)], + filters=[AnscombeTransformV3(zero_level=zero_level, conversion_gain=conversion_gain)], compressor={'id': 'blosc', 'cname': 'zstd', 'clevel': 5}, zarr_format=3 ) @@ -102,7 +109,7 @@ arr = zarr.create( ## Key Parameters - **`zero_level`**: The signal value when no photons are detected. This is the baseline offset in your camera sensor. 
-- **`conversion_gain`** (also called `photon_sensitivity`): How many signal units correspond to one photon. For example, if your camera reports 2.5 ADU per photon, use `conversion_gain=2.5`. +- **`conversion_gain`** (also called `photon_sensitivity`): How many signal units correspond to one photon. For example, if your camera reports 2.5 levels increase in signal per photon, use `conversion_gain=2.5`. - **`encoded_dtype`**: The data type for encoded values (default: `uint8`). Use `uint8` for maximum compression. - **`decoded_dtype`**: The data type for decoded values (default: inferred from data). diff --git a/docs/user-guide/overview.md b/docs/user-guide/overview.md index 5d09038..028cd50 100644 --- a/docs/user-guide/overview.md +++ b/docs/user-guide/overview.md @@ -74,7 +74,7 @@ These tables are computed once during codec initialization and reused for all en ### Compression Ratios Typical compression ratios (Anscombe + Blosc/Zstd): -- **4-8x** for typical multiphoton microscopy data +- **3-8x** for typical multiphoton microscopy data - **6-10x** for astronomy data - **3-6x** for radiography data diff --git a/examples/workbook.ipynb b/examples/workbook.ipynb index a0e9a83..7518e26 100644 --- a/examples/workbook.ipynb +++ b/examples/workbook.ipynb @@ -392,7 +392,7 @@ ], "metadata": { "kernelspec": { - "display_name": "microns_trippy", + "display_name": "Python 3", "language": "python", "name": "python3" }, @@ -406,7 +406,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.10.16" + "version": "3.11.11" } }, "nbformat": 4, diff --git a/pyproject.toml b/pyproject.toml index 9462394..8bea9cb 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -7,8 +7,8 @@ name = "anscombe_transform" version = "1.0.0" authors = [ - { name = "Jerome Lecoq", email = "jeromel@alleninstitute.org" }, { name = "Dimitri Yatsenko", email = "dimitr@datajoint.com" }, + { name = "Jerome Lecoq", email = "jeromel@alleninstitute.org" }, { name = 
"Davis Bennett", email = "davis.v.bennett@gmail.com" }, ] maintainers = [ diff --git a/tests/test_zarr.py b/tests/test_zarr.py index cba3b5b..3ba1761 100644 --- a/tests/test_zarr.py +++ b/tests/test_zarr.py @@ -10,35 +10,56 @@ def test_zarr_v2_roundtrip() -> None: decoded_dtype = "int16" encoded_dtype = "uint8" - data = np.random.poisson(100, size=(20, 20)).astype(decoded_dtype) + + # generate fake data + np.random.seed(42) + size = (20, 20) sensitivity = 100.0 + zero_level = -5.0 + true_rate = np.random.exponential(scale=5, size=size) + data = ( + zero_level + + sensitivity * (np.random.poisson(true_rate) + np.random.randn(*size) * 0.25) + ).astype(decoded_dtype) + + # construct codec codec = AnscombeTransformV2( conversion_gain=sensitivity, - zero_level=0, + zero_level=zero_level, encoded_dtype=encoded_dtype, decoded_dtype=decoded_dtype, ) data_encoded = codec.encode(data) data_rt = codec.decode(data_encoded).reshape(data.shape) - store = {} - # write data + store = {} _ = create_array(store=store, data=data, zarr_format=2, compressors=codec) + # read data z_arr_r = open_array(store=store) assert z_arr_r.dtype == decoded_dtype - assert nearly_equal(z_arr_r, data_rt, sensitivity / 2) + assert nearly_equal(z_arr_r, data_rt, sensitivity * 0.5) def test_zarr_v3_roundtrip() -> None: decoded_dtype = "int16" encoded_dtype = "uint8" - data = np.random.poisson(100, size=(20, 20)).astype(decoded_dtype) + + # generate fake data + np.random.seed(42) + size = (20, 20) sensitivity = 100.0 + zero_level = -5.0 + true_rate = np.random.exponential(scale=5, size=size) + data = ( + zero_level + + sensitivity * (np.random.poisson(true_rate) + np.random.randn(*size) * 0.25) + ).astype(decoded_dtype) + codec = AnscombeTransformV3( conversion_gain=sensitivity, - zero_level=0, + zero_level=zero_level, encoded_dtype=encoded_dtype, decoded_dtype=decoded_dtype, ) @@ -47,9 +68,8 @@ def test_zarr_v3_roundtrip() -> None: data_encoded = codec._encode(data) data_rt = 
codec._decode(data_encoded).reshape(data.shape) - store = {} - # write data + store = {} _ = create_array(store=store, data=data, zarr_format=3, filters=[codec]) # read data z_arr_r = open_array(store=store)