diff --git a/.github/workflows/draft-pdf.yaml b/.github/workflows/draft-pdf.yaml new file mode 100644 index 0000000..f9bb6c4 --- /dev/null +++ b/.github/workflows/draft-pdf.yaml @@ -0,0 +1,24 @@ +name: Draft PDF +on: + push: + paths: + - paper/** + - .github/workflows/draft-pdf.yaml + +jobs: + paper: + runs-on: ubuntu-latest + name: Paper Draft + steps: + - name: Checkout + uses: actions/checkout@v4 + - name: Build draft PDF + uses: openjournals/openjournals-draft-action@master + with: + journal: joss + paper-path: paper/paper.md + - name: Upload + uses: actions/upload-artifact@v4 + with: + name: paper + path: paper/paper.pdf \ No newline at end of file diff --git a/.gitignore b/.gitignore index a2ef030..430ab1f 100644 --- a/.gitignore +++ b/.gitignore @@ -1,5 +1,13 @@ # dir with input files -*/data/ +snazzy_processing/data/* +snazzy_analysis/data/* + +# add annotated data for ROI length evaluation +!snazzy_processing/data/20240611/ +snazzy_processing/data/20240611/* +!snazzy_processing/data/20240611/annotated +!snazzy_processing/data/20240611/emb_sizes + # dir with output files */results/ diff --git a/README.md b/README.md index 1b9c417..6850453 100644 --- a/README.md +++ b/README.md @@ -1,7 +1,12 @@ # SNAzzy: an image processing pipeline for investigating global Synchronous Network Activity +SNAzzy is a Python package for studying synchronous network activity (SNA) in Drosophila embryos via high-throughput microscopy. +The software includes processing raw data into individual `.tif` files, quantification of fluorescence and changes in morphology, a custom peak detection algorithm, and a GUI for data visualization and curation. + ## Getting Started +Refer to the README files inside the `snazzy_processing` or `snazzy_analysis` packages for details on running the code. + ### Installation The project uses [conda](https://docs.conda.io) to manage dependencies. 
@@ -19,6 +24,18 @@ Activate the environment: ``` conda activate snazzy-env ``` + +## Testing + +Tests can be run with pytest. + +You can run the test suite from the project’s root directory to test everything at once. +Make sure the environment is active, and then run: + +``` + pytest +``` + ## Contributing Thank you for being interested in `snazzy`! diff --git a/docs/source/Data_analysis/Graphical_User_Interface.rst b/docs/source/Data_analysis/Graphical_User_Interface.rst index 54dcdce..da1be57 100644 --- a/docs/source/Data_analysis/Graphical_User_Interface.rst +++ b/docs/source/Data_analysis/Graphical_User_Interface.rst @@ -98,9 +98,13 @@ From this window it's possible to set: * To_remove: embryo numbers that will be analyzed, but will appear in the 'Removed' group. * Embryos that have it's first peak before the first peak threshold or that were marked by the user as removed will also be at the to_remove category. * Has_transients: if selected the code will try to identify and skip the first peak if it's likely just a transient. -* Has_dsna: if selected the code will try to determine dSNA and ignore all peaks that happen after dSNA start. * Dff_strategy: Combo box with the baseline strategy methods. ``local_minima`` will pick the bottom 11 points out of the ``baseline_window_size`` and use that average as the baseline. ``baseline`` will split the DFF values into bins and use the average of the most frequent bin as the baseline. This method assumes that the bursts of activity are sparse, so that for all windows the most frequent bin falls into the baseline values. +The embryos listed in ``to_remove`` are not used for plotting and comparisons between Groups. +There are different reasons for marking an embryo as removed. +One example would be to remove embryos that already are in later stages of development when the imaging session starts. +Another would be to remove unhealthy embryos, or embryos that don't hatch, if that is a requirement for the experiment. 
+ Inside the File menu there is an option to open the ``json`` file and change any of its parameters. Updating the file causes the entire Dataset to be recreated with the new configuration data. diff --git a/docs/source/Data_analysis/Hatching_Point.rst b/docs/source/Data_analysis/Hatching_Point.rst index 452fe81..ea44a3d 100644 --- a/docs/source/Data_analysis/Hatching_Point.rst +++ b/docs/source/Data_analysis/Hatching_Point.rst @@ -18,7 +18,7 @@ It can also happen that an embryo is still inside the egg and a larva that alrea It's very rare that this scenario affects the ROI calculation, because we only consider the largest connected area for calculating the ROI, which is usually the VNC. If it does, then the only option is to remove that embryo. -When loading an experiment for the first time, it's worth it to visualize the structural channel signal of each embryo. +When loading a dataset for the first time, it's worth it to visualize the structural channel signal of each embryo. On a few occasions, mostly due to very abrupt motion, the ROI is underestimated and the hatching point is determined earlier. In these cases, you can drag the line that indicates the hatching to a more accurate position or remove that embryo. diff --git a/docs/source/Data_analysis/Peak_Detection.rst b/docs/source/Data_analysis/Peak_Detection.rst index f96ef16..cbd451a 100644 --- a/docs/source/Data_analysis/Peak_Detection.rst +++ b/docs/source/Data_analysis/Peak_Detection.rst @@ -39,6 +39,9 @@ Since the leftmost peak can have an amplitude very different than the local maxi 3. Filter peaks by local threshold ---------------------------------- +This step is **not enabled** by default. +To enable it the ``dff_peak_indices`` in ``trace.calculate_peaks`` must be passed to ``trace.filter_peaks_by_local_threshold``. + As the embryos develop, there is a global trend of peak amplitude to rise and then stabilize before hatching. 
We use this fact to perform an extra validation step for the calculated peaks. Each peak is compared against its neighboring peaks, and peaks that are too high or too low are discarted. diff --git a/docs/source/Data_processing/ROI_length.rst b/docs/source/Data_processing/Neurodevelopmental_progression.rst similarity index 88% rename from docs/source/Data_processing/ROI_length.rst rename to docs/source/Data_processing/Neurodevelopmental_progression.rst index 9c32f84..695e330 100644 --- a/docs/source/Data_processing/ROI_length.rst +++ b/docs/source/Data_processing/Neurodevelopmental_progression.rst @@ -1,15 +1,14 @@ -Neurodevelopmental Time ----------------- -Together the ROI length and the full embryo size are used as a proxy to measure the embryonic neurodevelopmental progression: - -developmental_progression = embryo_length / ROI_length - -ROI length -========== +Neurodevelopmental Progression +============================== The ROI length is calculated by center line estimation. The general approach is to measure the line that will pass through the center of the ROI; this will correspond to the ventral nerve cord length. +Together the ROI length and the full embryo size are used as a proxy to measure the embryonic neurodevelopmental progression: + +.. math:: + neurodevelopmental\ progression = \frac{embryo\ length}{ROI\ length} + To determine the ROI length, the following steps are used: 1. Binarize the image @@ -26,6 +25,7 @@ Embryo Full size The full specimen size is calculated by approximating the entire sample shape as an ellipse, and measuring this ellipse's diameter. The steps to calculate the embryo's size are: + 1. Equalize the image histogram 2. Automatic threshold (Triangle method) 3. 
Binarize the image diff --git a/docs/source/Data_processing/Overview.rst b/docs/source/Data_processing/Overview.rst index 440f796..a3160c0 100644 --- a/docs/source/Data_processing/Overview.rst +++ b/docs/source/Data_processing/Overview.rst @@ -34,4 +34,4 @@ Refer to the other documentation pages for a description of the pipeline steps: * `Process raw data `__ * `ROI and signal intensity `__ -* `ROI length `__ \ No newline at end of file +* `Neurodevelopmental_progression `__ \ No newline at end of file diff --git a/docs/source/Data_processing/ROIs_and_signal_intensity.rst b/docs/source/Data_processing/ROIs_and_signal_intensity.rst index 2249b6e..638ab88 100644 --- a/docs/source/Data_processing/ROIs_and_signal_intensity.rst +++ b/docs/source/Data_processing/ROIs_and_signal_intensity.rst @@ -36,5 +36,5 @@ To display it, ``cd`` into the ``snazzy_processing`` directory, and run the file python3 scripts/plot_contours.py -It will look for any experiment directories you have inside the ``./data`` directory and present the available options in the terminal. +It will look for any dataset directories you have inside the ``./data`` directory and present the available options in the terminal. Animations can be paused by pressing any key. \ No newline at end of file diff --git a/docs/source/Data_processing/index.rst b/docs/source/Data_processing/index.rst index 8f69929..e76f3b6 100644 --- a/docs/source/Data_processing/index.rst +++ b/docs/source/Data_processing/index.rst @@ -8,4 +8,4 @@ Data Processing Overview Process_raw_data ROIs_and_signal_intensity - ROI_length \ No newline at end of file + Neurodevelopmental_progression \ No newline at end of file diff --git a/docs/source/Getting_Started.rst b/docs/source/Getting_Started.rst index 982b859..1787922 100644 --- a/docs/source/Getting_Started.rst +++ b/docs/source/Getting_Started.rst @@ -42,6 +42,9 @@ Running the code Refer to the Getting Started session of each package for how to run the code. 
+Two sample datasets with a reduced number of samples (to reduce dataset size) were uploaded to zenodo. +Please find the datasets here: https://doi.org/10.5281/zenodo.17295552. + To process raw data, start with `Getting Started `__. To analyze the output of the processing step, go to `Getting Started `__. diff --git a/paper/figures/snazzy-fig1.png b/paper/figures/snazzy-fig1.png new file mode 100644 index 0000000..312bbac Binary files /dev/null and b/paper/figures/snazzy-fig1.png differ diff --git a/paper/figures/snazzy-fig2.png b/paper/figures/snazzy-fig2.png new file mode 100644 index 0000000..a813a15 Binary files /dev/null and b/paper/figures/snazzy-fig2.png differ diff --git a/paper/figures/snazzy-fig3.png b/paper/figures/snazzy-fig3.png new file mode 100644 index 0000000..98c48e0 Binary files /dev/null and b/paper/figures/snazzy-fig3.png differ diff --git a/paper/figures/snazzy-fig4.png b/paper/figures/snazzy-fig4.png new file mode 100644 index 0000000..f6377c5 Binary files /dev/null and b/paper/figures/snazzy-fig4.png differ diff --git a/paper/paper.bib b/paper/paper.bib new file mode 100644 index 0000000..944577b --- /dev/null +++ b/paper/paper.bib @@ -0,0 +1,203 @@ +@article{wu:2024, + title={Network state transitions during cortical development}, + ISSN={1471-003X}, + url={http://dx.doi.org/10.1038/s41583-024-00824-y}, + DOI={10.1038/s41583-024-00824-y}, + journal={Nature reviews. Neuroscience}, + author={Wu, Michelle W. and Kourdougli, Nazim and Portera-Cailliau, Carlos}, + year={2024}, + month=may, + language={en} } + +@article{akin:2020, + title={Activity regulates brain development in the fly}, + volume={65}, + ISSN={0959-437X}, + DOI={10.1016/j.gde.2020.04.005}, + journal={Current opinion in genetics & development}, + publisher={Elsevier BV}, + author={Akin, Orkun and Zipursky, S. Lawrence}, + year={2020}, + month=dec, + pages={8–13}, + language={en} } + +@article{ardiel:2022, + title={Stereotyped behavioral maturation and rhythmic quiescence in C. 
elegans embryos}, + volume={11}, + ISSN={2050-084X}, + url={http://dx.doi.org/10.7554/eLife.76836}, + DOI={10.7554/eLife.76836}, + journal={eLife}, + publisher={eLife Sciences Publications, Ltd}, + author={Ardiel, Evan L. and Lauziere, Andrew and Xu, Stephen and Harvey, Brandon J. and Christensen, Ryan Patrick and Nurrish, Stephen and Kaplan, Joshua M. and Shroff, Hari}, + year={2022}, + month=aug, + keywords={C. elegans; behavior; embryo; neuropeptides; neuroscience}, + language={en} } + +@article{avasthi:2023, + title={Gotta catch ‘em all: Agar microchambers for high-throughput single-cell live imaging}, + ISSN={2998-4084}, + url={http://dx.doi.org/10.57844/arcadia-v1bg-6b60}, + DOI={10.57844/arcadia-v1bg-6b60}, + publisher={Arcadia Science}, + author={Avasthi, Prachee and Essock-Burns, Tara and Garcia, Galo, III and Gehring, Jase and Matus, David Q. and Mets, David G. and York, Ryan}, + year={2023}, + month=apr } + +@article{blankenship:2009, + title={Mechanisms underlying spontaneous patterned activity in developing neural circuits}, + volume={11}, + ISSN={1471-003X}, + DOI={10.1038/nrn2759}, + number={1}, + journal={Nature reviews. Neuroscience}, + publisher={Nature Publishing Group}, + author={Blankenship, Aaron G. and Feller, Marla B.}, + year={2009}, + pages={18–29} } + +@article{carreira:2021, + title={Mechanosensory input during circuit formation shapes Drosophila motor behavior through patterned spontaneous network activity}, + volume={31}, + ISSN={0960-9822}, + DOI={10.1016/j.cub.2021.08.022}, + number={23}, + journal={Current biology: CB}, + author={Carreira-Rosario, Arnaldo and York, Ryan A. and Choi, Minseung and Doe, Chris Q. 
and Clandinin, Thomas R.}, + year={2021}, + month=dec, + pages={5341–5349.e4}, + keywords={Drosophila embryo; behavioral development; development of locomotor behavior; nervous system development; neural circuit wiring; spontaneous network activity}, + language={en} } + +@article{crisp:2008, + title={The development of motor coordination in Drosophila embryos}, + volume={135}, + DOI={10.1242/dev.026773}, + author={Crisp, Sarah and Evers, Jan Felix and Fiala, André and Bate, Michael}, + year={2008}, + pages={3707–3717}, + keywords={coordination; drosophila; embryo; movement; muscle} } + +@article{donoughe:2018, + title={High-throughput live-imaging of embryos in microwell arrays using a modular specimen mounting system}, + volume={7}, + ISSN={2046-6390}, + DOI={10.1242/bio.031260}, + number={7}, + journal={Biology open}, + publisher={The Company of Biologists}, + author={Donoughe, Seth and Kim, Chiyoung and Extavour, Cassandra G.}, + year={2018}, + month=jul, + pages={bio031260}, + keywords={Development; Embryogenesis; High-throughput; Image analysis; Microscopy; Quantitative imaging; Time lapse}, + language={en} } + +@article{fischler:1981, + title={Random sample consensus: a paradigm for model fitting with applications to image analysis and automated cartography}, + volume={24}, + ISSN={0001-0782}, + DOI={10.1145/358669.358692}, + number={6}, + journal={Communications of the ACM}, + publisher={Association for Computing Machinery (ACM)}, + author={Fischler, Martin A. and Bolles, Robert C.}, + year={1981}, + month=jun, + pages={381–395}, + language={en} } + +@article{giovannucci:2019, + title={CaImAn an open source tool for scalable calcium imaging data analysis}, + volume={8}, + ISSN={2050-084X}, + url={http://dx.doi.org/10.7554/eLife.38173}, + DOI={10.7554/eLife.38173}, + journal={eLife}, + publisher={eLife Sciences Publications, Ltd}, + author={Giovannucci, Andrea and Friedrich, Johannes and Gunn, Pat and Kalfon, Jérémie and Brown, Brandon L. 
and Koay, Sue Ann and Taxidis, Jiannis and Najafi, Farzaneh and Gauthier, Jeffrey L. and Zhou, Pengcheng and Khakh, Baljit S. and Tank, David W. and Chklovskii, Dmitri B. and Pnevmatikakis, Eftychios A.}, + year={2019}, + month=jan, + keywords={calcium imaging; data analysis; mouse; neuroscience; one-photon; open source; software; two-photon; zebrafish}, + language={en} } + +@article{lin:2016, + title={Genetically encoded indicators of neuronal activity}, + volume={19}, + ISSN={1097-6256}, + DOI={10.1038/nn.4359}, + number={9}, + journal={Nature neuroscience}, + publisher={Nature Publishing Group}, + author={Lin, Michael Z. and Schnitzer, Mark J.}, + year={2016}, + month=aug, + pages={1142–1153}, + language={en} } + +@unpublished{menzies:2024, + title={A microRNA that controls the emergence of embryonic movement}, + url={http://dx.doi.org/10.7554/eLife.95209.2}, + DOI={10.7554/elife.95209.2}, + journal={eLife}, + author={Menzies, Jonathan A. C. and Chagas, Andre M. and Baden, Tom and Alonso, Claudio R.}, + year={2024}, + month=jun, + language={en} } + +@article{nakai:2001, + title={A high signal-to-noise Ca(2+) probe composed of a single green fluorescent protein}, + volume={19}, + ISSN={1087-0156}, + DOI={10.1038/84397}, + number={2}, + journal={Nature biotechnology}, + publisher={Springer Science and Business Media LLC}, + author={Nakai, J. and Ohkura, M. 
and Imoto, K.}, + year={2001}, + month=feb, + pages={137–141}, + language={en} } + +@article{otsu:1979, + title={A threshold selection method from gray-level histograms}, + volume={9}, + ISSN={0018-9472}, + DOI={10.1109/TSMC.1979.4310076}, + number={1}, + journal={IEEE transactions on systems, man, and cybernetics}, + publisher={Institute of Electrical and Electronics Engineers (IEEE)}, + author={Otsu, Nobuyuki}, + year={1979}, + month=jan, + pages={62–66}, + language={en} } + +@unpublished{pachitariu:2016, + title={Suite2p: beyond 10,000 neurons with standard two-photon microscopy}, + url={https://www.biorxiv.org/content/10.1101/061507v2.abstract}, + DOI={10.1101/061507}, + journal={bioRxiv}, + publisher={bioRxiv}, + author={Pachitariu, Marius and Stringer, Carsen and Dipoppa, Mario and Schröder, Sylvia and Rossi, L. Federico and Dalgleish, Henry and Carandini, Matteo and Harris, Kenneth D.}, + year={2016}, + month=jun, + pages={061507}, + language={en} } + +@article{virtanen:2020, + title={SciPy 1.0: fundamental algorithms for scientific computing in Python}, + volume={17}, + ISSN={1548-7091}, + DOI={10.1038/s41592-019-0686-2}, + number={3}, + journal={Nature methods}, + publisher={Springer Science and Business Media LLC}, + author={Virtanen, Pauli and Gommers, Ralf and Oliphant, Travis E. and Haberland, Matt and Reddy, Tyler and Cournapeau, David and Burovski, Evgeni and Peterson, Pearu and Weckesser, Warren and Bright, Jonathan and van der Walt, Stéfan J. and Brett, Matthew and Wilson, Joshua and Millman, K. Jarrod and Mayorov, Nikolay and Nelson, Andrew R. J. and Jones, Eric and Kern, Robert and Larson, Eric and Carey, C. J. and Polat, İlhan and Feng, Yu and Moore, Eric W. and VanderPlas, Jake and Laxalde, Denis and Perktold, Josef and Cimrman, Robert and Henriksen, Ian and Quintero, E. A. and Harris, Charles R. and Archibald, Anne M. and Ribeiro, Antônio H. 
and Pedregosa, Fabian and van Mulbregt, Paul and SciPy 1.0 Contributors}, + year={2020}, + month=mar, + pages={261–272}, + language={en} } diff --git a/paper/paper.md b/paper/paper.md new file mode 100644 index 0000000..0644245 --- /dev/null +++ b/paper/paper.md @@ -0,0 +1,153 @@ +--- +title: "SNAzzy: an image processing pipeline for investigating global Synchronous Network Activity" +tags: + - Calcium imaging + - Widefield microscopy + - Image analysis + - Drosophila + - Spontaneous Neural Activity + - Neurodevelopment + - Circuit Wiring +authors: + - name: Carlos Damiani Paiva + orcid: 0009-0007-6658-2620 + affiliation: "1, 2" + - name: Alana J. Evora + orcid: 0009-0002-3174-7839 + affiliation: "1, 2" + - name: Shirui Zheng + orcid: 0009-0006-1836-7208 + affiliation: "1, 2" + - name: Arnaldo Carreira-Rosario + orcid: 0000-0003-0202-3858 + corresponding: true + affiliation: "1, 2" +affiliations: + - name: Department of Biology, Duke University, Durham, NC 27708 + index: 1 + - name: Department of Neurobiology, Duke University, Durham, NC 27708 + index: 2 +date: 8 October 2025 +bibliography: paper.bib +--- + +# Summary + +Genetically encoded fluorescent indicators are powerful tools for monitoring biological processes in live samples [@lin:2016; @nakai:2001]. +When combined with a large field of view, a single time-lapse recording has the potential to capture many specimens, facilitating high-throughput data collection. +However, the simultaneous recording of many biological samples across time points produces large, multidimensional datasets that are challenging to process and analyze. +We present `SNAzzy`, a Python package for studying synchronous network activity (SNA) in Drosophila embryos via high-throughput microscopy. +SNA is a hallmark of developing nervous systems [@wu:2024; @blankenship:2009; @akin:2020], often studied using genetically encoded calcium indicators to monitor neural activity in vivo. 
+`SNAzzy` processes and analyzes time-lapse datasets taken from live samples using fluorescent widefield microscopy. +Each dataset contains dozens of individual specimens in the same field of view and thousands of time points. +The software offers individual specimen cropping for optimization of storage and processing, adaptive regions of interest for quantification of fluorescence and changes in morphology over time, a custom peak detection algorithm, and a graphical user interface for data visualization, curation, and dataset comparison. +This tool can be readily applied to analyze fluorescent intensities in time-lapse microscopy experiments that involve simultaneous imaging of multiple samples, particularly small-sized specimens [@donoughe:2018; @avasthi:2023]. + +# Statement of need + +During synchronous network activity (SNA), many neurons fire synchronously, generating waves of activity that span across large portions of the nervous system [@blankenship:2009; @wu:2024; @akin:2020]. +In Drosophila embryos, SNA typically lasts 4 hours, during which the nervous system undergoes a stereotyped morphological change via ventral nerve cord condensation [@crisp:2008; @carreira:2021]. +To gain an understanding of SNA, it is essential to quantify waves of activity in the nervous system while also tracking morphology as a proxy of neurodevelopment. +For these reasons, we combine a commonly used genetically encoded calcium indicator (GECI) that reports neural activity with a structural fluorophore [@carreira:2021]. +The structural fluorophore signal remains stable, independent of neural activity, making it suitable for continuously tracking the morphology of the nerve cord. +To record many embryos during SNA, we use a widefield fluorescence microscopy system that captures the GECI and structural fluorophore signal of dozens of developing embryos for over 5 hours. 
+ +We were unable to find a tool designed for widefield microscopy that rapidly processes multiple specimens, quantifies levels of fluorophore activity, and incorporates a peak-finding algorithm suitable for global calcium traces. +`SNAzzy` is designed to investigate global levels of neural activity across multiple developing embryos simultaneously. + +## Tracking of multiple “adaptive ROIs” + +To the best of our knowledge, there are no other packages that provide functionality for automated parsing of raw images of many live specimens into activity and morphological quantifications. +Other studies have employed manual selection of regions of interest (ROIs) and used static ROIs [@akin:2020; @menzies:2024; @ardiel:2022; @carreira:2021]. +Manual selection often generates imprecise ROIs, which can lead to inaccurate quantifications, and is also cumbersome and prone to human error. +Static ROIs are not reliable for detecting the fluorescent signal of live specimens that change in morphology and move during imaging. +`SNAzzy` fills these gaps as an accessible pipeline for the automated analysis of multiple live samples in parallel. +The pipeline generates an “adaptive ROI” that changes frame-by-frame for each specimen. +This enables the accurate tracking of fluorescence intensity as well as changes in tissue morphology or size. +`SNAzzy`’s design provides an automated, modular, and fully auditable workflow, and ultimately contributes to more reproducible and comparable results across experiments. + +## Capturing global calcium dynamics + +To the best of our knowledge, there are no open-source packages that provide tools for performing automated data analysis and quantification of global calcium dynamics. +Most open-source tools available for analyzing neural activity using GECIs focus on segmenting individual neurons within a single specimen. +`CaImAn` [@giovannucci:2019] and `Suite2p` [@pachitariu:2016] are among the most widely used. 
+These packages detect calcium dynamics and use individual neuron statistics to perform spike inference, but do not offer direct peak detection on the calcium signal. +Furthermore, they are optimized for two-photon microscopy as opposed to widefield microscopy. +`SNAzzy` provides a series of automated analyses and quantifications to analyze global calcium levels in time series acquired with widefield microscopes. + +![**Schematic of the SNAzzy pipeline.** +Time-lapse data taken from fluorescent widefield microscopes (raw data) enters the processing stage (green). +The processing stage outputs two types of CSV files: time series of signal intensities from each recorded channel and ROI length. +CSV files enter the analysis stage (blue) to generate normalized fluorescent traces and detect peaks along with other signal processing metrics. +These initial traces can be visualized to curate the data. +Curation generates a configuration file that works as metadata across platforms and users. +Curated data can be reanalyzed and used to visualize final data and compare across groups (yellow). +Analysis and output stages are performed in the GUI (red dashed box), along with other metrics. +Dashed arrows indicate optional steps.\label{fig:fig1}](figures/snazzy-fig1.png) + +# Pipeline Description + +The initial input for `SNAzzy` (\autoref{fig:fig1}) is raw time-lapse imaging data containing multiple embryos. +Each embryo expresses a GECI (dynamic fluorophore) and a structural fluorophore. +Fluorophores are imaged in different optical channels. + +The first pipeline step converts the raw data to TIF format, thereby avoiding compatibility issues that may arise when parsing different proprietary formats (\autoref{fig:fig1}). +All embryos are then segmented using histogram equalization, followed by intensity threshold binarization [@otsu:1979]. +Boxes surrounding the segments are cropped into individual time-lapses for each embryo. 
+Cropping results in a substantial memory reduction, as most background pixels are removed, with cropped images typically accounting for around 40% of the original size. + +The next step is to process each individual specimen. +First, the ROI, which in our case is the entire central nervous system (CNS), is defined by binarizing the structural channel and selecting the largest connected component. +This process is repeated at every time point to generate an “adaptive ROI”. +From these adaptive ROIs, the average signal intensity for both channels is extracted. +The results are saved as CSV files and are the basis for downstream analysis. + +![**ROI length measurement algorithm and validation.** +A) Steps to calculate the ROI length. +The ROI length is calculated by estimating the centerline (red line) using points of maximum (dots) in the distance transform, followed by RANSAC to ignore outliers (orange dots). +B) Validation of the method, shown as the relative error: (measured - annotated) / annotated. +Each whisker bar summarizes the relative error for frames taken at intervals of 50 timepoints. +C) Comparison of absolute values over a time series for three representative embryos.\label{fig:fig2}](figures/snazzy-fig2.png) + +The ROI is also used to measure the length of the CNS (\autoref{fig:fig2}). +Drosophila embryo CNS length serves as an internal proxy for neurodevelopmental stages, enabling more accurate comparisons across embryos [@carreira:2021]. +The CNS length is calculated by centerline estimation. +First, a distance transform is applied to the binarized image, and local maxima points are detected. +Depending on the embryo's orientation, some points may be part of the brain lobes and must be filtered out to accurately measure the CNS length. +To obtain a robust centerline estimate that can ignore outliers, we use RANSAC [@fischler:1981] over the local maxima points and measure the overlap between the fitted line and the binary image. 
+CNS length is also detected frame by frame and exported as a CSV file (\autoref{fig:fig1}). + +![**Peak detection algorithm.** +A low-pass filter (orange line) is applied to the ∆F/F signal (black line) to remove fast transients. +The peak in the filtered signal (orange dot) is then ported back to the ∆F/F signal (blue dot) by selecting the leftmost peak within a search window (blue lines).\label{fig:fig3}](figures/snazzy-fig3.png) + +The package utilizes average signal intensity measurements to calculate ∆F/F traces and peaks. +For ∆F/F, we first calculate the ratiometric signal (dynamic signal / structural signal) and then its baseline, which is defined as the average of the N lowest values within a sliding window. +The generated ∆F/F traces contain long-duration bouts of activity with superimposed fast transients (\autoref{fig:fig3}). +The former represent the bursts of activity and are the most relevant for the initial analysis. +To mark only these more prolonged bouts, we apply a low-pass frequency filter to omit transients. +Peaks in the filtered trace are detected using SciPy [@virtanen:2020]. +Finally, the detected peaks are ported to the original ∆F/F signal. + +Results can be visualized and curated in a graphical user interface (GUI) implemented in `PyQt6` (\autoref{fig:fig4}). +During curation, researchers can modify data analysis parameters, which are persisted in a JSON configuration file and utilized by the core analysis code across different machines and users. +Finally, a large number of different metrics and representations derived from ∆F/F, CNS length, and peaks can be visualized and plotted using the GUI. +These include SNA onset, burst duration and spectrograms, among others. + +![**GUI for data validation, curation, visualization and plotting.** +Initial GUI screen. +A ∆F/F trace (white) and the corresponding peaks (magenta dots) are shown. +The low-passed signal (green line) is used as a reference to determine peaks. 
+The GUI enables the modification of analysis parameters, visualization of data, and comparison of metrics across groups of experiments, as well as manual adjustment of peak data.\label{fig:fig4}](figures/snazzy-fig4.png) + +In conclusion, genetically encoded fluorescent indicators and microscopy systems are evolving rapidly, increasing the data acquisition throughput. +Custom open-source tools are needed to handle such large data files. +`SNAzzy` addresses this by offering an automated, scalable, and user-friendly platform for analyzing synchronous network activity in developing embryos. +As an open and versatile solution, `SNAzzy` offers tools for a broader range of applications in time-lapse fluorescence imaging across diverse biological systems. + +# Acknowledgments + +We acknowledge Newton PenkoffLidbeck and D. Berfin Azizoglu for feedback on the manuscript. +This work was partially funded by NINDS and the BRAIN Initiative (R00NS119295). + +# References diff --git a/snazzy_analysis/README.md b/snazzy_analysis/README.md index b63530c..fb8366d 100644 --- a/snazzy_analysis/README.md +++ b/snazzy_analysis/README.md @@ -1,15 +1,21 @@ # Snazzy Analysis Data analysis for `snazzy_processing`'s pipeline output. + +### Running the code + +Copy the results of processing a dataset with `snazzy_processing` to the `data` directory inside this package first. +To visualize individual sample movies, the cropped movies inside `snazzy_processing/data/{dataset_name}/embs` should be copied too. +See the 'Adding data' section for more details. ### Organization -* `pasna_analysis`: contains the core code used in all analyses -* `pasna_analysis/gui`: GUI code +* `snazzy_analysis`: contains the core code used in all analyses +* `snazzy_analysis/gui`: GUI code * `tests`: contains tests for the code * `data`: contains the data for the analysis. This folder is not tracked, and should be populated in your local copy * `results`: contains the results of the analyses. 
It is not tracked either and will be populated by performing the analyses -* `notebooks`: contains examples and the front-end for using the `pasna_analysis` main module +* `notebooks`: contains examples and the front-end for using the `snazzy_analysis` main module ### Analyses @@ -19,7 +25,7 @@ There are also jupyter notebooks available, which can be used alternatively and ### Adding data -Each experiment should have one corresponding folder inside `./data/`. +Each dataset should have one corresponding folder inside `./data/`. The file structure inside the `data` folder should look like: The `embs` directory is used if you want to inspect movies inside the GUI. The files are generated with `snazzy_processing`, as long as the flag `clean_up_data` in there (inside snazzy_processing_pipeline.ipynb) is set to `False`. diff --git a/snazzy_analysis/notebooks/baseline_methods.ipynb b/snazzy_analysis/notebooks/baseline_methods.ipynb deleted file mode 100644 index 6e6950d..0000000 --- a/snazzy_analysis/notebooks/baseline_methods.ipynb +++ /dev/null @@ -1,151 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# Visualize baseline methods\n", - "**Description:** This notebook presents the baseline methods available in `pasna_analysis` and the resulting DFF signals. " - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Import Libraries" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "%load_ext autoreload\n", - "%autoreload 2\n", - "from pathlib import Path\n", - "import matplotlib.pyplot as plt\n", - "\n", - "from snazzy_analysis import Experiment, Trace" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Load Data\n", - "\n", - "Specify the location of the data by editing `folder` and `experiment_name`. Then specify the embryo." 
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "folder = \"25C\" # HERE\n", - "experiment_name = \"20240919_25C\" # HERE\n", - "\n", - "exp_path = Path.cwd().parent.joinpath(\"data\", folder, experiment_name)\n", - "exp = Experiment(exp_path)\n", - "\n", - "embryos = list(exp.embryos)\n", - "emb = embryos[0] # HERE\n", - "trace = emb.trace\n", - "\n", - "print(f\"\\n\\nLoading {folder} {experiment_name} {emb.name}\")" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Baseline Methods\n", - "\n", - "### Local minima baseline\n", - "\n", - "The baseline value at time `t` is the average of the `n` lowest values, selected from a window of length `ws`, centered at point `t`.\n", - "\n", - "### Most frequent bin average baseline\n", - "\n", - "The baseline value at time `t` is the average of the values that fall inside the most frequent bin.\n", - "The first step is to calculate the histogram from a window of length `ws`.\n", - "The values of the bin with the highest number of elements are averaged and used as the baseline.\n", - "This assumes that the peaks are sparse in comparison to the baseline, so that when you pick the most frequent bin it will fall under the baseline range.\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "emb_index = 9 # Change emb_index to use another embryo\n", - "\n", - "emb = embryos[emb_index]\n", - "trace = emb.trace\n", - "\n", - "ws = 161\n", - "num_minima_points = 21\n", - "\n", - "fig, (ax0, ax1) = plt.subplots(2, figsize=(10, 10))\n", - "\n", - "fig.suptitle(emb.name)\n", - "\n", - "ratiom_signal = trace.compute_ratiom_gcamp()\n", - "local_minima_baseline = Trace.average_n_lowest_window(\n", - " ratiom_signal, ws, num_minima_points\n", - ")\n", - "most_freq_baseline = trace.compute_baseline(ratiom_signal, ws)\n", - "\n", - "ax0.plot(ratiom_signal, label=\"Ratiometric 
signal\", color=\"green\")\n", - "ax0.plot(local_minima_baseline, label=f\"Average lower {num_minima_points}\")\n", - "ax0.plot(most_freq_baseline, label=\"Most frequent values\")\n", - "ax0.axvline(trace.trim_idx, color=\"r\")\n", - "ax0.legend()\n", - "\n", - "dff_local_minima = (ratiom_signal - local_minima_baseline) / local_minima_baseline\n", - "dff_most_freq = (ratiom_signal - most_freq_baseline) / most_freq_baseline\n", - "\n", - "ax1.plot(dff_local_minima, label=f\"DFF (Average lower {num_minima_points})\")\n", - "ax1.plot(dff_most_freq, label=\"DFF (Most frequent values)\")\n", - "ax1.axvline(trace.trim_idx, color=\"r\")\n", - "ax1.legend()\n", - "\n", - "plt.show()" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "As can be seen in the plots above, the local minima method creates a smoother baseline.\n", - "This directly impacts on the resulting DFF signal (dff = (ratiometric_signal - baseline) / baseline).\n", - "The most frequent bin baseline method makes much stronger assumptions about the ratiometric signal distribution, which means that it tends to perform worse when the signal deviates from these assumptions.\n", - "The local minima method is more general, and tends to produce a baseline with values that follow the ratiometric signal more closely." 
- ] - } - ], - "metadata": { - "kernelspec": { - "display_name": "golf-env", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.11.13" - } - }, - "nbformat": 4, - "nbformat_minor": 2 -} diff --git a/snazzy_analysis/notebooks/calculate_metrics.ipynb b/snazzy_analysis/notebooks/calculate_metrics.ipynb index 30ea923..b9ba9ce 100644 --- a/snazzy_analysis/notebooks/calculate_metrics.ipynb +++ b/snazzy_analysis/notebooks/calculate_metrics.ipynb @@ -6,7 +6,7 @@ "source": [ "# Calculate Metrics\n", "\n", - "**Description:** This notebook compares different groups of experiments, using the metrics described in each cell.\n", + "**Description:** This notebook compares different groups of datasets, using the metrics described in each cell.\n", "\n", "**Real Time**\n", "* Burst duration\n", @@ -52,7 +52,7 @@ "import seaborn as sns\n", "import pandas as pd\n", "\n", - "from snazzy_analysis import Experiment, Group, FrequencyAnalysis, utils, myplots, TracePhases" + "from snazzy_analysis import Dataset, Group, FrequencyAnalysis, utils, myplots" ] }, { @@ -68,26 +68,24 @@ "metadata": {}, "outputs": [], "source": [ - "wt_folder = \"25C\"\n", - "wt_config = [\"20240611_25C\", \"20240919_25C\", \"20250404_25C\"]\n", + "wt_config = [\"20240611\"]\n", "\n", - "rdl_folder = \"rdl\"\n", - "rdl_config = [\"20250211_rdl\", \"20250212_rdl\"]\n", + "exp_config = [\"20250206\"]\n", "\n", - "wt_experiments = {}\n", - "for exp in wt_config:\n", - " exp_path = Path.cwd().parent.joinpath(\"data\", wt_folder, exp)\n", - " wt_experiments[exp] = Experiment(exp_path)\n", + "wt_datasets = {}\n", + "for dataset in wt_config:\n", + " dataset_path = Path.cwd().parent.joinpath(\"data\", dataset)\n", + " wt_datasets[dataset] = Dataset(dataset_path)\n", "\n", - 
"rdl_experiments = {}\n", - "for exp in rdl_config:\n", - " exp_path = Path.cwd().parent.joinpath(\"data\", rdl_folder, exp)\n", - " rdl_experiments[exp] = Experiment(exp_path)\n", + "exp_datasets = {}\n", + "for dataset in exp_config:\n", + " dataset_path = Path.cwd().parent.joinpath(\"data\", dataset)\n", + " exp_datasets[dataset] = Dataset(dataset_path)\n", "\n", - "wt = Group(\"WT\", wt_experiments)\n", - "rdl = Group(\"Rdl-\", rdl_experiments)\n", + "wt = Group(\"WT\", wt_datasets)\n", + "exp = Group(\"Exp\", exp_datasets)\n", "\n", - "groups = [wt, rdl]" + "groups = [wt, exp]" ] }, { @@ -156,7 +154,7 @@ "source": [ "## Sample Size\n", "\n", - "Prints a table displaying the total sample size for each group, plus the sample size for each experiment in a group. " + "Prints a table displaying the total sample size for each group, plus the sample size for each dataset in a group. " ] }, { @@ -165,23 +163,23 @@ "metadata": {}, "outputs": [], "source": [ - "data = {\"Group\": [], \"Exp\": [], \"Emb\": []}\n", + "data = {\"group\": [], \"exp\": [], \"emb\": []}\n", "\n", "for group in groups:\n", - " for exp in group.experiments.values():\n", - " for emb in exp.embryos:\n", - " data[\"Group\"].append(group.name)\n", - " data[\"Exp\"].append(exp.name)\n", - " data[\"Emb\"].append(emb)\n", + " for dataset in group.datasets.values():\n", + " for emb in dataset.embryos:\n", + " data[\"group\"].append(group.name)\n", + " data[\"exp\"].append(dataset.name)\n", + " data[\"emb\"].append(emb)\n", "\n", "data = pd.DataFrame(data)\n", "\n", "# calculate total Ns per group\n", - "num_per_exp = data.groupby([\"Group\", \"Exp\"]).size().reset_index(name=\"N\")\n", - "num_per_group = data.groupby(\"Group\").size().reset_index(name=\"N\")\n", - "num_per_group[\"Exp\"] = \"**Total**\"\n", - "num_per_group = num_per_group[[\"Group\", \"Exp\", \"N\"]]\n", - "combined = pd.concat([num_per_exp, num_per_group], ignore_index=True)\n", + "num_per_dataset = data.groupby([\"group\", 
\"exp\"]).size().reset_index(name=\"N\")\n", + "num_per_group = data.groupby(\"group\").size().reset_index(name=\"N\")\n", + "num_per_group[\"exp\"] = \"**Total**\"\n", + "num_per_group = num_per_group[[\"group\", \"exp\", \"N\"]]\n", + "combined = pd.concat([num_per_dataset, num_per_group], ignore_index=True)\n", "flipped = combined[::-1]\n", "\n", "print(flipped.to_string(index=False))" @@ -210,8 +208,8 @@ "data = {\"Group\": [], \"Burst Duration (min)\": [], \"Burst #\": []}\n", "\n", "for group in groups:\n", - " for exp in group.experiments.values():\n", - " for emb in exp.embryos:\n", + " for dataset in group.datasets.values():\n", + " for emb in dataset.embryos:\n", " trace = emb.trace\n", " for i, duration in zip(range(num_episodes), trace.peak_durations):\n", " data[\"Group\"].append(group.name)\n", @@ -246,8 +244,8 @@ "data = {\"Group\": [], \"Rise Duration (sec)\": [], \"Burst #\": []}\n", "\n", "for group in groups:\n", - " for exp in group.experiments.values():\n", - " for emb in exp.embryos:\n", + " for dataset in group.datasets.values():\n", + " for emb in dataset.embryos:\n", " trace = emb.trace\n", " for i, rise in zip(range(num_episodes), trace.peak_rise_times):\n", " data[\"Group\"].append(group.name)\n", @@ -282,8 +280,8 @@ "data = {\"Group\": [], \"Decay Duration (sec)\": [], \"Burst #\": []}\n", "\n", "for group in groups:\n", - " for exp in group.experiments.values():\n", - " for emb in exp.embryos:\n", + " for dataset in group.datasets.values():\n", + " for emb in dataset.embryos:\n", " trace = emb.trace\n", " for i, decay in zip(range(num_episodes), trace.peak_decay_times):\n", " data[\"Group\"].append(group.name)\n", @@ -318,8 +316,8 @@ "data = {\"Group\": [], \"Interval Duration (min)\": [], \"Burst #\": []}\n", "\n", "for group in groups:\n", - " for exp in group.experiments.values():\n", - " for emb in exp.embryos:\n", + " for dataset in group.datasets.values():\n", + " for emb in dataset.embryos:\n", " trace = emb.trace\n", " for i, 
interval in zip(range(num_episodes), trace.peak_intervals):\n", " data[\"Group\"].append(group.name)\n", @@ -354,8 +352,8 @@ "data = {\"Group\": [], \"Duration (min)\": []}\n", "\n", "for group in groups:\n", - " for exp in group.experiments.values():\n", - " for emb in exp.embryos:\n", + " for dataset in group.datasets.values():\n", + " for emb in dataset.embryos:\n", " trace = emb.trace\n", " if len(trace.peak_idxes) == 0:\n", " continue\n", @@ -393,9 +391,9 @@ "data = {\"Group\": [], \"Dev Time\": []}\n", "\n", "for group in groups:\n", - " for exp in group.experiments.values():\n", + " for dataset in group.datasets.values():\n", " num = 0\n", - " for emb in exp.embryos:\n", + " for emb in dataset.embryos:\n", " num = num + 1\n", " trace = emb.trace\n", " if len(trace.peak_idxes) == 0:\n", @@ -428,9 +426,9 @@ "data = {\"Group\": [], \"Dev Time\": []}\n", "\n", "for group in groups:\n", - " for exp in group.experiments.values():\n", + " for dataset in group.datasets.values():\n", " num = 0\n", - " for emb in exp.embryos:\n", + " for emb in dataset.embryos:\n", " num = num + 1\n", " trace = emb.trace\n", " if len(trace.peak_idxes) == 0:\n", @@ -468,8 +466,8 @@ "data = {\"Group\": [], \"Dev Time\": [], \"Burst #\": []}\n", "\n", "for group in groups:\n", - " for exp in group.experiments.values():\n", - " for emb_n, emb in enumerate(exp.embryos):\n", + " for dataset in group.datasets.values():\n", + " for emb_n, emb in enumerate(dataset.embryos):\n", " trace = emb.trace\n", " if len(trace.peak_idxes) == 0:\n", " continue\n", @@ -501,8 +499,8 @@ "data = {\"Group\": [], \"Dev Time\": []}\n", "\n", "for group in groups:\n", - " for exp in group.experiments.values():\n", - " for emb in exp.embryos:\n", + " for dataset in group.datasets.values():\n", + " for emb in dataset.embryos:\n", " trace = emb.trace\n", " if len(trace.peak_idxes) == 0:\n", " continue\n", @@ -538,8 +536,8 @@ "data = {\"Group\": [], \"Amplitude\": [], \"Burst #\": []}\n", "\n", "for group in 
groups:\n", - " for exp in group.experiments.values():\n", - " for emb in exp.embryos:\n", + " for dataset in group.datasets.values():\n", + " for emb in dataset.embryos:\n", " t = emb.trace\n", " for i, amp in zip(range(num_episodes), t.peak_amplitudes):\n", " data[\"Amplitude\"].append(amp)\n", @@ -573,8 +571,8 @@ "bin_width = 0.2\n", "\n", "for group in groups:\n", - " for exp in group.experiments.values():\n", - " for emb in exp.embryos:\n", + " for dataset in group.datasets.values():\n", + " for emb in dataset.embryos:\n", " trace = emb.trace\n", " dev_time_at_peaks = emb.get_DT_from_time(trace.peak_times)\n", " bins = [first_bin + j * bin_width for j in range(n_bins)]\n", @@ -610,8 +608,8 @@ "data = {\"Group\": [], \"# Local Peaks\": [], \"Burst #\": []}\n", "\n", "for group in groups:\n", - " for exp in group.experiments.values():\n", - " for emb in exp.embryos:\n", + " for dataset in group.datasets.values():\n", + " for emb in dataset.embryos:\n", " trace = emb.trace\n", " local_peaks = trace.compute_local_peaks(height=0.03, prominence=0.02)\n", " for i, lp in zip(range(15), local_peaks):\n", @@ -647,8 +645,8 @@ "bins = np.arange(start=first_bin, stop=first_bin + n_bins * bin_width, step=bin_width)\n", "\n", "for group in groups:\n", - " for exp in group.experiments.values():\n", - " for emb in exp.embryos:\n", + " for dataset in group.datasets.values():\n", + " for emb in dataset.embryos:\n", " trace = emb.trace\n", " time_bins, idx_offset = emb.get_time_bins(bins)\n", " time_intervals = np.diff(time_bins) / 3600\n", @@ -699,8 +697,8 @@ "bins = [first_bin + j * bin_width for j in range(n_bins)]\n", "\n", "for group in groups:\n", - " for exp in group.experiments.values():\n", - " for emb in exp.embryos:\n", + " for dataset in group.datasets.values():\n", + " for emb in dataset.embryos:\n", " trace = emb.trace\n", " if len(trace.peak_idxes) == 0:\n", " continue\n", @@ -735,338 +733,6 @@ " xlabels=x_labels,\n", ")" ] - }, - { - "cell_type": "markdown", - 
"metadata": {}, - "source": [ - "## DFF signal overlayed with low passed signal" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "emb = wt.experiments[\"20240919_25C\"].embryos[9]\n", - "print(emb.name)\n", - "trace = emb.trace\n", - "\n", - "time, dff = trace.preprocess_dff()\n", - "\n", - "low_pass_dff = FrequencyAnalysis.apply_lopass_filter(dff, 0.006)\n", - "\n", - "overlay_rc = group_metrics_rc.copy()\n", - "overlay_rc[\"figure.figsize\"] = (15, 3)\n", - "myplots.plot_trace_with_overlay(\n", - " time,\n", - " dff,\n", - " low_pass_dff,\n", - " overlay_rc,\n", - " color=\"#0273b2ff\",\n", - " xmax=250,\n", - " ymax=1,\n", - " yinterval=0.5,\n", - ")" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Low frequency and high frequency events" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "emb = wt.experiments[\"20240919_25C\"].embryos[9]\n", - "trace = emb.trace\n", - "\n", - "local_peaks, _ = spsig.find_peaks(\n", - " trace.dff[: trace.trim_idx], height=0.09, prominence=0.03\n", - ")\n", - "local_peaks = [\n", - " lp for lp in local_peaks if not np.any(np.abs(trace.peak_idxes - lp) <= 10)\n", - "]\n", - "amp_local_peaks = trace.dff[local_peaks]\n", - "amp_bursts = trace.dff[trace.peak_idxes]\n", - "\n", - "fig, ax = plt.subplots(figsize=(19, 4))\n", - "ax.plot(trace.dff[: trace.trim_idx])\n", - "ax.plot(local_peaks, amp_local_peaks, \"r.\")\n", - "ax.plot(trace.peak_idxes, amp_bursts, \"k.\")\n", - "ax.set_title(emb.name)\n", - "plt.show()" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Number of high frequency events in first hour and last hour of development" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "trace = wt.experiments[\"20240611_25C\"].embryos[4].trace\n", - "\n", - "\n", - "def 
get_hf_magnitude(signal, freq_cutoff, fs=1 / 6):\n", - " N = len(signal)\n", - " freqs = np.fft.rfftfreq(N, 1 / fs)\n", - " fft = np.fft.rfft(signal)\n", - " mask = freqs > freq_cutoff\n", - " filtered_fft = fft * mask\n", - " return np.sum(np.abs(filtered_fft))\n", - "\n", - "\n", - "data = {\"condition\": [], \"value\": [], \"emb_id\": [], \"experiment\": []}\n", - "\n", - "for experiment in wt.experiments.values():\n", - " for emb in experiment.embryos:\n", - " trace = emb.trace\n", - " first_peak = trace.peak_idxes[0]\n", - " last_peak = trace.peak_idxes[-2]\n", - " early_magnitude = get_hf_magnitude(\n", - " trace.dff[first_peak - 50 : first_peak + 550], 0.01\n", - " )\n", - " late_magnitude = get_hf_magnitude(trace.dff[last_peak - 600 : last_peak], 0.01)\n", - " data[\"experiment\"].append(experiment.name)\n", - " data[\"experiment\"].append(experiment.name)\n", - " data[\"emb_id\"].append(experiment.name + emb.name)\n", - " data[\"emb_id\"].append(experiment.name + emb.name)\n", - " data[\"value\"].append(early_magnitude)\n", - " data[\"condition\"].append(\"hf_early\")\n", - " data[\"value\"].append(late_magnitude)\n", - " data[\"condition\"].append(\"hf_late\")\n", - "\n", - "df = pd.DataFrame(data)\n", - "\n", - "hf_late = df[df[\"condition\"] == \"hf_late\"][\"value\"].values\n", - "hf_early = df[df[\"condition\"] == \"hf_early\"][\"value\"].values\n", - "stat, pval = ttest_rel(hf_late, hf_early)\n", - "print(f\"Paired t-test: t={stat:.2f}, p={pval}\")\n", - "\n", - "fig, ax = plt.subplots(figsize=(2, 3))\n", - "\n", - "sns.barplot(\n", - " data=data,\n", - " x=\"condition\",\n", - " y=\"value\",\n", - " ax=ax,\n", - ")\n", - "\n", - "for emb_id, pair in df.groupby(\"emb_id\"):\n", - " pair_sorted = pair.sort_values(\"condition\")\n", - " plt.plot(\n", - " pair_sorted[\"condition\"], pair_sorted[\"value\"], marker=\".\", color=\"k\", alpha=0.7\n", - " )\n", - "\n", - "ax.set_title(\"Early and Late High Frequency Events Magnitude\")\n", - 
"ax.set_ylabel(\"Magnitude\")\n", - "plt.show()" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Binned count of high frequency events" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "data = {\n", - " \"emb_id\": [],\n", - " \"bin_idx\": [],\n", - " \"value\": [],\n", - " \"type\": [],\n", - "}\n", - "\n", - "\n", - "def find_hf_events(trace):\n", - " local_peaks, _ = spsig.find_peaks(\n", - " trace.aligned_dff[: trace.trim_idx], height=0.1, prominence=0.05\n", - " )\n", - " return np.array(\n", - " [lp for lp in local_peaks if not np.any(np.abs(trace.peak_idxes - lp) <= 10)]\n", - " )\n", - "\n", - "\n", - "def hits_per_bin(values, bin_width, num_bins):\n", - " bin_idx = values // bin_width\n", - " return np.bincount(bin_idx, minlength=num_bins)\n", - "\n", - "\n", - "n_bins = 15\n", - "first_bin = 0\n", - "bin_width = 250\n", - "bins = np.arange(start=first_bin, stop=first_bin + n_bins * bin_width, step=bin_width)\n", - "\n", - "for exp in wt.experiments.values():\n", - " onset = 0\n", - " for emb in exp.embryos:\n", - " trace = emb.trace\n", - "\n", - " hf_peaks = find_hf_events(trace)\n", - "\n", - " hf_bin_count = hits_per_bin(hf_peaks, bin_width, n_bins)\n", - " onset = trace.peak_bounds_indices[0][0]\n", - " onset = onset - 300 if onset > 300 else onset\n", - " lf_bin_count = hits_per_bin(trace.peak_idxes - onset, bin_width, n_bins)\n", - "\n", - " # hf entries\n", - " data[\"emb_id\"].extend([exp.name + emb.name] * len(bins))\n", - " data[\"bin_idx\"].extend(bins)\n", - " data[\"value\"].extend(hf_bin_count)\n", - " data[\"type\"].extend([\"hf_event\"] * len(bins))\n", - " # lf entries\n", - " data[\"emb_id\"].extend([exp.name + emb.name] * len(bins))\n", - " data[\"bin_idx\"].extend(bins)\n", - " data[\"value\"].extend(lf_bin_count)\n", - " data[\"type\"].extend([\"lf_event\"] * len(bins))\n", - "\n", - "data = pd.DataFrame(data)\n", - "\n", - "x_labels = [b // 
10 if i % 3 == 0 else \"\" for i, b in enumerate(bins)]\n", - "\n", - "myplots.plot_pointplot(\n", - " data,\n", - " x=\"bin_idx\",\n", - " y=\"value\",\n", - " category=\"type\",\n", - " rc=episode_metrics_rc,\n", - " xlabels=x_labels,\n", - ")\n", - "\n", - "plt.tight_layout()" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Phase 1 End" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "emb = wt.experiments[\"20240919_25C\"].embryos[5]\n", - "trace = emb.trace\n", - "\n", - "tp = TracePhases(trace)\n", - "\n", - "features = tp.phase1_features(hf_cutoff=0.01)\n", - "dm = tp.dist_matrix(features)\n", - "thres = tp.feature_thres(dm, num_classes=2)\n", - "change_index = tp.segment_distance_matrix_forward(dm, thres)\n", - "tp.plot_phase_change(dm, change_index, features)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Developmental time at first burst, phase 1 end, and hatching" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "data = {\"emb_id\": [], \"metric\": [], \"value\": []}\n", - "\n", - "for exp in wt.experiments.values():\n", - " for emb in exp.embryos:\n", - "\n", - " data[\"emb_id\"].append(exp.name + emb.name)\n", - " data[\"metric\"].append(\"dt_first_peak\")\n", - " initial_dt = emb.get_DT_from_time(emb.trace.time[emb.trace.peak_idxes[0]])\n", - " data[\"value\"].append(initial_dt)\n", - "\n", - " data[\"emb_id\"].append(exp.name + emb.name)\n", - " data[\"metric\"].append(\"dt_phase1_end\")\n", - " tp = TracePhases(emb.trace)\n", - " p1_end = tp.get_phase1_end(freq=0.010)\n", - " p1_end_time = emb.trace.time[p1_end]\n", - " p1_end_dt = emb.get_DT_from_time(p1_end_time)\n", - " data[\"value\"].append(p1_end_dt)\n", - "\n", - " data[\"emb_id\"].append(exp.name + emb.name)\n", - " data[\"metric\"].append(\"dt_hatching\")\n", - " final_dt = 
emb.get_DT_from_time(emb.trace.time[emb.trace.trim_idx])\n", - " data[\"value\"].append(final_dt)\n", - "\n", - "x_order = [\"dt_first_peak\", \"dt_phase1_end\", \"dt_hatching\"]\n", - "palette = sns.color_palette(\"pastel\", n_colors=len(x_order))\n", - "color_map = dict(zip(x_order, palette))\n", - "\n", - "fig, ax = plt.subplots(figsize=(10, 6))\n", - "sns.boxplot(\n", - " data=data,\n", - " x=\"metric\",\n", - " y=\"value\",\n", - " hue=\"metric\",\n", - " order=x_order,\n", - " ax=ax,\n", - ")\n", - "\n", - "for i, emb_id in enumerate(data[\"emb_id\"]):\n", - " x = data[\"metric\"][3 * i : 3 * i + 3]\n", - " y = data[\"value\"][3 * i : 3 * i + 3]\n", - " ax.plot(x, y, color=\"black\", linewidth=0.75)\n", - "\n", - "dts_first_peak = data[\"value\"][::3]\n", - "ax.plot(\n", - " [\"dt_first_peak\"] * len(dts_first_peak),\n", - " dts_first_peak,\n", - " marker=\"o\",\n", - " markerfacecolor=color_map[\"dt_first_peak\"],\n", - " markeredgecolor=\"k\",\n", - ")\n", - "\n", - "dts_phase1_end = data[\"value\"][1::3]\n", - "ax.plot(\n", - " [\"dt_phase1_end\"] * len(dts_first_peak),\n", - " dts_phase1_end,\n", - " marker=\"o\",\n", - " markerfacecolor=color_map[\"dt_phase1_end\"],\n", - " markeredgecolor=\"k\",\n", - ")\n", - "\n", - "dts_hatching = data[\"value\"][2::3]\n", - "ax.plot(\n", - " [\"dt_hatching\"] * len(dts_first_peak),\n", - " dts_hatching,\n", - " marker=\"o\",\n", - " markerfacecolor=color_map[\"dt_hatching\"],\n", - " markeredgecolor=\"k\",\n", - ")\n", - "\n", - "plt.ylim(1.5, 3)\n", - "plt.title(\"WT embryos\")\n", - "plt.tight_layout()\n", - "plt.show()" - ] } ], "metadata": { diff --git a/snazzy_analysis/notebooks/plot_group_traces.ipynb b/snazzy_analysis/notebooks/plot_group_traces.ipynb index a8fd470..4eb4e78 100644 --- a/snazzy_analysis/notebooks/plot_group_traces.ipynb +++ b/snazzy_analysis/notebooks/plot_group_traces.ipynb @@ -5,7 +5,7 @@ "metadata": {}, "source": [ "# Plot Group Traces\n", - "**Description:** This notebook visualizes the 
signals for all embryos within a group (across experiments). It specifically uses the **preprocessed** trace information which defaults to finding the first burst and trimming the trace and time 30 mins before that burst. These parameters can be changed in the Trace class. \n", + "**Description:** This notebook visualizes the signals for all embryos within a group (across datasets). It specifically uses the **preprocessed** trace information which defaults to finding the first burst and trimming the trace and time 30 mins before that burst. These parameters can be changed in the Trace class. \n", "\n", "* All raw signals (active and structural)\n", "* All dff traces\n", @@ -32,7 +32,7 @@ "import numpy as np\n", "from scipy.signal import savgol_filter\n", "\n", - "from snazzy_analysis import Group, Experiment, FrequencyAnalysis, myplots" + "from snazzy_analysis import Group, Dataset, FrequencyAnalysis, myplots" ] }, { @@ -41,7 +41,7 @@ "source": [ "## Load Data\n", "\n", - "Specify the location of the data by editing `folder` and entering experiments. All of the embryos in those experiments will be processed togehter." + "Specify the datasets to load by editing `group_config`. All of the embryos in those datasets will be processed together." 
] }, { @@ -50,17 +50,16 @@ "metadata": {}, "outputs": [], "source": [ - "folder = \"25C\"\n", - "group_config = [\"20250404_25C\", \"20240919_25C\", \"20240611_25C\"] # WT\n", + "group_config = [\"20240611\", \"20250206\"] # WT\n", "\n", - "experiments = {}\n", + "datasets = {}\n", "\n", - "for exp in group_config:\n", - " exp_path = Path.cwd().parent.joinpath(\"data\", folder, exp)\n", - " experiment_object = Experiment(exp_path)\n", - " experiments[exp] = experiment_object\n", + "for dataset in group_config:\n", + " dataset_path = Path.cwd().parent.joinpath(\"data\", dataset)\n", + " dataset_object = Dataset(dataset_path)\n", + " datasets[dataset] = dataset_object\n", "\n", - "group = Group(folder, experiments)" + "group = Group(\"WT\", datasets)" ] }, { @@ -134,8 +133,8 @@ "metadata": {}, "outputs": [], "source": [ - "for exp in group.experiments.values():\n", - " myplots.plot_raw_signals(exp.embryos, subplots_rc, title=exp.name)" + "for dataset in group.datasets.values():\n", + " myplots.plot_raw_signals(dataset.embryos, subplots_rc, title=dataset.name)" ] }, { @@ -162,9 +161,9 @@ "metadata": {}, "outputs": [], "source": [ - "for exp in group.experiments.values():\n", + "for dataset in group.datasets.values():\n", " myplots.plot_traces(\n", - " exp.embryos, subplots_rc, title=exp.name, color=\"#0273b2ff\", ymax=1.5\n", + " dataset.embryos, subplots_rc, title=dataset.name, color=\"#0273b2ff\", ymax=1.5\n", " )" ] }, @@ -206,9 +205,9 @@ "metadata": {}, "outputs": [], "source": [ - "for exp in group.experiments.values():\n", + "for dataset in group.datasets.values():\n", " myplots.plot_specs(\n", - " exp.embryos, mymap, subplots_rc, title=exp.name, display_colorbar=False\n", + " dataset.embryos, mymap, subplots_rc, title=dataset.name, display_colorbar=False\n", " )" ] }, @@ -247,8 +246,8 @@ "t_all = []\n", "Zxx_all = []\n", "\n", - "for exp in group.experiments.values():\n", - " for emb in exp.embryos:\n", + "for dataset in group.datasets.values():\n", + " for emb 
in dataset.embryos:\n", " dff = emb.trace.aligned_dff\n", " smoothed_dff = savgol_filter(dff, window_length=25, polyorder=4)\n", " f, t, Zxx = FrequencyAnalysis.calculate_STFT(smoothed_dff)\n", @@ -266,7 +265,7 @@ ], "metadata": { "kernelspec": { - "display_name": "pscope_analysis", + "display_name": "snazzy-env", "language": "python", "name": "python3" }, @@ -280,7 +279,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.11.12" + "version": "3.11.13" } }, "nbformat": 4, diff --git a/snazzy_analysis/notebooks/plot_individual_traces.ipynb b/snazzy_analysis/notebooks/plot_individual_traces.ipynb index 41723bb..6bb0f47 100644 --- a/snazzy_analysis/notebooks/plot_individual_traces.ipynb +++ b/snazzy_analysis/notebooks/plot_individual_traces.ipynb @@ -33,7 +33,7 @@ "import matplotlib as mpl\n", "from scipy.signal import savgol_filter\n", "\n", - "from snazzy_analysis import Experiment, FrequencyAnalysis, myplots" + "from snazzy_analysis import Dataset, FrequencyAnalysis, myplots" ] }, { @@ -42,7 +42,7 @@ "source": [ "## Load Data\n", "\n", - "Specify the location of the data by editing `folder` and `experiment_name`. Then specify the embryo." + "Specify the location of the data by editing `dataset_name`. Then specify the embryo, by picking an `emb_idx`." 
] }, { @@ -51,17 +51,17 @@ "metadata": {}, "outputs": [], "source": [ - "folder = \"25C\" # HERE\n", - "experiment_name = \"20240919_25C\" # HERE\n", + "dataset_name = \"20240611\" # HERE\n", + "emb_idx = 0 # HERE\n", "\n", - "exp_path = Path.cwd().parent.joinpath(\"data\", folder, experiment_name)\n", - "exp = Experiment(exp_path)\n", + "dataset_path = Path.cwd().parent.joinpath(\"data\", dataset_name)\n", + "dataset = Dataset(dataset_path)\n", "\n", - "embryos = list(exp.embryos)\n", - "emb = embryos[9] # HERE\n", + "embryos = list(dataset.embryos)\n", + "emb = embryos[emb_idx]\n", "trace = emb.trace\n", "\n", - "print(f\"\\n\\nLoading {folder} {experiment_name} {emb.name}\")" + "print(f\"\\n\\nLoading {dataset_name} {emb.name}\")" ] }, { @@ -355,7 +355,7 @@ ], "metadata": { "kernelspec": { - "display_name": "golf-env", + "display_name": "snazzy-env", "language": "python", "name": "python3" }, diff --git a/snazzy_analysis/snazzy_analysis/__init__.py b/snazzy_analysis/snazzy_analysis/__init__.py index c5c9231..5dec23f 100644 --- a/snazzy_analysis/snazzy_analysis/__init__.py +++ b/snazzy_analysis/snazzy_analysis/__init__.py @@ -1,8 +1,7 @@ from .frequency_analsyis import FrequencyAnalysis from .config import Config -from .trace_phases import TracePhases from .data_loader import DataLoader from .trace import BaselineStrategies, Trace from .embryo import Embryo -from .experiment import Experiment +from .dataset import Dataset from .group import Group diff --git a/snazzy_analysis/snazzy_analysis/config.py b/snazzy_analysis/snazzy_analysis/config.py index 5412ad3..10e8813 100644 --- a/snazzy_analysis/snazzy_analysis/config.py +++ b/snazzy_analysis/snazzy_analysis/config.py @@ -26,37 +26,88 @@ def set_decoder(obj): class ExpParams(BaseModel): """ - Exp params. + Params related to a Dataset. Attributes ---------- first_peak_threshold: int Minimum time, in minutes, for the first peak. Embryos with peaks before that will be ignored. 
to_exclude: list[str] | None - List of embryo names to be excluded. These embryos won't even be created. + List of embryo ids to be excluded. These embryos won't even be created. to_remove: list[str] | None - List of embryo names to be removed. These will be created and show up in the GUI as removed. - dff_strategy: "baseline" | "local_minima" - How to compute the dff baseline. + List of embryo ids to be removed. These will be created and show up in the GUI as removed. has_transients: boolean - If an experiment has transients, early peaks will be skipped. - has_dsna: boolean - If an experiment has embryos with dSNA, automatically calculates dSNA start and ignores peaks - that happen after that point. + If a dataset has transients, early peaks will be skipped. acquisition_period: int - The time (in seconds) interval between acquiring two successive frames. + The time (in seconds) interval between acquiring two successive frames in a given channel. """ first_peak_threshold: int = 30 to_exclude: set[str] = Field(default_factory=set) to_remove: set[str] = Field(default_factory=set) has_transients: bool = True - has_dsna: bool = False acquisition_period: int = 6 class PDParams(BaseModel): - """Parameters used in peak detection.""" + """Parameters used in peak detection. + + While this is a long list of parameters, the default values work well for many DFF traces. + `freq` and `peak_width` might require some tuning and the GUI has sliders for changing them. + They are exposed here to avoid hiding scattered magic values in the peak detection code and + to make them easier to change if necessary. + + Attributes + ---------- + peak_width: float + Value between 0 and 1 (inclusive), used to calculate peak width. + Passed to `scipy.signal.peak_widths`, see `rel_height` in that function for detailed description. + Default value is 0.98. + freq: float + Frequency cutoff used to find peaks. + See `trace.calculate_peaks` for details. + Default value is 0.0025. 
+ dff_strategy: str + Method used to calculate F0, and therefore dff, since dff = (F - F0) / F0. + Options are 'local_minima' (default) and 'baseline'. + See `trace.average_n_local_window` and `trace.compute_baseline` for method descriptions. + baseline_window_size: int + Number of points used to calculate F0. + Should be an odd number. + Defaults to 81. + trim_zscore: float + Z-value threshold used to determine hatching. + See `trace.trim_data` for more details. + Defaults to 0.35. + ISI_factor: float + The inter-spike interval (ISI) is used to ignore early ramps in signal that are misindentified as bursts. + If the interval between the first two peaks is greater than `average interval * ISI_factor`, that peak is ignored. + See `trace.remove_transients` for details. + low_amp_threshold: float + Ignores peaks that have amplitude lower than `low_amp_threshold * max_peak_amplitude`. + Defaults to 0.1. + fft_height: float + Minimum amplitude to detect peaks in the low-passed filtered signal. + Defaults to 0.04. + fft_prominence: float + Complements fft_height for detecting peaks in the freq domain. + See `trace.calculate_peaks` for details. + Defaults to 0.03. + local_thres_window_size: int + Window size used to filter peaks by local threshold. + See `trace.filter_peaks_by_local_threshold` for details. + Defaults to 300. + local_thres_value: float + Percentage of the local maximum, used to filter peaks. + See `trace.filter_peaks_by_local_threshold` for details. + Defaults to 75. + port_peaks_window_size: int + Window size to port peaks from filtered signal to dff signal. + Defaults to 30. + port_peaks_thres: float + Minimum value as a percentage of the maximum peak within `port_peaks_window_size` required when porting a peak to dff values. + Defaults to 70. 
+ """ peak_width: float = 0.98 freq: float = 0.0025 @@ -69,19 +120,35 @@ class PDParams(BaseModel): fft_prominence: float = 0.03 local_thres_window_size: int = 300 local_thres_value: float = 75.0 - local_thres_method: str = "percentile" port_peaks_window_size: int = 30 port_peaks_thres: float = 70.0 class EmbryoParams(BaseModel): + """Values that can be manually changed using the GUI, for a single Embryo. + + Attributes: + ----------- + wlen: int + window length (half size)used when adding or removing peaks. + defaults to 2. + manual_peaks: list[int] + Indices marked as a manual peak. + manual_remove: list[int] + Indices where calculated peaks will be ignored within that index +- wlen. + manual_widths: dict[str, Any] + Maps indices to start and end coordinates of a peak. + The index represents a peak. + manual_trim_idx: int + Index used as the trim_idx. + All dff data after trim_idx is ignored. + """ + wlen: int = 2 manual_peaks: list[int] = Field(default_factory=list) manual_remove: list[int] = Field(default_factory=list) manual_widths: dict[str, Any] = Field(default_factory={}) manual_trim_idx: int = -1 - manual_phase1_end: int = -1 - manual_dsna_start: int = -1 class ConfigObj(BaseModel): @@ -92,20 +159,20 @@ class ConfigObj(BaseModel): class Config: """ - Configuration data from Experiment class. + Configuration data from Dataset class. Falls back to default values if any values are missing. The default values are specified in the BaseModel subclasses above. Attributes: - exp_path (Path): + dataset_path (Path): Path to the `peak_detection_params.json` file. If not found, will hold the default params in memory. 
""" - def __init__(self, exp_path: Path): - self.exp_path = exp_path - self.config_path = exp_path / "peak_detection_params.json" + def __init__(self, dataset_path: Path): + self.dataset_path = dataset_path + self.config_path = dataset_path / "peak_detection_params.json" self.default_params = ConfigObj().dict() @@ -169,12 +236,6 @@ def get_corrected_peaks(self, emb_name): except KeyError: return None - def get_corrected_dsna_start(self, emb_name): - try: - return self.data["embryos"][emb_name]["manual_dsna_start"] - except KeyError: - return None - def save_manual_peak_data( self, emb_name, @@ -183,8 +244,6 @@ def save_manual_peak_data( removed_peaks=None, manual_widths=None, manual_trim_idx=None, - manual_phase1_end=None, - manual_dsna_start=None, ): if "embryos" not in self.data: self.data["embryos"] = {} @@ -196,8 +255,6 @@ def save_manual_peak_data( "manual_remove": [], "manual_widths": {}, "manual_trim_idx": -1, - "manual_phase1_end": -1, - "manual_dsna_start": -1, } if wlen is not None: @@ -210,9 +267,5 @@ def save_manual_peak_data( self.data["embryos"][emb_name]["manual_widths"] = manual_widths if manual_trim_idx is not None: self.data["embryos"][emb_name]["manual_trim_idx"] = manual_trim_idx - if manual_phase1_end is not None: - self.data["embryos"][emb_name]["manual_phase1_end"] = manual_phase1_end - if manual_dsna_start is not None: - self.data["embryos"][emb_name]["manual_dsna_start"] = manual_dsna_start self.save_params() diff --git a/snazzy_analysis/snazzy_analysis/data_loader.py b/snazzy_analysis/snazzy_analysis/data_loader.py index 324764d..510ef3a 100644 --- a/snazzy_analysis/snazzy_analysis/data_loader.py +++ b/snazzy_analysis/snazzy_analysis/data_loader.py @@ -4,12 +4,12 @@ class DataLoader: - """Access data about the current experiment. + """Loads data from a dataset. Attributes ---------- path: Path - The path that contains `pasnascope` output. Must follow the folder\ + The path that contains `snazzy_processing` output. 
Must follow the folder\ structure described in this project's README. """ @@ -23,7 +23,7 @@ def __init__(self, path: Path): self.check_embs_match() def check_files(self): - """Asserts that folder structure matches `pasnascope` output.""" + """Asserts that folder structure matches `snazzy_processing` output.""" if not self.path.exists(): raise ValueError(f"Path not found: {self.path}") paths = ( @@ -32,7 +32,7 @@ def check_files(self): ) if not all(path.exists() for path in paths): raise ValueError( - "Could not find expected files. Is this really a directory from `pasnascope`?" + "Could not find expected files. Is this really a directory from `snazzy_processing`?" ) def check_embs_match(self): diff --git a/snazzy_analysis/snazzy_analysis/experiment.py b/snazzy_analysis/snazzy_analysis/dataset.py similarity index 88% rename from snazzy_analysis/snazzy_analysis/experiment.py rename to snazzy_analysis/snazzy_analysis/dataset.py index 10d26a2..9811c3d 100644 --- a/snazzy_analysis/snazzy_analysis/experiment.py +++ b/snazzy_analysis/snazzy_analysis/dataset.py @@ -3,36 +3,36 @@ from snazzy_analysis import Config, DataLoader, Embryo, utils -class Experiment: - """Encapsulates data about all embryos for a given experiment. +class Dataset: + """Encapsulates data about all embryos for a given dataset. Attributes ---------- - exp_path: Path - Path with `pasnascope` output. + dataset_path: Path + Path with `snazzy_processing` output. config: Config | None Config obj. If not provided will look for config data saved as json in - the exp_path. If not found, a file with default params will be created. + the dataset_path. If not found, a file with default params will be created. kwargs: See `self._parse_kwargs` for list of valid keys passed as kwargs. 
""" def __init__( self, - exp_path: str | Path, + dataset_path: str | Path, config: Config | None = None, **kwargs, ): - exp_path = Path(exp_path) - self.directory = exp_path - self.config = config if config is not None else Config(exp_path) + dataset_path = Path(dataset_path) + self.directory = dataset_path + self.config = config if config is not None else Config(dataset_path) if kwargs: self._parse_kwargs(kwargs) self.exp_params = self.config.get_exp_params() - self.data_loader = DataLoader(exp_path) + self.data_loader = DataLoader(dataset_path) # persist config to file if it only exists in memory if not self.config.config_path.exists(): self.config.initialize_config_file() @@ -80,7 +80,7 @@ def _parse_kwargs(self, kwargs): ignored_params = [kw for kw in kwargs if kw not in valid_params] if ignored_params: print( - f"WARN: Some kwargs were ignored when creating a new Experiment: {ignored_params}." + f"WARN: Some kwargs were ignored when creating a new Dataset: {ignored_params}." ) exp_params_keys = self.config.default_params["exp_params"].keys() update_exp_params = {k: v for k, v in kwargs.items() if k in exp_params_keys} diff --git a/snazzy_analysis/snazzy_analysis/group.py b/snazzy_analysis/snazzy_analysis/group.py index 23eb698..b54cfa2 100644 --- a/snazzy_analysis/snazzy_analysis/group.py +++ b/snazzy_analysis/snazzy_analysis/group.py @@ -1,19 +1,19 @@ from dataclasses import dataclass -from snazzy_analysis import Experiment +from snazzy_analysis import Dataset @dataclass class Group: - """A group of experiments that should be analyzed together.""" + """A group of datasets that should be analyzed together.""" name: str - experiments: dict[str, Experiment] + datasets: dict[str, Dataset] @property def number_of_embryos(self): - """Total number of embryos across all experiments in this group.""" + """Total number of embryos across all datasets in this group.""" total_embs = 0 - for exp in self.experiments.values(): - total_embs += len(exp.embryos) + for dataset 
in self.datasets.values(): + total_embs += len(dataset.embryos) return total_embs diff --git a/snazzy_analysis/snazzy_analysis/gui/__init__.py b/snazzy_analysis/snazzy_analysis/gui/__init__.py index f4c1908..5533b1c 100644 --- a/snazzy_analysis/snazzy_analysis/gui/__init__.py +++ b/snazzy_analysis/snazzy_analysis/gui/__init__.py @@ -1,5 +1,5 @@ from .worker import Worker -from .exp_params_dialog import ExperimentParamsDialog +from .dataset_params_dialog import DatasetParamsDialog from .graph_switcher import GraphSwitcher from .image_window import ImageSequenceViewer, ImageWindow from .interactive_plot import InteractivePlotWidget @@ -8,6 +8,5 @@ from .model import GroupModel, Model from .sidebar import RemovableSidebar, FixedSidebar from .sliders import LabeledSlider -from .phase_boundaries_window import PhaseBoundariesWindow from .clickable_plot import ClickableViewBox from .compare_plot_window import ComparePlotWindow diff --git a/snazzy_analysis/snazzy_analysis/gui/compare_plot_window.py b/snazzy_analysis/snazzy_analysis/gui/compare_plot_window.py index c8dd786..5597643 100644 --- a/snazzy_analysis/snazzy_analysis/gui/compare_plot_window.py +++ b/snazzy_analysis/snazzy_analysis/gui/compare_plot_window.py @@ -66,10 +66,10 @@ def __init__(self, groups: list[GroupModel]): def save_all_plots(self): for group in self.groups: - for exp in group.experiments.values(): - exp_dir = exp.directory + for dataset in group.datasets.values(): + dataset_dir = dataset.directory timestamp = datetime.now().strftime("%m%d%Y_%H:%M:%S") - save_path = exp_dir / "plots" / timestamp + save_path = dataset_dir / "plots" / timestamp save_path.mkdir(parents=True, exist_ok=True) for plot_fn in self.btns.values(): @@ -103,7 +103,7 @@ def dt_first_peak(self, save=False, save_dir=None): data = {"dev_fp": [], "group": []} for group in self.groups: - for exp_name, emb in group.iter_all_embryos(): + for _, emb in group.iter_all_embryos(): if emb.trace.peak_times.size == 0: continue time_first_peak 
= emb.trace.peak_times[0] @@ -136,7 +136,7 @@ def dt_hatching(self, save=False, save_dir=None): data = {"dev_hatching": [], "group": []} for group in self.groups: - for exp_name, emb in group.iter_all_embryos(): + for _, emb in group.iter_all_embryos(): trace = emb.trace time_hatching = trace.time[trace.trim_idx] dev_time_first_peak = emb.get_DT_from_time(time_hatching) @@ -168,7 +168,7 @@ def sna_duration(self, save=False, save_dir=None): data = {"group": [], "duration": []} for group in self.groups: - for exp_name, emb in group.iter_all_embryos(): + for _, emb in group.iter_all_embryos(): trace = emb.trace if trace.peak_times.size == 0: continue @@ -232,7 +232,7 @@ def num_episodes(self, save=False, save_dir=None): data = {"group": [], "num_eps": []} for group in self.groups: - for exp_name, emb in group.iter_all_embryos(): + for _, emb in group.iter_all_embryos(): trace = emb.trace data["group"].append(group.name) data["num_eps"].append(len(trace.peak_idxes)) @@ -253,7 +253,7 @@ def cdf_dt(self, save=False, save_dir=None): data = {"dev_time": [], "group": []} for group in self.groups: - for exp_name, emb in group.iter_all_embryos(): + for _, emb in group.iter_all_embryos(): dev_times = [emb.get_DT_from_time(t) for t in emb.trace.peak_times] data["dev_time"].extend(dev_times) data["group"].extend([group.name] * len(dev_times)) @@ -277,7 +277,7 @@ def peak_amplitudes_by_ep(self, save=False, save_dir=None): num_of_peaks = 15 for group in self.groups: - for exp_name, emb in group.iter_all_embryos(): + for _, emb in group.iter_all_embryos(): for i, amp in zip(range(num_of_peaks), emb.trace.peak_amplitudes): data["peak_amp"].append(amp) data["group"].append(group.name) @@ -311,7 +311,7 @@ def dt_by_ep(self, save=False, save_dir=None): num_of_peaks = 15 for group in self.groups: - for exp_name, emb in group.iter_all_embryos(): + for _, emb in group.iter_all_embryos(): for i, t in zip(range(num_of_peaks), emb.trace.peak_times): data["group"].append(group.name) 
data["dev_time"].append(emb.get_DT_from_time(t)) @@ -345,7 +345,7 @@ def ep_intervals(self, save=False, save_dir=None): num_of_peaks = 15 for group in self.groups: - for exp_name, emb in group.iter_all_embryos(): + for _, emb in group.iter_all_embryos(): for i, interval in zip(range(num_of_peaks), emb.trace.peak_intervals): data["group"].append(group.name) data["interval"].append(interval / 60) @@ -372,6 +372,32 @@ def ep_intervals(self, save=False, save_dir=None): else: self._save_plot(save_dir, "episode_intervals.png") + def ep_durations(self, save=False, save_dir=None): + """Duration of each episode.""" + self.clear_axes() + data = {"group": [], "duration": [], "idx": []} + + for group in self.groups: + for _, emb in group.iter_all_embryos(): + for i, duration in zip(range(15), emb.trace.peak_durations): + data["group"].append(group.name) + data["duration"].append(duration / 60) + data["idx"].append(i) + + dodge = len(set(data["group"])) > 1 + ax = sns.pointplot( + data=data, x="idx", y="duration", hue="group", dodge=dodge, ax=self.ax + ) + + ax.set_xticks([0, 2, 4, 6, 8, 10, 12, 14]) + ax.set_title("Durations by peak") + ax.set_ylabel("Duration (min)") + + if not save: + self.canvas.draw() + else: + self._save_plot(save_dir, "peak_durations.png") + def decay_times(self, save=False, save_dir=None): """Decay times. @@ -380,7 +406,7 @@ def decay_times(self, save=False, save_dir=None): data = {"group": [], "decay_times": [], "idx": []} for group in self.groups: - for exp_name, emb in group.iter_all_embryos(): + for _, emb in group.iter_all_embryos(): for i, decay in zip(range(15), emb.trace.peak_decay_times): data["group"].append(group.name) data["decay_times"].append(decay / 60) @@ -406,6 +432,35 @@ def decay_times(self, save=False, save_dir=None): else: self._save_plot(save_dir, "decay_times.png") + def rise_times(self, save=False, save_dir=None): + """Peak rise times. 
+ + Time between the start of the peak (left width boundary) and the peak time.""" + self.clear_axes() + data = {"group": [], "duration": [], "idx": []} + + for group in self.groups: + for _, emb in group.iter_all_embryos(): + for i, duration in zip(range(15), emb.trace.peak_rise_times): + data["group"].append(group.name) + data["duration"].append(duration) + data["idx"].append(i) + + dodge = len(set(data["group"])) > 1 + ax = sns.pointplot( + data=data, x="idx", y="duration", hue="group", dodge=dodge, ax=self.ax + ) + + ax.set_xticks([0, 2, 4, 6, 8, 10, 12, 14]) + ax.set_title("Rise times") + ax.set_xlabel("Peak #") + ax.set_ylabel("Duration (s)") + + if not save: + self.canvas.draw() + else: + self._save_plot(save_dir, "peak_durations.png") + def average_spectrogram(self, save=False, save_dir=None): self.canvas.figure.clear() ax = self.canvas.figure.subplots(len(self.groups), 1) @@ -418,9 +473,9 @@ def average_spectrogram(self, save=False, save_dir=None): for i, group in enumerate(self.groups): f_zero = None t_zero = None - for exp in group.experiments.values(): + for dataset in group.datasets.values(): Zxxs = [] - for emb in exp.embryos: + for emb in dataset.embryos: stft = FrequencyAnalysis.calculate_STFT(emb.trace.aligned_dff) if stft is None: continue @@ -454,58 +509,3 @@ def average_spectrogram(self, save=False, save_dir=None): self.canvas.draw() else: self._save_plot(save_dir, "average_spectrogram.png") - - def ep_durations(self, save=False, save_dir=None): - """Duration of each episode.""" - self.clear_axes() - data = {"group": [], "duration": [], "idx": []} - - for group in self.groups: - for exp_name, emb in group.iter_all_embryos(): - for i, duration in zip(range(15), emb.trace.peak_durations): - data["group"].append(group.name) - data["duration"].append(duration / 60) - data["idx"].append(i) - - dodge = len(set(data["group"])) > 1 - ax = sns.pointplot( - data=data, x="idx", y="duration", hue="group", dodge=dodge, ax=self.ax - ) - - ax.set_xticks([0, 2, 
4, 6, 8, 10, 12, 14]) - ax.set_title("Durations by peak") - ax.set_ylabel("Duration (min)") - - if not save: - self.canvas.draw() - else: - self._save_plot(save_dir, "peak_durations.png") - - def rise_times(self, save=False, save_dir=None): - """Peak rise times. - - Time between the start of the peak (left width boundary) and the peak time.""" - self.clear_axes() - data = {"group": [], "duration": [], "idx": []} - - for group in self.groups: - for exp_name, emb in group.iter_all_embryos(): - for i, duration in zip(range(15), emb.trace.peak_rise_times): - data["group"].append(group.name) - data["duration"].append(duration) - data["idx"].append(i) - - dodge = len(set(data["group"])) > 1 - ax = sns.pointplot( - data=data, x="idx", y="duration", hue="group", dodge=dodge, ax=self.ax - ) - - ax.set_xticks([0, 2, 4, 6, 8, 10, 12, 14]) - ax.set_title("Rise times") - ax.set_xlabel("Peak #") - ax.set_ylabel("Duration (s)") - - if not save: - self.canvas.draw() - else: - self._save_plot(save_dir, "peak_durations.png") diff --git a/snazzy_analysis/snazzy_analysis/gui/exp_params_dialog.py b/snazzy_analysis/snazzy_analysis/gui/dataset_params_dialog.py similarity index 93% rename from snazzy_analysis/snazzy_analysis/gui/exp_params_dialog.py rename to snazzy_analysis/snazzy_analysis/gui/dataset_params_dialog.py index fea4f93..b688e38 100644 --- a/snazzy_analysis/snazzy_analysis/gui/exp_params_dialog.py +++ b/snazzy_analysis/snazzy_analysis/gui/dataset_params_dialog.py @@ -24,17 +24,17 @@ def convert_value(value: str, field_name: str): return value -class ExperimentParamsDialog(QDialog): - """Present Experiment params that can be changed before creating an Experiment. +class DatasetParamsDialog(QDialog): + """Present Dataset params that can be changed before creating a Dataset. Embryos are presented as embryo ids, to make the input easier to change. 
- Internally, pasna_analysis uses embryo names, so when data is coming in / going + Internally, snazzy_analysis uses embryo names, so when data is coming in / going out it has to be converted. """ def __init__(self, properties, parent=None): super().__init__(parent) - self.setWindowTitle("Experiment parameters") + self.setWindowTitle("Dataset parameters") self.setFixedWidth(540) self.combo_keys = {"dff_strategy": ["baseline", "local_minima"]} diff --git a/snazzy_analysis/snazzy_analysis/gui/gui.py b/snazzy_analysis/snazzy_analysis/gui/gui.py index 11f4849..600eb3a 100644 --- a/snazzy_analysis/snazzy_analysis/gui/gui.py +++ b/snazzy_analysis/snazzy_analysis/gui/gui.py @@ -26,7 +26,7 @@ from snazzy_analysis.gui import ( ClickableViewBox, ComparePlotWindow, - ExperimentParamsDialog, + DatasetParamsDialog, FixedSidebar, GraphSwitcher, ImageSequenceViewer, @@ -35,7 +35,6 @@ JsonViewer, LabeledSlider, Model, - PhaseBoundariesWindow, RemovableSidebar, Worker, ) @@ -87,7 +86,7 @@ def _get_group_name(self, is_new_group: bool) -> str: else self.model.selected_group.name ) - def _show_experiment_dialog( + def _show_dataset_dialog( self, config: Config, group_name: str, on_accepted: Callable[[dict], None] ): exp_params = config.get_exp_params() @@ -101,14 +100,14 @@ def _show_experiment_dialog( } del dialog_params["acquisition_period"] - self.exp_params_dialog = ExperimentParamsDialog(dialog_params, parent=self) + self.dataset_params_dialog = DatasetParamsDialog(dialog_params, parent=self) - self.exp_params_dialog.accepted.connect( - lambda: on_accepted(self.exp_params_dialog.get_values()) + self.dataset_params_dialog.accepted.connect( + lambda: on_accepted(self.dataset_params_dialog.get_values()) ) - self.exp_params_dialog.rejected.connect(lambda: None) + self.dataset_params_dialog.rejected.connect(lambda: None) - self.exp_params_dialog.open() + self.dataset_params_dialog.open() def _update_config(self, config: Config, dialog_values): exp_params = config.get_exp_params() @@ 
-120,9 +119,9 @@ def _update_config(self, config: Config, dialog_values): } config.update_params(new_config) - def _start_experiment_worker(self, config: Config, group_name: str): + def _start_dataset_worker(self, config: Config, group_name: str): worker = Worker( - self.model.create_experiment, + self.model.create_dataset, config=config, group_name=group_name, ) @@ -155,9 +154,9 @@ def on_dialog_accepted(dialog_values): self._present_loading() - self._start_experiment_worker(config, group_name) + self._start_dataset_worker(config, group_name) - self._show_experiment_dialog( + self._show_dataset_dialog( config, group_name, on_accepted=on_dialog_accepted ) @@ -176,8 +175,8 @@ def handle_open_err(self, err: Exception): def update_UI(self): self.clear_layout() - self.add_experiment_action.setEnabled(True) - self.compare_experiment_action.setEnabled(True) + self.add_dataset_action.setEnabled(True) + self.compare_dataset_action.setEnabled(True) self.view_pd_action.setEnabled(True) self.paint_main_view() self.render_trace() @@ -186,10 +185,10 @@ def update_UI(self): def open_directory(self): self._open_directory(is_new_group=True, should_reset_model=True) - def compare_experiments(self): + def compare_datasets(self): self._open_directory(is_new_group=True) - def add_experiment(self): + def add_dataset(self): self._open_directory(is_new_group=False) def change_group(self, i): @@ -206,44 +205,21 @@ def display_plots(self): self.cpw = ComparePlotWindow(groups) self.cpw.show() - def display_phase_boundaries(self): - exp = self.model.selected_experiment - traces = [e.trace for e in exp.embryos] - - current_trace = self.model.selected_trace - current_trace_idx = 0 - for i, trace in enumerate(traces): - if trace.name == current_trace: - current_trace_idx = i - break - - has_dsna = self.model.has_dsna() - self.pbw = PhaseBoundariesWindow(traces, current_trace_idx, has_dsna) - self.pbw.save_bounds_signal.connect(self.save_trace_phases) - self.pbw.show() - - def 
save_trace_phases(self, emb_name, new_phases): - if "phase1_end" in new_phases: - self.model.save_phase1_end_idx(emb_name, new_phases["phase1_end"]) - if "dsna_start" in new_phases: - self.model.save_dsna_start(emb_name, new_phases["dsna_start"]) - self.render_trace() - def display_embryo_movie(self): - exp = self.model.selected_experiment - embryos = exp.all_embryos() + dataset = self.model.selected_dataset + embryos = dataset.all_embryos() try: - self.viewer = ImageSequenceViewer(exp.directory, embryos) + self.viewer = ImageSequenceViewer(dataset.directory, embryos) except FileNotFoundError as e: self.show_error_message(str(e)) return self.viewer.show() def display_field_of_view(self): - exp = self.model.selected_experiment - img_path = exp.directory / "emb_numbers.png" + dataset = self.model.selected_dataset + img_path = dataset.directory / "emb_numbers.png" try: - self.image_window = ImageWindow(exp.name, str(img_path)) + self.image_window = ImageWindow(dataset.name, str(img_path)) except FileNotFoundError as e: self.show_error_message(str(e)) return @@ -300,7 +276,7 @@ def paint_top_app_bar(self): self.toggle_graph_btn.clicked.connect(self.toggle_graph_view) self.top_app_bar.addWidget(self.toggle_graph_btn) - if len(self.model.groups) == 1 and not self.model.has_combined_experiments(): + if len(self.model.groups) == 1 and not self.model.has_combined_datasets(): self.toggle_view_width_btn = QPushButton("View widths") self.toggle_view_width_btn.setCheckable(True) self.toggle_view_width_btn.clicked.connect(self.toggle_view_width) @@ -363,8 +339,8 @@ def toggle_view_width(self, checked): self.render_trace() def paint_controls(self): - # Sliders are only avaialable if a single experiment is open - if len(self.model.groups) > 1 or self.model.has_combined_experiments(): + # Sliders are only avaialable if a single dataset is open + if len(self.model.groups) > 1 or self.model.has_combined_datasets(): return self.freq_slider = LabeledSlider( @@ -404,19 +380,19 @@ def 
paint_graphs(self): self.single_graph_frame.setLayout(single_graph_layout) # Sidebar start - exp = self.model.selected_experiment - if not self.model.has_combined_experiments(): - accepted_embs = set([e.name for e in exp.embryos]) - removed_embs = set(exp.to_remove) + dataset = self.model.selected_dataset + if not self.model.has_combined_datasets(): + accepted_embs = set([e.name for e in dataset.embryos]) + removed_embs = set(dataset.to_remove) self.sidebar = RemovableSidebar( - self.select_embryo, accepted_embs, removed_embs, exp.name + self.select_embryo, accepted_embs, removed_embs, dataset.name ) self.sidebar.emb_visibility_toggled.connect(self.toggle_emb_visibility) else: exp_to_embs = {} group = self.model.selected_group - for exp_name, exp in group.experiments.items(): - exp_to_embs[exp_name] = [e.name for e in exp.embryos] + for exp_name, dataset in group.datasets.items(): + exp_to_embs[exp_name] = [e.name for e in dataset.embryos] self.sidebar = FixedSidebar(exp_to_embs, self.select_embryo) scroll_area = QScrollArea() @@ -459,15 +435,15 @@ def paint_menu(self): open_action.triggered.connect(self.open_directory) file_menu.addAction(open_action) - self.add_experiment_action = QAction("Add Experiment", self) - self.add_experiment_action.triggered.connect(self.add_experiment) - self.add_experiment_action.setEnabled(False) - file_menu.addAction(self.add_experiment_action) + self.add_dataset_action = QAction("Add Dataset", self) + self.add_dataset_action.triggered.connect(self.add_dataset) + self.add_dataset_action.setEnabled(False) + file_menu.addAction(self.add_dataset_action) - self.compare_experiment_action = QAction("Compare with experiment", self) - self.compare_experiment_action.triggered.connect(self.compare_experiments) - self.compare_experiment_action.setEnabled(False) - file_menu.addAction(self.compare_experiment_action) + self.compare_dataset_action = QAction("Compare with dataset", self) + 
self.compare_dataset_action.triggered.connect(self.compare_datasets) + self.compare_dataset_action.setEnabled(False) + file_menu.addAction(self.compare_dataset_action) self.view_pd_action = QAction("View pd_params data", self) self.view_pd_action.triggered.connect(self.display_json_data) @@ -488,10 +464,6 @@ def paint_menu(self): display_movie_action.triggered.connect(self.display_embryo_movie) plot_menu.addAction(display_movie_action) - display_phase_bounds_action = QAction("View phase boundaries", self) - display_phase_bounds_action.triggered.connect(self.display_phase_boundaries) - plot_menu.addAction(display_phase_bounds_action) - display_plots_action = QAction("View plots", self) display_plots_action.triggered.connect(self.display_plots) plot_menu.addAction(display_plots_action) @@ -518,8 +490,8 @@ def display_json_data(self): def update_from_json_viewer(self, new_data): self.model.update_config(new_data) - # reset the current experiment to use the new config data - self.model.reset_current_experiment() + # reset the current dataset to use the new config data + self.model.reset_current_dataset() self.update_UI() def paint_main_view(self): @@ -611,7 +583,7 @@ def update_all_embs(self): self.plot_all_traces() def collect_slider_params(self): - # on 'combined exp' mode, the top_layout that hold the slider will be removed + # on 'combined dataset' mode, the top_layout that hold the slider will be removed if not self.top_layout: return None @@ -646,7 +618,7 @@ def plot_all_traces(self): plot_widget.plot(time, dff) - if self.model.has_combined_experiments(): + if self.model.has_combined_datasets(): plot_widget.setTitle(f"{exp_name} - {emb.name}") else: plot_widget.setTitle(emb.name) @@ -678,8 +650,8 @@ def plot_all_traces(self): def calibrate_sliders(self): """Adjusts the sliders based on pd_params.json. 
- The sliders should not be available when more than one experiment is loaded.""" - if self.model.has_combined_experiments(): + The sliders should not be available when more than one dataset is loaded.""" + if self.model.has_combined_datasets(): return pd_params = self.model.get_pd_params() @@ -715,9 +687,9 @@ def select_embryo_from_multi_view(self, emb_name, exp_name): self.select_embryo(emb_name, exp_name) def select_embryo(self, emb_name, exp_name): - exp = self.model.selected_group.experiments[exp_name] - emb = exp.get_embryo(emb_name) - self.model.select_experiment(exp) + dataset = self.model.selected_group.datasets[exp_name] + emb = dataset.get_embryo(emb_name) + self.model.select_dataset(dataset) self.model.select_embryo(emb) self.render_trace() @@ -729,7 +701,7 @@ def render_trace(self): """Render data about the currently selected embryo.""" trace, time, trimmed_time, dff = self.model.get_trace_context(self.use_dev_time) emb_name = self.model.selected_embryo.name - exp_name = self.model.selected_experiment.name + exp_name = self.model.selected_dataset.name self._clear_current_plot() self._plot_raw_trace(trimmed_time, dff) @@ -738,7 +710,6 @@ def render_trace(self): self._plot_peaks(trimmed_time, trace) self._plot_active_and_struct_channels(time, trace) self._setup_trim_line(time, trace) - self._setup_dsna_line(trimmed_time, trace) self._plot_peak_widths(trimmed_time, trace) self._set_plot_titles(emb_name, exp_name) @@ -793,28 +764,8 @@ def _plot_active_and_struct_channels(self, time, trace): ) self.plot_channels.addLegend() - def _setup_dsna_line(self, time, trace): - if not self.model.has_dsna(): - return - - try: - freq = self.freq_slider.value() - except RuntimeError: - freq = trace.pd_params["freq"] - - dsna_start = trace.get_dsna_start(freq) - - dsna_line = pg.InfiniteLine( - time[dsna_start], - movable=True, - pen=pg.mkPen("chartreuse", cosmetic=True), - ) - dsna_line.addMarker("<|>") - 
dsna_line.sigPositionChangeFinished.connect(self.change_dsna_start) - self.plot_widget.addItem(dsna_line) - def _setup_trim_line(self, time, trace): - is_single_exp = not self.model.has_combined_experiments() + is_single_exp = not self.model.has_combined_datasets() trim_line = pg.InfiniteLine( time[trace.trim_idx], @@ -850,51 +801,14 @@ def _plot_peak_widths(self, time, trace): self.plot_widget.addItem(line) def _set_plot_titles(self, emb_name, exp_name): - if self.model.has_combined_experiments(): - exp_name = exp_name or self.model.selected_experiment + if self.model.has_combined_datasets(): + exp_name = exp_name or self.model.selected_dataset title = f"{exp_name} - {emb_name}" else: title = emb_name self.plot_widget.setTitle(title) self.plot_channels.setTitle(title) - def change_dsna_start(self, il_obj): - trace = self.model.selected_trace - emb = self.model.selected_embryo - - if self.use_dev_time: - dev_time = emb.lin_developmental_time - idx = np.searchsorted(dev_time, il_obj.getXPos()) - 1 - x = int(idx) - else: - x = self.model.get_index_from_time(il_obj.getXPos()) - - res = QMessageBox.question( - self, - "Confirm Update", - "Update dSNA start?", - QMessageBox.StandardButton.Ok | QMessageBox.StandardButton.Cancel, - ) - - prev_dsna_start = trace.dsna_start - if self.use_dev_time: - prev_value = dev_time[prev_dsna_start] - else: - prev_value = self.model.get_index_from_time(prev_dsna_start) - - if res == QMessageBox.StandardButton.Cancel: - il_obj.setValue(prev_value) - return - - self.model.save_dsna_start(emb.name, x) - - pd_params = self.model.get_pd_params() - - trace.detect_peaks(pd_params["freq"]) - trace.compute_peak_bounds(pd_params["peak_width"]) - - self.render_trace() - def change_trim_idx(self, il_obj): trace = self.model.selected_trace emb = self.model.selected_embryo @@ -906,7 +820,7 @@ def change_trim_idx(self, il_obj): prev_value = dev_time[trace.trim_idx] else: x = self.model.get_index_from_time(il_obj.getXPos()) - prev_value = 
trace.time[trace.trim_idx] + prev_value = trace.time[trace.trim_idx] // 60 # cannot allow trim_idx to be set after last timepoint, since it's # used to index trace points and would cause IndexError diff --git a/snazzy_analysis/snazzy_analysis/gui/image_window.py b/snazzy_analysis/snazzy_analysis/gui/image_window.py index 52cde37..4243ef9 100644 --- a/snazzy_analysis/snazzy_analysis/gui/image_window.py +++ b/snazzy_analysis/snazzy_analysis/gui/image_window.py @@ -25,7 +25,7 @@ def __init__(self, exp_name, image_path): self.setGeometry(200, 200, 600, 400) title = QLabel(self) - title.setText(f"Experiment: {exp_name}") + title.setText(f"Dataset: {exp_name}") title.setAlignment(Qt.AlignmentFlag.AlignCenter) label = QLabel(self) @@ -53,6 +53,8 @@ def normalize_16bit_to_8bit(img: np.ndarray, lower_p=0.25, upper_p=99.75) -> np. class ImageSequenceViewer(QWidget): + """Render an image sequnce in sync with DFF trace.""" + def __init__(self, directory: Path, embryos: list[Embryo]): super().__init__() self.directory = directory diff --git a/snazzy_analysis/snazzy_analysis/gui/interactive_plot.py b/snazzy_analysis/snazzy_analysis/gui/interactive_plot.py index 45ee313..150fefa 100644 --- a/snazzy_analysis/snazzy_analysis/gui/interactive_plot.py +++ b/snazzy_analysis/snazzy_analysis/gui/interactive_plot.py @@ -4,6 +4,8 @@ class InteractivePlotWidget(pg.PlotWidget): + """A PlotWidget that emits custom signals for CTRL + click and SHIFT + click.""" + add_peak_fired = pyqtSignal(float, float) remove_peak_fired = pyqtSignal(float, float) diff --git a/snazzy_analysis/snazzy_analysis/gui/json_viewer.py b/snazzy_analysis/snazzy_analysis/gui/json_viewer.py index 0709996..ba2d46a 100644 --- a/snazzy_analysis/snazzy_analysis/gui/json_viewer.py +++ b/snazzy_analysis/snazzy_analysis/gui/json_viewer.py @@ -17,31 +17,27 @@ "to_exclude": set, "to_remove": set, "has_transients": bool, - "has_dsna": bool, "acquisition_period": int, "pd_params": dict, + "peak_width": float, "freq": float, - 
"trim_zscore": float, "dff_strategy": str, "baseline_window_size": int, - "peak_width": float, - "manual_peaks": list, - "manual_remove": list, - "manual_widths": dict, - "manual_trim_idx": int, - "manual_phase1_end": int, - "manual_dsna_start": int, - "wlen": int, - "embryos": dict, + "trim_zscore": float, "ISI_factor": float, "low_amp_threshold": float, "fft_height": float, "fft_prominence": float, "local_thres_window_size": int, "local_thres_value": float, - "local_thres_method": str, "port_peaks_window_size": int, "port_peaks_thres": float, + "embryos": dict, + "wlen": int, + "manual_peaks": list, + "manual_remove": list, + "manual_widths": dict, + "manual_trim_idx": int, } diff --git a/snazzy_analysis/snazzy_analysis/gui/model.py b/snazzy_analysis/snazzy_analysis/gui/model.py index 8a08d16..3095698 100644 --- a/snazzy_analysis/snazzy_analysis/gui/model.py +++ b/snazzy_analysis/snazzy_analysis/gui/model.py @@ -1,37 +1,37 @@ -from snazzy_analysis import Config, Embryo, Experiment, utils +from snazzy_analysis import Config, Embryo, Dataset, utils from snazzy_analysis.gui import PeakMatcher -class ExperimentModel: - def __init__(self, experiment: Experiment): - self.experiment = experiment +class DatasetModel: + def __init__(self, dataset: Dataset): + self.dataset = dataset self.to_remove = self.get_removed_embryos() self.selected_embryo = self.embryos[0] @property def embryos(self): - """List of filtered embryos for this experiment.""" + """List of filtered embryos for this dataset.""" return [e for e in self.all_embryos() if e.name not in self.to_remove] def __getattr__(self, name): - return getattr(self.experiment, name) + return getattr(self.dataset, name) def get_embryo(self, emb_name): for embryo in self.all_embryos(): if embryo.name == emb_name: return embryo - raise ValueError(f"Could not find {emb_name} in experiment {self.name}.") + raise ValueError(f"Could not find {emb_name} in dataset {self.name}.") def get_emb_ids(self): return [e.get_id() for e in 
self.embryos] def all_embryos(self): - return self.experiment.get_all_embryos() + return self.dataset.get_all_embryos() def get_removed_embryos(self): - manual_remove = self.experiment.exp_params.get("to_remove", set()) - if self.experiment.filtered_out is not None: - removed_embryos = manual_remove.union(self.experiment.filtered_out) + manual_remove = self.dataset.exp_params.get("to_remove", set()) + if self.dataset.filtered_out is not None: + removed_embryos = manual_remove.union(self.dataset.filtered_out) return removed_embryos def mark_as_accepted(self, emb_name): @@ -44,22 +44,22 @@ def mark_as_removed(self, emb_name): class GroupModel: def __init__(self, name: str): self.name = name - self.experiments: dict[str, ExperimentModel] = {} + self.datasets: dict[str, DatasetModel] = {} - def add_experiment(self, exp: ExperimentModel): - if exp.name in self.experiments: - raise ValueError("Experiment already added to this group.") - self.experiments[exp.name] = exp + def add_dataset(self, dataset: DatasetModel): + if dataset.name in self.datasets: + raise ValueError("Dataset already added to this group.") + self.datasets[dataset.name] = dataset - def remove_experiment(self, exp: ExperimentModel): - if exp.name in self.experiments: - del self.experiments[exp.name] + def remove_dataset(self, dataset: DatasetModel): + if dataset.name in self.datasets: + del self.datasets[dataset.name] def iter_all_embryos(self): - """Yield tuples of exp_name, embryo for all valid embryos in a Group.""" - for exp_name, exp in self.experiments.items(): - for emb in exp.embryos: - yield exp_name, emb + """Yield tuples of dataset_name, embryo for all valid embryos in a Group.""" + for dataset_name, dataset in self.datasets.items(): + for emb in dataset.embryos: + yield dataset_name, emb class Model: @@ -70,60 +70,60 @@ def __init__(self): def __str__(self): group_names = [g.name for g in self.groups] to_remove_count = { - exp.name: len(exp.to_remove) + dataset.name: len(dataset.to_remove) 
for g in self.groups - for exp in g.experiments.values() + for dataset in g.datasets.values() } return ( f"Model(\n" f" groups: {group_names} groups\n" f" curr_group: {self.selected_group.name}\n" f" to_remove: {to_remove_count}\n" - f" curr_exp: {self.selected_experiment.name}\n" + f" curr_dataset: {self.selected_dataset.name}\n" f" curr_emb_name: {self.selected_embryo.name}\n" f")" ) @property def selected_trace(self): - if self.selected_experiment is None: + if self.selected_dataset is None: return None - return self.selected_experiment.selected_embryo.trace + return self.selected_dataset.selected_embryo.trace @property def selected_embryo(self): - if self.selected_experiment is None: + if self.selected_dataset is None: return None - return self.selected_experiment.selected_embryo + return self.selected_dataset.selected_embryo def set_initial_state(self): self.groups: list[GroupModel] = [] self.selected_group: GroupModel = None - self.selected_experiment: ExperimentModel = None + self.selected_dataset: DatasetModel = None - def create_experiment(self, config: Config, group_name: str): - exp = Experiment(config.exp_path, config) + def create_dataset(self, config: Config, group_name: str): + dataset = Dataset(config.dataset_path, config) - if not exp.embryos: + if not dataset.embryos: first_peak_threshold = config.get_exp_params()["first_peak_threshold"] raise AttributeError( f"Could not find any embryos with first peak after {first_peak_threshold} minutes." 
) config.save_params() - return self.add_experiment(ExperimentModel(exp), group_name) + return self.add_dataset(DatasetModel(dataset), group_name) - def add_experiment(self, experiment: ExperimentModel, group_name: str): + def add_dataset(self, dataset: DatasetModel, group_name: str): group = self.get_group_by_name(group_name) if group is None: group = self.create_group(group_name) self.add_group(group) - group.add_experiment(experiment) + group.add_dataset(dataset) self.select_group(group) - self.select_experiment(experiment) - self.select_embryo(experiment.embryos[0]) + self.select_dataset(dataset) + self.select_embryo(dataset.embryos[0]) def create_group(self, group_name: str) -> GroupModel: for g in self.groups: @@ -143,18 +143,18 @@ def select_group(self, group: GroupModel): if self.selected_group == group: return self.selected_group = group - if group.experiments: - self.select_experiment(next(iter(group.experiments.values()))) + if group.datasets: + self.select_dataset(next(iter(group.datasets.values()))) - def select_experiment(self, experiment: ExperimentModel): - if self.selected_experiment == experiment: + def select_dataset(self, dataset: DatasetModel): + if self.selected_dataset == dataset: return - self.selected_experiment = experiment + self.selected_dataset = dataset def select_embryo(self, embryo: Embryo): - if self.selected_experiment.selected_embryo == embryo: + if self.selected_dataset.selected_embryo == embryo: return - self.selected_experiment.selected_embryo = embryo + self.selected_dataset.selected_embryo = embryo def get_group_by_name(self, name: str) -> GroupModel | None: for group in self.groups: @@ -163,27 +163,19 @@ def get_group_by_name(self, name: str) -> GroupModel | None: return None def update_config(self, new_data): - """Updates the config data for the current experiment.""" - exp = self.selected_experiment - exp.config.update_params(new_data) - exp.config.save_params() + """Updates the config data for the current dataset.""" + 
dataset = self.selected_dataset + dataset.config.update_params(new_data) + dataset.config.save_params() def save_trim_idx(self, idx): """Updates trim index of the current embryo.""" - exp = self.selected_experiment - emb_name = exp.selected_embryo.name - exp.config.save_manual_peak_data(emb_name, manual_trim_idx=idx) - - def save_phase1_end_idx(self, emb_name, idx): - exp = self.selected_experiment - exp.config.save_manual_peak_data(emb_name, manual_phase1_end=idx) - - def save_dsna_start(self, emb_name, idx): - exp = self.selected_experiment - exp.config.save_manual_peak_data(emb_name, manual_dsna_start=idx) + dataset = self.selected_dataset + emb_name = dataset.selected_embryo.name + dataset.config.save_manual_peak_data(emb_name, manual_trim_idx=idx) def update_peak_widths(self, peak_index, line_index, new_line_pos): - emb = self.selected_experiment.selected_embryo + emb = self.selected_dataset.selected_embryo peak_bounds = emb.trace.peak_bounds_indices[peak_index] peak_bounds[line_index] = new_line_pos @@ -193,8 +185,8 @@ def update_peak_widths(self, peak_index, line_index, new_line_pos): self.save_peak_widths(emb.name, peak_bounds, peak_index) def save_peak_widths(self, emb_name, peak_widths, peak_index): - exp = self.selected_experiment - corrected_peaks = exp.config.get_corrected_peaks(emb_name) + dataset = self.selected_dataset + corrected_peaks = dataset.config.get_corrected_peaks(emb_name) peak_key = str(peak_index) if corrected_peaks: @@ -203,33 +195,33 @@ def save_peak_widths(self, emb_name, peak_widths, peak_index): else: manual_widths = {peak_key: peak_widths} - exp.config.save_manual_peak_data(emb_name, manual_widths=manual_widths) + dataset.config.save_manual_peak_data(emb_name, manual_widths=manual_widths) def calc_peaks_all_embs(self, pd_params=None): - """Calculates peaks for all embryos in a given experiment. + """Calculates peaks for all embryos in a given dataset. Persists the parameters used to calculate peaks in pd_params.json. 
- There's no need to calculate all peaks for all experiments in a Group because - the GUI does not support updating combined experiments. - If there are combined experiments, the GUI will only present them.""" - exp = self.selected_experiment + There's no need to calculate all peaks for all datasets in a Group because + the GUI does not support updating combined datasets. + If there are combined datasets, the GUI will only present them.""" + dataset = self.selected_dataset if pd_params is None: pd_params = self.get_pd_params() - for emb in exp.all_embryos(): + for emb in dataset.all_embryos(): emb.trace.detect_peaks(pd_params["freq"]) emb.trace.compute_peak_bounds(pd_params["peak_width"]) - to_remove = self.selected_experiment.to_remove + to_remove = self.selected_dataset.to_remove self.update_config( {"pd_params": pd_params, "exp_params": {"to_remove": to_remove}} ) def add_peak(self, x, emb_name, trace, wlen=2): # load corrected data to reconcile with the new add - exp = self.selected_experiment - corrected_peaks = exp.config.get_corrected_peaks(emb_name) + dataset = self.selected_dataset + corrected_peaks = dataset.config.get_corrected_peaks(emb_name) manual_remove = [] if not corrected_peaks else corrected_peaks["manual_remove"] new_peak, new_peaks, removed_peaks = self.pm.add_peak( @@ -242,15 +234,15 @@ def add_peak(self, x, emb_name, trace, wlen=2): else: added_peaks = [new_peak] - exp.config.save_manual_peak_data( + dataset.config.save_manual_peak_data( emb_name, added_peaks=added_peaks, removed_peaks=removed_peaks, wlen=wlen ) return new_peak, new_peaks def remove_peak(self, x, emb_name, trace, wlen=2): # load corrected data to reconcile with the new add - exp = self.selected_experiment - corrected_peaks = exp.config.get_corrected_peaks(emb_name) + dataset = self.selected_dataset + corrected_peaks = dataset.config.get_corrected_peaks(emb_name) manual_add = [] if not corrected_peaks else corrected_peaks["manual_peaks"] manual_widths = {} if not 
corrected_peaks else corrected_peaks["manual_widths"] wlen = wlen if not corrected_peaks else corrected_peaks["wlen"] @@ -265,7 +257,7 @@ def remove_peak(self, x, emb_name, trace, wlen=2): corrected_peaks["manual_remove"] = removed corrected_peaks["manual_widths"] = filtered_peak_widths - exp.config.save_manual_peak_data( + dataset.config.save_manual_peak_data( emb_name, added_peaks=added_peaks, removed_peaks=removed, @@ -275,78 +267,78 @@ def remove_peak(self, x, emb_name, trace, wlen=2): def is_emb_accepted(self, emb_id): emb_name = f"emb{emb_id}" - exp = self.selected_experiment - return emb_name in exp.embryos + dataset = self.selected_dataset + return emb_name in dataset.embryos def toggle_emb_visibility(self, emb_name, should_remove): - exp = self.selected_experiment + dataset = self.selected_dataset if not should_remove: - exp.mark_as_accepted(emb_name) + dataset.mark_as_accepted(emb_name) else: if emb_name == self.selected_embryo.name: self.render_next_embryo() - exp.mark_as_removed(emb_name) + dataset.mark_as_removed(emb_name) self.update_emb_visibility_in_config() - return emb_name in exp.to_remove + return emb_name in dataset.to_remove def update_emb_visibility_in_config(self): - exp = self.selected_experiment - new_data = {"exp_params": {"to_remove": exp.to_remove}} + dataset = self.selected_dataset + new_data = {"exp_params": {"to_remove": dataset.to_remove}} self.update_config(new_data) def render_next_embryo(self): next_exp, next_emb = self.get_next_emb_name(forward=True) - experiment = self.selected_group.experiments[next_exp] - embryo = experiment.get_embryo(next_emb) - self.select_experiment(experiment) + dataset = self.selected_group.datasets[next_exp] + embryo = dataset.get_embryo(next_emb) + self.select_dataset(dataset) self.select_embryo(embryo) def clear_manual_data_by_embryo(self, emb_name): - exp = self.selected_experiment + dataset = self.selected_dataset target_emb = None - for emb in exp.all_embryos(): + for emb in dataset.all_embryos(): 
if emb.name == emb_name: target_emb = emb if target_emb is None: - raise ValueError(f"Cannot find {emb_name} in selected experiment.") + raise ValueError(f"Cannot find {emb_name} in selected dataset.") target_emb.trace.to_add = [] target_emb.trace.to_remove = [] - if "embryos" in exp.config.data: - emb_data = exp.config.data["embryos"] + if "embryos" in dataset.config.data: + emb_data = dataset.config.data["embryos"] if emb_name in emb_data: del emb_data[emb_name] - exp.config.save_params() + dataset.config.save_params() def clear_all_manual_data(self): - exp = self.selected_experiment + dataset = self.selected_dataset - for emb in exp.embryos: + for emb in dataset.embryos: emb.trace.to_add = [] emb.trace.to_remove = [] - if "embryos" in exp.config.data: - exp.config.data["embryos"] = {} + if "embryos" in dataset.config.data: + dataset.config.data["embryos"] = {} - exp.config.save_params() + dataset.config.save_params() - def reset_current_experiment(self): - exp = self.selected_experiment - config = exp.config + def reset_current_dataset(self): + dataset = self.selected_dataset + config = dataset.config group = self.selected_group - group.remove_experiment(exp) + group.remove_dataset(dataset) - self.create_experiment(config, group.name) + self.create_dataset(config, group.name) def get_trace_context(self, use_dev_time: bool = False): - exp = self.selected_experiment + dataset = self.selected_dataset - embryo = exp.selected_embryo + embryo = dataset.selected_embryo trace = embryo.trace if use_dev_time: @@ -358,26 +350,21 @@ def get_trace_context(self, use_dev_time: bool = False): return trace, time, time[: trace.trim_idx], dff - def has_dsna(self): - exp = self.selected_experiment - exp_params = exp.config.get_exp_params() - return exp_params.get("has_dsna", False) - - def has_combined_experiments(self): - return len(self.selected_group.experiments) > 1 + def has_combined_datasets(self): + return len(self.selected_group.datasets) > 1 def get_pd_params(self): - exp 
= self.selected_experiment - return exp.config.get_pd_params() + dataset = self.selected_dataset + return dataset.config.get_pd_params() def get_config_data(self): - exp = self.selected_experiment - if exp is None: + dataset = self.selected_dataset + if dataset is None: return None - return exp.config.load_data() + return dataset.config.load_data() def get_next_emb_name(self, forward: bool) -> tuple[str, str]: - """Return the next valid exp_name and emb_name of the currenlty selected group. + """Return the next valid dataset_name and emb_name of the currenlty selected group. If an emb marked as to_remove is selected, returns the first valid embryo. @@ -386,38 +373,40 @@ def get_next_emb_name(self, forward: bool) -> tuple[str, str]: If True returns the next embryo, otherwise the previous embryo. Returns: next_values(tuple[str, str]): - next_emb_name, next_exp_name + next_emb_name, next_dataset_name """ - if self.select_experiment is None: + if self.select_dataset is None: return - exp_and_embs = [ - (exp_name, emb.name) - for exp_name, emb in self.selected_group.iter_all_embryos() + datasets_and_embs = [ + (dataset_name, emb.name) + for dataset_name, emb in self.selected_group.iter_all_embryos() ] - exp_and_embs.sort(key=lambda e: (e[0], utils.emb_id(e[1]))) + datasets_and_embs.sort(key=lambda e: (e[0], utils.emb_id(e[1]))) try: - exp = self.selected_experiment - curr_emb_index = exp_and_embs.index((exp.name, exp.selected_embryo.name)) + dataset = self.selected_dataset + curr_emb_index = datasets_and_embs.index( + (dataset.name, dataset.selected_embryo.name) + ) except ValueError: - return exp_and_embs[0] + return datasets_and_embs[0] if forward: - next_idx = (curr_emb_index + 1) % len(exp_and_embs) + next_idx = (curr_emb_index + 1) % len(datasets_and_embs) else: - next_idx = (curr_emb_index - 1) % len(exp_and_embs) + next_idx = (curr_emb_index - 1) % len(datasets_and_embs) - return exp_and_embs[next_idx] + return datasets_and_embs[next_idx] def 
move_to_next_emb(self, forward): - exp_name, emb_name = self.get_next_emb_name(forward) + dataset_name, emb_name = self.get_next_emb_name(forward) - experiment = self.selected_group.experiments[exp_name] - embryo = experiment.get_embryo(emb_name) + dataset = self.selected_group.datasets[dataset_name] + embryo = dataset.get_embryo(emb_name) - self.select_experiment(experiment) + self.select_dataset(dataset) self.select_embryo(embryo) def get_index_from_time(self, time) -> int: @@ -428,7 +417,7 @@ def get_index_from_time(self, time) -> int: Parameters: time (float): time in minutes. """ - exp = self.selected_experiment - exp_params = exp.config.get_exp_params() + dataset = self.selected_dataset + exp_params = dataset.config.get_exp_params() return int(time * 60) // exp_params["acquisition_period"] diff --git a/snazzy_analysis/snazzy_analysis/gui/phase_boundaries_window.py b/snazzy_analysis/snazzy_analysis/gui/phase_boundaries_window.py deleted file mode 100644 index bf9ec2c..0000000 --- a/snazzy_analysis/snazzy_analysis/gui/phase_boundaries_window.py +++ /dev/null @@ -1,168 +0,0 @@ -import numpy as np -from PyQt6.QtCore import pyqtSignal -from PyQt6.QtGui import QAction, QKeySequence -from PyQt6.QtWidgets import QHBoxLayout, QPushButton, QVBoxLayout, QWidget -import pyqtgraph as pg - -from snazzy_analysis import Trace, TracePhases - - -class PhaseBoundariesWindow(QWidget): - save_bounds_signal = pyqtSignal(str, dict) - - def __init__(self, traces: list[Trace], current_trace: int, has_dsna: bool): - super().__init__() - - self.traces = traces - self.has_dsna = has_dsna - self.current_trace = current_trace - - self.setWindowTitle("Phase Boundaries") - - next_trace_action = QAction(self) - next_trace_action.setShortcut(QKeySequence("Ctrl+N")) - next_trace_action.triggered.connect(self.next_trace) - self.addAction(next_trace_action) - - layout = QVBoxLayout() - - self.top_btns = QHBoxLayout() - - self.save_changes_btn = QPushButton("Save changes") - 
self.save_changes_btn.clicked.connect(self.save_changes) - self.top_btns.addWidget(self.save_changes_btn) - - layout.addLayout(self.top_btns) - - self.plot_widget = pg.PlotWidget() - - layout.addWidget(self.plot_widget) - - self.setLayout(layout) - - self.phase2_line = None - self.dsna_line = None - self.paint_window() - - def paint_window(self): - self.render_current_trace() - self.phase2_start() - if self.has_dsna: - self.dsna_start() - - def next_trace(self): - self.current_trace = (self.current_trace + 1) % len(self.traces) - self.paint_window() - - def render_current_trace(self): - self.plot_widget.clear() - - trace = self.traces[self.current_trace] - self.plot_widget.plot( - trace.time[: trace.trim_idx], - trace.dff[: trace.trim_idx], - name="Dff", - pen=pg.mkPen("whitesmoke"), - ) - self.plot_widget.setTitle(trace.name) - - def phase2_start(self): - """Plot an InfiniteLine that splits phase 1 to phase 2. - - The line is plotted between peaks, so all peaks to the left belong to - phase 1 and peaks to the right to phase 2.""" - if self.phase2_line is not None: - self.plot_widget.removeItem(self.phase2_line) - - trace = self.traces[self.current_trace] - trace_phases = TracePhases(trace) - phase1_end = trace_phases.get_phase1_end() - - phase1_end_time = trace.time[phase1_end] - - self.phase2_line = pg.InfiniteLine( - phase1_end_time, - movable=True, - pen=pg.mkPen("tomato", cosmetic=True), - ) - self.phase2_line.addMarker("<|>") - self.phase2_line.sigPositionChangeFinished.connect(self.change_phase1_end) - self.plot_widget.addItem(self.phase2_line) - - def find_last_peak_index(self, time: int, trace: Trace) -> int: - """Find the last peak index that happended before `time`. - - Parameters: - time(int): - Time used to filter the indices. Has the same units as `trace.time`. - trace(Trace): - Trace object used to extract peak indices and time information. - - Return: - int: - Highest peak index that happend before `time`. 
- """ - new_idx = -1 - for i, peak_time in enumerate(trace.peak_times): - if peak_time > time: - new_idx = i - 1 - break - return new_idx - - def change_phase1_end(self, il_obj): - previous_pos = il_obj.startPosition - bound_time = int(il_obj.getXPos()) - - curr_trace = self.traces[self.current_trace] - - new_idx = self.find_last_peak_index(bound_time, curr_trace) - if new_idx == -1: - self.phase2_line.setX(previous_pos.x()) - - def change_dsna_start(self, il_obj): - previous_pos = il_obj.startPosition - bound_time = int(il_obj.getXPos()) - - curr_trace = self.traces[self.current_trace] - - new_idx = self.find_last_peak_index(bound_time, curr_trace) + 1 - if new_idx == -1: - self.dsna_line.setX(previous_pos.x()) - - def dsna_start(self): - if self.dsna_line is not None: - self.plot_widget.removeItem(self.dsna_line) - - trace = self.traces[self.current_trace] - freq = trace.pd_params.get("freq") - trace_phases = TracePhases(trace) - start = trace_phases.get_dsna_start(freq) - - start_time = trace.time[start] - - self.dsna_line = pg.InfiniteLine( - start_time, - movable=True, - pen=pg.mkPen("fuchsia", cosmetic=True), - ) - self.dsna_line.addMarker("<|>") - self.dsna_line.sigPositionChangeFinished.connect(self.change_dsna_start) - self.plot_widget.addItem(self.dsna_line) - - def get_index(self, x): - trace = self.traces[self.current_trace] - return np.searchsorted(trace.time, x, side="left") - - def save_changes(self): - curr_trace = self.traces[self.current_trace] - updated_bounds = {} - if self.phase2_line is not None: - phase1_end_pos = int(self.phase2_line.getXPos() / 6) - updated_bounds["phase1_end"] = phase1_end_pos - if self.dsna_line is not None: - dsna_start_pos = int(self.dsna_line.getXPos() / 6) - updated_bounds["dsna_start"] = dsna_start_pos - - if updated_bounds: - emb_name = curr_trace.name - self.save_bounds_signal.emit(emb_name, updated_bounds) diff --git a/snazzy_analysis/snazzy_analysis/gui/sidebar.py b/snazzy_analysis/snazzy_analysis/gui/sidebar.py 
index 18a0aa2..910793e 100644 --- a/snazzy_analysis/snazzy_analysis/gui/sidebar.py +++ b/snazzy_analysis/snazzy_analysis/gui/sidebar.py @@ -34,13 +34,15 @@ def __init__( self.setLayout(main_layout) def populate_buttons(self, exp_to_embs: dict[str, Iterable[str]], layout): - for exp in exp_to_embs: - for emb in sorted(exp_to_embs[exp], key=sort_by_emb_number): + for dataset in exp_to_embs: + for emb in sorted(exp_to_embs[dataset], key=sort_by_emb_number): row_layout = QHBoxLayout() row_layout.setSpacing(0) - btn = QPushButton(f"{exp} - {emb}") + btn = QPushButton(f"{dataset} - {emb}") btn.clicked.connect( - lambda checked, name=emb, exp=exp: self.callback(name, exp) + lambda checked, name=emb, dataset=dataset: self.callback( + name, dataset + ) ) row_layout.addWidget(btn) layout.addLayout(row_layout) @@ -52,8 +54,8 @@ class RemovableSidebar(QWidget): def __init__( self, callback: Callable[[str, str | None], None], - accepted_embs: set[int], - removed_embs: set[int], + accepted_embs: set[str], + removed_embs: set[str], exp_name: str, ): super().__init__() diff --git a/snazzy_analysis/snazzy_analysis/trace.py b/snazzy_analysis/snazzy_analysis/trace.py index 69a7a51..4957c47 100644 --- a/snazzy_analysis/snazzy_analysis/trace.py +++ b/snazzy_analysis/snazzy_analysis/trace.py @@ -5,7 +5,7 @@ import scipy.signal as spsig from scipy.stats import zscore -from snazzy_analysis import Config, FrequencyAnalysis, TracePhases +from snazzy_analysis import Config, FrequencyAnalysis class BaselineStrategies(Enum): @@ -23,7 +23,6 @@ def __init__( name, activity, config: Config, - fs=1 / 6, ): self.name = name self.time = activity[:, 0] @@ -36,12 +35,10 @@ def __init__( # list of peaks that were manually added / removed: self.to_add = [] self.to_remove = [] - self.fs = fs self._peak_idxes = None self._peak_bounds_indices = None self.filtered_dff = None - self.dsna_start = None self.trim_idx = self.get_trim_index() self.dff = self.compute_dff() @@ -54,9 +51,6 @@ def peak_idxes(self): 
self.detect_peaks(self.pd_params["freq"]) else: self.detect_peaks() - if self.exp_params.get("has_dsna", False): - filtered_peaks = [pi for pi in self._peak_idxes if pi < self.dsna_start] - return np.array(filtered_peaks) return self._peak_idxes @peak_idxes.setter @@ -259,8 +253,7 @@ def compute_baseline(self, signal, window_size=160, n_bins=64): into n_bins amplitude bins and taking the mean of the bin with the most samples. - This assumes that PaSNA peaks are sparse. - To handle edges, both edges are reflected. + This assumes that bouts are sparse. To handle edges, both edges are reflected. """ expanded_signal = self.reflect_edges(signal, window_size) @@ -358,36 +351,30 @@ def reconcile_manual_peaks(self, params): sorted(list(set(filtered_peaks + to_add))), dtype=np.int64 ) - def update_dsna_start(self, params): - if not self.exp_params.get("has_dsna", False): - return - self.dsna_start = self.get_dsna_start(params["freq"]) - def detect_peaks(self, freq=0.0025): self._peak_idxes, filtered_dff = self.calculate_peaks(freq_cutoff=freq) self.filtered_dff = filtered_dff - self.dsna_start = self.get_dsna_start(freq) stages = [ (self.remove_transients, {}), (self.remove_below_threshold, {}), (self.reconcile_manual_peaks, {}), - (self.update_dsna_start, {"freq": freq}), ] self.process_peaks(stages) - def filter_peaks_by_local_context( - self, signal, peak_indices, window_size=300, value=75, method="percentile" + def filter_peaks_by_local_threshold( + self, signal, peak_indices, window_size=300, value=75 ): """Filter pre-detected peaks by comparing their height to a local threshold. + Peaks that are not within a percentile of the values in the window are filtered out. + Parameters: signal (np.ndarray): Original signal. peak_indices (np.ndarray): Indices of peaks (e.g., from find_peaks). window_size (int): Size of the window to determine local threshold. value (int): Factor to determine local threshold. 
- method ('mean' | 'median' | 'percentile'): How to calculate the threshold. Returns: filtered_peaks (np.ndarray): Indices of peaks that passed local thresholding. @@ -395,26 +382,19 @@ def filter_peaks_by_local_context( filtered_peaks = [] for i in peak_indices: - if signal[i] > self.local_threshold(signal, i, window_size, value, method): + if signal[i] > self.local_threshold(signal, i, window_size, value): filtered_peaks.append(i) return np.array(sorted(filtered_peaks)) - def local_threshold(self, signal, idx, window_size, value, method): + def local_threshold(self, signal, idx, window_size, value): half_win = window_size // 2 start = max(0, idx - half_win) end = min(len(signal), idx + half_win) window = signal[start:end] - if method == "mean": - local_thresh = value * np.mean(np.abs(window)) - elif method == "median": - local_thresh = value * np.median(np.abs(window)) - elif method == "percentile": - local_thresh = np.percentile(np.abs(window), value) - else: - raise ValueError("Method must be 'mean', 'median', or 'percentile'.") + local_thresh = np.percentile(np.abs(window), value) return local_thresh @@ -474,25 +454,14 @@ def calculate_peaks(self, freq_cutoff): pp_ws = self.pd_params["port_peaks_window_size"] pp_thres = self.pd_params["port_peaks_thres"] - local_peak_indices = self.port_peaks( + dff_peak_indices = self.port_peaks( peak_indices, self.dff[: self.trim_idx], search_window=pp_ws, peak_height_thres=pp_thres, ) - local_ws = self.pd_params["local_thres_window_size"] - local_value = self.pd_params["local_thres_value"] - local_method = self.pd_params["local_thres_method"] - peaks = self.filter_peaks_by_local_context( - self.dff, - local_peak_indices, - window_size=local_ws, - value=local_value, - method=local_method, - ) - - return np.array(peaks), filtered_dff + return np.array(dff_peak_indices), filtered_dff def get_trim_index(self): """Try to return the trim index from config, otherwise calculates it.""" @@ -601,19 +570,3 @@ def 
compute_local_peaks(self, height=0.02, prominence=0.01): ) local_peaks.append(len(peak_indices)) return local_peaks - - def get_dsna_start(self, freq): - if not self.exp_params.get("has_dsna", False): - return None - - manual_dsna = self.config.get_corrected_dsna_start(self.name) - - if manual_dsna is not None and manual_dsna >= 0: - return manual_dsna - - tp = TracePhases(self) - dsna_start = tp.get_dsna_start(freq) - if dsna_start == -1: - return self.trim_idx - - return dsna_start diff --git a/snazzy_analysis/snazzy_analysis/trace_phases.py b/snazzy_analysis/snazzy_analysis/trace_phases.py deleted file mode 100644 index d81d20f..0000000 --- a/snazzy_analysis/snazzy_analysis/trace_phases.py +++ /dev/null @@ -1,291 +0,0 @@ -from matplotlib import patches -from scipy.spatial.distance import pdist, squareform -from skimage.filters import threshold_multiotsu -from sklearn.preprocessing import MinMaxScaler -import matplotlib.pyplot as plt -import numpy as np -import seaborn as sns - -from snazzy_analysis import FrequencyAnalysis - - -class TracePhases: - """Calculate phase boundaries for a Trace. - - A phase is a time inverval where peaks have similar features. This class is used to - calculate features and determine phase boundaries based on feature distances. - """ - - def __init__(self, trace): - self.trace = trace - - def get_phase1_end(self, freq: float = 0.025) -> int: - """Return the index of the last phase 1 peak. - - Parameters: - freq(float): - High frequency cutoff value used to calculate the HF feature. - - Returns: - phase1_end(int): - Time series index of the last phase 1 peak. 
- """ - features = self.phase1_features(hf_cutoff=freq) - if len(features) <= 1: - return -1 - dm = TracePhases.dist_matrix(features) - thres = TracePhases.feature_thres(dm, num_classes=3) - p1_end = TracePhases.segment_distance_matrix_forward(dm, thres) - - return self.to_dff_index(p1_end) - - def get_dsna_start(self, freq: float = 0.002) -> int: - """Return the index of the first dSNA peak. - - Used for specific traces where this behavior is observed, eg vgludf. - - Parameters: - freq(float): - Low frequency cutoff used to calculate the LF feature. - - Returns: - dsna_start(int): - Time series index of the first dSNA peak. - """ - features = self.dsna_features(lf_cutoff=freq) - if len(features) <= 1: - return -1 - - dm = TracePhases.dist_matrix(features) - thres = TracePhases.feature_thres(dm) - dsna_start = TracePhases.segment_distance_matrix_reverse(dm, thres) - - return self.to_dff_index(dsna_start) - - def to_dff_index(self, peak_idx: int): - """Convert from peak index to DFF index.""" - peak_idxes = self.trace.get_all_peak_idxes() - return peak_idxes[peak_idx] - - def phase1_features(self, hf_cutoff: float = 0.025) -> list: - """Each peak is represented by high frequency pass RMS. - - Parameters: - hf_cutoff(float): - High frequency cutoff. Calculates peak RMS after removing all - frequencies lower than this value. - - Returns: - features(list): - A 2D list where each nested list has features of a given peak. - The only phase 1 feature used is HF filtered RMS. 
- """ - high_pass = FrequencyAnalysis.get_filtered_signal( - self.trace.dff, hf_cutoff, low_pass=False - ) - - rel_height = self.trace.pd_params["peak_width"] - peak_idxes = self.trace.get_all_peak_idxes() - peak_bounds = self.trace.compute_peak_bounds(rel_height, peak_idxes) - features = [] - for pi, (s, e) in zip(peak_idxes, peak_bounds): - rms = np.sqrt(np.mean(np.power(high_pass[s:e], 2))) - features.append([self.trace.dff[pi], rms]) - - return features - - def dsna_features(self, lf_cutoff: float = 0.005) -> list: - """Each peak is represented by LF pass RMS and peak amplitude. - - Parameters: - lf_cutoff(float): - Lower frequency cutoff. Calculates peak RMS after removing all - frequencies higher than this value. - - Returns: - features(list): - A 2D list where each nested list has features of a given peak. - """ - low_pass = FrequencyAnalysis.get_filtered_signal( - self.trace.dff, lf_cutoff, low_pass=True - ) - - rel_height = self.trace.pd_params["peak_width"] - peak_idxes = self.trace.get_all_peak_idxes() - peak_bounds = self.trace.compute_peak_bounds(rel_height, peak_idxes) - features = [] - for i, (s, e) in enumerate(peak_bounds): - rms = np.sqrt(np.mean(np.power(low_pass[s:e], 2))) - peak_amp = self.trace.dff[peak_idxes[i]] - features.append([rms, peak_amp]) - - return features - - @staticmethod - def feature_thres(dist_matrix: np.ndarray, num_classes: int = 2) -> float: - """Return the lower threshold value from multiclass otsu threshold. - - Increasing the number of classes decreases the threshold value. - - Parameters: - dist_matrix(ndarray): - Square matrix of distance features. - num_classes(int): - Number of classes to use in threshold_multiotsu. - """ - thres_vals = threshold_multiotsu(dist_matrix, classes=num_classes) - return thres_vals[0] - - @staticmethod - def dist_matrix(features: list) -> np.ndarray: - """Return a square matrix of feature distances. 
- - To ensure that all features have the same range and contribute equally - to the distance matrix, they are first minMax scaled before calculating - distances. - - Parameters: - features(list): - A list where each element represents a list of features for a - condition. - """ - scaler = MinMaxScaler() - features_scaled = scaler.fit_transform(features) - - return squareform(pdist(features_scaled, metric="euclidean")) - - @staticmethod - def segment_distance_matrix_forward(matrix: np.ndarray, thres: float) -> int: - """Segmentation by region growing until the provided thres is reached. - - Iterate over each cell in the matrix diagonal and calculate the average - distance between that cell and all previous cells. Stop when the average - reaches the threshold. - - Parameters: - matrix (nparray): - Feature distances square matrix. - thres (float): - Threshold used to determine index. - - Returns: - segmentation_index (int): - Highest index that is still below `thres`. - """ - if matrix.size == 0: - raise ValueError("Cannot apply threshold to empty matrix.") - - N = len(matrix) - if N == 1: - return 0 - - for k in range(1, N): - next_cells = matrix[k, :k] - if np.average(next_cells) > thres: - return k - 1 - - return N - 1 - - @staticmethod - def segment_distance_matrix_reverse(matrix: np.ndarray, thres: float) -> int: - """Segmentation by region growing until the provided thres is reached. - - **Starting from the last element of the matrix**, iterate over each cell - in the matrix diagonal and calculate the average distance between that - cell and all forward cells. Stop when the average reaches the threshold. - - Parameters: - matrix (nparray): - Feature distances square matrix. - thres (float): - Threshold used to determine index. - - Returns: - segmentation_index (int): - Lowest index that is still below `thres`. 
- """ - if matrix.size == 0: - raise ValueError("Cannot apply threshold to empty matrix.") - - N = len(matrix) - if N == 1: - return 0 - - for k in range(N - 2, -1, -1): - next_cells = matrix[k, k + 1 :] - if np.average(next_cells) > thres: - return k + 1 - - return N - 1 - - def plot_phase_change( - self, - dist_matrix: np.ndarray, - change_index: int, - features: list, - from_start: bool = True, - ): - """Visualize change index with DFF trace and feat dist matrix.""" - peaks = self.trace.peak_idxes - peak_amps = self.trace.dff[peaks] - peak_times = self.trace.time[peaks] / 6 - all_peaks = self.trace.get_all_peak_idxes() - - # plot the mid point between last phase 1 point and first phase 2 point - try: - boundary = (all_peaks[change_index] + all_peaks[change_index + 1]) // 2 - except IndexError: - print("Could not determine phase 2 start") - return - - fig = plt.figure(figsize=(14, 10)) - axs = fig.subplot_mosaic([["dff", "dff"], ["features", "dist_matrix"]]) - - axs["dff"].axvline(boundary, color="r") - axs["dff"].plot(self.trace.dff[: self.trace.trim_idx]) - axs["dff"].plot(peak_times, peak_amps, "m.") - - scaler = MinMaxScaler() - scaled_features = scaler.fit_transform(features) - - feat1, feat2 = list(zip(*scaled_features)) - axs["features"].plot( - feat1, feat2, linestyle="None", marker="o", markerfacecolor="None" - ) - axs["features"].set_ylabel("Peak Amplitude") - axs["features"].set_xlabel("RMS High Frequency Content") - - sns.heatmap( - dist_matrix, - cmap="viridis", - square=True, - xticklabels=False, - yticklabels=False, - ax=axs["dist_matrix"], - ) - - # plot segmented area from start or from finish - if from_start: - rect = patches.Rectangle( - (0, 0), - change_index + 1, - change_index + 1, - linewidth=2, - edgecolor="red", - facecolor="none", - ) - else: - width = len(dist_matrix) - change_index - rect = patches.Rectangle( - (change_index, change_index), - width, - width, - linewidth=2, - edgecolor="red", - facecolor="none", - ) - 
axs["dist_matrix"].add_patch(rect) - axs["dist_matrix"].set_title("Feature distances") - - fig.suptitle(self.trace.name) - plt.show() diff --git a/snazzy_analysis/tests/assets/annotated_data/VGAT-/20241008_vgatdf_emb6.npy.npy b/snazzy_analysis/tests/assets/annotated_data/VGAT-/20241008_vgatdf_emb6.npy.npy deleted file mode 100644 index 3004845..0000000 Binary files a/snazzy_analysis/tests/assets/annotated_data/VGAT-/20241008_vgatdf_emb6.npy.npy and /dev/null differ diff --git a/snazzy_analysis/tests/assets/annotated_data/VGAT-/20241008_vgatdf_emb6_annotated.csv b/snazzy_analysis/tests/assets/annotated_data/VGAT-/20241008_vgatdf_emb6_annotated.csv deleted file mode 100644 index 64b8523..0000000 --- a/snazzy_analysis/tests/assets/annotated_data/VGAT-/20241008_vgatdf_emb6_annotated.csv +++ /dev/null @@ -1,16 +0,0 @@ -index,label -1204,Episode -1522,Episode -1573,Episode -1760,Episode -2074,Episode -2359,Episode -2665,Episode -2846,Episode -3122,Episode -3319,Episode -3460,Episode -3504,Episode -3755,Episode -3944,Episode -4172,Episode diff --git a/snazzy_analysis/tests/assets/annotated_data/VGAT-/20241008_vgatdf_emb6_windows.csv b/snazzy_analysis/tests/assets/annotated_data/VGAT-/20241008_vgatdf_emb6_windows.csv deleted file mode 100644 index a3a7a78..0000000 --- a/snazzy_analysis/tests/assets/annotated_data/VGAT-/20241008_vgatdf_emb6_windows.csv +++ /dev/null @@ -1,28 +0,0 @@ -start_index,end_index,label -1198,1269,Episode -1513,1555,Episode -1565,1636,Episode -1751,1837,Episode -2063,2149,Episode -2351,2427,Episode -2655,2731,Episode -2833,2921,Episode -3111,3218,Episode -3312,3359,Episode -3451,3498,Episode -3501,3562,Episode -3745,3815,Episode -3939,4035,Episode -4166,4230,Episode -4046,4160,Baseline -3819,3933,Baseline -3566,3738,Baseline -3363,3447,Baseline -3221,3305,Baseline -2927,3102,Baseline -2735,2828,Baseline -2433,2648,Baseline -2153,2346,Baseline -1842,2058,Baseline -1641,1746,Baseline -1274,1508,Baseline diff --git 
a/snazzy_analysis/tests/assets/annotated_data/VGAT-/20241010_vgatdf_emb14.npy.npy b/snazzy_analysis/tests/assets/annotated_data/VGAT-/20241010_vgatdf_emb14.npy.npy deleted file mode 100644 index ba8392f..0000000 Binary files a/snazzy_analysis/tests/assets/annotated_data/VGAT-/20241010_vgatdf_emb14.npy.npy and /dev/null differ diff --git a/snazzy_analysis/tests/assets/annotated_data/VGAT-/20241010_vgatdf_emb14_annotated.csv b/snazzy_analysis/tests/assets/annotated_data/VGAT-/20241010_vgatdf_emb14_annotated.csv deleted file mode 100644 index e84ce4b..0000000 --- a/snazzy_analysis/tests/assets/annotated_data/VGAT-/20241010_vgatdf_emb14_annotated.csv +++ /dev/null @@ -1,33 +0,0 @@ -index,label -643,Episode -887,Episode -1161,Episode -1353,Episode -1538,Episode -1685,Episode -1835,Episode -1986,Episode -2146,Episode -2202,Episode -2276,Episode -2398,Episode -2447,Episode -2604,Episode -2772,Episode -2982,Episode -3051,Episode -3107,Episode -3261,Episode -3386,Episode -3515,Episode -3627,Episode -3784,Episode -3881,Episode -4008,Episode -4106,Episode -4268,Episode -4388,Episode -4486,Episode -4544,Episode -4710,Episode -4750,Episode diff --git a/snazzy_analysis/tests/assets/annotated_data/VGAT-/20241010_vgatdf_emb14_windows.csv b/snazzy_analysis/tests/assets/annotated_data/VGAT-/20241010_vgatdf_emb14_windows.csv deleted file mode 100644 index 9c52b8d..0000000 --- a/snazzy_analysis/tests/assets/annotated_data/VGAT-/20241010_vgatdf_emb14_windows.csv +++ /dev/null @@ -1,52 +0,0 @@ -start_index,end_index,label -636,686,Episode -877,927,Episode -1158,1199,Episode -1346,1396,Episode -1530,1581,Episode -1677,1736,Episode -1829,1897,Episode -1980,2084,Episode -2142,2197,Episode -2142,2197,Episode -2198,2272,Episode -2274,2327,Episode -2396,2438,Episode -2439,2500,Episode -2598,2689,Episode -2770,2865,Episode -2979,3046,Episode -3048,3092,Episode -3094,3159,Episode -3258,3300,Episode -3379,3436,Episode -3511,3556,Episode -3622,3680,Episode -3781,3826,Episode -3874,3936,Episode 
-4005,4055,Episode -4101,4165,Episode -4263,4310,Episode -4378,4439,Episode -4481,4542,Episode -4543,4639,Episode -4707,4795,Episode -4642,4703,Baseline -4443,4479,Baseline -4172,4257,Baseline -3940,4001,Baseline -3685,3772,Baseline -3440,3505,Baseline -3163,3255,Baseline -2869,2975,Baseline -2693,2767,Baseline -2504,2593,Baseline -2330,2393,Baseline -2086,2139,Baseline -1902,1976,Baseline -1744,1823,Baseline -1586,1673,Baseline -1401,1523,Baseline -1205,1343,Baseline -932,1151,Baseline -691,873,Baseline diff --git a/snazzy_analysis/tests/assets/annotated_data/VGAT-/20241011_vgatdf_emb13.npy.npy b/snazzy_analysis/tests/assets/annotated_data/VGAT-/20241011_vgatdf_emb13.npy.npy deleted file mode 100644 index cd6f111..0000000 Binary files a/snazzy_analysis/tests/assets/annotated_data/VGAT-/20241011_vgatdf_emb13.npy.npy and /dev/null differ diff --git a/snazzy_analysis/tests/assets/annotated_data/VGAT-/20241011_vgatdf_emb13_annotated.csv b/snazzy_analysis/tests/assets/annotated_data/VGAT-/20241011_vgatdf_emb13_annotated.csv deleted file mode 100644 index 53bbd57..0000000 --- a/snazzy_analysis/tests/assets/annotated_data/VGAT-/20241011_vgatdf_emb13_annotated.csv +++ /dev/null @@ -1,34 +0,0 @@ -index,label -717,Episode -995,Episode -1148,Episode -1286,Episode -1409,Episode -1565,Episode -1658,Episode -1779,Episode -1896,Episode -2166,Episode -2013,Episode -2382,Episode -2529,Episode -2577,Episode -2864,Episode -3075,Episode -3120,Episode -3156,Episode -3335,Episode -3391,Episode -3598,Episode -3663,Episode -3750,Episode -3916,Episode -4083,Episode -4221,Episode -3977,Episode -4309,Episode -4410,Episode -4485,Episode -4628,Episode -4723,Episode -4761,Episode diff --git a/snazzy_analysis/tests/assets/annotated_data/VGAT-/20241011_vgatdf_emb13_windows.csv b/snazzy_analysis/tests/assets/annotated_data/VGAT-/20241011_vgatdf_emb13_windows.csv deleted file mode 100644 index 064d1ec..0000000 --- 
a/snazzy_analysis/tests/assets/annotated_data/VGAT-/20241011_vgatdf_emb13_windows.csv +++ /dev/null @@ -1,54 +0,0 @@ -start_index,end_index,label -706,792,Episode -987,1096,Episode -1141,1220,Episode -1279,1347,Episode -1403,1471,Episode -1558,1612,Episode -1649,1741,Episode -1772,1822,Episode -1890,1951,Episode -2007,2068,Episode -2161,2238,Episode -2372,2447,Episode -2527,2647,Episode -2859,2954,Episode -3073,3206,Episode -3331,3386,Episode -3389,3466,Episode -3593,3658,Episode -3660,3714,Episode -3748,3818,Episode -3913,4010,Episode -4078,4138,Episode -4217,4277,Episode -4305,4365,Episode -4407,4451,Episode -4482,4518,Episode -4624,4678,Episode -4721,4784,Episode -4679,4718,Baseline -4521,4620,Baseline -4453,4480,Baseline -4367,4406,Baseline -4278,4304,Baseline -4140,4216,Baseline -4013,4075,Baseline -3820,3910,Baseline -3715,3745,Baseline -3469,3590,Baseline -3207,3328,Baseline -2956,3071,Baseline -2650,2855,Baseline -2449,2525,Baseline -2242,2370,Baseline -2070,2159,Baseline -1953,2003,Baseline -1825,1886,Baseline -1744,1770,Baseline -1614,1647,Baseline -1474,1554,Baseline -1349,1399,Baseline -1221,1277,Baseline -1099,1138,Baseline -794,985,Baseline diff --git a/snazzy_analysis/tests/assets/annotated_data/VGAT-/20241011_vgatdf_emb3.npy.npy b/snazzy_analysis/tests/assets/annotated_data/VGAT-/20241011_vgatdf_emb3.npy.npy deleted file mode 100644 index 38bc05a..0000000 Binary files a/snazzy_analysis/tests/assets/annotated_data/VGAT-/20241011_vgatdf_emb3.npy.npy and /dev/null differ diff --git a/snazzy_analysis/tests/assets/annotated_data/VGAT-/20241011_vgatdf_emb3_annotated.csv b/snazzy_analysis/tests/assets/annotated_data/VGAT-/20241011_vgatdf_emb3_annotated.csv deleted file mode 100644 index 0f18b48..0000000 --- a/snazzy_analysis/tests/assets/annotated_data/VGAT-/20241011_vgatdf_emb3_annotated.csv +++ /dev/null @@ -1,31 +0,0 @@ -index,label -1064,Episode -1273,Episode -1416,Episode -1590,Episode -1761,Episode -1843,Episode -1994,Episode -2135,Episode 
-2265,Episode -2416,Episode -2569,Episode -2707,Episode -2832,Episode -2978,Episode -3117,Episode -3225,Episode -3285,Episode -3397,Episode -3447,Episode -3529,Episode -3702,Episode -3768,Episode -3882,Episode -3990,Episode -4129,Episode -4285,Episode -4422,Episode -4485,Episode -4527,Episode -4721,Episode diff --git a/snazzy_analysis/tests/assets/annotated_data/VGAT-/20241011_vgatdf_emb3_windows.csv b/snazzy_analysis/tests/assets/annotated_data/VGAT-/20241011_vgatdf_emb3_windows.csv deleted file mode 100644 index 31ae85e..0000000 --- a/snazzy_analysis/tests/assets/annotated_data/VGAT-/20241011_vgatdf_emb3_windows.csv +++ /dev/null @@ -1,51 +0,0 @@ -start_index,end_index,label -1050,1130,Episode -1267,1317,Episode -1398,1470,Episode -1582,1634,Episode -1750,1803,Episode -1834,1886,Episode -1985,2071,Episode -2122,2186,Episode -2256,2337,Episode -2400,2504,Episode -2552,2655,Episode -2691,2782,Episode -2815,2906,Episode -2959,3072,Episode -3107,3191,Episode -3214,3342,Episode -3392,3501,Episode -3521,3621,Episode -3697,3742,Episode -3765,3826,Episode -3881,3924,Episode -3983,4041,Episode -4125,4171,Episode -4277,4343,Episode -4419,4473,Episode -4473,4523,Episode -4525,4616,Episode -4718,4776,Episode -4619,4715,Baseline -4346,4417,Baseline -4044,4122,Baseline -3829,3879,Baseline -3623,3694,Baseline -3504,3517,Baseline -3345,3389,Baseline -3194,3212,Baseline -3073,3106,Baseline -2907,2956,Baseline -2783,2813,Baseline -2656,2689,Baseline -2506,2550,Baseline -2339,2397,Baseline -2187,2253,Baseline -2072,2120,Baseline -1887,1984,Baseline -1804,1832,Baseline -1636,1749,Baseline -1472,1581,Baseline -1318,1397,Baseline -1132,1266,Baseline diff --git a/snazzy_analysis/tests/assets/annotated_data/VGAT-/20241011_vgatdf_emb5.npy.npy b/snazzy_analysis/tests/assets/annotated_data/VGAT-/20241011_vgatdf_emb5.npy.npy deleted file mode 100644 index a838b48..0000000 Binary files a/snazzy_analysis/tests/assets/annotated_data/VGAT-/20241011_vgatdf_emb5.npy.npy and /dev/null differ diff 
--git a/snazzy_analysis/tests/assets/annotated_data/VGAT-/20241011_vgatdf_emb5_annotated.csv b/snazzy_analysis/tests/assets/annotated_data/VGAT-/20241011_vgatdf_emb5_annotated.csv deleted file mode 100644 index bd9b172..0000000 --- a/snazzy_analysis/tests/assets/annotated_data/VGAT-/20241011_vgatdf_emb5_annotated.csv +++ /dev/null @@ -1,38 +0,0 @@ -index,label -1084,Episode -1340,Episode -1526,Episode -1674,Episode -1878,Episode -2070,Episode -2186,Episode -2306,Episode -2473,Episode -2592,Episode -2749,Episode -2861,Episode -2950,Episode -3065,Episode -3157,Episode -3242,Episode -3356,Oscillation -3375,Oscillation -3405,Oscillation -3427,Oscillation -3445,Oscillation -3475,Oscillation -3501,Oscillation -3527,Oscillation -3588,Episode -3702,Episode -3749,Episode -3794,Episode -3847,Episode -3930,Episode -4003,Episode -4040,Episode -4160,Episode -4315,Episode -4418,Episode -4555,Episode -4689,Episode diff --git a/snazzy_analysis/tests/assets/annotated_data/VGAT-/20241011_vgatdf_emb5_windows.csv b/snazzy_analysis/tests/assets/annotated_data/VGAT-/20241011_vgatdf_emb5_windows.csv deleted file mode 100644 index 1e68ad4..0000000 --- a/snazzy_analysis/tests/assets/annotated_data/VGAT-/20241011_vgatdf_emb5_windows.csv +++ /dev/null @@ -1,54 +0,0 @@ -start_index,end_index,label -1073,1121,Episode -1325,1373,Episode -1518,1572,Episode -1664,1750,Episode -1866,1944,Episode -2062,2110,Episode -2177,2235,Episode -2296,2347,Episode -2464,2548,Episode -2584,2658,Episode -2737,2805,Episode -2847,2914,Episode -2941,3008,Episode -3049,3102,Episode -3147,3200,Episode -3231,3310,Episode -3344,3418,Episode -3420,3560,Episode -3577,3662,Episode -3698,3790,Episode -3792,3842,Episode -3843,3893,Episode -3926,4031,Episode -4032,4107,Episode -4156,4265,Episode -4311,4414,Episode -4415,4502,Episode -4550,4654,Episode -4687,4732,Episode -4656,4686,Baseline -4503,4549,Baseline -4267,4309,Baseline -4108,4154,Baseline -3896,3925,Baseline -3663,3697,Baseline -3561,3575,Baseline 
-3311,3342,Baseline -3203,3230,Baseline -3103,3146,Baseline -3009,3048,Baseline -2916,2939,Baseline -2808,2845,Baseline -2660,2735,Baseline -2549,2583,Baseline -2348,2462,Baseline -2236,2294,Baseline -2112,2175,Baseline -1946,2060,Baseline -1751,1865,Baseline -1574,1663,Baseline -1375,1515,Baseline -1125,1322,Baseline -3344,3557,Oscillation diff --git a/snazzy_analysis/tests/assets/annotated_data/VGluT-/20240828-vglutdf_emb4.npy.npy b/snazzy_analysis/tests/assets/annotated_data/VGluT-/20240828-vglutdf_emb4.npy.npy deleted file mode 100644 index 375d0f3..0000000 Binary files a/snazzy_analysis/tests/assets/annotated_data/VGluT-/20240828-vglutdf_emb4.npy.npy and /dev/null differ diff --git a/snazzy_analysis/tests/assets/annotated_data/VGluT-/20240828-vglutdf_emb7.npy.npy b/snazzy_analysis/tests/assets/annotated_data/VGluT-/20240828-vglutdf_emb7.npy.npy deleted file mode 100644 index 7385fee..0000000 Binary files a/snazzy_analysis/tests/assets/annotated_data/VGluT-/20240828-vglutdf_emb7.npy.npy and /dev/null differ diff --git a/snazzy_analysis/tests/assets/annotated_data/VGluT-/20240828_vglutdf_emb4_annotated.csv b/snazzy_analysis/tests/assets/annotated_data/VGluT-/20240828_vglutdf_emb4_annotated.csv deleted file mode 100644 index c7d058b..0000000 --- a/snazzy_analysis/tests/assets/annotated_data/VGluT-/20240828_vglutdf_emb4_annotated.csv +++ /dev/null @@ -1,40 +0,0 @@ -index,label -492,Episode -653,Episode -744,Episode -954,Episode -1150,Episode -1272,Episode -1437,Episode -1585,Episode -1758,Episode -1852,Episode -1941,Episode -2136,Episode -2329,Episode -2485,Episode -2637,Episode -2809,Episode -2013,Oscillation -2053,Oscillation -2075,Oscillation -2109,Oscillation -2693,Oscillation -2723,Oscillation -2760,Oscillation -2878,Oscillation -2903,Oscillation -2927,Oscillation -2948,Oscillation -2960,Oscillation -2970,Oscillation -2567,Oscillation -2579,Oscillation -2593,Oscillation -2270,Oscillation -2286,Oscillation -2306,Oscillation -3007,Episode -3007,Episode 
-3007,dSNA -1823,Oscillation diff --git a/snazzy_analysis/tests/assets/annotated_data/VGluT-/20240828_vglutdf_emb4_windows.csv b/snazzy_analysis/tests/assets/annotated_data/VGluT-/20240828_vglutdf_emb4_windows.csv deleted file mode 100644 index dd0a37f..0000000 --- a/snazzy_analysis/tests/assets/annotated_data/VGluT-/20240828_vglutdf_emb4_windows.csv +++ /dev/null @@ -1,38 +0,0 @@ -start_index,end_index,label -481,544,Episode -645,731,Episode -735,822,Episode -947,1033,Episode -1141,1241,Episode -1261,1332,Episode -1425,1495,Episode -1581,1684,Episode -1745,1835,Episode -1844,1895,Episode -1928,2006,Episode -2122,2196,Episode -2317,2429,Episode -2481,2566,Episode -2625,2710,Episode -2801,2902,Episode -2996,3097,Episode -3112,3181,Episode -3192,3254,Episode -2998,5396,dSNA -2874,2981,Oscillation -2749,2800,Oscillation -2564,2602,Oscillation -2255,2315,Oscillation -2008,2117,Oscillation -550,642,Baseline -825,943,Baseline -1035,1138,Baseline -1243,1257,Baseline -1336,1421,Baseline -1498,1576,Baseline -1687,1742,Baseline -1898,1926,Baseline -2200,2216,Baseline -2438,2476,Baseline -2604,2623,Baseline -2987,2995,Baseline diff --git a/snazzy_analysis/tests/assets/annotated_data/VGluT-/20240828_vglutdf_emb7_annotated.csv b/snazzy_analysis/tests/assets/annotated_data/VGluT-/20240828_vglutdf_emb7_annotated.csv deleted file mode 100644 index 933d4f1..0000000 --- a/snazzy_analysis/tests/assets/annotated_data/VGluT-/20240828_vglutdf_emb7_annotated.csv +++ /dev/null @@ -1,29 +0,0 @@ -index,label -1321,Episode -1287,Episode -1232,Episode -1533,Episode -1704,Episode -1864,Episode -2083,Episode -2409,Episode -2716,Episode -2860,Episode -3137,Episode -3384,Episode -1165,Episode -2823,Oscillation -2963,Oscillation -2975,Oscillation -3000,Oscillation -3057,Oscillation -3073,Oscillation -3290,Oscillation -3308,Oscillation -3354,Oscillation -3480,Oscillation -3508,Oscillation -3524,Oscillation -3539,Oscillation -3566,Oscillation -3566,dSNA diff --git 
a/snazzy_analysis/tests/assets/annotated_data/VGluT-/20240828_vglutdf_emb7_windows.csv b/snazzy_analysis/tests/assets/annotated_data/VGluT-/20240828_vglutdf_emb7_windows.csv deleted file mode 100644 index a59916b..0000000 --- a/snazzy_analysis/tests/assets/annotated_data/VGluT-/20240828_vglutdf_emb7_windows.csv +++ /dev/null @@ -1,36 +0,0 @@ -start_index,end_index,label -1157,1200,Episode -1224,1279,Episode -1282,1312,Episode -1314,1358,Episode -1523,1576,Episode -1697,1781,Episode -1855,1938,Episode -2076,2160,Episode -2399,2489,Episode -2703,2793,Episode -2850,2934,Episode -3132,3205,Episode -3376,3444,Episode -1361,1519,Baseline -1577,1694,Baseline -1783,1853,Baseline -1941,2074,Baseline -2162,2398,Baseline -2492,2697,Baseline -2795,2817,Baseline -2830,2847,Baseline -3009,3053,Baseline -3086,3130,Baseline -3230,3286,Baseline -3316,3348,Baseline -3365,3376,Baseline -3447,3477,Baseline -3479,5357,dSNA -3351,3360,Oscillation -3289,3313,Oscillation -3209,3226,Oscillation -3056,3080,Oscillation -2940,3006,Oscillation -2819,2829,Oscillation -1202,1223,Baseline diff --git a/snazzy_analysis/tests/assets/annotated_data/VGluT-/20240829-vglutdf_emb1.npy.npy b/snazzy_analysis/tests/assets/annotated_data/VGluT-/20240829-vglutdf_emb1.npy.npy deleted file mode 100644 index 05392e7..0000000 Binary files a/snazzy_analysis/tests/assets/annotated_data/VGluT-/20240829-vglutdf_emb1.npy.npy and /dev/null differ diff --git a/snazzy_analysis/tests/assets/annotated_data/VGluT-/20240829-vglutdf_emb6.npy.npy b/snazzy_analysis/tests/assets/annotated_data/VGluT-/20240829-vglutdf_emb6.npy.npy deleted file mode 100644 index 79a072f..0000000 Binary files a/snazzy_analysis/tests/assets/annotated_data/VGluT-/20240829-vglutdf_emb6.npy.npy and /dev/null differ diff --git a/snazzy_analysis/tests/assets/annotated_data/VGluT-/20240829_vglutdf_emb1_annotated.csv b/snazzy_analysis/tests/assets/annotated_data/VGluT-/20240829_vglutdf_emb1_annotated.csv deleted file mode 100644 index 1d5851a..0000000 --- 
a/snazzy_analysis/tests/assets/annotated_data/VGluT-/20240829_vglutdf_emb1_annotated.csv +++ /dev/null @@ -1,61 +0,0 @@ -index,label -1294,Episode -1432,Episode -1588,Episode -1827,Episode -2046,Episode -2268,Episode -2458,Episode -2591,Episode -2815,Episode -2978,Episode -3351,Episode -3488,Episode -3571,Episode -2312,Oscillation -2331,Oscillation -2348,Oscillation -2413,Oscillation -2433,Oscillation -2380,Oscillation -2364,Oscillation -2502,Oscillation -2520,Oscillation -2535,Oscillation -2543,Oscillation -2552,Oscillation -2568,Oscillation -2622,Oscillation -2639,Oscillation -2657,Oscillation -2674,Oscillation -2691,Oscillation -2710,Oscillation -2725,Oscillation -2741,Oscillation -2755,Oscillation -2776,Oscillation -2792,Oscillation -2863,Oscillation -2881,Oscillation -2900,Oscillation -2918,Oscillation -2938,Oscillation -2955,Oscillation -3025,Oscillation -3042,Oscillation -3058,Oscillation -3078,Oscillation -3094,Oscillation -3149,Oscillation -3166,Oscillation -3179,Oscillation -3190,Oscillation -3270,Oscillation -3276,Oscillation -3289,Oscillation -3314,Oscillation -3417,Oscillation -3437,Oscillation -3455,Oscillation -3571,dSNA diff --git a/snazzy_analysis/tests/assets/annotated_data/VGluT-/20240829_vglutdf_emb1_windows.csv b/snazzy_analysis/tests/assets/annotated_data/VGluT-/20240829_vglutdf_emb1_windows.csv deleted file mode 100644 index cc3d363..0000000 --- a/snazzy_analysis/tests/assets/annotated_data/VGluT-/20240829_vglutdf_emb1_windows.csv +++ /dev/null @@ -1,33 +0,0 @@ -start_index,end_index,label -1286,1365,Episode -1422,1464,Episode -1581,1660,Episode -1820,1907,Episode -2032,2155,Episode -2255,2392,Episode -2445,2577,Episode -2580,2701,Episode -2803,2963,Episode -2965,3104,Episode -3107,3176,Episode -3196,3285,Episode -3339,3465,Episode -3479,3552,Episode -3554,3621,Episode -3553,5386,dSNA -3468,3475,Baseline -3325,3337,Baseline -2396,2408,Baseline -2162,2253,Baseline -1914,2028,Baseline -1663,1816,Baseline -1469,1574,Baseline -1372,1419,Baseline 
-2304,2438,Oscillation -2498,2550,Oscillation -2614,2798,Oscillation -2858,2964,Oscillation -3019,3105,Oscillation -3141,3193,Oscillation -3285,3321,Oscillation -3414,3463,Oscillation diff --git a/snazzy_analysis/tests/assets/annotated_data/VGluT-/20240829_vglutdf_emb6_annotated.csv b/snazzy_analysis/tests/assets/annotated_data/VGluT-/20240829_vglutdf_emb6_annotated.csv deleted file mode 100644 index 0d3f194..0000000 --- a/snazzy_analysis/tests/assets/annotated_data/VGluT-/20240829_vglutdf_emb6_annotated.csv +++ /dev/null @@ -1,30 +0,0 @@ -index,label -1490,Episode -1705,Episode -1994,Episode -2233,Episode -2443,Episode -2624,Episode -2811,Episode -2979,Episode -3136,Episode -3287,Episode -3424,Episode -3628,Episode -3677,Oscillation -3690,Oscillation -3698,Oscillation -3715,Oscillation -3747,Oscillation -3759,Oscillation -3765,Oscillation -3824,Episode -3906,Episode -4092,Episode -4092,dSNA -3966,Oscillation -3982,Oscillation -3997,Oscillation -3529,Oscillation -3545,Oscillation -3561,Oscillation diff --git a/snazzy_analysis/tests/assets/annotated_data/VGluT-/20240829_vglutdf_emb6_windows.csv b/snazzy_analysis/tests/assets/annotated_data/VGluT-/20240829_vglutdf_emb6_windows.csv deleted file mode 100644 index 438771c..0000000 --- a/snazzy_analysis/tests/assets/annotated_data/VGluT-/20240829_vglutdf_emb6_windows.csv +++ /dev/null @@ -1,33 +0,0 @@ -start_index,end_index,label -1480,1529,Episode -1693,1775,Episode -1983,2085,Episode -2221,2274,Episode -2430,2494,Episode -2617,2681,Episode -2797,2907,Episode -2968,3064,Episode -3122,3214,Episode -3275,3387,Episode -3417,3508,Episode -3619,3740,Episode -3811,3885,Episode -3896,3953,Episode -4065,4197,Episode -3897,5376,dSNA -3887,3894,Baseline -3570,3588,Baseline -3389,3415,Baseline -3216,3271,Baseline -3065,3120,Baseline -2911,2965,Baseline -2686,2793,Baseline -2498,2614,Baseline -2276,2427,Baseline -2087,2219,Baseline -1778,1979,Baseline -1532,1692,Baseline -3319,3383,Oscillation -3669,3805,Oscillation 
-3524,3575,Oscillation -3509,3522,Baseline diff --git a/snazzy_analysis/tests/assets/annotated_data/VGluT-/20240830-vglutdf_emb2.npy.npy b/snazzy_analysis/tests/assets/annotated_data/VGluT-/20240830-vglutdf_emb2.npy.npy deleted file mode 100644 index d829dd9..0000000 Binary files a/snazzy_analysis/tests/assets/annotated_data/VGluT-/20240830-vglutdf_emb2.npy.npy and /dev/null differ diff --git a/snazzy_analysis/tests/assets/annotated_data/VGluT-/20240830_vglutdf_emb2_annotated.csv b/snazzy_analysis/tests/assets/annotated_data/VGluT-/20240830_vglutdf_emb2_annotated.csv deleted file mode 100644 index e2112ef..0000000 --- a/snazzy_analysis/tests/assets/annotated_data/VGluT-/20240830_vglutdf_emb2_annotated.csv +++ /dev/null @@ -1,68 +0,0 @@ -index,label -828,Episode -949,Episode -1125,Episode -1281,Episode -1417,Episode -1537,Episode -1705,Episode -1848,Episode -1971,Episode -2115,Episode -2253,Episode -2382,Episode -2474,Episode -2594,Episode -2711,Episode -2816,Episode -3030,Episode -3172,Episode -3257,Episode -3328,Episode -3388,Episode -3446,Episode -3257,dSNA -3328,dSNA -3388,dSNA -3446,dSNA -1885,Oscillation -1898,Oscillation -1915,Oscillation -1933,Oscillation -1951,Oscillation -2036,Oscillation -2053,Oscillation -2067,Oscillation -2088,Oscillation -2156,Oscillation -2172,Oscillation -2190,Oscillation -2206,Oscillation -2225,Oscillation -2240,Oscillation -2291,Oscillation -2310,Oscillation -2326,Oscillation -2340,Oscillation -2360,Oscillation -2412,Oscillation -2432,Oscillation -2447,Oscillation -2517,Oscillation -2536,Oscillation -2551,Oscillation -2568,Oscillation -2638,Oscillation -2652,Oscillation -2671,Oscillation -2688,Oscillation -2763,Oscillation -2777,Oscillation -2793,Oscillation -2857,Oscillation -2878,Oscillation -2895,Oscillation -2915,Oscillation -2927,Oscillation -3146,Oscillation -3159,Oscillation diff --git a/snazzy_analysis/tests/assets/annotated_data/VGluT-/20240830_vglutdf_emb2_windows.csv 
b/snazzy_analysis/tests/assets/annotated_data/VGluT-/20240830_vglutdf_emb2_windows.csv deleted file mode 100644 index b95dfe4..0000000 --- a/snazzy_analysis/tests/assets/annotated_data/VGluT-/20240830_vglutdf_emb2_windows.csv +++ /dev/null @@ -1,41 +0,0 @@ -start_index,end_index,label -814,881,Episode -937,1048,Episode -1113,1181,Episode -1266,1334,Episode -1410,1454,Episode -1531,1604,Episode -1690,1789,Episode -1829,1908,Episode -1963,2062,Episode -2099,2198,Episode -2234,2304,Episode -2371,2441,Episode -2459,2530,Episode -2581,2698,Episode -2701,2770,Episode -2804,2868,Episode -3022,3095,Episode -3164,3229,Episode -3254,3306,Episode -3320,3372,Episode -3376,3436,Episode -3439,3506,Episode -3508,3567,Episode -3250,5394,dSNA -2853,2924,Oscillation -2754,2802,Oscillation -2629,2699,Oscillation -2512,2582,Oscillation -2408,2457,Oscillation -2287,2370,Oscillation -2151,2234,Oscillation -2033,2099,Oscillation -1879,1963,Oscillation -886,933,Baseline -1051,1108,Baseline -1184,1260,Baseline -1340,1407,Baseline -1503,1529,Baseline -1609,1686,Baseline -1792,1824,Baseline diff --git a/snazzy_analysis/tests/assets/annotated_data/WT/20240611_25C_emb2.npy.npy b/snazzy_analysis/tests/assets/annotated_data/WT/20240611_25C_emb2.npy.npy deleted file mode 100644 index e19e6c6..0000000 Binary files a/snazzy_analysis/tests/assets/annotated_data/WT/20240611_25C_emb2.npy.npy and /dev/null differ diff --git a/snazzy_analysis/tests/assets/annotated_data/WT/20240611_25C_emb2_annotated.csv b/snazzy_analysis/tests/assets/annotated_data/WT/20240611_25C_emb2_annotated.csv deleted file mode 100644 index 6cdbd29..0000000 --- a/snazzy_analysis/tests/assets/annotated_data/WT/20240611_25C_emb2_annotated.csv +++ /dev/null @@ -1,31 +0,0 @@ -index,label -609,Episode -1037,Episode -1292,Episode -1467,Episode -1733,Episode -1902,Episode -2008,Episode -2158,Episode -2273,Episode -2455,Episode -2600,Episode -2814,Episode -3003,Episode -3084,Episode -3208,Episode -3293,Episode -3340,Episode -3003,dSNA 
-3084,dSNA -3208,dSNA -3293,dSNA -3340,dSNA -2927,Oscillation -2946,Oscillation -2961,Oscillation -2981,Oscillation -2518,Oscillation -2532,Oscillation -2554,Oscillation -2576,Oscillation diff --git a/snazzy_analysis/tests/assets/annotated_data/WT/20240611_25C_emb2_windows.csv b/snazzy_analysis/tests/assets/annotated_data/WT/20240611_25C_emb2_windows.csv deleted file mode 100644 index 88a2950..0000000 --- a/snazzy_analysis/tests/assets/annotated_data/WT/20240611_25C_emb2_windows.csv +++ /dev/null @@ -1,28 +0,0 @@ -start_index,end_index,label -737,1023,Baseline -1127,1279,Baseline -1403,1453,Baseline -1641,1714,Baseline -1845,1888,Baseline -1973,1996,Baseline -2132,2150,Baseline -2254,2263,Baseline -2383,2445,Baseline -601,731,Episode -1027,1124,Episode -1285,1394,Episode -1457,1635,Episode -1725,1841,Episode -1894,1969,Episode -2000,2128,Episode -2153,2247,Episode -2268,2379,Episode -2447,2579,Episode -2589,2690,Episode -2799,2912,Episode -3078,3186,Episode -3287,3372,Episode -2490,2584,Oscillation -2887,3060,Oscillation -2694,2793,Baseline -2918,3371,dSNA diff --git a/snazzy_analysis/tests/assets/annotated_data/WT/20240611_25C_emb6.npy.npy b/snazzy_analysis/tests/assets/annotated_data/WT/20240611_25C_emb6.npy.npy deleted file mode 100644 index 269b2cd..0000000 Binary files a/snazzy_analysis/tests/assets/annotated_data/WT/20240611_25C_emb6.npy.npy and /dev/null differ diff --git a/snazzy_analysis/tests/assets/annotated_data/WT/20240611_25C_emb6_annotated.csv b/snazzy_analysis/tests/assets/annotated_data/WT/20240611_25C_emb6_annotated.csv deleted file mode 100644 index 9e39b9e..0000000 --- a/snazzy_analysis/tests/assets/annotated_data/WT/20240611_25C_emb6_annotated.csv +++ /dev/null @@ -1,18 +0,0 @@ -index,label -359,Episode -567,Episode -849,Episode -1055,Episode -1283,Episode -1514,Episode -1697,Episode -1858,Episode -2064,Episode -2157,Episode -2252,Episode -2330,Episode -2409,Episode -2157,dSNA -2252,dSNA -2330,dSNA -2409,dSNA diff --git 
a/snazzy_analysis/tests/assets/annotated_data/WT/20240611_25C_emb6_windows.csv b/snazzy_analysis/tests/assets/annotated_data/WT/20240611_25C_emb6_windows.csv deleted file mode 100644 index 4165ad9..0000000 --- a/snazzy_analysis/tests/assets/annotated_data/WT/20240611_25C_emb6_windows.csv +++ /dev/null @@ -1,23 +0,0 @@ -start_index,end_index,label -342,440,Episode -555,666,Episode -835,970,Episode -1046,1180,Episode -1274,1409,Episode -1504,1615,Episode -1687,1794,Episode -1845,1952,Episode -2051,2145,Episode -2148,2242,Episode -2245,2319,Episode -2321,2378,Episode -2381,2446,Episode -2147,2489,dSNA -445,550,Baseline -671,832,Baseline -973,1043,Baseline -1186,1268,Baseline -1415,1498,Baseline -1621,1681,Baseline -1800,1840,Baseline -1958,2043,Baseline diff --git a/snazzy_analysis/tests/assets/annotated_data/WT/20240919_25C_emb14.npy.npy b/snazzy_analysis/tests/assets/annotated_data/WT/20240919_25C_emb14.npy.npy deleted file mode 100644 index dd1a615..0000000 Binary files a/snazzy_analysis/tests/assets/annotated_data/WT/20240919_25C_emb14.npy.npy and /dev/null differ diff --git a/snazzy_analysis/tests/assets/annotated_data/WT/20240919_25C_emb14_annotated.csv b/snazzy_analysis/tests/assets/annotated_data/WT/20240919_25C_emb14_annotated.csv deleted file mode 100644 index 3e01b99..0000000 --- a/snazzy_analysis/tests/assets/annotated_data/WT/20240919_25C_emb14_annotated.csv +++ /dev/null @@ -1,13 +0,0 @@ -index,label -586,Episode -920,Episode -1296,Episode -1605,Episode -1814,Episode -2047,Episode -2233,Episode -2353,Episode -2476,Episode -2518,Episode -2476,dSNA -2518,dSNA diff --git a/snazzy_analysis/tests/assets/annotated_data/WT/20240919_25C_emb14_windows.csv b/snazzy_analysis/tests/assets/annotated_data/WT/20240919_25C_emb14_windows.csv deleted file mode 100644 index 9d279ce..0000000 --- a/snazzy_analysis/tests/assets/annotated_data/WT/20240919_25C_emb14_windows.csv +++ /dev/null @@ -1,17 +0,0 @@ -start_index,end_index,label -567,685,Episode -909,1052,Episode 
-1281,1457,Episode -1595,1715,Episode -1803,1909,Episode -2037,2137,Episode -2223,2334,Episode -2345,2416,Episode -2474,2552,Episode -2421,2615,dSNA -2141,2220,Baseline -1912,2031,Baseline -1719,1797,Baseline -1464,1590,Baseline -1057,1277,Baseline -689,904,Baseline diff --git a/snazzy_analysis/tests/assets/annotated_data/WT/20240919_25C_emb21.npy.npy b/snazzy_analysis/tests/assets/annotated_data/WT/20240919_25C_emb21.npy.npy deleted file mode 100644 index e4d8b1c..0000000 Binary files a/snazzy_analysis/tests/assets/annotated_data/WT/20240919_25C_emb21.npy.npy and /dev/null differ diff --git a/snazzy_analysis/tests/assets/annotated_data/WT/20240919_25C_emb21_annotated.csv b/snazzy_analysis/tests/assets/annotated_data/WT/20240919_25C_emb21_annotated.csv deleted file mode 100644 index 782440f..0000000 --- a/snazzy_analysis/tests/assets/annotated_data/WT/20240919_25C_emb21_annotated.csv +++ /dev/null @@ -1,18 +0,0 @@ -index,label -468,Episode -765,Episode -602,Episode -954,Episode -1125,Episode -1284,Episode -1476,Episode -1638,Episode -1878,Episode -2018,Episode -2147,Episode -2294,Episode -2415,Episode -2488,Episode -2629,Episode -2488,dSNA -2629,dSNA diff --git a/snazzy_analysis/tests/assets/annotated_data/WT/20240919_25C_emb21_windows.csv b/snazzy_analysis/tests/assets/annotated_data/WT/20240919_25C_emb21_windows.csv deleted file mode 100644 index bfd6739..0000000 --- a/snazzy_analysis/tests/assets/annotated_data/WT/20240919_25C_emb21_windows.csv +++ /dev/null @@ -1,29 +0,0 @@ -start_index,end_index,label -458,544,Episode -588,659,Episode -746,860,Episode -942,1046,Episode -1107,1251,Episode -1278,1375,Episode -1464,1600,Episode -1628,1764,Episode -1860,1983,Episode -2007,2125,Episode -2139,2257,Episode -2281,2399,Episode -2408,2467,Episode -2474,2566,Episode -2619,2662,Episode -2664,2741,Episode -2475,2750,dSNA -2262,2278,Baseline -2127,2136,Baseline -1987,2005,Baseline -1767,1857,Baseline -1602,1626,Baseline -1378,1462,Baseline -1253,1276,Baseline 
-1051,1105,Baseline -861,939,Baseline -662,743,Baseline -547,585,Baseline diff --git a/snazzy_analysis/tests/assets/annotated_data/WT/20250404_wt_emb13.npy.npy b/snazzy_analysis/tests/assets/annotated_data/WT/20250404_wt_emb13.npy.npy deleted file mode 100644 index 4bfb6fc..0000000 Binary files a/snazzy_analysis/tests/assets/annotated_data/WT/20250404_wt_emb13.npy.npy and /dev/null differ diff --git a/snazzy_analysis/tests/assets/annotated_data/WT/20250404_wt_emb13_annotated.csv b/snazzy_analysis/tests/assets/annotated_data/WT/20250404_wt_emb13_annotated.csv deleted file mode 100644 index 3e6c5b7..0000000 --- a/snazzy_analysis/tests/assets/annotated_data/WT/20250404_wt_emb13_annotated.csv +++ /dev/null @@ -1,11 +0,0 @@ -index,label -1159,Episode -1437,Episode -1700,Episode -1898,Episode -2088,Episode -2247,Episode -2425,Episode -2621,Episode -2709,Episode -2709,dSNA diff --git a/snazzy_analysis/tests/assets/annotated_data/WT/20250404_wt_emb13_windows.csv b/snazzy_analysis/tests/assets/annotated_data/WT/20250404_wt_emb13_windows.csv deleted file mode 100644 index 6316a9f..0000000 --- a/snazzy_analysis/tests/assets/annotated_data/WT/20250404_wt_emb13_windows.csv +++ /dev/null @@ -1,18 +0,0 @@ -start_index,end_index,label -1151,1210,Episode -1430,1563,Episode -1695,1757,Episode -1893,2010,Episode -2078,2196,Episode -2242,2315,Episode -2417,2491,Episode -2615,2684,Episode -2696,2793,Episode -2696,2827,dSNA -2493,2613,Baseline -2317,2412,Baseline -2198,2239,Baseline -2013,2075,Baseline -1846,1890,Baseline -1569,1692,Baseline -1216,1426,Baseline diff --git a/snazzy_analysis/tests/assets/annotated_data/WT/20250404_wt_emb46.npy.npy b/snazzy_analysis/tests/assets/annotated_data/WT/20250404_wt_emb46.npy.npy deleted file mode 100644 index f7fe326..0000000 Binary files a/snazzy_analysis/tests/assets/annotated_data/WT/20250404_wt_emb46.npy.npy and /dev/null differ diff --git a/snazzy_analysis/tests/assets/annotated_data/WT/20250404_wt_emb46_annotated.csv 
b/snazzy_analysis/tests/assets/annotated_data/WT/20250404_wt_emb46_annotated.csv deleted file mode 100644 index 28ce59a..0000000 --- a/snazzy_analysis/tests/assets/annotated_data/WT/20250404_wt_emb46_annotated.csv +++ /dev/null @@ -1,18 +0,0 @@ -index,label -764,Episode -1102,Episode -1334,Episode -1519,Episode -1746,Episode -1930,Episode -2153,Episode -2352,Episode -2556,Episode -2693,Episode -2821,Episode -2922,Episode -3058,Episode -3174,dSNA -3295,dSNA -3174,Episode -3295,Episode diff --git a/snazzy_analysis/tests/assets/annotated_data/WT/20250404_wt_emb46_windows.csv b/snazzy_analysis/tests/assets/annotated_data/WT/20250404_wt_emb46_windows.csv deleted file mode 100644 index 9c8da7b..0000000 --- a/snazzy_analysis/tests/assets/annotated_data/WT/20250404_wt_emb46_windows.csv +++ /dev/null @@ -1,30 +0,0 @@ -start_index,end_index,label -754,819,Episode -1094,1200,Episode -1323,1428,Episode -1502,1607,Episode -1727,1832,Episode -1919,2019,Episode -2133,2255,Episode -2329,2429,Episode -2545,2605,Episode -2623,2652,Episode -2684,2759,Episode -2777,2796,Episode -2813,2886,Episode -2905,2978,Episode -3049,3121,Episode -3157,3253,Episode -3157,3344,dSNA -3280,3344,Episode -2981,3045,Baseline -2656,2681,Baseline -2494,2542,Baseline -2258,2324,Baseline -2024,2129,Baseline -1834,1916,Baseline -1612,1721,Baseline -1432,1497,Baseline -1204,1320,Baseline -826,1090,Baseline -2434,2489,Baseline diff --git a/snazzy_analysis/tests/compare_detected_peaks_and_annotations.py b/snazzy_analysis/tests/compare_detected_peaks_and_annotations.py deleted file mode 100644 index b25fb05..0000000 --- a/snazzy_analysis/tests/compare_detected_peaks_and_annotations.py +++ /dev/null @@ -1,323 +0,0 @@ -from pathlib import Path - -import matplotlib.pyplot as plt -import pandas as pd -import seaborn as sns - -from snazzy_analysis import Experiment, Trace -from peak_annot_parser import ( - PeakAnnotationParser, - GroundTruthPointData, - GroundTruthWindowData, -) - -ANNOT_DIR = 
Path(__file__).parent.joinpath("assets", "annotated_data") - - -def load_data(annotated_path, exp_dir, annot_type): - """Generates ground truth and calculated data in pairs. - - Parameters: - annotated_path (Path): - Path to annotated csv file. - exp_dir (Path): - Path to experiment dir. - annot_type ('point' | 'window'): - Type of GT data. - - Yields: - comparison_data (tuple): - (annotations, peak_idxes, exp_name, trace) - """ - annotated_data = PeakAnnotationParser(annotated_path, annot_type) - exp_names = annotated_data.get_exp_names() - for exp_name in exp_names: - annotations = annotated_data.get_annotation_by_exp_name(exp_name) - emb_names = [a.emb_name for a in annotations] - exp = Experiment( - Path(exp_dir + "/" + exp_name), - to_exclude=[x for x in range(99) if f"emb{x}" not in emb_names], - first_peak_threshold=0, - dff_strategy="local_minima", - ) - - for annot in annotations: - emb_name = annot.emb_name - trace = exp.get_embryo(emb_name).trace - calc_idxes = get_peak_idxes(exp, emb_name) - yield (annot, calc_idxes, exp_name, trace) - - -def abs_bound_distance(bound1, bound2): - s1, e1 = bound1 - s2, e2 = bound2 - return abs(s1 - s2) + abs(e1 - e2) - - -def evaluate_bounds_detection(annot_bounds, calc_bounds, tolerance): - matched = [] - unmatched_annotated = set(annot_bounds) - unmatched_calculated = set(calc_bounds) - visited = set() - - for ann in annot_bounds: - close_candidates = [ - calc - for calc in calc_bounds - if abs_bound_distance(calc, ann) <= tolerance and calc not in visited - ] - if close_candidates: - best_match = min(close_candidates, key=lambda c: abs_bound_distance(c, ann)) - matched.append((ann, best_match, abs_bound_distance(best_match, ann))) - visited.add(best_match) - unmatched_annotated.discard(ann) - unmatched_calculated.discard(best_match) - - return { - "matches": matched, - "mean_error": sum(e[-1] for e in matched) / len(matched) if matched else None, - "misses": list(unmatched_annotated), - "false_positives": 
list(unmatched_calculated), - } - - -def evaluate_peak_detection(annotated_peaks, calculated_peaks, tolerance): - matched = [] - unmatched_annotated = set(annotated_peaks) - unmatched_calculated = set(calculated_peaks) - visited = set() - - for ann in annotated_peaks: - close_candidates = [ - calc - for calc in calculated_peaks - if abs(calc - ann) <= tolerance and calc not in visited - ] - if close_candidates: - best_match = min(close_candidates, key=lambda c: abs(c - ann)) - matched.append((ann, best_match, abs(best_match - ann))) - visited.add(best_match) - unmatched_annotated.discard(ann) - unmatched_calculated.discard(best_match) - - return { - "matches": matched, - "mean_error": sum(e[-1] for e in matched) / len(matched) if matched else None, - "misses": list(unmatched_annotated), - "false_positives": list(unmatched_calculated), - } - - -def get_comparison_results(annot_to_exp, annot_type): - results = [] - for annot_file, exp_file in annot_to_exp.items(): - annot_path = ANNOT_DIR + annot_file - exp_path = "./" + exp_file - if annot_type == "point": - for annot, calc_idxes, exp_name, _ in load_data( - annot_path, exp_path, annot_type - ): - annot_idxes = annot.episode_idxes - res = evaluate_peak_detection(annot_idxes, calc_idxes, tolerance=30) - res["exp_name"] = exp_name - results.append(res) - elif annot_type == "window": - for annot, _, exp_name, trace in load_data( - annot_path, exp_path, annot_type - ): - annot_bounds = annot.episode_bounds - trace.compute_peak_bounds(rel_height=0.99) - calc_bounds = [tuple(b.tolist()) for b in trace.peak_bounds_indices] - res = evaluate_bounds_detection(annot_bounds, calc_bounds, tolerance=90) - res["exp_name"] = exp_name - results.append(res) - - return results - - -def get_peak_idxes(exp: Experiment, emb_name): - idxs = exp.get_embryo(emb_name).trace.peak_idxes - return [int(idx) for idx in idxs] - - -def plot_single_trace(trace: Trace, annot: GroundTruthPointData): - annot_peaks = annot.episode_idxes - dff = 
trace.dff[: trace.trim_idx] - annot_amps = [dff[p] for p in annot_peaks] - - fig, ax = plt.subplots(figsize=(12, 6)) - ax.plot(dff) - ax.plot(annot_peaks, annot_amps, "b.", ms=12, label="annot") - ax.plot(trace.peak_idxes, trace.peak_amplitudes, "r.", label="calc") - ax.set_title(annot.emb_name) - ax.legend() - plt.show() - - -def plot_single_trace_bounds(trace: Trace, annot: GroundTruthWindowData): - annot_bounds = annot.episode_bounds - dff = trace.dff[: trace.trim_idx] - trace.compute_peak_bounds(rel_height=0.99) - calc_bounds = trace.peak_bounds_indices - - fig, ax = plt.subplots(figsize=(12, 6)) - ax.plot(dff) - for s, e in annot_bounds: - ax.axvline(s, ls=":", color="b", alpha=0.5) - ax.axvline(e, ls=":", color="b", alpha=0.5) - for s, e in calc_bounds: - ax.axvspan(s, e, facecolor="r", alpha=0.3) - ax.set_title(annot.emb_name) - plt.show() - - -def plot_all_traces(annot_to_exp, annot_type): - for annot_file, exp_file in annot_to_exp.items(): - annot_path = ANNOT_DIR + annot_file - exp_path = "./" + exp_file - if annot_type == "point": - for annot, _, _, trace in load_data(annot_path, exp_path, annot_type): - plot_single_trace(trace, annot) - elif annot_type == "window": - for annot, _, _, trace in load_data(annot_path, exp_path, annot_type): - plot_single_trace_bounds(trace, annot) - - -def plot_mean_error(results): - exps_to_errs = {} - - for res in results: - exp_name = res.get("exp_name", None) - if exp_name is None: - print(f"WARN: Invalid data, skipping..") - if exp_name not in exps_to_errs: - exps_to_errs[exp_name] = [] - - exps_to_errs[exp_name].append(res.get("mean_error")) - - mean_errors = [sum(res) / len(res) for res in exps_to_errs.values()] - exp_names = list(exps_to_errs.keys()) - x = list(range(len(mean_errors))) - - fig, ax = plt.subplots() - ax.plot(x, mean_errors, "bo") - ax.grid(True) - ax.set_xticks(x, exp_names, rotation="vertical") - ax.set_ylabel("# of frames") - ax.set_title("Mean absolute error") - - plt.tight_layout() - plt.show() - 
- -def plot_eval_results(results): - rows = [] - for res in results: - for k, v in res.items(): - if k == "exp_name" or k == "mean_error": - continue - rows.append( - { - "exp_name": group_name(res.get("exp_name")), - "metric": k, - "count": len(v), - } - ) - - df = pd.DataFrame(rows) - - sns.set_theme(style="darkgrid") - ax = sns.catplot( - data=df, - x="exp_name", - y="count", - hue="metric", - kind="bar", - width=0.4, - edgecolor="k", - ) - ax.figure.suptitle("Calc vs GT peak positions") - plt.show() - - -def plot_eval_results_bounds(results): - rows = [] - for res in results: - for ann, calc, d in res["matches"]: - rows.append( - { - "group_name": group_name(res["exp_name"]), - "exp_name": res["exp_name"], - "left_match": ann[0] - calc[0], - "right_match": ann[1] - calc[1], - "distance": d, - } - ) - - df = pd.DataFrame(rows) - df_long = df.melt( - id_vars=["group_name", "exp_name"], - value_vars=["left_match", "right_match", "distance"], - var_name="metric", - value_name="value", - ) - - sns.set_theme(style="darkgrid") - ax = sns.stripplot( - data=df_long, - x="metric", - y="value", - hue="group_name", - dodge=True, - alpha=0.7, - zorder=1, - ) - - sns.pointplot( - data=df_long, - x="metric", - y="value", - hue="group_name", - dodge=0.532, - errorbar=None, - linewidth=0, - color="k", - markers=".", - markersize=8, - ax=ax, - legend=False, - ) - ax.figure.suptitle("Calc vs GT width positions") - plt.show() - - -def group_name(exp_name): - if exp_name.endswith("vglutdf"): - return "vglutdf" - elif exp_name.endswith("vgatdf"): - return "vgatdf" - elif exp_name.endswith("wt") or exp_name.endswith("25C"): - return "wt" - - -def write_results(res_path, results): - with open(res_path, "w+") as f: - for res in results: - for k, v in res.items(): - f.write(f"{k}: {v}" + "\n") - print(f"Wrote data at {res_path}") - - -if __name__ == "__main__": - - annot_to_exp = {"VGAT-": "./data/vgat", "VGluT-": "./data/vglut", "WT": "./data"} - - # view results for peak boundaries: - 
# results = get_comparison_results(annot_to_exp, "window") - # plot_eval_results_bounds(results) - # plot_all_traces(annot_to_exp, "window") - - # view results for peak data: - # results = get_comparison_results(annot_to_exp, "point") - # plot_eval_results(results) - # plot_all_traces(annot_to_exp, "point") diff --git a/snazzy_analysis/tests/peak_annot_parser.py b/snazzy_analysis/tests/peak_annot_parser.py deleted file mode 100644 index 8b0a978..0000000 --- a/snazzy_analysis/tests/peak_annot_parser.py +++ /dev/null @@ -1,147 +0,0 @@ -import csv -from dataclasses import dataclass -from pathlib import Path - - -@dataclass -class GroundTruthData: - - exp_date: str - fly_line: str - emb_name: str - - -@dataclass -class GroundTruthPointData(GroundTruthData): - """Keep track of annotated peaks for a given embryo.""" - - episode_idxes: list - oscillation_idxes: list - - -@dataclass -class GroundTruthWindowData(GroundTruthData): - """Keep track of peak boundaries for a given embryo.""" - - episode_bounds: list - oscillation_bounds: list - - -class PeakAnnotationParser: - """Reads annotated peak data from a csv file. - - Expects the file name to follow the format: expDate_flyLine_embName_annotated.csv. - Also expects csv files to have two columns, first for peak index and two for label. 
- """ - - def __init__(self, dir_path: str | Path, annot_type: str): - self.dir_path = Path(dir_path) - self.validate_annot_type(annot_type) - if annot_type == "point": - self.annotations = self.load_point_data() - elif annot_type == "window": - self.annotations = self.load_window_data() - - def validate_annot_type(self, annot_type): - valid_types = ["window", "point"] - if annot_type not in valid_types: - raise ValueError( - f"Invalid annot_type: {annot_type}, expected one of {valid_types}" - ) - - def load_point_data(self, suffix="annotated.csv"): - csv_paths = [f for f in self.dir_path.iterdir() if f.name.endswith(suffix)] - - annotations = {} - - for csv_path in csv_paths: - exp_date, fly_line, emb_name, _ = csv_path.name.split("_") - episode_idxes = [] - oscillation_idxes = [] - with open(csv_path, newline="") as csv_file: - rdr = csv.reader(csv_file) - # skip header: - next(rdr) - for idx, label in rdr: - if label == "Episode": - episode_idxes.append(int(idx)) - elif label == "Oscillation": - oscillation_idxes.append(int(idx)) - elif label == "dSNA": - continue - else: - print( - f"WARN: got an unexpected label: {label}. 
File: {csv_path.name}" - ) - annotation = GroundTruthPointData( - exp_date, fly_line, emb_name, episode_idxes, oscillation_idxes - ) - key = f"{exp_date}_{fly_line}_{emb_name}" - annotations[key] = annotation - - return annotations - - def load_window_data(self, suffix="windows.csv"): - csv_paths = [f for f in self.dir_path.iterdir() if f.name.endswith(suffix)] - - annotations = {} - - for csv_path in csv_paths: - exp_date, fly_line, emb_name, _ = csv_path.name.split("_") - episode_bounds = [] - oscillation_bounds = [] - with open(csv_path, newline="") as csv_file: - rdr = csv.reader(csv_file) - next(rdr) - for start, end, label in rdr: - if label == "Episode": - episode_bounds.append((int(start), int(end))) - elif label == "Oscillation": - oscillation_bounds.append((int(start), int(end))) - elif label == "dSNA" or label == "Baseline": - continue - else: - print( - f"WARN: got an unexpected label: {label}. File: {csv_path.name}" - ) - annotation = GroundTruthWindowData( - exp_date, fly_line, emb_name, episode_bounds, oscillation_bounds - ) - key = f"{exp_date}_{fly_line}_{emb_name}" - annotations[key] = annotation - - return annotations - - def get_annotation_by_exp_name(self, exp_name: str) -> list[GroundTruthPointData]: - """Returns all GroundTruthData relative to an `exp_name`. - - Parameters: - exp_name: str in the format: `expDate_flyLine` - - Returns: - annotations: list with all annotated data found for that experiment - """ - if self.annotations is None: - raise AttributeError( - "Cannot read annotation data, first call `self.load_point_data` or `self.load_window_data`." 
- ) - annotations = [] - for annot_name, annot in self.annotations.items(): - exp_date, fly_line, _ = annot_name.split("_") - if f"{exp_date}_{fly_line}" == exp_name: - annotations.append(annot) - return annotations - - def get_exp_names(self) -> list[str]: - """Returns the names of all experiments that have GT data.""" - if self.annotations is None: - raise AttributeError( - "Cannot read annotation data, first call `self.load_point_data` or `self.load_window_data`." - ) - exp_names = set() - - for k in self.annotations.keys(): - name, _ = k.split("_emb") - exp_names.add(name) - - return list(exp_names) diff --git a/snazzy_analysis/tests/test_config.py b/snazzy_analysis/tests/test_config.py index f35a33c..178fecb 100644 --- a/snazzy_analysis/tests/test_config.py +++ b/snazzy_analysis/tests/test_config.py @@ -53,7 +53,6 @@ def test_can_save_manual_data_for_new_embryo(config): removed_peaks=[50], manual_widths={"100": [80, 125]}, manual_trim_idx=1500, - manual_phase1_end=500, ) assert emb_data["emb_name"] not in config.data["embryos"] diff --git a/snazzy_analysis/tests/test_experiment.py b/snazzy_analysis/tests/test_dataset.py similarity index 67% rename from snazzy_analysis/tests/test_experiment.py rename to snazzy_analysis/tests/test_dataset.py index 99dd599..097c9f8 100644 --- a/snazzy_analysis/tests/test_experiment.py +++ b/snazzy_analysis/tests/test_dataset.py @@ -3,14 +3,14 @@ import pytest -from snazzy_analysis import Config, Experiment +from snazzy_analysis import Config, Dataset VALID_DIR = Path(__file__).parent.joinpath("assets", "data", "20250210") @pytest.fixture -def exp(): - return Experiment(VALID_DIR) +def dataset(): + return Dataset(VALID_DIR) @pytest.fixture @@ -18,39 +18,39 @@ def config(): return Config(VALID_DIR) -def test_can_create_experiment(exp): - assert exp is not None +def test_can_create_dataset(dataset): + assert dataset is not None -def test_can_skip_peaks_before_first_peak_threshold(exp): +def 
test_can_skip_peaks_before_first_peak_threshold(dataset): # emb4 has first peak before 30 min and should be excluded - assert exp.embryos is not None - assert len(exp.embryos) == 2 + assert dataset.embryos is not None + assert len(dataset.embryos) == 2 def test_can_exclude_embryos(config): to_exclude = [1] config.update_params({"exp_params": {"to_exclude": to_exclude}}) - exp = Experiment(VALID_DIR, config) + dataset = Dataset(VALID_DIR, config) - assert len(exp.embryos) == 1 + assert len(dataset.embryos) == 1 -def test_ignores_embryos_not_in_experiment(config): +def test_ignores_embryos_not_in_dataset(config): # VALID_DIR only contains emb1, emb3, and emb4 to_exclude = [15] config.update_params({"exp_params": {"to_exclude": to_exclude}}) - exp = Experiment(VALID_DIR, config) + dataset = Dataset(VALID_DIR, config) - assert len(exp.embryos) == 2 + assert len(dataset.embryos) == 2 def test_can_use_kwargs(): expected_dff_strategy = "local_minima" - exp = Experiment(VALID_DIR, dff_strategy=expected_dff_strategy) + dataset = Dataset(VALID_DIR, dff_strategy=expected_dff_strategy) - pd_params = exp.config.get_pd_params() + pd_params = dataset.config.get_pd_params() actual_strategy = pd_params.get("dff_strategy", None) assert actual_strategy == expected_dff_strategy @@ -62,7 +62,7 @@ def test_can_use_all_valid_kwargs(): expected_dff_strategy = "local_minima" expected_first_peak_threshold = 35 - exp = Experiment( + dataset = Dataset( VALID_DIR, dff_strategy=expected_dff_strategy, has_transients=expected_has_transients, @@ -70,8 +70,8 @@ def test_can_use_all_valid_kwargs(): first_peak_threshold=expected_first_peak_threshold, ) - pd_params = exp.config.get_pd_params() - exp_params = exp.config.get_exp_params() + pd_params = dataset.config.get_pd_params() + exp_params = dataset.config.get_exp_params() actual_strategy = pd_params.get("dff_strategy", None) actual_to_exclude = exp_params.get("to_exclude", None) actual_has_transients = exp_params.get("has_transients", None) @@ 
-84,12 +84,12 @@ def test_can_use_all_valid_kwargs(): def test_ignores_invalid_kwargs(capsys): - Experiment(VALID_DIR, invalid_kwarg="invalid") + Dataset(VALID_DIR, invalid_kwarg="invalid") captured = capsys.readouterr() assert "WARN" in captured.out -def test_raises_when_get_missing_embryo(exp): +def test_raises_when_get_missing_embryo(dataset): with pytest.raises(ValueError): - exp.get_embryo("emb55") + dataset.get_embryo("emb55") diff --git a/snazzy_analysis/tests/test_embryo.py b/snazzy_analysis/tests/test_embryo.py index 6583215..05fa4c5 100644 --- a/snazzy_analysis/tests/test_embryo.py +++ b/snazzy_analysis/tests/test_embryo.py @@ -3,7 +3,7 @@ import numpy as np import pytest -from snazzy_analysis import Config, Experiment +from snazzy_analysis import Config, Dataset VALID_DIR = Path(__file__).parent.joinpath("assets", "data", "20250210") @@ -13,8 +13,8 @@ def emb(): to_exclude = ["emb3", "emb4"] config = Config(VALID_DIR) config.update_params({"exp_params": {"to_exclude": to_exclude}}) - exp = Experiment(VALID_DIR, config) - return exp.get_embryo("emb1") + dataset = Dataset(VALID_DIR, config) + return dataset.get_embryo("emb1") @pytest.fixture diff --git a/snazzy_analysis/tests/test_gui.py b/snazzy_analysis/tests/test_gui.py index 44a68a9..eb903a3 100644 --- a/snazzy_analysis/tests/test_gui.py +++ b/snazzy_analysis/tests/test_gui.py @@ -5,15 +5,15 @@ from PyQt6.QtCore import Qt from PyQt6.QtWidgets import QLabel -from snazzy_analysis import Experiment +from snazzy_analysis import Dataset from snazzy_analysis.gui.gui import ( ComparePlotWindow, - ExperimentParamsDialog, + DatasetParamsDialog, ImageWindow, JsonViewer, MainWindow, ) -from snazzy_analysis.gui.model import GroupModel, ExperimentModel +from snazzy_analysis.gui.model import GroupModel, DatasetModel pytestmark = pytest.mark.skipif( not sys.platform.startswith("linux"), reason="Running headless tests on linux only." 
@@ -23,15 +23,15 @@ @pytest.fixture -def exp(): - return Experiment(VALID_DIR) +def dataset(): + return Dataset(VALID_DIR) @pytest.fixture -def group_model(exp): - exp_model = ExperimentModel(exp) +def group_model(dataset): + exp_model = DatasetModel(dataset) group_model = GroupModel("wt") - group_model.add_experiment(exp_model) + group_model.add_dataset(exp_model) return group_model @@ -44,7 +44,7 @@ def test_can_display_initial_screen(qtbot): assert window.isVisible() -def test_can_create_experiment_via_menu(qtbot, monkeypatch): +def test_can_create_dataset_via_menu(qtbot, monkeypatch): monkeypatch.setattr( "snazzy_analysis.gui.gui.QFileDialog.getExistingDirectory", lambda *a, **kw: VALID_DIR, @@ -60,11 +60,11 @@ def test_can_create_experiment_via_menu(qtbot, monkeypatch): open_action = file_menu.actions()[0] open_action.trigger() - window.exp_params_dialog.accept() + window.dataset_params_dialog.accept() - qtbot.waitUntil(lambda: window.add_experiment_action.isEnabled(), timeout=3000) + qtbot.waitUntil(lambda: window.add_dataset_action.isEnabled(), timeout=3000) - assert window.add_experiment_action.isEnabled() + assert window.add_dataset_action.isEnabled() assert window.top_app_bar is not None assert window.bottom_layout is not None # for VALID_DIR dataset, emb1 is the first embryo: @@ -73,7 +73,7 @@ def test_can_create_experiment_via_menu(qtbot, monkeypatch): def test_exp_dialog_parses_emb_ids(qtbot): props = {"to_remove": ["emb1", "emb2"]} - exp_dialog = ExperimentParamsDialog(props) + exp_dialog = DatasetParamsDialog(props) qtbot.addWidget(exp_dialog) assert props["to_remove"] == [1, 2] @@ -81,7 +81,7 @@ def test_exp_dialog_parses_emb_ids(qtbot): def test_exp_dialog_parses_emb_ids_when_receives_only_digits(qtbot): props = {"to_remove": ["1", "2"]} - exp_dialog = ExperimentParamsDialog(props) + exp_dialog = DatasetParamsDialog(props) qtbot.addWidget(exp_dialog) assert props["to_remove"] == [1, 2] @@ -122,8 +122,8 @@ def test_can_render_FOV(qtbot): assert not 
pixmap.isNull() -def test_can_render_json_config(qtbot, exp): - config_data = exp.config.data +def test_can_render_json_config(qtbot, dataset): + config_data = dataset.config.data json_viewer = JsonViewer(config_data) qtbot.addWidget(json_viewer) diff --git a/snazzy_analysis/tests/test_gui_model.py b/snazzy_analysis/tests/test_gui_model.py index cdd4d42..6ff1506 100644 --- a/snazzy_analysis/tests/test_gui_model.py +++ b/snazzy_analysis/tests/test_gui_model.py @@ -12,22 +12,22 @@ @pytest.fixture(scope="module") -def model_single_exp(): +def model_single_dataset(): config = Config(DATASET_1) model = Model() - model.create_experiment(config, GROUP_NAME) + model.create_dataset(config, GROUP_NAME) return model @pytest.fixture(scope="module") -def model_two_exps(): +def model_two_datasets(): config = Config(DATASET_1) model = Model() - model.create_experiment(config, GROUP_NAME) + model.create_dataset(config, GROUP_NAME) config2 = Config(DATASET_2) - model.create_experiment(config2, GROUP_NAME) + model.create_dataset(config2, GROUP_NAME) return model @@ -35,102 +35,102 @@ def model_two_exps(): def test_initial_state_is_empty(): model = Model() - assert model.selected_experiment is None + assert model.selected_dataset is None assert model.selected_embryo is None assert model.selected_group is None assert model.groups == [] -def test_can_create_experiment(model_single_exp): - assert model_single_exp is not None +def test_can_create_dataset(model_single_dataset): + assert model_single_dataset is not None -def test_creating_experiment_also_creates_group_if_needed(model_single_exp): - assert model_single_exp.selected_group.name == GROUP_NAME +def test_creating_dataset_also_creates_group_if_needed(model_single_dataset): + assert model_single_dataset.selected_group.name == GROUP_NAME -def test_can_add_experiment_to_existing_group(model_two_exps): - assert len(model_two_exps.groups) == 1 - assert model_two_exps.selected_group.name == GROUP_NAME +def 
test_can_add_dataset_to_existing_group(model_two_datasets): + assert len(model_two_datasets.groups) == 1 + assert model_two_datasets.selected_group.name == GROUP_NAME -def test_selected_experiment_matches_provided_exp(model_single_exp): - assert model_single_exp.selected_experiment.name == DATASET_1.name +def test_selected_dataset_matches_provided_dataset(model_single_dataset): + assert model_single_dataset.selected_dataset.name == DATASET_1.name -def test_embs_marked_as_removed_are_synced_with_experiment(model_single_exp): - exp = model_single_exp.selected_experiment - to_remove = exp.exp_params.get("to_remove", []) +def test_embs_marked_as_removed_are_synced_with_dataset(model_single_dataset): + dataset = model_single_dataset.selected_dataset + to_remove = dataset.exp_params.get("to_remove", []) assert len(to_remove) > 0 for emb_name in to_remove: - assert emb_name in model_single_exp.selected_experiment.to_remove + assert emb_name in model_single_dataset.selected_dataset.to_remove -def test_reset_exp_resets_current_embryo(model_single_exp): +def test_reset_dataset_resets_current_embryo(model_single_dataset): first_emb_name = "emb1" next_emb_name = "emb3" - next_emb = model_single_exp.selected_experiment.get_embryo(next_emb_name) - model_single_exp.select_embryo(next_emb) - model_single_exp.reset_current_experiment() - emb = model_single_exp.selected_experiment.selected_embryo + next_emb = model_single_dataset.selected_dataset.get_embryo(next_emb_name) + model_single_dataset.select_embryo(next_emb) + model_single_dataset.reset_current_dataset() + emb = model_single_dataset.selected_dataset.selected_embryo assert emb.name != next_emb_name assert emb.name == first_emb_name -def test_toggle_twice_adds_embryo_back(model_single_exp): - exp = model_single_exp.selected_experiment +def test_toggle_twice_adds_embryo_back(model_single_dataset): + dataset = model_single_dataset.selected_dataset first_emb_name = "emb1" - model_single_exp.toggle_emb_visibility(first_emb_name, 
should_remove=True) - model_single_exp.toggle_emb_visibility(first_emb_name, should_remove=False) - emb_names = [e.name for e in exp.embryos] + model_single_dataset.toggle_emb_visibility(first_emb_name, should_remove=True) + model_single_dataset.toggle_emb_visibility(first_emb_name, should_remove=False) + emb_names = [e.name for e in dataset.embryos] assert first_emb_name in emb_names -def test_can_remove_embryo(model_single_exp): - exp = model_single_exp.selected_experiment +def test_can_remove_embryo(model_single_dataset): + dataset = model_single_dataset.selected_dataset first_emb_name = "emb1" - model_single_exp.toggle_emb_visibility(first_emb_name, should_remove=True) - assert first_emb_name in exp.to_remove - model_single_exp.toggle_emb_visibility(first_emb_name, should_remove=False) + model_single_dataset.toggle_emb_visibility(first_emb_name, should_remove=True) + assert first_emb_name in dataset.to_remove + model_single_dataset.toggle_emb_visibility(first_emb_name, should_remove=False) -def test_after_removing_emb_embryos_does_not_contain_it(model_single_exp): - exp = model_single_exp.selected_experiment - emb_names = [e.name for e in exp.embryos] +def test_after_removing_emb_embryos_does_not_contain_it(model_single_dataset): + dataset = model_single_dataset.selected_dataset + emb_names = [e.name for e in dataset.embryos] first_emb_name = emb_names[0] - model_single_exp.toggle_emb_visibility(first_emb_name, should_remove=True) - emb_names = [e.name for e in exp.embryos] + model_single_dataset.toggle_emb_visibility(first_emb_name, should_remove=True) + emb_names = [e.name for e in dataset.embryos] assert first_emb_name not in emb_names - model_single_exp.toggle_emb_visibility(first_emb_name, should_remove=False) + model_single_dataset.toggle_emb_visibility(first_emb_name, should_remove=False) -def test_can_create_group_with_two_experiments(model_two_exps): - assert model_two_exps.has_combined_experiments() +def 
test_can_create_group_with_two_datasets(model_two_datasets): + assert model_two_datasets.has_combined_datasets() -def test_add_experiment_selects_most_recent_exp(model_two_exps): - assert model_two_exps.selected_experiment.name == DATASET_2.name +def test_add_dataset_selects_most_recent_dataset(model_two_datasets): + assert model_two_datasets.selected_dataset.name == DATASET_2.name -def test_add_group_keeps_current_group(model_two_exps): - prev_name = model_two_exps.selected_group.name +def test_add_group_keeps_current_group(model_two_datasets): + prev_name = model_two_datasets.selected_group.name new_group = "Mutant" - model_two_exps.create_group(new_group) - assert len(model_two_exps.groups) == 2 - assert model_two_exps.selected_group.name == prev_name - assert model_two_exps.selected_group.name != new_group + model_two_datasets.create_group(new_group) + assert len(model_two_datasets.groups) == 2 + assert model_two_datasets.selected_group.name == prev_name + assert model_two_datasets.selected_group.name != new_group -def test_can_save_trim_index_in_config(model_single_exp): - curr_emb = model_single_exp.selected_embryo +def test_can_save_trim_index_in_config(model_single_dataset): + curr_emb = model_single_dataset.selected_embryo updated_trim_idx = len(curr_emb.activity) // 2 - model_single_exp.save_trim_idx(updated_trim_idx) + model_single_dataset.save_trim_idx(updated_trim_idx) - curr_exp = model_single_exp.selected_experiment + curr_dataset = model_single_dataset.selected_dataset - assert curr_exp + assert curr_dataset - config = curr_exp.config + config = curr_dataset.config assert config @@ -139,47 +139,49 @@ def test_can_save_trim_index_in_config(model_single_exp): assert manual_data["manual_trim_idx"] == updated_trim_idx -def test_can_add_peak(model_single_exp): +def test_can_add_peak(model_single_dataset): new_peak_index = 300 - curr_trace = model_single_exp.selected_experiment.get_embryo("emb1").trace + curr_trace = 
model_single_dataset.selected_dataset.get_embryo("emb1").trace original_peaks_len = len(curr_trace.peak_idxes) - _, new_peaks = model_single_exp.add_peak(new_peak_index, "emb1", curr_trace) + _, new_peaks = model_single_dataset.add_peak(new_peak_index, "emb1", curr_trace) assert len(new_peaks) == original_peaks_len + 1 -def test_can_remove_peak(model_single_exp): +def test_can_remove_peak(model_single_dataset): emb_name = "emb1" - curr_trace = model_single_exp.selected_experiment.get_embryo(emb_name).trace + curr_trace = model_single_dataset.selected_dataset.get_embryo(emb_name).trace to_remove_index = curr_trace.peak_idxes[1] original_peaks_len = len(curr_trace.peak_idxes) - _, new_peaks = model_single_exp.remove_peak(to_remove_index, emb_name, curr_trace) + _, new_peaks = model_single_dataset.remove_peak( + to_remove_index, emb_name, curr_trace + ) assert len(new_peaks) == original_peaks_len - 1 - config = model_single_exp.selected_experiment.config + config = model_single_dataset.selected_dataset.config manual_data = config.get_corrected_peaks(emb_name) assert to_remove_index in manual_data["manual_remove"] -def test_can_add_and_remove_peak(model_single_exp): +def test_can_add_and_remove_peak(model_single_dataset): emb_name = "emb1" new_peak_index = 353 - curr_trace = model_single_exp.selected_experiment.get_embryo(emb_name).trace + curr_trace = model_single_dataset.selected_dataset.get_embryo(emb_name).trace - model_single_exp.add_peak(new_peak_index, emb_name, curr_trace) + model_single_dataset.add_peak(new_peak_index, emb_name, curr_trace) - model_single_exp.calc_peaks_all_embs() + model_single_dataset.calc_peaks_all_embs() - model_single_exp.remove_peak(new_peak_index, emb_name, curr_trace) + model_single_dataset.remove_peak(new_peak_index, emb_name, curr_trace) - config = model_single_exp.selected_experiment.config + config = model_single_dataset.selected_dataset.config manual_data = config.get_corrected_peaks(emb_name) @@ -187,54 +189,54 @@ def 
test_can_add_and_remove_peak(model_single_exp): assert new_peak_index in manual_data["manual_remove"] -def test_can_clear_manual_data_from_single_emb(model_single_exp): +def test_can_clear_manual_data_from_single_emb(model_single_dataset): emb_name = "emb1" new_peak_index = 400 - curr_trace = model_single_exp.selected_experiment.get_embryo(emb_name).trace + curr_trace = model_single_dataset.selected_dataset.get_embryo(emb_name).trace - model_single_exp.add_peak(new_peak_index, emb_name, curr_trace) + model_single_dataset.add_peak(new_peak_index, emb_name, curr_trace) another_emb_name = "emb3" - another_trace = model_single_exp.selected_experiment.get_embryo( + another_trace = model_single_dataset.selected_dataset.get_embryo( another_emb_name ).trace - model_single_exp.add_peak(new_peak_index, another_emb_name, another_trace) + model_single_dataset.add_peak(new_peak_index, another_emb_name, another_trace) - assert "embryos" in model_single_exp.selected_experiment.config.data + assert "embryos" in model_single_dataset.selected_dataset.config.data - orig_config = model_single_exp.selected_experiment.config.data["embryos"] + orig_config = model_single_dataset.selected_dataset.config.data["embryos"] assert emb_name in orig_config - model_single_exp.clear_manual_data_by_embryo(emb_name) + model_single_dataset.clear_manual_data_by_embryo(emb_name) assert curr_trace.to_add == [] assert curr_trace.to_remove == [] - manual_data = model_single_exp.selected_experiment.config.data["embryos"] + manual_data = model_single_dataset.selected_dataset.config.data["embryos"] assert emb_name not in manual_data assert another_emb_name in manual_data -def test_can_clear_all_manual_data(model_single_exp): +def test_can_clear_all_manual_data(model_single_dataset): emb_name = "emb1" new_peak_index = 400 - curr_trace = model_single_exp.selected_experiment.get_embryo(emb_name).trace + curr_trace = model_single_dataset.selected_dataset.get_embryo(emb_name).trace - 
model_single_exp.add_peak(new_peak_index, emb_name, curr_trace) + model_single_dataset.add_peak(new_peak_index, emb_name, curr_trace) another_emb_name = "emb3" - another_trace = model_single_exp.selected_experiment.get_embryo( + another_trace = model_single_dataset.selected_dataset.get_embryo( another_emb_name ).trace - model_single_exp.add_peak(new_peak_index, another_emb_name, another_trace) + model_single_dataset.add_peak(new_peak_index, another_emb_name, another_trace) - assert "embryos" in model_single_exp.selected_experiment.config.data + assert "embryos" in model_single_dataset.selected_dataset.config.data - model_single_exp.clear_all_manual_data() + model_single_dataset.clear_all_manual_data() - manual_data = model_single_exp.selected_experiment.config.data["embryos"] + manual_data = model_single_dataset.selected_dataset.config.data["embryos"] assert emb_name not in manual_data assert another_emb_name not in manual_data @@ -242,24 +244,24 @@ def test_can_clear_all_manual_data(model_single_exp): assert manual_data == {} -def test_clear_emb_manual_data_raises_if_emb_not_exists(model_single_exp): +def test_clear_emb_manual_data_raises_if_emb_not_exists(model_single_dataset): emb_name = "emb1" new_peak_index = 400 - curr_trace = model_single_exp.selected_experiment.get_embryo(emb_name).trace + curr_trace = model_single_dataset.selected_dataset.get_embryo(emb_name).trace - model_single_exp.add_peak(new_peak_index, emb_name, curr_trace) + model_single_dataset.add_peak(new_peak_index, emb_name, curr_trace) with pytest.raises(ValueError): - model_single_exp.clear_manual_data_by_embryo("emb2") + model_single_dataset.clear_manual_data_by_embryo("emb2") -def test_clear_emb_manual_data_works_when_no_saved_data(model_single_exp): +def test_clear_emb_manual_data_works_when_no_saved_data(model_single_dataset): emb_name = "emb1" - curr_trace = model_single_exp.selected_experiment.get_embryo(emb_name).trace + curr_trace = 
model_single_dataset.selected_dataset.get_embryo(emb_name).trace assert curr_trace.to_add == [] assert curr_trace.to_remove == [] - model_single_exp.clear_manual_data_by_embryo("emb1") + model_single_dataset.clear_manual_data_by_embryo("emb1") assert curr_trace.to_remove == [] diff --git a/snazzy_analysis/tests/test_trace.py b/snazzy_analysis/tests/test_trace.py index d8c8922..73957f8 100644 --- a/snazzy_analysis/tests/test_trace.py +++ b/snazzy_analysis/tests/test_trace.py @@ -56,15 +56,6 @@ def test_can_process_trace_without_peaks(config): assert trace.rms is not None -def test_can_create_dsna_trace_and_calculate_dsna(config, activity): - config.update_params({"exp_params": {"has_dsna": True}}) - trace = Trace("emb1", activity, config) - assert trace - - dsna_start = trace.get_dsna_start(freq=0.02) - assert dsna_start - - def test_baseline_with_average_n_values(): arr = [1, 2, 3, 4, 0, 1, 0, 7, 6] baseline = Trace.average_n_lowest_window(arr, window_size=5, n_lowest=2) diff --git a/snazzy_analysis/tests/test_trace_phases.py b/snazzy_analysis/tests/test_trace_phases.py deleted file mode 100644 index 4e76989..0000000 --- a/snazzy_analysis/tests/test_trace_phases.py +++ /dev/null @@ -1,78 +0,0 @@ -import numpy as np -import pytest - -from snazzy_analysis import TracePhases - - -def test_finds_right_index_in_dist_matrix(): - dist_matrix = np.random.random((10, 10)) - thres = 1 - k = 5 - dist_matrix[k, :] += 1 - - actual_index = TracePhases.segment_distance_matrix_forward(dist_matrix, thres) - expected_index = k - 1 - - assert actual_index == expected_index - - -def test_finds_right_index_in_last_row(): - dist_matrix = np.random.random((10, 10)) - thres = 1 - k = 9 - dist_matrix[k, :] += 1 - - actual_index = TracePhases.segment_distance_matrix_forward(dist_matrix, thres) - expected_index = k - 1 - - assert actual_index == expected_index - - -def test_when_all_dists_below_thres_return_last_index(): - dist_matrix = np.random.random((10, 10)) - thres = 2 - - actual_index 
= TracePhases.segment_distance_matrix_forward(dist_matrix, thres) - expected_index = len(dist_matrix) - 1 - - assert actual_index == expected_index - - -def test_when_reversing_and_all_dists_below_thres_return_last_index(): - dist_matrix = np.random.random((10, 10)) - thres = 2 - - actual_index = TracePhases.segment_distance_matrix_reverse(dist_matrix, thres) - expected_index = 9 - - assert actual_index == expected_index - - -def test_finds_right_index_in_dist_matrix_reverse(): - dist_matrix = np.random.random((10, 10)) - thres = 1 - k = 5 - dist_matrix[k, :] += 1 - - actual_index = TracePhases.segment_distance_matrix_reverse(dist_matrix, thres) - expected_index = k + 1 - - assert actual_index == expected_index - - -def test_finds_right_index_if_dist_matrix_size_1(): - dist_matrix = np.random.random(1) - thres = 0 - - actual_index = TracePhases.segment_distance_matrix_forward(dist_matrix, thres) - expected_index = 0 - - assert actual_index == expected_index - - -def test_apply_thres_raises_if_empty_dist_matrix(): - dist_matrix = np.array([]) - thres = 0 - - with pytest.raises(ValueError): - TracePhases.segment_distance_matrix_forward(dist_matrix, thres) diff --git a/snazzy_processing/README.md b/snazzy_processing/README.md index 4152d14..711a644 100644 --- a/snazzy_processing/README.md +++ b/snazzy_processing/README.md @@ -1,6 +1,12 @@ # SNAzzy Processing Raw data processing for the SNAzzy pipeline. + +### Running the code + +To run the pipeline, use the jupyter notebook `snazzy_processing_pipeline.ipynb`. +Sample data is available in zenodo: https://doi.org/10.5281/zenodo.17295552. +To try the code, please download and extract the datasets first. ### Organization @@ -20,9 +26,9 @@ The other notebooks are used to understand in details the pipeline stages. ### Adding data -Each experiment will have one corresponding folder inside `./data/`. 
-By running the code in `snazzy_processing_pipeline.ipynb`, the raw data will be parsed and saved inside `./data/{experiment_name}/embs`. -To compare the calculated VNC length against manual measurements, add an `annotated` folder inside the experiment directory. +After being processed, each dataset will have one corresponding folder inside `./data/`. +By running the code in `snazzy_processing_pipeline.ipynb`, the raw data will be parsed and saved inside `./data/{dataset_name}/embs`. +To compare the calculated VNC length against manual measurements, add an `annotated` folder inside the dataset directory. The measurements should be saved as a csv file. Given the description above, the file structure inside the `data` folder should look like: diff --git a/snazzy_processing/data/20240611/annotated/emb10-ch2.csv b/snazzy_processing/data/20240611/annotated/emb10-ch2.csv new file mode 100644 index 0000000..759398d --- /dev/null +++ b/snazzy_processing/data/20240611/annotated/emb10-ch2.csv @@ -0,0 +1,55 @@ +Frame,Length +1,267.87 +51,267.1 +101,265.85 +151,264.12 +201,263.24 +251,257.61 +301,259.23 +351,258.64 +401,255.21 +451,254.92 +501,251.49 +551,250.47 +601,246.68 +651,259.24 +701,247.38 +751,243.28 +801,243.45 +851,241.39 +901,246.04 +951,239.83 +1001,236.29 +1051,239.08 +1101,233.97 +1151,235.23 +1201,231.09 +1251,233.44 +1301,229.76 +1351,228.87 +1401,230.54 +1451,225.72 +1501,241.09 +1551,222.03 +1601,222.04 +1651,225.46 +1701,219.23 +1751,222.99 +1801,218.45 +1851,229.88 +1901,216.2 +1951,216.73 +2001,225.74 +2051,215.78 +2101,214.46 +2151,214.83 +2201,221.33 +2251,215.33 +2301,210.84 +2351,211.81 +2401,221.29 +2451,214.23 +2501,212.55 +2551,218.37 +2601,238.71 +2651,248.9 diff --git a/snazzy_processing/data/20240611/annotated/emb4-ch2.csv b/snazzy_processing/data/20240611/annotated/emb4-ch2.csv new file mode 100644 index 0000000..481c1cc --- /dev/null +++ b/snazzy_processing/data/20240611/annotated/emb4-ch2.csv @@ -0,0 +1,45 @@ +Frame,Length +1,250.31 +51,254.26
+101,249.91 +151,243.25 +201,245.75 +251,242.18 +301,240.01 +351,239.54 +401,233.5 +451,237.55 +501,234.73 +551,234.39 +601,230.59 +651,231.66 +701,231.14 +751,230.64 +801,229.38 +851,232.46 +901,222.43 +951,221.92 +1001,233.7 +1051,221.14 +1101,222.83 +1151,226.23 +1201,231.87 +1251,210.48 +1301,221.14 +1351,240.59 +1401,211.15 +1451,209.72 +1501,213.66 +1551,212.82 +1601,212.24 +1651,207.9 +1701,209.4 +1751,206.85 +1801,208.89 +1851,205.35 +1901,206.67 +1951,209.53 +2001,204.99 +2051,206.56 +2101,205.1 +2151,236.15 diff --git a/snazzy_processing/data/20240611/annotated/emb5-ch2.csv b/snazzy_processing/data/20240611/annotated/emb5-ch2.csv new file mode 100644 index 0000000..9c50ca8 --- /dev/null +++ b/snazzy_processing/data/20240611/annotated/emb5-ch2.csv @@ -0,0 +1,56 @@ +Frame,Length +1,265.45 +51,262.47 +101,262.82 +151,259.08 +201,257.04 +251,261.18 +301,252.49 +351,252.66 +401,251.12 +451,250.04 +501,248.73 +551,246.67 +601,246.67 +651,249.3 +701,246 +751,243.13 +801,240.93 +851,238.61 +901,235.78 +951,239.83 +1001,240.26 +1051,239.1 +1101,238.59 +1151,236.01 +1201,233.33 +1251,234.4 +1301,238.24 +1351,233.48 +1401,228.55 +1451,226.11 +1501,228.9 +1551,226.47 +1601,223.75 +1651,224.97 +1701,218.88 +1751,237.12 +1801,222.4 +1851,220.11 +1901,219.7 +1951,231.25 +2001,220.89 +2051,220.23 +2101,218.8 +2151,214.47 +2201,211.96 +2251,211.51 +2301,212.74 +2351,214.44 +2401,214.27 +2451,212.1 +2501,211.5 +2551,208.41 +2601,216.43 +2651,208.66 +2701,217.96 diff --git a/snazzy_processing/data/20240611/annotated/emb6-ch2.csv b/snazzy_processing/data/20240611/annotated/emb6-ch2.csv new file mode 100644 index 0000000..c8e8438 --- /dev/null +++ b/snazzy_processing/data/20240611/annotated/emb6-ch2.csv @@ -0,0 +1,50 @@ +Frame,Length +1,258.79 +51,259.31 +101,251.64 +151,260.22 +201,251.34 +251,246.58 +301,246.19 +351,244.56 +401,243.26 +451,242.59 +501,239.57 +551,244.72 +601,239.95 +651,237 +701,236.37 +751,235.54 +801,234.8 +851,240.32 +901,230.4 +951,228.91 +1001,225.05 
+1051,251.7 +1101,223.55 +1151,223.29 +1201,220.02 +1251,219.91 +1301,230.07 +1351,219.17 +1401,214.38 +1451,218.26 +1501,214.34 +1551,213.41 +1601,209.56 +1651,211.77 +1701,210.51 +1751,211.81 +1801,206.82 +1851,201.54 +1901,230.17 +1951,205.71 +2001,204.01 +2051,203.48 +2101,207.12 +2151,201.93 +2201,202.52 +2251,209.06 +2301,202.01 +2351,203.72 +2401,197.69 diff --git a/snazzy_processing/data/20240611/annotated/emb9-ch2.csv b/snazzy_processing/data/20240611/annotated/emb9-ch2.csv new file mode 100644 index 0000000..c592c96 --- /dev/null +++ b/snazzy_processing/data/20240611/annotated/emb9-ch2.csv @@ -0,0 +1,53 @@ +Frame,Length +1,259.44 +51,255.86 +101,257.85 +151,252.63 +201,253.21 +251,252.05 +301,250.7 +351,248.07 +401,245.69 +451,250.08 +501,245.44 +551,243.47 +601,244.32 +651,245.18 +701,243.14 +751,245.65 +801,241.03 +851,241.41 +901,238.72 +951,240.02 +1001,239.59 +1051,236.62 +1101,235.12 +1151,228.81 +1201,233.2 +1251,228.88 +1301,226.56 +1351,223.58 +1401,245.4 +1451,222.68 +1501,222.59 +1551,223.7 +1601,221.98 +1651,220.32 +1701,218.01 +1751,227.2 +1801,225.32 +1851,217.53 +1901,218.98 +1951,218.67 +2001,219.81 +2051,216.2 +2101,217.33 +2151,220.69 +2201,224.74 +2251,219.48 +2301,218.49 +2351,223.57 +2401,221.92 +2451,217.22 +2501,223.39 +2551,232.79 diff --git a/snazzy_processing/data/20240611/emb_sizes/embryo_size.csv b/snazzy_processing/data/20240611/emb_sizes/embryo_size.csv new file mode 100644 index 0000000..cbc201d --- /dev/null +++ b/snazzy_processing/data/20240611/emb_sizes/embryo_size.csv @@ -0,0 +1,5 @@ +1, 558.80 +2, 511.75 +3, 523.14 +4, 532.96 +5, 550.16 \ No newline at end of file diff --git a/snazzy_processing/notebooks/activity.ipynb b/snazzy_processing/notebooks/activity.ipynb index bd1a654..5ba2117 100644 --- a/snazzy_processing/notebooks/activity.ipynb +++ b/snazzy_processing/notebooks/activity.ipynb @@ -26,9 +26,9 @@ "\n", "from snazzy_processing import activity, find_hatching, roi, utils\n", "\n", - "experiment_name = 
'20250828'\n", + "dataset_name = '20240611'\n", "root_dir = Path.cwd().parent\n", - "experiment_dir = root_dir.joinpath('data', experiment_name)" + "dataset_dir = root_dir.joinpath('data', dataset_name)" ] }, { @@ -37,7 +37,7 @@ "metadata": {}, "outputs": [], "source": [ - "img_dir = experiment_dir.joinpath(\"embs\")\n", + "img_dir = dataset_dir.joinpath(\"embs\")\n", "\n", "# All structural channel movies end with the suffix ch2\n", "active = sorted(img_dir.glob(\"*ch1.tif\"), key=utils.emb_number)\n", diff --git a/snazzy_processing/notebooks/full-embryo-length.ipynb b/snazzy_processing/notebooks/full-embryo-length.ipynb index 3cabaf6..85f6194 100644 --- a/snazzy_processing/notebooks/full-embryo-length.ipynb +++ b/snazzy_processing/notebooks/full-embryo-length.ipynb @@ -29,9 +29,9 @@ "\n", "from snazzy_processing import full_embryo_length, utils\n", "\n", - "experiment_name = '20240611_25C'\n", + "dataset_name = '20240611'\n", "root_dir = Path.cwd().parent\n", - "project_dir = root_dir.joinpath('data', experiment_name)" + "project_dir = root_dir.joinpath('data', dataset_name)" ] }, { @@ -50,7 +50,7 @@ "outputs": [], "source": [ "embs = sorted(project_dir.joinpath(\"embs\").glob(\"*ch2.tif\"), key=utils.emb_number)\n", - "i = 9\n", + "i = 0\n", "\n", "print(embs[i].stem)\n", "\n", @@ -132,11 +132,11 @@ " annotated_data = full_embryo_length.get_annotated_data(csv_path)\n", "\n", " # maps annotated_id: embryo_id\n", - " name_LUT = {i: i for i in range(3, 23)}\n", + " name_LUT = {i: i for i in range(1, 6)}\n", " embs = sorted(project_dir.joinpath(\"embs\").glob(\"*ch2.tif\"), key=utils.emb_number)\n", "\n", " measured = {\n", - " utils.emb_number(e.stem): full_embryo_length.measure(e, low_non_VNC=True)\n", + " utils.emb_number(e.stem): full_embryo_length.measure(e, low_non_VNC=False)\n", " for e in embs\n", " }\n", "\n", @@ -154,7 +154,7 @@ " fig, ax = plt.subplots()\n", " ax.plot(common_keys, diff, \"bo\")\n", " ax.set_xticks(common_keys)\n", - " 
fig.suptitle(f\"Embryo size measurement - {dataset_name}\")\n", "except FileNotFoundError:\n", " print(\"Could not find the file with annotated data. Path received:\")\n", " print(csv_path)" diff --git a/snazzy_processing/notebooks/snazzy-processing-pipeline.ipynb b/snazzy_processing/notebooks/snazzy-processing-pipeline.ipynb index 9d43cd5..5328e40 100644 --- a/snazzy_processing/notebooks/snazzy-processing-pipeline.ipynb +++ b/snazzy_processing/notebooks/snazzy-processing-pipeline.ipynb @@ -8,12 +8,12 @@ "\n", "Integrates all of the modules from the package, to go from raw data to csv output.\n", "More information about each module can be found in the other Jupyter Notebooks.\n", - "You should change the `experiment name` and the `img_path` at the fist cell.\n", + "You should change the `dataset_name` and the `img_path` in the first cell.\n", "\n", "If the raw data is in nd2 format, it can be converted directly using the pipeline.\n", "For other formats, the image must me converted to tif using another application, for example ImageJ.\n", "\n", - "Adjust the image paths to where the nd2 file is, and where the tif file will be saved." + "Adjust the image path to where the tif/nd2 file is."
] }, { @@ -32,14 +32,14 @@ "\n", "from snazzy_processing import pipeline, slice_img\n", "\n", - "experiment_name = '20240611'\n", + "dataset_name = '20240611'\n", "root_dir = Path.cwd().parent\n", - "project_dir = root_dir.joinpath('data', experiment_name)\n", - "res_dir = root_dir.joinpath('results', experiment_name)\n", + "project_dir = root_dir.joinpath('data', dataset_name)\n", + "res_dir = root_dir.joinpath('results', dataset_name)\n", "res_dir.mkdir(parents=True, exist_ok=True)\n", "\n", "# Provide an absolute path for the raw tif or nd2 image\n", - "img_path = Path.home().joinpath(\"Documents\", \"raw_data\", f\"{experiment_name}.nd2\")" + "img_path = Path.home().joinpath(\"Documents\", \"raw_data\", f\"{dataset_name}.nd2\")" ] }, { @@ -56,7 +56,7 @@ "outputs": [], "source": [ "first_frames = \"first_frames.tif\"\n", - "first_frames_path = root_dir.joinpath(\"results\", experiment_name, first_frames)\n", + "first_frames_path = root_dir.joinpath(\"results\", dataset_name, first_frames)\n", "if first_frames_path.exists():\n", " print(f\"{first_frames_path.stem} already exists.\\nPlease choose another path.\")\n", "else:\n", @@ -69,6 +69,8 @@ "source": [ "Visualize the regions of the individual embryos that will be cropped.\n", "If the bounding boxes don't fit the embryo, you can try changing the `thres_adjust` parameter in `slice_img.calculate_slice_coordinates`.\n", + "Decreasing `thres_adjust`, which can be a negative value, tends to generate bigger boxes.\n", + "\n", "The resulting image also shows the number that will be used for each embryo when saving the individual movies.\n", "\n", "If after adjust `thres_adjust` the embryos are still not framed as expected, you can use `slice_img.increase_bbox()` to control the bbox dimensions." 
@@ -110,7 +112,7 @@ "pc = PatchCollection(recs, color=\"red\", alpha=0.7, linewidth=1, facecolor=\"none\")\n", "ax.add_collection(pc)\n", "\n", - "dest = root_dir.joinpath(\"results\", experiment_name)\n", + "dest = root_dir.joinpath(\"results\", dataset_name)\n", "dest.mkdir(parents=True, exist_ok=True)\n", "save_path = dest / \"emb_numbers.png\"\n", "plt.savefig(save_path)\n", @@ -123,7 +125,7 @@ "metadata": {}, "source": [ "If the image above looks good, select which embryos you want to analyze, by changing the values in the `embryos` list in the next cell.\n", - "Embryos will be saved under the `data` directory, for the corresponding experiment.\n", + "Embryos will be saved under the `data` directory, for the corresponding dataset.\n", "To process all embryos you can just pass an empty `embryos` list.\n", "\n", "The length and activity data will be saved in the `results` directory." @@ -146,7 +148,7 @@ "window = 1\n", "\n", "# directories and file paths\n", - "embs_dest = root_dir.joinpath(\"data\", experiment_name, \"embs\")\n", + "embs_dest = root_dir.joinpath(\"data\", dataset_name, \"embs\")\n", "embs_dest.mkdir(parents=True, exist_ok=True)\n", "\n", "# nd2 to tif\n", @@ -192,10 +194,10 @@ " pipeline.clean_up_files(None, first_frames_path, None)\n", "\n", "# write params used\n", - "output_path = root_dir.joinpath(\"results\", experiment_name, \"params.txt\")\n", + "output_path = root_dir.joinpath(\"results\", dataset_name, \"params.txt\")\n", "pipeline.log_params(\n", " output_path=output_path,\n", - " experiment_name=experiment_name,\n", + " dataset_name=dataset_name,\n", " embryos=embryos,\n", " window=window,\n", " vnc_length_interval=vnc_length_interval,\n", diff --git a/snazzy_processing/notebooks/vnc-length-evaluation.ipynb b/snazzy_processing/notebooks/vnc-length-evaluation.ipynb index 6cb963c..3035a1e 100644 --- a/snazzy_processing/notebooks/vnc-length-evaluation.ipynb +++ b/snazzy_processing/notebooks/vnc-length-evaluation.ipynb @@ -6,7 +6,7 @@ 
"source": [ "## Evaluation of the VNC length method\n", "\n", - "This notebook compares the calculated VNC length values with annotated data, measured manually. It therefore requires annotated data, which should be placed under the directory `annotated`, within the experiment directory. " + "This notebook compares the calculated VNC length values with annotated data, measured manually. It therefore requires annotated data, which should be placed under the directory `annotated`, within the dataset directory. " ] }, { @@ -25,14 +25,14 @@ "\n", "from snazzy_processing import centerline_errors, find_hatching, utils, vnc_length\n", "\n", - "experiment_name = '20240611_25C'\n", + "dataset_name = '20240611'\n", "root_dir = Path.cwd().parent\n", - "project_dir = root_dir.joinpath('data', experiment_name)\n", + "project_dir = root_dir.joinpath('data', dataset_name)\n", "annotated_dir = project_dir.joinpath('annotated')\n", "img_dir = project_dir.joinpath('embs')\n", "\n", "if not annotated_dir.exists():\n", - " print('Cannot evaluate the measurements for the current experiment.\\nThe evaluations require manually measured data.')\n", + " print('Cannot evaluate the measurements for the current dataset.\\nThe evaluations require manually measured data.')\n", " assert False" ] }, @@ -40,12 +40,12 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "Sometimes the embryo names of the annotated data don't match the names generated by pasnascope.\n", - "In these cases, a look up table (LUT) can be used to associate pasnascope sliced movies to annotated files.\n", + "Sometimes the embryo names of the annotated data don't match the names generated by `snazzy_processing`.\n", + "In these cases, a look up table (LUT) can be used to associate `snazzy_processing` sliced movies to annotated files.\n", "The LUT can also be used to ignore embryos if needed, since only the embryos in the LUT will be used.\n", "The look up table is a dictionary, where the keys are the embryo numbers of 
the individual movies and the values are the numbers used to identify the embryos in annotated data.\n", "\n", - "Use the image generated from the `process-raw-data` notebook to inspect the numbers used by `pasnascope` and build the LUT." + "Use the image generated from the `process-raw-data` notebook to inspect the numbers used by `snazzy_processing` and build the LUT." ] }, { @@ -56,24 +56,13 @@ "source": [ "# maps {img_file: annotated_file}\n", "\n", - "# experiment 20240611\n", + "# dataset 20240611\n", "name_LUT = {\n", - " 1: 1,\n", - " 2: 2,\n", - " 3: 3,\n", - " 4: 4,\n", - " 5: 5,\n", - " 6: 6,\n", - " 8: 9,\n", - " 9: 10,\n", - " 10: 11,\n", - " 12: 12,\n", - " 15: 15,\n", - " 16: 16,\n", - " 17: 17,\n", - " 18: 18,\n", - " 19: 19,\n", - " 20: 20,\n", + " 1: 4,\n", + " 2: 5,\n", + " 3: 6,\n", + " 4: 9,\n", + " 5: 10,\n", "}" ] }, @@ -81,7 +70,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "Calculates the VNC lengths using pasnascope.\n", + "Calculates the VNC lengths using snazzy.\n", "These values will be compared against annotated data in the next cells." 
] }, @@ -133,8 +122,6 @@ " embryo_files, annotated_files, name_LUT\n", ")\n", "\n", - "# embryos mounted in wrong configuration and should be excluded from this analysis:\n", - "to_exclude = [\"emb2-ch2.tif\", \"emb16-ch2.tif\"]\n", "embryos = [Path(img_dir).joinpath(e) for e in mapping.keys() if e not in to_exclude]\n", "\n", "hatching_points = get_hatching_points(embryos)\n", @@ -157,7 +144,7 @@ "metadata": {}, "outputs": [], "source": [ - "start = 5\n", + "start = 0\n", "n = 3\n", "interval = 50\n", "\n", @@ -194,7 +181,7 @@ "metadata": {}, "outputs": [], "source": [ - "i = 3\n", + "i = 0\n", "embryo = list(measurements.keys())[i]\n", "embryo_file = next(e for e in embryos if e.stem == embryo)\n", "\n", @@ -235,7 +222,7 @@ "metadata": {}, "outputs": [], "source": [ - "i = 6\n", + "i = 0\n", "num_replicates = 3\n", "interval = 50\n", "\n", @@ -264,14 +251,14 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "Tests all combinations of the passed parameters to find the best performance for a given experiment." + "Tests all combinations of the passed parameters to find the best performance for a given dataset." ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "Calculates the average error for all embryos of an experiment that have annotated data.\n", + "Calculates the average error for all embryos of a dataset that have annotated data.\n", "The error is defined as the average of the absolute relative error." 
] }, @@ -291,7 +278,7 @@ "ax.set_xticks([])\n", "ax.set_ylabel(\"Abs relative error\")\n", "fig.canvas.header_visible = False\n", - "fig.suptitle(f\"Error (compared to annotated data) for exp {project_dir.stem}\")\n", + "fig.suptitle(f\"Error (compared to annotated data) for dataset {project_dir.stem}\")\n", "\n", "plt.tight_layout()" ] diff --git a/snazzy_processing/notebooks/vnc-length.ipynb b/snazzy_processing/notebooks/vnc-length.ipynb index 62456df..20c0388 100644 --- a/snazzy_processing/notebooks/vnc-length.ipynb +++ b/snazzy_processing/notebooks/vnc-length.ipynb @@ -32,10 +32,10 @@ "\n", "from snazzy_processing import centerline, find_hatching, utils, vnc_length\n", "\n", - "experiment_name = '20240611_25C'\n", + "dataset_name = '20240611'\n", "root_dir = Path.cwd().parent\n", - "project_dir = root_dir.joinpath('data', experiment_name)\n", - "print(f\"Selected data for experiment: {experiment_name}\")" + "project_dir = root_dir.joinpath('data', dataset_name)\n", + "print(f\"Selected data for dataset: {dataset_name}\")" ] }, { @@ -61,7 +61,7 @@ "metadata": {}, "outputs": [], "source": [ - "i = 6\n", + "i = 0\n", "frame_number = 0\n", "\n", "img_dir = project_dir.joinpath(\"embs\")\n", @@ -131,7 +131,7 @@ ], "metadata": { "kernelspec": { - "display_name": "snazzy-env", + "display_name": "snazzy-test-env", "language": "python", "name": "python3" }, diff --git a/snazzy_processing/scripts/play_movie.py b/snazzy_processing/scripts/play_movie.py index b43b25c..8eb6441 100644 --- a/snazzy_processing/scripts/play_movie.py +++ b/snazzy_processing/scripts/play_movie.py @@ -7,16 +7,16 @@ data_dir = Path("./data") -experiments = [f.stem for f in data_dir.iterdir() if f.is_dir()] +datasets = [f.stem for f in data_dir.iterdir() if f.is_dir()] -print("Enter experiment name, based on index:") -for i, file in enumerate(experiments): +print("Enter dataset name, based on index:") +for i, file in enumerate(datasets): print(f"[{i}] {file}") e = int(input()) -experiment = 
experiments[e] +dataset = datasets[e] -img_dir = data_dir.joinpath(experiment, "embs") +img_dir = data_dir.joinpath(dataset, "embs") # All structural channel movies end with the suffix ch2 structs = sorted(img_dir.glob("*ch2.tif"), key=utils.emb_number) diff --git a/snazzy_processing/scripts/plot_contours.py b/snazzy_processing/scripts/plot_contours.py index 9924d54..5de06fc 100644 --- a/snazzy_processing/scripts/plot_contours.py +++ b/snazzy_processing/scripts/plot_contours.py @@ -6,16 +6,16 @@ from snazzy_processing import roi, utils data_dir = Path("./data") -experiments = [f.stem for f in data_dir.iterdir() if f.is_dir()] +datasets = [f.stem for f in data_dir.iterdir() if f.is_dir()] -print("Enter experiment name, based on index:") -for i, file in enumerate(experiments): +print("Enter dataset name, based on index:") +for i, file in enumerate(datasets): print(f"[{i}] {file}") e = int(input()) -experiment = experiments[e] +dataset = datasets[e] -img_dir = data_dir.joinpath(experiment, "embs") +img_dir = data_dir.joinpath(dataset, "embs") # All structural channel movies end with the suffix ch2 structs = sorted(img_dir.glob("*ch2.tif"), key=utils.emb_number) @@ -45,7 +45,6 @@ img = imread(structs[idx]) if ch == 2 else imread(active[idx]) struct_img = img if ch == 2 else imread(structs[idx]) contours = roi.get_contours(struct_img, window=window) -print(f"Contours within plot_contours: {len(contours)}") -ca = custom_animation.ContourAnimation(img, contours, 1) +ca = custom_animation.ContourAnimation(img, contours, window, 1) ca.display() diff --git a/snazzy_processing/snazzy_processing/animations/custom_animation.py b/snazzy_processing/snazzy_processing/animations/custom_animation.py index 643ccd3..ac71599 100644 --- a/snazzy_processing/snazzy_processing/animations/custom_animation.py +++ b/snazzy_processing/snazzy_processing/animations/custom_animation.py @@ -60,11 +60,12 @@ def save(self, filename): class ContourAnimation(PauseAnimation): """Overlays ROI contour on 
top of a movie.""" - def __init__(self, image, contours, interval=50): + def __init__(self, image, contours, step_size, interval=50): super().__init__(image, interval) + self.contours = contours self.paint_axes() - self.offset = image.shape[0] // len(contours) + self.step_size = step_size def paint_axes(self): x = self.contours[0][:, 0] @@ -73,9 +74,10 @@ def paint_axes(self): def update(self, frame): self.img_plot.set_data(self.image[frame]) - if frame % self.offset == 0: - i = frame // self.offset + if frame % self.step_size == 0: + i = frame // self.step_size x, y = self.contours[i][:, 0], self.contours[i][:, 1] self.contour_plot.set_data(y, x) self.frame_num.set_text(str(frame)) + return self.img_plot diff --git a/snazzy_processing/snazzy_processing/full_embryo_length.py b/snazzy_processing/snazzy_processing/full_embryo_length.py index 26ec724..75887da 100644 --- a/snazzy_processing/snazzy_processing/full_embryo_length.py +++ b/snazzy_processing/snazzy_processing/full_embryo_length.py @@ -124,7 +124,7 @@ def measure(img_path, low_non_VNC=False, start=None, end=None, interval=100) -> low_non_VNC (bool): Flag to determine how to binarize the image. Pick True if the VNC has lower signal than the rest of the embryo. - Defaults to `True`. + Defaults to `False`. start (int | None): Starting image frame. Starts at first frame if None. 
end (int | None): diff --git a/snazzy_processing/snazzy_processing/roi.py b/snazzy_processing/snazzy_processing/roi.py index 92170c5..9e2a14b 100644 --- a/snazzy_processing/snazzy_processing/roi.py +++ b/snazzy_processing/snazzy_processing/roi.py @@ -61,9 +61,9 @@ def get_roi(img: np.ndarray, window=10) -> np.ndarray: rois = np.empty((rois_length, *img.shape[1:]), dtype=np.bool_) # calculates a new ROI in steps of `window`: - for i in range(0, num_slices - window, window): + for idx, i in enumerate(range(0, num_slices, window)): avg_slc = np.average(img[i : i + window], axis=0) - rois[i // window] = get_single_roi(avg_slc) + rois[idx] = get_single_roi(avg_slc) return rois diff --git a/snazzy_processing/snazzy_processing/vnc_length.py b/snazzy_processing/snazzy_processing/vnc_length.py index f1efe12..0101dc3 100644 --- a/snazzy_processing/snazzy_processing/vnc_length.py +++ b/snazzy_processing/snazzy_processing/vnc_length.py @@ -22,7 +22,7 @@ def measure_VNC_centerline( image (np.ndarray): 3D matrix representing an image. pixel_width (float): - Physica size of a pixel. + Physical size of a pixel. thres_rel (float): Threshold value used to calculate centerline points. min_dist (float):