Commit c49b580

rly and calderast authored
Update testing to download test data and run on GitHub Actions (calderast#13)

* Add small test data files to repo, update test data scripts
* Add download script and github action to run tests
* Add scikit-learn to deps for photometry preproc
* Move downloaded data folder

Co-authored-by: Stephanie Crater <[email protected]>

1 parent 06cbbf9 commit c49b580

File tree

17 files changed: +649 −46 lines

.github/workflows/test_package_build.yml

Lines changed: 122 additions & 0 deletions

```yaml
name: Test building package and publish

on:
  push:
    branches:
      - main
      - maint/*
    tags:
      - "*"
  pull_request:
    branches:
      - main
      - maint/*

defaults:
  run:
    shell: bash

jobs:
  build:
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4
        with:
          fetch-depth: 0
      - uses: actions/setup-python@v5
        with:
          python-version: 3
      - run: pip install --upgrade build twine
      - name: Build sdist and wheel
        run: python -m build
      - run: twine check dist/*
      - name: Upload sdist and wheel artifacts
        uses: actions/upload-artifact@v4
        with:
          name: dist
          path: dist/
      - name: Build git archive
        run: mkdir archive && git archive -v -o archive/archive.tgz HEAD
      - name: Upload git archive artifact
        uses: actions/upload-artifact@v4
        with:
          name: archive
          path: archive/

  test-package:
    runs-on: ubuntu-latest
    needs: [build]
    strategy:
      matrix:
        package: ['wheel', 'sdist', 'archive', 'editable']
    steps:
      - name: Download sdist and wheel artifacts
        if: matrix.package != 'archive'
        uses: actions/download-artifact@v4
        with:
          name: dist
          path: dist/
      - name: Download git archive artifact
        if: matrix.package == 'archive'
        uses: actions/download-artifact@v4
        with:
          name: archive
          path: archive/
      - name: Checkout repo
        if: matrix.package == 'editable'
        uses: actions/checkout@v4
        with:
          fetch-depth: 0
      - uses: actions/setup-python@v5
        with:
          python-version: "3.12"
      - name: Display Python version
        run: python -c "import sys; print(sys.version)"
      - name: Update pip
        run: pip install --upgrade pip
      - name: Install wheel
        if: matrix.package == 'wheel'
        run: pip install dist/*.whl
      - name: Install sdist
        if: matrix.package == 'sdist'
        run: pip install dist/*.tar.gz
      - name: Install archive
        if: matrix.package == 'archive'
        run: pip install archive/archive.tgz
      - name: Install editable
        if: matrix.package == 'editable'
        run: pip install -e .
      - name: Install test extras
        run: pip install .[test]
      - name: Download test data
        env:
          BOX_USERNAME: ${{ secrets.BOX_USERNAME }}
          BOX_PASSWORD: ${{ secrets.BOX_PASSWORD }}
        run: |
          python tests/download_test_data.py
          tree tests/test_data
      - name: Run tests without coverage
        if: matrix.package != 'editable'
        run: pytest -v jdb_to_nwb
      - name: Run tests on editable install with coverage
        if: matrix.package == 'editable'
        run: pytest --cov=src --cov-report=xml -v jdb_to_nwb
      - name: Upload coverage reports to Codecov
        if: matrix.package == 'editable'
        uses: codecov/codecov-action@v5
        env:
          CODECOV_TOKEN: ${{ secrets.CODECOV_TOKEN }}

  # pypi-publish:
  #   name: Upload release to PyPI
  #   runs-on: ubuntu-latest
  #   needs: [test-package]
  #   environment:
  #     name: pypi
  #     url: https://pypi.org/p/jdb-to-nwb
  #   permissions:
  #     id-token: write  # IMPORTANT: this permission is mandatory for trusted publishing
  #   if: github.event_name == 'push' && startsWith(github.ref, 'refs/tags/')
  #   steps:
  #     - uses: actions/download-artifact@v4
  #       with:
  #         name: dist
  #         path: dist/
  #     - name: Publish package distributions to PyPI
  #       uses: pypa/gh-action-pypi-publish@release/v1
```

.gitignore

Lines changed: 3 additions & 2 deletions

```diff
@@ -169,7 +169,8 @@ _version.py
 
 # Large test data
 tests/test_data/photometry/*
-tests/test_data/raw_ephys/*
-tests/test_data/processed_ephys/*
 tests/test_data/behavior/IM-1478*
+tests/test_data/downloaded/*
 
+# Box credentials
+.env
```

README.md

Lines changed: 32 additions & 0 deletions

````diff
@@ -26,6 +26,38 @@ cp tests/metadata_full.yaml .
 jdb_to_nwb metadata_full.yaml out.nwb
 ```
 
+## Downloading test data
+
+The large test data files are stored in a shared UCSF Box account. To get access to the test data,
+please contact the repo maintainers.
+
+Create a new file called `.env` in the root directory of the repository and add your Box credentials:
+```bash
+BOX_USERNAME=<your_box_username>
+BOX_PASSWORD=<your_box_password>
+```
+Or set the environment variables in your shell:
+```bash
+export BOX_USERNAME=<your_box_username>
+export BOX_PASSWORD=<your_box_password>
+```
+
+Then run the download script:
+```bash
+python tests/download_test_data.py
+```
+
+Notes:
+- Run `python tests/test_data/create_raw_ephys_test_data.py` to re-create the test data for `raw_ephys`.
+- Run `python tests/test_data/create_processed_ephys_test_data.py` to re-create the test data for `processed_ephys`.
+- `tests/test_data/processed_ephys/impedance.csv` was manually created for testing purposes.
+- `tests/test_data/processed_ephys/geom.csv` was manually created for testing purposes.
+- Some files (`settings.xml`, `structure.oebin`) nested within `tests/test_data/raw_ephys/2022-07-25_15-30-00`
+  were manually created for testing purposes.
+
+The GitHub Actions workflow (`.github/workflows/test_package_build.yml`) will automatically download the test data and run the tests.
+
+
 ## Versioning
 
 Versioning is handled automatically using [hatch-vcs](https://github.com/ofek/hatch-vcs) using the latest
````
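The README above points to `tests/download_test_data.py`, whose contents are not part of this diff. A minimal sketch of how such a script might resolve the Box credentials, honoring both the `.env` file and shell environment variables the README describes (the function name `get_box_credentials` and the parsing logic are assumptions for illustration, not the script's actual code):

```python
import os
from pathlib import Path


def get_box_credentials(env_file: Path = Path(".env")) -> tuple[str, str]:
    """Resolve Box credentials from a .env file, falling back to the environment."""
    values: dict[str, str] = {}
    if env_file.exists():
        for line in env_file.read_text().splitlines():
            line = line.strip()
            # Skip blank lines and comments; parse simple KEY=VALUE pairs
            if line and not line.startswith("#") and "=" in line:
                key, _, value = line.partition("=")
                values[key.strip()] = value.strip()
    username = values.get("BOX_USERNAME") or os.environ.get("BOX_USERNAME")
    password = values.get("BOX_PASSWORD") or os.environ.get("BOX_PASSWORD")
    if not username or not password:
        raise RuntimeError("Set BOX_USERNAME and BOX_PASSWORD in .env or the environment")
    return username, password
```

On GitHub Actions, the workflow injects `BOX_USERNAME`/`BOX_PASSWORD` from repository secrets as environment variables, so no `.env` file is needed there.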

pyproject.toml

Lines changed: 3 additions & 1 deletion

```diff
@@ -25,10 +25,11 @@ classifiers = [
 dependencies = [
     "spikeinterface >= 0.101.0",
     "tqdm",
-    "neuroconv == 0.6.0",
+    "neuroconv == 0.6.5",
     "pynwb >= 2.8.1",
     "ndx_fiber_photometry",
     "ndx_franklab_novela",
+    "scikit-learn",
 ]
 dynamic = ["version"]
 
@@ -40,6 +41,7 @@ dev = [
     "ruff",
     "codespell",
 ]
+test = ["pytest", "pytest-cov"]
 
 [project.urls]
 "Homepage" = "https://github.com/calderast/jdb_to_nwb/"
```

tests/create_spike_test_data.py renamed to tests/create_processed_ephys_test_data.py

Lines changed: 15 additions & 10 deletions

```diff
@@ -3,34 +3,39 @@
 
 # SpikeInterface can read this format easily.
 
-# In Tim's data, the MDA file contains 145 units and 3,040,410 spikes.
+# In Tim's data for IM-1478/2022-07-25_15-30-00, the `firings.mda` file contains 145 units and 3,040,410 spikes.
 
 # To create test data of a reasonable size, we will trim the spike times to only those in the first 30,000 samples.
 # This results in 63 units and 462 spikes.
 
-# To run this script, copy Tim's MountainSort output file "firing.mda" to "../data/ephys/mntsort_output/firings.mda"
-# or change the paths in this script to point to the location of Tim's data.
+# To run this script, copy Tim's MountainSort output file `firings.mda` for IM-1478/2022-07-25_15-30-00
+# to your computer and adjust the path in this script to point to the location of the data on your computer.
+
+# The `firings.mda` file should be 72,969,860 bytes. The test checks specific properties of the file generated by this
+# script.
 
 # Then run this script from the command line from the root of the repo:
-# python tests/test_data/create_spike_test_data.py
+# python tests/test_data/create_processed_ephys_test_data.py
 
 from pathlib import Path
 
 from spikeinterface.extractors import read_mda_sorting, MdaSortingExtractor
 
-# Create a new directory to store the trimmed data
-new_data_dir = Path("./tests/test_data/processed_ephys")
-new_data_dir.mkdir(parents=True, exist_ok=True)
-output_file_path = new_data_dir / "firings.mda"
+# NOTE: Adjust this path to point to the location of Tim's sorted data for IM-1478/2022-07-25_15-30-00
+firings_mda_file_path = Path("/Users/rly/Documents/NWB/berke-lab-to-nwb/data/ephys/mntsort_output/firings.mda")
+sampling_frequency = 30_000
 
 # Read the .mda file
-firings_mda_file_path = Path("../data/ephys/mntsort_output/firings.mda")
-sampling_frequency = 30_000
 sorting = read_mda_sorting(firings_mda_file_path, sampling_frequency=sampling_frequency)
 
 # Trim the spike times to only those in the first 30,000 samples
 sorting_trimmed = sorting.frame_slice(start_frame=0, end_frame=30_000)
 
+# Create a new directory to store the trimmed data
+new_data_dir = Path("./tests/test_data/processed_ephys")
+new_data_dir.mkdir(parents=True, exist_ok=True)
+output_file_path = new_data_dir / "firings.mda"
+
 # Write the trimmed spike sorting data to a new .mda file
 MdaSortingExtractor.write_sorting(sorting=sorting_trimmed, save_path=output_file_path)
```
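`frame_slice` restricts a sorting to a window of sample frames. The trimming step can be sketched with plain numpy on fabricated spike trains (illustration only; the real script operates on the `firings.mda` sorting through SpikeInterface, and the sample values below are made up):

```python
import numpy as np

# Hypothetical spike trains: unit id -> spike sample indices at 30 kHz
# (NOT the real firings.mda contents)
spike_trains = {
    0: np.array([100, 29_999, 45_000]),
    1: np.array([31_000, 60_000]),
}

# Keep only spikes in the first 30,000 samples, analogous to
# sorting.frame_slice(start_frame=0, end_frame=30_000)
end_frame = 30_000
trimmed = {unit: times[times < end_frame] for unit, times in spike_trains.items()}

# Drop units left with no spikes in the window; this is how trimming can shrink
# the unit count (the script's comment notes 145 units became 63)
trimmed = {unit: times for unit, times in trimmed.items() if times.size > 0}
```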

tests/create_raw_ephys_test_data.py

Lines changed: 17 additions & 16 deletions

```diff
@@ -3,32 +3,32 @@
 
 # Each continuous directory contains the following files:
 
-# - continuous.dat: A simple binary file containing N channels x M samples 16-bit integers in little-endian format.
+# - `continuous.dat`: A simple binary file containing N channels x M samples 16-bit integers in little-endian format.
 #   Data is saved as ch1_samp1, ch2_samp1, ... chN_samp1, ch1_samp2, ch2_samp2, ..., chN_sampM. The value of the
 #   least significant bit needed to convert the 16-bit integers to physical units is specified in the bitVolts
 #   field of the relevant channel in the structure.oebin JSON file. For "headstage" channels, multiplying by
 #   bitVolts converts the values to microvolts, whereas for "ADC" channels, bitVolts converts the values to volts.
 
-# - timestamps.npy: A numpy array containing M 64-bit integers that represent the index of each sample in the
+# - `timestamps.npy`: A numpy array containing M 64-bit integers that represent the index of each sample in the
 #   .dat file since the start of acquisition.
 
 # We could use SpikeInterface to read this data, but manipulating the data is easier with numpy since the data
 # is a flat binary file.
 
-# In Tim's data, the continuous.dat file contains 264 channels. The first 256 channels are the headstage (neural)
+# In Tim's data, the `continuous.dat` file contains 264 channels. The first 256 channels are the headstage (neural)
 # channels, and the last 8 channels are the ADC channels.
 
-# The structure.oebin JSON file and settings.xml contains metadata for the recording.
+# The `structure.oebin` JSON file and `settings.xml` contain metadata for the recording.
 
 # To create test data of a reasonable size, we will trim the existing data and timestamps to 30,000 samples
 # (one second of data) and 6 channels and save it to a new directory.
 
-# We will manually edit the structure.oebin JSON file to remove the events and TTL channels and extra headstage
-# and ADC channels. We will also manually edit the settings.xml file to remove the events and TTL channels and
+# We will manually edit the `structure.oebin` JSON file to remove the events and TTL channels and extra headstage
+# and ADC channels. We will also manually edit the `settings.xml` file to remove the events and TTL channels and
 # extra headstage and ADC channels.
 
-# To run this script, copy Tim's ephys data directory "2022-07-25_15-30-00" and place it in "../data"
-# or change the paths in this script to point to the location of Tim's data.
+# To run this script, copy Tim's open ephys data directory for IM-1478/2022-07-25_15-30-00 to your computer
+# and adjust the paths in this script to point to the location of the data on your computer.
 
 # Then run this script from the command line from the root of the repo:
 # python tests/test_data/create_raw_ephys_test_data.py
@@ -37,23 +37,24 @@
 
 import numpy as np
 
-# Create a new directory to store the trimmed data
-new_data_root = Path("./tests/test_data/raw_ephys")
-new_data_dir = new_data_root / "2022-07-25_15-30-00/experiment1/recording1/continuous/Rhythm_FPGA-100.0"
-new_data_dir.mkdir(parents=True, exist_ok=True)
+# NOTE: Adjust this path to point to the location of Tim's raw data for IM-1478/2022-07-25_15-30-00
+open_ephys_data_root = Path("/Users/rly/Documents/NWB/berke-lab-to-nwb/data/2022-07-25_15-30-00")
+continuous_dat_file_path = open_ephys_data_root / "experiment1/recording1/continuous/Rhythm_FPGA-100.0/continuous.dat"
+timestamps_file_path = open_ephys_data_root / "experiment1/recording1/continuous/Rhythm_FPGA-100.0/timestamps.npy"
 
 # Set the properties of the source data and parameters for the trimmed data
 num_channels = 264
 sampling_rate_in_hz = 30_000
-continuous_dat_file_path = (
-    "../data/2022-07-25_15-30-00/experiment1/recording1/continuous/Rhythm_FPGA-100.0/continuous.dat"
-)
-timestamps_file_path = "../data/2022-07-25_15-30-00/experiment1/recording1/continuous/Rhythm_FPGA-100.0/timestamps.npy"
 
 # Specify the number of seconds and channels of the original data to keep
 num_seconds_to_keep = 1.0
 num_channels_to_keep = 6
 
+# Create a new directory to store the trimmed data
+new_data_root = Path("./tests/test_data/raw_ephys")
+new_data_dir = new_data_root / "2022-07-25_15-30-00/experiment1/recording1/continuous/Rhythm_FPGA-100.0"
+new_data_dir.mkdir(parents=True, exist_ok=True)
+
 # Load the data from the continuous.dat file into a memory-mapped numpy array
 data = np.memmap(continuous_dat_file_path, dtype=np.int16, mode="r")
 assert len(data) % num_channels == 0, f"Data length is not divisible by num_channels: {num_channels}"
```
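Given the interleaved layout described in the script's comments (ch1_samp1, ch2_samp1, ..., chN_samp1, ch1_samp2, ...), the trim itself is a plain numpy reshape-and-slice. A minimal sketch on fabricated values (the real script memory-maps the 264-channel file and keeps 30,000 samples and 6 channels):

```python
import numpy as np

# Fabricated stand-in for continuous.dat: interleaved int16 values
num_channels = 4   # the real recording has 264
num_samples = 10
flat = np.arange(num_channels * num_samples, dtype=np.int16)

# The interleaved layout means the flat buffer reshapes directly to
# (samples, channels): each row is one time point across all channels
data = flat.reshape(-1, num_channels)

# Trim to the first samples and channels, as the script does
num_samples_to_keep = 5
num_channels_to_keep = 2
trimmed = data[:num_samples_to_keep, :num_channels_to_keep]

# trimmed.tofile(...) would write the result back in the same interleaved binary layout
```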
