Merge pull request #179 from DiamondLightSource/zenodo

namannimmo10 · web-flow · commit fc4d9c3845da · 2025-01-10T11:20:38.000Z
CI: Run tests on larger datasets
diff --git a/.github/workflows/httomolibgpu_tests_run_iris.yml b/.github/workflows/httomolibgpu_tests_run_iris.yml
@@ -14,6 +14,7 @@ jobs:
       image: nvidia/cuda:12.6.3-devel-ubi8
       env:
         NVIDIA_VISIBLE_DEVICES: ${{ env.NVIDIA_VISIBLE_DEVICES }}
+      options: --gpus all --runtime=nvidia
 
     defaults:
       run:
@@ -23,6 +24,10 @@ jobs:
       - name: Checkout repository code
         uses: actions/checkout@v4
 
+      - name: Set up CUDA environment
+        run: |
+          echo "LD_LIBRARY_PATH=/usr/local/cuda/lib64:$LD_LIBRARY_PATH" >> $GITHUB_ENV
+
       - name: Create conda environment
         uses: mamba-org/setup-micromamba@v1
         with:
@@ -36,6 +41,14 @@ jobs:
           pip install .[dev]
           micromamba list
 
-      - name: Run tests
+      - name: Run unit tests on small data
         run: |
           pytest tests/
+
+      # Optional: run the Zenodo tests only when the PR carries the 'run-zenodo-tests' label
+      - name: Download and run Zenodo tests
+        if: contains(github.event.pull_request.labels.*.name, 'run-zenodo-tests')
+        run: |
+          chmod +x ./.scripts/download_zenodo.py
+          ./.scripts/download_zenodo.py zenodo-tests/large_data_archive
+          pytest zenodo-tests/
diff --git a/.github/workflows/main-checks.yml b/.github/workflows/main-checks.yml
@@ -0,0 +1,47 @@
+name: Main Branch Tests  # runs the unit tests plus the large-data Zenodo tests
+on:
+  push:
+    branches:
+      - main  # triggered only by pushes to main
+
+jobs:
+  iris-gpu:
+    runs-on: iris-gpu
+    container:
+      image: nvidia/cuda:12.6.3-devel-ubi8
+      env:
+        NVIDIA_VISIBLE_DEVICES: ${{ env.NVIDIA_VISIBLE_DEVICES }}
+      options: --gpus all --runtime=nvidia  # container options: request all GPUs through the NVIDIA runtime
+
+    defaults:
+      run:
+        shell: bash -l {0}  # login shell for every run step; presumably needed so the micromamba env is active
+
+    steps:
+      - uses: actions/checkout@v4
+
+      - name: Set up CUDA environment  # writes LD_LIBRARY_PATH (CUDA lib64 prepended) to $GITHUB_ENV for later steps
+        run: |
+          echo "LD_LIBRARY_PATH=/usr/local/cuda/lib64:$LD_LIBRARY_PATH" >> $GITHUB_ENV
+
+      - name: Create conda environment
+        uses: mamba-org/setup-micromamba@v1
+        with:
+          environment-file: conda/environment.yml
+          environment-name: httomo
+          post-cleanup: 'all'
+          init-shell: bash
+
+      - name: Download test data from Zenodo  # runs before the install step; the script imports only the standard library
+        run: |
+          chmod +x ./.scripts/download_zenodo.py
+          ./.scripts/download_zenodo.py zenodo-tests/large_data_archive
+
+      - name: Install httomolibgpu
+        run: |
+          pip install .[dev]
+          micromamba list
+
+      - name: Run all tests (including Zenodo)  # tests/ holds the unit tests, zenodo-tests/ the large-data tests
+        run: |
+          pytest tests/ zenodo-tests/
diff --git a/.scripts/download_zenodo.py b/.scripts/download_zenodo.py
@@ -0,0 +1,70 @@
+#!/usr/bin/env python3
+
+import json
+import urllib.request
+import hashlib
+import sys
+import os
+from pathlib import Path
+
+
+def calculate_md5(filename):
+    """Return the hexadecimal MD5 digest of the file at *filename*, read in 4 KiB blocks."""
+    digest = hashlib.md5()
+    with open(filename, "rb") as stream:
+        while block := stream.read(4096):
+            digest.update(block)
+    return digest.hexdigest()
+
+
+def download_zenodo_files(output_dir: Path):
+    """
+    Download all files from Zenodo record 14338424 and verify their checksums.
+
+    Files already in ``output_dir`` whose MD5 matches are not downloaded again.
+    Exits with status 1 on any error or checksum mismatch.
+
+    Args:
+        output_dir: Directory where files should be downloaded
+    """
+    try:
+        print("Fetching files from Zenodo record 14338424...")
+        with urllib.request.urlopen("https://zenodo.org/api/records/14338424") as response:
+            data = json.loads(response.read())
+
+        output_dir.mkdir(parents=True, exist_ok=True)
+
+        for file_info in data["files"]:  # one metadata dict per file in the record
+            filename = file_info["key"]  # The 'key' is the filename
+            output_file = output_dir / filename
+            # Checksums are reported as "<algorithm>:<hex digest>"
+            algorithm, _, expected_md5 = file_info["checksum"].partition(":")
+            if algorithm != "md5":
+                raise ValueError(f"Unsupported checksum '{algorithm}' for {filename}")
+            # Keep an intact copy from an earlier run instead of re-downloading it
+            if output_file.is_file() and calculate_md5(output_file) == expected_md5:
+                print(f"✓ {filename} already downloaded and verified")
+                continue
+            print(f"Downloading {filename}...")
+            urllib.request.urlretrieve(file_info["links"]["self"], output_file)
+            actual_md5 = calculate_md5(output_file)
+            if actual_md5 != expected_md5:
+                print(f"✗ Checksum verification failed for {filename}")
+                print(f"Expected: {expected_md5}")
+                print(f"Got: {actual_md5}")
+                sys.exit(1)
+            print(f"✓ Verified {filename}")
+
+        print("\nAll files downloaded and verified successfully!")
+    except Exception as e:
+        print(f"Error: {str(e)}", file=sys.stderr)
+        sys.exit(1)
+
+
+if __name__ == "__main__":
+    # Exactly one positional argument is expected: the output directory.
+    cli_args = sys.argv[1:]
+    if len(cli_args) != 1:
+        print("Usage: download_zenodo.py <output_directory>")
+        sys.exit(1)
+    download_zenodo_files(Path(cli_args[0]))
diff --git a/zenodo-tests/conftest.py b/zenodo-tests/conftest.py
@@ -0,0 +1,67 @@
+import os
+import cupy as cp
+import numpy as np
+import pytest
+
+CUR_DIR = os.path.abspath(os.path.dirname(__file__))
+
+
+@pytest.fixture(scope="session")
+def test_data_path():
+    return os.path.join(CUR_DIR, "large_data_archive")  # directory the CI fills via .scripts/download_zenodo.py
+
+
+@pytest.fixture(scope="session")
+def data_i12LFOV_file(test_data_path):
+    """Load ``i12LFOV.npz`` from the test-data directory once per session."""
+    return np.load(os.path.join(test_data_path, "i12LFOV.npz"))
+
+
+@pytest.fixture(scope="session")
+def data_i12_sandstone_file(test_data_path):
+    """Load ``i12_sandstone_50sinoslices.npz`` from the test-data directory once per session."""
+    return np.load(os.path.join(test_data_path, "i12_sandstone_50sinoslices.npz"))
+
+
+@pytest.fixture(scope="session")
+def data_geant4sim_file(test_data_path):
+    """Load ``geant4_640_540_proj360.npz`` from the test-data directory once per session."""
+    return np.load(os.path.join(test_data_path, "geant4_640_540_proj360.npz"))
+
+@pytest.fixture
+def i12LFOV_data(data_i12LFOV_file):
+    """Return ``(projdata, angles, flats, darks)``; all but ``angles`` go through ``cp.asarray``."""
+    projdata = cp.asarray(data_i12LFOV_file["projdata"])
+    angles = data_i12LFOV_file["angles"]
+    flats = cp.asarray(data_i12LFOV_file["flats"])
+    darks = cp.asarray(data_i12LFOV_file["darks"])
+    return projdata, angles, flats, darks
+
+
+@pytest.fixture
+def i12sandstone_data(data_i12_sandstone_file):
+    """Return ``(projdata, angles, flats, darks)``; all but ``angles`` go through ``cp.asarray``."""
+    projdata = cp.asarray(data_i12_sandstone_file["projdata"])
+    angles = data_i12_sandstone_file["angles"]
+    flats = cp.asarray(data_i12_sandstone_file["flats"])
+    darks = cp.asarray(data_i12_sandstone_file["darks"])
+    return projdata, angles, flats, darks
+
+
+@pytest.fixture
+def geantsim_data(data_geant4sim_file):
+    """Return ``(projdata, angles, flats, darks)``; all but ``angles`` go through ``cp.asarray``."""
+    projdata = cp.asarray(data_geant4sim_file["projdata"])
+    angles = data_geant4sim_file["angles"]
+    flats = cp.asarray(data_geant4sim_file["flats"])
+    darks = cp.asarray(data_geant4sim_file["darks"])
+    return projdata, angles, flats, darks
+
+
+@pytest.fixture
+def ensure_clean_memory():
+    cp.get_default_memory_pool().free_all_blocks()  # setup: release blocks held by the default device pool
+    cp.get_default_pinned_memory_pool().free_all_blocks()  # ...and by the default pinned-memory pool
+    yield None  # the requesting test runs here
+    cp.get_default_memory_pool().free_all_blocks()  # teardown: release both pools again after the test
+    cp.get_default_pinned_memory_pool().free_all_blocks()
diff --git a/zenodo-tests/test_recon/__init__.py b/zenodo-tests/test_recon/__init__.py
diff --git a/zenodo-tests/test_recon/test_rotation.py b/zenodo-tests/test_recon/test_rotation.py
@@ -0,0 +1,69 @@
+import cupy as cp
+import numpy as np
+import pytest
+
+from httomolibgpu.prep.normalize import normalize
+from httomolibgpu.recon.rotation import find_center_vo
+
+
+def test_center_vo_i12LFOV(i12LFOV_data, ensure_clean_memory):
+    """Check ``find_center_vo`` on the middle axis-1 slice of the normalised i12LFOV data."""
+    # The angles (index 1) are not used by this test.
+    projections, _, flat_fields, dark_fields = i12LFOV_data
+    del i12LFOV_data
+
+    normalised = normalize(projections, flat_fields, dark_fields, minus_log=False)
+    del flat_fields, dark_fields, projections
+
+    centre_index = normalised.shape[1] // 2
+    centre = find_center_vo(normalised[:, centre_index, :])
+
+    assert centre == 1197.75
+    assert centre.dtype == np.float32
+
+
+def test_center_vo_average_i12LFOV(i12LFOV_data, ensure_clean_memory):
+    """Check ``find_center_vo`` with ``average_radius=5`` on axis-1 indices 10-24 of i12LFOV."""
+    # The angles (index 1) are not used by this test.
+    projections, _, flat_fields, dark_fields = i12LFOV_data
+    del i12LFOV_data
+
+    normalised = normalize(projections, flat_fields, dark_fields, minus_log=False)
+    del flat_fields, dark_fields, projections
+
+    centre = find_center_vo(normalised[:, 10:25, :], average_radius=5)
+
+    assert centre == 1199.25
+    assert centre.dtype == np.float32
+
+
+def test_center_vo_i12_sandstone(i12sandstone_data, ensure_clean_memory):
+    """Check ``find_center_vo`` on the middle axis-1 slice of the minus-log sandstone data."""
+    # The angles (index 1) are not used by this test.
+    projections, _, flat_fields, dark_fields = i12sandstone_data
+    del i12sandstone_data
+
+    normalised = normalize(projections, flat_fields, dark_fields, minus_log=True)
+    del flat_fields, dark_fields, projections
+
+    centre_index = normalised.shape[1] // 2
+    centre = find_center_vo(normalised[:, centre_index, :])
+
+    assert centre == 1253.75
+    assert centre.dtype == np.float32
+
+
+def test_center_vo_i12_geantsim(geantsim_data, ensure_clean_memory):
+    """Check ``find_center_vo`` on the middle axis-1 slice of the minus-log Geant4 data."""
+    # The angles (index 1) are not used by this test.
+    projections, _, flat_fields, dark_fields = geantsim_data
+    del geantsim_data
+
+    normalised = normalize(projections, flat_fields, dark_fields, minus_log=True)
+    del flat_fields, dark_fields, projections
+
+    centre_index = normalised.shape[1] // 2
+    centre = find_center_vo(normalised[:, centre_index, :])
+
+    assert centre == 319.5
+    assert centre.dtype == np.float32