Polish: British English, add Python 3.10 to CI, metadata, remove dead fixture

Ekin-Kahraman · Ekin-Kahraman · commit 3e6f2368fbb1 · 2026-03-23T18:58:05.000Z
diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
@@ -11,7 +11,7 @@ jobs:
     runs-on: ubuntu-latest
     strategy:
       matrix:
-        python-version: ["3.11", "3.12"]
+        python-version: ["3.10", "3.11", "3.12"]
 
     steps:
       - uses: actions/checkout@v4
diff --git a/CITATION.cff b/CITATION.cff
@@ -13,3 +13,5 @@ keywords:
 authors:
   - family-names: "Kahraman"
     given-names: "Ekin"
+    affiliation: "University of East Anglia"
+    email: "evk23umu@uea.ac.uk"
diff --git a/README.md b/README.md
@@ -1,6 +1,6 @@
 # Single-Cell RNA-seq Immune Cell Profiling
 
-End-to-end single-cell RNA-seq analysis pipeline in Python using [scanpy](https://scanpy.readthedocs.io/). Demonstrates quality control, normalization, dimensionality reduction, clustering with automated resolution selection, and marker-based cell type annotation on human PBMC data.
+End-to-end single-cell RNA-seq analysis pipeline in Python using [scanpy](https://scanpy.readthedocs.io/). Demonstrates quality control, normalisation, dimensionality reduction, clustering with automated resolution selection, and marker-based cell type annotation on human PBMC data.
 
 <p align="center">
   <img src="docs/umap_3d_rotation.gif" alt="3D UMAP rotation showing PBMC immune cell clusters" width="600">
@@ -24,8 +24,8 @@ End-to-end single-cell RNA-seq analysis pipeline in Python using [scanpy](https:
 | Step | Script | Description |
 |------|--------|-------------|
 | 01 | `scripts/01_load_and_qc.py` | Download data, calculate QC metrics (genes/cell, counts, mito %), filter |
-| 02 | `scripts/02_preprocess.py` | Normalize (10k/cell), log-transform, select 2,000 HVGs, regress covariates, scale |
-| 03 | `scripts/03_reduce_dimensions.py` | PCA (40 components), neighbor graph, UMAP embedding |
+| 02 | `scripts/02_preprocess.py` | Normalise (10k/cell), log-transform, select 2,000 HVGs, regress covariates, scale |
+| 03 | `scripts/03_reduce_dimensions.py` | PCA (40 components), neighbour graph, UMAP embedding |
 | 04 | `scripts/04_cluster.py` | Leiden clustering at 5 resolutions, silhouette-based selection (min 5 clusters) |
 | 05 | `scripts/05_annotate_cell_types.py` | Wilcoxon DE, marker gene scoring, automated cell type assignment |
 | 06 | `scripts/06_publication_figures.py` | Multi-panel publication figure (UMAP, composition, heatmap) |
@@ -93,9 +93,9 @@ pytest -v
 
 - **Automated cell type annotation**: Clusters are assigned to cell types by scoring against curated PBMC marker gene sets, not manual inspection.
 - **Multi-resolution clustering**: Leiden is run at 5 resolutions (0.3-1.2) and the best is selected by silhouette score with a biological floor of 5 clusters.
-- **Colorblind-friendly palette**: Publication figures use the Okabe-Ito palette for accessibility.
+- **Colourblind-friendly palette**: Publication figures use the Okabe-Ito palette for accessibility.
 - **Modular scripts**: Each step reads the previous step's output from disk. Steps can be re-run independently.
 
-## License
+## Licence
 
 MIT
diff --git a/pyproject.toml b/pyproject.toml
@@ -9,8 +9,11 @@ description = "Single-cell RNA-seq immune cell profiling pipeline using scanpy"
 readme = "README.md"
 license = {text = "MIT"}
 requires-python = ">=3.10"
-authors = [{name = "Ekin Kahraman"}]
+authors = [{name = "Ekin Kahraman", email = "evk23umu@uea.ac.uk"}]
 keywords = ["bioinformatics", "single-cell", "RNA-seq", "scanpy", "immune profiling"]
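+
+# Inline table rather than a [project.urls] header, so the keys that follow (classifiers, ...) stay inside [project].
+urls = {Repository = "https://github.com/Ekin-Kahraman/single-cell-rnaseq-immune-profiling"}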
 classifiers = [
     "Development Status :: 4 - Beta",
     "Intended Audience :: Science/Research",
diff --git a/scripts/02_preprocess.py b/scripts/02_preprocess.py
@@ -1,4 +1,4 @@
-"""Step 02: Normalize, find highly variable genes, and scale."""
+"""Step 02: Normalise, find highly variable genes, and scale."""
 
 import scanpy as sc
 import matplotlib.pyplot as plt
@@ -13,10 +13,10 @@
 
 
 def preprocess(adata):
-    """Normalize, log-transform, select HVGs, regress, and scale."""
+    """Normalise, log-transform, select HVGs, regress, and scale."""
     print(f"Input: {adata.n_obs} cells, {adata.n_vars} genes")
 
-    # Normalize to target_sum counts per cell
+    # Normalise to target_sum counts per cell
     sc.pp.normalize_total(adata, target_sum=TARGET_SUM)
     print(f"Normalized to {TARGET_SUM:.0f} counts per cell")
 
diff --git a/scripts/06_publication_figures.py b/scripts/06_publication_figures.py
@@ -9,7 +9,7 @@
 RESULTS_DIR = Path("results")
 FIG_DIR = RESULTS_DIR / "figures"
 
-# Color palette (colorblind-friendly)
+# Colour palette (colourblind-friendly)
 PALETTE = {
     "CD4+ T cells": "#E69F00",
     "CD8+ T cells": "#56B4E9",
@@ -82,7 +82,7 @@ def make_figure(adata):
         raw_df["cell_type"] = adata.obs["cell_type"].values
         mean_expr = raw_df.groupby("cell_type")[markers_present].mean()
 
-        # Z-score per gene for visualization
+        # Z-score per gene for visualisation
         from scipy.stats import zscore
         z_expr = mean_expr.apply(zscore, axis=0)
 
diff --git a/tests/conftest.py b/tests/conftest.py
@@ -2,7 +2,6 @@
 
 import pytest
 import scanpy as sc
-import numpy as np
 
 
 @pytest.fixture(scope="session")
@@ -11,25 +10,3 @@ def pbmc3k_raw():
     adata = sc.datasets.pbmc3k()
     adata.var_names_make_unique()
     return adata
-
-
-@pytest.fixture
-def small_adata():
-    """Create a small synthetic AnnData for fast unit tests."""
-    rng = np.random.default_rng(42)
-    n_cells, n_genes = 200, 500
-    X = rng.poisson(1, size=(n_cells, n_genes)).astype(np.float32)
-
-    adata = sc.AnnData(X)
-    adata.var_names = [f"Gene_{i}" for i in range(n_genes)]
-    adata.obs_names = [f"Cell_{i}" for i in range(n_cells)]
-
-    # Add some "mitochondrial" genes
-    mt_genes = [f"MT-{c}" for c in ["ND1", "ND2", "CO1", "CO2", "ATP6"]]
-    for i, name in enumerate(mt_genes):
-        if i < n_genes:
-            adata.var_names = adata.var_names.tolist()
-            adata.var_names = [name if j == i else adata.var_names[j] for j in range(n_genes)]
-
-    adata.var_names_make_unique()
-    return adata