Skip to content

Commit afd3c66

Browse files
committed
add testing
1 parent 9523b84 commit afd3c66

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

45 files changed

+1587
-3
lines changed

.github/workflows/tests.yml

Lines changed: 81 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,81 @@
1+
name: Tests
2+
3+
on:
4+
push:
5+
branches: [master, dev, looper-update]
6+
pull_request:
7+
branches: [master, dev]
8+
workflow_dispatch:
9+
inputs:
10+
run_integration:
11+
description: "Run integration tests (requires self-hosted runner)"
12+
required: false
13+
default: "false"
14+
15+
jobs:
16+
# --------------------------------------------------------------------------
17+
# Tier 1: Unit tests — no genome data or bioinformatics tools required.
18+
# Runs on every push and pull request.
19+
# --------------------------------------------------------------------------
20+
unit-tests:
21+
name: Unit tests (Python ${{ matrix.python-version }})
22+
runs-on: ubuntu-latest
23+
strategy:
24+
fail-fast: false
25+
matrix:
26+
python-version: ["3.9", "3.11", "3.12"]
27+
28+
steps:
29+
- uses: actions/checkout@v4
30+
31+
- uses: actions/setup-python@v5
32+
with:
33+
python-version: ${{ matrix.python-version }}
34+
35+
- name: Install Python dependencies
36+
run: pip install -r requirements.txt pytest
37+
38+
- name: Run unit tests
39+
run: pytest tests/test_unit.py -v --tb=short
40+
41+
# --------------------------------------------------------------------------
42+
# Tier 2: Integration tests — full pipeline runs.
43+
# Requires a self-hosted runner with genome indices and tools installed.
44+
# Triggered manually via workflow_dispatch with run_integration set to
45+
# "true"; the test step then sets RUN_INTEGRATION_TESTS=true for pytest.
46+
# --------------------------------------------------------------------------
47+
integration-tests:
48+
name: Integration tests (${{ matrix.scenario }})
49+
if: >
50+
github.event_name == 'workflow_dispatch' &&
51+
github.event.inputs.run_integration == 'true'
52+
runs-on: self-hosted
53+
strategy:
54+
fail-fast: false
55+
matrix:
56+
scenario:
57+
- se_basic
58+
- pe_basic
59+
- se_groseq
60+
- se_umi
61+
- pe_umi
62+
- se_fastp
63+
- se_fastx
64+
- se_fqdedup
65+
- se_scale
66+
- se_no_complexity
67+
- se_nofifo
68+
- se_coverage
69+
70+
steps:
71+
- uses: actions/checkout@v4
72+
73+
- name: Install Python dependencies
74+
run: pip install -r requirements.txt pytest
75+
76+
- name: Run integration test for ${{ matrix.scenario }}
77+
env:
78+
RUN_INTEGRATION_TESTS: "true"
79+
run: >
80+
pytest tests/test_integration.py -v --tb=short
81+
-k "${{ matrix.scenario }}"

Makefile

Lines changed: 48 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,52 @@
11
test:
2-
python pipelines/peppro.py -P 3 -M 100 -O peppro_test -R -S test -G hg38 -Q single -C peppro.yaml --genome-size hs --prealignments rCRSd human_repeats -I examples/data/test_R1.fq.gz
2+
python pipelines/peppro.py -P 3 -M 100 -O peppro_test -R -S test -G hg38 \
3+
-Q single -C peppro.yaml \
4+
--protocol PRO \
5+
--prealignment-names rCRSd human_repeats \
6+
--genome-index $$(refgenie seek hg38/bowtie2_index --seek-key dir) \
7+
--chrom-sizes $$(refgenie seek hg38/fasta --seek-key chrom_sizes) \
8+
--pipestat-schema peppro_output_schema.yaml \
9+
-I examples/data/test_r1.fq.gz
10+
11+
# -----------------------------------------------------------------------
12+
# Test suite targets
13+
# -----------------------------------------------------------------------
14+
15+
# Run only unit tests (no genome data or external tools required)
16+
test-unit:
17+
pytest tests/test_unit.py -v --tb=short
18+
19+
# Run a single integration scenario (e.g. make test-scenario SCENARIO=se_basic)
20+
SCENARIO ?= se_basic
21+
test-scenario:
22+
RUN_INTEGRATION_TESTS=true pytest tests/test_integration.py -v --tb=short -k "$(SCENARIO)"
23+
24+
# Run all SE integration scenarios
25+
test-se:
26+
RUN_INTEGRATION_TESTS=true pytest tests/test_integration.py -v --tb=short \
27+
-k "se_basic or se_groseq or se_umi or se_fastp or se_fastx or se_fqdedup or se_scale or se_no_complexity or se_nofifo or se_coverage"
28+
29+
# Run all PE integration scenarios
30+
test-pe:
31+
RUN_INTEGRATION_TESTS=true pytest tests/test_integration.py -v --tb=short \
32+
-k "pe_basic or pe_umi"
33+
34+
# Run recovery regression tests
35+
test-recovery:
36+
RUN_INTEGRATION_TESTS=true pytest tests/test_integration.py -v --tb=short \
37+
-k "recovery"
38+
39+
# Run all integration tests (SE + PE + recovery)
40+
test-integration:
41+
RUN_INTEGRATION_TESTS=true pytest tests/test_integration.py -v --tb=short
42+
43+
# Run both unit and integration tests
44+
test-all:
45+
RUN_INTEGRATION_TESTS=true pytest tests/ -v --tb=short
46+
47+
# Regenerate test FASTQ data files from the source R1 read file
48+
test-data:
49+
bash tests/scripts/generate_test_data.sh
350

451
docker:
552
docker build -t databio/peppro -f containers/peppro.Dockerfile .

pipelines/peppro.py

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -173,6 +173,10 @@ def parse_arguments():
173173
dest="complexity",
174174
help="Disable library complexity calculation (faster).")
175175

176+
parser.add_argument("--no-bw", action='store_true', default=False,
177+
dest="no_bw",
178+
help="Skip bigWig signal track generation (faster, for testing).")
179+
176180
parser.add_argument("--prioritize", action='store_true', default=False,
177181
dest="prioritize",
178182
help="Plot cFRiF/FRiF using mutually exclusive priority"
@@ -3906,8 +3910,10 @@ def count_unmapped_reads():
39063910
signal_folder, args.sample_name + "_minus_exact_body_0-mer.bw")
39073911
minus_smooth_bw = os.path.join(
39083912
signal_folder, args.sample_name + "_minus_smooth_body_0-mer.bw")
3909-
3910-
if not args.sob:
3913+
3914+
if args.no_bw:
3915+
pm.timestamp("### Skipping bigWig generation (--no-bw)")
3916+
elif not args.sob:
39113917
# If not scaling we don't need to use seqOutBias to generate the
39123918
# separate strand bigWigs; just convert the BAM's directly with
39133919
# bamSitesToWig.py which uses UCSC wigToBigWig

sample_pipeline_interface.yaml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -40,6 +40,7 @@ sample_interface:
4040
{% if sample.keep_mito is defined %} --keep-mito {% endif %}
4141
{% if sample.no_fifo is defined %} --noFIFO {% endif %}
4242
{% if sample.complexity is defined %} --no-complexity {% endif %}
43+
{% if sample.no_bw is defined %} --no-bw {% endif %}
4344
{% if sample.prioritize is defined %} --prioritize {% endif %}
4445
{% if sample.config_file is defined %} -C {sample.config_file} {% endif %}
4546
--pipestat-config {pipestat.config_file}

tests/README.md

Lines changed: 166 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,166 @@
1+
# PEPPRO Test Suite
2+
3+
This directory contains the PEPPRO test suite, organized into two tiers:
4+
5+
- **Unit tests** — fast, no genome data or external bioinformatics tools required; run on every push/PR via GitHub Actions
6+
- **Integration tests** — full pipeline runs; require a self-hosted runner with genome indices and all tools installed
7+
8+
---
9+
10+
## Directory Structure
11+
12+
```
13+
tests/
14+
├── data/ # Small test FASTQ files (~3 MB total)
15+
│ ├── test_R1.fastq.gz # SE reads (12,500 reads)
16+
│ ├── test_R2.fastq.gz # PE reverse reads (rev-comp of R1)
17+
│ └── test_R1_umi.fastq.gz # R1 with 8-nt UMI prefix for UMI tests
18+
├── pep_configs/ # PEP project configs for each scenario
19+
│ ├── se_basic.yaml / .csv
20+
│ ├── pe_basic.yaml / .csv
21+
│ └── ...
22+
├── looper_configs/ # Looper run configs for each scenario
23+
│ ├── .looper_se_basic.yaml
24+
│ └── ...
25+
├── scripts/
26+
│ └── generate_test_data.sh # Regenerate test FASTQ data from source
27+
├── test_unit.py # Unit tests (no tools/genome needed)
28+
├── test_integration.py # Integration tests (full pipeline runs)
29+
└── README.md # This file
30+
```
31+
32+
---
33+
34+
## Unit Tests
35+
36+
Unit tests cover:
37+
38+
- **Constants**: `RUNON_SOURCE`, `ADAPTER_REMOVERS`, `TRIMMERS`, `DEDUPLICATORS` values and defaults
39+
- **PEP loading**: Each test config loads correctly with expected sample attributes
40+
- **Schema validation**: eido validation passes for valid configs; regression tests ensure invalid inputs (e.g., integer `umi_len` in YAML `imply`, invalid `protocol`/`adapter`/`trimmer`/`dedup` enum values) fail correctly
41+
- **Argument parsing**: All CLI flags parse correctly, defaults are correct, invalid choices raise `SystemExit`
42+
- **Recovery paths**: Expected output file naming conventions are documented and verified
43+
44+
### Running unit tests
45+
46+
```bash
47+
# Via pytest directly
48+
pytest tests/test_unit.py -v
49+
50+
# Via Makefile
51+
make test-unit
52+
```
53+
54+
No environment variables or external tools are needed.
55+
56+
---
57+
58+
## Integration Tests
59+
60+
Integration tests run the full PEPPRO pipeline for each scenario and verify:
61+
62+
1. Pipeline exits with status `0`
63+
2. Key output files exist (BAM, bigWig, stats.yaml)
64+
3. `stats.yaml` contains the expected result keys
65+
4. The `TestRecovery` class additionally tests checkpoint skipping and the `unmap_R1.fq` recovery regression
66+
67+
### Prerequisites
68+
69+
The integration tests require a machine with all PEPPRO dependencies installed and genome assets configured via refgenie:
70+
71+
| Tool | Version tested |
72+
|------|---------------|
73+
| bowtie2 | ≥2.4 |
74+
| samtools | ≥1.13 |
75+
| bedtools | ≥2.30 |
76+
| cutadapt | ≥4.0 |
77+
| fastp | ≥0.23 |
78+
| seqtk | ≥1.3 |
79+
| fastx_toolkit | any |
80+
| seqkit | ≥2.0 |
81+
| fqdedup | any |
82+
| fastq_pair | any |
83+
| wigToBigWig | UCSC |
84+
| bedGraphToBigWig | UCSC |
85+
86+
**Genome assets** (via refgenie, pointed to by `$REFGENIE`):
87+
88+
- `hg38/bowtie2_index`
89+
- `human_rDNA/bowtie2_index`
90+
- `hg38/fasta` (for chromosome sizes)
91+
- `hg38/blacklist` (optional, for coverage tests)
92+
93+
### Running integration tests
94+
95+
Tests run with `-p local` (divvy local compute package) so the pipeline
96+
executes inline on the current node rather than being submitted to a job
97+
scheduler. Run integration tests from a compute node or interactive session
98+
if your cluster policy prohibits CPU-intensive work on login nodes.
99+
100+
```bash
101+
# Enable integration tests
102+
export RUN_INTEGRATION_TESTS=true
103+
104+
# Run a specific scenario
105+
pytest tests/test_integration.py -v -k se_basic
106+
107+
# Via Makefile targets
108+
make test-se # All SE scenarios
109+
make test-pe # All PE scenarios
110+
make test-recovery # Recovery regression tests
111+
make test-integration # All integration tests
112+
make test-all # Unit + integration
113+
114+
# Run a single named scenario
115+
make test-scenario SCENARIO=se_fastp
116+
117+
# Keep output directories for debugging (default: cleaned up after each class)
118+
KEEP_TEST_OUTPUTS=true RUN_INTEGRATION_TESTS=true pytest tests/test_integration.py -v -k se_basic
119+
```
120+
121+
---
122+
123+
## Test Scenarios
124+
125+
| Scenario | Read type | Protocol | Adapter | Trimmer | Dedup | Notes |
126+
|----------|-----------|----------|---------|---------|-------|-------|
127+
| `se_basic` | SE | PRO-seq | cutadapt | seqtk | — | Baseline SE run |
128+
| `pe_basic` | PE | PRO-seq | cutadapt | seqtk | — | Baseline PE run |
129+
| `se_groseq` | SE | GRO-seq | cutadapt | seqtk | — | GRO-seq protocol |
130+
| `se_umi` | SE | PRO-seq | cutadapt | seqtk | seqkit | 8-nt UMI deduplication |
131+
| `pe_umi` | PE | PRO-seq | cutadapt | seqtk | seqkit | PE with UMI dedup |
132+
| `se_fastp` | SE | PRO-seq | fastp | seqtk | — | fastp adapter trimming |
133+
| `se_fastx` | SE | PRO-seq | cutadapt | fastx | — | fastx_trimmer |
134+
| `se_fqdedup` | SE | PRO-seq | cutadapt | seqtk | fqdedup | fqdedup UMI dedup |
135+
| `se_scale` | SE | PRO-seq | cutadapt | seqtk | — | `--scale` flag |
136+
| `se_no_complexity` | SE | PRO-seq | cutadapt | seqtk | — | `--no-complexity` flag |
137+
| `se_nofifo` | SE | PRO-seq | cutadapt | seqtk | — | `--noFIFO` flag |
138+
| `se_coverage` | SE | PRO-seq | cutadapt | seqtk | — | `--coverage` flag |
139+
140+
---
141+
142+
## Test Data
143+
144+
The files in `tests/data/` are derived from `examples/data/test_r1.fq.gz` (the existing pipeline example read file). They are small enough to commit to the repository (~1 MB each).
145+
146+
To regenerate the test data files (requires `seqtk`):
147+
148+
```bash
149+
make test-data
150+
# or
151+
bash tests/scripts/generate_test_data.sh
152+
```
153+
154+
---
155+
156+
## GitHub Actions
157+
158+
Unit tests run automatically on every push to `master`, `dev`, or `looper-update` and on every pull request targeting `master` or `dev`, across Python 3.9, 3.11, and 3.12.
159+
160+
Integration tests are triggered manually via **workflow_dispatch** on a self-hosted runner:
161+
162+
1. Go to **Actions** → **Tests** → **Run workflow**
163+
2. Set "Run integration tests" to `true`
164+
3. Click **Run workflow**
165+
166+
See `.github/workflows/tests.yml` for the full configuration.
Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,10 @@
1+
pep_config: "../pep_configs/pe_basic.yaml"
2+
3+
output_dir: "${HOME}/peppro_test_pe_basic"
4+
5+
pipeline_interfaces:
6+
- "../../sample_pipeline_interface.yaml"
7+
- "../../project_pipeline_interface.yaml"
8+
9+
pipestat:
10+
results_file_path: "${HOME}/peppro_test_pe_basic/results_pipeline/{record_identifier}/stats.yaml"
Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,10 @@
1+
pep_config: "../pep_configs/pe_umi.yaml"
2+
3+
output_dir: "${HOME}/peppro_test_pe_umi"
4+
5+
pipeline_interfaces:
6+
- "../../sample_pipeline_interface.yaml"
7+
- "../../project_pipeline_interface.yaml"
8+
9+
pipestat:
10+
results_file_path: "${HOME}/peppro_test_pe_umi/results_pipeline/{record_identifier}/stats.yaml"
Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,10 @@
1+
pep_config: "../pep_configs/se_basic.yaml"
2+
3+
output_dir: "${HOME}/peppro_test_se_basic"
4+
5+
pipeline_interfaces:
6+
- "../../sample_pipeline_interface.yaml"
7+
- "../../project_pipeline_interface.yaml"
8+
9+
pipestat:
10+
results_file_path: "${HOME}/peppro_test_se_basic/results_pipeline/{record_identifier}/stats.yaml"
Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,10 @@
1+
pep_config: "../pep_configs/se_coverage.yaml"
2+
3+
output_dir: "${HOME}/peppro_test_se_coverage"
4+
5+
pipeline_interfaces:
6+
- "../../sample_pipeline_interface.yaml"
7+
- "../../project_pipeline_interface.yaml"
8+
9+
pipestat:
10+
results_file_path: "${HOME}/peppro_test_se_coverage/results_pipeline/{record_identifier}/stats.yaml"
Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,10 @@
1+
pep_config: "../pep_configs/se_fastp.yaml"
2+
3+
output_dir: "${HOME}/peppro_test_se_fastp"
4+
5+
pipeline_interfaces:
6+
- "../../sample_pipeline_interface.yaml"
7+
- "../../project_pipeline_interface.yaml"
8+
9+
pipestat:
10+
results_file_path: "${HOME}/peppro_test_se_fastp/results_pipeline/{record_identifier}/stats.yaml"

0 commit comments

Comments
 (0)