{
    "@context": [
        "https://w3id.org/ro/crate/1.1/context",
        {
            "GithubService": "https://w3id.org/ro/terms/test#GithubService",
            "JenkinsService": "https://w3id.org/ro/terms/test#JenkinsService",
            "PlanemoEngine": "https://w3id.org/ro/terms/test#PlanemoEngine",
            "TestDefinition": "https://w3id.org/ro/terms/test#TestDefinition",
            "TestInstance": "https://w3id.org/ro/terms/test#TestInstance",
            "TestService": "https://w3id.org/ro/terms/test#TestService",
            "TestSuite": "https://w3id.org/ro/terms/test#TestSuite",
            "TravisService": "https://w3id.org/ro/terms/test#TravisService",
            "definition": "https://w3id.org/ro/terms/test#definition",
            "engineVersion": "https://w3id.org/ro/terms/test#engineVersion",
            "instance": "https://w3id.org/ro/terms/test#instance",
            "resource": "https://w3id.org/ro/terms/test#resource",
            "runsOn": "https://w3id.org/ro/terms/test#runsOn"
        }
    ],
    "@graph": [
        {
            "@id": "./",
            "@type": "Dataset",
            "creativeWorkStatus": "InProgress",
            "datePublished": "2025-11-20T09:31:39+00:00",
            "description": "<h1>\n  <picture>\n    <source media=\"(prefers-color-scheme: dark)\" srcset=\"docs/images/nf-core-longraredisease_logo_dark.png\">\n    <img alt=\"nf-core/longraredisease\" src=\"docs/images/nf-core-longraredisease_logo_light.png\">\n  </picture>\n</h1>\n\n[![Open in GitHub Codespaces](https://img.shields.io/badge/Open_In_GitHub_Codespaces-black?labelColor=grey&logo=github)](https://github.com/codespaces/new/nf-core/longraredisease)\n[![GitHub Actions CI Status](https://github.com/nf-core/longraredisease/actions/workflows/nf-test.yml/badge.svg)](https://github.com/nf-core/longraredisease/actions/workflows/nf-test.yml)\n[![GitHub Actions Linting Status](https://github.com/nf-core/longraredisease/actions/workflows/linting.yml/badge.svg)](https://github.com/nf-core/longraredisease/actions/workflows/linting.yml)[![AWS CI](https://img.shields.io/badge/CI%20tests-full%20size-FF9900?labelColor=000000&logo=Amazon%20AWS)](https://nf-co.re/longraredisease/results)[![Cite with Zenodo](http://img.shields.io/badge/DOI-10.5281/zenodo.XXXXXXX-1073c8?labelColor=000000)](https://doi.org/10.5281/zenodo.XXXXXXX)\n[![nf-test](https://img.shields.io/badge/unit_tests-nf--test-337ab7.svg)](https://www.nf-test.com)\n\n[![Nextflow](https://img.shields.io/badge/version-%E2%89%A525.04.6-green?style=flat&logo=nextflow&logoColor=white&color=%230DC09D&link=https%3A%2F%2Fnextflow.io)](https://www.nextflow.io/)\n[![nf-core template version](https://img.shields.io/badge/nf--core_template-3.5.1-green?style=flat&logo=nfcore&logoColor=white&color=%2324B064&link=https%3A%2F%2Fnf-co.re)](https://github.com/nf-core/tools/releases/tag/3.5.1)\n[![run with conda](http://img.shields.io/badge/run%20with-conda-3EB049?labelColor=000000&logo=anaconda)](https://docs.conda.io/en/latest/)\n[![run with docker](https://img.shields.io/badge/run%20with-docker-0db7ed?labelColor=000000&logo=docker)](https://www.docker.com/)\n[![run with 
singularity](https://img.shields.io/badge/run%20with-singularity-1d355c.svg?labelColor=000000)](https://sylabs.io/docs/)\n[![Launch on Seqera Platform](https://img.shields.io/badge/Launch%20%F0%9F%9A%80-Seqera%20Platform-%234256e7)](https://cloud.seqera.io/launch?pipeline=https://github.com/nf-core/longraredisease)\n\n[![Get help on Slack](http://img.shields.io/badge/slack-nf--core%20%23longraredisease-4A154B?labelColor=000000&logo=slack)](https://nfcore.slack.com/channels/longraredisease)\n[![Follow on Twitter](http://img.shields.io/badge/twitter-%40nf__core-1DA1F2?labelColor=000000&logo=twitter)](https://twitter.com/nf_core)\n[![Follow on Mastodon](https://img.shields.io/badge/mastodon-nf__core-6364ff?labelColor=FFFFFF&logo=mastodon)](https://mstdn.science/@nf_core)\n[![Watch on YouTube](http://img.shields.io/badge/youtube-nf--core-FF0000?labelColor=000000&logo=youtube)](https://www.youtube.com/c/nf-core)\n\n---\n\n## Introduction\n\n**nf-core/longraredisease** is a specialized bioinformatics pipeline for **structural variant (SV) detection and clinical interpretation** from long-read sequencing data (Oxford Nanopore and PacBio). 
Designed for rare disease diagnostics, it delivers high-confidence variant discovery through multi-caller consensus, family-based analysis, and phenotype-driven prioritization.\n\n![Long-read sequencing pipeline](docs/images/longraredisease_pipeline.png)\n\n### \ud83c\udfaf **Primary Focus: Structural Variant Detection**\n\nThe pipeline excels at identifying and interpreting structural variants through:\n\n- **Multi-caller SV consensus** - Sniffles, CuteSV, SVIM with JASMINE merging\n- **Phase-aware calling** - Haplotype-resolved SV detection using LongPhase\n- **Family analysis** - Trio-based joint calling and de novo variant detection\n- **Clinical annotation** - AnnotSV with disease database integration\n- **Phenotype prioritization** - SVANNA-based ranking using HPO terms\n\n### \ud83d\udcca **Analysis Capabilities**\n\n**Core SV Analysis (Always Enabled):**\n\n- \u2705 **Structural Variants** - Multi-caller detection (DEL, INS, DUP, INV, BND)\n- \u2705 **Phasing** - Long-range haplotyping with LongPhase\n- \u2705 **Quality Control** - Comprehensive QC with NanoPlot, mosdepth, MultiQC\n\n**Optional Analyses:**\n\n- \ud83e\uddec **Single Nucleotide Variants** - Clair3 or DeepVariant (enable with `--snv true`)\n- \ud83d\udcc8 **Copy Number Variants** - Spectre or HiFiCNV (enable with `--cnv true`)\n- \ud83d\udd01 **Short Tandem Repeats** - Straglr genotyping (enable with `--str true`)\n- \ud83e\uddea **DNA Methylation** - Modkit extraction for ONT (enable with `--methyl true`)\n\n---\n\n## Requirements\n\n### Software\n\n- **Nextflow** \u226525.04.6 (DSL2)\n- **Container engine:** Docker, Singularity/Apptainer, or Podman\n- **Java** \u226517 (required by Nextflow)\n\n### Recommended Hardware\n\n| Analysis Type         | CPU Cores | Memory   | Storage |\n| --------------------- | --------- | -------- | ------- |\n| **Single WGS sample** | 8-16      | 32-64 GB | 100 GB  |\n\n**Notes:**\n\n- Coverage recommendations: \u226510x for accurate SV calling, \u226530x for 
high-confidence trio analysis\n- Storage includes space for input data, intermediate files, and results\n- Adjust `--max_cpus` and `--max_memory` parameters based on available resources\n\n---\n\n## Quick Start\n\n### 1. Install Nextflow\n\n```bash\n# Install Nextflow (\u226525.04.6)\ncurl -s https://get.nextflow.io | bash\nsudo mv nextflow /usr/local/bin/\n\n# Verify installation\nnextflow -version\n```\n\n### 2. Test the Pipeline\n\n```bash\n# Run with test data\nnextflow run nf-core/longraredisease \\\n    -profile test,docker \\\n    --outdir test_results\n```\n\n### 3. Run with the Longraredisease Test Data\n\n**Minimal SV-focused run:**\n\n```bash\nnextflow run nf-core/longraredisease \\\n    --input samplesheet.csv \\\n    --outdir results \\\n    --fasta reference.fasta \\\n    --sequencing_platform ont \\\n    -profile docker\n```\n\n**With family analysis and phenotype prioritization:**\n\n```bash\nnextflow run nf-core/longraredisease \\\n    --input samplesheet.csv \\\n    --outdir results \\\n    --fasta reference.fasta \\\n    --sequencing_platform ont \\\n    --trio_analysis true \\\n    --run_svanna true \\\n    --svanna_db /path/to/svanna_db \\\n    -profile docker\n```\n\nSee [docs/usage.md](docs/usage.md) for complete examples and parameter details.\n\n---\n\n## Input Requirements\n\n### Required Inputs\n\n| Parameter               | Description                      | Format                      | Example           |\n| ----------------------- | -------------------------------- | --------------------------- | ----------------- |\n| `--input`               | Samplesheet with sample metadata | CSV                         | `samplesheet.csv` |\n| `--outdir`              | Output directory                 | Path                        | `./results`       |\n| `--fasta`               | Reference genome FASTA           | `.fasta`/`.fa`              | `GRCh38.fasta`    |\n| `--sequencing_platform` | Platform type                    | `ont` or `pacbio` or 
`hifi` | `ont`             |\n\n### Samplesheet Format\n\nThe input samplesheet is a CSV file with the following columns:\n\n**Minimal format (single samples):**\n\n```csv\nsample,bam,bai\nsample1,/path/to/sample1.bam,/path/to/sample1.bam.bai\nsample2,/path/to/sample2.bam,/path/to/sample2.bam.bai\n```\n\n**Family analysis format (trios):**\n\n```csv\nsample,bam,bai,family,paternal_id,maternal_id,sex,phenotype,hpo_terms\nproband,proband.bam,proband.bam.bai,family1,father,mother,1,affected,\"HP:0001250,HP:0002066\"\nfather,father.bam,father.bam.bai,family1,0,0,1,unaffected,\nmother,mother.bam,mother.bam.bai,family1,0,0,2,unaffected,\n```\n\n**Column descriptions:**\n\n- `sample` - Unique sample identifier\n- `bam` - Path to aligned BAM file\n- `bai` - Path to BAM index file\n- `family` - Family identifier (for trio analysis)\n- `paternal_id` - Father's sample ID (or `0` if not in study)\n- `maternal_id` - Mother's sample ID (or `0` if not in study)\n- `sex` - `1` = male, `2` = female, `0` = unknown\n- `phenotype` - `affected` or `unaffected`\n- `hpo_terms` - Comma-separated HPO terms (e.g., `HP:0001250,HP:0002066`)\n\n### Optional Inputs\n\n| Parameter      | Description             | Required For             |\n| -------------- | ----------------------- | ------------------------ |\n| `--bed`        | Target regions BED file | Targeted sequencing      |\n| `--annotsv_db` | AnnotSV database path   | SV annotation            |\n| `--svanna_db`  | SVANNA database path    | Phenotype prioritization |\n| `--str_bed`    | STR loci BED file       | STR analysis             |\n\n---\n\n## Key Parameters\n\n### Core Analysis Toggles\n\n**Structural variant analysis is always enabled.** Optional analyses:\n\n| Parameter  | Description                                   | Default |\n| ---------- | --------------------------------------------- | ------- |\n| `--snv`    | Enable SNV calling (Clair3/DeepVariant)       | `false` |\n| `--cnv`    | Enable CNV detection (Spectre)      
          | `false` |\n| `--str`    | Enable STR genotyping (Straglr)               | `false` |\n| `--methyl` | Enable methylation calling (Modkit, ONT only) | `false` |\n\n### SV Detection Parameters\n\n| Parameter            | Description                               | Default |\n| -------------------- | ----------------------------------------- | ------- |\n| `--run_cutesv`       | Enable CuteSV caller                      | `true`  |\n| `--run_svim`         | Enable SVIM caller (recommended for BNDs) | `false` |\n| `--haplotag_bam`     | Haplotag BAM for phase-aware SV calling   | `true`  |\n| `--min_sv_size`      | Minimum SV size to report (bp)            | `30`    |\n| `--min_read_support` | Minimum supporting reads                  | `auto`  |\n\n### Family Analysis Parameters\n\n| Parameter         | Description                            | Default |\n| ----------------- | -------------------------------------- | ------- |\n| `--trio_analysis` | Enable trio/family-based calling       | `false` |\n| `--run_svanna`    | Enable phenotype-driven prioritization | `false` |\n| `--svanna_db`     | Path to SVANNA database                | -       |\n\n### Multi-caller Consensus Parameters\n\n| Parameter               | Description                               | Default |\n| ----------------------- | ----------------------------------------- | ------- |\n| `--jasmine_max_dist`    | Max distance for merging breakpoints (bp) | `1000`  |\n| `--jasmine_min_support` | Min callers supporting merged variant     | `2`     |\n| `--jasmine_spec_reads`  | Min supporting reads for consensus        | `3`     |\n\n### Platform-specific Settings\n\n| Parameter               | Description               | Options                         |\n| ----------------------- | ------------------------- | ------------------------------- |\n| `--sequencing_platform` | Sequencing platform       | `ont`, `pacbio`                 |\n| `--preset`              | Minimap2 alignment preset | 
`map-ont`, `map-hifi`, `map-pb` |\n| `--snv_caller`          | SNV caller choice         | `clair3`, `deepvariant`         |\n\n---\n\n## Usage Examples\n\n### 1. Standard SV Analysis (Single Sample)\n\n```bash\nnextflow run nf-core/longraredisease \\\n    --input samplesheet.csv \\\n    --outdir results \\\n    --fasta GRCh38.fasta \\\n    --sequencing_platform ont \\\n    -profile docker\n```\n\n### 2. Comprehensive Analysis (SVs + SNVs + CNVs)\n\n```bash\nnextflow run nf-core/longraredisease \\\n    --input samplesheet.csv \\\n    --outdir results \\\n    --fasta GRCh38.fasta \\\n    --sequencing_platform pacbio \\\n    --snv true \\\n    --cnv true \\\n    --str true \\\n    -profile singularity\n```\n\n### 3. Family Trio Analysis with Phenotype Prioritization\n\n```bash\nnextflow run nf-core/longraredisease \\\n    --input trio_samplesheet.csv \\\n    --outdir family_results \\\n    --fasta GRCh38.fasta \\\n    --sequencing_platform ont \\\n    --trio_analysis true \\\n    --run_svanna true \\\n    --svanna_db /databases/svanna_data \\\n    --annotsv_db /databases/AnnotSV \\\n    -profile docker\n```\n\n### 4. High-Sensitivity SV Detection\n\n```bash\nnextflow run nf-core/longraredisease \\\n    --input samplesheet.csv \\\n    --outdir sensitive_results \\\n    --fasta GRCh38.fasta \\\n    --sequencing_platform ont \\\n    --run_svim true \\\n    --min_sv_size 20 \\\n    --min_read_support 2 \\\n    --jasmine_min_support 1 \\\n    -profile docker\n```\n\n### 5. 
Targeted Sequencing with BED File\n\n```bash\nnextflow run nf-core/longraredisease \\\n    --input samplesheet.csv \\\n    --outdir targeted_results \\\n    --fasta GRCh38.fasta \\\n    --bed targets.bed \\\n    --sequencing_platform ont \\\n    -profile docker\n```\n\n---\n\n## Output Structure\n\n```\nresults/\n\u251c\u2500\u2500 pipeline_info/              # Pipeline execution reports\n\u2502   \u251c\u2500\u2500 execution_report.html   # Resource usage timeline\n\u2502   \u251c\u2500\u2500 execution_timeline.html # Process execution graph\n\u2502   \u2514\u2500\u2500 multiqc_report.html     # Comprehensive QC report\n\u2502\n\u251c\u2500\u2500 qc/                         # Quality control metrics\n\u2502   \u251c\u2500\u2500 mosdepth/               # Coverage statistics per sample\n\u2502   \u251c\u2500\u2500 nanoplot/               # Read quality metrics (ONT)\n\u2502   \u2514\u2500\u2500 cramino/                # CRAM-based QC (optional)\n\u2502\n\u251c\u2500\u2500 structural_variants/        # \ud83c\udfaf PRIMARY OUTPUT: SV calls\n\u2502   \u251c\u2500\u2500 sniffles/               # Per-sample Sniffles VCFs\n\u2502   \u2502   \u2514\u2500\u2500 {sample}.sniffles.vcf.gz\n\u2502   \u251c\u2500\u2500 cutesv/                 # Per-sample CuteSV VCFs\n\u2502   \u2502   \u2514\u2500\u2500 {sample}.cutesv.vcf.gz\n\u2502   \u251c\u2500\u2500 svim/                   # Per-sample SVIM VCFs (if enabled)\n\u2502   \u2502   \u2514\u2500\u2500 {sample}.svim.vcf.gz\n\u2502   \u251c\u2500\u2500 merged/                 # Multi-caller consensus SVs\n\u2502   \u2502   \u251c\u2500\u2500 {sample}.jasmine.vcf.gz\n\u2502   \u2502   \u2514\u2500\u2500 {sample}.survivor.vcf.gz\n\u2502   \u251c\u2500\u2500 annotated/              # AnnotSV annotations\n\u2502   \u2502   \u2514\u2500\u2500 {sample}.annotated.tsv\n\u2502   \u2514\u2500\u2500 svanna/                 # Phenotype-prioritized SVs\n\u2502       \u2514\u2500\u2500 {sample}.svanna.html\n\u2502\n\u251c\u2500\u2500 phasing/  
                  # Haplotype-resolved results\n\u2502   \u251c\u2500\u2500 haplotagged_bams/       # Phase-tagged alignments\n\u2502   \u2502   \u2514\u2500\u2500 {sample}.haplotagged.bam\n\u2502   \u251c\u2500\u2500 whatshap/               # Phasing statistics\n\u2502   \u2502   \u2514\u2500\u2500 {sample}.phased.vcf.gz\n\u2502   \u2514\u2500\u2500 longphase/              # Alternative phasing\n\u2502       \u2514\u2500\u2500 {sample}.longphase.vcf.gz\n\u2502\n\u251c\u2500\u2500 snv_calls/                  # SNVs (if --snv enabled)\n\u2502   \u251c\u2500\u2500 clair3/\n\u2502   \u2502   \u2514\u2500\u2500 {sample}.clair3.vcf.gz\n\u2502   \u2514\u2500\u2500 deepvariant/\n\u2502       \u2514\u2500\u2500 {sample}.deepvariant.vcf.gz\n\u2502\n\u251c\u2500\u2500 cnv_calls/                  # CNVs (if --cnv enabled)\n\u2502   \u2514\u2500\u2500 spectre/\n\u2502       \u2514\u2500\u2500 {sample}.cnv.vcf.gz\n\u2502\n\u251c\u2500\u2500 str_calls/                  # STRs (if --str enabled)\n\u2502   \u2514\u2500\u2500 straglr/\n\u2502       \u2514\u2500\u2500 {sample}.straglr.tsv\n\u2502\n\u2514\u2500\u2500 methylation/                # Methylation (if --methyl enabled, ONT only)\n    \u2514\u2500\u2500 modkit/\n        \u2514\u2500\u2500 {sample}.bedmethyl.gz\n```\n\n**Key output files:**\n\n- **Merged SVs**: `structural_variants/merged/{sample}.jasmine.vcf.gz` (high-confidence consensus)\n- **Annotated SVs**: `structural_variants/annotated/{sample}.annotated.tsv` (clinical interpretation)\n- **QC Report**: `pipeline_info/multiqc_report.html` (overall quality assessment)\n- **Phenotype-prioritized**: `structural_variants/svanna/{sample}.svanna.html` (ranked by phenotype match)\n\n---\n\n## Configuration Profiles\n\n**Available Profiles:**\n\n- test: Minimal test dataset\n- docker: Use Docker containers\n- singularity: Use Singularity containers\n\n**Custom Configuration**\n\n```bash\n// custom.config\nparams {\n    max_cpus = 16\n    max_memory = '64.GB'\n    outdir = 
'/scratch/results'\n}\n\nprocess {\n    withName: 'CLAIR3' {\n        cpus = 8\n        memory = '32.GB'\n    }\n}\n```\n\nRun with:\n\n```bash\nnextflow run main.nf -c custom.config -profile docker\n```\n\n---\n\n## Family-Based Analysis\n\n### Trio/Family Configuration\n\nFor family-based SV analysis, provide pedigree information in your samplesheet:\n\n```csv\nsample,bam,bai,family,paternal_id,maternal_id,sex,phenotype,hpo_terms\nchild_001,child.bam,child.bam.bai,FAM001,father_001,mother_001,2,affected,\"HP:0001250,HP:0002066,HP:0001263\"\nfather_001,father.bam,father.bam.bai,FAM001,0,0,1,unaffected,\nmother_001,mother.bam,mother.bam.bai,FAM001,0,0,2,unaffected,\n```\n\n**Sex encoding:** `1` = male, `2` = female, `0` = unknown\n**Parental IDs:** Use `0` for founders (individuals with no parents in the study)\n\n### De Novo SV Detection\n\nEnable trio analysis to identify _de novo_ structural variants:\n\n```bash\nnextflow run nf-core/longraredisease \\\n    --input trio_samplesheet.csv \\\n    --trio_analysis true \\\n    --outdir trio_results \\\n    --fasta GRCh38.fasta \\\n    --sequencing_platform ont \\\n    -profile docker\n```\n\nThe pipeline will:\n\n1. \u2705 Call SVs in each family member independently\n2. \u2705 Merge calls using JASMINE with family-aware parameters\n3. \u2705 Identify variants present in child but absent in parents\n4. 
\u2705 Filter based on read support and quality metrics\n\n### Phenotype-Driven Prioritization (SVANNA)\n\nWhen HPO terms are provided, SVANNA ranks SVs by phenotype relevance:\n\n```bash\nnextflow run nf-core/longraredisease \\\n    --input trio_samplesheet.csv \\\n    --trio_analysis true \\\n    --run_svanna true \\\n    --svanna_db /path/to/svanna/2302 \\\n    --outdir prioritized_results \\\n    --fasta GRCh38.fasta \\\n    --sequencing_platform ont \\\n    -profile docker\n```\n\n**Required:** Download SVANNA database from [Monarch Initiative](https://github.com/TheJacksonLaboratory/SvAnna)\n\n**Output:** HTML report ranking SVs by:\n\n- Overlap with disease-associated genes\n- Regulatory impact predictions\n- Phenotype similarity scores\n- De novo status (if trio data available)\n\n### Annotation with AnnotSV\n\nEnable comprehensive SV annotation:\n\n```bash\nnextflow run nf-core/longraredisease \\\n    --input samplesheet.csv \\\n    --annotsv_db /path/to/AnnotSV_db \\\n    --outdir annotated_results \\\n    --fasta GRCh38.fasta \\\n    --sequencing_platform ont \\\n    -profile docker\n```\n\n**AnnotSV provides:**\n\n- Gene overlap and functional impact\n- ClinGen/ClinVar annotations\n- DGV/gnomAD population frequencies\n- Pathogenicity predictions (ACMG criteria)\n- Regulatory element disruption\n\n---\n\n## Troubleshooting\n\n### Common Issues\n\n#### 1. Low SV Detection Rate\n\n**Symptoms:** Fewer SVs than expected\n\n**Solutions:**\n\n```bash\n# Lower read support threshold\n--min_read_support 2\n\n# Reduce minimum SV size\n--min_sv_size 20\n\n# Enable SVIM for better breakend detection\n--run_svim true\n\n# Lower consensus requirement\n--jasmine_min_support 1\n```\n\n#### 2. High False Positive Rate\n\n**Symptoms:** Many low-quality SV calls\n\n**Solutions:**\n\n```bash\n# Increase read support\n--min_read_support 5\n\n# Require multiple caller agreement\n--jasmine_min_support 2\n\n# Increase minimum SV size\n--min_sv_size 50\n```\n\n#### 3. 
Memory Issues\n\n**Symptoms:** Process killed due to OOM\n\n**Solutions:**\n\n```bash\n# Increase max memory\n--max_memory 128.GB\n\n# Reduce parallel processes\n--max_cpus 16\n\n# Use chromosome-based parallelization (automatic)\n```\n\n#### 4. Missing De Novo Variants\n\n**Symptoms:** Expected _de novo_ variants not detected\n\n**Checklist:**\n\n- \u2705 Ensure `--trio_analysis true` is set\n- \u2705 Verify pedigree information in samplesheet\n- \u2705 Check read coverage in all samples (\u226530\u00d7)\n- \u2705 Review `structural_variants/merged/` for family calls\n- \u2705 Lower `--jasmine_min_support` if needed\n\n#### 5. SVANNA Database Issues\n\n**Symptoms:** SVANNA fails or produces no rankings\n\n**Solutions:**\n\n```bash\n# Verify database path and version\nls -lh /path/to/svanna/2302\n\n# Ensure HPO terms are valid (HP:XXXXXXX format)\n# Check samplesheet for proper HPO term formatting\n\n# Download latest SVANNA database:\nwget https://storage.googleapis.com/svanna-db/svanna-data-2302.tar.gz\ntar -xzf svanna-data-2302.tar.gz\n```\n\n### Performance Optimization\n\n**For large cohorts (>10 samples):**\n\n```bash\n# Enable resource-efficient mode\n--max_cpus 64\n--max_memory 256.GB\n\n# Use Singularity for better resource isolation\n-profile singularity\n\n# Enable work directory cleanup\n-resume -with-dag flowchart.html\n```\n\n**For whole genome sequencing:**\n\n- Expect 8-24 hours runtime (depending on coverage)\n- Allocate 64-128GB RAM per sample for SV calling\n- Use SSD storage for work directory (I/O intensive)\n\n---\n\n## Test Data\n\nThe pipeline includes test data for validation:\n\n- Location: assets/test_data/\n- Genome: Chromosome 22 subset\n- Samples: Simulated nanopore data\n- Runtime: ~10-15 minutes\n\n---\n\n## Getting Help\n\n**Debugging Failed Runs:**\n\n```bash\n# Check Nextflow log for detailed errors\nless .nextflow.log\n\n# Resume from last successful step\nnextflow run nf-core/longraredisease -resume\n\n# Enable debug mode for 
verbose output\nnextflow run nf-core/longraredisease --debug -profile docker\n```\n\n**Reporting Issues:**\n\nWhen reporting issues, please include:\n\n- Nextflow version (`nextflow -version`)\n- Command used to run the pipeline\n- Relevant error messages from `.nextflow.log`\n- Sample metadata (anonymized if sensitive)\n- System specifications (CPU, RAM, storage)\n\n---\n\n## Citation\n\nIf you use **nf-core/longraredisease** in your research, please cite:\n\n> **nf-core/longraredisease: A Nextflow pipeline for long-read sequencing analysis in rare disease research** > _Citation to be added upon publication_\n\nAdditionally, please cite the tools used in your analysis:\n\n**Core SV Tools:**\n\n- **Sniffles2:** Sedlazeck et al. (2018) _Nature Methods_\n- **CuteSV:** Jiang et al. (2020) _Genome Biology_\n- **JASMINE:** Kirsche et al. (2023) _Nature Methods_\n- **LongPhase:** Luo et al. (2023) _Nature Communications_\n- **AnnotSV:** Geoffroy et al. (2018) _Bioinformatics_\n\n**Optional Analysis Tools:**\n\n- **SVANNA:** Danis et al. (2022) _AJHG_\n- **Clair3:** Zheng et al. (2022) _Nature Computational Science_\n- **Spectre:** Suvakov et al. (2021) _Genome Research_\n- **Straglr:** Chin et al. (2023) _Genome Research_\n\n---\n\n## Contributing\n\nContributions are welcome! To contribute:\n\n1. Fork the repository\n2. Create a feature branch (`git checkout -b feature/AmazingFeature`)\n3. Make your changes following [nf-core guidelines](https://nf-co.re/developers/guidelines)\n4. Test with `nextflow run . -profile test,docker`\n5. Commit your changes (`git commit -m 'Add AmazingFeature'`)\n6. Push to the branch (`git push origin feature/AmazingFeature`)\n7. 
Open a Pull Request\n\n**Please ensure:**\n\n- \u2705 Code follows nf-core style guidelines\n- \u2705 All tests pass successfully\n- \u2705 Documentation is updated accordingly\n- \u2705 Commit messages are descriptive\n\n---\n\n## License\n\nThis project is licensed under the MIT License \u2013 see the [LICENSE](LICENSE) file for details.\n\n---\n\n## Acknowledgments\n\nThis pipeline was developed with support from [institution/funding sources]. We thank the nf-core community for infrastructure and best practices, and all tool developers whose software makes this pipeline possible.\n\n---\n\n**Pipeline Version:** 1.0.0\n**Nextflow Version:** \u226525.04.6\n**Last Updated:** 2024\n",
            "hasPart": [
                {
                    "@id": "main.nf"
                },
                {
                    "@id": "assets/"
                },
                {
                    "@id": "conf/"
                },
                {
                    "@id": "docs/"
                },
                {
                    "@id": "docs/images/"
                },
                {
                    "@id": "modules/"
                },
                {
                    "@id": "modules/nf-core/"
                },
                {
                    "@id": "workflows/"
                },
                {
                    "@id": "subworkflows/"
                },
                {
                    "@id": "nextflow.config"
                },
                {
                    "@id": "README.md"
                },
                {
                    "@id": "nextflow_schema.json"
                },
                {
                    "@id": "CHANGELOG.md"
                },
                {
                    "@id": "LICENSE"
                },
                {
                    "@id": "CODE_OF_CONDUCT.md"
                },
                {
                    "@id": "CITATIONS.md"
                },
                {
                    "@id": "modules.json"
                },
                {
                    "@id": "docs/usage.md"
                },
                {
                    "@id": "docs/output.md"
                },
                {
                    "@id": ".nf-core.yml"
                },
                {
                    "@id": ".pre-commit-config.yaml"
                },
                {
                    "@id": ".prettierignore"
                }
            ],
            "isBasedOn": "https://github.com/nf-core/longraredisease",
            "license": "MIT",
            "mainEntity": {
                "@id": "main.nf"
            },
            "mentions": [
                {
                    "@id": "#2764164c-30d2-480b-9bc8-5e69e4929eb9"
                }
            ],
            "name": "nf-core/longraredisease"
        },
        {
            "@id": "ro-crate-metadata.json",
            "@type": "CreativeWork",
            "about": {
                "@id": "./"
            },
            "conformsTo": [
                {
                    "@id": "https://w3id.org/ro/crate/1.1"
                },
                {
                    "@id": "https://w3id.org/workflowhub/workflow-ro-crate/1.0"
                }
            ]
        },
        {
            "@id": "main.nf",
            "@type": [
                "File",
                "SoftwareSourceCode",
                "ComputationalWorkflow"
            ],
            "dateModified": "2025-11-20T09:31:39Z",
            "dct:conformsTo": "https://bioschemas.org/profiles/ComputationalWorkflow/1.0-RELEASE/",
            "keywords": [
                "nf-core",
                "nextflow"
            ],
            "license": [
                "MIT"
            ],
            "name": [
                "nf-core/longraredisease"
            ],
            "programmingLanguage": {
                "@id": "https://w3id.org/workflowhub/workflow-ro-crate#nextflow"
            },
            "sdPublisher": {
                "@id": "https://nf-co.re/"
            },
            "url": [
                "https://github.com/nf-core/longraredisease",
                "https://nf-co.re/longraredisease/dev/"
            ],
            "version": [
                "1.0.0"
            ]
        },
        {
            "@id": "https://w3id.org/workflowhub/workflow-ro-crate#nextflow",
            "@type": "ComputerLanguage",
            "identifier": {
                "@id": "https://www.nextflow.io/"
            },
            "name": "Nextflow",
            "url": {
                "@id": "https://www.nextflow.io/"
            },
            "version": "!>=25.04.6"
        },
        {
            "@id": "#2764164c-30d2-480b-9bc8-5e69e4929eb9",
            "@type": "TestSuite",
            "instance": [
                {
                    "@id": "#297e50ac-a8ec-4a9b-a7de-ed270e7b0d0f"
                }
            ],
            "mainEntity": {
                "@id": "main.nf"
            },
            "name": "Test suite for nf-core/longraredisease"
        },
        {
            "@id": "#297e50ac-a8ec-4a9b-a7de-ed270e7b0d0f",
            "@type": "TestInstance",
            "name": "GitHub Actions workflow for testing nf-core/longraredisease",
            "resource": "repos/nf-core/longraredisease/actions/workflows/nf-test.yml",
            "runsOn": {
                "@id": "https://w3id.org/ro/terms/test#GithubService"
            },
            "url": "https://api.github.com"
        },
        {
            "@id": "https://w3id.org/ro/terms/test#GithubService",
            "@type": "TestService",
            "name": "GitHub Actions",
            "url": {
                "@id": "https://github.com"
            }
        },
        {
            "@id": "assets/",
            "@type": "Dataset",
            "description": "Additional files"
        },
        {
            "@id": "conf/",
            "@type": "Dataset",
            "description": "Configuration files"
        },
        {
            "@id": "docs/",
            "@type": "Dataset",
            "description": "Markdown files for documenting the pipeline"
        },
        {
            "@id": "docs/images/",
            "@type": "Dataset",
            "description": "Images for the documentation files"
        },
        {
            "@id": "modules/",
            "@type": "Dataset",
            "description": "Modules used by the pipeline"
        },
        {
            "@id": "modules/nf-core/",
            "@type": "Dataset",
            "description": "nf-core modules"
        },
        {
            "@id": "workflows/",
            "@type": "Dataset",
            "description": "Main pipeline workflows to be executed in main.nf"
        },
        {
            "@id": "subworkflows/",
            "@type": "Dataset",
            "description": "Smaller subworkflows"
        },
        {
            "@id": "nextflow.config",
            "@type": "File",
            "description": "Main Nextflow configuration file"
        },
        {
            "@id": "README.md",
            "@type": "File",
            "description": "Basic pipeline usage information"
        },
        {
            "@id": "nextflow_schema.json",
            "@type": "File",
            "description": "JSON schema for pipeline parameter specification"
        },
        {
            "@id": "CHANGELOG.md",
            "@type": "File",
            "description": "Information on changes made to the pipeline"
        },
        {
            "@id": "LICENSE",
            "@type": "File",
            "description": "The pipeline license (MIT)"
        },
        {
            "@id": "CODE_OF_CONDUCT.md",
            "@type": "File",
            "description": "The nf-core code of conduct"
        },
        {
            "@id": "CITATIONS.md",
            "@type": "File",
            "description": "Citations needed when using the pipeline"
        },
        {
            "@id": "modules.json",
            "@type": "File",
            "description": "Version information for modules from nf-core/modules"
        },
        {
            "@id": "docs/usage.md",
            "@type": "File",
            "description": "Usage documentation"
        },
        {
            "@id": "docs/output.md",
            "@type": "File",
            "description": "Output documentation"
        },
        {
            "@id": ".nf-core.yml",
            "@type": "File",
            "description": "nf-core configuration file, configuring template features and linting rules"
        },
        {
            "@id": ".pre-commit-config.yaml",
            "@type": "File",
            "description": "Configuration file for pre-commit hooks"
        },
        {
            "@id": ".prettierignore",
            "@type": "File",
            "description": "Ignore file for prettier"
        },
        {
            "@id": "https://nf-co.re/",
            "@type": "Organization",
            "name": "nf-core",
            "url": "https://nf-co.re/"
        }
    ]
}