|
1 | 1 | import re |
2 | 2 |
|
def get_tcr_parent_dir():
    """Load the TCR-library to parent mapping from the library CSV.

    The CSV path is taken from ``config.library`` and falls back to
    ``"libraries.csv"``. Column names are matched case-insensitively and
    with surrounding whitespace ignored; cell values are stripped.
    Rows whose ``tcr`` cell is empty are skipped.

    Returns:
        dict: maps each ``tcr`` value to the ``parent`` value of the same row
        (the parent may be an empty string if the cell is empty or missing).

    Raises:
        FileNotFoundError: the CSV path does not exist.
        ValueError: the header lacks a ``tcr`` or ``parent`` column, or the
            same ``tcr`` value appears in more than one row.
    """
    import csv
    import os

    library_csv = getattr(config, "library", "libraries.csv")
    if not os.path.exists(library_csv):
        raise FileNotFoundError(f"Library CSV not found: {library_csv}")

    def _normalize(name):
        # A UTF-8 byte-order mark sticks to the first header name when the
        # file is decoded as plain UTF-8; str.strip() does not remove it.
        return name.lstrip("\ufeff").strip().lower()

    mapping = {}
    with open(library_csv, newline='') as fh:
        reader = csv.DictReader(fh)
        header = {_normalize(fn) for fn in (reader.fieldnames or [])}
        if "tcr" not in header or "parent" not in header:
            raise ValueError("library CSV must contain at least two columns: 'tcr' and 'parent'")

        for row in reader:
            # DictReader puts surplus fields of an over-long row under the
            # key None (as a list); ignore that entry instead of crashing on
            # None.lower(). Short rows yield None values, mapped to "".
            row_ci = {
                _normalize(k): (v or "").strip()
                for k, v in row.items()
                if k is not None
            }
            tcr_key = row_ci.get("tcr", "")
            if not tcr_key:
                # skip empty tcr entries
                continue
            if tcr_key in mapping:
                raise ValueError(f"Duplicate tcr entry in {library_csv}: '{tcr_key}'")
            mapping[tcr_key] = row_ci.get("parent", "")

    return mapping
| 33 | + |
3 | 34 | current_cellranger = program.parsebio_split_pipe |
4 | 35 | if "split-pipe" in current_cellranger or 'spipe' in current_cellranger: |
5 | 36 | split_version = re.search(r"spipe_v(\d+\.\d+\.\d+)", current_cellranger) |
@@ -29,60 +60,134 @@ parsebio_sample_list = getattr(config, "parsebio_sample_list", "") |
29 | 60 | if parsebio_sample_list == "": |
30 | 61 | raise ValueError("parsebio_sample_list is a required config parameter for parsebio") |
31 | 62 |
|
32 | | -rule split_pipe_all: |
33 | | - input: |
34 | | - r1 = rules.make_fastq_concat.output.r1, |
35 | | - r2 = rules.make_fastq_concat.output.r2, |
36 | | - output: |
37 | | - summary_html = "{sample}/all-sample_analysis_summary.html" |
38 | | - log: |
39 | | - err = "run_{sample}_parsebio_split_pipe_all.err", |
40 | | - log ="run_{sample}_parsebio_split_pipe_all.log", |
41 | | - params: |
42 | | - prefix = "{sample}", |
43 | | - chemistry = config.chemistry, |
44 | | - kit = config.kit, |
45 | | - shell: |
46 | | - """ |
# Check if TCR analysis is enabled
tcr_enabled = getattr(config, "tcr", False)

# Infer the immune genome from the configured reference name
# (case-insensitive substring match); stays "none" when nothing matches.
# No print() here: this code runs on every workflow parse and anything
# written to stdout ends up in piped Snakemake output.
ref_name = getattr(config, "ref", "").lower()
immune_genome = "none"
# NOTE(review): "hg39" looks like a typo (possibly "hg19"?) — confirm the
# intended human build names before changing the match.
if "hg38" in ref_name or "hg39" in ref_name:
    immune_genome = "human"
elif "mm10" in ref_name or "mm39" in ref_name:
    immune_genome = "mouse"
| 74 | + |
| 75 | +# Validate supported immune genomes when TCR analysis is enabled |
if tcr_enabled:
    # TCR mode: split-pipe is run with --tcr_analysis and needs both an
    # immune genome and, per sample, the parent entry from the library CSV.
    if immune_genome not in ("human", "mouse"):
        # ref_name is derived from config.ref, so point the user at that key.
        raise ValueError(
            f"Immune genome for TCR analysis must be 'human' or 'mouse'. "
            f"Detected: '{immune_genome}'. Check config.ref ('{ref_name}') to determine genome."
        )

    # Maps each 'tcr' value from the library CSV to its 'parent' value;
    # looked up by sample name below.
    dict_tcr_parent = get_tcr_parent_dir()

    # Per-sample split-pipe run in TCR mode.
    rule split_pipe_all:
        input:
            r1 = rules.make_fastq_concat.output.r1,
            r2 = rules.make_fastq_concat.output.r2,
        output:
            summary_html = "{sample}/all-sample_analysis_summary.html"
        log:
            err = "run_{sample}_parsebio_split_pipe_all.err",
            log ="run_{sample}_parsebio_split_pipe_all.log",
        params:
            prefix = "{sample}",
            chemistry = config.chemistry,
            immune_genome = immune_genome,
            parent_dir = lambda wildcards: dict_tcr_parent[wildcards.sample],
        shell:
            """
            rm -r {params.prefix};
            {program.parsebio_split_pipe} \
                --mode all \
                --chemistry {params.chemistry} \
                --tcr_analysis \
                --immune_genome {params.immune_genome} \
                --parent_dir {params.parent_dir} \
                --output_dir {params.prefix} \
                --fq1 {input.r1} \
                --fq2 {input.r2} \
                2>{log.err} 1>{log.log}
            """

    # Combine all per-sample TCR outputs.
    # NOTE(review): this rule's output has no {sample} wildcard, so
    # wildcards.sample in the parent_dir function below is expected to fail
    # when the rule is evaluated. Which parent directory the combined run
    # should use cannot be determined from this file — confirm and replace.
    # NOTE(review): unlike split_pipe_all above, --tcr_analysis is not passed
    # here — confirm whether comb mode needs it.
    rule split_pipe_comb:
        input:
            expand(rules.split_pipe_all.output, sample=samples)
        params:
            outdir = "split_pipe_comb",
            immune_genome = immune_genome,
            parent_dir = lambda wildcards: dict_tcr_parent[wildcards.sample],
            sublibs = expand(os.path.join(analysis, "{sample}"), sample=samples)
        output:
            summary_html = "split_pipe_comb/all-sample_analysis_summary.html"
        log:
            err = "run_parsebio_split_pipe_comb.err",
            log ="run_parsebio_split_pipe_comb.log",
        shell:
            """
            {program.parsebio_split_pipe} \
                --mode comb \
                --immune_genome {params.immune_genome} \
                --parent_dir {params.parent_dir} \
                --genome_dir {reference.transcriptome_parsebio} \
                --sublibraries {params.sublibs} \
                --output_dir {params.outdir} \
                2>{log.err} 1>{log.log}
            """

else:
    # Standard (non-TCR) mode: per-sample run against the transcriptome
    # reference with the configured kit and sample list.
    rule split_pipe_all:
        input:
            r1 = rules.make_fastq_concat.output.r1,
            r2 = rules.make_fastq_concat.output.r2,
        output:
            summary_html = "{sample}/all-sample_analysis_summary.html"
        log:
            err = "run_{sample}_parsebio_split_pipe_all.err",
            log ="run_{sample}_parsebio_split_pipe_all.log",
        params:
            prefix = "{sample}",
            chemistry = config.chemistry,
            kit = config.kit,
        shell:
            """
            rm -r {params.prefix};
            {program.parsebio_split_pipe} \
                --mode all \
                --chemistry {params.chemistry} \
                --kit {params.kit} \
                --genome_dir {reference.transcriptome_parsebio} \
                --fq1 {input.r1} \
                --fq2 {input.r2} \
                --output_dir {params.prefix} \
                --samp_list {parsebio_sample_list} \
                2>{log.err} 1>{log.log}
            """

    # Combine all per-sample outputs into split_pipe_comb/.
    rule split_pipe_comb:
        input:
            expand(rules.split_pipe_all.output, sample=samples)
        params:
            outdir = "split_pipe_comb",
            chemistry = config.chemistry,
            kit = config.kit,
            sublibs = expand(os.path.join(analysis, "{sample}"), sample=samples)
        output:
            summary_html = "split_pipe_comb/all-sample_analysis_summary.html"
        log:
            err = "run_parsebio_split_pipe_comb.err",
            log ="run_parsebio_split_pipe_comb.log",
        shell:
            """
            {program.parsebio_split_pipe} \
                --mode comb \
                --chemistry {config.chemistry} \
                --kit {config.kit} \
                --genome_dir {reference.transcriptome_parsebio} \
                --sublibraries {params.sublibs} \
                --output_dir {params.outdir} \
                2>{log.err} 1>{log.log}
            """

86 | 191 | rule summaryFiles: |
87 | 192 | input: |
88 | 193 | rules.split_pipe_comb.output, |
|
0 commit comments