Skip to content

Commit ae4515b

Browse files
committed
Add support for tcr libraries
1 parent 481c5a6 commit ae4515b

1 file changed

Lines changed: 144 additions & 39 deletions

File tree

rules/singlecell_parsebio_import.smk

Lines changed: 144 additions & 39 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,36 @@
11
import re
22

3+
def get_tcr_parent_dir():
    """Load the TCR-library -> parent mapping from the library CSV.

    The CSV path is read from ``config.library`` and falls back to
    ``libraries.csv``. The file must provide ``tcr`` and ``parent`` columns;
    header names are matched case-insensitively and ignoring surrounding
    whitespace. Rows whose ``tcr`` cell is empty are skipped.

    Returns:
        dict mapping each stripped ``tcr`` value to its stripped ``parent``
        value (an empty string when the parent cell is blank or missing).

    Raises:
        FileNotFoundError: the library CSV does not exist.
        ValueError: a required column is missing, or the same ``tcr`` value
            appears on more than one row.
    """
    import csv
    import os

    library_csv = getattr(config, "library", "libraries.csv")
    if not os.path.exists(library_csv):
        raise FileNotFoundError(f"Library CSV not found: {library_csv}")

    mapping = {}
    # utf-8-sig drops the BOM that spreadsheet exports prepend; without it
    # the first header would read "\ufefftcr" and fail the column check.
    with open(library_csv, newline="", encoding="utf-8-sig") as fh:
        reader = csv.DictReader(fh)
        # normalize header names for a case-insensitive check
        normalized = {fn.lower().strip() for fn in (reader.fieldnames or [])}
        if "tcr" not in normalized or "parent" not in normalized:
            raise ValueError("library CSV must contain at least two columns: 'tcr' and 'parent'")

        for row in reader:
            # DictReader stores surplus cells under the key None (as a list);
            # those are not real columns, so leave them out. Short rows yield
            # None values, which become empty strings.
            row_ci = {
                key.lower().strip(): (value or "").strip()
                for key, value in row.items()
                if key is not None
            }
            tcr_key = row_ci.get("tcr", "")
            if not tcr_key:
                # skip rows without a tcr entry
                continue
            if tcr_key in mapping:
                raise ValueError(f"Duplicate tcr entry in {library_csv}: '{tcr_key}'")
            mapping[tcr_key] = row_ci.get("parent", "")

    return mapping
33+
334
current_cellranger = program.parsebio_split_pipe
435
if "split-pipe" in current_cellranger or 'spipe' in current_cellranger:
536
split_version = re.search(r"spipe_v(\d+\.\d+\.\d+)", current_cellranger)
@@ -29,60 +60,134 @@ parsebio_sample_list = getattr(config, "parsebio_sample_list", "")
2960
if parsebio_sample_list == "":
3061
raise ValueError("parsebio_sample_list is a required config parameter for parsebio")
3162

32-
rule split_pipe_all:
33-
input:
34-
r1 = rules.make_fastq_concat.output.r1,
35-
r2 = rules.make_fastq_concat.output.r2,
36-
output:
37-
summary_html = "{sample}/all-sample_analysis_summary.html"
38-
log:
39-
err = "run_{sample}_parsebio_split_pipe_all.err",
40-
log ="run_{sample}_parsebio_split_pipe_all.log",
41-
params:
42-
prefix = "{sample}",
43-
chemistry = config.chemistry,
44-
kit = config.kit,
45-
shell:
46-
"""
63+
# TCR analysis is opt-in: config.tcr switches it on, default is off.
tcr_enabled = getattr(config, "tcr", False)

# Derive the immune genome from the reference name in config.ref:
# hg38/hg39 -> "human", mm10/mm39 -> "mouse", anything else stays "none".
# Nothing is printed here: stdout output at parse time would end up in
# piped Snakemake output such as `snakemake --dag | dot`.
ref_name = getattr(config, "ref", "").lower()
immune_genome = "none"
if "hg38" in ref_name or "hg39" in ref_name:
    immune_genome = "human"
elif "mm10" in ref_name or "mm39" in ref_name:
    immune_genome = "mouse"
75+
# TCR analysis needs a recognised immune genome; stop early otherwise.
if tcr_enabled:
    if immune_genome not in ("human", "mouse"):
        # ref_name is derived from config.ref, so point the user there.
        raise ValueError(f"Immune genome for TCR analysis must be 'human' or 'mouse'. Detected: '{immune_genome}'. Check config.ref ('{ref_name}') to determine genome.")

    # TCR library -> parent mapping from the library CSV; the TCR rules
    # look it up with the sample wildcard.
    dict_tcr_parent = get_tcr_parent_dir()
82+
# TCR branch: run split-pipe in "all" mode for a single sample.
# The per-sample parent directory comes from dict_tcr_parent and the immune
# genome from the module-level immune_genome value.
rule split_pipe_all:
    input:
        r1 = rules.make_fastq_concat.output.r1,
        r2 = rules.make_fastq_concat.output.r2,
    output:
        summary_html = "{sample}/all-sample_analysis_summary.html",
    log:
        log = "run_{sample}_parsebio_split_pipe_all.log",
        err = "run_{sample}_parsebio_split_pipe_all.err",
    params:
        prefix = "{sample}",
        chemistry = config.chemistry,
        immune_genome = immune_genome,
        parent_dir = lambda wildcards: dict_tcr_parent[wildcards.sample],
    shell:
        # The sample directory is removed first, then split-pipe recreates it.
        """
        rm -r {params.prefix};
        {program.parsebio_split_pipe} \
        --mode all \
        --chemistry {params.chemistry} \
        --tcr_analysis \
        --immune_genome {params.immune_genome} \
        --parent_dir {params.parent_dir} \
        --output_dir {params.prefix} \
        --fq1 {input.r1} \
        --fq2 {input.r2} \
        2>{log.err} 1>{log.log}
        """
110+
# TCR branch: run split-pipe in "comb" mode over every per-sample output of
# split_pipe_all, writing the combined result to split_pipe_comb/.
# NOTE(review): the output path of this rule has no {sample} wildcard, yet
# params.parent_dir reads wildcards.sample. That lookup does not look
# resolvable for this rule; confirm which parent directory comb mode needs.
rule split_pipe_comb:
    input:
        expand(rules.split_pipe_all.output, sample=samples)
    output:
        summary_html = "split_pipe_comb/all-sample_analysis_summary.html",
    log:
        log = "run_parsebio_split_pipe_comb.log",
        err = "run_parsebio_split_pipe_comb.err",
    params:
        outdir = "split_pipe_comb",
        immune_genome = immune_genome,
        parent_dir = lambda wildcards: dict_tcr_parent[wildcards.sample],
        # one sublibrary directory per sample, located under `analysis`
        sublibs = expand(os.path.join(analysis, "{sample}"), sample=samples),
    shell:
        """
        {program.parsebio_split_pipe} \
        --mode comb \
        --immune_genome {params.immune_genome} \
        --parent_dir {params.parent_dir} \
        --genome_dir {reference.transcriptome_parsebio} \
        --sublibraries {params.sublibs} \
        --output_dir {params.outdir} \
        2>{log.err} 1>{log.log}
        """
73135

74-
shell:
75-
"""
136+
else:
137+
# Non-TCR branch: run split-pipe in "all" mode for a single sample, using
# the configured chemistry and kit, the parsebio transcriptome reference and
# the sample list given by parsebio_sample_list.
rule split_pipe_all:
    input:
        r1 = rules.make_fastq_concat.output.r1,
        r2 = rules.make_fastq_concat.output.r2,
    output:
        summary_html = "{sample}/all-sample_analysis_summary.html",
    log:
        log = "run_{sample}_parsebio_split_pipe_all.log",
        err = "run_{sample}_parsebio_split_pipe_all.err",
    params:
        prefix = "{sample}",
        chemistry = config.chemistry,
        kit = config.kit,
    shell:
        # The sample directory is removed first, then split-pipe recreates it.
        """
        rm -r {params.prefix};
        {program.parsebio_split_pipe} \
        --mode all \
        --chemistry {params.chemistry} \
        --kit {params.kit} \
        --genome_dir {reference.transcriptome_parsebio} \
        --fq1 {input.r1} \
        --fq2 {input.r2} \
        --output_dir {params.prefix} \
        --samp_list {parsebio_sample_list} \
        2>{log.err} 1>{log.log}
        """
85164

165+
# Non-TCR branch: run split-pipe in "comb" mode over every per-sample output
# of split_pipe_all, writing the combined result to split_pipe_comb/.
rule split_pipe_comb:
    input:
        expand(rules.split_pipe_all.output, sample=samples)
    params:
        outdir = "split_pipe_comb",
        chemistry = config.chemistry,
        kit = config.kit,
        # one sublibrary directory per sample, located under `analysis`
        sublibs = expand(os.path.join(analysis, "{sample}"), sample=samples)
    output:
        summary_html = "split_pipe_comb/all-sample_analysis_summary.html"
    log:
        err = "run_parsebio_split_pipe_comb.err",
        log = "run_parsebio_split_pipe_comb.log",
    shell:
        # chemistry/kit are taken from params (same values as config) so the
        # declared params are actually used, matching split_pipe_all.
        """
        {program.parsebio_split_pipe} \
        --mode comb \
        --chemistry {params.chemistry} \
        --kit {params.kit} \
        --genome_dir {reference.transcriptome_parsebio} \
        --sublibraries {params.sublibs} \
        --output_dir {params.outdir} \
        2>{log.err} 1>{log.log}
        """
190+
86191
rule summaryFiles:
87192
input:
88193
rules.split_pipe_comb.output,

0 commit comments

Comments
 (0)