-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathHTAN_Config.py
More file actions
80 lines (70 loc) · 4.4 KB
/
HTAN_Config.py
File metadata and controls
80 lines (70 loc) · 4.4 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
from pathlib import Path
import snakemake.io
from collections import defaultdict
import itertools
class ParsedConfig:
    """Parsed view of the workflow configuration with target-path helpers.

    On construction the folder entries of ``config`` are turned into absolute
    ``Path`` objects and every cohort in ``config["COHORTS"]`` gets a
    ``"samples_IDs"`` list read from the file named by its
    ``"samples_IDs_file"`` entry.

    Every path-building method accepts a ``config`` argument for call
    compatibility; the methods only read state stored on the instance.
    """

    def __init__(self, config):
        """Resolve the configured folders and load the sample IDs per cohort.

        Args:
            config: Mapping with the keys ``"ROOT"``, ``"raw_data_folder"``,
                ``"analysis_folder"``, ``"analysis_folder_HTANunrelated"``,
                ``"COHORTS"`` and ``"motif_hoccomoco"``. Each value of
                ``config["COHORTS"]`` must be a mapping containing
                ``"samples_IDs_file"``.

        Raises:
            KeyError: If one of the keys above is missing.
            OSError: If a sample-ID file cannot be opened.
        """
        self.ROOT = Path(config["ROOT"]).resolve()
        self.raw_data_folder = Path(config["raw_data_folder"]).resolve()
        self.analysis_folder = Path(config["analysis_folder"]).resolve()
        self.analysis_folder_HTANunrelated = Path(
            config["analysis_folder_HTANunrelated"]
        ).resolve()
        # NOTE: this is the same dict object as config["COHORTS"], so the
        # "samples_IDs" entries added below are visible through ``config`` too.
        self.COHORTS = config["COHORTS"]
        self.motif_hoccomoco = config["motif_hoccomoco"]

        # Add sample names to each cohort.
        for settings in self.COHORTS.values():
            settings["samples_IDs"] = self._read_sample_ids(
                settings["samples_IDs_file"]
            )

    @staticmethod
    def _read_sample_ids(path):
        """Return the non-empty, whitespace-stripped lines of ``path``.

        Blank lines are skipped so that an empty file yields ``[]`` rather
        than a single empty sample ID, which would otherwise turn into
        targets such as ``.../rds_files_rna/.rds``.
        """
        with open(path) as handle:
            stripped = (line.strip() for line in handle)
            return [sample_id for sample_id in stripped if sample_id]

    def generate_targets(self, config):
        """Return the per-sample target paths for every cohort.

        Paths are emitted cohort by cohort and, within a cohort, sample by
        sample, in the order listed in the body.

        Args:
            config: Unused; kept for call compatibility.

        Returns:
            list of str: Target file paths.
        """
        targets = []
        for cohort, settings in self.COHORTS.items():
            raw = f"{self.raw_data_folder}/{cohort}"
            out = f"{self.analysis_folder}/{cohort}"
            for sample in settings["samples_IDs"]:
                targets.extend(
                    (
                        f"{raw}/rds_files_rna/{sample}.rds",
                        f"{raw}/rds_files_atac/{sample}.rds",
                        f"{raw}/fragments_files/{sample}-atac_fragments.tsv.gz",
                        f"{out}/rds_files_multiome/{sample}_seurat_multi.rds",
                        f"{out}/metadata/{sample}_metadata.rds",
                        f"{out}/MC_res/{sample}_MC_completed.txt",
                        f"{out}/MC_res_immune_stromal/{sample}_MC_completed.txt",
                        f"{out}/MC_res_immune_stromal_unsup/{sample}_MC_completed.txt",
                        f"{out}/MC_unsup_res/{sample}_MC_completed.txt",
                    )
                )
                # Currently disabled targets, kept for reference:
                # targets.append(f"{self.analysis_folder}/{cohort}/scATOMIC/{sample}_annotation.rds")
                # targets.append(f"{self.analysis_folder}/{cohort}/integrated_MC/{cohort}_mc_integrated2.rds") # add graining level info in file name
                # targets.append(f"{self.analysis_folder}/{cohort}/MC_res/MC-g10_{sample}-atac_fragments.tsv.gz")
                # targets.append(f"{self.analysis_folder}/{cohort}/integrated_data/all_cells.rds")
                # targets.append(f"{self.analysis_folder}/{cohort}/integrated_data/immune_cells.rds")
                # targets.append(f"{self.analysis_folder}/{cohort}/integrated_data/integrated_rna.rds")
                # targets.append(f"{self.analysis_folder}/{cohort}/CNV/{sample}/epiAneufinder_1000000/{sample}_EpiAneufinder_clones.rds")
                # targets.append(f"{self.analysis_folder}/{cohort}/CNV/{sample}/CopyscAT_1000000/{sample}_CopyscAT.rds")
        return targets

    def get_multiome_files(self, config, cohort):
        """Return ``rds_files_multiome/<sample>_seurat_multi.rds`` paths for ``cohort``.

        Args:
            config: Unused; kept for call compatibility.
            cohort: Key into ``self.COHORTS``.

        Returns:
            list of str: One path per sample, under the analysis folder.
        """
        base = f"{self.analysis_folder}/{cohort}/rds_files_multiome"
        return [
            f"{base}/{sample}_seurat_multi.rds"
            for sample in self.COHORTS[cohort]["samples_IDs"]
        ]

    def get_scATOMIC_files(self, config, cohort):
        """Return ``scATOMIC/<sample>_annotation.rds`` paths for ``cohort``.

        Args:
            config: Unused; kept for call compatibility.
            cohort: Key into ``self.COHORTS``.

        Returns:
            list of str: One path per sample, under the analysis folder.
        """
        base = f"{self.analysis_folder}/{cohort}/scATOMIC"
        return [
            f"{base}/{sample}_annotation.rds"
            for sample in self.COHORTS[cohort]["samples_IDs"]
        ]

    def get_MC_files(self, config, cohort):
        """Return ``MC_res/<sample>_MC-g10_obj.rds`` paths for ``cohort``.

        Args:
            config: Unused; kept for call compatibility.
            cohort: Key into ``self.COHORTS``.

        Returns:
            list of str: One path per sample, under the analysis folder.
        """
        base = f"{self.analysis_folder}/{cohort}/MC_res"
        return [
            f"{base}/{sample}_MC-g10_obj.rds"
            for sample in self.COHORTS[cohort]["samples_IDs"]
        ]

    def get_MC_mode_files(self, config, cohort, mc_mode):
        """Return ``MC_res_immune_stromal_<mc_mode>/<sample>_MC_completed.txt`` paths.

        Args:
            config: Unused; kept for call compatibility.
            cohort: Key into ``self.COHORTS``.
            mc_mode: Suffix interpolated into the result directory name.

        Returns:
            list of str: One path per sample, under the analysis folder.
        """
        base = f"{self.analysis_folder}/{cohort}/MC_res_immune_stromal_{mc_mode}"
        return [
            f"{base}/{sample}_MC_completed.txt"
            for sample in self.COHORTS[cohort]["samples_IDs"]
        ]

    def get_preprocessed_files(self, config, cohort):
        """Return ``<sample>_preprocessing_completed.txt`` paths for ``cohort``.

        Args:
            config: Unused; kept for call compatibility.
            cohort: Key into ``self.COHORTS``.

        Returns:
            list of str: One path per sample, under the analysis folder.
        """
        base = f"{self.analysis_folder}/{cohort}"
        return [
            f"{base}/{sample}_preprocessing_completed.txt"
            for sample in self.COHORTS[cohort]["samples_IDs"]
        ]

    def get_sc_files_with_consensus_peaks(self, config, cohort):
        """Return ``<sample>_count_peaks_completed.txt`` paths for ``cohort``.

        Args:
            config: Unused; kept for call compatibility.
            cohort: Key into ``self.COHORTS``.

        Returns:
            list of str: One path per sample, under the analysis folder.
        """
        base = f"{self.analysis_folder}/{cohort}"
        return [
            f"{base}/{sample}_count_peaks_completed.txt"
            for sample in self.COHORTS[cohort]["samples_IDs"]
        ]