SuperCellMultiomicsHTAN/HTAN_Config.py at submission · GfellerLab/SuperCellMultiomicsHTAN · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
from pathlib import Path
import snakemake.io
from collections import defaultdict
import itertools


class ParsedConfig:
    """Resolved view of the workflow configuration plus target-path builders.

    The constructor resolves the configured folders to absolute paths and
    loads, for every cohort, the sample IDs listed in that cohort's
    ``samples_IDs_file``. The remaining methods build lists of output file
    paths (as strings) from those folders and sample IDs.

    Note: ``self.COHORTS`` is the very same dict object as
    ``config["COHORTS"]``, so the ``"samples_IDs"`` entry added to each cohort
    is also visible through the original ``config`` mapping.
    """

    def __init__(self, config):
        """Resolve folders and attach the sample IDs to each cohort.

        Args:
            config: Mapping providing ``ROOT``, ``raw_data_folder``,
                ``analysis_folder``, ``analysis_folder_HTANunrelated``,
                ``COHORTS`` and ``motif_hoccomoco``. Every cohort entry must
                provide ``samples_IDs_file``, the path of a text file with one
                sample ID per line.

        Raises:
            KeyError: If a required configuration key is missing.
            OSError: If a ``samples_IDs_file`` cannot be opened.
        """
        self.ROOT = Path(config["ROOT"]).resolve()
        self.raw_data_folder = Path(config["raw_data_folder"]).resolve()
        self.analysis_folder = Path(config["analysis_folder"]).resolve()
        self.analysis_folder_HTANunrelated = Path(config["analysis_folder_HTANunrelated"]).resolve()
        self.COHORTS = config["COHORTS"]
        self.motif_hoccomoco = config["motif_hoccomoco"]

        # Add samples names to each cohort
        for cohort_cfg in self.COHORTS.values():
            cohort_cfg["samples_IDs"] = self._read_sample_ids(cohort_cfg["samples_IDs_file"])

    @staticmethod
    def _read_sample_ids(path):
        """Return the non-empty, whitespace-stripped lines of ``path``.

        Blank lines are skipped so that an empty file, a blank line between
        IDs, or trailing whitespace never produces an empty sample ID (which
        would otherwise turn into targets such as ``.../.rds``).
        """
        with open(path, encoding="utf-8") as handle:
            stripped_lines = (line.strip() for line in handle)
            return [sample_id for sample_id in stripped_lines if sample_id]

    def _samples(self, cohort):
        """Return the list of sample IDs loaded for ``cohort``."""
        return self.COHORTS[cohort]["samples_IDs"]

    def generate_targets(self, config):
        """Return every per-sample target path, for all cohorts.

        Cohorts are visited in the iteration order of ``self.COHORTS`` and
        samples in file order; each sample contributes its targets in a fixed
        order (raw-data files first, then analysis outputs).

        Args:
            config: Unused; kept for interface compatibility.

        Returns:
            A list of path strings.
        """
        raw = self.raw_data_folder
        analysis = self.analysis_folder
        targets = []
        for cohort, cohort_cfg in self.COHORTS.items():
            for sample in cohort_cfg["samples_IDs"]:
                targets.extend([
                    f"{raw}/{cohort}/rds_files_rna/{sample}.rds",
                    f"{raw}/{cohort}/rds_files_atac/{sample}.rds",
                    f"{raw}/{cohort}/fragments_files/{sample}-atac_fragments.tsv.gz",
                    f"{analysis}/{cohort}/rds_files_multiome/{sample}_seurat_multi.rds",
                    f"{analysis}/{cohort}/metadata/{sample}_metadata.rds",
                    f"{analysis}/{cohort}/MC_res/{sample}_MC_completed.txt",
                    f"{analysis}/{cohort}/MC_res_immune_stromal/{sample}_MC_completed.txt",
                    f"{analysis}/{cohort}/MC_res_immune_stromal_unsup/{sample}_MC_completed.txt",
                    f"{analysis}/{cohort}/MC_unsup_res/{sample}_MC_completed.txt",
                ])
                # Optional targets, currently disabled:
                # targets.append(f"{self.analysis_folder}/{cohort}/scATOMIC/{sample}_annotation.rds")
                # targets.append(f"{self.analysis_folder}/{cohort}/integrated_MC/{cohort}_mc_integrated2.rds")  # add graining level info in file name
                # targets.append(f"{self.analysis_folder}/{cohort}/MC_res/MC-g10_{sample}-atac_fragments.tsv.gz")
                # targets.append(f"{self.analysis_folder}/{cohort}/integrated_data/all_cells.rds")
                # targets.append(f"{self.analysis_folder}/{cohort}/integrated_data/immune_cells.rds")
                # targets.append(f"{self.analysis_folder}/{cohort}/integrated_data/integrated_rna.rds")
                # targets.append(f"{self.analysis_folder}/{cohort}/CNV/{sample}/epiAneufinder_1000000/{sample}_EpiAneufinder_clones.rds")
                # targets.append(f"{self.analysis_folder}/{cohort}/CNV/{sample}/CopyscAT_1000000/{sample}_CopyscAT.rds")
        return targets

    def get_multiome_files(self, config, cohort):
        """Return the ``rds_files_multiome`` path of each sample in ``cohort``.

        ``config`` is unused; it is kept for interface compatibility.
        """
        return [
            f"{self.analysis_folder}/{cohort}/rds_files_multiome/{sample}_seurat_multi.rds"
            for sample in self._samples(cohort)
        ]

    def get_scATOMIC_files(self, config, cohort):
        """Return the ``scATOMIC`` annotation path of each sample in ``cohort``.

        ``config`` is unused; it is kept for interface compatibility.
        """
        return [
            f"{self.analysis_folder}/{cohort}/scATOMIC/{sample}_annotation.rds"
            for sample in self._samples(cohort)
        ]

    def get_MC_files(self, config, cohort):
        """Return the ``MC_res`` ``MC-g10`` object path of each sample in ``cohort``.

        ``config`` is unused; it is kept for interface compatibility.
        """
        return [
            f"{self.analysis_folder}/{cohort}/MC_res/{sample}_MC-g10_obj.rds"
            for sample in self._samples(cohort)
        ]

    def get_MC_mode_files(self, config, cohort, mc_mode):
        """Return the completion-flag path of each sample for one MC mode.

        The paths live in the ``MC_res_immune_stromal_{mc_mode}`` folder of
        ``cohort``. ``config`` is unused; it is kept for interface
        compatibility.
        """
        return [
            f"{self.analysis_folder}/{cohort}/MC_res_immune_stromal_{mc_mode}/{sample}_MC_completed.txt"
            for sample in self._samples(cohort)
        ]

    def get_preprocessed_files(self, config, cohort):
        """Return the preprocessing completion-flag path of each sample in ``cohort``.

        ``config`` is unused; it is kept for interface compatibility.
        """
        return [
            f"{self.analysis_folder}/{cohort}/{sample}_preprocessing_completed.txt"
            for sample in self._samples(cohort)
        ]

    def get_sc_files_with_consensus_peaks(self, config, cohort):
        """Return the peak-counting completion-flag path of each sample in ``cohort``.

        ``config`` is unused; it is kept for interface compatibility.
        """
        return [
            f"{self.analysis_folder}/{cohort}/{sample}_count_peaks_completed.txt"
            for sample in self._samples(cohort)
        ]