// Source: GAND/nextflow.config (patrickCNMartin/GAND, main branch)
// nextflow.config
// Pin the pipeline to DSL version 2 of the Nextflow language.
nextflow.enable.dsl = 2


//=============================================================================
// PROFILES - Single Apptainer profile at the moment
// Conda was becoming a problem due to config drift
//=============================================================================
// Execution profiles, selected at launch with `-profile <name>`.
// Paths are anchored on `projectDir` (the directory holding the main pipeline
// script); it replaces the deprecated `baseDir` alias and resolves to the
// same location.
profiles {
    // Submit every process to SLURM and run it inside one Apptainer image.
    container_hpc {
        apptainer.enabled    = true
        apptainer.autoMounts = true
        apptainer.cacheDir   = "${projectDir}/container_cache/"

        process {
            executor  = 'slurm'
            queue     = 'defq'
            // Local image file applied to all processes
            container = "${projectDir}/containers/gand_image.sif"
            // Resource defaults applied to every process
            cpus      = 1
            memory    = '64 GB'
            time      = '48h'
        }
    }
}


//=============================================================================
// PARAMS
//=============================================================================
// Pipeline parameters. Each workflow reads its own map (dwl, integration,
// modelling, report). Paths are anchored on `projectDir`, which replaces the
// deprecated `baseDir` alias and resolves to the same directory.
params {

    // Which modules to run
    run_download    = false
    run_integration = true
    run_modelling   = true
    run_report      = true

    // Downloading workflow params
    dwl = [
        scrna_url    : [
            "https://www.ncbi.nlm.nih.gov/geo/download/?acc=GSE244477&format=file"
        ],
        ref_url      : [
            "https://ftp.ncbi.nlm.nih.gov/geo/series/GSE123nnn/GSE123335/suppl/GSE123335%5FE14%5Fcombined%5Fmatrix.txt.gz",
            "https://ftp.ncbi.nlm.nih.gov/geo/series/GSE123nnn/GSE123335/suppl/GSE123335%5FE14%5Fcombined%5Fmatrix%5FClusterAnnotations.txt.gz"
        ],
        output_scrna : "${projectDir}/data/scRNA/",
        output_ref   : "${projectDir}/data/ref/"
    ]

    // Integration workflow params
    integration = [
        input              : "${projectDir}/data/scRNA/",
        ref                : "${projectDir}/data/ref",
        tmp                : "${projectDir}/data/tmp_scrna/",
        manifest           : "${projectDir}/data/scRNA/manifest.txt",
        number_pcs         : 30, // Number of principal components
        min_features       : 100, // Minimum number of features per cell
        max_features       : 10000, // Maximum number of features per cell
        percent_mt         : 10, // Percentage of mitochondrial RNA allowed per cell
        n_var_features     : 2000, // Number of variable features used for PCA
        cluster_resolution : 0.4, // Louvain clustering resolution
        integration_tag    : "integrated", // Tag used to name the integration objects and metadata
        integration_method : "RPCAIntegration", // Seurat method used to integrate data
    ]

    // Modelling params
    modelling = [
        tmp        : "${projectDir}/data/tmp_scrna/",
        annotated  : "${projectDir}/data/tmp_scrna/GAND_seurat_annotated.rds",
        gene_sets  : [["Chd3", "Foxp1","Foxp2","Satb2"],
                      ["Chd3", "Foxp1","Satb2"],
                      ["Chd3","Foxp2","Satb2"],
                      ["Arx"]], // Gene sets to check for enrichment by cell type
        min_cells  : 1, // Minimum number of cells per cell type used for modelling
        mut_genes  : ["Foxp1","Foxp2"], // Check if two genes are mutually exclusively expressed in cells
        score_type : ["module","counts"], // module = Seurat module score, counts = log counts; which score types to use for modelling
    ]

    // Reporting workflow params
    // Note: this assumes that the full pipeline has been run.
    // To skip a reporting section, set that section to null instead of a
    // path to a file.
    // This is not very elegant and could be improved. The original intent
    // was to avoid saturating the shared report with unnecessary info, but
    // it has made things a bit more complicated and messy. Could be worth
    // refactoring...
    report = [
        annotated : "${projectDir}/data/tmp_scrna/GAND_seurat_annotated.csv",
        mut_genes : "${projectDir}/data/tmp_scrna/mutually_exclusive_genes.csv",
        gene_sets : "${projectDir}/data/tmp_scrna/*_geneset_list.csv",
        template  : "${projectDir}/bin/scRNA_report_template.Rmd",
        output    : "${projectDir}/results/scRNA",
    ]
}