-
Notifications
You must be signed in to change notification settings - Fork 1
Expand file tree
/
Copy pathnextflow.config
More file actions
95 lines (86 loc) · 4.01 KB
/
nextflow.config
File metadata and controls
95 lines (86 loc) · 4.01 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
//nextflow.config
// Select DSL version 2 for the pipeline scripts.
nextflow.enable.dsl = 2
//=============================================================================
// PROFILES - Single Apptainer profile at the moment
// Conda was becoming a problem due to config drift
//=============================================================================
profiles {
    // HPC profile: every process is submitted to SLURM and executed inside
    // one shared Apptainer image shipped with the project.
    // Paths are anchored on `projectDir` (the directory of the main script);
    // `baseDir` is only a deprecated alias for the same value.
    container_hpc {
        apptainer.enabled    = true
        apptainer.autoMounts = true
        apptainer.cacheDir   = "${projectDir}/container_cache/"

        process {
            executor  = 'slurm'
            queue     = 'defq'    // site-specific queue name; adjust for your cluster
            container = "${projectDir}/containers/gand_image.sif"

            // Resource defaults applied to every process
            cpus   = 1
            memory = '64 GB'
            time   = '48h'
        }
    }
}
//=============================================================================
// PARAMS
//=============================================================================
params {
    // All paths below are anchored on `projectDir` (the directory of the main
    // script); `baseDir` is only a deprecated alias for the same value.

    // Which modules to run
    run_download    = false
    run_integration = true
    run_modelling   = true
    run_report      = true

    // Downloading workflow params
    dwl = [
        scrna_url : [
            "https://www.ncbi.nlm.nih.gov/geo/download/?acc=GSE244477&format=file"
        ],
        ref_url : [
            "https://ftp.ncbi.nlm.nih.gov/geo/series/GSE123nnn/GSE123335/suppl/GSE123335%5FE14%5Fcombined%5Fmatrix.txt.gz",
            "https://ftp.ncbi.nlm.nih.gov/geo/series/GSE123nnn/GSE123335/suppl/GSE123335%5FE14%5Fcombined%5Fmatrix%5FClusterAnnotations.txt.gz"
        ],
        output_scrna : "${projectDir}/data/scRNA/",
        output_ref   : "${projectDir}/data/ref/"
    ]

    // Integration workflow params
    integration = [
        input              : "${projectDir}/data/scRNA/",
        ref                : "${projectDir}/data/ref",
        tmp                : "${projectDir}/data/tmp_scrna/",
        manifest           : "${projectDir}/data/scRNA/manifest.txt",
        number_pcs         : 30,                // Number of principal components
        min_features       : 100,               // Minimum number of features per cell
        max_features       : 10000,             // Maximum number of features per cell
        percent_mt         : 10,                // Percentage of mitochondrial RNA allowed per cell
        n_var_features     : 2000,              // Number of variable features used for PCA
        cluster_resolution : 0.4,               // Louvain clustering resolution
        integration_tag    : "integrated",      // Tag used to name the integration objects and metadata
        integration_method : "RPCAIntegration", // Seurat method used to integrate data
    ]

    // Modelling params
    modelling = [
        tmp       : "${projectDir}/data/tmp_scrna/",
        annotated : "${projectDir}/data/tmp_scrna/GAND_seurat_annotated.rds",
        // Gene sets to check for enrichment by cell type
        gene_sets : [["Chd3", "Foxp1","Foxp2","Satb2"],
                     ["Chd3", "Foxp1","Satb2"],
                     ["Chd3","Foxp2","Satb2"],
                     ["Arx"]],
        min_cells : 1,                    // Minimum number of cells per cell type used for modelling
        mut_genes : ["Foxp1","Foxp2"],    // Check whether these two genes are mutually exclusively expressed in cells
        // Which score(s) to use for modelling:
        //   "module" = Seurat module score, "counts" = log counts
        score_type: ["module","counts"],
    ]

    // Reporting workflow params
    // Note: this assumes that the full pipeline has been run.
    // To skip a reporting section, set that section to null instead of a
    // path to a file.
    // This is not very elegant and could be improved. The original intent
    // was to avoid saturating the shared report with unnecessary info, but
    // it has made things a little more complicated and messy.
    // Could be worth refactoring...
    report = [
        annotated : "${projectDir}/data/tmp_scrna/GAND_seurat_annotated.csv",
        mut_genes : "${projectDir}/data/tmp_scrna/mutually_exclusive_genes.csv",
        gene_sets : "${projectDir}/data/tmp_scrna/*_geneset_list.csv",
        template  : "${projectDir}/bin/scRNA_report_template.Rmd",
        output    : "${projectDir}/results/scRNA",
    ]
}