-
Notifications
You must be signed in to change notification settings - Fork 15
Expand file tree
/
Copy pathconfig.vsh.yaml
More file actions
164 lines (159 loc) · 6.16 KB
/
Copy pathconfig.vsh.yaml
File metadata and controls
164 lines (159 loc) · 6.16 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
# Viash component definition (config.vsh.yaml) for the `popv` annotation
# component.
name: popv
namespace: annotate
scope: public
description: >-
  Performs popular major vote cell typing on single cell sequence data using
  multiple algorithms. Note that this is a one-shot version of PopV.
# Each author entry merges in a shared author file and assigns the role here.
authors:
  - __merge__: /src/authors/matthias_beyens.yaml
    roles: [author]
  - __merge__: /src/authors/robrecht_cannoodt.yaml
    roles: [author]
# Command-line interface of the component, grouped by purpose:
#   Inputs    - the query dataset to annotate
#   Reference - the labelled dataset used to call cell types
#   Outputs   - where the annotated result is written
#   Arguments - which annotation methods to run
argument_groups:
  - name: Inputs
    description: Arguments related to the input (aka query) dataset.
    arguments:
      - name: "--input"
        alternatives: [-i]
        type: file
        description: Input h5mu file.
        direction: input
        required: true
        example: input.h5mu
      - name: "--modality"
        description: Which modality to process.
        type: string
        default: "rna"
        required: false
      - name: "--input_layer"
        type: string
        description: Which layer to use. If no value is provided, the counts are assumed to be in the `.X` slot. Otherwise, count data is expected to be in `.layers[input_layer]`.
        required: false
      - name: "--input_obs_batch"
        type: string
        description: Key in obs field of input adata for batch information. If no value is provided, batch label is assumed to be unknown.
        required: false
      - name: "--input_var_subset"
        type: string
        description: Subset the input object with this column.
        required: false
      # The marker for unlabelled cells is configurable via
      # `--unknown_celltype_label` (next argument), so the description refers
      # to that argument's value rather than to a fixed literal.
      - name: "--input_obs_label"
        type: string
        description: Key in obs field of input adata for label information. This is only used for training scANVI. Unlabelled cells should be set to the value of `--unknown_celltype_label`.
        required: false
      - name: "--unknown_celltype_label"
        type: string
        description: If `input_obs_label` is specified, cells with this value will be treated as unknown and will be predicted by the model.
        default: "unknown"
        required: false
  - name: Reference
    description: Arguments related to the reference dataset.
    arguments:
      - name: "--reference"
        type: file
        description: "User-provided reference tissue. The data that will be used as reference to call cell types."
        example: TS_Bladder_filtered.h5ad
        direction: input
        required: true
      - name: "--reference_layer"
        type: string
        description: Which layer to use. If no value is provided, the counts are assumed to be in the `.X` slot. Otherwise, count data is expected to be in `.layers[reference_layer]`.
        required: false
      - name: "--reference_obs_label"
        type: string
        description: Key in obs field of reference AnnData with cell-type information.
        default: "cell_ontology_class"
        required: false
      # This key is looked up in the reference dataset, not the query.
      - name: "--reference_obs_batch"
        type: string
        description: Key in obs field of reference AnnData for batch information.
        default: "donor_assay"
        required: false
      # Disabled: pretrained-model input is not exposed by this component.
      # - name: "--reference_models"
      #   type: file
      #   description: Pretrained models. Can be a directory or a tar gz.
      #   required: false
      #   example: pretrained_models_Bladder_ts.tar.gz
  - name: Outputs
    description: Output arguments.
    arguments:
      - name: "--output"
        type: file
        description: Output h5mu file.
        direction: output
        required: true
        example: output.h5mu
      # Merges the shared definition from h5_compression_argument.yaml into
      # this argument list.
      # NOTE(review): presumably adds an output-compression option - confirm
      # against /src/base/h5_compression_argument.yaml.
      - __merge__: [., /src/base/h5_compression_argument.yaml]
      # Disabled: model export is not exposed by this component.
      # - name: "--output_models"
      #   type: file
      #   direction: output
      #   description: If `prediction_mode == "retrain"`, saves models to a directory and compresses the results into a tar gz.
      #   example: "output.tar.gz"
      #   required: false
  - name: Arguments
    description: Other arguments.
    arguments:
      # Required and without a `default`, so the description must not promise
      # a default selection; the `example` shows a typical choice instead.
      - name: "--methods"
        type: string
        description: "Methods to call cell types. One or more methods must be specified, e.g. knn_on_scvi and scanvi."
        example: ["knn_on_scvi", "scanvi"]
        choices: [celltypist, knn_on_bbknn, knn_on_scanorama, knn_on_scvi, onclass, rf, scanvi, svm]
        required: true
        multiple: true
      # Disabled: prediction mode selection and plotting are not exposed by
      # this component.
      # - name: "--prediction_mode"
      #   type: string
      #   description: |
      #     Execution mode of cell-type annotation.
      #     "retrain": Train all prediction models and saves them to disk. Argument `output_models` must be defined.
      #     "inference": Classify all cells based on pretrained models. Argument `reference_models` must be defined.
      #     "fast": Fast inference using only query cells and single epoch in scArches.
      # - name: "--plots"
      #   type: boolean
      #   description: "Creation of agreement and frequency plots between selected cell type algorithm(s) and final PopV ensemble called cell type."
      #   default: false
      #   required: false
# Files shipped with the component: the main Python script plus the shared
# setup_logger.py utility.
resources:
  - path: script.py
    type: python_script
  - path: /src/utils/setup_logger.py
# Files available when testing: the test script and two test data
# directories.
test_resources:
  - path: test.py
    type: python_script
  - path: /resources_test/annotation_test_data/
  - path: /resources_test/pbmc_1k_protein_v3/
# Container build recipe. The setup steps are listed in a deliberate order:
# compiler flags, system packages, popv and its pins, then the shared
# anndata/mudata requirements, and finally the PopV repository checkout.
engines:
  - type: docker
    # Alternative base image, currently disabled:
    # image: nvcr.io/nvidia/pytorch:22.12-py3
    image: python:3.11-slim
    setup:
      - type: docker
        env:
          # Compile native extensions with AVX512/AVX2 disabled.
          - 'CFLAGS="-mno-avx512f -mno-avx2"'
          - 'CPPFLAGS="-mno-avx512f -mno-avx2"'
      - type: apt
        packages: [procps, git, build-essential]
      - type: python
        packages:
          - popv~=0.4.2
          # scvi-tools < 1.2.2 used to pin numpy to <2; that pin is kept here.
          - numpy<2
          - setuptools
      # The packages below must be updated AFTER popv has been installed.
      # See https://github.com/YosefLab/PopV/issues/30
      - type: python
        __merge__: [/src/base/requirements/anndata_mudata.yaml, .]
      # Fetch the ontology that popv needs by cloning its repository.
      - type: docker
        run: |
          cd /opt && git clone --depth 1 https://github.com/YosefLab/PopV.git
    test_setup:
      - type: python
        __merge__: [/src/base/requirements/viashpy.yaml, .]
# Runners: a standalone executable and a Nextflow runner with resource labels.
runners:
  - type: executable
  - type: nextflow
    directives:
      # TODO: should add new label highmem-single-gpu and lowmem-single-gpu
      label:
        - highmem
        - highcpu
        - highdisk