openpipeline/src/annotate/celltypist/config.vsh.yaml at 846ad62f325e91dbe7d276bc70c0204118150891 · openpipelines-bio/openpipeline

161 lines (154 loc) · 5.85 KB
# Identity and ownership metadata of the CellTypist annotation component.
name: celltypist
namespace: annotate
scope: "public"
description: >-
  Automated cell type annotation tool for scRNA-seq datasets on the basis of
  logistic regression classifiers optimised by the stochastic gradient descent
  algorithm.
# Each author entry is merged in from a shared author definition file;
# only the role within this component is set here.
authors:
  - __merge__: /src/authors/jakub_majercik.yaml
    roles: [ author ]
  - __merge__: /src/authors/weiwei_schultz.yaml
    roles: [ contributor ]
# Command-line arguments of the component, organised into named groups.
argument_groups:
  # Query dataset: the file, modality and layer to annotate, plus how its
  # genes are named and how many must overlap with the reference.
  - name: Inputs
    description: Input dataset (query) arguments
    arguments:
      - name: "--input"
        alternatives: [-i]
        type: file
        description: The input (query) data to be labeled. Should be a .h5mu file.
        direction: input
        required: true
        example: input.h5mu
      - name: "--modality"
        description: Which modality to process.
        type: string
        default: "rna"
        required: false
      - name: "--input_layer"
        type: string
        description: The layer in the input data containing log normalized counts to be used for cell type annotation if .X is not to be used.
      - name: "--input_var_gene_names"
        type: string
        required: false
        description: |
          The name of the adata var column in the input data containing gene names; when --input_var_gene_names is not provided, the var index will be used.
      - name: "--input_reference_gene_overlap"
        type: integer
        default: 100
        min: 1
        description: |
          The minimum number of genes present in both the reference and query datasets.
  # Reference dataset used to train a classifier when no pre-trained
  # --model is supplied (see the --reference description).
  - name: Reference
    description: Arguments related to the reference dataset.
    arguments:
      - name: "--reference"
        type: file
        description: "The reference data to train the CellTypist classifiers on. Only required if a pre-trained --model is not provided."
        example: reference.h5mu
        direction: input
        required: false
      - name: "--reference_layer"
        type: string
        description: The layer in the reference data to be used for cell type annotation if .X is not to be used. Data are expected to be processed in the same way as the --input query dataset.
        required: false
      - name: "--reference_obs_target"
        type: string
        description: The name of the adata obs column in the reference data containing cell type annotations.
        default: "cell_ontology_class"
      - name: "--reference_var_gene_names"
        type: string
        required: false
        description: |
          The name of the adata var column in the reference data containing gene names; when --reference_var_gene_names is not provided, the var index will be used.
      - name: "--reference_var_input"
        type: string
        required: false
        description: |
          .var column containing highly variable genes. By default, do not subset genes.
  # Classifier options: either a pre-trained model file or the settings used
  # to train one, plus the majority-voting refinement parameters.
  - name: Model arguments
    description: Model arguments.
    arguments:
      - name: "--model"
        type: file
        description: "Pretrained model in pkl format. If not provided, the model will be trained on the reference data and --reference should be provided."
        required: false
        example: pretrained_model.pkl
      - name: "--feature_selection"
        type: boolean
        description: "Whether to perform feature selection."
        default: false
      - name: "--majority_voting"
        type: boolean
        description: "Whether to refine the predicted labels by running the majority voting classifier after over-clustering."
        default: false
      - name: "--C"
        type: double
        description: "Inverse of regularization strength in logistic regression."
        default: 1.0
      - name: "--max_iter"
        type: integer
        description: "Maximum number of iterations before reaching the minimum of the cost function."
        default: 1000
      - name: "--use_SGD"
        type: boolean_true
        description: "Whether to use the stochastic gradient descent algorithm."
      - name: "--min_prop"
        type: double
        # Block scalar: the text must not be wrapped in quotes, otherwise the
        # quote characters become part of the rendered help text.
        description: |
          For the dominant cell type within a subcluster, the minimum proportion of cells required to
          support naming of the subcluster by this cell type. Ignored if majority_voting is set to False.
          A subcluster that fails to pass this proportion threshold will be assigned 'Heterogeneous'.
        default: 0.0
  # Output file and the names of the .obs fields that receive the results.
  - name: Outputs
    description: Output arguments.
    arguments:
      - name: "--output"
        type: file
        description: Output h5mu file.
        direction: output
        example: output.h5mu
      - name: "--output_obs_predictions"
        type: string
        default: celltypist_pred
        required: false
        description: |
          In which `.obs` slots to store the predicted information.
      - name: "--output_obs_probability"
        type: string
        default: celltypist_probability
        required: false
        description: |
          In which `.obs` slots to store the probability of the predictions.
    # Merges the shared compression argument definition into this group.
    __merge__: [., /src/base/h5_compression_argument.yaml]
# Files shipped with the component: the Python entry script followed by
# helper modules from /src/utils.
resources:
  - type: python_script
    path: script.py
  - path: /src/utils/setup_logger.py
  - path: /src/utils/cross_check_genes.py
  - path: /src/utils/subset_vars.py
  - path: /src/utils/set_var_index.py
# Test script and the test data directories it is given.
test_resources:
  - type: python_script
    path: test.py
  - path: /resources_test/annotation_test_data/
  - path: /resources_test/pbmc_1k_protein_v3/

# Container image and the packages installed into it.
engines:
  - type: docker
    image: python:3.10-slim
    setup:
      - type: apt
        packages:
          - libhdf5-dev
          - procps
      - type: python
        __merge__: [ /src/base/requirements/scanpy.yaml, .]
      - type: python
        packages:
          - celltypist==1.6.3
      - type: python
        __merge__: [ /src/base/requirements/anndata_mudata.yaml, .]
    # Merged at engine level from the shared python_test_setup definition.
    __merge__: [ /src/base/requirements/python_test_setup.yaml, .]

# Supported execution backends; the Nextflow runner carries resource labels.
runners:
  - type: executable
  - type: nextflow
    directives:
      label: [highcpu, highmem, highdisk]
Provide feedback

Saved searches

Use saved searches to filter your results more quickly

Uh oh!

Uh oh!

FilesExpand file tree

config.vsh.yaml

Latest commit

History

config.vsh.yaml

File metadata and controls