# Source: openpipeline/src/annotate/svm_annotation/config.vsh.yaml at 14b5baf3129191e5808154cacedf33172492d627 · openpipelines-bio/openpipeline
#
# 154 lines (146 loc) · 5.23 KB
# Component identity: name, namespace, short description and authorship.
name: svm_annotation
namespace: annotate
description: Automated cell type annotation tool for scRNA-seq datasets on the basis of SVMs.
# The author entry is a list item and needs its parent key; without it the
# item dangles under the `description` scalar and the document does not parse.
authors:
  - __merge__: /src/authors/jakub_majercik.yaml
    roles: [ author ]
argument_groups:
  # Arguments describing the query dataset that is to be labeled.
  - name: Inputs
    description: Input dataset (query) arguments
    arguments:
      - name: "--input"
        type: file
        description: The input (query) data to be labeled. Should be a .h5mu file.
        direction: input
        required: true
        example: input.h5mu
      - name: "--modality"
        description: Which modality to process.
        type: string
        default: "rna"
        required: false
      # Optional, like --reference_layer: .X is used when no layer is given.
      - name: "--input_layer"
        type: string
        description: The layer in the input data to be used for cell type annotation if .X is not to be used.
        required: false
      - name: "--input_var_gene_names"
        type: string
        required: false
        description: |
          The name of the adata var column in the input data containing gene names; when not provided, the var index will be used.
      - name: "--input_reference_gene_overlap"
        type: integer
        default: 100
        min: 1
        description: |
          The minimum number of genes present in both the reference and query datasets.
  # Arguments describing the labeled reference dataset used for training.
  - name: Reference
    description: Arguments related to the reference dataset.
    arguments:
      - name: "--reference"
        type: file
        description: "The reference data to train the SVM classifier on. Only required if a pre-trained --model is not provided."
        example: reference.h5mu
        direction: input
        required: false
      - name: "--reference_layer"
        type: string
        description: The layer in the reference data to be used for cell type annotation if .X is not to be used. Data are expected to be processed in the same way as the --input query dataset.
        required: false
      # Declared exactly once: a duplicated `description` key is invalid YAML
      # and most parsers silently keep only the last value.
      - name: "--reference_obs_target"
        type: string
        required: true
        description: |
          Key in .obs attribute of reference modality with cell-type information.
      - name: "--reference_var_gene_names"
        type: string
        required: false
        description: |
          The name of the adata var column in the reference data containing gene names; when not provided, the var index will be used.
      - name: "--reference_var_input"
        type: string
        required: false
        description: |
          .var column containing highly variable genes. By default, do not subset genes.
  # Output file plus the .obs keys that receive predictions and probabilities.
  - name: Outputs
    description: 'Output arguments.'
    arguments:
      # Destination file for the annotated data.
      - name: '--output'
        type: file
        direction: output
        description: 'Output h5mu file.'
        example: output.h5mu
      # .obs key for the predicted labels.
      - name: '--output_obs_prediction'
        type: string
        required: false
        default: 'svm_pred'
        description: |
          In which `.obs` slots to store the predicted information.
      # .obs key for the probability attached to each prediction.
      - name: '--output_obs_probability'
        type: string
        required: false
        default: 'svm_probability'
        description: |
          In which `.obs` slots to store the probability of the predictions.
    # Merge this group with the shared definitions from the base
    # h5 compression argument file.
    __merge__:
      - .
      - /src/base/h5_compression_argument.yaml
  # Pre-trained model input and the hyper-parameters used when training an SVM.
  - name: Model arguments
    description: Model arguments.
    arguments:
      - name: "--model"
        type: file
        description: "Pretrained model in pkl format. If not provided, the model will be trained on the reference data and --reference should be provided."
        required: false
        example: pretrained_model.pkl
      - name: "--feature_selection"
        type: boolean
        description: "Whether to perform feature selection."
        default: true
      - name: "--max_iter"
        type: integer
        description: "Maximum number of iterations for the SVM."
        min: 1
        default: 5000
      # NOTE(review): `min: 0.0` still admits a value of exactly 0; confirm
      # whether the script rejects a non-positive regularization parameter.
      - name: "--c_reg"
        type: double
        description: "Regularization parameter for the SVM."
        min: 0.0
        default: 1.0
      # The description is a literal block scalar, so it must not be wrapped
      # in double quotes: they would become part of the help text.
      - name: "--class_weight"
        type: string
        description: |
          Class weights for the SVM. The `uniform` mode gives all classes a weight of one.
          The `balanced` mode (default) uses the values of y to automatically adjust weights inversely
          proportional to class frequencies in the input data as n_samples / (n_classes * np.bincount(y)).
        choices: ["balanced", "uniform"]
        default: "balanced"
# Component script and the helper modules it is packaged with. The list needs
# its own top-level key; without it these entries are parsed as additional
# `argument_groups` items.
resources:
  - type: python_script
    path: script.py
  - path: /src/utils/setup_logger.py
  - path: /src/utils/cross_check_genes.py
  - path: /src/utils/subset_vars.py
  - path: /src/utils/set_var_index.py
# Test script and the test datasets made available to it.
test_resources:
  - type: python_script
    path: test.py
  - path: /resources_test/annotation_test_data/
  - path: /resources_test/pbmc_1k_protein_v3/
# NOTE(review): the `engines`, `setup` and `runners` keys below are restored
# following the Viash config schema; without them the docker/executable/nextflow
# entries are parsed as test resources and the setup list dangles under the
# `image` scalar. Confirm against the upstream file.
engines:
  - type: docker
    image: python:3.12-slim
    # Packages installed on top of the base image.
    setup:
      - type: apt
        packages:
          - libhdf5-dev
          - procps
      - type: python
        packages:
          - scikit-learn==1.5.2
      - type: python
        __merge__: [ /src/base/requirements/anndata_mudata.yaml, .]
    __merge__: [ /src/base/requirements/python_test_setup.yaml, .]
runners:
  - type: executable
  - type: nextflow
    directives:
      label: [highcpu, highmem, highdisk]
# Provide feedback

# Saved searches

# Use saved searches to filter your results more quickly

# Uh oh!

# Uh oh!

# FilesExpand file tree

# config.vsh.yaml

# Latest commit

# History

# config.vsh.yaml

# File metadata and controls