openpipelines-bio
diff --git a/‎.github/workflows/integration-test.yml‎
Lines changed: 1 addition & 1 deletion b/‎.github/workflows/integration-test.yml‎
Lines changed: 1 addition & 1 deletion
diff --git a/‎.github/workflows/release-build.yml‎
Lines changed: 1 addition & 1 deletion b/‎.github/workflows/release-build.yml‎
Lines changed: 1 addition & 1 deletion
diff --git a/‎.github/workflows/viash-test.yml‎
Lines changed: 1 addition & 0 deletions b/‎.github/workflows/viash-test.yml‎
Lines changed: 1 addition & 0 deletions
diff --git a/‎CHANGELOG.md‎
Lines changed: 32 additions & 0 deletions b/‎CHANGELOG.md‎
Lines changed: 32 additions & 0 deletions
diff --git a/‎resources_test_scripts/annotation_test_data.sh‎
Lines changed: 4 additions & 0 deletions b/‎resources_test_scripts/annotation_test_data.sh‎
Lines changed: 4 additions & 0 deletions
diff --git a/‎src/annotate/celltypist/config.vsh.yaml‎
Lines changed: 6 additions & 3 deletions b/‎src/annotate/celltypist/config.vsh.yaml‎
Lines changed: 6 additions & 3 deletions
diff --git a/‎src/annotate/celltypist/test.py‎
Lines changed: 34 additions & 16 deletions b/‎src/annotate/celltypist/test.py‎
Lines changed: 34 additions & 16 deletions
diff --git a/‎src/annotate/consensus_vote/config.vsh.yaml‎
Lines changed: 111 additions & 0 deletions b/‎src/annotate/consensus_vote/config.vsh.yaml‎
Lines changed: 111 additions & 0 deletions
@@ -73,7 +73,7 @@ jobs:
 
     - uses: viash-io/viash-actions/setup@v6
 
-    - uses: nf-core/setup-nextflow@v2.1.4
+    - uses: nf-core/setup-nextflow@v3.0.0
 
     # use cache
     - name: Cache resources data
 
@@ -62,7 +62,7 @@ jobs:
 
     - uses: viash-io/viash-actions/setup@v6
 
-    - uses: nf-core/setup-nextflow@v2.1.4
+    - uses: nf-core/setup-nextflow@v3.0.0
 
     # use cache
     - name: Cache resources data
 
@@ -21,6 +21,7 @@ jobs:
         uses: actions/setup-python@v6
       - uses: r-lib/actions/setup-r@v2
         with:
+          r-version: 4.5.3
           use-public-rspm: true
       - run: python -m pip install pre-commit
         shell: bash
 
@@ -8,16 +8,28 @@
 
 * `workflows/rna/rna_multisample`, `workflows/multiomics/process_batches`, `feature_annotation/highly_variable_features_scanpy`: add an option to exclude features before running highly variable gene calculation based on a user-defined list of feature names (PR #1121).
 
+* `annotate/consensus_vote`: new component computing a (weighted) majority vote across cell type labels from multiple annotation methods (PR #1151).
+
 * `filter/filter_with_quantile`: added a component to filter numerical .obs or .var columns based on quantile thresholds, with optional subsetting (PR #1146).
 
 * `dimred/pca`: added possibility to do chunked processing using arguments `chunks` and `chunk_size`. Also added a `seed` argument in order to better control the variability between executions (PR #1157).
 
+* `workflows/multiomics/process_singlesample`: New workflow for processing RNA, protein and GDO modalities of individual samples (PR #1147).
+
+* `transform/clear_slots`: New component that can be used to remove all items from slots of a MuData object (PR #1171).
+
+* `workflows/multiomics/process_singlesample`, `workflows/multiomics/process_samples`, `workflows/multiomics/process_batches`: add `--intersect_obs` option to remove observations that are not present in all processed modalities, so each modality shares the same set of cells (PR #1173, PR #1175).
+
+* `labels_transfer/cellmapper`: New component that transfers labels from a reference to a query with a shared embedding using CellMapper (PR #1169, PR #1177).
+
 * `filter/create_cell_masks`: added a component to create boolean cell masks from a set of user-provided filters (PR #1165).
 
 ## MAJOR CHANGES
 
 * `qc/calculate_qc_metrics`: major improvements to memory consumption and runtimes (PR #1140).
 
+* `annotate/popv`: bump version to 0.6.1 (PR #1167).
+
 ## MINOR CHANGES
 
 * `dataflow/split_modalities`: improve memory consumption by only reading one modality at the same time (PR #1152).
@@ -28,10 +40,30 @@
 
 * Bump viash to 0.9.7 (PR #1145)
 
+* `annotate/celltypist` and `workflows/annotation/celltypist`: set `--input_layer` default to `log_normalized` and `--reference_var_input` default to `filter_with_hvg` to align with upstream workflow defaults (PR #1155).
+
+* `annotate/singler`: set `--input_layer` default to `log_normalized` and `--reference_var_input` default to `filter_with_hvg` to align with upstream workflow defaults (PR #1155).
+
+* `workflows/annotation/scanvi_scarches`: set `--input_obs_batch_label` and `--reference_obs_batch_label` defaults to `sample_id` and `--reference_var_hvg` default to `filter_with_hvg` to align with upstream workflow defaults (PR #1155).
+
+* `cluster/leiden`: added `flavor`, `n_iterations` and `seed` arguments (PR #1132).
+
+* `cluster/leiden`: avoid creating unnecessary copies of the output data (PR #1132).
+
+* `workflows/multiomics/process_samples`: refactored to use a shared `process_singlesample_base` subworkflow, which is also used by the new `process_singlesample` workflow to avoid code duplication (PR #1147).
+
+* Bump anndata to `0.12.11` (PR #1174).
+
+* Add missing `example` fields to several component and workflow configurations (PR #1067).
+
+* Testing: bump `viashpy` to 0.10.0 (PR #1178).
+
 ## BUG FIXES
 
 * `dataflow/split_h5mu`: pin scipy version to 1.16.3 to avoid regression that corrupts large sparse matrix indexing (PR #1153).
 
+* `convert/from_h5ad_h5mu`: store and reset var index names to avoid issues with a change in mudata (PR #1184).
+
 # openpipelines 4.0.4
 
 ## BUG FIXES
 
@@ -65,6 +65,10 @@ disease = np.random.choice(["healthy", "diseased"], size=n_cells, p=[0.5, 0.5])
 sub_ref_adata_final.obs["treatment"] = treatment
 sub_ref_adata_final.obs["disease"] = disease
 
+# Strip raw slot - not needed for annotation and causes compatibility issues between AnnData/MuData versions
+sub_ref_adata_final = sub_ref_adata_final.copy()
+sub_ref_adata_final.raw = None
+
 # Write out data
 sub_ref_adata_final.write("${OUT}/TS_Blood_filtered.h5ad", compression='gzip')
 HEREDOC
 
@@ -26,6 +26,7 @@ argument_groups:
         required: false
       - name: "--input_layer"
         type: string
+        default: log_normalized
         description: The layer in the input data containing counts that are lognormalized to 10000, .X is not to be used. 
       - name: "--input_var_gene_names"
         type: string
@@ -62,9 +63,10 @@ argument_groups:
         default: "cell_ontology_class"
       - name: "--reference_var_input"
         type: string
+        default: "filter_with_hvg"
         required: false
         description: |
-          .var column containing highly variable genes. By default, do not subset genes.
+          .var column containing highly variable genes. If not provided, genes will not be subset.
       - name: "--reference_var_gene_names"
         type: string
         required: false
@@ -147,13 +149,14 @@ engines:
   - type: docker
     image: nvcr.io/nvidia/pytorch:25.11-py3
     setup:
-      - type: python
-        __merge__: [ /src/base/requirements/scanpy.yaml, .]
       - type: python
         packages:
           - celltypist==1.7.1
       - type: python
         __merge__: [ /src/base/requirements/anndata_mudata.yaml, .]
+    test_setup:
+      - type: python
+        __merge__: [ /src/base/requirements/scanpy.yaml, .]
     __merge__: [ /src/base/requirements/python_test_setup.yaml, .]
 runners:
   - type: executable
 
@@ -21,34 +21,47 @@
 model_file = (
     f"{meta['resources_dir']}/annotation_test_data/celltypist_model_Immune_All_Low.pkl"
 )
-celltypist_input_file = (
-    f"{meta['resources_dir']}/annotation_test_data/demo_2000_cells.h5mu"
-)
-# input_file = f"{meta['resources_dir']}/pbmc_1k_protein_v3/pbmc_1k_protein_v3_mms.h5mu"
+input_file_1 = f"{meta['resources_dir']}/pbmc_1k_protein_v3/pbmc_1k_protein_v3_mms.h5mu"
+input_file_2 = f"{meta['resources_dir']}/annotation_test_data/demo_2000_cells.h5mu"
+reference_file = f"{meta['resources_dir']}/annotation_test_data/TS_Blood_filtered.h5mu"
 
 
 def log_normalize(adata):
-    sc.pp.normalize_total(adata, target_sum=1e4)
-    sc.pp.log1p(adata)
+    adata_norm = sc.pp.normalize_total(adata, target_sum=1e4, copy=True)
+    adata_lognorm = sc.pp.log1p(adata_norm, copy=True)
+    adata.layers["log_normalized"] = adata_lognorm.X
+    return adata
+
+
+def calculate_hvg(adata, n_top_genes=1000):
+    adata_hvg = sc.pp.highly_variable_genes(adata, n_top_genes=n_top_genes, copy=True)
+    adata.var["filter_with_hvg"] = adata_hvg.var["highly_variable"]
     return adata
 
 
 @pytest.fixture
 def reference_mdata():
-    mdata = mu.read_h5mu(
-        f"{meta['resources_dir']}/annotation_test_data/TS_Blood_filtered.h5mu"
-    )
+    mdata = mu.read_h5mu(reference_file)
+    adata = mdata.mod["rna"]  # already has layer "log_normalized" with 10k target sum
+    adata.var["filter_with_hvg"] = adata.var[
+        "highly_variable"
+    ]  # already has highly variable genes calculated
+    return mdata
+
+
+@pytest.fixture
+def input_mdata():
+    mdata = mu.read_h5mu(input_file_1)
     adata = mdata.mod["rna"].copy()
+    adata.layers["counts"] = adata.X.copy()  # store raw counts in a layer
     adata_lognorm = log_normalize(adata)
     mdata.mod["rna"] = adata_lognorm
     return mdata
 
 
 @pytest.fixture
-def input_mdata():
-    mdata = mu.read_h5mu(
-        f"{meta['resources_dir']}/pbmc_1k_protein_v3/pbmc_1k_protein_v3_mms.h5mu"
-    )
+def model_input_mdata():
+    mdata = mu.read_h5mu(input_file_2)
     adata = mdata.mod["rna"].copy()
     adata_lognorm = log_normalize(adata)
     mdata.mod["rna"] = adata_lognorm
@@ -155,15 +168,20 @@ def test_set_params(
     )
 
 
-def test_with_model(run_component, random_h5mu_path):
+def test_with_model(
+    run_component, random_h5mu_path, write_mudata_to_file, model_input_mdata
+):
     output_file = random_h5mu_path()
+    input_file = write_mudata_to_file(model_input_mdata)
 
     run_component(
         [
             "--input",
-            celltypist_input_file,
+            input_file,
             "--model",
             model_file,
+            "--reference_layer",
+            "",
             "--reference_obs_targets",
             "cell_type",
             "--output",
@@ -208,7 +226,7 @@ def test_fail_invalid_input_expression(
                 "--input",
                 input_file,
                 "--input_layer",
-                "log_normalized",
+                "counts",
                 "--reference",
                 reference_file,
                 "--reference_layer",
 
@@ -0,0 +1,111 @@
+name: consensus_vote
+namespace: annotate
+scope: "public"
+description: |
+  Combines cell type predictions from multiple annotation methods into a single consensus prediction using a weighted majority vote.
+  For each cell, each method votes for its predicted cell type, optionally weighted by the probability score and/or a per-method weight.
+  The consensus prediction is the cell type with the highest total weighted vote.
+  Note that this method does not leverage pre-existing ontology or perform any reconciliation of cell type labels across methods, so the same cell type may be represented by different labels in different methods and will be treated as distinct cell types in the vote.
+authors:
+  - __merge__: /src/authors/dorien_roosen.yaml
+    roles: [ author ]
+
+argument_groups:
+  - name: Inputs
+    description: Input dataset arguments.
+    arguments:
+      - name: "--input"
+        type: file
+        description: Input h5mu file containing cell type predictions in .obs.
+        direction: input
+        required: true
+        example: input.h5mu
+      - name: "--modality"
+        description: Which modality to process.
+        type: string
+        default: "rna"
+        required: false
+      - name: "--input_obs_predictions"
+        type: string
+        description: |
+          One or more .obs column names containing cell type predictions (labels) from
+          different annotation methods.
+        required: true
+        multiple: true
+        example: ["scanvi_pred", "celltypist_pred"]
+      - name: "--input_obs_probabilities"
+        type: string
+        description: |
+          One or more .obs column names containing prediction probability scores,
+          one per method in --input_obs_predictions. When provided, each method's
+          vote is scaled by the probability score for that cell (in addition to
+          any per-method --weights). Must be the same length as --input_obs_predictions.
+        required: false
+        multiple: true
+        example: ["scanvi_prob", "celltypist_prob"]
+      - name: "--tie_label"
+        type: string
+        description: |
+          Label to assign when two or more cell types receive equal votes.
+          If not provided, tied cells are assigned None (missing value).
+        required: false
+        example: "Unknown"
+      - name: "--weights"
+        type: double
+        description: |
+          Per-method weights for the consensus vote. Must be the same length as
+          --input_obs_predictions when provided. Weights are normalized to sum to 1
+          before use. If not provided, all methods are weighted equally.
+        required: false
+        multiple: true
+        example: [1.0, 2.0]
+
+  - name: Outputs
+    description: Output arguments.
+    arguments:
+      - name: "--output"
+        alternatives: [-o]
+        type: file
+        description: Output h5mu file.
+        direction: output
+        example: output.h5mu
+      - name: "--output_obs_predictions"
+        type: string
+        default: consensus_pred
+        required: false
+        description: |
+          In which `.obs` slot to store the consensus predicted cell type.
+      - name: "--output_obs_score"
+        type: string
+        default: consensus_score
+        required: false
+        description: |
+          In which `.obs` slot to store the consensus score, defined as the fraction
+          of total weight assigned to the winning cell type.
+    __merge__: [., /src/base/h5_compression_argument.yaml]
+
+resources:
+  - type: python_script
+    path: script.py
+  - path: /src/utils/setup_logger.py
+  - path: /src/utils/compress_h5mu.py
+
+test_resources:
+  - type: python_script
+    path: test.py
+
+engines:
+  - type: docker
+    image: python:3.13-slim
+    setup:
+      - type: apt
+        packages:
+          - procps
+      - type: python
+        __merge__: [ /src/base/requirements/anndata_mudata.yaml, .]
+    __merge__: [ /src/base/requirements/python_test_setup.yaml, .]
+runners:
+  - type: executable
+  - type: nextflow
+    directives:
+      label: [lowcpu, lowmem, lowdisk]