BiomedSciAI
diff --git a/‎vllm/README.md‎
Lines changed: 4 additions & 8 deletions b/‎vllm/README.md‎
Lines changed: 4 additions & 8 deletions
diff --git a/‎vllm/examples/biomed_rna_example.py‎
Lines changed: 95 additions & 26 deletions b/‎vllm/examples/biomed_rna_example.py‎
Lines changed: 95 additions & 26 deletions
diff --git a/‎vllm/pyproject.toml‎
Lines changed: 1 addition & 1 deletion b/‎vllm/pyproject.toml‎
Lines changed: 1 addition & 1 deletion
diff --git a/‎vllm/tests/__init__.py‎
Lines changed: 1 addition & 0 deletions b/‎vllm/tests/__init__.py‎
Lines changed: 1 addition & 0 deletions
diff --git a/‎vllm/tests/conftest.py‎
Lines changed: 105 additions & 0 deletions b/‎vllm/tests/conftest.py‎
Lines changed: 105 additions & 0 deletions
@@ -1,18 +1,14 @@
-# vLLM BiomedRNA Model Plugins
+# vLLM BiomedRNA Model Plugin
 
-Running BiomedRNA models with vLLM through the plugin system.
+Run inference for BiomedRNA models through the vLLM plugin system.
 
 ## Installation
 
-Install biomed-multi-omic and vllm plugin:
+Add the vLLM plugin to your existing bmfm-multi-omic environment:
 
 ```
-cd $HOME/git/biomed-multi-omic
-pip install -e ..
-cd vllm
-pip install -e .
+uv pip install -e .
 ```
-`
 
 ## Prerequisites
 
 
@@ -5,6 +5,8 @@
 This example demonstrates two approaches:
 1. Single batch processing (quick test with few cells)
 2. Full file iteration (memory-efficient processing of entire dataset)
+
+Both approaches use the same iteration pattern for consistency.
 """
 
 from pathlib import Path
@@ -16,15 +18,90 @@
 from vllm_biomed_rna_plugin.biomed_rna import (
     BiomedRnaForSequenceEmbedding,  # Register model class
 )
-from vllm_biomed_rna_plugin.preprocess import iter_h5ad_batches, preprocess_anndata
+from vllm_biomed_rna_plugin.preprocess import preprocess_anndata
 from vllm_biomed_rna_plugin.utils import DEFAULT_MODEL_PATH, load_tokenizer
 
 # Configuration
-H5AD_PATH: Path = Path("examples/resources/zheng68k.h5ad")
+ZHENG_SMALL_H5AD_PATH: Path = Path("examples/resources/zheng68k.h5ad")  # 165 samples
+
+
+def iter_h5ad_batches(
+    h5ad_path: str | Path,
+    tokenizer,
+    batch_size: int = 32,
+    max_length: int = 1024,
+    limit_genes: str | None = "protein_coding",
+    log_normalize_transform: bool = True,
+    limit_cells: int | None = None,
+):
+    """
+    Stream batches from h5ad file using DataModule preprocessing.
+
+    Memory-efficient processing with full bmfm-targets transformations:
+    - Log normalization (if enabled)
+    - Gene filtering (e.g., protein_coding only)
+    - Sequence length limiting (max_length)
+    - Attention mask generation
+
+    Uses backed="r" mode to avoid loading entire file into memory. The backed
+    file handle is closed when the generator is exhausted, closed early, or
+    garbage-collected.
+
+    Args:
+    ----
+        h5ad_path: Path to h5ad file
+        tokenizer: MultiFieldTokenizer from bmfm-targets
+        batch_size: Number of cells per batch, must be positive (default: 32)
+        max_length: Maximum sequence length (default: 1024)
+        limit_genes: Gene filtering strategy - "protein_coding" or None (default: "protein_coding")
+        log_normalize_transform: Apply log normalization (default: True)
+        limit_cells: Optional limit on total cells to process (default: None = all cells)
+
+    Yields:
+    ------
+        list[dict]: Batch of preprocessed cells in vLLM format
+
+    Raises:
+    ------
+        ValueError: If batch_size is not a positive integer (raised on first iteration)
+
+    Example:
+    -------
+        >>> tokenizer = load_tokenizer()
+        >>> llm = get_vllm_biomed_rna_model()
+        >>>
+        >>> all_embeddings = []
+        >>> for batch in iter_h5ad_batches("data.h5ad", tokenizer, batch_size=32):
+        ...     outputs = llm.embed(batch)
+        ...     embeddings = [out.outputs.embedding for out in outputs]
+        ...     all_embeddings.extend(embeddings)
+        >>>
+        >>> embeddings_array = np.array(all_embeddings)  # [n_cells, hidden_size]
+    """
+    # A negative step would make range() silently empty and a zero step raises
+    # a cryptic error, so reject both up front with a clear message.
+    if batch_size <= 0:
+        raise ValueError(f"batch_size must be a positive integer, got {batch_size}")
+
+    # backed="r" = read-only mode, doesn't load full matrix into memory
+    adata = anndata.read_h5ad(str(h5ad_path), backed="r")
+    try:
+        total_cells = (
+            adata.n_obs if limit_cells is None else min(limit_cells, adata.n_obs)
+        )
+
+        # range() is already capped at total_cells, so limit_cells needs no
+        # separate bookkeeping or early break inside the loop.
+        for start in range(0, total_cells, batch_size):
+            end = min(start + batch_size, total_cells)
+
+            # Load only this chunk into memory
+            chunk_adata = adata[start:end].to_memory()
+
+            # Preprocess using DataModule (applies all transformations)
+            yield preprocess_anndata(
+                chunk_adata,
+                tokenizer,
+                max_length=max_length,
+                limit_genes=limit_genes,
+                log_normalize_transform=log_normalize_transform,
+                batch_size=None,  # Process entire chunk at once
+            )
+    finally:
+        # Release the backing HDF5 file even if the consumer stops early
+        adata.file.close()
 
 
 def generate_embedding_for_h5ad_snippet(
-    h5ad_path: Path = H5AD_PATH,
+    h5ad_path: Path = ZHENG_SMALL_H5AD_PATH,
     num_samples: int = 10,
     max_length: int = 1024,
 ) -> np.ndarray:
@@ -74,8 +151,8 @@ def generate_embedding_for_h5ad_snippet(
     return embeddings
 
 
-def generate_embeddings_for_full_h5ad(
-    h5ad_path: Path = H5AD_PATH,
+def generate_embeddings_for_h5ad(
+    h5ad_path: Path = ZHENG_SMALL_H5AD_PATH,
     batch_size: int = 32,
     max_length: int = 1024,
     limit_cells: int | None = None,
@@ -84,7 +161,8 @@ def generate_embeddings_for_full_h5ad(
     Generate embeddings for entire h5ad file using batch iteration.
 
     Memory-efficient: Processes file in chunks without loading everything
-    into memory at once. Uses DataModule preprocessing for each batch.
+    into memory at once. Uses the iter_h5ad_batches helper function which
+    applies full DataModule preprocessing for each batch.
 
     Args:
     ----
@@ -101,41 +179,32 @@ def generate_embeddings_for_full_h5ad(
     print(f"Example 2: Full File Iteration (batch_size={batch_size})")
     print(f"{'='*80}")
 
+    # Initialize tokenizer and model
     tokenizer = load_tokenizer()
-    llm = get_vllm_biomed_rna_model(
-        model_path=DEFAULT_MODEL_PATH,
-    )
+    llm = get_vllm_biomed_rna_model(model_path=DEFAULT_MODEL_PATH)
 
-    # Get total cell count
+    # Get total cell count for progress reporting
     adata_info = anndata.read_h5ad(h5ad_path, backed="r")
     total_cells = (
         adata_info.n_obs if limit_cells is None else min(limit_cells, adata_info.n_obs)
     )
     print(f"Processing {total_cells} cells from {h5ad_path.name}")
 
-    # Process in batches
+    # Process in batches using the iteration helper
     all_embeddings = []
-    cells_processed = 0
-
     for batch in iter_h5ad_batches(
-        str(h5ad_path),
+        h5ad_path,
         tokenizer,
         batch_size=batch_size,
         max_length=max_length,
+        limit_cells=limit_cells,
     ):
         # Generate embeddings for this batch
         outputs = llm.embed(batch)
-        batch_embeddings: list[list[float]] = [
-            output.outputs.embedding for output in outputs
-        ]
+        batch_embeddings = [output.outputs.embedding for output in outputs]
         all_embeddings.extend(batch_embeddings)
-
-        cells_processed += len(batch)
-        print(f"  Processed {cells_processed}/{total_cells} cells...")
-
-        # Stop if we've reached the limit
-        if limit_cells and cells_processed >= limit_cells:
-            break
+        
+        print(f"  Processed {len(all_embeddings)}/{total_cells} cells...")
 
     # Convert to numpy array
     embeddings = np.array(all_embeddings)
@@ -149,7 +218,7 @@ def generate_embeddings_for_full_h5ad(
     # Example 1: Quick test with 10 cells
     embeddings_snippet = generate_embedding_for_h5ad_snippet(num_samples=10)
 
-    # Example 2: Process more cells using batch iteration
-    embeddings_full = generate_embeddings_for_full_h5ad(
+    # Example 2: Process full h5ad file using batch iteration
+    embeddings_full = generate_embeddings_for_h5ad(
         batch_size=32,
     )
@@ -26,7 +26,7 @@ classifiers = [
 ]
 
 dependencies = [
-    "vllm>=0.13.0",
+    "vllm>0.18.0",
     "torch>=2.9.0",
     "transformers>=4.56.0,<5",
 ]
 
@@ -0,0 +1 @@
+"""Tests package for vllm-biomed-rna-plugin."""
@@ -0,0 +1,105 @@
+"""Shared pytest fixtures for BiomedRNA tests."""
+
+import os
+from pathlib import Path
+
+import pytest
+import torch
+from transformers import AutoConfig
+
+from vllm_biomed_rna_plugin.biomed_rna import (
+    BiomedRnaConfig,
+    BiomedRnaForSequenceEmbedding,
+)
+
+
+def pytest_configure(config):
+    """Configure pytest and set environment variables for PyTorch.
+
+    Sets process-wide environment variables and global torch flags; the
+    ``config`` argument supplied by pytest is not used.
+    """
+    # Restrict TorchInductor to one compile thread and turn compile debug off.
+    # NOTE(review): torch is already imported at module top when these are
+    # set - confirm the variables are still picked up at that point.
+    os.environ["TORCHINDUCTOR_COMPILE_THREADS"] = "1"
+    os.environ["TORCH_COMPILE_DEBUG"] = "0"
+
+    # Have TorchDynamo suppress compilation errors instead of failing the test
+    # (presumably falling back to eager execution - confirm against PyTorch docs)
+    torch._dynamo.config.suppress_errors = True
+
+    # Determinism settings: the global deterministic-algorithms switch is
+    # explicitly left OFF; only cuDNN is made deterministic, with benchmark
+    # mode disabled.
+    torch.use_deterministic_algorithms(False)
+    torch.backends.cudnn.deterministic = True
+    torch.backends.cudnn.benchmark = False
+
+
+# Checkpoint directory read by the `config` and `model` fixtures below.
+# NOTE(review): this looks like a user-specific absolute path on a shared
+# filesystem - confirm how other environments are expected to provide it.
+LOCAL_MODEL_PATH = Path(
+    "/dccstor/bmfm-targets1/users/sivanra/models/biomed.rna.llama.47m.wced.multitask.v1"
+)
+# NOTE(review): presumably the Hugging Face Hub id of the same checkpoint; it
+# is not referenced by the fixtures in this file - confirm where it is used.
+HF_MODEL_PATH = "ibm-research/biomed.rna.llama.47m.wced.multitask.v1"
+
+# Helper functions and fixtures exported by this module.
+__all__ = [
+    "create_dummy_vllm_config",
+    "create_rna_multi_modal_object",
+    "config",
+    "model",
+]
+
+
+def create_rna_multi_modal_object(
+    gene_ids: torch.Tensor, expr_values: torch.Tensor
+) -> dict:
+    """Create a multimodal data object for RNA input.
+
+    Args:
+    ----
+        gene_ids: Tensor stored under the "gene_ids" key
+        expr_values: Tensor stored under the "expr_values" key
+
+    Returns:
+    -------
+        dict: A single "rna" entry mapping to a dict that holds both tensors
+        as passed in (no copy or conversion is performed)
+    """
+    return {
+        "rna": {
+            "gene_ids": gene_ids,
+            "expr_values": expr_values,
+        }
+    }
+
+
+def create_dummy_vllm_config(config: BiomedRnaConfig):
+    """Create minimal vLLM config for testing.
+
+    Builds a small tree of stand-in objects instead of a real vLLM config.
+    Only the attributes defined below are available on the result.
+
+    Args:
+    ----
+        config: Model config, exposed as ``model_config.hf_config``
+
+    Returns:
+    -------
+        An object whose ``model_config`` attribute carries ``hf_config``,
+        ``dtype``, ``head_dtype``, ``pooler_config`` and ``multimodal_config``
+    """
+
+    # Stand-in pooler settings; fixes the sequence pooling type to "CLS"
+    class DummyPoolerConfig:
+        seq_pooling_type = "CLS"
+
+    class DummyMultiModalConfig:
+        """Dummy multimodal config for testing."""
+
+        # Required by SupportsMultiModal interface
+        mm_encoder_only = False
+
+        def get_limit_per_prompt(self, modality: str) -> int | None:
+            """Return None to indicate no limit for the modality."""
+            return None
+
+    # Stand-in for the model-level config: float32 everywhere plus the two
+    # dummy sub-configs defined above
+    class DummyModelConfig:
+        def __init__(self, hf_config):
+            self.hf_config = hf_config
+            self.dtype = torch.float32
+            self.head_dtype = torch.float32
+            self.pooler_config = DummyPoolerConfig()
+            self.multimodal_config = DummyMultiModalConfig()
+
+    # Top-level stand-in; exposes only `model_config`
+    class DummyVllmConfig:
+        def __init__(self, hf_config):
+            self.model_config = DummyModelConfig(hf_config)
+
+    return DummyVllmConfig(config)
+
+
+@pytest.fixture(scope="module")
+def config():
+    """Pytest fixture returning the config loaded from LOCAL_MODEL_PATH (module-scoped)."""
+    return AutoConfig.from_pretrained(LOCAL_MODEL_PATH)
+
+
+@pytest.fixture(scope="module")
+def model(config):
+    """Pytest fixture for BiomedRNA model with loaded weights.
+
+    Module-scoped. Reads ``model.safetensors`` from LOCAL_MODEL_PATH, builds
+    the model on top of the dummy vLLM config, loads the weights and switches
+    the model to eval mode before returning it.
+    """
+    # Imported here rather than at module top, so safetensors is only needed
+    # when this fixture is actually requested
+    from safetensors.torch import load_file
+
+    # Load weights
+    weights = load_file(str(LOCAL_MODEL_PATH / "model.safetensors"))
+
+    # Create model with full config
+    vllm_config = create_dummy_vllm_config(config)
+    model = BiomedRnaForSequenceEmbedding(vllm_config=vllm_config)
+    # Weights are handed over as (name, tensor) pairs
+    model.load_weights(weights.items())
+    model.eval()
+    return model
Original file line number	Diff line number	Diff line change
`@@ -26,7 +26,7 @@ classifiers = [`
`26`	`26`	`]`
`27`	`27`
`28`	`28`	`dependencies = [`
`29`		`- "vllm>=0.13.0",`
	`29`	`+ "vllm>0.18.0",`
`30`	`30`	`"torch>=2.9.0",`
`31`	`31`	`"transformers>=4.56.0,<5",`
`32`	`32`	`]`
Original file line number	Diff line number	Diff line change
`@@ -0,0 +1 @@`
	`1`	`+"""Tests package for vllm-biomed-rna-plugin."""`