Skip to content

Commit 0f2e8da

Browse files
Authored by: (author name lost in page extraction)
enable per output token likelihood prediction for evo2 (#1057)
### Description <!-- Provide a detailed description of the changes in this PR --> ### Type of changes <!-- Mark the relevant option with an [x] --> - [ ] Bug fix (non-breaking change which fixes an issue) - [ ] New feature (non-breaking change which adds functionality) - [ ] Refactor - [ ] Documentation update - [ ] Other (please describe): ### CI Pipeline Configuration Configure CI behavior by applying the relevant labels: - [SKIP_CI](https://github.com/NVIDIA/bionemo-framework/blob/main/docs/docs/user-guide/contributing/contributing.md#skip_ci) - Skip all continuous integration tests - [INCLUDE_NOTEBOOKS_TESTS](https://github.com/NVIDIA/bionemo-framework/blob/main/docs/docs/user-guide/contributing/contributing.md#include_notebooks_tests) - Execute notebook validation tests in pytest - [INCLUDE_SLOW_TESTS](https://github.com/NVIDIA/bionemo-framework/blob/main/docs/docs/user-guide/contributing/contributing.md#include_slow_tests) - Execute tests labelled as slow in pytest for extensive testing > [!NOTE] > By default, the notebooks validation tests are skipped unless explicitly enabled. #### Authorizing CI Runs We use [copy-pr-bot](https://docs.gha-runners.nvidia.com/apps/copy-pr-bot/#automation) to manage authorization of CI runs on NVIDIA's compute resources. - If a pull request is opened by a trusted user and contains only trusted changes, the pull request's code will automatically be copied to a pull-request/ prefixed branch in the source repository (e.g. pull-request/123) - If a pull request is opened by an untrusted user or contains untrusted changes, an NVIDIA org member must leave an `/ok to test` comment on the pull request to trigger CI. This will need to be done for each new commit. 
### Usage <!--- How does a user interact with the changed code --> ```python # TODO: Add code snippet ``` ### Pre-submit Checklist <!--- Ensure all items are completed before submitting --> - [ ] I have tested these changes locally - [ ] I have updated the documentation accordingly - [ ] I have added/updated tests as needed - [ ] All existing tests pass successfully Signed-off-by: Yang Zhang <yangzhang@nvidia.com>
1 parent 340cd2c commit 0f2e8da

File tree

1 file changed

+11
-7
lines changed
  • sub-packages/bionemo-evo2/src/bionemo/evo2/run

1 file changed

+11
-7
lines changed

sub-packages/bionemo-evo2/src/bionemo/evo2/run/predict.py

Lines changed: 11 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -111,7 +111,7 @@ def parse_args():
111111
)
112112
ap.add_argument(
113113
"--log-prob-collapse-option",
114-
choices=["sum", "mean"],
114+
choices=["sum", "mean", "per_token"],
115115
default="mean",
116116
help="How to collapse the log probabilities across the sequence dimension.",
117117
)
@@ -160,7 +160,7 @@ def __init__(
160160
self,
161161
*args,
162162
output_log_prob_seqs: bool = False,
163-
log_prob_collapse_option: Literal["sum", "mean"] = "mean",
163+
log_prob_collapse_option: Literal["sum", "mean", "per_token"] = "mean",
164164
**kwargs,
165165
):
166166
"""Initialize the predictor with our needs around computing log probabilities."""
@@ -195,10 +195,14 @@ def predict_step(self, batch, batch_idx: int | None = None) -> Tensor:
195195
2, # along the vocab dimension...
196196
input_ids.unsqueeze(-1), # using the token ids to index.
197197
).squeeze(-1)
198-
log_prob_seqs = torch.sum(logprobs * batch["loss_mask"][:, 1:].float(), dim=-1)
199-
if self.log_prob_collapse_option == "mean":
200-
log_prob_seqs = log_prob_seqs / (batch["loss_mask"][:, 1:].float().sum(dim=-1) + 1e-8)
201-
return {"log_probs_seqs": log_prob_seqs.cpu(), "seq_idx": batch["seq_idx"].cpu()}
198+
log_prob_per_token = logprobs * batch["loss_mask"][:, 1:].float()
199+
if self.log_prob_collapse_option == "per_token":
200+
return {"log_probs_seqs": log_prob_per_token.cpu(), "seq_idx": batch["seq_idx"].cpu()}
201+
else:
202+
log_prob_seqs = torch.sum(log_prob_per_token, dim=1)
203+
if self.log_prob_collapse_option == "mean":
204+
log_prob_seqs = log_prob_seqs / (batch["loss_mask"][:, 1:].float().sum(dim=-1) + 1e-8)
205+
return {"log_probs_seqs": log_prob_seqs.cpu(), "seq_idx": batch["seq_idx"].cpu()}
202206
else:
203207
# If the user wants to match back to logits, then they will need to do the offsetting logic themselves.
204208
return {
@@ -504,7 +508,7 @@ def __init__(
504508
config,
505509
tokenizer=None,
506510
output_log_prob_seqs: bool = False,
507-
log_prob_collapse_option: Literal["sum", "mean"] = "mean",
511+
log_prob_collapse_option: Literal["sum", "mean", "per_token"] = "mean",
508512
):
509513
"""Initialize the MambaPredictor, which wraps the mamba model for prediction handling model parallelism.
510514

0 commit comments

Comments (0)