Skip to content

Commit dd4f626

Browse files
authored
[BIONEMO-2473] Added tests for Evo2 LoRA fine-tuning (#1060)
### Description Fixes and added test for Evo2LoRA ### Type of changes <!-- Mark the relevant option with an [x] --> - [x] Bug fix (non-breaking change which fixes an issue) - [ ] New feature (non-breaking change which adds functionality) - [ ] Refactor - [ ] Documentation update - [ ] Other (please describe): ### CI Pipeline Configuration Configure CI behavior by applying the relevant labels: - [SKIP_CI](https://github.com/NVIDIA/bionemo-framework/blob/main/docs/docs/user-guide/contributing/contributing.md#skip_ci) - Skip all continuous integration tests - [INCLUDE_NOTEBOOKS_TESTS](https://github.com/NVIDIA/bionemo-framework/blob/main/docs/docs/user-guide/contributing/contributing.md#include_notebooks_tests) - Execute notebook validation tests in pytest - [INCLUDE_SLOW_TESTS](https://github.com/NVIDIA/bionemo-framework/blob/main/docs/docs/user-guide/contributing/contributing.md#include_slow_tests) - Execute tests labelled as slow in pytest for extensive testing > [!NOTE] > By default, the notebooks validation tests are skipped unless explicitly enabled. #### Authorizing CI Runs We use [copy-pr-bot](https://docs.gha-runners.nvidia.com/apps/copy-pr-bot/#automation) to manage authorization of CI runs on NVIDIA's compute resources. - If a pull request is opened by a trusted user and contains only trusted changes, the pull request's code will automatically be copied to a pull-request/ prefixed branch in the source repository (e.g. pull-request/123) - If a pull request is opened by an untrusted user or contains untrusted changes, an NVIDIA org member must leave an `/ok to test` comment on the pull request to trigger CI. This will need to be done for each new commit. 
### Usage <!--- How does a user interact with the changed code --> ```python # TODO: Add code snippet ``` ### Pre-submit Checklist <!--- Ensure all items are completed before submitting --> - [x] I have tested these changes locally - [x] I have updated the documentation accordingly - [x] I have added/updated tests as needed - [ ] All existing tests pass successfully <!-- This is an auto-generated comment: release notes by coderabbit.ai --> ## Summary by CodeRabbit * **New Features** * Expose controls for mock dataset sizes (train/val/test) for training runs. * LoRA finetuning flow simplified; LoRA integration now passes a preconstructed transform and checkpoint paths accept plain strings. * **Tests** * Added end-to-end integration tests for pretraining, finetuning, and LoRA finetuning with artifact and loss validations. * Introduced shared test helpers for constructing small training/finetune commands and consolidated imports. * **Chores** * Updated/cleaned license header boilerplate in tests. <!-- end of auto-generated comment: release notes by coderabbit.ai --> Signed-off-by: Bruno Alvisio <balvisio@nvidia.com>
1 parent 58297ae commit dd4f626

File tree

6 files changed

+285
-43
lines changed

6 files changed

+285
-43
lines changed

sub-packages/bionemo-evo2/src/bionemo/evo2/run/peft.py renamed to sub-packages/bionemo-evo2/src/bionemo/evo2/models/peft.py

Lines changed: 0 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -13,20 +13,6 @@
1313
# See the License for the specific language governing permissions and
1414
# limitations under the License.
1515

16-
17-
# Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved.
18-
#
19-
# Licensed under the Apache License, Version 2.0 (the "License");
20-
# you may not use this file except in compliance with the License.
21-
# You may obtain a copy of the License at
22-
#
23-
# http://www.apache.org/licenses/LICENSE-2.0
24-
#
25-
# Unless required by applicable law or agreed to in writing, software
26-
# distributed under the License is distributed on an "AS IS" BASIS,
27-
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
28-
# See the License for the specific language governing permissions and
29-
# limitations under the License.
3016
from copy import deepcopy
3117
from typing import List, Optional
3218

sub-packages/bionemo-evo2/src/bionemo/evo2/run/train.py

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -38,7 +38,6 @@
3838
)
3939
from nemo.collections.nlp.modules.common.tokenizer_utils import get_nmt_tokenizer
4040
from nemo.lightning.pytorch import callbacks as nl_callbacks
41-
from nemo.lightning.pytorch.callbacks import ModelTransform
4241
from nemo.lightning.pytorch.callbacks.flops_callback import FLOPsMeasurementCallback
4342
from nemo.lightning.pytorch.callbacks.megatron_comm_overlap import MegatronCommOverlapCallback
4443
from nemo.lightning.pytorch.optim import CosineAnnealingScheduler
@@ -49,7 +48,7 @@
4948
from bionemo.evo2.data.sharded_eden_dataloader import ShardedEdenDataModule
5049
from bionemo.evo2.models.llama import LLAMA_MODEL_OPTIONS
5150
from bionemo.evo2.models.mamba import MAMBA_MODEL_OPTIONS, MambaModel, mamba_no_weight_decay_cond_with_embeddings
52-
from bionemo.evo2.run.peft import Evo2LoRA
51+
from bionemo.evo2.models.peft import Evo2LoRA
5352
from bionemo.evo2.utils.callbacks import GarbageCollectAtInferenceTime
5453
from bionemo.evo2.utils.config import hyena_no_weight_decay_cond_with_embeddings
5554
from bionemo.evo2.utils.logging.callbacks import TEVCallback
@@ -611,7 +610,7 @@ def parse_args(args: Optional[List[str]] = None) -> argparse.Namespace:
611610
help="Disable saving the last checkpoint.",
612611
)
613612
parser.add_argument("--lora-finetune", action="store_true", help="Use LoRA fine-tuning", default=False)
614-
parser.add_argument("--lora-checkpoint-path", type=Path, default=None, help="LoRA checkpoint path")
613+
parser.add_argument("--lora-checkpoint-path", type=str, default=None, help="LoRA checkpoint path")
615614
parser.add_argument(
616615
"--no-calculate-per-token-loss",
617616
action="store_true",
@@ -669,6 +668,9 @@ def train(args: argparse.Namespace) -> nl.Trainer:
669668
seq_length=args.seq_length,
670669
micro_batch_size=args.micro_batch_size,
671670
global_batch_size=global_batch_size,
671+
num_train_samples=args.max_steps * global_batch_size,
672+
num_val_samples=args.limit_val_batches * global_batch_size,
673+
num_test_samples=1,
672674
num_workers=args.workers,
673675
tokenizer=tokenizer,
674676
)
@@ -823,7 +825,7 @@ def train(args: argparse.Namespace) -> nl.Trainer:
823825
callbacks.append(GarbageCollectAtInferenceTime())
824826

825827
if args.lora_finetune:
826-
callbacks.append(ModelTransform())
828+
callbacks.append(lora_transform)
827829
if args.enable_preemption:
828830
callbacks.append(nl_callbacks.PreemptionCallback())
829831
if args.debug_ddp_parity_freq > 0:
Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,14 @@
1+
# SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2+
# SPDX-License-Identifier: LicenseRef-Apache2
3+
#
4+
# Licensed under the Apache License, Version 2.0 (the "License");
5+
# you may not use this file except in compliance with the License.
6+
# You may obtain a copy of the License at
7+
#
8+
# http://www.apache.org/licenses/LICENSE-2.0
9+
#
10+
# Unless required by applicable law or agreed to in writing, software
11+
# distributed under the License is distributed on an "AS IS" BASIS,
12+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
# See the License for the specific language governing permissions and
14+
# limitations under the License.
Lines changed: 60 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,60 @@
1+
# SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2+
# SPDX-FileCopyrightText: Copyright (c) 2024 Arc Institute. All rights reserved.
3+
# SPDX-FileCopyrightText: Copyright (c) 2024 Michael Poli. All rights reserved.
4+
# SPDX-FileCopyrightText: Copyright (c) 2024 Stanford University. All rights reserved
5+
# SPDX-License-Identifier: LicenseRef-Apache2
6+
#
7+
# Licensed under the Apache License, Version 2.0 (the "License");
8+
# you may not use this file except in compliance with the License.
9+
# You may obtain a copy of the License at
10+
#
11+
# http://www.apache.org/licenses/LICENSE-2.0
12+
#
13+
# Unless required by applicable law or agreed to in writing, software
14+
# distributed under the License is distributed on an "AS IS" BASIS,
15+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16+
# See the License for the specific language governing permissions and
17+
# limitations under the License.
18+
19+
20+
def small_training_cmd(
21+
path,
22+
max_steps,
23+
val_check,
24+
global_batch_size: int | None = None,
25+
devices: int = 1,
26+
additional_args: str = "",
27+
):
28+
"""Command for training."""
29+
cmd = (
30+
f"train_evo2 --mock-data --result-dir {path} --devices {devices} "
31+
"--model-size 1b_nv --num-layers 4 --hybrid-override-pattern SDH* --limit-val-batches 1 "
32+
"--no-activation-checkpointing --add-bias-output --create-tensorboard-logger --create-tflops-callback "
33+
f"--max-steps {max_steps} --warmup-steps 1 --val-check-interval {val_check} "
34+
f"--seq-length 16 --hidden-dropout 0.1 --attention-dropout 0.1 {additional_args} "
35+
f"{'--global-batch-size ' + str(global_batch_size) if global_batch_size is not None else ''}"
36+
)
37+
return cmd
38+
39+
40+
def small_training_finetune_cmd(
41+
path,
42+
max_steps,
43+
val_check,
44+
prev_ckpt,
45+
devices: int = 1,
46+
global_batch_size: int | None = None,
47+
create_tflops_callback: bool = True,
48+
additional_args: str = "",
49+
):
50+
"""Command for finetuning."""
51+
cmd = (
52+
f"train_evo2 --mock-data --result-dir {path} --devices {devices} "
53+
"--model-size 1b_nv --num-layers 4 --hybrid-override-pattern SDH* --limit-val-batches 1 "
54+
"--no-activation-checkpointing --add-bias-output --create-tensorboard-logger "
55+
f"--max-steps {max_steps} --warmup-steps 1 --val-check-interval {val_check} "
56+
f"--seq-length 16 --hidden-dropout 0.1 --attention-dropout 0.1 {additional_args} --ckpt-dir {prev_ckpt} "
57+
f"{'--create-tflops-callback' if create_tflops_callback else ''} "
58+
f"{'--global-batch-size ' + str(global_batch_size) if global_batch_size is not None else ''}"
59+
)
60+
return cmd
Lines changed: 202 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,202 @@
1+
# SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2+
# SPDX-FileCopyrightText: Copyright (c) 2024 Arc Institute. All rights reserved.
3+
# SPDX-FileCopyrightText: Copyright (c) 2024 Michael Poli. All rights reserved.
4+
# SPDX-FileCopyrightText: Copyright (c) 2024 Stanford University. All rights reserved
5+
# SPDX-License-Identifier: LicenseRef-Apache2
6+
#
7+
# Licensed under the Apache License, Version 2.0 (the "License");
8+
# you may not use this file except in compliance with the License.
9+
# You may obtain a copy of the License at
10+
#
11+
# http://www.apache.org/licenses/LICENSE-2.0
12+
#
13+
# Unless required by applicable law or agreed to in writing, software
14+
# distributed under the License is distributed on an "AS IS" BASIS,
15+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16+
# See the License for the specific language governing permissions and
17+
# limitations under the License.
18+
19+
import re
20+
21+
import pytest
22+
23+
from bionemo.testing.subprocess_utils import run_command_in_subprocess
24+
25+
from .common import small_training_cmd, small_training_finetune_cmd
26+
27+
28+
def extract_val_losses(log_text: str, n: int):
    """Extract every n-th ``val_loss`` value from captured training log output.

    Note: the first element of each returned tuple is the 0-based *occurrence
    index* of the ``val_loss`` match within the log text, not the trainer's
    step number.

    Args:
        log_text (str): The log output as a string.
        n (int): Interval of occurrences (e.g., n=5 -> occurrences 0, 5, 10...).

    Returns:
        List of tuples: (occurrence_index, validation_loss_value).

    Raises:
        ValueError: If ``n`` is not a positive integer (the previous
            implementation crashed with ``ZeroDivisionError`` for n=0).
    """
    if n <= 0:
        raise ValueError(f"n must be a positive integer, got {n}")
    # Regex to capture val_loss values
    pattern = re.compile(r"val_loss: ([0-9.]+)")
    return [
        (idx, float(match.group(1)))
        for idx, match in enumerate(pattern.finditer(log_text))
        if idx % n == 0  # take every n-th val_loss occurrence
    ]
49+
50+
51+
@pytest.mark.timeout(2048)  # Optional: fail if the test takes too long.
@pytest.mark.slow
@pytest.mark.parametrize("with_peft", [True, False])
def test_train_evo2_finetune_runs(tmp_path, with_peft: bool):
    """End-to-end pretrain -> finetune (-> LoRA resume) smoke test for `train_evo2`.

    Runs the `train_evo2` command with mock data in a temporary directory
    (provided by pytest) via a subshell, and asserts that each stage produces
    the expected artifacts (logs, checkpoints, TensorBoard events) and a
    non-increasing validation loss. Assumes `train_evo2` is on PATH.
    """
    num_steps = 25
    val_steps = 10
    global_batch_size = 128
    # Checkpoint folder names embed the number of consumed samples.
    expected_checkpoint_suffix = f"{num_steps * global_batch_size}.0-last"

    # --- Stage 1: pretraining from scratch ---
    command = small_training_cmd(
        tmp_path / "pretrain",
        max_steps=num_steps,
        val_check=val_steps,
        global_batch_size=global_batch_size,
        additional_args=" --lr 0.1 ",
    )
    stdout_pretrain: str = run_command_in_subprocess(command=command, path=str(tmp_path))
    # Pretraining must start from scratch, not restore any weights.
    assert "Restoring model weights from RestoreConfig(path='" not in stdout_pretrain

    matching_subfolders = _assert_training_artifacts(tmp_path / "pretrain", expected_checkpoint_suffix)
    _assert_val_losses_non_increasing(stdout_pretrain, val_steps)

    # --- Stage 2: finetuning from the pretraining checkpoint ---
    if with_peft:
        result_dir = tmp_path / "lora_finetune"
        additional_args = "--lora-finetune --lr 0.1 "
    else:
        result_dir = tmp_path / "finetune"
        additional_args = " --lr 0.1 "

    command_finetune = small_training_finetune_cmd(
        result_dir,
        max_steps=num_steps,
        val_check=val_steps,
        global_batch_size=global_batch_size,
        prev_ckpt=matching_subfolders[0],
        create_tflops_callback=not with_peft,
        additional_args=additional_args,
    )
    stdout_finetune: str = run_command_in_subprocess(command=command_finetune, path=str(tmp_path))
    # Finetuning must restore the pretrained weights.
    assert "Restoring model weights from RestoreConfig(path='" in stdout_finetune

    matching_subfolders_finetune = _assert_training_artifacts(result_dir, expected_checkpoint_suffix)
    _assert_val_losses_non_increasing(stdout_finetune, val_steps)

    # --- Stage 3 (LoRA only): resume from the saved LoRA checkpoint ---
    if with_peft:
        result_dir = tmp_path / "lora_finetune_resume"
        command_resume_finetune = small_training_finetune_cmd(
            result_dir,
            max_steps=num_steps,
            val_check=val_steps,
            global_batch_size=global_batch_size,
            prev_ckpt=matching_subfolders[0],
            create_tflops_callback=False,
            additional_args=f"--lora-finetune --lora-checkpoint-path {matching_subfolders_finetune[0]} --lr 0.1 ",
        )
        stdout_resume: str = run_command_in_subprocess(command=command_resume_finetune, path=str(tmp_path))

        # The resume run checks artifacts but no checkpoint-name suffix.
        _assert_training_artifacts(result_dir, expected_checkpoint_suffix=None)
        _assert_val_losses_non_increasing(stdout_resume, val_steps)


def _assert_training_artifacts(result_dir, expected_checkpoint_suffix):
    """Assert the standard `train_evo2` artifacts exist under *result_dir*.

    Checks the logs folder, the checkpoints folder, and that exactly one
    TensorBoard event file was written. When *expected_checkpoint_suffix* is
    not None, also asserts exactly one checkpoint subfolder matches it.

    Returns:
        The list of matching checkpoint subfolders (empty when
        *expected_checkpoint_suffix* is None).
    """
    log_dir = result_dir / "evo2"
    checkpoints_dir = log_dir / "checkpoints"
    tensorboard_dir = log_dir / "dev"

    # Check if logs dir exists
    assert log_dir.exists(), "Logs folder should exist."
    # Check if checkpoints dir exists
    assert checkpoints_dir.exists(), "Checkpoints folder does not exist."

    matching_subfolders = []
    if expected_checkpoint_suffix is not None:
        # Check if any subfolder ends with the expected suffix
        matching_subfolders = [
            p for p in checkpoints_dir.iterdir() if p.is_dir() and (expected_checkpoint_suffix in p.name)
        ]
        assert matching_subfolders, (
            f"No checkpoint subfolder ending with '{expected_checkpoint_suffix}' found in {checkpoints_dir}."
        )
        assert len(matching_subfolders) == 1, "Only one checkpoint subfolder should be found."

    # Recursively search for files with tensorboard logger; exactly one
    # event file is expected per run.
    event_files = list(tensorboard_dir.rglob("events.out.tfevents*"))
    assert len(event_files) == 1, f"No or multiple TensorBoard event files found under {tensorboard_dir}"
    return matching_subfolders


def _assert_val_losses_non_increasing(stdout: str, val_steps: int):
    """Assert that each sampled validation loss is <= the previous one."""
    val_losses = extract_val_losses(stdout, val_steps)
    for i in range(1, len(val_losses)):
        assert val_losses[i][1] <= val_losses[i - 1][1], (
            f"Validation loss increased at step {val_losses[i][0]}: {val_losses[i][1]} > {val_losses[i - 1][1]}"
        )

sub-packages/bionemo-evo2/tests/bionemo/evo2/run/test_train.py

Lines changed: 3 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -30,14 +30,14 @@
3030
from bionemo.testing.megatron_parallel_state_utils import distributed_model_parallel_state
3131
from bionemo.testing.subprocess_utils import run_command_in_subprocess
3232

33+
from .common import small_training_cmd, small_training_finetune_cmd
34+
3335

3436
fp8_available, reason_for_no_fp8 = check_fp8_support()
3537

3638

3739
def run_train_with_std_redirect(args: argparse.Namespace) -> Tuple[str, nl.Trainer]:
38-
"""
39-
Run a function with output capture.
40-
"""
40+
"""Run a function with output capture."""
4141
stdout_buf, stderr_buf = io.StringIO(), io.StringIO()
4242
with redirect_stdout(stdout_buf), redirect_stderr(stderr_buf):
4343
with distributed_model_parallel_state():
@@ -50,28 +50,6 @@ def run_train_with_std_redirect(args: argparse.Namespace) -> Tuple[str, nl.Train
5050
return train_stdout, trainer
5151

5252

53-
def small_training_cmd(path, max_steps, val_check, devices: int = 1, additional_args: str = ""):
54-
cmd = (
55-
f"train_evo2 --mock-data --result-dir {path} --devices {devices} "
56-
"--model-size 1b_nv --num-layers 4 --hybrid-override-pattern SDH* --limit-val-batches 1 "
57-
"--no-activation-checkpointing --add-bias-output --create-tensorboard-logger --create-tflops-callback "
58-
f"--max-steps {max_steps} --warmup-steps 1 --val-check-interval {val_check} --limit-val-batches 1 "
59-
f"--seq-length 16 --hidden-dropout 0.1 --attention-dropout 0.1 {additional_args}"
60-
)
61-
return cmd
62-
63-
64-
def small_training_finetune_cmd(path, max_steps, val_check, prev_ckpt, devices: int = 1, additional_args: str = ""):
65-
cmd = (
66-
f"train_evo2 --mock-data --result-dir {path} --devices {devices} "
67-
"--model-size 1b_nv --num-layers 4 --hybrid-override-pattern SDH* --limit-val-batches 1 "
68-
"--no-activation-checkpointing --add-bias-output --create-tensorboard-logger --create-tflops-callback "
69-
f"--max-steps {max_steps} --warmup-steps 1 --val-check-interval {val_check} --limit-val-batches 1 "
70-
f"--seq-length 16 --hidden-dropout 0.1 --attention-dropout 0.1 {additional_args} --ckpt-dir {prev_ckpt}"
71-
)
72-
return cmd
73-
74-
7553
def small_training_mamba_cmd(path, max_steps, val_check, devices: int = 1, additional_args: str = ""):
7654
cmd = (
7755
f"train_evo2 --mock-data --result-dir {path} --devices {devices} "

0 commit comments

Comments
 (0)