Merged

Commits (22)
da40a1a  br: test_tools  (broland-hat, Aug 20, 2025)
d259e74  br:  (broland-hat, Aug 20, 2025)
5027a51  br: update NeMo to 7ccb0d4 after NeMo PR14515  (broland-hat, Aug 20, 2025)
7330d87  br: get_devie_and_memory_allocated() move to torch.py [skip ci]"  (broland-hat, Aug 20, 2025)
9537a47  br: adjust memory thresholds [skip ci]  (broland-hat, Aug 20, 2025)
03ef6e6  Merge remote-tracking branch 'origin' into br_bnm2533_fix_evo2_tests_a  (broland-hat, Aug 20, 2025)
6e2a005  br: adjust sequence length cap for fixed memory requirements"  (broland-hat, Aug 20, 2025)
1c7d31d  br: run linter local  (broland-hat, Aug 21, 2025)
8f95f12  Merge branch 'main' into br_bnm2533_fix_evo2_tests_a  (broland-hat, Aug 21, 2025)
948bba3  br: linter on test_evo2.py  (broland-hat, Aug 21, 2025)
21d6b8f  br: linter for conftest.py  (broland-hat, Aug 21, 2025)
bb927fb  br: test module for coverage  (broland-hat, Aug 21, 2025)
1877b6b  br: linter for test module  (broland-hat, Aug 21, 2025)
239ecfe  Merge remote-tracking branch 'origin/main' into br_bnm2533_fix_evo2_t…  (broland-hat, Aug 21, 2025)
7ecedf3  br: enable is_fp8_supported and compute_cabability test  (broland-hat, Aug 21, 2025)
2b9ccdb  br: enable fp8 test  (broland-hat, Aug 21, 2025)
28586e5  Merge remote-tracking branch 'origin/main' into br_bnm2533_fix_evo2_t…  (broland-hat, Aug 22, 2025)
628d090  br: skipp test_generated_speed  (broland-hat, Aug 25, 2025)
320ed0a  br: skip test_golden_values_top_k_logits_and_cosine_similarity_7b  (broland-hat, Aug 25, 2025)
83f152a  br: linter  (broland-hat, Aug 25, 2025)
1f95151  Merge remote-tracking branch 'origin/main' into br_bnm2533_fix_evo2_t…  (broland-hat, Aug 25, 2025)
9e85a76  Merge remote-tracking branch 'origin/main' into br_bnm2533_fix_evo2_t…  (broland-hat, Aug 25, 2025)
2 changes: 1 addition & 1 deletion 3rdparty/NeMo
Submodule NeMo updated from ee0be1 to 7ccb0d
22 changes: 15 additions & 7 deletions sub-packages/bionemo-evo2/tests/bionemo/evo2/conftest.py
@@ -20,23 +20,31 @@
 import pytest
 import torch
 
+from bionemo.testing.torch import get_device_and_memory_allocated
+
 
 def pytest_sessionstart(session):
     """Called at the start of the test session."""
     if torch.cuda.is_available():
         torch.cuda.reset_peak_memory_stats()
-        print(f"Starting test session. Initial GPU memory: {torch.cuda.memory_allocated() / 1024**3:.3f} GB")
+        print(
+            f"""
+            sub-packages/bionemo-evo2/tests/bionemo/evo2: Starting test session
+            {get_device_and_memory_allocated()}
+            """
+        )
 
 
 def pytest_sessionfinish(session, exitstatus):
     """Called at the end of the test session."""
     if torch.cuda.is_available():
-        peak_memory = torch.cuda.max_memory_allocated()
-        final_memory = torch.cuda.memory_allocated()
-        print("\nTest session complete:")
-        print(f"  Peak GPU memory: {peak_memory / 1024**3:.3f} GB")
-        print(f"  Final GPU memory: {final_memory / 1024**3:.3f} GB")
+        print(
+            f"""
+            sub-packages/bionemo-evo2/tests/bionemo/evo2: Test session complete
+            {get_device_and_memory_allocated()}
+            """
+        )
 
 
 @pytest.fixture(autouse=True)
 def cleanup_after_test():
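The body of the autouse cleanup fixture is collapsed in the diff above. For orientation only, a minimal sketch of the usual shape of such a fixture, assuming it frees cached GPU memory between tests so per-test peaks stay comparable (an assumed body, not the PR's actual code):

import gc

import pytest
import torch


@pytest.fixture(autouse=True)
def cleanup_after_test():
    """Hypothetical sketch: run the test, then release GPU memory it cached."""
    yield  # the test runs here
    gc.collect()  # drop Python references so CUDA blocks become reclaimable
    if torch.cuda.is_available():
        torch.cuda.empty_cache()  # return cached blocks to the driver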
15 changes: 10 additions & 5 deletions sub-packages/bionemo-evo2/tests/bionemo/evo2/test_evo2.py
@@ -48,6 +48,11 @@
 logger.setLevel(logging.DEBUG)  # Capture all levels in the logger itself
 
 
+MEM_REQUIREMENT_1B_GB = 18  # add 0.6 GB to max mem reserved, and round up
+MEM_REQUIREMENT_7B_GB = 48
+
+
+
 def load_weights_sharded_inplace_nemo2_to_mcore(
     model: MegatronModelType,
     distributed_checkpoint_dir: str | Path,
@@ -365,7 +370,7 @@ def check_matchrate(*, ckpt_name, matchrate, assert_matchrate=True):
 def test_forward(sequences: list[str], ckpt_name: str, expected_matchpercents: list[float]):
     assert len(sequences) > 0
     gb_available = torch.cuda.mem_get_info()[0] / 1024**3
-    if (gb_available < 38 and "1b" in ckpt_name) or (gb_available < 50 and "7b" in ckpt_name):
+    if (gb_available < MEM_REQUIREMENT_1B_GB and "1b" in ckpt_name) or (gb_available < MEM_REQUIREMENT_7B_GB and "7b" in ckpt_name):
         pytest.skip(
-            f"Inference API requires more than 38GB of memory for 1b models, or 50GB for 7b models. {gb_available=}"
+            f"Inference API requires more than {MEM_REQUIREMENT_1B_GB}GB of memory for 1b models, or {MEM_REQUIREMENT_7B_GB}GB for 7b models. {gb_available=}"
         )
@@ -429,7 +434,7 @@ def test_forward_manual(sequences: list[str], ckpt_name: str, expected_matchperc
     is_fp8_supported, compute_capability, device_info = check_fp8_support(torch.cuda.current_device())
     skip = "evo2/1b-8k:" in ckpt_name and not is_fp8_supported
     gb_available = torch.cuda.mem_get_info()[0] / 1024**3
-    if (gb_available < 38 and flash_decode) or (gb_available < 50 and flash_decode and "7b" in ckpt_name):
+    if (gb_available < MEM_REQUIREMENT_1B_GB and flash_decode) or (gb_available < MEM_REQUIREMENT_7B_GB and flash_decode and "7b" in ckpt_name):
         pytest.skip(
-            f"Inference API requires more than 38GB of memory for 1b models, or 50GB for 7b models. {gb_available=}"
+            f"Inference API requires more than {MEM_REQUIREMENT_1B_GB}GB of memory for 1b models, or {MEM_REQUIREMENT_7B_GB}GB for 7b models. {gb_available=}"
         )
@@ -544,7 +549,7 @@ def test_batch_generate(
     assert len(sequences) > 0
     is_fp8_supported, compute_capability, device_info = check_fp8_support(torch.cuda.current_device())
     gb_available = torch.cuda.mem_get_info()[0] / 1024**3
-    if (gb_available < 38 and "1b" in ckpt_name) or (gb_available < 50 and "7b" in ckpt_name):
+    if (gb_available < MEM_REQUIREMENT_1B_GB and "1b" in ckpt_name) or (gb_available < MEM_REQUIREMENT_7B_GB and "7b" in ckpt_name):
         pytest.skip(
-            f"Inference API requires more than 38GB of memory for 1b models, or 50GB for 7b models. {gb_available=}"
+            f"Inference API requires more than {MEM_REQUIREMENT_1B_GB}GB of memory for 1b models, or {MEM_REQUIREMENT_7B_GB}GB for 7b models. {gb_available=}"
         )
@@ -615,7 +620,7 @@ def test_batch_generate_coding_sequences(
 ):
     assert len(coding_sequences) > 0
     gb_available = torch.cuda.mem_get_info()[0] / 1024**3
-    if (gb_available < 38 and "1b" in ckpt_name) or (gb_available < 50 and "7b" in ckpt_name):
+    if (gb_available < MEM_REQUIREMENT_1B_GB and "1b" in ckpt_name) or (gb_available < MEM_REQUIREMENT_7B_GB and "7b" in ckpt_name):
         pytest.skip(
-            f"Inference API requires more than 38GB of memory for 1b models, or 50GB for 7b models. {gb_available=}"
+            f"Inference API requires more than {MEM_REQUIREMENT_1B_GB}GB of memory for 1b models, or {MEM_REQUIREMENT_7B_GB}GB for 7b models. {gb_available=}"
         )
@@ -724,7 +729,7 @@ def test_generate_speed(
 ):
     is_fp8_supported, compute_capability, device_info = check_fp8_support(torch.cuda.current_device())
     gb_available = torch.cuda.mem_get_info()[0] / 1024**3
-    if (gb_available < 38 and "1b" in ckpt_name) or (gb_available < 50 and "7b" in ckpt_name):
+    if (gb_available < MEM_REQUIREMENT_1B_GB and "1b" in ckpt_name) or (gb_available < MEM_REQUIREMENT_7B_GB and "7b" in ckpt_name):
         pytest.skip(
-            f"Inference API requires more than 38GB of memory for 1b models, or 50GB for 7b models. {gb_available=}"
+            f"Inference API requires more than {MEM_REQUIREMENT_1B_GB}GB of memory for 1b models, or {MEM_REQUIREMENT_7B_GB}GB for 7b models. {gb_available=}"
         )
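The same free-memory gate is now repeated in five tests. A sketch of how it could be factored into a shared helper; skip_if_low_gpu_memory is a hypothetical name, not part of this PR:

import pytest
import torch

MEM_REQUIREMENT_1B_GB = 18
MEM_REQUIREMENT_7B_GB = 48


def skip_if_low_gpu_memory(ckpt_name: str) -> None:
    """Skip the calling test when free GPU memory is below the model's threshold."""
    gb_available = torch.cuda.mem_get_info()[0] / 1024**3  # free bytes -> GB
    required = MEM_REQUIREMENT_7B_GB if "7b" in ckpt_name else MEM_REQUIREMENT_1B_GB
    if gb_available < required:
        pytest.skip(f"Requires at least {required} GB of free GPU memory. {gb_available=:.1f}")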
18 changes: 18 additions & 0 deletions sub-packages/bionemo-testing/src/bionemo/testing/torch.py
@@ -61,3 +61,21 @@ def recursive_assert_approx_equal(x, y, atol=1e-4, rtol=1e-4):
             recursive_assert_approx_equal(x[key], y[key], atol=atol, rtol=rtol)
     else:
         assert x == y
+
+
+def get_device_and_memory_allocated() -> str:
+    """Return the current device index, uuid, name, and memory usage as a formatted string."""
+    current_device_index = torch.cuda.current_device()
+    props = torch.cuda.get_device_properties(current_device_index)
+    message = (
+        f"""
+        current device index: {current_device_index}
+        current device uuid: {props.uuid}
+        current device name: {props.name}
+        memory, total on device: {torch.cuda.mem_get_info()[1] / 1024**3:.3f} GB
+        memory, available on device: {torch.cuda.mem_get_info()[0] / 1024**3:.3f} GB
+        memory allocated for tensors etc: {torch.cuda.memory_allocated() / 1024**3:.3f} GB
+        peak memory allocated for tensors etc: {torch.cuda.max_memory_allocated() / 1024**3:.3f} GB
+        """
+    )
+    return message
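For context, a minimal sketch of exercising the new helper around an allocation; the tensor and its size are illustrative, not from the PR:

import torch

from bionemo.testing.torch import get_device_and_memory_allocated

if torch.cuda.is_available():
    torch.cuda.reset_peak_memory_stats()  # start peak tracking from a clean slate
    x = torch.empty(1024, 1024, device="cuda")  # ~4 MB fp32, purely illustrative
    print(get_device_and_memory_allocated())  # device index/uuid/name plus memory figures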