[pre-commit.ci] auto fixes from pre-commit.com hooks

pre-commit-ci[bot] · pre-commit-ci[bot] · commit 13d720fb14f0 · 2026-01-09T12:10:30.000Z
For more information, see https://pre-commit.ci
diff --git a/tests/test_batch_size_invariance.py b/tests/test_batch_size_invariance.py
@@ -6,7 +6,6 @@
 """
 
 import subprocess
-from pathlib import Path
 
 import pytest
 import torch
@@ -29,8 +28,13 @@ def test_gradient_scale_invariance(tmp_path, batch_size_a, batch_size_b):
     gradient scales invariant to batch size.
     """
     # Create two simple datasets
-    texts_a = [f"The quick brown fox jumps over the lazy dog {i}" for i in range(batch_size_a)]
-    texts_b = [f"A journey of a thousand miles begins with a single step {i}" for i in range(batch_size_b)]
+    texts_a = [
+        f"The quick brown fox jumps over the lazy dog {i}" for i in range(batch_size_a)
+    ]
+    texts_b = [
+        f"A journey of a thousand miles begins with a single step {i}"
+        for i in range(batch_size_b)
+    ]
 
     ds_a = Dataset.from_dict({"text": texts_a})
     ds_b = Dataset.from_dict({"text": texts_b})
@@ -50,12 +54,19 @@ def test_gradient_scale_invariance(tmp_path, batch_size_a, batch_size_b):
     def run_bergson_build(index_name: str, dataset_path: str):
         index_path = index_dir / index_name
         cmd = [
-            "bergson", "build", str(index_path),
-            "--model", "gpt2",  # Use small model for testing
-            "--dataset", dataset_path,
-            "--prompt_column", "text",
-            "--projection_dim", "8",  # Small for speed
-            "--token_batch_size", "1000",
+            "bergson",
+            "build",
+            str(index_path),
+            "--model",
+            "gpt2",  # Use small model for testing
+            "--dataset",
+            dataset_path,
+            "--prompt_column",
+            "text",
+            "--projection_dim",
+            "8",  # Small for speed
+            "--token_batch_size",
+            "1000",
         ]
         subprocess.run(cmd, check=True, capture_output=True)
         return index_path
@@ -66,12 +77,8 @@ def run_bergson_build(index_name: str, dataset_path: str):
     index_combined = run_bergson_build("combined", str(data_dir / "data_combined"))
 
     # Load gradients
-    grads_a = torch.from_numpy(
-        load_gradients(index_a, structured=False).copy()
-    ).float()
-    grads_b = torch.from_numpy(
-        load_gradients(index_b, structured=False).copy()
-    ).float()
+    grads_a = torch.from_numpy(load_gradients(index_a, structured=False).copy()).float()
+    grads_b = torch.from_numpy(load_gradients(index_b, structured=False).copy()).float()
     grads_combined = torch.from_numpy(
         load_gradients(index_combined, structured=False).copy()
     ).float()
@@ -88,8 +95,8 @@ def run_bergson_build(index_name: str, dataset_path: str):
 
     # With the fix (sum instead of mean), the standard deviations should be very close
     # We allow 20% tolerance to account for numerical noise and outliers
-    ratio_a = std_a_sep / std_a_comb if std_a_comb > 0 else float('inf')
-    ratio_b = std_b_sep / std_b_comb if std_b_comb > 0 else float('inf')
+    ratio_a = std_a_sep / std_a_comb if std_a_comb > 0 else float("inf")
+    ratio_b = std_b_sep / std_b_comb if std_b_comb > 0 else float("inf")
 
     # Before the fix, these ratios could be 6x or more different
     # After the fix, they should be close to 1.0
@@ -107,6 +114,6 @@ def run_bergson_build(index_name: str, dataset_path: str):
     a_comb_norm = grads_a_in_combined / grads_a_in_combined.norm(dim=1, keepdim=True)
     cosines = (a_norm * a_comb_norm).sum(dim=1)
 
-    assert cosines.mean() > 0.99, (
-        f"Gradients should point in the same direction: cosine similarity = {cosines.mean():.4f}"
-    )
+    assert (
+        cosines.mean() > 0.99
+    ), f"Gradients should point in the same direction: cosine similarity = {cosines.mean():.4f}"