[pre-commit.ci] auto fixes from pre-commit.com hooks

pre-commit-ci[bot] · pre-commit-ci[bot] · commit 780aae97bacf · 2026-01-21T03:30:19.000Z
for more information, see https://pre-commit.ci
diff --git a/examples/semantic/asymmetric.py b/examples/semantic/asymmetric.py
diff --git a/examples/semantic/attribute_preservation.py b/examples/semantic/attribute_preservation.py
diff --git a/examples/semantic/data.py b/examples/semantic/data.py
@@ -8,7 +8,9 @@
 from transformers import AutoModelForCausalLM, AutoTokenizer
 
 
-def reword(dataset: Dataset, model_name: str, prompt_template: str, batch_size: int = 8) -> Dataset:
+def reword(
+    dataset: Dataset, model_name: str, prompt_template: str, batch_size: int = 8
+) -> Dataset:
     """Reword facts in a dataset using a language model.
 
     Args:
diff --git a/examples/semantic/experiment.py b/examples/semantic/experiment.py
@@ -2,7 +2,6 @@
 
 import subprocess
 from pathlib import Path
-from typing import Any
 
 from datasets import Dataset, DatasetDict, concatenate_datasets, load_from_disk
 
@@ -51,7 +50,9 @@ def create_index(dataset_name: str, analysis_model_name: str) -> None:
         print(result.stderr)
 
 
-def finetune(dataset_path: str, analysis_model_name: str, finetuned_model_path: str) -> None:
+def finetune(
+    dataset_path: str, analysis_model_name: str, finetuned_model_path: str
+) -> None:
     """Finetune a model on a dataset using LoRA.
 
     Args:
@@ -145,9 +146,7 @@ def run_preconditioner_comparison() -> dict[str, dict[str, float]]:
         compute_scores_fast(
             base_path / "combined",  # Use precomputed gradients from combined index
             output_path,
-            preconditioner_path=(
-                base_path / prec_path if prec_path else None
-            ),
+            preconditioner_path=(base_path / prec_path if prec_path else None),
         )
 
     # 4. Compare metrics across strategies
@@ -179,7 +178,9 @@ def run_preconditioner_comparison() -> dict[str, dict[str, float]]:
             style_diff = s.get("intra_style", 0) - s.get("inter_style", 0)
             fact_diff = s.get("intra_fact", 0) - s.get("inter_fact_same_subject", 0)
             subj_diff = s.get("intra_subject", 0) - s.get("inter_subject", 0)
-            print(f"{name:<15} {style_diff:<12.4f} {fact_diff:<12.4f} {subj_diff:<12.4f}")
+            print(
+                f"{name:<15} {style_diff:<12.4f} {fact_diff:<12.4f} {subj_diff:<12.4f}"
+            )
 
     return all_stats
 
diff --git a/examples/semantic/metrics.py b/examples/semantic/metrics.py
@@ -6,7 +6,7 @@
 
 import numpy as np
 import torch
-from datasets import Dataset, DatasetDict, load_from_disk
+from datasets import DatasetDict, load_from_disk
 
 from bergson import load_gradient_dataset
 from bergson.data import load_gradients
@@ -29,10 +29,15 @@ def build_style_lookup(include_llama: bool = False) -> dict[tuple[str, str], str
         ("data/facts_dataset_pirate-Qwen3-8B-Base.hf", "pirate"),
     ]
     if include_llama:
-        style_datasets.extend([
-            ("data/facts_dataset_shakespeare-Meta-Llama-3-8B.hf", "shakespeare-llama"),
-            ("data/facts_dataset_pirate-Meta-Llama-3-8B.hf", "pirate-llama"),
-        ])
+        style_datasets.extend(
+            [
+                (
+                    "data/facts_dataset_shakespeare-Meta-Llama-3-8B.hf",
+                    "shakespeare-llama",
+                ),
+                ("data/facts_dataset_pirate-Meta-Llama-3-8B.hf", "pirate-llama"),
+            ]
+        )
     for path, style_name in style_datasets:
         ds = load_from_disk(path)
         if isinstance(ds, DatasetDict):
@@ -319,27 +324,29 @@ def compute_mean(mask: torch.Tensor) -> float:
     print("SEMANTIC SIMILARITY RESULTS")
     print("=" * 60)
 
-    print(f"\nSubject (same person vs different person):")
+    print("\nSubject (same person vs different person):")
     print(f"  Intra-subject mean: {stats['intra_subject']:.4f}")
     print(f"  Inter-subject mean: {stats['inter_subject']:.4f}")
     print(f"  Difference: {stats['intra_subject'] - stats['inter_subject']:.4f}")
 
-    print(f"\nFact (same person+field = same underlying fact):")
+    print("\nFact (same person+field = same underlying fact):")
     print(f"  Intra-fact mean: {stats['intra_fact']:.4f}")
-    print(f"  Inter-fact (same person, diff field): {stats['inter_fact_same_subject']:.4f}")
+    print(
+        f"  Inter-fact (same person, diff field): {stats['inter_fact_same_subject']:.4f}"
+    )
     print(f"  Difference: {stats['intra_fact'] - stats['inter_fact_same_subject']:.4f}")
 
-    print(f"\nField (same field type, e.g. birthdate, employer):")
+    print("\nField (same field type, e.g. birthdate, employer):")
     print(f"  Intra-field mean: {stats['intra_field']:.4f}")
     print(f"  Inter-field mean: {stats['inter_field']:.4f}")
     print(f"  Difference: {stats['intra_field'] - stats['inter_field']:.4f}")
 
-    print(f"\nTemplate (same original phrasing template):")
+    print("\nTemplate (same original phrasing template):")
     print(f"  Intra-template mean: {stats['intra_template']:.4f}")
     print(f"  Inter-template mean: {stats['inter_template']:.4f}")
     print(f"  Difference: {stats['intra_template'] - stats['inter_template']:.4f}")
 
-    print(f"\nStyle (same rewording style):")
+    print("\nStyle (same rewording style):")
     print(f"  Intra-style mean: {stats['intra_style']:.4f}")
     print(f"  Inter-style mean: {stats['inter_style']:.4f}")
     print(f"  Difference: {stats['intra_style'] - stats['inter_style']:.4f}")
diff --git a/examples/semantic/preconditioners.py b/examples/semantic/preconditioners.py
@@ -351,8 +351,16 @@ def compute_summed_loss_preconditioner(
     shakespeare_grads = load_gradients(shakespeare_path, structured=True)
 
     # Load datasets to match facts
-    pirate_ds = load_from_disk(str(pirate_path.parent / "pirate" / "dataset") if (pirate_path.parent / "pirate" / "dataset").exists() else "data/facts_dataset_pirate-Qwen3-8B-Base.hf")
-    shakespeare_ds = load_from_disk(str(shakespeare_path.parent / "shakespeare" / "dataset") if (shakespeare_path.parent / "shakespeare" / "dataset").exists() else "data/facts_dataset_shakespeare-Qwen3-8B-Base.hf")
+    pirate_ds = load_from_disk(
+        str(pirate_path.parent / "pirate" / "dataset")
+        if (pirate_path.parent / "pirate" / "dataset").exists()
+        else "data/facts_dataset_pirate-Qwen3-8B-Base.hf"
+    )
+    shakespeare_ds = load_from_disk(
+        str(shakespeare_path.parent / "shakespeare" / "dataset")
+        if (shakespeare_path.parent / "shakespeare" / "dataset").exists()
+        else "data/facts_dataset_shakespeare-Qwen3-8B-Base.hf"
+    )
 
     if hasattr(pirate_ds, "keys"):
         pirate_ds = pirate_ds["train"]
@@ -367,7 +375,9 @@ def compute_summed_loss_preconditioner(
     shakespeare_fact_to_idx = {f: i for i, f in enumerate(shakespeare_facts)}
 
     # Find common facts (contrastive pairs) and build aligned index arrays
-    common_facts = list(set(pirate_fact_to_idx.keys()) & set(shakespeare_fact_to_idx.keys()))
+    common_facts = list(
+        set(pirate_fact_to_idx.keys()) & set(shakespeare_fact_to_idx.keys())
+    )
     pirate_indices = [pirate_fact_to_idx[f] for f in common_facts]
     shakespeare_indices = [shakespeare_fact_to_idx[f] for f in common_facts]
     print(f"  Found {len(common_facts)} contrastive pairs")
@@ -469,7 +479,9 @@ def compute_pca_style_subspace(
     shakespeare_fact_to_idx = {f: i for i, f in enumerate(shakespeare_facts)}
 
     # Find common facts and build aligned index arrays
-    common_facts = list(set(pirate_fact_to_idx.keys()) & set(shakespeare_fact_to_idx.keys()))
+    common_facts = list(
+        set(pirate_fact_to_idx.keys()) & set(shakespeare_fact_to_idx.keys())
+    )
     pirate_indices = [pirate_fact_to_idx[f] for f in common_facts]
     shakespeare_indices = [shakespeare_fact_to_idx[f] for f in common_facts]
     print(f"  Found {len(common_facts)} contrastive pairs")
@@ -505,7 +517,9 @@ def compute_pca_style_subspace(
         # Get top-k (largest eigenvalues are at the end)
         k = min(top_k, eigvals.shape[0])
         top_eigvals = eigvals[-k:].flip(0)  # Descending order
-        top_eigvecs = eigvecs[:, -k:].flip(1)  # [d, k], columns are principal components
+        top_eigvecs = eigvecs[:, -k:].flip(
+            1
+        )  # [d, k], columns are principal components
 
         style_subspace[name] = (top_eigvecs, top_eigvals)
 
@@ -669,7 +683,9 @@ def compute_train_eval_mixed_preconditioner(
         print(f"Loading cached train-eval mixed preconditioner from {output_path}")
         return GradientProcessor.load(output_path)
 
-    print(f"Computing train-eval mixed preconditioner ({train_weight:.0%} train, {1-train_weight:.0%} eval)...")
+    print(
+        f"Computing train-eval mixed preconditioner ({train_weight:.0%} train, {1-train_weight:.0%} eval)..."
+    )
 
     train_path = Path(train_index_path)
     eval_path = Path(eval_grads_path)
diff --git a/examples/semantic/scoring.py b/examples/semantic/scoring.py
@@ -32,7 +32,11 @@ def load_scores_matrix(scores_path: Path | str) -> np.ndarray:
 
     # Handle both tuple format (from bergson) and list format (from JSON serialization)
     dtype_spec = info["dtype"]
-    if isinstance(dtype_spec, list) and len(dtype_spec) > 0 and isinstance(dtype_spec[0], list):
+    if (
+        isinstance(dtype_spec, list)
+        and len(dtype_spec) > 0
+        and isinstance(dtype_spec[0], list)
+    ):
         # Convert list of lists back to list of tuples
         dtype_spec = [tuple(item) for item in dtype_spec]
 
diff --git a/examples/train_lora.py b/examples/train_lora.py
@@ -274,8 +274,10 @@ def main():
     parser.add_argument("--split", type=str, default="test")
     parser.add_argument("--prompt_column", type=str, default="prompt")
     parser.add_argument("--completion_column", type=str, default="completion")
-    parser.add_argument("--no_push_to_private", action="store_false", dest="push_to_private")
-    
+    parser.add_argument(
+        "--no_push_to_private", action="store_false", dest="push_to_private"
+    )
+
     args = parser.parse_args()
 
     training_config = TrainingConfig(  # type: ignore