Memmap shuffling #216

Open · wants to merge 24 commits into main

Commits (24)
ee78d91
call dataloader next consistent with async loading
jbloomAus Jul 2, 2024
eda7db7
start using np.memmap (but with none of the actual advantages)
jbloomAus Jul 2, 2024
bff803f
various changes, iterating toward np.memmap
jbloomAus Jul 6, 2024
c486f05
deduplicate small mathematical operations
Lewington-pitsos Jul 6, 2024
112960a
named parameters for shuffle idxs
Lewington-pitsos Jul 6, 2024
e01238d
add diagnostic prints, fix typing in activations_store
Lewington-pitsos Jul 6, 2024
545a33c
add diagnostic prints, fix typing in activations_store
Lewington-pitsos Jul 6, 2024
7eec1c2
rename shuffling methods
Lewington-pitsos Jul 6, 2024
a720ad0
add dataset override for cache activations runner, update test to use…
Lewington-pitsos Jul 7, 2024
18f4216
replicate error using activationstore alone
Lewington-pitsos Jul 7, 2024
c053910
replicate error using activationstore alone
Lewington-pitsos Jul 7, 2024
a76e4ed
fix float32 vs float16 memmap double size issue
Lewington-pitsos Jul 7, 2024
4b7fbdb
fix typing
Lewington-pitsos Jul 7, 2024
bf3e28d
skip test_load_cached_activations
Lewington-pitsos Jul 7, 2024
7a74cf6
get all unit test passing with new next_batch functionality
Lewington-pitsos Jul 7, 2024
9ca4aba
Merge branch 'main' into memmap_shuffling
Lewington-pitsos Jul 7, 2024
43ccbb8
merge with main
Lewington-pitsos Jul 7, 2024
d82b17c
format
Lewington-pitsos Jul 7, 2024
a955a7f
add small test of next_batch functionality
Lewington-pitsos Jul 7, 2024
ac45f4c
format again
Lewington-pitsos Jul 7, 2024
c80de83
map bfloat16 to np.float32
Lewington-pitsos Jul 7, 2024
fc147d5
reformat
Jul 7, 2024
4e8847b
add memory pinning
Jul 7, 2024
fd0d499
reformat
Lewington-pitsos Jul 8, 2024
1 change: 0 additions & 1 deletion docs/generate_sae_table.py
@@ -66,7 +66,6 @@ def generate_sae_table():
# )

for info in tqdm(model_info["saes"]):

# can remove this by explicitly overriding config in yaml. Do this later.
if model_info["conversion_func"] == "connor_rob_hook_z":
repo_id = model_info["repo_id"]
1 change: 0 additions & 1 deletion sae_lens/analysis/hooked_sae_transformer.py
@@ -57,7 +57,6 @@ def set_deep_attr(obj: Any, path: str, value: Any):


class HookedSAETransformer(HookedTransformer):

def __init__(
self,
*model_args: Any,
2 changes: 0 additions & 2 deletions sae_lens/analysis/neuronpedia_runner.py
@@ -62,7 +62,6 @@ def default(self, o: Any):


class NeuronpediaRunner:

def __init__(
self,
sae_id: str,
@@ -83,7 +82,6 @@ def __init__(
top_acts_group_size: int = 20,
quantile_group_size: int = 5,
):

self.device = "cpu"
if torch.backends.mps.is_available():
self.device = "mps"
4 changes: 0 additions & 4 deletions sae_lens/analysis/tsea.py
@@ -17,7 +17,6 @@ def get_enrichment_df(
features: list[int],
gene_sets_selected: dict[str, set[int]],
):

gene_sets_token_ids_padded = pad_gene_sets(gene_sets_selected)
gene_sets_token_ids_tensor = torch.tensor(list(gene_sets_token_ids_padded.values()))
enrichment_scores = calculate_batch_enrichment_scores(
@@ -91,7 +90,6 @@ def calculate_batch_enrichment_scores(scores: torch.Tensor, index_lists: torch.T
def manhattan_plot_enrichment_scores(
df_enrichment_scores: pd.DataFrame, label_threshold: float = 1.0, top_n: int = 3
):

tmp_df = df_enrichment_scores.apply(lambda x: -1 * np.log(1 - x))

# wide to long format
@@ -167,7 +165,6 @@ def plot_top_k_feature_projections_by_token_and_category(
log_y: bool = True,
histnorm: Optional[str] = None,
):

if not os.path.exists("es_plots"):
os.makedirs("es_plots")

@@ -291,7 +288,6 @@ def get_gene_set_from_regex(vocab: dict[str, int], pattern: str) -> set[int]:


def get_test_gene_sets(model: HookedTransformer) -> dict[str, set[int]]:

colors = [
"red",
"blue",
164 changes: 96 additions & 68 deletions sae_lens/cache_activations_runner.py
@@ -1,18 +1,24 @@
import math
import os
from typing import Tuple

import numpy as np
import torch
from datasets import Dataset, DatasetDict, IterableDataset, IterableDatasetDict
from tqdm import tqdm

from sae_lens.config import DTYPE_MAP, CacheActivationsRunnerConfig
from sae_lens.load_model import load_model
from sae_lens.training.activations_store import ActivationsStore
from sae_lens.training.activations_store import FILE_EXTENSION, ActivationsStore


class CacheActivationsRunner:

def __init__(self, cfg: CacheActivationsRunnerConfig):
def __init__(
self,
cfg: CacheActivationsRunnerConfig,
override_dataset: (
DatasetDict | Dataset | IterableDatasetDict | IterableDataset | None
) = None,
):
self.cfg = cfg
self.model = load_model(
model_class_name=cfg.model_class_name,
@@ -23,9 +29,10 @@ def __init__(self, cfg: CacheActivationsRunnerConfig):
self.activations_store = ActivationsStore.from_config(
self.model,
cfg,
override_dataset=override_dataset,
)

self.file_extension = "safetensors"
self.file_extension = FILE_EXTENSION

def __str__(self):
"""
@@ -40,27 +47,33 @@ def __str__(self):
if isinstance(self.cfg.dtype, torch.dtype)
else DTYPE_MAP[self.cfg.dtype].itemsize
)
tokens_in_buffer = (
self.cfg.n_batches_in_buffer
* self.cfg.store_batch_size_prompts
* self.cfg.context_size
)
total_training_tokens = self.cfg.training_tokens
total_disk_space_gb = total_training_tokens * bytes_per_token / 10**9

return (
f"Activation Cache Runner:\n"
f"Total training tokens: {total_training_tokens}\n"
f"Number of buffers: {math.ceil(total_training_tokens / tokens_in_buffer)}\n"
f"Tokens per buffer: {tokens_in_buffer}\n"
f"Number of buffers: {self.n_buffers}\n"
f"Tokens per buffer: {self.tokens_in_buffer}\n"
f"Disk space required: {total_disk_space_gb:.2f} GB\n"
f"Configuration:\n"
f"{self.cfg}"
)

@property
def tokens_in_buffer(self):
return (
self.cfg.n_batches_in_buffer
* self.cfg.store_batch_size_prompts
* self.cfg.context_size
)

@property
def n_buffers(self):
return math.ceil(self.cfg.training_tokens / self.tokens_in_buffer)

@torch.no_grad()
def run(self):

new_cached_activations_path = self.cfg.new_cached_activations_path

# if the activations directory exists and has files in it, raise an exception
@@ -73,94 +86,109 @@ def run(self):
else:
os.makedirs(new_cached_activations_path)

print(f"Started caching {self.cfg.training_tokens} activations")
tokens_per_buffer = (
self.cfg.store_batch_size_prompts
* self.cfg.context_size
* self.cfg.n_batches_in_buffer
)

n_buffers = math.ceil(self.cfg.training_tokens / tokens_per_buffer)

for i in tqdm(range(n_buffers), desc="Caching activations"):
for i in tqdm(range(self.n_buffers), desc="Caching activations"):
try:
buffer = self.activations_store.get_buffer(self.cfg.n_batches_in_buffer)

self.activations_store.save_buffer(
buffer, f"{new_cached_activations_path}/{i}.safetensors"
)
buffer = self.activations_store.get_buffer()
buffer_path = f"{new_cached_activations_path}/{i}.{self.file_extension}"
self.activations_store.save_buffer(buffer, buffer_path)

del buffer

if i % self.cfg.shuffle_every_n_buffers == 0 and i > 0:
if i > 0 and i % self.cfg.shuffle_every_n_buffers == 0:
# Shuffle the buffers on disk

# Do random pairwise shuffling between the last shuffle_every_n_buffers buffers
for _ in range(self.cfg.n_shuffles_with_last_section):
self.shuffle_activations_pairwise(
self.shuffle_two_random_buffers(
new_cached_activations_path,
buffer_idx_range=(i - self.cfg.shuffle_every_n_buffers, i),
start_idx=i - self.cfg.shuffle_every_n_buffers,
end_idx=i,
)

# Do more random pairwise shuffling between all the buffers
for _ in range(self.cfg.n_shuffles_in_entire_dir):
self.shuffle_activations_pairwise(
new_cached_activations_path,
buffer_idx_range=(0, i),
self.shuffle_two_random_buffers(
new_cached_activations_path, start_idx=0, end_idx=i
)
except StopIteration:
print(
f"Warning: Ran out of samples while filling the buffer at batch {i} before reaching {n_buffers} batches. No more caching will occur."
f"Warning: Ran out of samples while filling the buffer at batch {i} before reaching {self.n_buffers} batches. No more caching will occur."
)
break

# More final shuffling (mostly in case we didn't end on an i divisible by shuffle_every_n_buffers)
if n_buffers > 1:
if self.n_buffers > 1:
for _ in tqdm(range(self.cfg.n_shuffles_final), desc="Final shuffling"):
self.shuffle_activations_pairwise(
self.shuffle_two_random_buffers(
new_cached_activations_path,
buffer_idx_range=(0, n_buffers),
start_idx=0,
end_idx=self.n_buffers,
)

@torch.no_grad()
def shuffle_activations_pairwise(
self, datapath: str, buffer_idx_range: Tuple[int, int]
):
def shuffle_two_random_buffers(self, datapath: str, start_idx: int, end_idx: int):
"""
Shuffles two buffers on disk.
Shuffles two randomly selected buffers on disk.
"""
assert (
buffer_idx_range[0] < buffer_idx_range[1] - 1
start_idx < end_idx - 1
), "buffer_idx_range[0] must be smaller than buffer_idx_range[1] by at least 1"

buffer_idx1 = torch.randint(
buffer_idx_range[0], buffer_idx_range[1], (1,)
).item()
buffer_idx2 = torch.randint(
buffer_idx_range[0], buffer_idx_range[1], (1,)
).item()
buffer_idx1 = int(torch.randint(start_idx, end_idx, (1,)).item())
buffer_idx2 = int(torch.randint(start_idx, end_idx, (1,)).item())
while buffer_idx1 == buffer_idx2: # Make sure they're not the same
buffer_idx2 = torch.randint(
buffer_idx_range[0], buffer_idx_range[1], (1,)
).item()
buffer_idx2 = int(torch.randint(start_idx, end_idx, (1,)).item())

buffer1 = self.activations_store.load_buffer(
f"{datapath}/{buffer_idx1}.{self.file_extension}"
)
buffer2 = self.activations_store.load_buffer(
f"{datapath}/{buffer_idx2}.{self.file_extension}"
self.shuffle_two_buffers(datapath, buffer_idx1, buffer_idx2)

@torch.no_grad()
def shuffle_two_buffers(self, datapath: str, buffer_idx1: int, buffer_idx2: int):
path1 = f"{datapath}/{buffer_idx1}.{self.file_extension}"
path2 = f"{datapath}/{buffer_idx2}.{self.file_extension}"

buffer1 = self.activations_store.load_buffer(path1)
buffer2 = self.activations_store.load_buffer(path2)

# Get total size and create a joint buffer
total_size = buffer1.shape[0] + buffer2.shape[0]
joint_buffer = np.memmap(
f"{datapath}/temp_joint_buffer",
dtype=buffer1.dtype,
mode="w+",
shape=(total_size,) + buffer1.shape[1:],
)
joint_buffer = torch.cat([buffer1, buffer2])

# Shuffle them
joint_buffer = joint_buffer[torch.randperm(joint_buffer.shape[0])]
shuffled_buffer1 = joint_buffer[: buffer1.shape[0]]
shuffled_buffer2 = joint_buffer[buffer1.shape[0] :]
# Copy data to joint buffer
joint_buffer[: buffer1.shape[0]] = buffer1
joint_buffer[buffer1.shape[0] :] = buffer2

# Save them back
self.activations_store.save_buffer(
shuffled_buffer1, f"{datapath}/{buffer_idx1}.{self.file_extension}"
# Generate random permutation
permutation = np.random.permutation(total_size)

# Create shuffled buffers
shuffled_buffer1 = np.memmap(
f"{datapath}/temp_shuffled_1",
dtype=buffer1.dtype,
mode="w+",
shape=buffer1.shape,
)
self.activations_store.save_buffer(
shuffled_buffer2, f"{datapath}/{buffer_idx2}.{self.file_extension}"
shuffled_buffer2 = np.memmap(
f"{datapath}/temp_shuffled_2",
dtype=buffer2.dtype,
mode="w+",
shape=buffer2.shape,
)

# Apply permutation
shuffled_buffer1[:] = joint_buffer[permutation[: buffer1.shape[0]]]
shuffled_buffer2[:] = joint_buffer[permutation[buffer1.shape[0] :]]

# Save shuffled buffers back to original files
self.activations_store.save_buffer(shuffled_buffer1, path1)
self.activations_store.save_buffer(shuffled_buffer2, path2)

# Clean up temporary files
import os

os.remove(f"{datapath}/temp_joint_buffer")
os.remove(f"{datapath}/temp_shuffled_1")
os.remove(f"{datapath}/temp_shuffled_2")
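The core change in this file is moving the buffer shuffle from an in-RAM `torch.cat` to on-disk staging with `np.memmap`. Below is a minimal, self-contained sketch of that pairwise shuffle written against plain raw `.bin` files; the file names, shape, dtype, and the helper name are illustrative assumptions, since the actual runner reads and writes buffers through `ActivationsStore.load_buffer` / `save_buffer`.

```python
# Minimal sketch of the on-disk pairwise shuffle, assuming raw .bin buffers.
# Names, shapes, and dtype are placeholders, not values taken from this PR.
import os

import numpy as np

D_IN = 16            # assumed activation width
DTYPE = np.float32   # assumed on-disk dtype


def shuffle_two_buffers_on_disk(path1: str, path2: str, n1: int, n2: int) -> None:
    """Jointly shuffle the rows of two memmapped activation buffers in place."""
    buf1 = np.memmap(path1, dtype=DTYPE, mode="r+", shape=(n1, D_IN))
    buf2 = np.memmap(path2, dtype=DTYPE, mode="r+", shape=(n2, D_IN))

    # Stage both buffers in a temporary joint memmap so the combined rows
    # live on disk rather than in RAM.
    joint_path = path1 + ".joint.tmp"
    joint = np.memmap(joint_path, dtype=DTYPE, mode="w+", shape=(n1 + n2, D_IN))
    joint[:n1] = buf1
    joint[n1:] = buf2

    # One permutation over all rows, split back across the two files.
    perm = np.random.permutation(n1 + n2)
    buf1[:] = joint[perm[:n1]]
    buf2[:] = joint[perm[n1:]]

    # Flush writes and drop the temporary file.
    buf1.flush()
    buf2.flush()
    del joint
    os.remove(joint_path)


if __name__ == "__main__":
    # Build two tiny buffers, then shuffle them jointly.
    for path, n in [("0.bin", 8), ("1.bin", 8)]:
        arr = np.memmap(path, dtype=DTYPE, mode="w+", shape=(n, D_IN))
        arr[:] = np.arange(n)[:, None]
        arr.flush()
    shuffle_two_buffers_on_disk("0.bin", "1.bin", 8, 8)
```

Staging the joint buffer in a memmap instead of concatenating tensors in RAM lets the operating system page rows in and out, so peak memory stays roughly at one buffer's worth rather than both at once. For a sense of the sizes involved, with illustrative settings n_batches_in_buffer=32, store_batch_size_prompts=16, and context_size=128, the new tokens_in_buffer property gives 32 × 16 × 128 = 65,536 tokens per buffer, and training_tokens=1,000,000 yields n_buffers = ceil(1,000,000 / 65,536) = 16.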
4 changes: 0 additions & 4 deletions sae_lens/config.py
@@ -229,7 +229,6 @@ class LanguageModelSAERunnerConfig:
sae_lens_training_version: str = field(default_factory=lambda: __version__)

def __post_init__(self):

if self.resume:
raise ValueError(
"Resuming is no longer supported. You can finetune a trained SAE using cfg.from_pretrained path."
@@ -393,7 +392,6 @@ def get_training_sae_cfg_dict(self) -> dict[str, Any]:
}

def to_dict(self) -> dict[str, Any]:

cfg_dict = {
**self.__dict__,
# some args may not be serializable by default
@@ -405,7 +403,6 @@ def to_json(self, path: str) -> None:
return cfg_dict

def to_json(self, path: str) -> None:

if not os.path.exists(os.path.dirname(path)):
os.makedirs(os.path.dirname(path))

@@ -483,7 +480,6 @@ def __post_init__(self):

@dataclass
class ToyModelSAERunnerConfig:

architecture: Literal["standard", "gated"] = "standard"

# ReLu Model Parameters
4 changes: 0 additions & 4 deletions sae_lens/evals.py
@@ -18,7 +18,6 @@ def run_evals(
eval_batch_size_prompts: int | None = None,
model_kwargs: Mapping[str, Any] = {},
) -> Mapping[str, Any]:

hook_name = sae.cfg.hook_name
hook_head_index = sae.cfg.hook_head_index
### Evals
@@ -153,7 +152,6 @@ def get_recons_loss(

# TODO(tomMcGrath): the rescaling below is a bit of a hack and could probably be tidied up
def standard_replacement_hook(activations: torch.Tensor, hook: Any):

original_device = activations.device
activations = activations.to(sae.device)

@@ -171,7 +169,6 @@ def standard_replacement_hook(activations: torch.Tensor, hook: Any):
return activations.to(original_device)

def all_head_replacement_hook(activations: torch.Tensor, hook: Any):

original_device = activations.device
activations = activations.to(sae.device)

@@ -195,7 +192,6 @@ def all_head_replacement_hook(activations: torch.Tensor, hook: Any):
return new_activations.to(original_device)

def single_head_replacement_hook(activations: torch.Tensor, hook: Any):

original_device = activations.device
activations = activations.to(sae.device)
