Commit c16afdb

Add file checkpointing (#96)
1 parent 95f19ca commit c16afdb

File tree: 11 files changed, +135 -128 lines


.gitignore

Lines changed: 3 additions & 0 deletions
@@ -135,3 +135,6 @@ dmypy.json
 
 # Generated docs
 docs/content/reference
+
+# Checkpoints directory
+.checkpoints

docs/content/demo.ipynb

Lines changed: 60 additions & 84 deletions
Large diffs are not rendered by default.

sparse_autoencoder/activation_resampler/abstract_activation_resampler.py

Lines changed: 16 additions & 4 deletions
@@ -2,6 +2,7 @@
 
 from abc import ABC, abstractmethod
 from dataclasses import dataclass
+from typing import final
 
 from sparse_autoencoder.activation_store.tensor_store import TensorActivationStore
 from sparse_autoencoder.autoencoder.model import SparseAutoencoder
@@ -35,6 +36,21 @@ class ParameterUpdateResults:
 class AbstractActivationResampler(ABC):
     """Abstract activation resampler."""
 
+    _resample_dataset_size: int | None = None
+    """Resample dataset size.
+
+    If none, will use the train dataset size.
+    """
+
+    @final
+    def __init__(self, resample_dataset_size: int | None = None) -> None:
+        """Initialize the abstract activation resampler.
+
+        Args:
+            resample_dataset_size: Resample dataset size. If none, will use the train dataset size.
+        """
+        self._resample_dataset_size = resample_dataset_size
+
     @abstractmethod
     def resample_dead_neurons(
         self,
@@ -43,7 +59,6 @@ def resample_dead_neurons(
         autoencoder: SparseAutoencoder,
         loss_fn: AbstractLoss,
         train_batch_size: int,
-        num_inputs: int = 819_200,
     ) -> ParameterUpdateResults:
         """Resample dead neurons.
 
@@ -53,8 +68,5 @@ def resample_dead_neurons(
            autoencoder: Sparse autoencoder model.
            loss_fn: Loss function.
            train_batch_size: Train batch size (also used for resampling).
-           num_inputs: Number of input activations to use when resampling. Will be rounded down to
-               be divisible by the batch size, and cannot be larger than the number of items
-               currently in the store.
        """
        raise NotImplementedError
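
This moves the resample dataset size from a per-call num_inputs argument (previously defaulting to 819_200) to a constructor parameter shared by all resamplers. A minimal usage sketch of the new interface; the import path is inferred from the file layout in this commit:

from sparse_autoencoder.activation_resampler.activation_resampler import ActivationResampler

# Default: fall back to the train dataset size (len(store)) at resample time.
resampler = ActivationResampler()

# Or cap how many stored activations are used when resampling dead neurons.
resampler = ActivationResampler(resample_dataset_size=819_200)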

sparse_autoencoder/activation_resampler/activation_resampler.py

Lines changed: 17 additions & 14 deletions
@@ -23,6 +23,7 @@
     SampledDeadNeuronInputs,
     TrainBatchStatistic,
 )
+from sparse_autoencoder.train.utils import get_model_device
 
 
 class ActivationResampler(AbstractActivationResampler):
@@ -76,12 +77,11 @@ def get_dead_neuron_indices(
         """
         return torch.where(neuron_activity <= threshold)[0]
 
-    @staticmethod
     def compute_loss_and_get_activations(
+        self,
         store: ActivationStore,
         autoencoder: SparseAutoencoder,
         loss_fn: AbstractLoss,
-        num_inputs: int,
         train_batch_size: int,
     ) -> tuple[TrainBatchStatistic, InputOutputActivationBatch]:
         """Compute the loss on a random subset of inputs.
@@ -92,7 +92,6 @@ def compute_loss_and_get_activations(
             store: Activation store.
             autoencoder: Sparse autoencoder model.
             loss_fn: Loss function.
-            num_inputs: Number of input activations to use.
             train_batch_size: Train batch size (also used for resampling).
 
         Returns:
@@ -102,19 +101,24 @@ def compute_loss_and_get_activations(
         loss_batches: list[TrainBatchStatistic] = []
         input_activations_batches: list[InputOutputActivationBatch] = []
         dataloader = DataLoader(store, batch_size=train_batch_size)
+        num_inputs = self._resample_dataset_size or len(store)
         batches: int = num_inputs // train_batch_size
+        model_device: torch.device = get_model_device(autoencoder)
 
         for batch_idx, batch in enumerate(iter(dataloader)):
             input_activations_batches.append(batch)
-            learned_activations, reconstructed_activations = autoencoder(batch)
+            source_activations = batch.to(model_device)
+            learned_activations, reconstructed_activations = autoencoder(source_activations)
             loss_batches.append(
-                loss_fn.forward(batch, learned_activations, reconstructed_activations)
+                loss_fn.forward(
+                    source_activations, learned_activations, reconstructed_activations
+                )
             )
             if batch_idx >= batches:
                 break
 
-        loss_result = torch.cat(loss_batches)
-        input_activations = torch.cat(input_activations_batches)
+        loss_result = torch.cat(loss_batches).to(model_device)
+        input_activations = torch.cat(input_activations_batches).to(model_device)
 
         # Check we generated enough data
         if len(loss_result) < num_inputs:
@@ -188,7 +192,7 @@ def sample_input(
                 (0, input_activations.shape[-1]),
                 dtype=input_activations.dtype,
                 device=input_activations.device,
-            )
+            ).to(input_activations.device)
 
             sample_indices: LearntNeuronIndices = torch.multinomial(
                 probabilities, num_samples=num_samples
@@ -261,7 +265,6 @@ def resample_dead_neurons(
         autoencoder: SparseAutoencoder,
         loss_fn: AbstractLoss,
         train_batch_size: int,
-        num_inputs: int = 819_200,
     ) -> ParameterUpdateResults:
         """Resample dead neurons.
 
@@ -271,9 +274,6 @@ def resample_dead_neurons(
             autoencoder: Sparse autoencoder model.
             loss_fn: Loss function.
             train_batch_size: Train batch size (also used for resampling).
-            num_inputs: Number of input activations to use when resampling. Will be rounded down
-                to divisible by the batch size, and cannot be larger than the number of items
-                currently in the store.
         """
         with torch.no_grad():
             dead_neuron_indices = self.get_dead_neuron_indices(neuron_activity)
@@ -284,7 +284,6 @@ def resample_dead_neurons(
                 store=activation_store,
                 autoencoder=autoencoder,
                 loss_fn=loss_fn,
-                num_inputs=num_inputs,
                 train_batch_size=train_batch_size,
             )
 
@@ -316,7 +315,11 @@ def resample_dead_neurons(
             rescaled_sampled_input = self.renormalize_and_scale(
                 sampled_input, neuron_activity, encoder_weight
             )
-            dead_encoder_bias_updates = torch.zeros_like(dead_neuron_indices, dtype=torch.float)
+            dead_encoder_bias_updates = torch.zeros_like(
+                dead_neuron_indices,
+                dtype=dead_decoder_weight_updates.dtype,
+                device=dead_decoder_weight_updates.device,
+            )
 
             return ParameterUpdateResults(
                 dead_neuron_indices=dead_neuron_indices,
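
Two behavioural changes are bundled here. First, the number of inputs is now derived at call time (self._resample_dataset_size or len(store)), so callers no longer pass it. Second, activation batches are moved onto the autoencoder's device before the forward pass, so a CPU-resident store works with a model on CUDA. A hedged sketch of the new call; the store/autoencoder variables and the loss import path are assumptions:

from sparse_autoencoder.activation_resampler.activation_resampler import ActivationResampler
from sparse_autoencoder.loss.mse_reconstruction_loss import MSEReconstructionLoss  # assumed path

resampler = ActivationResampler()  # resample_dataset_size=None -> uses len(store)
loss, input_activations = resampler.compute_loss_and_get_activations(
    store=store,  # a filled activation store
    autoencoder=autoencoder,  # may live on GPU; batches are moved to its device
    loss_fn=MSEReconstructionLoss(),
    train_batch_size=64,
)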

sparse_autoencoder/activation_resampler/tests/test_resample_neurons.py

Lines changed: 6 additions & 6 deletions
@@ -89,11 +89,10 @@ def test_gets_loss_and_correct_activations(
     input_activations_fixture: Tensor,
 ) -> None:
     """Test it gets loss and also returns the input activations."""
-    loss, input_activations = ActivationResampler.compute_loss_and_get_activations(
+    loss, input_activations = ActivationResampler().compute_loss_and_get_activations(
         store=activation_store_fixture,
         autoencoder=autoencoder_model_fixture,
         loss_fn=MSEReconstructionLoss(),
-        num_inputs=DEFAULT_N_ITEMS,
         train_batch_size=DEFAULT_N_ITEMS,
     )
 
@@ -115,11 +114,12 @@ def test_more_items_than_in_store_error(
         ValueError,
         match=r"Cannot get \d+ items from the store, as only \d+ were available.",
     ):
-        ActivationResampler.compute_loss_and_get_activations(
+        ActivationResampler(
+            resample_dataset_size=DEFAULT_N_ITEMS + 1
+        ).compute_loss_and_get_activations(
             store=activation_store_fixture,
             autoencoder=autoencoder_model_fixture,
             loss_fn=MSEReconstructionLoss(),
-            num_inputs=DEFAULT_N_ITEMS + 1,
             train_batch_size=DEFAULT_N_ITEMS + 1,
         )
 
@@ -266,7 +266,7 @@ def test_no_changes_if_no_dead_neurons(self) -> None:
         model = SparseAutoencoder(5, 10, torch.rand(5))
 
         res = ActivationResampler().resample_dead_neurons(
-            neuron_activity, store, model, MSEReconstructionLoss(), DEFAULT_N_ITEMS, DEFAULT_N_ITEMS
+            neuron_activity, store, model, MSEReconstructionLoss(), DEFAULT_N_ITEMS
         )
 
         assert res.dead_neuron_indices.numel() == 0, "Should not have any dead neurons"
@@ -290,7 +290,7 @@ def test_updates_a_dead_neuron_parameters(self) -> None:
         # Get the current & updated parameters
         current_parameters = model.state_dict()
         updated_parameters: ParameterUpdateResults = ActivationResampler().resample_dead_neurons(
-            neuron_activity, store, model, MSEReconstructionLoss(), DEFAULT_N_ITEMS, DEFAULT_N_ITEMS
+            neuron_activity, store, model, MSEReconstructionLoss(), DEFAULT_N_ITEMS
         )
 
         # Check the updated ones have changed

sparse_autoencoder/autoencoder/components/abstract_decoder.py

Lines changed: 3 additions & 3 deletions
@@ -9,8 +9,8 @@
     DeadDecoderNeuronWeightUpdates,
     DecoderWeights,
     InputOutputActivationBatch,
-    InputOutputNeuronIndices,
     LearnedActivationBatch,
+    LearntNeuronIndices,
 )
 
 
@@ -49,7 +49,7 @@ def reset_parameters(self) -> None:
     @final
     def update_dictionary_vectors(
         self,
-        dictionary_vector_indices: InputOutputNeuronIndices,
+        dictionary_vector_indices: LearntNeuronIndices,
         updated_weights: DeadDecoderNeuronWeightUpdates,
     ) -> None:
         """Update decoder dictionary vectors.
@@ -65,4 +65,4 @@
             return
 
         with torch.no_grad():
-            self.weight[dictionary_vector_indices, :] = updated_weights
+            self.weight[:, dictionary_vector_indices] = updated_weights

sparse_autoencoder/autoencoder/components/abstract_encoder.py

Lines changed: 3 additions & 2 deletions
@@ -12,6 +12,7 @@
     InputOutputNeuronIndices,
     LearnedActivationBatch,
     LearntActivationVector,
+    LearntNeuronIndices,
 )
 
 
@@ -49,7 +50,7 @@ def forward(self, x: InputOutputActivationBatch) -> LearnedActivationBatch:
     @final
     def update_dictionary_vectors(
         self,
-        dictionary_vector_indices: InputOutputNeuronIndices,
+        dictionary_vector_indices: LearntNeuronIndices,
         updated_dictionary_weights: DeadEncoderNeuronWeightUpdates,
     ) -> None:
         """Update encoder dictionary vectors.
@@ -64,7 +65,7 @@
             return
 
         with torch.no_grad():
-            self.weight[:, dictionary_vector_indices] = updated_dictionary_weights
+            self.weight[dictionary_vector_indices, :] = updated_dictionary_weights
 
     @final
     def update_bias(
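
The index-axis swaps in the encoder and decoder above are the substantive fix: with nn.Linear-style weights, an encoder weight of shape (learnt_features, input_output_features) stores each learnt neuron's dictionary vector as a row, while the decoder weight, shaped (input_output_features, learnt_features), stores the same neuron's vector as a column. A standalone sketch of the convention this commit settles on (dimension sizes are illustrative, matching the updated tests below):

import torch

n_input_output = 4  # model activation width
n_learnt = 10  # learnt (dictionary) features

encoder_weight = torch.rand(n_learnt, n_input_output)
decoder_weight = torch.rand(n_input_output, n_learnt)

dead = torch.tensor([0, 2])  # indices of dead learnt neurons

# Encoder: a learnt neuron's dictionary vector is a row -> index rows.
encoder_weight[dead, :] = torch.rand(len(dead), n_input_output)

# Decoder: the same neuron's vector is a column -> index columns.
decoder_weight[:, dead] = torch.rand(n_input_output, len(dead))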

sparse_autoencoder/autoencoder/components/tests/test_abstract_decoder.py

Lines changed: 6 additions & 4 deletions
@@ -38,7 +38,9 @@ def forward(self, x: LearnedActivationBatch) -> InputOutputActivationBatch:
 
     def reset_parameters(self) -> None:
         """Mock reset parameters."""
-        self._weight: EncoderWeights = init.normal_(self._weight, mean=0, std=1)
+        self._weight: EncoderWeights = init.kaiming_normal_(
+            self._weight,
+        )
 
 
 @pytest.fixture()
@@ -81,10 +83,10 @@ def test_update_dictionary_vectors_with_no_neurons(mock_decoder: MockDecoder) ->
 @pytest.mark.parametrize(
     ("dictionary_vector_indices", "updates"),
     [
-        (torch.tensor([1]), torch.tensor([[0.5, 0.3, 0.2]])),  # Test with 1 neuron to update
+        (torch.tensor([1]), torch.rand(4, 1)),  # Test with 1 neuron to update
         (
             torch.tensor([0, 2]),
-            torch.tensor([[0.1, 0.2, 0.3], [0.4, 0.5, 0.6]]),
+            torch.rand(4, 2),
         ),  # Test with 2 neurons to update
     ],
 )
@@ -98,5 +100,5 @@ def test_update_dictionary_vectors_with_neurons(
 
     # Check if the specified neurons are updated correctly
     assert torch.allclose(
-        mock_decoder.weight[dictionary_vector_indices, :], updates
+        mock_decoder.weight[:, dictionary_vector_indices], updates
     ), "update_dictionary_vectors should update the weights correctly."

sparse_autoencoder/autoencoder/components/tests/test_abstract_encoder.py

Lines changed: 4 additions & 4 deletions
@@ -45,7 +45,7 @@ def forward(self, x: LearnedActivationBatch) -> InputOutputActivationBatch:
 
     def reset_parameters(self) -> None:
         """Mock reset parameters."""
-        self._weight: EncoderWeights = init.normal_(self._weight, mean=0, std=1)
+        self._weight: EncoderWeights = init.kaiming_normal_(self._weight)
 
 
 @pytest.fixture()
@@ -89,10 +89,10 @@ def test_update_dictionary_vectors_with_no_neurons(mock_encoder: MockEncoder) ->
 @pytest.mark.parametrize(
     ("dictionary_vector_indices", "updates"),
     [
-        (torch.tensor([1]), torch.rand((3, 1))),  # Test with 1 neuron to update
+        (torch.tensor([1]), torch.rand((1, 4))),  # Test with 1 neuron to update
         (
             torch.tensor([0, 2]),
-            torch.rand((3, 2)),
+            torch.rand((2, 4)),
         ),  # Test with 2 neurons to update
     ],
 )
@@ -106,5 +106,5 @@ def test_update_dictionary_vectors_with_neurons(
 
     # Check if the specified neurons are updated correctly
     assert torch.allclose(
-        mock_encoder.weight[:, dictionary_vector_indices], updates
+        mock_encoder.weight[dictionary_vector_indices, :], updates
     ), "update_dictionary_vectors should update the weights correctly."

sparse_autoencoder/train/abstract_pipeline.py

Lines changed: 17 additions & 2 deletions
@@ -1,8 +1,10 @@
 """Abstract pipeline."""
 from abc import ABC, abstractmethod
 from collections.abc import Iterable
+from pathlib import Path
 from typing import final
 
+import torch
 from torch.utils.data import DataLoader
 from tqdm.auto import tqdm
 from transformer_lens import HookedTransformer
@@ -58,6 +60,8 @@ class AbstractPipeline(ABC):
 
     progress_bar: tqdm | None
 
+    total_training_steps: int = 1
+
     @final
     def __init__(  # noqa: PLR0913
         self,
@@ -73,6 +77,7 @@ def __init__(  # noqa: PLR0913
         train_metrics: list[AbstractTrainMetric] | None = None,
         validation_metrics: list[AbstractValidationMetric] | None = None,
         source_data_batch_size: int = 12,
+        checkpoint_directory: Path | None = None,
     ):
         """Initialize the pipeline."""
         self.cache_name = cache_name
@@ -87,6 +92,7 @@ def __init__(  # noqa: PLR0913
         self.optimizer = optimizer
         self.loss = loss
         self.source_data_batch_size = source_data_batch_size
+        self.checkpoint_directory = checkpoint_directory
 
         source_dataloader = source_dataset.get_dataloader(source_data_batch_size)
         self.source_data = self.stateful_dataloader_iterable(source_dataloader)
@@ -149,10 +155,14 @@ def validate_sae(self) -> None:
         """Get validation metrics."""
         raise NotImplementedError
 
-    @abstractmethod
+    @final
     def save_checkpoint(self) -> None:
         """Save the model as a checkpoint."""
-        raise NotImplementedError
+        if self.checkpoint_directory:
+            file_path: Path = (
+                self.checkpoint_directory / f"sae_state_dict-{self.total_training_steps}.pt"
+            )
+            torch.save(self.autoencoder.state_dict(), file_path)
 
     @final
     def run_pipeline(
@@ -196,6 +206,11 @@ def run_pipeline(
             else:
                 neuron_activity = detached_neuron_activity
 
+            # Update the counters
+            last_resampled += store_size
+            last_validated += store_size
+            last_checkpoint += store_size
+
             # Resample dead neurons (if needed)
             progress_bar.set_postfix({"stage": "resample"})
             if last_resampled > resample_frequency and self.activation_resampler is not None:
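
save_checkpoint is now a concrete @final method that writes the autoencoder's state dict into checkpoint_directory (the .checkpoints directory git-ignored by this commit), named by total_training_steps. A minimal round-trip sketch; the SparseAutoencoder constructor arguments mirror the test fixtures elsewhere in this commit and are illustrative:

from pathlib import Path

import torch

from sparse_autoencoder.autoencoder.model import SparseAutoencoder

checkpoint_directory = Path(".checkpoints")
checkpoint_directory.mkdir(exist_ok=True)

# What save_checkpoint() writes, mirroring the implementation above.
autoencoder = SparseAutoencoder(5, 10, torch.rand(5))
total_training_steps = 1
file_path = checkpoint_directory / f"sae_state_dict-{total_training_steps}.pt"
torch.save(autoencoder.state_dict(), file_path)

# Restoring is standard PyTorch.
restored = SparseAutoencoder(5, 10, torch.rand(5))
restored.load_state_dict(torch.load(file_path))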
