Commit b17e0f6

Improve optimizer documentation (#79)
1 parent a536fcc commit b17e0f6

File tree

2 files changed: +53 -2 lines
sparse_autoencoder/optimizer/__init__.py

Lines changed: 20 additions & 1 deletion
@@ -1 +1,20 @@
-"""Optimizer."""
+"""Optimizers for Sparse Autoencoders.
+
+When training a Sparse Autoencoder, it can be necessary to manually edit the model parameters
+(e.g. with neuron resampling to prevent dead neurons). When doing this, it's also necessary to
+reset the optimizer state for these parameters, as otherwise things like running averages will be
+incorrect (e.g. the running averages of the gradients and the squares of gradients with Adam).
+
+The optimizer used in the original [Towards Monosemanticity: Decomposing Language Models With
+Dictionary Learning](https://transformer-circuits.pub/2023/monosemantic-features)
+paper is available here as :class:`AdamWithReset`.
+
+To enable creating other optimizers with reset methods, we also provide the interface
+:class:`AbstractOptimizerWithReset`.
+"""
+
+from sparse_autoencoder.optimizer.abstract_optimizer import AbstractOptimizerWithReset
+from sparse_autoencoder.optimizer.adam_with_reset import AdamWithReset
+
+
+__all__ = ["AdamWithReset", "AbstractOptimizerWithReset"]

sparse_autoencoder/optimizer/abstract_optimizer.py

Lines changed: 33 additions & 1 deletion
@@ -1,9 +1,16 @@
 """Abstract optimizer with reset."""
 from abc import ABC, abstractmethod
 
+from sparse_autoencoder.tensor_types import DeadNeuronIndices
+
 
 class AbstractOptimizerWithReset(ABC):
-    """Abstract optimizer with reset."""
+    """Abstract optimizer with reset.
+
+    When implementing this interface, we recommend adding a `named_parameters` argument to the
+    constructor, which can be obtained from `named_parameters=model.named_parameters()` by the end
+    user. This is so that the optimizer can find the parameters to reset.
+    """
 
     @abstractmethod
     def reset_state_all_parameters(self) -> None:
@@ -13,3 +20,28 @@ def reset_state_all_parameters(self) -> None:
         parameters (e.g. with activation resampling).
         """
         raise NotImplementedError
+
+    @abstractmethod
+    def reset_neurons_state(
+        self,
+        parameter_name: str,
+        neuron_indices: DeadNeuronIndices,
+        axis: int,
+        parameter_group: int = 0,
+    ) -> None:
+        """Reset the state for specific neurons, on a specific parameter.
+
+        Args:
+            parameter_name: The name of the parameter. Examples from the standard sparse autoencoder
+                implementation include `tied_bias`, `encoder.Linear.weight`, `encoder.Linear.bias`,
+                `decoder.Linear.weight`, and `decoder.ConstrainedUnitNormLinear.weight`.
+            neuron_indices: The indices of the neurons to reset.
+            axis: The axis of the parameter to reset.
+            parameter_group: The index of the parameter group to reset (typically this is just zero,
+                unless you have set up multiple parameter groups for e.g. different learning rates
+                for different parameters).
+
+        Raises:
+            ValueError: If the parameter name is not found.
+        """
+        raise NotImplementedError
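
As a reference point for implementers, the following is a minimal sketch of this interface built on top of torch.optim.Adam. It is not the library's AdamWithReset: the class name SketchAdamWithReset is hypothetical, parameter_group handling is omitted for brevity, and the only state keys assumed are the standard Adam entries "exp_avg" and "exp_avg_sq". It follows the docstring's recommendation of taking named_parameters in the constructor so that parameters can be found by name.

from torch import Tensor
from torch.nn import Parameter
from torch.optim import Adam

from sparse_autoencoder.optimizer.abstract_optimizer import AbstractOptimizerWithReset


class SketchAdamWithReset(Adam, AbstractOptimizerWithReset):
    """Illustrative Adam subclass that can reset state for resampled neurons."""

    def __init__(self, params, named_parameters, **kwargs) -> None:
        super().__init__(params, **kwargs)
        # Name -> parameter map, so state can be looked up by parameter name.
        self._parameters_by_name: dict[str, Parameter] = dict(named_parameters)

    def reset_state_all_parameters(self) -> None:
        """Drop all optimizer state (e.g. momentum and variance estimates)."""
        self.state.clear()

    def reset_neurons_state(
        self,
        parameter_name: str,
        neuron_indices: Tensor,
        axis: int,
        parameter_group: int = 0,  # ignored in this sketch for simplicity
    ) -> None:
        """Zero the Adam running averages for specific neurons of one parameter."""
        if parameter_name not in self._parameters_by_name:
            raise ValueError(f"Parameter {parameter_name} not found.")
        parameter = self._parameters_by_name[parameter_name]
        state = self.state.get(parameter, {})
        for key in ("exp_avg", "exp_avg_sq"):
            if key in state:
                state[key].index_fill_(axis, neuron_indices, 0)

A user would then construct it with something like SketchAdamWithReset(model.parameters(), named_parameters=model.named_parameters(), lr=1e-3) and, after resampling dead neurons, call reset_neurons_state("encoder.Linear.weight", dead_neuron_indices, axis=0).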
