Add more pipeline tests (#146)

alan-cooney · web-flow · commit 9f62039be1bf · 2023-12-10T11:51:06.000-03:00
diff --git a/.vscode/settings.json b/.vscode/settings.json
@@ -41,5 +41,8 @@
   "python.testing.pytestEnabled": true,
   "rewrap.autoWrap.enabled": true,
   "rewrap.wrappingColumn": 100,
-  "python.analysis.diagnosticMode": "workspace"
+  "pylint.ignorePatterns": [
+    "*"
+  ]
 }
+
diff --git a/sparse_autoencoder/activation_resampler/abstract_activation_resampler.py b/sparse_autoencoder/activation_resampler/abstract_activation_resampler.py
@@ -3,7 +3,7 @@
 from abc import ABC, abstractmethod
 from dataclasses import dataclass
 
-from jaxtyping import Float, Int
+from jaxtyping import Float, Int, Int64
 from torch import Tensor
 
 from sparse_autoencoder.activation_store.tensor_store import TensorActivationStore
@@ -16,7 +16,7 @@
 class ParameterUpdateResults:
     """Parameter update results from resampling dead neurons."""
 
-    dead_neuron_indices: Int[Tensor, Axis.LEARNT_FEATURE_IDX]
+    dead_neuron_indices: Int64[Tensor, Axis.LEARNT_FEATURE_IDX]
     """Dead neuron indices."""
 
     dead_encoder_weight_updates: Float[
diff --git a/sparse_autoencoder/activation_resampler/activation_resampler.py b/sparse_autoencoder/activation_resampler/activation_resampler.py
@@ -1,6 +1,6 @@
 """Activation resampler."""
 from einops import rearrange
-from jaxtyping import Bool, Float, Int
+from jaxtyping import Bool, Float, Int64
 import torch
 from torch import Tensor
 from torch.nn import Parameter
@@ -139,13 +139,13 @@ def __init__(
         self.neuron_activity_window_end = resample_interval
         self.neuron_activity_window_start = resample_interval - n_activations_activity_collate
         self._max_n_resamples = max_n_resamples
-        self._collated_neuron_activity = torch.zeros(n_learned_features, dtype=torch.int32)
+        self._collated_neuron_activity = torch.zeros(n_learned_features, dtype=torch.int64)
         self._resample_dataset_size = resample_dataset_size
         self._threshold_is_dead_portion_fires = threshold_is_dead_portion_fires
 
     def _get_dead_neuron_indices(
         self,
-    ) -> Int[Tensor, Axis.LEARNT_FEATURE_IDX]:
+    ) -> Int64[Tensor, Axis.LEARNT_FEATURE_IDX]:
         """Identify the indices of neurons that are dead.
 
         Identifies any neurons that have fired less than the threshold portion of the collated
@@ -171,7 +171,7 @@ def _get_dead_neuron_indices(
             self._collated_neuron_activity <= threshold_is_dead_number_fires
         )[0]
 
-        return dead_indices.to(dtype=torch.int)
+        return dead_indices.to(dtype=torch.int64)
 
     def compute_loss_and_get_activations(
         self,
@@ -299,15 +299,15 @@ def sample_input(
                 device=input_activations.device,
             ).to(input_activations.device)
 
-        sample_indices: Int[Tensor, Axis.LEARNT_FEATURE_IDX] = torch.multinomial(
+        sample_indices: Int64[Tensor, Axis.LEARNT_FEATURE_IDX] = torch.multinomial(
             probabilities, num_samples=num_samples
         )
         return input_activations[sample_indices, :]
 
     @staticmethod
     def renormalize_and_scale(
         sampled_input: Float[Tensor, Axis.names(Axis.DEAD_FEATURE, Axis.INPUT_OUTPUT_FEATURE)],
-        neuron_activity: Int[Tensor, Axis.LEARNT_FEATURE],
+        neuron_activity: Int64[Tensor, Axis.LEARNT_FEATURE],
         encoder_weight: Float[
             Parameter, Axis.names(Axis.LEARNT_FEATURE, Axis.INPUT_OUTPUT_FEATURE)
         ],
@@ -447,7 +447,7 @@ def resample_dead_neurons(
 
     def step_resampler(
         self,
-        batch_neuron_activity: Int[Tensor, Axis.LEARNT_FEATURE],
+        batch_neuron_activity: Int64[Tensor, Axis.LEARNT_FEATURE],
         activation_store: ActivationStore,
         autoencoder: SparseAutoencoder,
         loss_fn: AbstractLoss,
diff --git a/sparse_autoencoder/activation_resampler/tests/test_activation_resampler.py b/sparse_autoencoder/activation_resampler/tests/test_activation_resampler.py
@@ -1,6 +1,6 @@
 """Tests for the resample_neurons module."""
 
-from jaxtyping import Float, Int
+from jaxtyping import Float, Int, Int64
 import pytest
 import torch
 from torch import Tensor
@@ -256,7 +256,7 @@ class TestRenormalizeAndScale:
     @staticmethod
     def calculate_expected_output(
         sampled_input: Float[Tensor, Axis.names(Axis.DEAD_FEATURE, Axis.INPUT_OUTPUT_FEATURE)],
-        neuron_activity: Int[Tensor, Axis.LEARNT_FEATURE],
+        neuron_activity: Int64[Tensor, Axis.LEARNT_FEATURE],
         encoder_weight: Float[
             Parameter, Axis.names(Axis.LEARNT_FEATURE, Axis.INPUT_OUTPUT_FEATURE)
         ],
@@ -288,7 +288,7 @@ def test_basic_renormalization(self) -> None:
         sampled_input: Float[
             Tensor, Axis.names(Axis.DEAD_FEATURE, Axis.INPUT_OUTPUT_FEATURE)
         ] = torch.tensor([[3.0, 4.0, 5.0]])
-        neuron_activity: Int[Tensor, Axis.LEARNT_FEATURE] = torch.tensor([1, 0, 1, 0, 1])
+        neuron_activity: Int64[Tensor, Axis.LEARNT_FEATURE] = torch.tensor([1, 0, 1, 0, 1])
         encoder_weight: Float[
             Parameter, Axis.names(Axis.LEARNT_FEATURE, Axis.INPUT_OUTPUT_FEATURE)
         ] = Parameter(torch.ones((DEFAULT_N_LEARNED_FEATURES, DEFAULT_N_INPUT_FEATURES)))
@@ -323,7 +323,7 @@ def test_no_changes_if_no_dead_neurons(
         self, full_activation_store: ActivationStore, autoencoder_model: SparseAutoencoder
     ) -> None:
         """Check it doesn't change anything if there are no dead neurons."""
-        neuron_activity = torch.ones(DEFAULT_N_LEARNED_FEATURES, dtype=torch.int32)
+        neuron_activity = torch.ones(DEFAULT_N_LEARNED_FEATURES, dtype=torch.int64)
         resampler = ActivationResampler(
             resample_interval=10,
             n_activations_activity_collate=10,
@@ -348,7 +348,7 @@ def test_updates_a_dead_neuron_parameters(
         self, full_activation_store: ActivationStore, autoencoder_model: SparseAutoencoder
     ) -> None:
         """Check it updates a dead neuron's parameters."""
-        neuron_activity = torch.ones(DEFAULT_N_LEARNED_FEATURES, dtype=torch.int32)
+        neuron_activity = torch.ones(DEFAULT_N_LEARNED_FEATURES, dtype=torch.int64)
         dead_neuron_idx = 2
         neuron_activity[dead_neuron_idx] = 0
 
@@ -395,19 +395,19 @@ class TestStepResampler:
     @pytest.mark.parametrize(
         ("neuron_activity", "threshold", "expected_indices"),
         [
-            (torch.tensor([1, 0, 3, 9, 0]), 0.0, torch.tensor([1, 4], dtype=torch.int)),
+            (torch.tensor([1, 0, 3, 9, 0]), 0.0, torch.tensor([1, 4], dtype=torch.int64)),
             (
                 torch.tensor([1, 2, 3, 4, 5]),
                 0.0,
-                torch.tensor([], dtype=torch.int),
+                torch.tensor([], dtype=torch.int64),
             ),
-            (torch.tensor([1, 0, 3, 9, 0]), 0.1, torch.tensor([0, 1, 4], dtype=torch.int)),
-            (torch.tensor([1, 2, 3, 4, 5]), 0.1, torch.tensor([0], dtype=torch.int)),
+            (torch.tensor([1, 0, 3, 9, 0]), 0.1, torch.tensor([0, 1, 4], dtype=torch.int64)),
+            (torch.tensor([1, 2, 3, 4, 5]), 0.1, torch.tensor([0], dtype=torch.int64)),
         ],
     )
     def test_gets_dead_neuron_indices(
         self,
-        neuron_activity: Int[Tensor, Axis.LEARNT_FEATURE],
+        neuron_activity: Int64[Tensor, Axis.LEARNT_FEATURE],
         threshold: float,
         expected_indices: Tensor,
         full_activation_store: ActivationStore,
@@ -463,7 +463,7 @@ def test_max_updates(
     ) -> None:
         """Check if max_updates, resample_interval and n_steps_collate are respected."""
         # Create neuron activity to log (with one dead neuron)
-        neuron_activity_batch_size_1 = torch.ones(DEFAULT_N_LEARNED_FEATURES, dtype=torch.int32)
+        neuron_activity_batch_size_1 = torch.ones(DEFAULT_N_LEARNED_FEATURES, dtype=torch.int64)
         neuron_activity_batch_size_1[2] = 0
 
         resampler = ActivationResampler(
diff --git a/sparse_autoencoder/autoencoder/components/abstract_decoder.py b/sparse_autoencoder/autoencoder/components/abstract_decoder.py
@@ -2,7 +2,7 @@
 from abc import ABC, abstractmethod
 from typing import final
 
-from jaxtyping import Float, Int
+from jaxtyping import Float, Int64
 import torch
 from torch import Tensor
 from torch.nn import Module, Parameter
@@ -61,7 +61,7 @@ def reset_parameters(self) -> None:
     @final
     def update_dictionary_vectors(
         self,
-        dictionary_vector_indices: Int[Tensor, Axis.LEARNT_FEATURE_IDX],
+        dictionary_vector_indices: Int64[Tensor, Axis.LEARNT_FEATURE_IDX],
         updated_weights: Float[Tensor, Axis.names(Axis.INPUT_OUTPUT_FEATURE, Axis.DEAD_FEATURE)],
     ) -> None:
         """Update decoder dictionary vectors.
diff --git a/sparse_autoencoder/autoencoder/components/abstract_encoder.py b/sparse_autoencoder/autoencoder/components/abstract_encoder.py
@@ -2,7 +2,7 @@
 from abc import ABC, abstractmethod
 from typing import final
 
-from jaxtyping import Float, Int
+from jaxtyping import Float, Int64
 import torch
 from torch import Tensor
 from torch.nn import Module, Parameter
@@ -62,7 +62,7 @@ def forward(
     @final
     def update_dictionary_vectors(
         self,
-        dictionary_vector_indices: Int[Tensor, Axis.LEARNT_FEATURE_IDX],
+        dictionary_vector_indices: Int64[Tensor, Axis.LEARNT_FEATURE_IDX],
         updated_dictionary_weights: Float[
             Tensor, Axis.names(Axis.DEAD_FEATURE, Axis.INPUT_OUTPUT_FEATURE)
         ],
@@ -84,7 +84,7 @@ def update_dictionary_vectors(
     @final
     def update_bias(
         self,
-        update_parameter_indices: Int[Tensor, Axis.INPUT_OUTPUT_FEATURE],
+        update_parameter_indices: Int64[Tensor, Axis.INPUT_OUTPUT_FEATURE],
         updated_bias_features: Float[Tensor, Axis.LEARNT_FEATURE] | float,
     ) -> None:
         """Update encoder bias.
diff --git a/sparse_autoencoder/autoencoder/components/tests/test_abstract_decoder.py b/sparse_autoencoder/autoencoder/components/tests/test_abstract_decoder.py
@@ -2,7 +2,7 @@
 
 from typing import final
 
-from jaxtyping import Float, Int
+from jaxtyping import Float, Int64
 import pytest
 import torch
 from torch import Tensor
@@ -72,9 +72,9 @@ def test_update_dictionary_vectors_with_no_neurons(mock_decoder: MockDecoder) ->
     """Test update_dictionary_vectors with 0 neurons to update."""
     original_weight = mock_decoder.weight.clone()  # Save original weight for comparison
 
-    dictionary_vector_indices: Int[Tensor, Axis.INPUT_OUTPUT_FEATURE] = torch.empty(
+    dictionary_vector_indices: Int64[Tensor, Axis.INPUT_OUTPUT_FEATURE] = torch.empty(
         0,
-        dtype=torch.int,  # Empty tensor with 1 dimension
+        dtype=torch.int64,  # Empty tensor with 1 dimension
     )
     updates: Float[Tensor, Axis.names(Axis.INPUT_OUTPUT_FEATURE, Axis.DEAD_FEATURE)] = torch.empty(
         (0, 0),
@@ -101,7 +101,7 @@ def test_update_dictionary_vectors_with_no_neurons(mock_decoder: MockDecoder) ->
 )
 def test_update_dictionary_vectors_with_neurons(
     mock_decoder: MockDecoder,
-    dictionary_vector_indices: Int[Tensor, Axis.INPUT_OUTPUT_FEATURE],
+    dictionary_vector_indices: Int64[Tensor, Axis.INPUT_OUTPUT_FEATURE],
     updates: Float[Tensor, Axis.names(Axis.INPUT_OUTPUT_FEATURE, Axis.DEAD_FEATURE)],
 ) -> None:
     """Test update_dictionary_vectors with 1 or 2 neurons to update."""
diff --git a/sparse_autoencoder/autoencoder/components/tests/test_abstract_encoder.py b/sparse_autoencoder/autoencoder/components/tests/test_abstract_encoder.py
@@ -2,7 +2,7 @@
 
 from typing import final
 
-from jaxtyping import Float, Int
+from jaxtyping import Float, Int64
 import pytest
 import torch
 from torch import Tensor
@@ -74,11 +74,11 @@ def test_update_dictionary_vectors_with_no_neurons(mock_encoder: MockEncoder) ->
     torch.random.manual_seed(0)
     original_weight = mock_encoder.weight.clone()  # Save original weight for comparison
 
-    dictionary_vector_indices: Int[Tensor, Axis.INPUT_OUTPUT_FEATURE] = torch.empty(
+    dictionary_vector_indices: Int64[Tensor, Axis.INPUT_OUTPUT_FEATURE] = torch.empty(
         0,
-        dtype=torch.int,  # Empty tensor with 1 dimension
+        dtype=torch.int64,  # Empty tensor with 1 dimension
     )
-    updates: Int[Tensor, Axis.INPUT_OUTPUT_FEATURE] = torch.empty(
+    updates: Float[Tensor, Axis.INPUT_OUTPUT_FEATURE] = torch.empty(
         (0, 0),
         dtype=torch.float,  # Empty tensor with 2 dimensions
     )
@@ -103,8 +103,8 @@ def test_update_dictionary_vectors_with_no_neurons(mock_encoder: MockEncoder) ->
 )
 def test_update_dictionary_vectors_with_neurons(
     mock_encoder: MockEncoder,
-    dictionary_vector_indices: Int[Tensor, Axis.INPUT_OUTPUT_FEATURE],
-    updates: Int[Tensor, Axis.INPUT_OUTPUT_FEATURE],
+    dictionary_vector_indices: Int64[Tensor, Axis.INPUT_OUTPUT_FEATURE],
+    updates: Float[Tensor, Axis.INPUT_OUTPUT_FEATURE],
 ) -> None:
     """Test update_dictionary_vectors with 1 or 2 neurons to update."""
     mock_encoder.update_dictionary_vectors(dictionary_vector_indices, updates)
diff --git a/sparse_autoencoder/metrics/train/neuron_activity_metric.py b/sparse_autoencoder/metrics/train/neuron_activity_metric.py
@@ -5,7 +5,7 @@
 """
 from typing import Any
 
-from jaxtyping import Int
+from jaxtyping import Int64
 import numpy as np
 from numpy.typing import NDArray
 import torch
@@ -43,7 +43,7 @@ class NeuronActivityHorizonData:
     _steps_since_last_calculated: int
     """Steps since last calculated."""
 
-    _neuron_activity: Int[Tensor, Axis.LEARNT_FEATURE]
+    _neuron_activity: Int64[Tensor, Axis.LEARNT_FEATURE]
     """Neuron activity since inception."""
 
     _thresholds: list[float]
@@ -52,8 +52,8 @@ class NeuronActivityHorizonData:
     @property
     def _dead_count(self) -> int:
         """Dead count."""
-        dead_bool_mask: Int[Tensor, Axis.LEARNT_FEATURE] = self._neuron_activity == 0
-        count_dead: Int[Tensor, Axis.SINGLE_ITEM] = dead_bool_mask.sum()
+        dead_bool_mask: Int64[Tensor, Axis.LEARNT_FEATURE] = self._neuron_activity == 0
+        count_dead: Int64[Tensor, Axis.SINGLE_ITEM] = dead_bool_mask.sum()
         return int(count_dead.item())
 
     @property
@@ -64,8 +64,8 @@ def _dead_fraction(self) -> float:
     @property
     def _alive_count(self) -> int:
         """Alive count."""
-        alive_bool_mask: Int[Tensor, Axis.LEARNT_FEATURE] = self._neuron_activity > 0
-        count_alive: Int[Tensor, Axis.SINGLE_ITEM] = alive_bool_mask.sum()
+        alive_bool_mask: Int64[Tensor, Axis.LEARNT_FEATURE] = self._neuron_activity > 0
+        count_alive: Int64[Tensor, Axis.SINGLE_ITEM] = alive_bool_mask.sum()
         return int(count_alive.item())
 
     def _almost_dead(self, threshold: float) -> int | None:
@@ -74,10 +74,10 @@ def _almost_dead(self, threshold: float) -> int | None:
         if threshold_in_activations < 1:
             return None
 
-        almost_dead_bool_mask: Int[Tensor, Axis.LEARNT_FEATURE] = (
+        almost_dead_bool_mask: Int64[Tensor, Axis.LEARNT_FEATURE] = (
             self._neuron_activity < threshold_in_activations
         )
-        count_almost_dead: Int[Tensor, Axis.SINGLE_ITEM] = almost_dead_bool_mask.sum()
+        count_almost_dead: Int64[Tensor, Axis.SINGLE_ITEM] = almost_dead_bool_mask.sum()
         return int(count_almost_dead.item())
 
     @property
@@ -134,14 +134,14 @@ def __init__(
             thresholds: Thresholds for almost dead neurons.
         """
         self._steps_since_last_calculated = 0
-        self._neuron_activity = torch.zeros(number_learned_features, dtype=torch.int)
+        self._neuron_activity = torch.zeros(number_learned_features, dtype=torch.int64)
         self._thresholds = thresholds
 
         # Get a precise activation_horizon
         self._horizon_steps = approximate_activation_horizon // train_batch_size
         self._horizon_number_activations = self._horizon_steps * train_batch_size
 
-    def step(self, neuron_activity: Int[Tensor, Axis.LEARNT_FEATURE]) -> dict[str, Any]:
+    def step(self, neuron_activity: Int64[Tensor, Axis.LEARNT_FEATURE]) -> dict[str, Any]:
         """Step the neuron activity horizon data.
 
         Args:
@@ -231,7 +231,7 @@ def calculate(self, data: TrainMetricData) -> dict[str, Any]:
         log = {}
 
         for horizon_data in self._data:
-            fired_count: Int[Tensor, Axis.LEARNT_FEATURE] = (
+            fired_count: Int64[Tensor, Axis.LEARNT_FEATURE] = (
                 (data.learned_activations > 0).sum(dim=0).detach().cpu()
             )
             horizon_specific_log = horizon_data.step(fired_count)
diff --git a/sparse_autoencoder/optimizer/abstract_optimizer.py b/sparse_autoencoder/optimizer/abstract_optimizer.py
@@ -2,7 +2,7 @@
 from abc import ABC, abstractmethod
 from typing import TypeAlias
 
-from jaxtyping import Int
+from jaxtyping import Int64
 from torch import Tensor
 from torch.nn.parameter import Parameter
 from torch.optim import Optimizer
@@ -33,7 +33,7 @@ def reset_state_all_parameters(self) -> None:
     def reset_neurons_state(
         self,
         parameter: Parameter,
-        neuron_indices: Int[Tensor, Axis.LEARNT_FEATURE_IDX],
+        neuron_indices: Int64[Tensor, Axis.LEARNT_FEATURE_IDX],
         axis: int,
     ) -> None:
         """Reset the state for specific neurons, on a specific parameter.
diff --git a/sparse_autoencoder/train/pipeline.py b/sparse_autoencoder/train/pipeline.py
diff --git a/sparse_autoencoder/train/sweep_config.py b/sparse_autoencoder/train/sweep_config.py
diff --git a/sparse_autoencoder/train/tests/test_pipeline.py b/sparse_autoencoder/train/tests/test_pipeline.py