hyperparam fix

pytorch · Jun 26, 2024 · 53e35f7
1 parent 9543f2e
commit 53e35f7
Show file tree

Hide file tree

Showing 2 changed files with 4 additions and 3 deletions.
diff --git a/sota-implementations/crossq/config.yaml b/sota-implementations/crossq/config.yaml
@@ -28,7 +28,7 @@ optim:
   policy_update_delay: 3
   gamma: 0.99
   loss_function: l2
-  lr: 3.0e-4
+  lr: 1.0e-3
   weight_decay: 0.0
   batch_size: 256
   alpha_init: 1.0
@@ -38,8 +38,8 @@ optim:
 
 # network
 network:
-  batch_norm_momentum: 0.01
-  warmup_steps: 100000 # 10^5
+  batch_norm_momentum: 0.99
+  warmup_steps: 100000 
   critic_hidden_sizes: [2048, 2048]
   actor_hidden_sizes: [256, 256]
   critic_activation: relu

diff --git a/torchrl/objectives/crossq.py b/torchrl/objectives/crossq.py
@@ -554,6 +554,7 @@ def qvalue_loss(
                 next_tensordict.set(self.tensor_keys.action, next_action)
                 next_sample_log_prob = next_dist.log_prob(next_action)
 
+        # TODO: separate forward passes seem faster than the combined one.
         # next_state_action_value = self._vmap_qnetworkN0(
         #     next_tensordict.select(*self.qvalue_network.in_keys, strict=False),
         #     self.qvalue_network_params,