Sum the input scores along dim=1 in get_topk_indices before windowing

SharathRaparthy · Dahoas · commit be8bc1a27929 · 2023-07-18T12:00:47.000Z
diff --git a/trlx/trainer/accelerate_ppo_trainer.py b/trlx/trainer/accelerate_ppo_trainer.py
@@ -515,6 +515,8 @@ def make_experience(self, num_rollouts: int = 1024, iter_count: int = 0):  # noq
     
     @staticmethod
     def get_topk_indices(input_tensor, window_size: int, k: int, device):
+        # Sum the scores along dim 1
+        input_tensor = input_tensor.sum(1)
         # Use unfold to create the sliding windows
         unfolded = input_tensor.unfold(0, window_size, window_size)
         # Find the topk values and indices along the unfolded dimension