Fix a crash in NeMo 2.0 during module._apply(lambda t: t.cpu()) (#1502)

* Fix a crash with module._apply(lambda t: t.cpu())

  Signed-off-by: Guyue Huang <[email protected]>

* Add comments

  Signed-off-by: Guyue Huang <[email protected]>

* Make sure tensor is moved to dst device before quantizer quantizes

  Signed-off-by: Guyue Huang <[email protected]>

---------

Signed-off-by: Guyue Huang <[email protected]>
Co-authored-by: Tim Moon <[email protected]>
NVIDIA · Feb 26, 2025 · 7b10a04 · 7b10a04
1 parent 6266011
commit 7b10a04
Show file tree

Hide file tree

Showing 2 changed files with 4 additions and 0 deletions.
diff --git a/transformer_engine/pytorch/tensor/float8_tensor.py b/transformer_engine/pytorch/tensor/float8_tensor.py
@@ -484,6 +484,8 @@ def _set_data(self, tensor: torch.Tensor) -> None:
 
         # Tensor device
         new_device = tensor.device if tensor.is_cuda else self.device
+        if not devices_match(new_device, tensor.device):
+            tensor = tensor.to(device=new_device)
 
         # Just copy FP8 data if other tensor is Float8Tensor
         if isinstance(tensor, Float8Tensor):

diff --git a/transformer_engine/pytorch/tensor/mxfp8_tensor.py b/transformer_engine/pytorch/tensor/mxfp8_tensor.py
@@ -368,6 +368,8 @@ def _set_data(self, tensor: torch.Tensor) -> None:
 
         # Tensor device
         new_device = tensor.device if tensor.is_cuda else self.device
+        if not devices_match(new_device, tensor.device):
+            tensor = tensor.to(device=new_device)
 
         # Just copy FP8 data if other tensor is MXFP8Tensor
         if isinstance(tensor, MXFP8Tensor):