diff --git a/transformer_engine/common/common.cu b/transformer_engine/common/common.cu index 566a55a925..b8ec6e0084 100644 --- a/transformer_engine/common/common.cu +++ b/transformer_engine/common/common.cu @@ -104,7 +104,7 @@ void create_2D_tensor_map(CUtensorMap &tensorMap, const SimpleTensor &tensor, const int TMA_needed_size = TMA_gmem_alignment / type_size; NVTE_CHECK(globalX % TMA_needed_size == 0, "Shape not supported. Expected multiple of ", - TMA_needed_size, ", got ", globalX); + TMA_needed_size, ", got ", globalX); // Create the tensor descriptor. NVTE_CHECK_CUDA_DRIVER(cuDriverTensorMapEncodeTiled(