@@ -144,21 +144,21 @@ std::vector<py::object> gemm(py::handle A, bool transa, py::handle B, bool trans
   TensorWrapper unquantized_D_tensor;
   py::object unquantized_out;
   std::unique_ptr<Quantizer> my_quantizer = convert_quantizer(quantizer);
-  bool quantization_needed = !quantizer.is_none(); // TODO: Another use-case:
+  bool quantization_needed = !quantizer.is_none();  // TODO: Another use-case:
   // If already output is FP8, then no need for quantization, since cublas is gonna take care of it.
-  if (quantization_needed)
-  {
+  if (quantization_needed) {
     NoneQuantizer q{none};
     std::tie(unquantized_D_tensor, unquantized_out) = q.create_tensor(D_shape, gemm_output_dtype);
   }
 
-  TensorWrapper & out_tensor = quantization_needed ? unquantized_D_tensor : D_tensor;
+  TensorWrapper& out_tensor = quantization_needed ? unquantized_D_tensor : D_tensor;
   // Bias tensor
   TensorWrapper bias_tensor;
   MaybeTensor bias_grad = std::nullopt;
   if (bias.has_value()) {
     if (grad) {
-      auto opts = torch::TensorOptions().dtype(GetATenDType(out_tensor.dtype())).device(torch::kCUDA);
+      auto opts =
+          torch::TensorOptions().dtype(GetATenDType(out_tensor.dtype())).device(torch::kCUDA);
       bias_grad = at::empty({static_cast<int64_t>(B_shape.data[B_shape.ndim - 1])}, opts);
       bias_tensor = makeTransformerEngineTensor(*bias_grad);
     } else {
@@ -281,8 +281,7 @@ std::vector<py::object> gemm(py::handle A, bool transa, py::handle B, bool trans
       }
     }
   }
-  if (quantization_needed)
-    my_quantizer->quantize(unquantized_D_tensor, D_tensor);
+  if (quantization_needed) my_quantizer->quantize(unquantized_D_tensor, D_tensor);
   // Pack outputs
   std::vector<py::object> out;
   out.emplace_back(std::move(D));
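
For readers skimming the diff: the change routes the GEMM result through an unquantized buffer whenever a quantizer is supplied, and only quantizes into the final output tensor after the GEMM has run. Below is a minimal, self-contained C++ sketch of that control flow. The Tensor, Quantizer, and run_gemm names are hypothetical stand-ins for illustration only, not the real TensorWrapper, NoneQuantizer, or cuBLAS-backed calls used above.

// Minimal sketch of the quantize-after-GEMM pattern (hypothetical types;
// not the TransformerEngine API).
#include <iostream>
#include <vector>

struct Tensor {                 // stand-in for TensorWrapper
  std::vector<float> data;
};

struct Quantizer {              // stand-in for the Quantizer hierarchy
  void quantize(const Tensor& src, Tensor& dst) const {
    dst.data = src.data;        // the real code would convert to FP8 here
  }
};

// Stand-in for the cuBLAS-backed GEMM call; only the data flow matters here.
Tensor run_gemm(const Tensor& A, const Tensor& B) {
  Tensor out;
  out.data.resize(A.data.size() * B.data.size());
  return out;
}

Tensor gemm_with_optional_quantization(const Tensor& A, const Tensor& B,
                                       const Quantizer* quantizer) {
  Tensor D;                                        // final (possibly quantized) output
  const bool quantization_needed = (quantizer != nullptr);

  // Mirror of the diff: when quantization is requested, the GEMM writes into
  // a separate unquantized buffer first.
  Tensor unquantized_D;
  Tensor& out_tensor = quantization_needed ? unquantized_D : D;

  out_tensor = run_gemm(A, B);

  // After the GEMM, quantize into the real output tensor.
  if (quantization_needed) quantizer->quantize(unquantized_D, D);
  return D;
}

int main() {
  Tensor A{{1.0f, 2.0f}}, B{{3.0f, 4.0f}};
  Quantizer q;
  Tensor D = gemm_with_optional_quantization(A, B, &q);
  std::cout << "output elements: " << D.data.size() << '\n';
  return 0;
}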