@@ -148,7 +148,7 @@ static void sm100_fp8_gemm_1d1d(const torch::Tensor& a, const torch::Tensor& sfa
148148 };
149149 const auto & code = SM100FP8Gemm1D1DRuntime::generate (args);
150150 const auto & runtime = compiler->build (" sm100_fp8_gemm_1d1d" , code);
151- SM100FP8Gemm1D1DRuntime::launch (runtime, args);
151+ MAYBE_LAUNCH ( SM100FP8Gemm1D1DRuntime::launch (runtime, args) );
152152}
153153
154154static void sm100_m_grouped_fp8_gemm_contiguous_1d1d (const torch::Tensor& a, const torch::Tensor& sfa,
@@ -206,7 +206,7 @@ static void sm100_m_grouped_fp8_gemm_contiguous_1d1d(const torch::Tensor& a, con
206206 };
207207 const auto & code = SM100FP8Gemm1D1DRuntime::generate (args);
208208 const auto & runtime = compiler->build (" sm100_m_grouped_fp8_gemm_contiguous_1d1d" , code);
209- SM100FP8Gemm1D1DRuntime::launch (runtime, args);
209+ MAYBE_LAUNCH ( SM100FP8Gemm1D1DRuntime::launch (runtime, args) );
210210}
211211
212212static void sm100_m_grouped_fp8_gemm_masked_1d1d (const torch::Tensor& a, const torch::Tensor& sfa,
@@ -265,7 +265,7 @@ static void sm100_m_grouped_fp8_gemm_masked_1d1d(const torch::Tensor& a, const t
265265 };
266266 const auto & code = SM100FP8Gemm1D1DRuntime::generate (args);
267267 const auto & runtime = compiler->build (" sm100_fp8_m_grouped_gemm_masked_1d1d" , code);
268- SM100FP8Gemm1D1DRuntime::launch (runtime, args);
268+ MAYBE_LAUNCH ( SM100FP8Gemm1D1DRuntime::launch (runtime, args) );
269269}
270270
271271static void fp8_k_grouped_gemm_1d1d (const torch::Tensor& a, const torch::Tensor& sfa,
@@ -346,7 +346,7 @@ static void fp8_k_grouped_gemm_1d1d(const torch::Tensor& a, const torch::Tensor&
346346 };
347347 const auto & code = SM100FP8Gemm1D1DRuntime::generate (args);
348348 const auto & runtime = compiler->build (" sm100_fp8_k_grouped_gemm_1d1d" , code);
349- SM100FP8Gemm1D1DRuntime::launch (runtime, args);
349+ MAYBE_LAUNCH ( SM100FP8Gemm1D1DRuntime::launch (runtime, args) );
350350}
351351
352352} // namespace deep_gemm
0 commit comments