Fix dynamic quantization crash in torch.compile mode

hlin99 · hlin99 · commit 063bd3118a55 · 2025-03-21T00:42:13.000Z
diff --git a/vllm/model_executor/layers/quantization/fp8.py b/vllm/model_executor/layers/quantization/fp8.py
@@ -988,7 +988,11 @@ def forward_hpu(
         if self.quant_config.activation_scheme == "dynamic" and not self.block_quant:
             x_fp8, x_scale = dynamic_quant(x)
 
-        htorch.core.mark_step()
+        if torch._dynamo.is_compiling():
+            torch._dynamo.graph_break()
+        else:
+            htorch.core.mark_step()
+
         if (self.padded_weights_buffer is None
                 or self.padded_weights_buffer.dtype != x.dtype
                 or self.padded_weights_buffer.device != x.device