Skip to content

Commit 063bd31

Browse files
committed
Fix dynamic quant crash issue in torch.compile mode
1 parent fb6bf83 commit 063bd31

File tree

1 file changed

+5
-1
lines changed
  • vllm/model_executor/layers/quantization

1 file changed

+5
-1
lines changed

vllm/model_executor/layers/quantization/fp8.py

+5 -1
Original file line number | Diff line number | Diff line change
@@ -988,7 +988,11 @@ def forward_hpu(
988988
if self.quant_config.activation_scheme == "dynamic" and not self.block_quant:
989989
x_fp8, x_scale = dynamic_quant(x)
990990

991-
htorch.core.mark_step()
991+
if torch._dynamo.is_compiling():
992+
torch._dynamo.graph_break()
993+
else:
994+
htorch.core.mark_step()
995+
992996
if (self.padded_weights_buffer is None
993997
or self.padded_weights_buffer.dtype != x.dtype
994998
or self.padded_weights_buffer.device != x.device

0 commit comments

Comments
 (0)