We read every piece of feedback, and take your input very seriously.
To see all available qualifiers, see our documentation.
There was an error while loading. Please reload this page.
1 parent 9839db5 commit a3f6647 — Copy full SHA for a3f6647
vllm/v1/attention/backends/mla/flashmla_sparse.py
@@ -636,8 +636,10 @@ def __init__(
             vllm_config.model_config.max_model_len
         )
         self.prefill_workspace_shape = (prefill_workspace_size, head_size)
-        self.prefill_bf16_workspace = current_workspace_manager().get_simultaneous(
-            (self.prefill_workspace_shape, torch.bfloat16)
+        (self.prefill_bf16_workspace,) = (
+            current_workspace_manager().get_simultaneous(
+                (self.prefill_workspace_shape, torch.bfloat16)
+            )
         )

     def _forward_bf16_kv(
0 commit comments