
Commit 9839db5

clean up

Signed-off-by: Lucas Wilkinson <[email protected]>

Parent: b0027f2

1 file changed: +6 −8 lines

vllm/v1/attention/backends/mla/flashmla_sparse.py (6 additions, 8 deletions)
@@ -628,16 +628,14 @@ def __init__(
         self.topk_indices_buffer = indexer.topk_indices_buffer
         self.padding = 128 if current_platform.is_device_capability(100) else 64
 
-        vllm_config = get_current_vllm_config()
-        assert vllm_config is not None and vllm_config.model_config is not None
-        prefill_workspace_size = get_prefill_workspace_size(
-            vllm_config.model_config.max_model_len
-        )
-
-        self.prefill_workspace_shape = (prefill_workspace_size, head_size)
-
         if kv_cache_dtype == "fp8_ds_mla":
             # Reserve workspace during initialization
+            vllm_config = get_current_vllm_config()
+            assert vllm_config is not None and vllm_config.model_config is not None
+            prefill_workspace_size = get_prefill_workspace_size(
+                vllm_config.model_config.max_model_len
+            )
+            self.prefill_workspace_shape = (prefill_workspace_size, head_size)
             self.prefill_bf16_workspace = current_workspace_manager().get_simultaneous(
                 (self.prefill_workspace_shape, torch.bfloat16)
             )
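The effect of the commit is that the prefill workspace is sized and reserved only inside the `fp8_ds_mla` branch, so backends configured with other KV-cache dtypes skip the allocation entirely. Below is a minimal, self-contained sketch of that pattern; `WorkspaceManager`, the sizing rule inside `get_prefill_workspace_size`, and the example dimensions are hypothetical stand-ins, not vLLM's actual implementations.

```python
import torch


def get_prefill_workspace_size(max_model_len: int) -> int:
    # Hypothetical sizing rule: cap the workspace rows at the model's
    # maximum sequence length.
    return min(max_model_len, 32768)


class WorkspaceManager:
    """Stand-in for vLLM's workspace manager; hands out preallocated tensors."""

    def get_simultaneous(self, *specs):
        # Each spec is (shape, dtype); allocate one buffer per spec.
        buffers = [torch.empty(shape, dtype=dtype) for shape, dtype in specs]
        return buffers[0] if len(buffers) == 1 else buffers


class SparseBackendSketch:
    def __init__(self, kv_cache_dtype: str, max_model_len: int, head_size: int):
        if kv_cache_dtype == "fp8_ds_mla":
            # Reserve workspace during initialization, but only for this
            # dtype -- mirroring the move this commit makes.
            size = get_prefill_workspace_size(max_model_len)
            self.prefill_workspace_shape = (size, head_size)
            self.prefill_bf16_workspace = WorkspaceManager().get_simultaneous(
                (self.prefill_workspace_shape, torch.bfloat16)
            )


backend = SparseBackendSketch("fp8_ds_mla", max_model_len=4096, head_size=576)
print(backend.prefill_bf16_workspace.shape)  # torch.Size([4096, 576])
```

The design choice here is straightforward: since the bf16 prefill workspace is consumed only on the `fp8_ds_mla` path, computing its size and shape unconditionally (as the removed lines did) does needless work for every other configuration.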
