File tree Expand file tree Collapse file tree 1 file changed +6
-8
lines changed
vllm/v1/attention/backends/mla Expand file tree Collapse file tree 1 file changed +6
-8
lines changed Original file line number Diff line number Diff line change @@ -628,16 +628,14 @@ def __init__(
628628 self .topk_indices_buffer = indexer .topk_indices_buffer
629629 self .padding = 128 if current_platform .is_device_capability (100 ) else 64
630630
631- vllm_config = get_current_vllm_config ()
632- assert vllm_config is not None and vllm_config .model_config is not None
633- prefill_workspace_size = get_prefill_workspace_size (
634- vllm_config .model_config .max_model_len
635- )
636-
637- self .prefill_workspace_shape = (prefill_workspace_size , head_size )
638-
639631 if kv_cache_dtype == "fp8_ds_mla" :
640632 # Reserve workspace during initialization
633+ vllm_config = get_current_vllm_config ()
634+ assert vllm_config is not None and vllm_config .model_config is not None
635+ prefill_workspace_size = get_prefill_workspace_size (
636+ vllm_config .model_config .max_model_len
637+ )
638+ self .prefill_workspace_shape = (prefill_workspace_size , head_size )
641639 self .prefill_bf16_workspace = current_workspace_manager ().get_simultaneous (
642640 (self .prefill_workspace_shape , torch .bfloat16 )
643641 )
You can’t perform that action at this time.
0 commit comments