@@ -1,31 +1,11 @@
-#
-# Copyright (c) 2025 Huawei Technologies Co., Ltd. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# This file is a part of the vllm-ascend project.
-#
-
 import gc
 import os
 from datetime import timedelta
 from typing import TYPE_CHECKING, Optional, Tuple
 
 import torch
-# import vllm.envs as envs
 from torch.distributed import ProcessGroup
 from torch.distributed.distributed_c10d import PrefixStore
-# from vllm.logger import logger
-# from vllm.platforms import Platform, PlatformEnum
 
 import os
 from collections.abc import Callable
@@ -42,21 +22,6 @@
                              PlatformEnum)
 from fastvideo.utils import import_pynvml
 
-# import vllm_ascend.envs as envs_ascend
-# from vllm_ascend.ascend_config import check_ascend_config, init_ascend_config
-# from vllm_ascend.utils import (ASCEND_QUATIZATION_METHOD,
-#                                check_torchair_cache_exist,
-#                                delete_torchair_cache_file,
-#                                update_aclgraph_sizes)
-
-# if TYPE_CHECKING:
-#     from vllm.config import ModelConfig, VllmConfig
-#     from vllm.utils import FlexibleArgumentParser
-# else:
-#     ModelConfig = None
-#     VllmConfig = None
-#     FlexibleArgumentParser = None
-
 logger = init_logger(__name__)
 
 class NPUPlatform(Platform):
@@ -115,8 +80,8 @@ def clear_npu_memory(cls):
     @classmethod
     def get_attn_backend_cls(cls, selected_backend: AttentionBackendEnum | None,
                              head_size: int, dtype: torch.dtype) -> str:
-        # TODO(will): maybe come up with a more general interface for local attention
-        # if distributed is False, we always try to use Flash attn
+        # The NPU platform only supports the Flash Attention backend.
+        # TODO(will): support for other backends will be added in subsequent updates.
 
         logger.info("Trying FASTVIDEO_ATTENTION_BACKEND=%s",
                     envs.FASTVIDEO_ATTENTION_BACKEND)
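         # (Assumed behavior: when FASTVIDEO_ATTENTION_BACKEND is set, it takes
         # precedence over the automatic backend selection.)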
@@ -216,9 +181,6 @@ def get_attn_backend_cls(cls, selected_backend: AttentionBackendEnum | None,
 
         return "fastvideo.attention.backends.flash_attn.FlashAttentionBackend"
 
-    @classmethod
-    def get_punica_wrapper(cls) -> str:
-        return "vllm_ascend.lora.punica_wrapper.punica_npu.PunicaWrapperNPU"
 
     @classmethod
     def get_current_memory_usage(cls,
@@ -235,19 +197,6 @@ def get_device_communicator_cls(cls) -> str:
     def is_pin_memory_available(cls):
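         # Pinned (page-locked) host memory is available on Ascend NPUs,
         # which speeds up host-to-device transfers.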
         return True
 
-    # @classmethod
-    # def supports_v1(cls, model_config: ModelConfig) -> bool:
-    #     """Returns whether the current platform can support v1 for the supplied
-    #     model configuration.
-    #     """
-    #     return True
-
-    # @classmethod
-    # def get_piecewise_backend_cls(cls) -> str:
-    #     """
-    #     Get piecewise backend class for piecewise graph.
-    #     """
-    #     return "vllm_ascend.compilation.piecewise_backend.NPUPiecewiseBackend"  # noqa
 
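     # Stateless process-group construction: builds an HCCL-backed ProcessGroup
     # directly from a PrefixStore rather than going through torch.distributed's
     # global initialization state.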
     @classmethod
     def stateless_init_device_torch_dist_pg(
@@ -276,12 +225,8 @@ def stateless_init_device_torch_dist_pg(
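         # ProcessGroupHCCL is torch_npu's HCCL-backed process-group
         # implementation, the Ascend counterpart of ProcessGroupNCCL.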
         backend_class = ProcessGroupHCCL(prefix_store, group_rank, group_size,
                                          backend_options)
         device = torch.device("npu")
-        # TODO(Yizhou): Like we mentioned above, _set_default_backend is not
-        # implemented in the 2.5.1 version of PyTorch. But we need to set it
-        # after the latest version is released.
-        # pg._set_default_backend(backend_type)
         backend_class._set_sequence_number_for_group()
         backend_type = ProcessGroup.BackendType.CUSTOM
 
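         # Register the HCCL backend for the "npu" device type so collectives
         # issued on NPU tensors dispatch to it.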
         pg._register_backend(device, backend_type, backend_class)
-        return pg
\ No newline at end of file
+        return pg