You signed in with another tab or window. Reload to refresh your session. You signed out in another tab or window. Reload to refresh your session. You switched accounts on another tab or window. Reload to refresh your session. Dismiss alert
Note: this server command uses the standard vLLM server parameters present in Habana AI at vllm/.buildkite/nightly-benchmarks/tests/serving-tests.json
Output result:
Detected flags: [-compile_one_hot -cpu -fp32_softmax +fsdpa -gaudi +gaudi2 -gaudi3]
ERROR 01-29 16:41:43 engine.py:381] Device type HPU is not supported for torch.Generator() api.
ERROR 01-29 16:41:43 engine.py:381] Traceback (most recent call last):
ERROR 01-29 16:41:43 engine.py:381] File "/usr/local/lib/python3.10/dist-packages/vllm-0.6.3.dev2059+g8642d892.gaudi000-py3.10.egg/vllm/engine/multiprocessing/engine.py", line 372, in run_mp_engine
ERROR 01-29 16:41:43 engine.py:381] engine = MQLLMEngine.from_engine_args(engine_args=engine_args,
ERROR 01-29 16:41:43 engine.py:381] File "/usr/local/lib/python3.10/dist-packages/vllm-0.6.3.dev2059+g8642d892.gaudi000-py3.10.egg/vllm/engine/multiprocessing/engine.py", line 120, in from_engine_args
ERROR 01-29 16:41:43 engine.py:381] return cls(ipc_path=ipc_path,
ERROR 01-29 16:41:43 engine.py:381] File "/usr/local/lib/python3.10/dist-packages/vllm-0.6.3.dev2059+g8642d892.gaudi000-py3.10.egg/vllm/engine/multiprocessing/engine.py", line 72, in __init__
ERROR 01-29 16:41:43 engine.py:381] self.engine = LLMEngine(*args, **kwargs)
ERROR 01-29 16:41:43 engine.py:381] File "/usr/local/lib/python3.10/dist-packages/vllm-0.6.3.dev2059+g8642d892.gaudi000-py3.10.egg/vllm/engine/llm_engine.py", line 271, in __init__
ERROR 01-29 16:41:43 engine.py:381] self.model_executor = executor_class(vllm_config=vllm_config, )
ERROR 01-29 16:41:43 engine.py:381] File "/usr/local/lib/python3.10/dist-packages/vllm-0.6.3.dev2059+g8642d892.gaudi000-py3.10.egg/vllm/executor/executor_base.py", line 49, in __init__
ERROR 01-29 16:41:43 engine.py:381] self._init_executor()
ERROR 01-29 16:41:43 engine.py:381] File "/usr/local/lib/python3.10/dist-packages/vllm-0.6.3.dev2059+g8642d892.gaudi000-py3.10.egg/vllm/executor/uniproc_executor.py", line 40, in _init_executor
ERROR 01-29 16:41:43 engine.py:381] self.collective_rpc("load_model")
ERROR 01-29 16:41:43 engine.py:381] File "/usr/local/lib/python3.10/dist-packages/vllm-0.6.3.dev2059+g8642d892.gaudi000-py3.10.egg/vllm/executor/uniproc_executor.py", line 51, in collective_rpc
ERROR 01-29 16:41:43 engine.py:381] answer = run_method(self.driver_worker, method, args, kwargs)
ERROR 01-29 16:41:43 engine.py:381] File "/usr/local/lib/python3.10/dist-packages/vllm-0.6.3.dev2059+g8642d892.gaudi000-py3.10.egg/vllm/utils.py", line 2305, in run_method
ERROR 01-29 16:41:43 engine.py:381] return func(*args, **kwargs)
ERROR 01-29 16:41:43 engine.py:381] File "/usr/local/lib/python3.10/dist-packages/vllm-0.6.3.dev2059+g8642d892.gaudi000-py3.10.egg/vllm/worker/hpu_worker.py", line 219, in load_model
ERROR 01-29 16:41:43 engine.py:381] self.model_runner.load_model()
ERROR 01-29 16:41:43 engine.py:381] File "/usr/local/lib/python3.10/dist-packages/vllm-0.6.3.dev2059+g8642d892.gaudi000-py3.10.egg/vllm/worker/hpu_model_runner.py", line 708, in load_model
ERROR 01-29 16:41:43 engine.py:381] self.model = get_model(vllm_config=self.vllm_config)
ERROR 01-29 16:41:43 engine.py:381] File "/usr/local/lib/python3.10/dist-packages/vllm-0.6.3.dev2059+g8642d892.gaudi000-py3.10.egg/vllm/model_executor/model_loader/__init__.py", line 12, in get_model
ERROR 01-29 16:41:43 engine.py:381] return loader.load_model(vllm_config=vllm_config)
ERROR 01-29 16:41:43 engine.py:381] File "/usr/local/lib/python3.10/dist-packages/vllm-0.6.3.dev2059+g8642d892.gaudi000-py3.10.egg/vllm/model_executor/model_loader/loader.py", line 439, in load_model
ERROR 01-29 16:41:43 engine.py:381] initialize_dummy_weights(model)
ERROR 01-29 16:41:43 engine.py:381] File "/usr/local/lib/python3.10/dist-packages/vllm-0.6.3.dev2059+g8642d892.gaudi000-py3.10.egg/vllm/model_executor/model_loader/weight_utils.py", line 647, in initialize_dummy_weights
ERROR 01-29 16:41:43 engine.py:381] generator = torch.Generator(device=param.data.device)
ERROR 01-29 16:41:43 engine.py:381] RuntimeError: Device type HPU is not supported for torch.Generator() api.
Before submitting a new issue...
Make sure you already searched for relevant issues, and asked the chatbot living at the bottom right corner of the documentation page, which can answer lots of frequently asked questions.
The text was updated successfully, but these errors were encountered:
Your current environment
The output of `python collect_env.py`
Model Input Dumps
No response
🐛 Describe the bug
Running the vLLM-provided benchmarks causes the error shown below.
Input command:
Server command: python3 -m vllm.entrypoints.openai.api_server --model meta-llama/Meta-Llama-3.1-8B-Instruct --tensor-parallel-size 1 --swap-space 16 --disable-log-stats --disable-log-requests --load-format dummy
Note: this server command uses the standard vLLM server parameters present in Habana AI at vllm/.buildkite/nightly-benchmarks/tests/serving-tests.json
Output result:
Detected flags: [-compile_one_hot -cpu -fp32_softmax +fsdpa -gaudi +gaudi2 -gaudi3]
ERROR 01-29 16:41:43 engine.py:381] Device type HPU is not supported for torch.Generator() api.
ERROR 01-29 16:41:43 engine.py:381] Traceback (most recent call last):
ERROR 01-29 16:41:43 engine.py:381] File "/usr/local/lib/python3.10/dist-packages/vllm-0.6.3.dev2059+g8642d892.gaudi000-py3.10.egg/vllm/engine/multiprocessing/engine.py", line 372, in run_mp_engine
ERROR 01-29 16:41:43 engine.py:381] engine = MQLLMEngine.from_engine_args(engine_args=engine_args,
ERROR 01-29 16:41:43 engine.py:381] File "/usr/local/lib/python3.10/dist-packages/vllm-0.6.3.dev2059+g8642d892.gaudi000-py3.10.egg/vllm/engine/multiprocessing/engine.py", line 120, in from_engine_args
ERROR 01-29 16:41:43 engine.py:381] return cls(ipc_path=ipc_path,
ERROR 01-29 16:41:43 engine.py:381] File "/usr/local/lib/python3.10/dist-packages/vllm-0.6.3.dev2059+g8642d892.gaudi000-py3.10.egg/vllm/engine/multiprocessing/engine.py", line 72, in __init__
ERROR 01-29 16:41:43 engine.py:381] self.engine = LLMEngine(*args, **kwargs)
ERROR 01-29 16:41:43 engine.py:381] File "/usr/local/lib/python3.10/dist-packages/vllm-0.6.3.dev2059+g8642d892.gaudi000-py3.10.egg/vllm/engine/llm_engine.py", line 271, in __init__
ERROR 01-29 16:41:43 engine.py:381] self.model_executor = executor_class(vllm_config=vllm_config, )
ERROR 01-29 16:41:43 engine.py:381] File "/usr/local/lib/python3.10/dist-packages/vllm-0.6.3.dev2059+g8642d892.gaudi000-py3.10.egg/vllm/executor/executor_base.py", line 49, in __init__
ERROR 01-29 16:41:43 engine.py:381] self._init_executor()
ERROR 01-29 16:41:43 engine.py:381] File "/usr/local/lib/python3.10/dist-packages/vllm-0.6.3.dev2059+g8642d892.gaudi000-py3.10.egg/vllm/executor/uniproc_executor.py", line 40, in _init_executor
ERROR 01-29 16:41:43 engine.py:381] self.collective_rpc("load_model")
ERROR 01-29 16:41:43 engine.py:381] File "/usr/local/lib/python3.10/dist-packages/vllm-0.6.3.dev2059+g8642d892.gaudi000-py3.10.egg/vllm/executor/uniproc_executor.py", line 51, in collective_rpc
ERROR 01-29 16:41:43 engine.py:381] answer = run_method(self.driver_worker, method, args, kwargs)
ERROR 01-29 16:41:43 engine.py:381] File "/usr/local/lib/python3.10/dist-packages/vllm-0.6.3.dev2059+g8642d892.gaudi000-py3.10.egg/vllm/utils.py", line 2305, in run_method
ERROR 01-29 16:41:43 engine.py:381] return func(*args, **kwargs)
ERROR 01-29 16:41:43 engine.py:381] File "/usr/local/lib/python3.10/dist-packages/vllm-0.6.3.dev2059+g8642d892.gaudi000-py3.10.egg/vllm/worker/hpu_worker.py", line 219, in load_model
ERROR 01-29 16:41:43 engine.py:381] self.model_runner.load_model()
ERROR 01-29 16:41:43 engine.py:381] File "/usr/local/lib/python3.10/dist-packages/vllm-0.6.3.dev2059+g8642d892.gaudi000-py3.10.egg/vllm/worker/hpu_model_runner.py", line 708, in load_model
ERROR 01-29 16:41:43 engine.py:381] self.model = get_model(vllm_config=self.vllm_config)
ERROR 01-29 16:41:43 engine.py:381] File "/usr/local/lib/python3.10/dist-packages/vllm-0.6.3.dev2059+g8642d892.gaudi000-py3.10.egg/vllm/model_executor/model_loader/__init__.py", line 12, in get_model
ERROR 01-29 16:41:43 engine.py:381] return loader.load_model(vllm_config=vllm_config)
ERROR 01-29 16:41:43 engine.py:381] File "/usr/local/lib/python3.10/dist-packages/vllm-0.6.3.dev2059+g8642d892.gaudi000-py3.10.egg/vllm/model_executor/model_loader/loader.py", line 439, in load_model
ERROR 01-29 16:41:43 engine.py:381] initialize_dummy_weights(model)
ERROR 01-29 16:41:43 engine.py:381] File "/usr/local/lib/python3.10/dist-packages/vllm-0.6.3.dev2059+g8642d892.gaudi000-py3.10.egg/vllm/model_executor/model_loader/weight_utils.py", line 647, in initialize_dummy_weights
ERROR 01-29 16:41:43 engine.py:381] generator = torch.Generator(device=param.data.device)
ERROR 01-29 16:41:43 engine.py:381] RuntimeError: Device type HPU is not supported for torch.Generator() api.
Before submitting a new issue...
The text was updated successfully, but these errors were encountered: