Skip to content

Commit 4157f56

Browse files
authored
[Hardware][TPU][Bugfix] Fix v1 mp profiler (#15409)
Signed-off-by: Siyuan Liu <[email protected]>
1 parent 051da7e commit 4157f56

File tree

1 file changed

+8
-2
lines changed

1 file changed

+8
-2
lines changed

vllm/v1/worker/tpu_worker.py

+8-2
Original file line numberDiff line numberDiff line change
@@ -66,14 +66,18 @@ def __init__(
6666
from vllm.utils import init_cached_hf_modules
6767
init_cached_hf_modules()
6868

69+
# Delay profiler initialization until profiling actually starts.
70+
# This is because in vLLM V1, MP runtime is initialized before the
71+
# TPU Worker is initialized. The profiler server needs to start after
72+
# MP runtime is initialized.
6973
self.profiler = None
74+
self.profile_dir = None
7075
if envs.VLLM_TORCH_PROFILER_DIR and self.rank < 1:
7176
# For TPU, we can only have 1 active profiler session for 1 profiler
7277
# server. So we only profile on rank0.
7378
self.profile_dir = envs.VLLM_TORCH_PROFILER_DIR
7479
logger.info("Profiling enabled. Traces will be saved to: %s",
7580
self.profile_dir)
76-
self.profiler = xp.start_server(9012)
7781

7882
if self.model_config.seed is None:
7983
self.model_config.seed = 0
@@ -168,9 +172,11 @@ def execute_model(
168172

169173
def profile(self, is_start: bool = True):
170174
if self.rank < 1:
171-
if self.profiler is None:
175+
if self.profile_dir is None:
172176
raise RuntimeError("Profiler is not enabled.")
173177
if is_start:
178+
if self.profiler is None:
179+
self.profiler = xp.start_server(9012)
174180
xp.start_trace(self.profile_dir)
175181
else:
176182
xp.stop_trace()

0 commit comments

Comments
 (0)