@@ -66,14 +66,18 @@ def __init__(
66
66
from vllm .utils import init_cached_hf_modules
67
67
init_cached_hf_modules ()
68
68
69
+ # Delay profiler initialization until profiling actually starts.
70
+ # This is because in vLLM V1, MP runtime is initialized before the
71
+ # TPU Worker is initialized. The profiler server needs to start after
72
+ # MP runtime is initialized.
69
73
self .profiler = None
74
+ self .profile_dir = None
70
75
if envs .VLLM_TORCH_PROFILER_DIR and self .rank < 1 :
71
76
# For TPU, we can only have 1 active profiler session for 1 profiler
72
77
# server. So we only profile on rank0.
73
78
self .profile_dir = envs .VLLM_TORCH_PROFILER_DIR
74
79
logger .info ("Profiling enabled. Traces will be saved to: %s" ,
75
80
self .profile_dir )
76
- self .profiler = xp .start_server (9012 )
77
81
78
82
if self .model_config .seed is None :
79
83
self .model_config .seed = 0
@@ -168,9 +172,11 @@ def execute_model(
168
172
169
173
def profile (self , is_start : bool = True ):
170
174
if self .rank < 1 :
171
- if self .profiler is None :
175
+ if self .profile_dir is None :
172
176
raise RuntimeError ("Profiler is not enabled." )
173
177
if is_start :
178
+ if self .profiler is None :
179
+ self .profiler = xp .start_server (9012 )
174
180
xp .start_trace (self .profile_dir )
175
181
else :
176
182
xp .stop_trace ()
0 commit comments