From 0c82aa07f6ec4ef2cdae79088af11e7745a6aeb1 Mon Sep 17 00:00:00 2001 From: lizhenneng Date: Mon, 1 Dec 2025 09:55:19 +0800 Subject: [PATCH] Improve Python backend shutdown speed MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When shutting down the Linux operating system, Triton server needs 1 to 2 seconds to stop, especially when the Python backend has already been started, which leads to a longer shutdown time. Signed-off-by: lizhenneng --- src/pb_stub.cc | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/src/pb_stub.cc b/src/pb_stub.cc index 56048d78..12f3aba3 100644 --- a/src/pb_stub.cc +++ b/src/pb_stub.cc @@ -77,6 +77,9 @@ using cudaStream_t = void*; namespace triton { namespace backend { namespace python { std::atomic non_graceful_exit = {false}; +std::atomic should_exit{false}; +std::condition_variable exit_cv; +std::mutex exit_mutex; void SignalHandler(int signum) @@ -2058,7 +2061,11 @@ main(int argc, char** argv) // shared memory and will be set to false by the parent process. // The parent process expects that the stub process sets this // variable to true within 1 second. - std::this_thread::sleep_for(std::chrono::milliseconds(300)); + std::unique_lock lock(exit_mutex); + if (exit_cv.wait_for(lock, std::chrono::milliseconds(300), []{ + return should_exit.load(); + })) { + } stub->UpdateHealth(); @@ -2099,6 +2106,8 @@ main(int argc, char** argv) stub->TerminateParentToStubQueueMonitor(); } background_thread_running = false; + should_exit = true; + exit_cv.notify_all(); background_thread.join(); break; }