Skip to content

Commit 22039aa

Browse files
committed
simplify API
1 parent 0150602 commit 22039aa

File tree

3 files changed

+24
-12
lines changed

3 files changed

+24
-12
lines changed

tools/server/server-context.cpp

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -609,6 +609,16 @@ extern "C" {
609609

610610
// initialize slots and server-related data
611611
void server_context::init() {
612+
613+
// wiring up the task queue callbacks
614+
queue_tasks.on_new_task([this](server_task && task) {
615+
this->process_single_task(std::move(task));
616+
});
617+
queue_tasks.on_update_slots([this]() {
618+
this->update_slots();
619+
});
620+
621+
// setup slots
612622
SRV_INF("initializing slots, n_slots = %d\n", params_base.n_parallel);
613623

614624
const int n_ctx_train = llama_model_n_ctx_train(model);

tools/server/server-context.h

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -151,6 +151,16 @@ struct server_context {
151151
// initialize slots and server-related data
152152
void init();
153153

154+
// request the server to stop: terminates the task queue, which unblocks start_loop()
155+
void terminate() {
156+
queue_tasks.terminate();
157+
}
158+
159+
// run the task queue loop; blocks the calling thread (the main thread in server.cpp) until terminate() is called
160+
void start_loop() {
161+
queue_tasks.start_loop();
162+
}
163+
154164
server_slot * get_slot_by_id(int id);
155165

156166
server_slot * get_available_slot(const server_task & task);

tools/server/server.cpp

Lines changed: 4 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -161,7 +161,7 @@ int main(int argc, char ** argv) {
161161
auto clean_up = [&ctx_http, &ctx_server]() {
162162
SRV_INF("%s: cleaning up before exit...\n", __func__);
163163
ctx_http.stop();
164-
ctx_server.queue_results.terminate();
164+
ctx_server.terminate();
165165
llama_backend_free();
166166
};
167167

@@ -189,17 +189,9 @@ int main(int argc, char ** argv) {
189189

190190
LOG_INF("%s: model loaded\n", __func__);
191191

192-
ctx_server.queue_tasks.on_new_task([&ctx_server](server_task && task) {
193-
ctx_server.process_single_task(std::move(task));
194-
});
195-
196-
ctx_server.queue_tasks.on_update_slots([&ctx_server]() {
197-
ctx_server.update_slots();
198-
});
199-
200192
shutdown_handler = [&](int) {
201193
// this will unblock start_loop()
202-
ctx_server.queue_tasks.terminate();
194+
ctx_server.terminate();
203195
};
204196

205197
// TODO: refactor in common/console
@@ -219,8 +211,8 @@ int main(int argc, char ** argv) {
219211

220212
LOG_INF("%s: server is listening on %s\n", __func__, ctx_http.listening_address.c_str());
221213
LOG_INF("%s: starting the main loop...\n", __func__);
222-
// this call blocks the main thread until queue_tasks.terminate() is called
223-
ctx_server.queue_tasks.start_loop();
214+
// this call blocks the main thread until ctx_server.terminate() is called
215+
ctx_server.start_loop();
224216

225217
clean_up();
226218
if (ctx_http.thread.joinable()) {

0 commit comments

Comments
 (0)