diff --git a/common/arg.cpp b/common/arg.cpp index 40af7e574830f..56827a65908be 100644 --- a/common/arg.cpp +++ b/common/arg.cpp @@ -2734,6 +2734,13 @@ common_params_context common_params_parser_init(common_params & params, llama_ex params.public_path = value; } ).set_examples({LLAMA_EXAMPLE_SERVER}).set_env("LLAMA_ARG_STATIC_PATH")); + add_opt(common_arg( + {"--api-prefix"}, "PREFIX", + string_format("prefix path the server serves from, without the trailing slash (default: %s)", params.api_prefix.c_str()), + [](common_params & params, const std::string & value) { + params.api_prefix = value; + } + ).set_examples({LLAMA_EXAMPLE_SERVER}).set_env("LLAMA_ARG_API_PREFIX")); add_opt(common_arg( {"--no-webui"}, string_format("Disable the Web UI (default: %s)", params.webui ? "enabled" : "disabled"), diff --git a/common/common.h b/common/common.h index 8922090e7b10d..a5abe32859fdd 100644 --- a/common/common.h +++ b/common/common.h @@ -370,6 +370,7 @@ struct common_params { std::string hostname = "127.0.0.1"; std::string public_path = ""; // NOLINT + std::string api_prefix = ""; // NOLINT std::string chat_template = ""; // NOLINT bool use_jinja = false; // NOLINT bool enable_chat_template = true; diff --git a/tools/server/server.cpp b/tools/server/server.cpp index d3f6271931f62..57b917f2f97b3 100644 --- a/tools/server/server.cpp +++ b/tools/server/server.cpp @@ -4806,14 +4806,14 @@ int main(int argc, char ** argv) { // register static assets routes if (!params.public_path.empty()) { // Set the base directory for serving static files - bool is_found = svr->set_mount_point("/", params.public_path); + bool is_found = svr->set_mount_point(params.api_prefix + "/", params.public_path); if (!is_found) { LOG_ERR("%s: static assets path not found: %s\n", __func__, params.public_path.c_str()); return 1; } } else { // using embedded static index.html - svr->Get("/", [](const httplib::Request & req, httplib::Response & res) { + svr->Get(params.api_prefix + "/", [](const httplib::Request & req, httplib::Response & res) { if (req.get_header_value("Accept-Encoding").find("gzip") == std::string::npos) { res.set_content("Error: gzip is not supported by this browser", "text/plain"); } else { @@ -4829,37 +4829,37 @@ int main(int argc, char ** argv) { } // register API routes - svr->Get ("/health", handle_health); // public endpoint (no API key check) - svr->Get ("/metrics", handle_metrics); - svr->Get ("/props", handle_props); - svr->Post("/props", handle_props_change); - svr->Post("/api/show", handle_api_show); - svr->Get ("/models", handle_models); // public endpoint (no API key check) - svr->Get ("/v1/models", handle_models); // public endpoint (no API key check) - svr->Get ("/api/tags", handle_models); // ollama specific endpoint. public endpoint (no API key check) - svr->Post("/completion", handle_completions); // legacy - svr->Post("/completions", handle_completions); - svr->Post("/v1/completions", handle_completions_oai); - svr->Post("/chat/completions", handle_chat_completions); - svr->Post("/v1/chat/completions", handle_chat_completions); - svr->Post("/api/chat", handle_chat_completions); // ollama specific endpoint - svr->Post("/infill", handle_infill); - svr->Post("/embedding", handle_embeddings); // legacy - svr->Post("/embeddings", handle_embeddings); - svr->Post("/v1/embeddings", handle_embeddings_oai); - svr->Post("/rerank", handle_rerank); - svr->Post("/reranking", handle_rerank); - svr->Post("/v1/rerank", handle_rerank); - svr->Post("/v1/reranking", handle_rerank); - svr->Post("/tokenize", handle_tokenize); - svr->Post("/detokenize", handle_detokenize); - svr->Post("/apply-template", handle_apply_template); + svr->Get (params.api_prefix + "/health", handle_health); // public endpoint (no API key check) + svr->Get (params.api_prefix + "/metrics", handle_metrics); + svr->Get (params.api_prefix + "/props", handle_props); + svr->Post(params.api_prefix + "/props", handle_props_change); + svr->Post(params.api_prefix + "/api/show", handle_api_show); + svr->Get (params.api_prefix + "/models", handle_models); // public endpoint (no API key check) + svr->Get (params.api_prefix + "/v1/models", handle_models); // public endpoint (no API key check) + svr->Get (params.api_prefix + "/api/tags", handle_models); // ollama specific endpoint. public endpoint (no API key check) + svr->Post(params.api_prefix + "/completion", handle_completions); // legacy + svr->Post(params.api_prefix + "/completions", handle_completions); + svr->Post(params.api_prefix + "/v1/completions", handle_completions_oai); + svr->Post(params.api_prefix + "/chat/completions", handle_chat_completions); + svr->Post(params.api_prefix + "/v1/chat/completions", handle_chat_completions); + svr->Post(params.api_prefix + "/api/chat", handle_chat_completions); // ollama specific endpoint + svr->Post(params.api_prefix + "/infill", handle_infill); + svr->Post(params.api_prefix + "/embedding", handle_embeddings); // legacy + svr->Post(params.api_prefix + "/embeddings", handle_embeddings); + svr->Post(params.api_prefix + "/v1/embeddings", handle_embeddings_oai); + svr->Post(params.api_prefix + "/rerank", handle_rerank); + svr->Post(params.api_prefix + "/reranking", handle_rerank); + svr->Post(params.api_prefix + "/v1/rerank", handle_rerank); + svr->Post(params.api_prefix + "/v1/reranking", handle_rerank); + svr->Post(params.api_prefix + "/tokenize", handle_tokenize); + svr->Post(params.api_prefix + "/detokenize", handle_detokenize); + svr->Post(params.api_prefix + "/apply-template", handle_apply_template); // LoRA adapters hotswap - svr->Get ("/lora-adapters", handle_lora_adapters_list); - svr->Post("/lora-adapters", handle_lora_adapters_apply); + svr->Get (params.api_prefix + "/lora-adapters", handle_lora_adapters_list); + svr->Post(params.api_prefix + "/lora-adapters", handle_lora_adapters_apply); // Save & load slots - svr->Get ("/slots", handle_slots); - svr->Post("/slots/:id_slot", handle_slots_action); + svr->Get (params.api_prefix + "/slots", handle_slots); + svr->Post(params.api_prefix + "/slots/:id_slot", handle_slots_action); // // Start the server