Skip to content

Commit 0f814c6

Browse files
authored
Merge branch 'main' into main
2 parents fbbd28b + 2e96809 commit 0f814c6

File tree

163 files changed

+7656
-999
lines changed

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

163 files changed

+7656
-999
lines changed

CMakeLists.txt

100644 → 100755
Lines changed: 3 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -28,20 +28,20 @@ if(USE_NPU)
2828
if(DEVICE_TYPE STREQUAL "USE_A3")
2929
message("downloading a3 arm xllm kernels")
3030
file(DOWNLOAD
31-
"https://9n-das-tools.s3.cn-north-1.jdcloud-oss.com/xllm-ai/xllm_kernels/0.6.0/xllm_kernels-1.3.1-Linux.a3.arm.rpm"
31+
"https://9n-das-tools.s3.cn-north-1.jdcloud-oss.com/xllm-ai/xllm_kernels/0.6.0/xllm_kernels-1.3.2-Linux.a3.arm.rpm"
3232
"${CMAKE_BINARY_DIR}/xllm_kernels.rpm"
3333
)
3434
else()
3535
if(DEVICE_ARCH STREQUAL "ARM")
3636
message("downloading a2 arm xllm_kernels")
3737
file(DOWNLOAD
38-
"https://9n-das-tools.s3.cn-north-1.jdcloud-oss.com/xllm-ai/xllm_kernels/0.6.0/xllm_kernels-1.3.1-Linux.a2.arm.rpm"
38+
"https://9n-das-tools.s3.cn-north-1.jdcloud-oss.com/xllm-ai/xllm_kernels/0.6.0/xllm_kernels-1.3.2-Linux.a2.arm.rpm"
3939
"${CMAKE_BINARY_DIR}/xllm_kernels.rpm"
4040
)
4141
else()
4242
message("downloading a2 x86 xllm_kernels")
4343
file(DOWNLOAD
44-
"https://9n-das-tools.s3.cn-north-1.jdcloud-oss.com/xllm-ai/xllm_kernels/0.6.0/xllm_kernels-1.3.1-Linux.a2.x86.rpm"
44+
"https://9n-das-tools.s3.cn-north-1.jdcloud-oss.com/xllm-ai/xllm_kernels/0.6.0/xllm_kernels-1.3.2-Linux.a2.x86.rpm"
4545
"${CMAKE_BINARY_DIR}/xllm_kernels.rpm"
4646
)
4747
endif()

cibuild/build_mlu.sh

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -6,7 +6,7 @@ function error() {
66
exit 1
77
}
88

9-
IMAGE="cambricon-base/pytorch:v25.06.0-torch2.7.1-torchmlu1.27.2-ubuntu22.04-py310_xllm251016"
9+
IMAGE="cambricon-base/pytorch:v25.06.0-torch2.7.1-torchmlu1.27.2-ubuntu22.04-py310_xllm251104"
1010

1111
RUN_OPTS=(
1212
--rm

docs/zh/cli_reference.md

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -70,7 +70,7 @@ xLLM使用gflags来管理服务启动参数,具体的参数含义如下:
7070
|:---------:|:---------:|:---------:|:---------:|:---------:|:---------:|
7171
| `max_concurrent_requests` | int32 | 0 | 任意大于0的整数 | 限流用,限制实例中正在处理的总请求数 | |
7272
| `model_id` | string | "" | ip:port | 模型名称,非路径 | |
73-
| `num_handling_threads` | int32 | 4 | 任意大于0的整数 | 处理输入请求的线程池大小 | |
73+
| `num_request_handling_threads` | int32 | 4 | 任意大于0的整数 | 处理输入请求的线程池大小 | |
7474
| `num_response_handling_threads` | int32 | 4 | 任意大于0的整数 | 处理输出的线程池大小 | |
7575
| `prefill_scheduling_memory_usage_threshold` | double | 0.95 | 0-1之间的值 | 当kv cache使用量达到该阈值时,暂停prefill请求的调度 | |
7676
| `num_response_handling_threads` | int32 | 4 | 任意大于0的整数 | 处理输出的线程池大小 | |

third_party/xllm_ops

Submodule xllm_ops updated from 2cda9bf to 797a0cb

xllm/CMakeLists.txt

Lines changed: 2 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -6,8 +6,9 @@ add_subdirectory(api_service)
66
add_subdirectory(core)
77
add_subdirectory(function_call)
88
add_subdirectory(models)
9-
add_subdirectory(proto)
9+
add_subdirectory(parser)
1010
add_subdirectory(processors)
11+
add_subdirectory(proto)
1112
add_subdirectory(pybind)
1213
add_subdirectory(server)
1314

xllm/api_service/CMakeLists.txt

Lines changed: 5 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -12,10 +12,12 @@ cc_library(
1212
embedding_service_impl.h
1313
image_generation_service_impl.h
1414
rerank_service_impl.h
15+
qwen3_rerank_service_impl.h
1516
non_stream_call.h
1617
service_impl_factory.h
1718
stream_call.h
1819
models_service_impl.h
20+
stream_output_parser.h
1921
SRCS
2022
api_service.cpp
2123
call.cpp
@@ -25,6 +27,8 @@ cc_library(
2527
image_generation_service_impl.cpp
2628
models_service_impl.cpp
2729
rerank_service_impl.cpp
30+
stream_output_parser.cpp
31+
qwen3_rerank_service_impl.cpp
2832
DEPS
2933
:master
3034
:chat_template
@@ -34,6 +38,7 @@ cc_library(
3438
absl::flat_hash_set
3539
absl::random_random
3640
:function_call
41+
:reasoning
3742
torch
3843
$<$<BOOL:${USE_NPU}>:torch_npu>
3944
)

xllm/api_service/api_service.cpp

Lines changed: 41 additions & 24 deletions
Original file line number | Diff line number | Diff line change
@@ -51,9 +51,15 @@ APIService::APIService(Master* master,
5151
embedding_service_impl_ =
5252
ServiceImplFactory<EmbeddingServiceImpl>::create_service_impl(
5353
llm_master, model_names);
54-
rerank_service_impl_ =
55-
ServiceImplFactory<RerankServiceImpl>::create_service_impl(llm_master,
56-
model_names);
54+
if (FLAGS_enable_qwen3_reranker) {
55+
rerank_service_impl_ =
56+
ServiceImplFactory<Qwen3RerankServiceImpl>::create_service_impl(
57+
llm_master, model_names);
58+
} else {
59+
rerank_service_impl_ =
60+
ServiceImplFactory<RerankServiceImpl>::create_service_impl(
61+
llm_master, model_names);
62+
}
5763
} else if (FLAGS_backend == "vlm") {
5864
auto vlm_master = dynamic_cast<VLMMaster*>(master);
5965
mm_chat_service_impl_ =
@@ -95,9 +101,11 @@ void APIService::CompletionsHttp(::google::protobuf::RpcController* controller,
95101
google::protobuf::Arena::CreateMessage<proto::CompletionResponse>(arena);
96102

97103
auto ctrl = reinterpret_cast<brpc::Controller*>(controller);
98-
std::string attachment = std::move(ctrl->request_attachment().to_string());
99104
std::string error;
100-
auto st = json2pb::JsonToProtoMessage(attachment, req_pb, &error);
105+
json2pb::Json2PbOptions options;
106+
butil::IOBuf& buf = ctrl->request_attachment();
107+
butil::IOBufAsZeroCopyInputStream iobuf_stream(buf);
108+
auto st = json2pb::JsonToProtoMessage(&iobuf_stream, req_pb, options, &error);
101109
if (!st) {
102110
ctrl->SetFailed(error);
103111
LOG(ERROR) << "parse json to proto failed: " << error;
@@ -127,16 +135,14 @@ void ChatCompletionsImpl(std::unique_ptr<Service>& service,
127135
auto resp_pb =
128136
google::protobuf::Arena::CreateMessage<typename ChatCall::ResType>(arena);
129137

130-
std::string attachment = std::move(ctrl->request_attachment().to_string());
131138
std::string error;
132-
133-
google::protobuf::util::JsonParseOptions options;
134-
options.ignore_unknown_fields = true;
135-
auto json_status =
136-
google::protobuf::util::JsonStringToMessage(attachment, req_pb, options);
137-
if (!json_status.ok()) {
138-
ctrl->SetFailed(json_status.ToString());
139-
LOG(ERROR) << "parse json to proto failed: " << json_status.ToString();
139+
json2pb::Json2PbOptions options;
140+
butil::IOBuf& buf = ctrl->request_attachment();
141+
butil::IOBufAsZeroCopyInputStream iobuf_stream(buf);
142+
auto st = json2pb::JsonToProtoMessage(&iobuf_stream, req_pb, options, &error);
143+
if (!st) {
144+
ctrl->SetFailed(error);
145+
LOG(ERROR) << "parse json to proto failed: " << buf.to_string();
140146
return;
141147
}
142148

@@ -201,9 +207,11 @@ void APIService::EmbeddingsHttp(::google::protobuf::RpcController* controller,
201207
google::protobuf::Arena::CreateMessage<proto::EmbeddingResponse>(arena);
202208

203209
auto ctrl = reinterpret_cast<brpc::Controller*>(controller);
204-
std::string attachment = std::move(ctrl->request_attachment().to_string());
205210
std::string error;
206-
auto st = json2pb::JsonToProtoMessage(attachment, req_pb, &error);
211+
json2pb::Json2PbOptions options;
212+
butil::IOBuf& buf = ctrl->request_attachment();
213+
butil::IOBufAsZeroCopyInputStream iobuf_stream(buf);
214+
auto st = json2pb::JsonToProtoMessage(&iobuf_stream, req_pb, options, &error);
207215
if (!st) {
208216
ctrl->SetFailed(error);
209217
LOG(ERROR) << "parse json to proto failed: " << error;
@@ -248,10 +256,13 @@ void APIService::ImageGenerationHttp(
248256
auto resp_pb =
249257
google::protobuf::Arena::CreateMessage<proto::ImageGenerationResponse>(
250258
arena);
259+
251260
auto ctrl = reinterpret_cast<brpc::Controller*>(controller);
252-
std::string attachment = std::move(ctrl->request_attachment().to_string());
253261
std::string error;
254-
auto st = json2pb::JsonToProtoMessage(attachment, req_pb, &error);
262+
json2pb::Json2PbOptions options;
263+
butil::IOBuf& buf = ctrl->request_attachment();
264+
butil::IOBufAsZeroCopyInputStream iobuf_stream(buf);
265+
auto st = json2pb::JsonToProtoMessage(&iobuf_stream, req_pb, options, &error);
255266
if (!st) {
256267
ctrl->SetFailed(error);
257268
LOG(ERROR) << "parse json to proto failed: " << error;
@@ -290,9 +301,11 @@ void APIService::RerankHttp(::google::protobuf::RpcController* controller,
290301
google::protobuf::Arena::CreateMessage<proto::RerankResponse>(arena);
291302

292303
auto ctrl = reinterpret_cast<brpc::Controller*>(controller);
293-
std::string attachment = std::move(ctrl->request_attachment().to_string());
294304
std::string error;
295-
auto st = json2pb::JsonToProtoMessage(attachment, req_pb, &error);
305+
json2pb::Json2PbOptions options;
306+
butil::IOBuf& buf = ctrl->request_attachment();
307+
butil::IOBufAsZeroCopyInputStream iobuf_stream(buf);
308+
auto st = json2pb::JsonToProtoMessage(&iobuf_stream, req_pb, options, &error);
296309
if (!st) {
297310
ctrl->SetFailed(error);
298311
LOG(ERROR) << "parse json to proto failed: " << error;
@@ -398,9 +411,11 @@ void APIService::LinkCluster(::google::protobuf::RpcController* controller,
398411
google::protobuf::Arena::CreateMessage<proto::RpcStatus>(arena);
399412

400413
auto ctrl = reinterpret_cast<brpc::Controller*>(controller);
401-
std::string attachment = std::move(ctrl->request_attachment().to_string());
402414
std::string error;
403-
auto st = json2pb::JsonToProtoMessage(attachment, req_pb, &error);
415+
json2pb::Json2PbOptions options;
416+
butil::IOBuf& buf = ctrl->request_attachment();
417+
butil::IOBufAsZeroCopyInputStream iobuf_stream(buf);
418+
auto st = json2pb::JsonToProtoMessage(&iobuf_stream, req_pb, options, &error);
404419
if (!st) {
405420
ctrl->SetFailed(error);
406421
LOG(ERROR) << "parse json to proto failed: " << error;
@@ -452,9 +467,11 @@ void APIService::UnlinkCluster(::google::protobuf::RpcController* controller,
452467
google::protobuf::Arena::CreateMessage<proto::RpcStatus>(arena);
453468

454469
auto ctrl = reinterpret_cast<brpc::Controller*>(controller);
455-
std::string attachment = std::move(ctrl->request_attachment().to_string());
456470
std::string error;
457-
auto st = json2pb::JsonToProtoMessage(attachment, req_pb, &error);
471+
json2pb::Json2PbOptions options;
472+
butil::IOBuf& buf = ctrl->request_attachment();
473+
butil::IOBufAsZeroCopyInputStream iobuf_stream(buf);
474+
auto st = json2pb::JsonToProtoMessage(&iobuf_stream, req_pb, options, &error);
458475
if (!st) {
459476
ctrl->SetFailed(error);
460477
LOG(ERROR) << "parse json to proto failed: " << error;

xllm/api_service/api_service.h

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -20,6 +20,7 @@ limitations under the License.
2020
#include "embedding_service_impl.h"
2121
#include "image_generation_service_impl.h"
2222
#include "models_service_impl.h"
23+
#include "qwen3_rerank_service_impl.h"
2324
#include "rerank_service_impl.h"
2425
#include "xllm_service.pb.h"
2526

0 commit comments

Comments
 (0)