Skip to content

Commit 4aa32aa

Browse files
refactor: refactor the priority and on/offline code.
1 parent ac92836 commit 4aa32aa

18 files changed

+147
-101
lines changed

xllm/core/common/global_flags.cpp

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -202,6 +202,6 @@ DEFINE_int32(heart_beat_interval, 3, "heart beat interval");
202202

203203
DEFINE_string(priority_strategy, "FCFS", "priority strategy for requests");
204204

205-
DEFINE_bool(enable_on_preempt_off,
205+
DEFINE_bool(enable_online_preempt_offline,
206206
true,
207207
"whether enable online preempt offline");

xllm/core/common/global_flags.h

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -129,4 +129,4 @@ DECLARE_bool(use_zero_evict);
129129

130130
DECLARE_string(priority_strategy);
131131

132-
DECLARE_bool(enable_on_preempt_off);
132+
DECLARE_bool(enable_online_preempt_offline);

xllm/core/common/metrics.cpp

Lines changed: 4 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -88,13 +88,13 @@ DEFINE_GAUGE(num_running_requests, "Number of running requests in scheduler");
8888
DEFINE_GAUGE(num_waiting_requests, "Number of waiting requests in scheduler");
8989
DEFINE_GAUGE(num_preempted_requests,
9090
"Number of preempted requests in scheduler");
91-
DEFINE_GAUGE(num_offd_preempt_off_requests,
91+
DEFINE_GAUGE(num_offline_decode_preempt_offline_requests,
9292
"Number of offline decode preempt offline requests in scheduler");
93-
DEFINE_GAUGE(num_ond_preempt_on_requests,
93+
DEFINE_GAUGE(num_online_decode_preempt_online_requests,
9494
"Number of online decode preempt online requests in scheduler");
95-
DEFINE_GAUGE(num_onp_preempt_off_requests,
95+
DEFINE_GAUGE(num_online_prefill_preempt_offline_requests,
9696
"Number of online prefill preempt offline requests in scheduler");
97-
DEFINE_GAUGE(num_ond_preempt_off_requests,
97+
DEFINE_GAUGE(num_online_decode_preempt_offline_requests,
9898
"Number of online decode preempt offline requests in scheduler");
9999

100100
DEFINE_GAUGE(num_running_sequences, "Number of running sequences");

xllm/core/common/metrics.h

Lines changed: 4 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -149,10 +149,10 @@ DECLARE_GAUGE(num_pending_requests);
149149
DECLARE_GAUGE(num_running_requests);
150150
DECLARE_GAUGE(num_waiting_requests);
151151
DECLARE_GAUGE(num_preempted_requests);
152-
DECLARE_GAUGE(num_offd_preempt_off_requests);
153-
DECLARE_GAUGE(num_ond_preempt_on_requests);
154-
DECLARE_GAUGE(num_onp_preempt_off_requests);
155-
DECLARE_GAUGE(num_ond_preempt_off_requests);
152+
DECLARE_GAUGE(num_offline_decode_preempt_offline_requests);
153+
DECLARE_GAUGE(num_online_decode_preempt_online_requests);
154+
DECLARE_GAUGE(num_online_prefill_preempt_offline_requests);
155+
DECLARE_GAUGE(num_online_decode_preempt_offline_requests);
156156
DECLARE_GAUGE(num_running_sequences);
157157
DECLARE_GAUGE(kv_cache_utilization_perc);
158158
DECLARE_GAUGE(num_blocks_in_prefix_cache);

xllm/core/common/options.h

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -116,7 +116,7 @@ class Options {
116116

117117
PROPERTY(std::string, priority_strategy) = "FCFS";
118118

119-
PROPERTY(bool, enable_on_preempt_off) = true;
119+
PROPERTY(bool, enable_online_preempt_offline) = true;
120120
};
121121

122122
} // namespace xllm

xllm/core/distributed_runtime/disagg_pd_service_impl.cpp

Lines changed: 9 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -90,14 +90,15 @@ std::shared_ptr<Request> DisaggPDServiceImpl::generate_request(
9090
output_callback,
9191
batch_output_callback);
9292

93-
auto new_request = std::make_shared<Request>(req.req_id(),
94-
req.x_request_id(),
95-
req.x_request_time(),
96-
std::move(req_state),
97-
req.service_req_id(),
98-
req.offline(),
99-
req.slo_ms(),
100-
req.priority());
93+
auto new_request = std::make_shared<Request>(
94+
req.req_id(),
95+
req.x_request_id(),
96+
req.x_request_time(),
97+
std::move(req_state),
98+
req.service_req_id(),
99+
req.offline(),
100+
req.slo_ms(),
101+
static_cast<xllm::RequestPriority>(req.priority()));
101102

102103
// add one sequence, rest will be added by scheduler
103104
return new_request;

xllm/core/framework/request/request.cpp

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -36,7 +36,7 @@ Request::Request(const std::string& request_id,
3636
const std::string& service_request_id,
3737
bool offline,
3838
int32_t slo_ms,
39-
xllm::proto::Priority priority)
39+
RequestPriority priority)
4040
: request_id_(request_id),
4141
service_request_id_(service_request_id),
4242
x_request_id_(x_request_id),

xllm/core/framework/request/request.h

Lines changed: 5 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -31,6 +31,8 @@ limitations under the License.
3131

3232
namespace xllm {
3333

34+
enum class RequestPriority { DEFAULT = 0, HIGH = 1, NORMAL = 2, LOW = 3 };
35+
3436
class Request {
3537
public:
3638
Request(const std::string& request_id,
@@ -40,7 +42,7 @@ class Request {
4042
const std::string& service_request_id = "",
4143
bool offline = false,
4244
int32_t slo_ms = 0,
43-
xllm::proto::Priority priority = xllm::proto::Priority::NORMAL);
45+
RequestPriority priority = RequestPriority::NORMAL);
4446

4547
bool finished() const;
4648

@@ -86,7 +88,7 @@ class Request {
8688

8789
const bool offline() const { return offline_; }
8890
const int32_t slo_ms() const { return slo_ms_; }
89-
const xllm::proto::Priority priority() const { return priority_; }
91+
const RequestPriority priority() const { return priority_; }
9092

9193
RequestState& state() { return state_; }
9294

@@ -119,7 +121,7 @@ class Request {
119121

120122
int32_t slo_ms_;
121123

122-
xllm::proto::Priority priority_;
124+
RequestPriority priority_;
123125

124126
private:
125127
void create_sequences_group();

xllm/core/framework/request/request_params.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -54,7 +54,7 @@ RequestParams::RequestParams(const proto::CompletionRequest& request,
5454
slo_ms = request.slo_ms();
5555
}
5656
if (request.has_priority()) {
57-
priority = request.priority();
57+
priority = static_cast<xllm::RequestPriority>(request.priority());
5858
}
5959

6060
if (request.has_service_request_id()) {
@@ -203,7 +203,7 @@ void InitFromChatRequest(RequestParams& params, const ChatRequest& request) {
203203
params.slo_ms = request.slo_ms();
204204
}
205205
if (request.has_priority()) {
206-
params.priority = request.priority();
206+
params.priority = static_cast<xllm::RequestPriority>(request.priority());
207207
}
208208

209209
if (request.has_service_request_id()) {

xllm/core/framework/request/request_params.h

Lines changed: 2 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -29,6 +29,7 @@ limitations under the License.
2929
#include "core/common/types.h"
3030
#include "embedding.pb.h"
3131
#include "multimodal.pb.h"
32+
#include "request.h"
3233
#include "request_output.h"
3334

3435
namespace xllm {
@@ -130,7 +131,7 @@ struct RequestParams {
130131

131132
int32_t slo_ms = 0;
132133

133-
xllm::proto::Priority priority = xllm::proto::Priority::NORMAL;
134+
RequestPriority priority = RequestPriority::NORMAL;
134135
};
135136

136137
} // namespace xllm

0 commit comments

Comments
 (0)