Skip to content

Commit f1ebe83

Browse files
refactor: refactor the priority and on/offline code.
1 parent ac92836 commit f1ebe83

25 files changed

+151
-167
lines changed

xllm/core/common/global_flags.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -202,6 +202,6 @@ DEFINE_int32(heart_beat_interval, 3, "heart beat interval");
202202

203203
DEFINE_string(priority_strategy, "FCFS", "priority strategy for requests");
204204

205-
DEFINE_bool(enable_on_preempt_off,
205+
DEFINE_bool(enable_online_preempt_offline,
206206
true,
207207
"whether enable online preempt offline");

xllm/core/common/global_flags.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -129,4 +129,4 @@ DECLARE_bool(use_zero_evict);
129129

130130
DECLARE_string(priority_strategy);
131131

132-
DECLARE_bool(enable_on_preempt_off);
132+
DECLARE_bool(enable_online_preempt_offline);

xllm/core/common/metrics.cpp

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -88,13 +88,13 @@ DEFINE_GAUGE(num_running_requests, "Number of running requests in scheduler");
8888
DEFINE_GAUGE(num_waiting_requests, "Number of waiting requests in scheduler");
8989
DEFINE_GAUGE(num_preempted_requests,
9090
"Number of preempted requests in scheduler");
91-
DEFINE_GAUGE(num_offd_preempt_off_requests,
91+
DEFINE_GAUGE(num_offline_decode_preempt_offline_requests,
9292
"Number of offline decode preempt offline requests in scheduler");
93-
DEFINE_GAUGE(num_ond_preempt_on_requests,
93+
DEFINE_GAUGE(num_online_decode_preempt_online_requests,
9494
"Number of online decode preempt online requests in scheduler");
95-
DEFINE_GAUGE(num_onp_preempt_off_requests,
95+
DEFINE_GAUGE(num_online_prefill_preempt_offline_requests,
9696
"Number of online prefill preempt offline requests in scheduler");
97-
DEFINE_GAUGE(num_ond_preempt_off_requests,
97+
DEFINE_GAUGE(num_online_decode_preempt_offline_requests,
9898
"Number of online decode preempt offline requests in scheduler");
9999

100100
DEFINE_GAUGE(num_running_sequences, "Number of running sequences");

xllm/core/common/metrics.h

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -149,10 +149,10 @@ DECLARE_GAUGE(num_pending_requests);
149149
DECLARE_GAUGE(num_running_requests);
150150
DECLARE_GAUGE(num_waiting_requests);
151151
DECLARE_GAUGE(num_preempted_requests);
152-
DECLARE_GAUGE(num_offd_preempt_off_requests);
153-
DECLARE_GAUGE(num_ond_preempt_on_requests);
154-
DECLARE_GAUGE(num_onp_preempt_off_requests);
155-
DECLARE_GAUGE(num_ond_preempt_off_requests);
152+
DECLARE_GAUGE(num_offline_decode_preempt_offline_requests);
153+
DECLARE_GAUGE(num_online_decode_preempt_online_requests);
154+
DECLARE_GAUGE(num_online_prefill_preempt_offline_requests);
155+
DECLARE_GAUGE(num_online_decode_preempt_offline_requests);
156156
DECLARE_GAUGE(num_running_sequences);
157157
DECLARE_GAUGE(kv_cache_utilization_perc);
158158
DECLARE_GAUGE(num_blocks_in_prefix_cache);

xllm/core/common/options.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -116,7 +116,7 @@ class Options {
116116

117117
PROPERTY(std::string, priority_strategy) = "FCFS";
118118

119-
PROPERTY(bool, enable_on_preempt_off) = true;
119+
PROPERTY(bool, enable_online_preempt_offline) = true;
120120
};
121121

122122
} // namespace xllm

xllm/core/distributed_runtime/disagg_pd_service_impl.cpp

Lines changed: 9 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -90,14 +90,15 @@ std::shared_ptr<Request> DisaggPDServiceImpl::generate_request(
9090
output_callback,
9191
batch_output_callback);
9292

93-
auto new_request = std::make_shared<Request>(req.req_id(),
94-
req.x_request_id(),
95-
req.x_request_time(),
96-
std::move(req_state),
97-
req.service_req_id(),
98-
req.offline(),
99-
req.slo_ms(),
100-
req.priority());
93+
auto new_request = std::make_shared<Request>(
94+
req.req_id(),
95+
req.x_request_id(),
96+
req.x_request_time(),
97+
std::move(req_state),
98+
req.service_req_id(),
99+
req.offline(),
100+
req.slo_ms(),
101+
static_cast<xllm::RequestPriority>(req.priority()));
101102

102103
// add one sequence, rest will be added by scheduler
103104
return new_request;

xllm/core/framework/block/block_manager.h

Lines changed: 0 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -62,10 +62,6 @@ class BlockManager {
6262

6363
virtual void cache(const Slice<int32_t>& token_ids,
6464
const Slice<Block>& blocks) = 0;
65-
virtual bool check_if_enough_to_evict(
66-
DecodePriorityQueue* running_queue_to_evict,
67-
Sequence* prefill_sequence,
68-
size_t& num_request_to_evict) = 0;
6965

7066
// get merged all dp rank KVCacheEvent
7167
virtual void get_merged_kvcache_event(KvCacheEvent* event) const = 0;

xllm/core/framework/block/block_manager_impl.cpp

Lines changed: 0 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -74,33 +74,6 @@ void BlockManagerImpl::deallocate(const Slice<Block>& blocks) {
7474
}
7575
}
7676

77-
bool BlockManagerImpl::check_if_enough_to_evict(
78-
DecodePriorityQueue* running_queue_to_evict,
79-
Sequence* prefill_sequence,
80-
size_t& num_request_to_evict) {
81-
// check if it's enough when we evict this requests queue
82-
83-
const size_t num_blocks_needed =
84-
(prefill_sequence->num_tokens() + block_size_ - 1) / block_size_;
85-
size_t num_blocks_can_evict = 0;
86-
// count the number of blocks can be preempted
87-
for (auto it = running_queue_to_evict->rbegin();
88-
it != running_queue_to_evict->rend();
89-
++it) {
90-
std::shared_ptr<Request> request_to_preempt = *it;
91-
num_request_to_evict++;
92-
// count the number of blocks belong to the request
93-
for (const auto& seq : request_to_preempt->sequences()) {
94-
num_blocks_can_evict += seq->kv_state().num_kv_blocks();
95-
}
96-
if ((num_blocks_needed <= num_blocks_can_evict) ||
97-
has_enough_blocks(num_blocks_needed - num_blocks_can_evict)) {
98-
return true;
99-
}
100-
}
101-
return false;
102-
}
103-
10477
bool BlockManagerImpl::has_enough_blocks(uint32_t num_blocks) {
10578
if (num_blocks <= num_free_blocks_) {
10679
return true;

xllm/core/framework/block/block_manager_impl.h

Lines changed: 0 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -46,10 +46,6 @@ class BlockManagerImpl : public BlockManager {
4646

4747
void get_merged_kvcache_event(KvCacheEvent* event) const override;
4848

49-
bool check_if_enough_to_evict(DecodePriorityQueue* running_queue_to_evict,
50-
Sequence* prefill_sequence,
51-
size_t& num_request_to_evict) override;
52-
5349
size_t num_blocks_in_prefix_cache() const override {
5450
if (options_.enable_prefix_cache()) {
5551
CHECK(prefix_cache_);

xllm/core/framework/block/block_manager_pool.cpp

Lines changed: 0 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -92,16 +92,6 @@ bool BlockManagerPool::allocate(Sequence* sequence) {
9292
return allocate(sequence, sequence->num_tokens());
9393
}
9494

95-
bool BlockManagerPool::check_if_enough_to_evict(
96-
DecodePriorityQueue* running_queue_to_evict,
97-
Sequence* prefill_sequence,
98-
size_t& num_request_to_evict) {
99-
DCHECK(prefill_sequence != nullptr);
100-
int32_t dp_rank = prefill_sequence->dp_rank();
101-
return block_managers_[dp_rank]->check_if_enough_to_evict(
102-
running_queue_to_evict, prefill_sequence, num_request_to_evict);
103-
}
104-
10595
bool BlockManagerPool::allocate(std::vector<Sequence*>& sequences) {
10696
for (auto* sequence : sequences) {
10797
DCHECK(sequence != nullptr);

0 commit comments

Comments
 (0)