@@ -30,6 +30,7 @@ BlockManagerImpl::BlockManagerImpl(const Options& options)
3030 }
3131
3232 size_t total_blocks = options_.num_blocks ();
33+ block_size_ = options_.block_size ();
3334 num_free_blocks_ = total_blocks;
3435 free_blocks_.reserve (total_blocks);
3536 for (int32_t i = 0 ; i < total_blocks; ++i) {
@@ -73,6 +74,33 @@ void BlockManagerImpl::deallocate(const Slice<Block>& blocks) {
7374 }
7475}
7576
77+ bool BlockManagerImpl::check_if_enough_to_evict (
78+ DecodePriorityQueue* running_queue_to_evict,
79+ Sequence* prefill_sequence,
80+ size_t & num_request_to_evict) {
81+ // check if it's enough when we evict this requests queue
82+
83+ const size_t num_blocks_needed =
84+ (prefill_sequence->num_tokens () + block_size_ - 1 ) / block_size_;
85+ size_t num_blocks_can_evict = 0 ;
86+ // count the number of blocks can be preempted
87+ for (auto it = running_queue_to_evict->rbegin ();
88+ it != running_queue_to_evict->rend ();
89+ ++it) {
90+ std::shared_ptr<Request> request_to_preempt = *it;
91+ num_request_to_evict++;
92+ // count the number of blocks belong to the request
93+ for (const auto & seq : request_to_preempt->sequences ()) {
94+ num_blocks_can_evict += seq->kv_state ().num_kv_blocks ();
95+ }
96+ if ((num_blocks_needed <= num_blocks_can_evict) ||
97+ has_enough_blocks (num_blocks_needed - num_blocks_can_evict)) {
98+ return true ;
99+ }
100+ }
101+ return false ;
102+ }
103+
76104bool BlockManagerImpl::has_enough_blocks (uint32_t num_blocks) {
77105 if (num_blocks <= num_free_blocks_) {
78106 return true ;
0 commit comments