@@ -90,10 +90,14 @@ struct Core {
     /// When a task is scheduled from a worker, it is stored in this slot. The
     /// worker will check this slot for a task **before** checking the run
     /// queue. This effectively results in the **last** scheduled task to be run
-    /// next (LIFO). This is an optimization for message passing patterns and
-    /// helps to reduce latency.
+    /// next (LIFO). This is an optimization for improving locality, which
+    /// benefits message passing patterns and helps to reduce latency.
     lifo_slot: Option<Notified>,
 
+    /// When `true`, locally scheduled tasks go to the LIFO slot. When `false`,
+    /// they go to the back of the `run_queue`.
+    lifo_enabled: bool,
+
     /// The worker-local run queue.
     run_queue: queue::Local<Arc<Handle>>,
 
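The doc comments above describe the scheduling contract: a worker drains the LIFO slot before its FIFO run queue, and the new `lifo_enabled` flag only controls where a locally scheduled task lands, not the pop order. As a rough illustration of that contract, here is a minimal standalone sketch; the simplified `Core`, the `Task = u64` alias, and the `schedule_local`/`next_task` helpers are illustrative stand-ins, not Tokio's real types.

```rust
use std::collections::VecDeque;

type Task = u64; // stand-in for `task::Notified<Arc<Handle>>`

struct Core {
    lifo_slot: Option<Task>,
    lifo_enabled: bool,
    run_queue: VecDeque<Task>,
}

impl Core {
    /// Schedule a task from this worker: prefer the LIFO slot when enabled,
    /// displacing any previous occupant to the back of the run queue.
    fn schedule_local(&mut self, task: Task) {
        if self.lifo_enabled {
            if let Some(prev) = self.lifo_slot.replace(task) {
                self.run_queue.push_back(prev);
            }
        } else {
            self.run_queue.push_back(task);
        }
    }

    /// The worker checks the LIFO slot **before** the run queue, so the most
    /// recently scheduled task runs next.
    fn next_task(&mut self) -> Option<Task> {
        self.lifo_slot.take().or_else(|| self.run_queue.pop_front())
    }
}

fn main() {
    let mut core = Core {
        lifo_slot: None,
        lifo_enabled: true,
        run_queue: VecDeque::new(),
    };
    core.schedule_local(1);
    core.schedule_local(2); // displaces task 1 into the run queue
    assert_eq!(core.next_task(), Some(2)); // last scheduled runs first (LIFO)
    assert_eq!(core.next_task(), Some(1));
}
```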
@@ -191,6 +195,12 @@ type Notified = task::Notified<Arc<Handle>>;
 // Tracks thread-local state
 scoped_thread_local!(static CURRENT: Context);
 
+/// Value picked out of thin air. Running the LIFO slot a handful of times
+/// seems sufficient to benefit from locality. More than 3 times is probably
+/// overkill. The value can be tuned in the future with data that shows
+/// improvements.
+const MAX_LIFO_POLLS_PER_TICK: usize = 3;
+
 pub(super) fn create(
     size: usize,
     park: Parker,
@@ -214,6 +224,7 @@ pub(super) fn create(
         cores.push(Box::new(Core {
             tick: 0,
             lifo_slot: None,
+            lifo_enabled: !config.disable_lifo_slot,
             run_queue,
             is_searching: false,
             is_shutdown: false,
@@ -422,7 +433,13 @@ fn run(worker: Arc<Worker>) {
 
 impl Context {
     fn run(&self, mut core: Box<Core>) -> RunResult {
+        // Reset `lifo_enabled` here in case the core was previously stolen from
+        // a task that had the LIFO slot disabled.
+        self.reset_lifo_enabled(&mut core);
+
         while !core.is_shutdown {
+            self.assert_lifo_enabled_is_correct(&core);
+
             // Increment the tick
             core.tick();
 
@@ -463,13 +480,16 @@ impl Context {
         // another idle worker to try to steal work.
         core.transition_from_searching(&self.worker);
 
+        self.assert_lifo_enabled_is_correct(&core);
+
         // Make the core available to the runtime context
         core.metrics.start_poll();
         *self.core.borrow_mut() = Some(core);
 
         // Run the task
         coop::budget(|| {
             task.run();
+            let mut lifo_polls = 0;
 
             // As long as there is budget remaining and a task exists in the
             // `lifo_slot`, then keep running.
@@ -478,7 +498,12 @@
                 // by another worker.
                 let mut core = match self.core.borrow_mut().take() {
                     Some(core) => core,
-                    None => return Err(()),
+                    None => {
+                        // In this case, we cannot call `reset_lifo_enabled()`
+                        // because the core was stolen. The stealer will handle
+                        // that at the top of `Context::run`.
+                        return Err(());
+                    }
                 };
 
                 // If task poll times is enabled, measure the poll time. Note
@@ -491,35 +516,62 @@
                 // Check for a task in the LIFO slot
                 let task = match core.lifo_slot.take() {
                     Some(task) => task,
-                    None => return Ok(core),
+                    None => {
+                        self.reset_lifo_enabled(&mut core);
+                        return Ok(core);
+                    }
                 };
 
-                // Polling a task doesn't necessarily consume any budget, if it
-                // doesn't use any Tokio leaf futures. To prevent such tasks
-                // from using the lifo slot in an infinite loop, we consume an
-                // extra unit of budget between each iteration of the loop.
-                coop::consume_one();
-
-                if coop::has_budget_remaining() {
-                    // Run the LIFO task, then loop
-                    core.metrics.start_poll();
-                    *self.core.borrow_mut() = Some(core);
-                    let task = self.worker.handle.shared.owned.assert_owner(task);
-                    task.run();
-                } else {
+                if !coop::has_budget_remaining() {
                     // Not enough budget left to run the LIFO task, push it to
                     // the back of the queue and return.
                     core.run_queue.push_back_or_overflow(
                         task,
                         self.worker.inject(),
                         &mut core.metrics,
                     );
+                    // If we hit this point, the LIFO slot should be enabled.
+                    // There is no need to reset it.
+                    debug_assert!(core.lifo_enabled);
                     return Ok(core);
                 }
+
+                // Track that we are about to run a task from the LIFO slot.
+                lifo_polls += 1;
+                super::counters::inc_lifo_schedules();
+
+                // Disable the LIFO slot if we reach our limit.
+                //
+                // In ping-pong style workloads where task A notifies task B,
+                // which notifies task A again, continuously prioritizing the
+                // LIFO slot can cause starvation as these two tasks will
+                // repeatedly schedule each other. To mitigate this, we limit
+                // the number of times the LIFO slot is prioritized.
+                if lifo_polls >= MAX_LIFO_POLLS_PER_TICK {
+                    core.lifo_enabled = false;
+                    super::counters::inc_lifo_capped();
+                }
+
+                // Run the LIFO task, then loop
+                core.metrics.start_poll();
+                *self.core.borrow_mut() = Some(core);
+                let task = self.worker.handle.shared.owned.assert_owner(task);
+                task.run();
             }
         })
     }
 
+    fn reset_lifo_enabled(&self, core: &mut Core) {
+        core.lifo_enabled = !self.worker.handle.shared.config.disable_lifo_slot;
+    }
+
+    fn assert_lifo_enabled_is_correct(&self, core: &Core) {
+        debug_assert_eq!(
+            core.lifo_enabled,
+            !self.worker.handle.shared.config.disable_lifo_slot
+        );
+    }
+
     fn maintenance(&self, mut core: Box<Core>) -> Box<Core> {
         if core.tick % self.worker.handle.shared.config.event_interval == 0 {
             super::counters::inc_num_maintenance();
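To make the capping logic above concrete, the following self-contained sketch models it under simplified assumptions: tasks are plain integers, `poll` stands in for `task.run()` plus the single wakeup it produces, and `run_task` plays the role of the `coop::budget` loop. None of these names are Tokio's real APIs; only `MAX_LIFO_POLLS_PER_TICK` mirrors the constant introduced in this change.

```rust
const MAX_LIFO_POLLS_PER_TICK: usize = 3;

struct Core {
    lifo_slot: Option<u64>,
    lifo_enabled: bool,
    run_queue: Vec<u64>,
}

/// Polling task `n` immediately wakes task `n + 1`. With the LIFO slot always
/// preferred, such a pair would ping-pong forever inside one worker tick.
fn poll(core: &mut Core, task: u64) {
    let next = task + 1;
    if core.lifo_enabled {
        if let Some(prev) = core.lifo_slot.replace(next) {
            core.run_queue.push(prev);
        }
    } else {
        core.run_queue.push(next);
    }
}

/// Run one task, then keep draining the LIFO slot. Once the cap is reached,
/// the slot is disabled so further wakeups fall back to the FIFO run queue
/// and the loop terminates, letting other queued tasks get a turn.
fn run_task(core: &mut Core, first: u64) {
    poll(core, first);
    let mut lifo_polls = 0;

    while let Some(task) = core.lifo_slot.take() {
        lifo_polls += 1;
        if lifo_polls >= MAX_LIFO_POLLS_PER_TICK {
            core.lifo_enabled = false;
        }
        poll(core, task);
    }
}

fn main() {
    let mut core = Core {
        lifo_slot: None,
        lifo_enabled: true,
        run_queue: Vec::new(),
    };
    run_task(&mut core, 0);
    // Task 0 ran first, tasks 1..=3 ran from the LIFO slot, then the slot was
    // disabled and task 4 landed in the run queue instead.
    assert!(!core.lifo_enabled);
    assert_eq!(core.run_queue, vec![4]);
}
```

As in the real change, nothing here re-enables the slot inside the loop; the flag is only reset at the top of `Context::run`, once the worker returns to its outer loop.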
@@ -573,6 +625,8 @@ impl Context {
     }
 
     fn park_timeout(&self, mut core: Box<Core>, duration: Option<Duration>) -> Box<Core> {
+        self.assert_lifo_enabled_is_correct(&core);
+
         // Take the parker out of core
         let mut park = core.park.take().expect("park missing");
 
@@ -840,7 +894,7 @@ impl Handle {
         // task must always be pushed to the back of the queue, enabling other
         // tasks to be executed. If **not** a yield, then there is more
         // flexibility and the task may go to the front of the queue.
-        let should_notify = if is_yield || self.shared.config.disable_lifo_slot {
+        let should_notify = if is_yield || !core.lifo_enabled {
             core.run_queue
                 .push_back_or_overflow(task, &self.shared.inject, &mut core.metrics);
             true
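On the scheduling side, the hunk above only shows the branch that changed. The sketch below models the whole decision; the `else` branch and the notify bookkeeping are paraphrased from surrounding code rather than shown in this diff, so treat those details (and the `schedule_local` name) as illustrative assumptions.

```rust
use std::collections::VecDeque;

type Task = u64; // illustrative stand-in

struct Core {
    lifo_slot: Option<Task>,
    lifo_enabled: bool,
    run_queue: VecDeque<Task>,
}

/// Returns `true` when another worker should be notified that work is
/// available. A yield, or a disabled/capped LIFO slot, always sends the task
/// to the back of the queue so other tasks get a chance to run.
fn schedule_local(core: &mut Core, task: Task, is_yield: bool) -> bool {
    if is_yield || !core.lifo_enabled {
        core.run_queue.push_back(task);
        true
    } else {
        // Put the task in the LIFO slot; any displaced task moves to the back
        // of the queue. The new occupant will be polled very soon by this
        // worker, so it does not need a notification of its own.
        match core.lifo_slot.replace(task) {
            Some(prev) => {
                core.run_queue.push_back(prev);
                true
            }
            None => false,
        }
    }
}

fn main() {
    let mut core = Core {
        lifo_slot: None,
        lifo_enabled: false, // e.g. capped after MAX_LIFO_POLLS_PER_TICK
        run_queue: VecDeque::new(),
    };
    // With the slot disabled, every wakeup lands in the FIFO queue and
    // notifies a peer worker.
    assert!(schedule_local(&mut core, 7, false));
    assert_eq!(core.run_queue.front(), Some(&7));
}
```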