@@ -34,6 +34,7 @@ import (
3434 "k8s.io/client-go/rest"
3535 "k8s.io/component-base/metrics/legacyregistry"
3636 ctrl "sigs.k8s.io/controller-runtime"
37+ "sigs.k8s.io/controller-runtime/pkg/log"
3738 "sigs.k8s.io/controller-runtime/pkg/log/zap"
3839 "sigs.k8s.io/controller-runtime/pkg/manager"
3940 "sigs.k8s.io/controller-runtime/pkg/metrics/filters"
@@ -43,7 +44,13 @@ import (
4344 "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/metrics"
4445 "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/metrics/collectors"
4546 "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/scheduling"
47+ "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/scheduling/plugins"
48+ "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/scheduling/plugins/filter"
49+ "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/scheduling/plugins/picker"
50+ "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/scheduling/plugins/prefix"
51+ "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/scheduling/plugins/scorer"
4652 runserver "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/server"
53+ envutil "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/util/env"
4754 "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/util/logging"
4855)
4956
@@ -107,8 +114,22 @@ var (
107114 "Prometheus metric for the LoRA info metrics (must be in vLLM label format)." )
108115
109116 setupLog = ctrl .Log .WithName ("setup" )
117+
118+ // Environment variables
119+ schedulerV2 = envutil .GetEnvString ("EXPERIMENTAL_USE_SCHEDULER_V2" , "false" , setupLog )
120+ prefixCacheScheduling = envutil .GetEnvString ("ENABLE_PREFIX_CACHE_SCHEDULING" , "false" , setupLog )
110121)
111122
123+ func loadPrefixCacheConfig () prefix.Config {
124+ baseLogger := log .Log .WithName ("env-config" )
125+
126+ return prefix.Config {
127+ HashBlockSize : envutil .GetEnvInt ("PREFIX_CACHE_HASH_BLOCK_SIZE" , prefix .DefaultHashBlockSize , baseLogger ),
128+ MaxPrefixBlocksToMatch : envutil .GetEnvInt ("PREFIX_CACHE_MAX_PREFIX_BLOCKS" , prefix .DefaultMaxPrefixBlocks , baseLogger ),
129+ LRUIndexerCapacity : envutil .GetEnvInt ("PREFIX_CACHE_LRU_CAPACITY" , prefix .DefaultLRUIndexerCapacity , baseLogger ),
130+ }
131+ }
132+
112133func main () {
113134 if err := run (); err != nil {
114135 os .Exit (1 )
@@ -172,6 +193,27 @@ func run() error {
172193 datastore := datastore .NewDatastore (ctx , pmf )
173194
174195 scheduler := scheduling .NewScheduler (datastore )
196+ if schedulerV2 == "true" {
197+ queueScorerWeight := envutil .GetEnvInt ("QUEUE_SCORE_WEIGHT" , scorer .DefaultQueueScorerWeight , setupLog )
198+ kvCacheScorerWeight := envutil .GetEnvInt ("KV_CACHE_SCORE_WEIGHT" , scorer .DefaultKVCacheScorerWeight , setupLog )
199+ scorers := map [plugins.Scorer ]int {
200+ & scorer.QueueScorer {}: queueScorerWeight ,
201+ & scorer.KVCacheScorer {}: kvCacheScorerWeight ,
202+ }
203+ schedConfigOpts := []scheduling.ConfigOption {}
204+ if prefixCacheScheduling == "true" {
205+ prefixScorerWeight := envutil .GetEnvInt ("PREFIX_CACHE_SCORE_WEIGHT" , prefix .DefaultScorerWeight , setupLog )
206+ schedConfigOpts = append (schedConfigOpts , scheduling .AddPrefixPlugin (loadPrefixCacheConfig (), prefixScorerWeight ))
207+ }
208+ schedulerConfig := scheduling .NewSchedulerConfig (
209+ []plugins.PreSchedule {},
210+ []plugins.Filter {filter .NewSheddableCapacityFilter ()},
211+ scorers ,
212+ picker .NewMaxScorePicker (),
213+ []plugins.PostSchedule {},
214+ schedConfigOpts ... )
215+ scheduler = scheduling .NewSchedulerWithConfig (datastore , schedulerConfig )
216+ }
175217 serverRunner := & runserver.ExtProcServerRunner {
176218 GrpcPort : * grpcPort ,
177219 DestinationEndpointHintMetadataNamespace : * destinationEndpointHintMetadataNamespace ,
0 commit comments