Skip to content

Commit 9ef49b8

Browse files
vMaroon authored and clubanderson committed
fix PD scorers configurations (#133)
Signed-off-by: Maroon Ayoub <[email protected]>
1 parent 826e95a commit 9ef49b8

File tree

3 files changed

+29
-4
lines changed

3 files changed

+29
-4
lines changed

README.md

+12
Original file line numberDiff line numberDiff line change
@@ -59,6 +59,12 @@ export PREFILL_ENABLE_LOAD_AWARE_SCORER=true
5959
export PREFILL_LOAD_AWARE_SCORER_WEIGHT=1.0
6060
```
6161

62+
To enable and configure the prefix aware scorer for prefill, the following environment variables must be configured:
63+
```
64+
export PREFILL_ENABLE_PREFIX_AWARE_SCORER=true
65+
export PREFILL_PREFIX_AWARE_SCORER_WEIGHT=1.0
66+
```
67+
6268
Decode configuration:
6369

6470
To enable and configure the kv cache scorer for decode, the following environment variables must be configured:
@@ -72,6 +78,12 @@ To enable and configure the load aware scorer for decode, the following environm
7278
export DECODE_ENABLE_LOAD_AWARE_SCORER=true
7379
export DECODE_LOAD_AWARE_SCORER_WEIGHT=1.0
7480
```
81+
82+
To enable and configure the prefix aware scorer for decode, the following environment variables must be configured:
83+
```
84+
export DECODE_ENABLE_PREFIX_AWARE_SCORER=true
85+
export DECODE_PREFIX_AWARE_SCORER_WEIGHT=1.0
86+
```
7587
---
7688
[Inference Gateways]:#concepts-and-definitions
7789

pkg/epp/scheduling/config_utils.go

+5
Original file line numberDiff line numberDiff line change
@@ -29,13 +29,17 @@ import (
2929
const (
3030
prefillKvCacheScorerEnablementEnvVar = "PREFILL_ENABLE_KVCACHE_AWARE_SCORER"
3131
prefillLoadAwareScorerEnablementEnvVar = "PREFILL_ENABLE_LOAD_AWARE_SCORER"
32+
prefillPrefixScorerEnablementEnvVar = "PREFILL_ENABLE_PREFIX_AWARE_SCORER"
3233
decodeKvCacheScorerEnablementEnvVar = "DECODE_ENABLE_KVCACHE_AWARE_SCORER"
3334
decodeLoadAwareScorerEnablementEnvVar = "DECODE_ENABLE_LOAD_AWARE_SCORER"
35+
decodePrefixScorerEnablementEnvVar = "DECODE_ENABLE_PREFIX_AWARE_SCORER"
3436

3537
prefillKvCacheScorerWeightEnvVar = "PREFILL_KVCACHE_AWARE_SCORER_WEIGHT"
3638
prefillLoadAwareScorerWeightEnvVar = "PREFILL_LOAD_AWARE_SCORER_WEIGHT"
39+
prefillPrefixScorerWeightEnvVar = "PREFILL_PREFIX_AWARE_SCORER_WEIGHT"
3740
decodeKvCacheScorerWeightEnvVar = "DECODE_KVCACHE_AWARE_SCORER_WEIGHT"
3841
decodeLoadAwareScorerWeightEnvVar = "DECODE_LOAD_AWARE_SCORER_WEIGHT"
42+
decodePrefixScorerWeightEnvVar = "DECODE_PREFIX_AWARE_SCORER_WEIGHT"
3943

4044
pdEnabledEnvKey = "PD_ENABLED"
4145

@@ -46,6 +50,7 @@ const (
4650
const (
4751
loadAwareScorerName = "LoadAwareScorer"
4852
kvCacheAwareScorerName = "KVCacheAwareScorer"
53+
prefixAwareScorerName = "PrefixAwareScorer"
4954
)
5055

5156
func addScorerByEnvironment(ctx context.Context, config *SchedulerConfig, scorerName string, scorerEnabledEnvKey string, weightEnvKey string, logger logr.Logger) {

pkg/epp/scheduling/pd_config.go

+12 -4
Original file line numberDiff line numberDiff line change
@@ -66,12 +66,20 @@ func init() {
6666

6767
func loadPrefillConfiguration(ctx context.Context, logger logr.Logger) {
6868
// add scorers
69-
addScorerByEnvironment(ctx, prefillConfig, kvCacheAwareScorerName, kvCacheScorerEnablementEnvVar, kvCacheScorerWeightEnvVar, logger)
70-
addScorerByEnvironment(ctx, prefillConfig, loadAwareScorerName, loadAwareScorerEnablementEnvVar, loadAwareScorerWeightEnvVar, logger)
69+
addScorerByEnvironment(ctx, prefillConfig, kvCacheAwareScorerName, prefillKvCacheScorerEnablementEnvVar,
70+
prefillKvCacheScorerWeightEnvVar, logger)
71+
addScorerByEnvironment(ctx, prefillConfig, loadAwareScorerName, prefillLoadAwareScorerEnablementEnvVar,
72+
prefillLoadAwareScorerWeightEnvVar, logger)
73+
addScorerByEnvironment(ctx, prefillConfig, prefixAwareScorerName, prefillPrefixScorerEnablementEnvVar,
74+
prefillPrefixScorerWeightEnvVar, logger)
7175
}
7276

7377
func loadDecodeConfiguration(ctx context.Context, logger logr.Logger) {
7478
// add scorers
75-
addScorerByEnvironment(ctx, decodeConfig, kvCacheAwareScorerName, kvCacheScorerEnablementEnvVar, kvCacheScorerWeightEnvVar, logger)
76-
addScorerByEnvironment(ctx, decodeConfig, loadAwareScorerName, loadAwareScorerEnablementEnvVar, loadAwareScorerWeightEnvVar, logger)
79+
addScorerByEnvironment(ctx, decodeConfig, kvCacheAwareScorerName, decodeKvCacheScorerEnablementEnvVar,
80+
decodeKvCacheScorerWeightEnvVar, logger)
81+
addScorerByEnvironment(ctx, decodeConfig, loadAwareScorerName, decodeLoadAwareScorerEnablementEnvVar,
82+
decodeLoadAwareScorerWeightEnvVar, logger)
83+
addScorerByEnvironment(ctx, decodeConfig, prefixAwareScorerName, decodePrefixScorerEnablementEnvVar,
84+
decodePrefixScorerWeightEnvVar, logger)
7785
}

0 commit comments

Comments
 (0)