Skip to content

[DO NOT MERGE] Renamed Scheduler APIs to better reflect their usage. #116

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 2 commits into
base: dev
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion pkg/epp/handlers/request.go
Original file line number Diff line number Diff line change
Expand Up @@ -90,7 +90,7 @@ func (s *StreamingServer) HandleRequestBody(
return reqCtx, errutil.Error{Code: errutil.Internal, Msg: fmt.Sprintf("error marshaling request body: %v", err)}
}

res, err := s.scheduler.Schedule(ctx, llmReq)
res, err := s.scheduler.OnRequest(ctx, llmReq)
if err != nil {
return reqCtx, errutil.Error{Code: errutil.InferencePoolResourceExhausted, Msg: fmt.Errorf("failed to find target pod: %w", err).Error()}
}
Expand Down
6 changes: 3 additions & 3 deletions pkg/epp/handlers/server.go
Original file line number Diff line number Diff line change
Expand Up @@ -66,8 +66,8 @@ type StreamingServer struct {
}

type Scheduler interface {
Schedule(ctx context.Context, b *schedulingtypes.LLMRequest) (result *schedulingtypes.Result, err error)
RunPostResponsePlugins(ctx context.Context, req *types.LLMRequest, tragetPodName string) (*schedulingtypes.Result, error)
OnRequest(ctx context.Context, b *schedulingtypes.LLMRequest) (result *schedulingtypes.Result, err error)
OnResponse(ctx context.Context, req *types.LLMRequest, tragetPodName string) (*schedulingtypes.Result, error)
}

// RequestContext stores context information during the lifetime of an HTTP request.
Expand Down Expand Up @@ -212,7 +212,7 @@ func (s *StreamingServer) Process(srv extProcPb.ExternalProcessor_ProcessServer)
}

var result *types.Result
result, err = s.scheduler.RunPostResponsePlugins(ctx, llmReq, reqCtx.TargetPod)
result, err = s.scheduler.OnResponse(ctx, llmReq, reqCtx.TargetPod)
if err != nil {
logger.V(logutil.DEFAULT).Error(err, "Error handling response")
reqCtx.ResponseStatusCode = errutil.ModelServerError
Expand Down
24 changes: 15 additions & 9 deletions pkg/epp/scheduling/scheduler.go
Original file line number Diff line number Diff line change
Expand Up @@ -99,8 +99,10 @@ type Datastore interface {
PodGetAll() []backendmetrics.PodMetrics
}

// Schedule finds the target pod based on metrics and the requested lora adapter.
func (s *Scheduler) Schedule(ctx context.Context, req *types.LLMRequest) (*types.Result, error) {
// OnRequest finds the target pod based on metrics and the requested lora adapter.
// OnRequest is invoked during the processing of the request, before it is sent to
// the appropriate pod for inference.
func (s *Scheduler) OnRequest(ctx context.Context, req *types.LLMRequest) (*types.Result, error) {
logger := log.FromContext(ctx).WithValues("request", req)
loggerDebug := logger.V(logutil.DEBUG)

Expand Down Expand Up @@ -212,9 +214,9 @@ func (s *Scheduler) runPostSchedulePlugins(ctx *types.SchedulingContext, res *ty
}
}

func (s *Scheduler) RunPostResponsePlugins(ctx context.Context, req *types.LLMRequest, targetPodName string) (*types.Result, error) {
logger := log.FromContext(ctx)

// OnResponse is invoked during the processing of a response from an inference pod. It will invoke
// any defined plugins that process the response.
func (s *Scheduler) OnResponse(ctx context.Context, req *types.LLMRequest, targetPodName string) (*types.Result, error) {
pool, err := s.datastore.PoolGet()
if err != nil {
return nil, errutil.Error{Code: errutil.Internal, Msg: "failed to find a target pod"} // pool not defined, no pods
Expand All @@ -234,14 +236,18 @@ func (s *Scheduler) RunPostResponsePlugins(ctx context.Context, req *types.LLMRe

sCtx := types.NewSchedulingContext(ctx, req, pods, pool.Spec.TargetPortNumber)

s.runPostResponsePlugins(sCtx, targetPod)

return &types.Result{TargetPod: nil, MutatedHeaders: sCtx.MutatedHeaders}, nil
}

func (s *Scheduler) runPostResponsePlugins(ctx *types.SchedulingContext, targetPod types.Pod) {
for _, plugin := range s.postResponsePlugins {
logger.V(logutil.DEBUG).Info("Running post-response plugin", "plugin", plugin.Name())
ctx.Logger.V(logutil.DEBUG).Info("Running post-response plugin", "plugin", plugin.Name())
before := time.Now()
plugin.PostResponse(sCtx, targetPod)
plugin.PostResponse(ctx, targetPod)
metrics.RecordSchedulerPluginProcessingLatency(plugins.PostResponsePluginType, plugin.Name(), time.Since(before))
}

return &types.Result{TargetPod: nil, MutatedHeaders: sCtx.MutatedHeaders}, nil
}

type defaultPlugin struct {
Expand Down
6 changes: 3 additions & 3 deletions pkg/epp/scheduling/scheduler_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -232,7 +232,7 @@ func TestSchedule(t *testing.T) {
for _, test := range tests {
t.Run(test.name, func(t *testing.T) {
scheduler := NewScheduler(&fakeDataStore{pods: test.input})
got, err := scheduler.Schedule(context.Background(), test.req)
got, err := scheduler.OnRequest(context.Background(), test.req)
if test.err != (err != nil) {
t.Errorf("Unexpected error, got %v, want %v", err, test.err)
}
Expand Down Expand Up @@ -407,7 +407,7 @@ func TestSchedulePlugins(t *testing.T) {
Model: "test-model",
Headers: test.requestHeaders,
}
got, err := scheduler.Schedule(context.Background(), req)
got, err := scheduler.OnRequest(context.Background(), req)

// Validate error state
if test.err != (err != nil) {
Expand Down Expand Up @@ -518,7 +518,7 @@ func TestPostResponse(t *testing.T) {
Headers: test.responseHeaders,
}

result, err := scheduler.RunPostResponsePlugins(context.Background(), req, test.input[0].Pod.NamespacedName.String())
result, err := scheduler.OnResponse(context.Background(), req, test.input[0].Pod.NamespacedName.String())
if err != nil {
t.Errorf("Received an error. Error: %s", err)
}
Expand Down
2 changes: 1 addition & 1 deletion pkg/epp/scheduling/scorers_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -112,7 +112,7 @@ func TestScorers(t *testing.T) {
scheduler := NewScheduler(&fakeDataStore{pods: test.input})
scheduler.scorers = map[plugins.Scorer]int{test.scorer: 1}
scheduler.picker = &picker.MaxScorePicker{}
got, err := scheduler.Schedule(context.Background(), test.req)
got, err := scheduler.OnRequest(context.Background(), test.req)
if test.err != (err != nil) {
t.Errorf("Unexpected error, got %v, want %v", err, test.err)
}
Expand Down