Skip to content

Commit da84ef0

Browse files
authored
Make processingRetryTimeout configurable (#3387)
1 parent 66ab0f9 commit da84ef0

File tree

5 files changed

+22
-15
lines changed

5 files changed

+22
-15
lines changed

cmd/livepeer/livepeer.go

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -163,6 +163,7 @@ func parseLivepeerConfig() starter.LivepeerConfig {
163163
cfg.AIModelsDir = flag.String("aiModelsDir", *cfg.AIModelsDir, "Set directory where AI model weights are stored")
164164
cfg.AIRunnerImage = flag.String("aiRunnerImage", *cfg.AIRunnerImage, "[Deprecated] Specify the base Docker image for the AI runner. Example: livepeer/ai-runner:0.0.1. Use -aiRunnerImageOverrides instead.")
165165
cfg.AIRunnerImageOverrides = flag.String("aiRunnerImageOverrides", *cfg.AIRunnerImageOverrides, `Specify overrides for the Docker images used by the AI runner. Example: '{"default": "livepeer/ai-runner:v1.0", "batch": {"text-to-speech": "livepeer/ai-runner:text-to-speech-v1.0"}, "live": {"another-pipeline": "livepeer/ai-runner:another-pipeline-v1.0"}}'`)
166+
cfg.AIProcessingRetryTimeout = flag.Duration("aiProcessingRetryTimeout", *cfg.AIProcessingRetryTimeout, "Timeout for retrying to initiate AI processing request")
166167

167168
// Live AI:
168169
cfg.MediaMTXApiPassword = flag.String("mediaMTXApiPassword", "", "HTTP basic auth password for MediaMTX API requests")

cmd/livepeer/starter/starter.go

Lines changed: 13 additions & 9 deletions
Original file line number | Diff line number | Diff line change
@@ -165,6 +165,7 @@ type LivepeerConfig struct {
165165
TestOrchAvail *bool
166166
AIRunnerImage *string
167167
AIRunnerImageOverrides *string
168+
AIProcessingRetryTimeout *time.Duration
168169
KafkaBootstrapServers *string
169170
KafkaUsername *string
170171
KafkaPassword *string
@@ -215,6 +216,7 @@ func DefaultLivepeerConfig() LivepeerConfig {
215216
defaultAIModels := ""
216217
defaultAIModelsDir := ""
217218
defaultAIRunnerImage := "livepeer/ai-runner:latest"
219+
defaultAIProcessingRetryTimeout := 2 * time.Second
218220
defaultAIRunnerImageOverrides := ""
219221
defaultLiveAIAuthWebhookURL := ""
220222
defaultLivePaymentInterval := 5 * time.Second
@@ -320,15 +322,16 @@ func DefaultLivepeerConfig() LivepeerConfig {
320322
TestTranscoder: &defaultTestTranscoder,
321323

322324
// AI:
323-
AIServiceRegistry: &defaultAIServiceRegistry,
324-
AIWorker: &defaultAIWorker,
325-
AIModels: &defaultAIModels,
326-
AIModelsDir: &defaultAIModelsDir,
327-
AIRunnerImage: &defaultAIRunnerImage,
328-
AIRunnerImageOverrides: &defaultAIRunnerImageOverrides,
329-
LiveAIAuthWebhookURL: &defaultLiveAIAuthWebhookURL,
330-
LivePaymentInterval: &defaultLivePaymentInterval,
331-
GatewayHost: &defaultGatewayHost,
325+
AIServiceRegistry: &defaultAIServiceRegistry,
326+
AIWorker: &defaultAIWorker,
327+
AIModels: &defaultAIModels,
328+
AIModelsDir: &defaultAIModelsDir,
329+
AIRunnerImage: &defaultAIRunnerImage,
330+
AIProcessingRetryTimeout: &defaultAIProcessingRetryTimeout,
331+
AIRunnerImageOverrides: &defaultAIRunnerImageOverrides,
332+
LiveAIAuthWebhookURL: &defaultLiveAIAuthWebhookURL,
333+
LivePaymentInterval: &defaultLivePaymentInterval,
334+
GatewayHost: &defaultGatewayHost,
332335

333336
// Onchain:
334337
EthAcctAddr: &defaultEthAcctAddr,
@@ -513,6 +516,7 @@ func StartLivepeer(ctx context.Context, cfg LivepeerConfig) {
513516
if err != nil {
514517
glog.Errorf("Error creating livepeer node: %v", err)
515518
}
519+
n.AIProcesssingRetryTimeout = *cfg.AIProcessingRetryTimeout
516520

517521
if *cfg.OrchSecret != "" {
518522
n.OrchSecret, _ = common.ReadFromFile(*cfg.OrchSecret)

core/livepeernode.go

Lines changed: 3 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -119,8 +119,9 @@ type LivepeerNode struct {
119119
Database *common.DB
120120

121121
// AI worker public fields
122-
AIWorker AI
123-
AIWorkerManager *RemoteAIWorkerManager
122+
AIWorker AI
123+
AIWorkerManager *RemoteAIWorkerManager
124+
AIProcesssingRetryTimeout time.Duration
124125

125126
// Transcoder public fields
126127
SegmentChans map[ManifestID]SegmentChan

server/ai_process.go

Lines changed: 2 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -27,7 +27,6 @@ import (
2727
"github.com/livepeer/lpms/stream"
2828
)
2929

30-
const processingRetryTimeout = 2 * time.Second
3130
const defaultTextToImageModelID = "stabilityai/sdxl-turbo"
3231
const defaultImageToImageModelID = "stabilityai/sdxl-turbo"
3332
const defaultImageToVideoModelID = "stabilityai/stable-video-diffusion-img2vid-xt"
@@ -1464,13 +1463,15 @@ func processAIRequest(ctx context.Context, params aiRequestParams, req interface
14641463
}
14651464
capName := cap.String()
14661465
ctx = clog.AddVal(ctx, "capability", capName)
1466+
ctx = clog.AddVal(ctx, "model_id", modelID)
14671467

14681468
clog.V(common.VERBOSE).Infof(ctx, "Received AI request model_id=%s", modelID)
14691469
start := time.Now()
14701470
defer clog.Infof(ctx, "Processed AI request model_id=%v took=%v", modelID, time.Since(start))
14711471

14721472
var resp interface{}
14731473

1474+
processingRetryTimeout := params.node.AIProcesssingRetryTimeout
14741475
cctx, cancel := context.WithTimeout(ctx, processingRetryTimeout)
14751476
defer cancel()
14761477

server/ai_session.go

Lines changed: 3 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -165,7 +165,7 @@ type AISessionSelector struct {
165165
os drivers.OSSession
166166
}
167167

168-
func NewAISessionSelector(cap core.Capability, modelID string, node *core.LivepeerNode, ttl time.Duration) (*AISessionSelector, error) {
168+
func NewAISessionSelector(ctx context.Context, cap core.Capability, modelID string, node *core.LivepeerNode, ttl time.Duration) (*AISessionSelector, error) {
169169
var stakeRdr stakeReader
170170
if node.Eth != nil {
171171
stakeRdr = &storeStakeReader{store: node.Database}
@@ -193,7 +193,7 @@ func NewAISessionSelector(cap core.Capability, modelID string, node *core.Livepe
193193
os: drivers.NodeStorage.NewSession(strconv.Itoa(int(cap)) + "_" + modelID),
194194
}
195195

196-
if err := sel.Refresh(context.Background()); err != nil {
196+
if err := sel.Refresh(ctx); err != nil {
197197
return nil, err
198198
}
199199

@@ -405,7 +405,7 @@ func (c *AISessionManager) getSelector(ctx context.Context, cap core.Capability,
405405
if !ok {
406406
// Create the selector
407407
var err error
408-
sel, err = NewAISessionSelector(cap, modelID, c.node, c.ttl)
408+
sel, err = NewAISessionSelector(ctx, cap, modelID, c.node, c.ttl)
409409
if err != nil {
410410
return nil, err
411411
}

0 commit comments

Comments
 (0)