Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: expose HTTP timeout on AIServiceBackend #384

Merged
merged 7 commits into from
Feb 25, 2025
Merged
Show file tree
Hide file tree
Changes from 5 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions api/v1alpha1/api.go
Original file line number Diff line number Diff line change
Expand Up @@ -339,6 +339,11 @@ type AIServiceBackendSpec struct {
// +optional
BackendSecurityPolicyRef *gwapiv1.LocalObjectReference `json:"backendSecurityPolicyRef,omitempty"`

// Timeouts defines the timeouts that can be configured for an HTTP request.
//
// +optional
Timeouts *gwapiv1.HTTPRouteTimeouts `json:"timeouts,omitempty"`

// TODO: maybe add backend-level LLMRequestCost configuration that overrides the AIGatewayRoute-level LLMRequestCost.
// That may be useful for the backend that has a different cost calculation logic.
}
Expand Down
5 changes: 5 additions & 0 deletions api/v1alpha1/zz_generated.deepcopy.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

20 changes: 18 additions & 2 deletions internal/controller/ai_gateway_route.go
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,9 @@ const (
//
// secret with backendSecurityPolicy auth instead of mounting new secret files to the external proc.
mountedExtProcSecretPath = "/etc/backend_security_policy" // #nosec G101

defaultRequestTimeout = "60s"
defaultBackendRequestTimeout = "60s"
)

// AIGatewayRouteController implements [reconcile.TypedReconciler].
Expand Down Expand Up @@ -429,7 +432,8 @@ func (c *AIGatewayRouteController) newHTTPRoute(ctx context.Context, dst *gwapiv
Matches: []gwapiv1.HTTPRouteMatch{
{Headers: []gwapiv1.HTTPHeaderMatch{{Name: selectedBackendHeaderKey, Value: key}}},
},
Filters: rewriteFilters,
Filters: rewriteFilters,
Timeouts: b.Spec.Timeouts,
}
rules[i] = rule
}
Expand All @@ -443,7 +447,8 @@ func (c *AIGatewayRouteController) newHTTPRoute(ctx context.Context, dst *gwapiv
BackendRefs: []gwapiv1.HTTPBackendRef{
{BackendRef: gwapiv1.BackendRef{BackendObjectReference: backends[0].Spec.BackendRef}},
},
Filters: rewriteFilters,
Filters: rewriteFilters,
Timeouts: defaultTimeout(),
})
}

Expand Down Expand Up @@ -677,3 +682,14 @@ func backendSecurityPolicyVolumeName(ruleIndex, backendRefIndex int, name string
// backendSecurityMountPath returns the path formed by joining
// mountedExtProcSecretPath and the given backendSecurityPolicyKey with a
// single "/" separator.
func backendSecurityMountPath(backendSecurityPolicyKey string) string {
	const layout = "%s/%s"
	mountPath := fmt.Sprintf(layout, mountedExtProcSecretPath, backendSecurityPolicyKey)
	return mountPath
}

// defaultTimeout builds an HTTPRouteTimeouts whose Request and BackendRequest
// fields are populated from the defaultRequestTimeout and
// defaultBackendRequestTimeout constants respectively.
//
// A fresh value is allocated on every call, so the returned pointers are not
// shared between callers.
func defaultTimeout() *gwapiv1.HTTPRouteTimeouts {
	timeouts := new(gwapiv1.HTTPRouteTimeouts)

	// The fields are pointers, so each duration needs an addressable local.
	request := gwapiv1.Duration(defaultRequestTimeout)
	timeouts.Request = &request

	backendRequest := gwapiv1.Duration(defaultBackendRequestTimeout)
	timeouts.BackendRequest = &backendRequest

	return timeouts
}
14 changes: 14 additions & 0 deletions internal/controller/ai_gateway_route_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -351,29 +351,38 @@ func Test_newHTTPRoute(t *testing.T) {
},
},
}
var (
timeout1 gwapiv1.Duration = "30s"
timeout2 gwapiv1.Duration = "60s"
timeout3 gwapiv1.Duration = "90s"
)
for _, backend := range []*aigv1a1.AIServiceBackend{
{
ObjectMeta: metav1.ObjectMeta{Name: "apple", Namespace: "ns1"},
Spec: aigv1a1.AIServiceBackendSpec{
BackendRef: gwapiv1.BackendObjectReference{Name: "some-backend1", Namespace: ptr.To[gwapiv1.Namespace]("ns1")},
Timeouts: &gwapiv1.HTTPRouteTimeouts{Request: &timeout1, BackendRequest: &timeout2},
},
},
{
ObjectMeta: metav1.ObjectMeta{Name: "orange", Namespace: "ns1"},
Spec: aigv1a1.AIServiceBackendSpec{
BackendRef: gwapiv1.BackendObjectReference{Name: "some-backend2", Namespace: ptr.To[gwapiv1.Namespace]("ns1")},
Timeouts: &gwapiv1.HTTPRouteTimeouts{Request: &timeout2, BackendRequest: &timeout3},
},
},
{
ObjectMeta: metav1.ObjectMeta{Name: "pineapple", Namespace: "ns1"},
Spec: aigv1a1.AIServiceBackendSpec{
BackendRef: gwapiv1.BackendObjectReference{Name: "some-backend3", Namespace: ptr.To[gwapiv1.Namespace]("ns1")},
Timeouts: &gwapiv1.HTTPRouteTimeouts{Request: &timeout1, BackendRequest: &timeout3},
},
},
{
ObjectMeta: metav1.ObjectMeta{Name: "foo", Namespace: "ns1"},
Spec: aigv1a1.AIServiceBackendSpec{
BackendRef: gwapiv1.BackendObjectReference{Name: "some-backend4", Namespace: ptr.To[gwapiv1.Namespace]("ns1")},
Timeouts: &gwapiv1.HTTPRouteTimeouts{Request: &timeout1, BackendRequest: &timeout2},
},
},
} {
Expand All @@ -389,24 +398,28 @@ func Test_newHTTPRoute(t *testing.T) {
{Headers: []gwapiv1.HTTPHeaderMatch{{Name: selectedBackendHeaderKey, Value: "apple.ns1"}}},
},
BackendRefs: []gwapiv1.HTTPBackendRef{{BackendRef: gwapiv1.BackendRef{BackendObjectReference: gwapiv1.BackendObjectReference{Name: "some-backend1", Namespace: ptr.To[gwapiv1.Namespace]("ns1")}}}},
Timeouts: &gwapiv1.HTTPRouteTimeouts{Request: &timeout1, BackendRequest: &timeout2},
},
{
Matches: []gwapiv1.HTTPRouteMatch{
{Headers: []gwapiv1.HTTPHeaderMatch{{Name: selectedBackendHeaderKey, Value: "orange.ns1"}}},
},
BackendRefs: []gwapiv1.HTTPBackendRef{{BackendRef: gwapiv1.BackendRef{BackendObjectReference: gwapiv1.BackendObjectReference{Name: "some-backend2", Namespace: ptr.To[gwapiv1.Namespace]("ns1")}}}},
Timeouts: &gwapiv1.HTTPRouteTimeouts{Request: &timeout2, BackendRequest: &timeout3},
},
{
Matches: []gwapiv1.HTTPRouteMatch{
{Headers: []gwapiv1.HTTPHeaderMatch{{Name: selectedBackendHeaderKey, Value: "pineapple.ns1"}}},
},
BackendRefs: []gwapiv1.HTTPBackendRef{{BackendRef: gwapiv1.BackendRef{BackendObjectReference: gwapiv1.BackendObjectReference{Name: "some-backend3", Namespace: ptr.To[gwapiv1.Namespace]("ns1")}}}},
Timeouts: &gwapiv1.HTTPRouteTimeouts{Request: &timeout1, BackendRequest: &timeout3},
},
{
Matches: []gwapiv1.HTTPRouteMatch{
{Headers: []gwapiv1.HTTPHeaderMatch{{Name: selectedBackendHeaderKey, Value: "foo.ns1"}}},
},
BackendRefs: []gwapiv1.HTTPBackendRef{{BackendRef: gwapiv1.BackendRef{BackendObjectReference: gwapiv1.BackendObjectReference{Name: "some-backend4", Namespace: ptr.To[gwapiv1.Namespace]("ns1")}}}},
Timeouts: &gwapiv1.HTTPRouteTimeouts{Request: &timeout1, BackendRequest: &timeout2},
},
}
require.Len(t, httpRoute.Spec.Rules, 5) // 4 backends + 1 for the default rule.
Expand All @@ -419,6 +432,7 @@ func Test_newHTTPRoute(t *testing.T) {
} else {
require.Equal(t, expRules[i].Matches, r.Matches)
require.Equal(t, expRules[i].BackendRefs, r.BackendRefs)
require.Equal(t, expRules[i].Timeouts, r.Timeouts)
}

// Each rule should have a host rewrite filter by default.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -178,6 +178,64 @@ spec:
required:
- name
type: object
timeouts:
description: Timeouts defines the timeouts that can be configured
for an HTTP request.
properties:
backendRequest:
description: |-
BackendRequest specifies a timeout for an individual request from the gateway
to a backend. This covers the time from when the request first starts being
sent from the gateway to when the full response has been received from the backend.

Setting a timeout to the zero duration (e.g. "0s") SHOULD disable the timeout
completely. Implementations that cannot completely disable the timeout MUST
instead interpret the zero duration as the longest possible value to which
the timeout can be set.

An entire client HTTP transaction with a gateway, covered by the Request timeout,
may result in more than one call from the gateway to the destination backend,
for example, if automatic retries are supported.

The value of BackendRequest must be a Gateway API Duration string as defined by
GEP-2257. When this field is unspecified, its behavior is implementation-specific;
when specified, the value of BackendRequest must be no more than the value of the
Request timeout (since the Request timeout encompasses the BackendRequest timeout).

Support: Extended
pattern: ^([0-9]{1,5}(h|m|s|ms)){1,4}$
type: string
request:
description: |-
Request specifies the maximum duration for a gateway to respond to an HTTP request.
If the gateway has not been able to respond before this deadline is met, the gateway
MUST return a timeout error.

For example, setting the `rules.timeouts.request` field to the value `10s` in an
`HTTPRoute` will cause a timeout if a client request is taking longer than 10 seconds
to complete.

Setting a timeout to the zero duration (e.g. "0s") SHOULD disable the timeout
completely. Implementations that cannot completely disable the timeout MUST
instead interpret the zero duration as the longest possible value to which
the timeout can be set.

This timeout is intended to cover as close to the whole request-response transaction
as possible although an implementation MAY choose to start the timeout after the entire
request stream has been received instead of immediately after the transaction is
initiated by the client.

The value of Request is a Gateway API Duration string as defined by GEP-2257. When this
field is unspecified, request timeout behavior is implementation-specific.

Support: Extended
pattern: ^([0-9]{1,5}(h|m|s|ms)){1,4}$
type: string
type: object
x-kubernetes-validations:
- message: backendRequest timeout cannot be longer than request timeout
rule: '!(has(self.request) && has(self.backendRequest) && duration(self.request)
!= duration(''0s'') && duration(self.backendRequest) > duration(self.request))'
required:
- backendRef
- schema
Expand Down
5 changes: 5 additions & 0 deletions site/docs/api/api.mdx
Original file line number Diff line number Diff line change
Expand Up @@ -520,6 +520,11 @@ AIServiceBackendSpec details the AIServiceBackend configuration.
type="[LocalObjectReference](#localobjectreference)"
required="false"
description="BackendSecurityPolicyRef is the name of the BackendSecurityPolicy resources this backend<br />is being attached to."
/><ApiField
name="timeouts"
type="[HTTPRouteTimeouts](#httproutetimeouts)"
required="false"
description="Timeouts defines the timeouts that can be configured for an HTTP request."
/>


Expand Down