diff --git a/Makefile b/Makefile index dee7e99e0..4ef67054f 100644 --- a/Makefile +++ b/Makefile @@ -96,7 +96,7 @@ help: ## Display this help. ##@ Development .PHONY: manifests -manifests: controller-gen ## Generate WebhookConfiguration, ClusterRole and CustomResourceDefinition objects. +manifests: controller-gen ## Generate WebhookConfiguration, RBAC and CustomResourceDefinition objects. $(CONTROLLER_GEN) rbac:roleName=manager-role crd webhook paths="./..." output:crd:artifacts:config=config/crd/bases .PHONY: generate diff --git a/config/charts/inferencepool/templates/rbac.yaml b/config/charts/inferencepool/templates/rbac.yaml index 4cb5d7be0..0c432f4b9 100644 --- a/config/charts/inferencepool/templates/rbac.yaml +++ b/config/charts/inferencepool/templates/rbac.yaml @@ -5,12 +5,6 @@ metadata: labels: {{- include "gateway-api-inference-extension.labels" . | nindent 4 }} rules: -- apiGroups: ["inference.networking.x-k8s.io"] - resources: ["inferencemodels", "inferencepools"] - verbs: ["get", "watch", "list"] -- apiGroups: [""] - resources: ["pods"] - verbs: ["get", "watch", "list"] - apiGroups: - authentication.k8s.io resources: @@ -37,6 +31,35 @@ roleRef: kind: ClusterRole name: {{ include "gateway-api-inference-extension.name" . }} --- +apiVersion: rbac.authorization.k8s.io/v1 +kind: Role +metadata: + name: {{ include "gateway-api-inference-extension.name" . }} + namespace: {{ .Release.Namespace }} + labels: + {{- include "gateway-api-inference-extension.labels" . | nindent 4 }} +rules: +- apiGroups: ["inference.networking.x-k8s.io"] + resources: ["inferencemodels", "inferencepools"] + verbs: ["get", "watch", "list"] +- apiGroups: [""] + resources: ["pods"] + verbs: ["get", "watch", "list"] +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: RoleBinding +metadata: + name: {{ include "gateway-api-inference-extension.name" . }} + namespace: {{ .Release.Namespace }} +subjects: +- kind: ServiceAccount + name: {{ include "gateway-api-inference-extension.name" . }} + namespace: {{ .Release.Namespace }} +roleRef: + apiGroup: rbac.authorization.k8s.io + kind: Role + name: {{ include "gateway-api-inference-extension.name" . }} +--- apiVersion: v1 kind: ServiceAccount metadata: diff --git a/config/manifests/inferencepool-resources.yaml b/config/manifests/inferencepool-resources.yaml index aa8c82901..0b909c9b8 100644 --- a/config/manifests/inferencepool-resources.yaml +++ b/config/manifests/inferencepool-resources.yaml @@ -29,6 +29,12 @@ spec: appProtocol: http2 type: ClusterIP --- +apiVersion: v1 +kind: ServiceAccount +metadata: + name: vllm-llama3-8b-instruct-epp + namespace: default +--- apiVersion: apps/v1 kind: Deployment metadata: @@ -46,6 +52,7 @@ spec: labels: app: vllm-llama3-8b-instruct-epp spec: + serviceAccountName: vllm-llama3-8b-instruct-epp # Conservatively, this timeout should mirror the longest grace period of the pods within the pool terminationGracePeriodSeconds: 130 containers: @@ -174,20 +181,38 @@ data: weight: 1 - pluginRef: max-score-picker --- -kind: ClusterRole +kind: Role +apiVersion: rbac.authorization.k8s.io/v1 +metadata: + name: pod-read + namespace: default +rules: +- apiGroups: [ "inference.networking.x-k8s.io" ] + resources: [ "inferencepools", "inferencemodels" ] + verbs: [ "get", "watch", "list" ] +- apiGroups: [ "" ] + resources: [ "pods" ] + verbs: [ "get", "watch", "list" ] +--- +kind: RoleBinding apiVersion: rbac.authorization.k8s.io/v1 metadata: + name: pod-read-binding + namespace: default +subjects: +- kind: ServiceAccount + name: vllm-llama3-8b-instruct-epp + namespace: default +roleRef: + apiGroup: rbac.authorization.k8s.io + kind: Role name: pod-read +--- +kind: ClusterRole +apiVersion: rbac.authorization.k8s.io/v1 +metadata: + name: auth-reviewer rules: -- apiGroups: ["inference.networking.x-k8s.io"] - resources: ["inferencepools"] - verbs: ["get", "watch", "list"] -- apiGroups: ["inference.networking.x-k8s.io"] - resources: ["inferencemodels"] - verbs: ["get", "watch", "list"] -- apiGroups: [""] - resources: ["pods"] - verbs: ["get", "watch", "list"] - apiGroups: - authentication.k8s.io resources: @@ -200,16 +225,16 @@ rules: - subjectaccessreviews verbs: - create ---- +--- kind: ClusterRoleBinding apiVersion: rbac.authorization.k8s.io/v1 metadata: - name: pod-read-binding + name: auth-reviewer-binding subjects: - kind: ServiceAccount - name: default + name: vllm-llama3-8b-instruct-epp namespace: default roleRef: apiGroup: rbac.authorization.k8s.io kind: ClusterRole - name: pod-read + name: auth-reviewer diff --git a/test/e2e/epp/e2e_suite_test.go b/test/e2e/epp/e2e_suite_test.go index 0ccde422a..647ec09a7 100644 --- a/test/e2e/epp/e2e_suite_test.go +++ b/test/e2e/epp/e2e_suite_test.go @@ -384,14 +384,29 @@ func createInferExt(k8sClient client.Client, filePath string) { ginkgo.By("Creating inference extension resources from manifest: " + filePath) createObjsFromYaml(k8sClient, outManifests) + // Wait for the serviceaccount to exist. + testutils.EventuallyExists(ctx, func() error { + return k8sClient.Get(ctx, types.NamespacedName{Namespace: nsName, Name: inferExtName}, &corev1.ServiceAccount{}) + }, existsTimeout, interval) + + // Wait for the role to exist. + testutils.EventuallyExists(ctx, func() error { + return k8sClient.Get(ctx, types.NamespacedName{Namespace: nsName, Name: "pod-read"}, &rbacv1.Role{}) + }, existsTimeout, interval) + + // Wait for the rolebinding to exist. + testutils.EventuallyExists(ctx, func() error { + return k8sClient.Get(ctx, types.NamespacedName{Namespace: nsName, Name: "pod-read-binding"}, &rbacv1.RoleBinding{}) + }, existsTimeout, interval) + // Wait for the clusterrole to exist. testutils.EventuallyExists(ctx, func() error { - return k8sClient.Get(ctx, types.NamespacedName{Name: "pod-read"}, &rbacv1.ClusterRole{}) + return k8sClient.Get(ctx, types.NamespacedName{Name: "auth-reviewer"}, &rbacv1.ClusterRole{}) }, existsTimeout, interval) // Wait for the clusterrolebinding to exist. testutils.EventuallyExists(ctx, func() error { - return k8sClient.Get(ctx, types.NamespacedName{Name: "pod-read-binding"}, &rbacv1.ClusterRoleBinding{}) + return k8sClient.Get(ctx, types.NamespacedName{Name: "auth-reviewer-binding"}, &rbacv1.ClusterRoleBinding{}) }, existsTimeout, interval) // Wait for the deployment to exist. diff --git a/test/testdata/inferencepool-e2e.yaml b/test/testdata/inferencepool-e2e.yaml index 9e7fd6a0f..aa7abb54f 100644 --- a/test/testdata/inferencepool-e2e.yaml +++ b/test/testdata/inferencepool-e2e.yaml @@ -26,6 +26,12 @@ spec: appProtocol: http2 type: ClusterIP --- +apiVersion: v1 +kind: ServiceAccount +metadata: + name: vllm-llama3-8b-instruct-epp + namespace: $E2E_NS +--- apiVersion: apps/v1 kind: Deployment metadata: @@ -43,6 +49,7 @@ spec: labels: app: vllm-llama3-8b-instruct-epp spec: + serviceAccountName: vllm-llama3-8b-instruct-epp # Conservatively, this timeout should mirror the longest grace period of the pods within the pool terminationGracePeriodSeconds: 130 containers: @@ -171,20 +178,38 @@ data: weight: 1 - pluginRef: max-score-picker --- -kind: ClusterRole +kind: Role apiVersion: rbac.authorization.k8s.io/v1 metadata: name: pod-read + namespace: $E2E_NS +rules: +- apiGroups: [ "inference.networking.x-k8s.io" ] + resources: [ "inferencepools", "inferencemodels" ] + verbs: [ "get", "watch", "list" ] +- apiGroups: [ "" ] + resources: [ "pods" ] + verbs: [ "get", "watch", "list" ] +--- +kind: RoleBinding +apiVersion: rbac.authorization.k8s.io/v1 +metadata: + name: pod-read-binding + namespace: $E2E_NS +subjects: +- kind: ServiceAccount + name: vllm-llama3-8b-instruct-epp + namespace: $E2E_NS +roleRef: + apiGroup: rbac.authorization.k8s.io + kind: Role + name: pod-read +--- +kind: ClusterRole +apiVersion: rbac.authorization.k8s.io/v1 +metadata: + name: auth-reviewer rules: -- apiGroups: ["inference.networking.x-k8s.io"] - resources: ["inferencepools"] - verbs: ["get", "watch", "list"] -- apiGroups: ["inference.networking.x-k8s.io"] - resources: ["inferencemodels"] - verbs: ["get", "watch", "list"] -- apiGroups: [""] - resources: ["pods"] - verbs: ["get", "watch", "list"] - apiGroups: - authentication.k8s.io resources: @@ -201,12 +226,12 @@ rules: kind: ClusterRoleBinding apiVersion: rbac.authorization.k8s.io/v1 metadata: - name: pod-read-binding + name: auth-reviewer-binding subjects: - kind: ServiceAccount - name: default + name: vllm-llama3-8b-instruct-epp namespace: $E2E_NS roleRef: apiGroup: rbac.authorization.k8s.io kind: ClusterRole - name: pod-read + name: auth-reviewer diff --git a/test/utils/utils.go b/test/utils/utils.go index ba74069ff..23a85702a 100644 --- a/test/utils/utils.go +++ b/test/utils/utils.go @@ -43,7 +43,7 @@ import ( func DeleteClusterResources(ctx context.Context, cli client.Client) error { binding := &rbacv1.ClusterRoleBinding{ ObjectMeta: metav1.ObjectMeta{ - Name: "pod-read-binding", + Name: "auth-reviewer-binding", }, } err := cli.Delete(ctx, binding, client.PropagationPolicy(metav1.DeletePropagationForeground)) @@ -52,7 +52,7 @@ func DeleteClusterResources(ctx context.Context, cli client.Client) error { } role := &rbacv1.ClusterRole{ ObjectMeta: metav1.ObjectMeta{ - Name: "pod-read", + Name: "auth-reviewer", }, } err = cli.Delete(ctx, role, client.PropagationPolicy(metav1.DeletePropagationForeground))