From f40d935797552b1ff5c35337b1a15535dadac240 Mon Sep 17 00:00:00 2001 From: Seth Heidkamp <61526534+sheidkamp@users.noreply.github.com> Date: Tue, 12 Nov 2024 19:33:43 -0500 Subject: [PATCH] Split webhook validation (#10284) Co-authored-by: soloio-bulldozer[bot] <48420018+soloio-bulldozer[bot]@users.noreply.github.com> Co-authored-by: changelog-bot Co-authored-by: Sam Heilbron Co-authored-by: Nadine Spies <17709352+Nadine2016@users.noreply.github.com> Co-authored-by: Jenny Shu <28537278+jenshu@users.noreply.github.com> --- .github/workflows/pr-kubernetes-tests.yaml | 3 +- .../split-validating-webhook.yaml | 14 ++ docs/content/reference/values.txt | 3 +- go.mod | 2 +- go.sum | 4 +- install/helm/gloo/generate/values.go | 3 +- ...eway-validation-webhook-configuration.yaml | 47 +++- install/helm/gloo/values-template.yaml | 3 + ...y-validation-webhook-configuration_test.go | 170 +++++++++++-- .../pkg/services/k8sadmission/README.md | 3 + .../validation/split_webhook/suite.go | 224 ++++++++++++++++++ .../split-webhook/basic-upstream.yaml | 10 + ...oo-webhook-failure-policy-fail-values.yaml | 5 + ...-webhook-failure-policy-ignore-values.yaml | 5 + ...be-webhook-failure-policy-fail-values.yaml | 5 + ...-webhook-failure-policy-ignore-values.yaml | 5 + .../e2e/features/validation/types.go | 8 + .../validation_reject_invalid/suite.go | 2 +- .../validation_strict_warnings/suite.go | 2 +- .../e2e/tests/validation_strict_tests.go | 2 + 20 files changed, 476 insertions(+), 44 deletions(-) create mode 100644 changelog/v1.18.0-beta34/split-validating-webhook.yaml create mode 100644 test/kubernetes/e2e/features/validation/split_webhook/suite.go create mode 100644 test/kubernetes/e2e/features/validation/testdata/split-webhook/basic-upstream.yaml create mode 100644 test/kubernetes/e2e/features/validation/testdata/split-webhook/gloo-webhook-failure-policy-fail-values.yaml create mode 100644 test/kubernetes/e2e/features/validation/testdata/split-webhook/gloo-webhook-failure-policy-ignore-values.yaml create mode 100644 test/kubernetes/e2e/features/validation/testdata/split-webhook/kube-webhook-failure-policy-fail-values.yaml create mode 100644 test/kubernetes/e2e/features/validation/testdata/split-webhook/kube-webhook-failure-policy-ignore-values.yaml diff --git a/.github/workflows/pr-kubernetes-tests.yaml b/.github/workflows/pr-kubernetes-tests.yaml index 77c0e9c2fb5..d4af78516fd 100644 --- a/.github/workflows/pr-kubernetes-tests.yaml +++ b/.github/workflows/pr-kubernetes-tests.yaml @@ -76,8 +76,9 @@ jobs: go-test-run-regex: '(^TestK8sGatewayIstio$$|^TestGlooGatewayEdgeGateway$$|^TestGlooctlIstioInjectEdgeApiGateway$$)' # October 10, 2024: 22 minutes + # TODO (sheidkamp) rebalance tests - cluster-name: 'cluster-five' - go-test-args: '-v -timeout=25m' + go-test-args: '-v -timeout=35m' go-test-run-regex: '^TestFullEnvoyValidation$$|^TestValidationStrict$$|^TestValidationAlwaysAccept$$|^TestTransformationValidationDisabled$$|^TestGloomtlsGatewayEdgeGateway$$|^TestWatchNamespaceSelector$$' # October 10, 2024: 12 minutes diff --git a/changelog/v1.18.0-beta34/split-validating-webhook.yaml b/changelog/v1.18.0-beta34/split-validating-webhook.yaml new file mode 100644 index 00000000000..1846a17dde3 --- /dev/null +++ b/changelog/v1.18.0-beta34/split-validating-webhook.yaml @@ -0,0 +1,14 @@ +changelog: + - type: NEW_FEATURE + issueLink: https://github.com/solo-io/gloo/issues/10247 + resolvesIssue: false + description: >- + Split the validating webhook to allow different failure policies for gloo/non-gloo resources. + The split out webhook for kubernetes resources shares all configuration with the existing webhook except for the failure policy, + which can be set with `gateway.validation.kubeCoreFailurePolicy` + - type: DEPENDENCY_BUMP + dependencyOwner: solo-io + dependencyRepo: k8s-utils + dependencyTag: v0.8.1 + description: >- + Bump k8s-utils to v0.8.1 for updated `ConvertUnstructured` function diff --git a/docs/content/reference/values.txt b/docs/content/reference/values.txt index 49cc3166acf..55fc3e90627 100644 --- a/docs/content/reference/values.txt +++ b/docs/content/reference/values.txt @@ -569,7 +569,8 @@ |gateway.validation.disableTransformationValidation|bool|false|set this to true to disable transformation validation. This may bring significant performance benefits if using many transformations, at the cost of possibly incorrect transformations being sent to Envoy. When using this value make sure to pre-validate transformations.| |gateway.validation.warnRouteShortCircuiting|bool|false|Write a warning to route resources if validation produced a route ordering warning (defaults to false). By setting to true, this means that Gloo Edge will start assigning warnings to resources that would result in route short-circuiting within a virtual host.| |gateway.validation.secretName|string|gateway-validation-certs|Name of the Kubernetes Secret containing TLS certificates used by the validation webhook server. This secret will be created by the certGen Job if the certGen Job is enabled.| -|gateway.validation.failurePolicy|string|Ignore|failurePolicy defines how unrecognized errors from the Gateway validation endpoint are handled - allowed values are 'Ignore' or 'Fail'. Defaults to Ignore | +|gateway.validation.failurePolicy|string|Ignore|Specify how to handle unrecognized errors for Gloo resources that are returned from the Gateway validation endpoint. Supported values are 'Ignore' or 'Fail'| +|gateway.validation.kubeCoreFailurePolicy|string|Ignore|Specify how to handle unrecognized errors for Kubernetes core resources that are returned by the Gateway validation endpoint. Currently the [validation webhook](https://github.com/solo-io/gloo/blob/main/install/helm/gloo/templates/5-gateway-validation-webhook-configuration.yaml) is configured to handle errors for Kubernetes secrets and namespaces. Supported values are 'Ignore' or 'Fail'. If you set this value to 'Fail', you cannot modify these core resources if the 'gloo' service is unavailable.| |gateway.validation.webhook.enabled|bool|true|enable validation webhook (default true)| |gateway.validation.webhook.disableHelmHook|bool|false|do not create the webhook as helm hook (default false)| |gateway.validation.webhook.timeoutSeconds|int||the timeout for the webhook, defaults to 10| diff --git a/go.mod b/go.mod index 80efa625a96..8502bf2d6dd 100644 --- a/go.mod +++ b/go.mod @@ -49,7 +49,7 @@ require ( github.com/sergi/go-diff v1.2.0 github.com/solo-io/go-list-licenses v0.1.4 github.com/solo-io/go-utils v0.27.1 - github.com/solo-io/k8s-utils v0.8.0 + github.com/solo-io/k8s-utils v0.8.1 github.com/solo-io/protoc-gen-ext v0.0.25 github.com/solo-io/protoc-gen-openapi v0.2.5 github.com/solo-io/skv2 v0.41.0 diff --git a/go.sum b/go.sum index 4dbaf450a9f..c6d19ef7b1f 100644 --- a/go.sum +++ b/go.sum @@ -2697,8 +2697,8 @@ github.com/solo-io/go-list-licenses v0.1.4/go.mod h1:x6LSp/NrYgVXwNum7ZOiaAYTpg6 github.com/solo-io/go-utils v0.20.2/go.mod h1:6e8K1spnMWwlnJRSNp/J84GEyJbrcK4Gm7i+ehzCi8c= github.com/solo-io/go-utils v0.27.1 h1:14XwaKv21EaYYeUF2wFfPe3DPz2Gbm9sfenGv/aRIls= github.com/solo-io/go-utils v0.27.1/go.mod h1:cwbQIYO1/BeU4aPB0Yy8WzzS77dfVTZyCVqbA4YsRSY= -github.com/solo-io/k8s-utils v0.8.0 h1:jXd4HFDgbPWxHi04QDFYwA37D1nYr9XJI3MVa75oCD8= -github.com/solo-io/k8s-utils v0.8.0/go.mod h1:fOIFkh4+F45MmrUZEFx0pW75EvFYOR7v5/BIIQiSIwA= +github.com/solo-io/k8s-utils v0.8.1 h1:Xqqze6RLWsHCYetbaiXDEnuhFRXyqw0azyogggK43H8= +github.com/solo-io/k8s-utils v0.8.1/go.mod h1:fOIFkh4+F45MmrUZEFx0pW75EvFYOR7v5/BIIQiSIwA= github.com/solo-io/protoc-gen-ext v0.0.25 h1:UqNW/A4UqCO5aUFg7LYdV82tK0R2mqu7RFftYtT/Fu8= github.com/solo-io/protoc-gen-ext v0.0.25/go.mod h1:TZwUhbFtfd1fQQGBN6qWwtea0Fhi3V6DvGQnbqk3jf8= github.com/solo-io/protoc-gen-openapi v0.2.5 h1:l8YKsVks6JDFRzA9liYZIqauqpYRxHXnmHi4TjTIRf4= diff --git a/install/helm/gloo/generate/values.go b/install/helm/gloo/generate/values.go index fdfd06998c8..410aa2442fc 100644 --- a/install/helm/gloo/generate/values.go +++ b/install/helm/gloo/generate/values.go @@ -479,7 +479,8 @@ type GatewayValidation struct { DisableTransformationValidation *bool `json:"disableTransformationValidation,omitempty" desc:"set this to true to disable transformation validation. This may bring significant performance benefits if using many transformations, at the cost of possibly incorrect transformations being sent to Envoy. When using this value make sure to pre-validate transformations."` WarnRouteShortCircuiting *bool `json:"warnRouteShortCircuiting,omitempty" desc:"Write a warning to route resources if validation produced a route ordering warning (defaults to false). By setting to true, this means that Gloo Edge will start assigning warnings to resources that would result in route short-circuiting within a virtual host."` SecretName *string `json:"secretName,omitempty" desc:"Name of the Kubernetes Secret containing TLS certificates used by the validation webhook server. This secret will be created by the certGen Job if the certGen Job is enabled."` - FailurePolicy *string `json:"failurePolicy,omitempty" desc:"failurePolicy defines how unrecognized errors from the Gateway validation endpoint are handled - allowed values are 'Ignore' or 'Fail'. Defaults to Ignore "` + FailurePolicy *string `json:"failurePolicy,omitempty" desc:"Specify how to handle unrecognized errors for Gloo resources that are returned from the Gateway validation endpoint. Supported values are 'Ignore' or 'Fail'"` + KubeCoreFailurePolicy *string `json:"kubeCoreFailurePolicy,omitempty" desc:"Specify how to handle unrecognized errors for Kubernetes core resources that are returned by the Gateway validation endpoint. Currently the [validation webhook](https://github.com/solo-io/gloo/blob/main/install/helm/gloo/templates/5-gateway-validation-webhook-configuration.yaml) is configured to handle errors for Kubernetes secrets and namespaces. Supported values are 'Ignore' or 'Fail'. If you set this value to 'Fail', you cannot modify these core resources if the 'gloo' service is unavailable."` Webhook *Webhook `json:"webhook,omitempty" desc:"webhook specific configuration"` ValidationServerGrpcMaxSizeBytes *int `json:"validationServerGrpcMaxSizeBytes,omitempty" desc:"gRPC max message size in bytes for the gloo validation server"` LivenessProbeEnabled *bool `json:"livenessProbeEnabled,omitempty" desc:"Set to true to enable a liveness probe for the gateway (default is false). You must also set the 'Probes' value to true."` diff --git a/install/helm/gloo/templates/5-gateway-validation-webhook-configuration.yaml b/install/helm/gloo/templates/5-gateway-validation-webhook-configuration.yaml index e37961115ab..669fc02774d 100644 --- a/install/helm/gloo/templates/5-gateway-validation-webhook-configuration.yaml +++ b/install/helm/gloo/templates/5-gateway-validation-webhook-configuration.yaml @@ -52,7 +52,36 @@ specific resources, we will manage the resources that the webhook receives via t apiGroups: ["gloo.solo.io"] apiVersions: ["v1"] resources: ["upstreams"]{{/* TODO(https://github.com/solo-io/gloo/issues/2797): Extend to all gloo resources */}} -{{/* Can't use the include for this one because if the operations are empty, we need to drop the whole list element */}} + - operations: {{ include "gloo.webhookvalidation.operationsForResource" (list "ratelimitconfigs" .Values.gateway.validation.webhook.skipDeleteValidationResources) }} + apiGroups: ["ratelimit.solo.io"] + apiVersions: ["v1alpha1"] + resources: ["ratelimitconfigs"] + sideEffects: None + matchPolicy: Exact +{{- if .Values.gateway.validation.webhook.timeoutSeconds }} + timeoutSeconds: {{ .Values.gateway.validation.webhook.timeoutSeconds }} +{{- end }} + admissionReviewVersions: + - v1beta1 # v1beta1 still live in 1.22 https://github.com/kubernetes/api/blob/release-1.22/admission/v1beta1/types.go#L33 +{{- if .Values.gateway.validation.failurePolicy }} + failurePolicy: {{ .Values.gateway.validation.failurePolicy }} +{{- end }} {{- /* if .Values.gateway.validation.failurePolicy */}} + +{{/* Webhook for core resources - only render if we need to */}} +{{- if and + (not (has "*" .Values.gateway.validation.webhook.skipDeleteValidationResources)) + (or (not (has "secrets" .Values.gateway.validation.webhook.skipDeleteValidationResources)) + (not (has "namespaces" .Values.gateway.validation.webhook.skipDeleteValidationResources))) +}} +- name: kube.{{ .Release.Namespace }}.svc # must be a domain with at least three segments separated by dots + clientConfig: + service: + name: gloo + namespace: {{ .Release.Namespace }} + path: "/validation" + caBundle: "" # update manually or use certgen job or cert-manager's ca-injector + rules: +{{- /* Can't use the include for this one because if the operations are empty, we need to drop the whole list element */}} {{- if and (not (has "*" .Values.gateway.validation.webhook.skipDeleteValidationResources)) (not (has "secrets" .Values.gateway.validation.webhook.skipDeleteValidationResources)) }} - operations: [ "DELETE" ] apiGroups: [""]{{/* We do not have internal secret CRDs. We want to validate the deletion of secrets such as TLS, so we add "" which refers to Kubernetes' core APIs. */}} @@ -66,10 +95,6 @@ specific resources, we will manage the resources that the webhook receives via t apiVersions: ["v1"] resources: ["namespaces"] {{- end }} - - operations: {{ include "gloo.webhookvalidation.operationsForResource" (list "ratelimitconfigs" .Values.gateway.validation.webhook.skipDeleteValidationResources) }} - apiGroups: ["ratelimit.solo.io"] - apiVersions: ["v1alpha1"] - resources: ["ratelimitconfigs"] sideEffects: None matchPolicy: Exact {{- if .Values.gateway.validation.webhook.timeoutSeconds }} @@ -77,9 +102,11 @@ specific resources, we will manage the resources that the webhook receives via t {{- end }} admissionReviewVersions: - v1beta1 # v1beta1 still live in 1.22 https://github.com/kubernetes/api/blob/release-1.22/admission/v1beta1/types.go#L33 -{{- if .Values.gateway.validation.failurePolicy }} - failurePolicy: {{ .Values.gateway.validation.failurePolicy }} -{{- end }} {{/* if .Values.gateway.validation.failurePolicy */}} +{{- if .Values.gateway.validation.kubeCoreFailurePolicy }} + failurePolicy: {{ .Values.gateway.validation.kubeCoreFailurePolicy -}} +{{- end }} {{/* if .Values.gateway.validation.kubeCoreFailurePolicy */}} +{{- end }} {{/* render webhook if */}} + {{- end }} {{/* if and .Values.gateway.enabled .Values.gateway.validation.enabled .Values.gateway.validation.webhook.enabled */}} {{- end }} {{/* define "gateway.validationWebhookSpec" */}} @@ -88,6 +115,6 @@ specific resources, we will manage the resources that the webhook receives via t {{- if .Values.gateway.validation -}} {{- if .Values.gateway.validation.webhook -}} {{- $kubeResourceOverride = .Values.gateway.validation.webhook.kubeResourceOverride -}} -{{- end -}} {{/* if .Values.gateway.validation.webhook */}} -{{- end -}} {{/* if .Values.gateway.validation */}} +{{- end -}} {{/* if .Values.gateway.validation.webhook */ -}} +{{- end -}} {{/* if .Values.gateway.validation */ -}} {{- include "gloo.util.merge" (list . $kubeResourceOverride "gateway.validationWebhookSpec") -}} \ No newline at end of file diff --git a/install/helm/gloo/values-template.yaml b/install/helm/gloo/values-template.yaml index ff53a9e404c..541e79eba54 100644 --- a/install/helm/gloo/values-template.yaml +++ b/install/helm/gloo/values-template.yaml @@ -131,6 +131,9 @@ gateway: validation: enabled: true failurePolicy: "Ignore" + # This is the recommended setting because if it set to "Fail" modifications to core resources such as secrets and namespace that are defined + # in the validating webhook will be blocked if the Gloo Service is not available. + kubeCoreFailurePolicy: "Ignore" secretName: gateway-validation-certs alwaysAcceptResources: true allowWarnings: true diff --git a/install/test/5-gateway-validation-webhook-configuration_test.go b/install/test/5-gateway-validation-webhook-configuration_test.go index b4aa1c3931c..fea3c02d8db 100644 --- a/install/test/5-gateway-validation-webhook-configuration_test.go +++ b/install/test/5-gateway-validation-webhook-configuration_test.go @@ -16,7 +16,9 @@ import ( . "github.com/onsi/gomega" gloostringutils "github.com/solo-io/gloo/pkg/utils/stringutils" "github.com/solo-io/go-utils/stringutils" + "github.com/solo-io/k8s-utils/installutils/kuberesource" . "github.com/solo-io/k8s-utils/manifesttestutils" + admissionregistrationv1 "k8s.io/api/admissionregistration/v1" "k8s.io/apimachinery/pkg/apis/meta/v1/unstructured" "sigs.k8s.io/yaml" ) @@ -38,13 +40,16 @@ var _ = Describe("WebhookValidationConfiguration helm test", func() { testManifest = tm } - // DescribeTable("Can remove DELETEs from webhook rules", func(resources []string, expectedRemoved int) { timeoutSeconds := 5 // Count the "DELETES" as a sanity check. expectedDeletes := 6 - expectedRemoved - expectedChart := generateExpectedChart(timeoutSeconds, resources, expectedDeletes) + expectedChart := generateExpectedChart(expectedChartArgs{ + timeoutSeconds: timeoutSeconds, + skipDeletes: resources, + expectedDeletes: expectedDeletes, + }) prepareMakefile(namespace, glootestutils.HelmValues{ ValuesArgs: []string{ @@ -67,6 +72,62 @@ var _ = Describe("WebhookValidationConfiguration helm test", func() { Entry("empty", []string{}, 0), ) + It("Can set gloo failurePolicy", func() { + timeoutSeconds := 5 + expectedChart := generateExpectedChart(expectedChartArgs{ + timeoutSeconds: timeoutSeconds, + glooFailurePolicy: "Fail", + expectedDeletes: 6, + }) + + prepareMakefile(namespace, glootestutils.HelmValues{ + ValuesArgs: []string{ + fmt.Sprintf(`gateway.validation.webhook.timeoutSeconds=%d`, timeoutSeconds), + `gateway.validation.failurePolicy=Fail`, + }, + }) + testManifest.ExpectUnstructured(expectedChart.GetKind(), expectedChart.GetNamespace(), expectedChart.GetName()).To(BeEquivalentTo(expectedChart)) + }) + + It("Can set kube failurePolicy", func() { + timeoutSeconds := 5 + expectedChart := generateExpectedChart(expectedChartArgs{ + timeoutSeconds: timeoutSeconds, + kubeFailurePolicy: "Fail", + expectedDeletes: 6, + }) + + prepareMakefile(namespace, glootestutils.HelmValues{ + ValuesArgs: []string{ + fmt.Sprintf(`gateway.validation.webhook.timeoutSeconds=%d`, timeoutSeconds), + `gateway.validation.kubeCoreFailurePolicy=Fail`, + }, + }) + testManifest.ExpectUnstructured(expectedChart.GetKind(), expectedChart.GetNamespace(), expectedChart.GetName()).To(BeEquivalentTo(expectedChart)) + }) + + It("Kube Webhook is not rendered if secrets and resources are skipped", func() { + prepareMakefile(namespace, glootestutils.HelmValues{ + ValuesArgs: []string{ + `gateway.validation.webhook.skipDeleteValidationResources={secrets,namespaces}`, + }, + }) + + // assert that the kube webhook is not rendered + testManifest.SelectResources(func(resource *unstructured.Unstructured) bool { + return resource.GetKind() == "ValidatingWebhookConfiguration" && resource.GetName() == "gloo-gateway-validation-webhook-"+namespace + }).ExpectAll(func(webhook *unstructured.Unstructured) { + webhookObject, err := kuberesource.ConvertUnstructured(webhook) + + ExpectWithOffset(1, err).NotTo(HaveOccurred(), fmt.Sprintf("Webhook %+v should be able to convert from unstructured", webhook)) + structuredDeployment, ok := webhookObject.(*admissionregistrationv1.ValidatingWebhookConfiguration) + ExpectWithOffset(1, ok).To(BeTrue(), fmt.Sprintf("Webhook %+v should be able to cast to a structured deployment", webhook)) + + ExpectWithOffset(1, structuredDeployment.Webhooks).To(HaveLen(1), fmt.Sprintf("Only one webhook should be present on deployment %+v", structuredDeployment)) + ExpectWithOffset(1, structuredDeployment.Webhooks[0].Name).To(Equal("gloo."+namespace+".svc"), fmt.Sprintf("Webhook name should be 'gloo.%s.svc' on deployment %+v", namespace, structuredDeployment)) + }) + }) + Context("enablePolicyApi", func() { // containPolicyApiOperation returns a GomegaMatcher which will assert that a provided ValidatingWebhookConfiguration @@ -179,20 +240,38 @@ var _ = Describe("WebhookValidationConfiguration helm test", func() { runTests(allTests) }) -func generateExpectedChart(timeoutSeconds int, skipDeletes []string, expectedDeletes int) *unstructured.Unstructured { - rules := generateRules(skipDeletes) +type expectedChartArgs struct { + timeoutSeconds int + skipDeletes []string + expectedDeletes int + glooFailurePolicy string + kubeFailurePolicy string +} + +func generateExpectedChart(args expectedChartArgs) *unstructured.Unstructured { + + // Default failure policies + if args.glooFailurePolicy == "" { + args.glooFailurePolicy = "Ignore" + } + if args.kubeFailurePolicy == "" { + args.kubeFailurePolicy = "Ignore" + } + + GinkgoHelper() + glooRules, coreRules := generateRules(args.skipDeletes) // indent "rules" m1 := regexp.MustCompile("\n") - rules = m1.ReplaceAllString(rules, "\n ") + glooRules = m1.ReplaceAllString(glooRules, "\n ") + coreRules = m1.ReplaceAllString(coreRules, "\n ") // Check that we have the expected number of DELETEs m2 := regexp.MustCompile(`DELETE`) - deletes := m2.FindAllStringIndex(rules, -1) - Expect(deletes).To(HaveLen(expectedDeletes)) - - return makeUnstructured(` + deletes := len(m2.FindAllStringIndex(glooRules, -1)) + len(m2.FindAllStringIndex(coreRules, -1)) + Expect(deletes).To(Equal(args.expectedDeletes)) + chart := ` apiVersion: admissionregistration.k8s.io/v1 kind: ValidatingWebhookConfiguration metadata: @@ -212,18 +291,41 @@ webhooks: path: "/validation" caBundle: "" # update manually or use certgen job rules: - ` + rules + ` + ` + glooRules + ` sideEffects: None matchPolicy: Exact - timeoutSeconds: ` + strconv.Itoa(timeoutSeconds) + ` + timeoutSeconds: ` + strconv.Itoa(args.timeoutSeconds) + ` admissionReviewVersions: - v1beta1 - failurePolicy: Ignore -`) + failurePolicy: ` + args.glooFailurePolicy + ` +` + // Only create the webhook for core resources if there are any resources being valdiated + if coreRules != "[]\n " { + chart += `- name: kube.` + namespace + `.svc # must be a domain with at least three segments separated by dots + clientConfig: + service: + name: gloo + namespace: ` + namespace + ` + path: "/validation" + caBundle: "" # update manually or use certgen job + rules: + ` + coreRules + ` + sideEffects: None + matchPolicy: Exact + timeoutSeconds: ` + strconv.Itoa(args.timeoutSeconds) + ` + admissionReviewVersions: + - v1beta1 + failurePolicy: ` + args.kubeFailurePolicy + ` +` + } + + return makeUnstructured(chart) } -func generateRules(skipDeleteReources []string) string { - rules := []map[string][]string{ +// generateRules returns gloo rules and core rules as separate strings +func generateRules(skipDeleteReources []string) (string, string) { + GinkgoHelper() + glooRules := []map[string][]string{ { "operations": {"CREATE", "UPDATE", "DELETE"}, "apiGroups": {"gateway.solo.io"}, @@ -248,6 +350,15 @@ func generateRules(skipDeleteReources []string) string { "apiVersions": {"v1"}, "resources": {"upstreams"}, }, + { + "operations": {"CREATE", "UPDATE", "DELETE"}, + "apiGroups": {"ratelimit.solo.io"}, + "apiVersions": {"v1alpha1"}, + "resources": {"ratelimitconfigs"}, + }, + } + + coreRules := []map[string][]string{ { "operations": {"DELETE"}, "apiGroups": {""}, @@ -260,16 +371,21 @@ func generateRules(skipDeleteReources []string) string { "apiVersions": {"v1"}, "resources": {"namespaces"}, }, - { - "operations": {"CREATE", "UPDATE", "DELETE"}, - "apiGroups": {"ratelimit.solo.io"}, - "apiVersions": {"v1alpha1"}, - "resources": {"ratelimitconfigs"}, - }, } - finalRules := []map[string][]string{} - for i, rule := range rules { + finalGlooRules := []map[string][]string{} + for i, rule := range glooRules { + if stringutils.ContainsAny([]string{rule["resources"][0], "*"}, skipDeleteReources) { + rule["operations"] = gloostringutils.DeleteOneByValue(rule["operations"], "DELETE") + } + + if len(rule["operations"]) != 0 { + finalGlooRules = append(finalGlooRules, glooRules[i]) + } + } + + finalNonGlooRules := []map[string][]string{} + for i, rule := range coreRules { if stringutils.ContainsAny([]string{rule["resources"][0], "*"}, skipDeleteReources) { rule["operations"] = gloostringutils.DeleteOneByValue(rule["operations"], "DELETE") // A namespace with an update to a label can cause it to no longer be watched, @@ -280,11 +396,13 @@ func generateRules(skipDeleteReources []string) string { } if len(rule["operations"]) != 0 { - finalRules = append(finalRules, rules[i]) + finalNonGlooRules = append(finalNonGlooRules, coreRules[i]) } } - str, err := yaml.Marshal(finalRules) + glooRulesYaml, err := yaml.Marshal(finalGlooRules) + Expect(err).NotTo(HaveOccurred()) + nonGlooRulesYaml, err := yaml.Marshal(finalNonGlooRules) Expect(err).NotTo(HaveOccurred()) - return string(str) + return string(glooRulesYaml), string(nonGlooRulesYaml) } diff --git a/projects/gateway/pkg/services/k8sadmission/README.md b/projects/gateway/pkg/services/k8sadmission/README.md index 8b5d1575fb9..166c856ffbc 100644 --- a/projects/gateway/pkg/services/k8sadmission/README.md +++ b/projects/gateway/pkg/services/k8sadmission/README.md @@ -16,6 +16,8 @@ Gloo leverages the ValidatingAdmissionWebhook to validate proposed changes to cu #### Which resources are subject to the ValidatingAdmissionWebhook? The ValidatingWebhookConfiguration is part of the Kubernetes API, and configured through a [Helm template](https://github.com/solo-io/gloo/blob/main/install/helm/gloo/templates/5-gateway-validation-webhook-configuration.yaml). Based on the webhook rules, only the API groups/resources that match the rules will be subject to validation +The template defines two separate webhooks, one for Gloo resources and one for supported Kubernetes resources, currently namespaces and secrets. These webhooks can have different `failurePolicies` defined, and it is recommended that the Kubernetes webhook failure policy, set the Helm value `gateway.validation.kubeCoreFailurePolicy`, remains at its default value of `Ignore`. The danger of a `failurePolicy` of `Fail` in this situation is that if the Gloo Service is unavailable, unrelated namespaces and secrets will not be able to be modified if those modifications match the webhook rules. + #### Where is the Webhook entrypoint defined? The ValidatingWebhookConfiguration defines the `name` and `path` to the service which handles validation requests. In Gloo, this is the Gloo Service, at the `/validation` endpoint. @@ -44,6 +46,7 @@ Instead of re-defining this same set of validation code in our webhook, we re-us #### Where is the webhook configuration defined? Webhook configuration is defined on the [Gloo Settings resource](https://github.com/solo-io/gloo/blob/a3430da820bd39a8b0940025c1040e33eeb7d8f8/projects/gloo/api/v1/settings.proto#L605) + ### Debugging #### What if requests aren't received by the webhook? 1. Turn on [debug logs](https://docs.solo.io/gloo-edge/latest/operations/debugging_gloo/#changing-logging-levels-and-more) and confirm that requests are not being processed by the validation webhook. diff --git a/test/kubernetes/e2e/features/validation/split_webhook/suite.go b/test/kubernetes/e2e/features/validation/split_webhook/suite.go new file mode 100644 index 00000000000..e15676b90d5 --- /dev/null +++ b/test/kubernetes/e2e/features/validation/split_webhook/suite.go @@ -0,0 +1,224 @@ +package split_webhook + +import ( + "context" + "fmt" + "strconv" + "strings" + "time" + + "github.com/onsi/gomega" + gloo_defaults "github.com/solo-io/gloo/projects/gloo/pkg/defaults" + "github.com/solo-io/gloo/test/kubernetes/e2e" + "github.com/solo-io/gloo/test/kubernetes/e2e/features/validation" + "github.com/solo-io/gloo/test/kubernetes/testutils/helper" + "github.com/solo-io/solo-kit/pkg/api/v1/clients" + "github.com/solo-io/solo-kit/pkg/api/v1/resources" + "github.com/solo-io/solo-kit/pkg/api/v1/resources/core" + "sigs.k8s.io/controller-runtime/pkg/client" + + "github.com/stretchr/testify/suite" + appsv1 "k8s.io/api/apps/v1" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" +) + +var _ e2e.NewSuiteFunc = NewTestingSuite + +type testingSuite struct { + suite.Suite + + ctx context.Context + + // testInstallation contains all the metadata/utilities necessary to execute a series of tests + // against an installation of Gloo Gateway + testInstallation *e2e.TestInstallation + + testHelper *helper.SoloTestHelper + glooReplicas int + manifestObjects map[string][]client.Object + rollback func() error + resourceDeleted bool +} + +// This suite is mean to be run in an environment where validation is enabled +func NewTestingSuite(ctx context.Context, testInst *e2e.TestInstallation) suite.TestingSuite { + return &testingSuite{ + ctx: ctx, + testInstallation: testInst, + testHelper: e2e.MustTestHelper(ctx, testInst), + } +} + +type webhookFailurePolicyTest struct { + // glooFailurePolicyFail determines whether the gloo webhook failure policy is set to Fail + glooFailurePolicyFail bool + // kubeFailurePolicyFail determines whether the kube webhook failure policy is set to Fail + kubeFailurePolicyFail bool +} + +func (s *testingSuite) TearDownSuite() { + // nothing at the moment +} +func (s *testingSuite) SetupSuite() { + // nothing at the moment +} + +func (s *testingSuite) BeforeTest(suiteName, testName string) { + // Apply the upgrade values file + var err error + s.rollback, err = s.testHelper.UpgradeGloo(s.ctx, 600*time.Second, helper.WithExtraArgs([]string{ + // Reuse values so there's no need to know the prior values used + "--reuse-values", + "--values", upgradeValues[testName], + }...)) + s.testInstallation.Assertions.Require.NoError(err) + + // Create resource we will be trying to delete + manifest := manifests[testName] + err = s.testInstallation.Actions.Kubectl().ApplyFile(s.ctx, manifest.filename, "-n", s.testInstallation.Metadata.InstallNamespace) + s.Assert().NoError(err, "can apply %s", manifest.filename) + + s.Assert().NotNil(manifest.validateCreated, "validateCreated function must be set for %s", testName) + manifest.validateCreated(s) + + // Get current replica count of gloo deployment + stdout, _, err := s.testInstallation.Actions.Kubectl().Execute(s.ctx, "get", "deployment", "gloo", "-n", s.testInstallation.Metadata.InstallNamespace, "-o=jsonpath='{.status.replicas}'") + s.Assert().NoError(err) + + if stdout == "" { + s.glooReplicas = 0 + } else { + s.glooReplicas, err = strconv.Atoi(strings.Trim(stdout, "'")) + s.Assert().NoError(err) + } + // Scale gloo deployment to 0 + err = s.testInstallation.Actions.Kubectl().Scale(s.ctx, s.testInstallation.Metadata.InstallNamespace, "deployment/gloo", 0) + s.Assert().NoError(err, "can scale gloo deployment to 0") + s.testInstallation.Assertions.EventuallyRunningReplicas(s.ctx, s.glooDeployment().ObjectMeta, gomega.Equal(0)) + + s.validateCaBundles() + s.resourceDeleted = false +} + +func (s *testingSuite) AfterTest(suiteName, testName string) { + // Scale gloo deployment back to original replica count + err := s.testInstallation.Actions.Kubectl().Scale(s.ctx, s.testInstallation.Metadata.InstallNamespace, "deployment/gloo", uint(s.glooReplicas)) + s.Assert().NoError(err, "can scale gloo deployment back to %d", s.glooReplicas) + s.testInstallation.Assertions.EventuallyRunningReplicas(s.ctx, s.glooDeployment().ObjectMeta, gomega.Equal(s.glooReplicas)) + s.testInstallation.Assertions.EventuallyPodsRunning(s.ctx, s.testInstallation.Metadata.InstallNamespace, metav1.ListOptions{LabelSelector: "gloo=gloo"}, time.Minute*2) + + // Delete the resource created if it hasn't been already + if !s.resourceDeleted { + manifest := manifests[testName] + output, err := s.testInstallation.Actions.Kubectl().DeleteFileWithOutput(s.ctx, manifest.filename, "-n", s.testInstallation.Metadata.InstallNamespace) + s.testInstallation.Assertions.ExpectObjectDeleted(manifest.filename, err, output) + } + + // Rollback the helm upgrades + err = s.rollback() + s.testInstallation.Assertions.Require.NoError(err) +} + +func (s *testingSuite) glooDeployment() *appsv1.Deployment { + return &appsv1.Deployment{ + ObjectMeta: metav1.ObjectMeta{ + Namespace: s.testInstallation.Metadata.InstallNamespace, + Name: "gloo", + Labels: map[string]string{"gloo": "gloo"}, + }, + } +} + +// Test the caBundle is set for both webhooks +func (s *testingSuite) validateCaBundles() { + + for i := 0; i < 2; i++ { + stdout, _, err := s.testInstallation.Actions.Kubectl().Execute( + s.ctx, "get", + "ValidatingWebhookConfiguration", fmt.Sprintf("gloo-gateway-validation-webhook-%s", s.testInstallation.Metadata.InstallNamespace), + "-n", s.testInstallation.Metadata.InstallNamespace, + "-o", fmt.Sprintf("jsonpath={.webhooks[%d].clientConfig.caBundle}", i), + ) + + s.Assert().NoError(err) + // The value is set as "" in the template, so if it is not empty we know it was set + s.Assert().NotEmpty(stdout) + } + +} + +func (s *testingSuite) TestGlooFailurePolicyFail() { + s.testDeleteResource(validation.BasicUpstream, false) +} + +func (s *testingSuite) TestKubeFailurePolicyFail() { + s.testDeleteResource(validation.Secret, false) +} + +func (s *testingSuite) TestGlooFailurePolicyIgnore() { + s.testDeleteResource(validation.BasicUpstream, true) +} + +func (s *testingSuite) TestKubeFailurePolicyIgnore() { + s.testDeleteResource(validation.Secret, true) +} + +func (s *testingSuite) testDeleteResource(fileName string, shouldDelete bool) { + output, err := s.testInstallation.Actions.Kubectl().DeleteFileWithOutput(s.ctx, fileName, "-n", s.testInstallation.Metadata.InstallNamespace) + + if shouldDelete { + s.Assert().NoError(err) + s.testInstallation.Assertions.ExpectObjectDeleted(fileName, err, output) + s.resourceDeleted = true + } else { + s.Assert().Error(err) + s.Assert().Contains(output, "Internal error occurred: failed calling webhook") + } + +} + +var upgradeValues = map[string]string{ + "TestGlooFailurePolicyFail": validation.GlooFailurePolicyFailValues, + "TestKubeFailurePolicyFail": validation.KubeFailurePolicyFailValues, + "TestGlooFailurePolicyIgnore": validation.GlooFailurePolicyIgnoreValues, + "TestKubeFailurePolicyIgnore": validation.KubeFailurePolicyIgnoreValues, +} + +// These tests create one resource and try to delete it, so don't need lists of resources +type testManifest struct { + filename string + validateCreated func(*testingSuite) +} + +var ( + validateSecretCreated = func(s *testingSuite) { + // No error is enough to validate the secret was created + } + + validateUpstreamCreated = func(s *testingSuite) { + s.testInstallation.Assertions.EventuallyResourceStatusMatchesState( + func() (resources.InputResource, error) { + return s.testInstallation.ResourceClients.UpstreamClient().Read(s.testInstallation.Metadata.InstallNamespace, "json-upstream", clients.ReadOpts{Ctx: s.ctx}) + }, + core.Status_Accepted, + gloo_defaults.GlooReporter, + ) + } + + secretManifest = &testManifest{ + filename: validation.Secret, + validateCreated: validateSecretCreated, + } + + upstreamManifest = &testManifest{ + filename: validation.BasicUpstream, + validateCreated: validateUpstreamCreated, + } + + manifests = map[string]*testManifest{ + "TestGlooFailurePolicyFail": upstreamManifest, + "TestGlooFailurePolicyIgnore": upstreamManifest, + "TestKubeFailurePolicyFail": secretManifest, + "TestKubeFailurePolicyIgnore": secretManifest, + } +) diff --git a/test/kubernetes/e2e/features/validation/testdata/split-webhook/basic-upstream.yaml b/test/kubernetes/e2e/features/validation/testdata/split-webhook/basic-upstream.yaml new file mode 100644 index 00000000000..296d787b7d8 --- /dev/null +++ b/test/kubernetes/e2e/features/validation/testdata/split-webhook/basic-upstream.yaml @@ -0,0 +1,10 @@ +apiVersion: gloo.solo.io/v1 +kind: Upstream +metadata: + name: json-upstream +spec: + static: + hosts: + - addr: jsonplaceholder.typicode.com + port: 80 + diff --git a/test/kubernetes/e2e/features/validation/testdata/split-webhook/gloo-webhook-failure-policy-fail-values.yaml b/test/kubernetes/e2e/features/validation/testdata/split-webhook/gloo-webhook-failure-policy-fail-values.yaml new file mode 100644 index 00000000000..14fe09a0d50 --- /dev/null +++ b/test/kubernetes/e2e/features/validation/testdata/split-webhook/gloo-webhook-failure-policy-fail-values.yaml @@ -0,0 +1,5 @@ +gateway: + validation: + failurePolicy: Fail # For "strict" validation mode, fail the validation if webhook server is not available + webhook: + skipDeleteValidationResources: [] \ No newline at end of file diff --git a/test/kubernetes/e2e/features/validation/testdata/split-webhook/gloo-webhook-failure-policy-ignore-values.yaml b/test/kubernetes/e2e/features/validation/testdata/split-webhook/gloo-webhook-failure-policy-ignore-values.yaml new file mode 100644 index 00000000000..7037a7e1e9a --- /dev/null +++ b/test/kubernetes/e2e/features/validation/testdata/split-webhook/gloo-webhook-failure-policy-ignore-values.yaml @@ -0,0 +1,5 @@ +gateway: + validation: + failurePolicy: Ignore + webhook: + skipDeleteValidationResources: [] \ No newline at end of file diff --git a/test/kubernetes/e2e/features/validation/testdata/split-webhook/kube-webhook-failure-policy-fail-values.yaml b/test/kubernetes/e2e/features/validation/testdata/split-webhook/kube-webhook-failure-policy-fail-values.yaml new file mode 100644 index 00000000000..090139485f5 --- /dev/null +++ b/test/kubernetes/e2e/features/validation/testdata/split-webhook/kube-webhook-failure-policy-fail-values.yaml @@ -0,0 +1,5 @@ +gateway: + validation: + kubeCoreFailurePolicy: Fail # For "strict" validation mode, fail the validation if webhook server is not available + webhook: + skipDeleteValidationResources: [] \ No newline at end of file diff --git a/test/kubernetes/e2e/features/validation/testdata/split-webhook/kube-webhook-failure-policy-ignore-values.yaml b/test/kubernetes/e2e/features/validation/testdata/split-webhook/kube-webhook-failure-policy-ignore-values.yaml new file mode 100644 index 00000000000..b1076ce044b --- /dev/null +++ b/test/kubernetes/e2e/features/validation/testdata/split-webhook/kube-webhook-failure-policy-ignore-values.yaml @@ -0,0 +1,5 @@ +gateway: + validation: + kubeCoreFailurePolicy: Ignore + webhook: + skipDeleteValidationResources: [] \ No newline at end of file diff --git a/test/kubernetes/e2e/features/validation/types.go b/test/kubernetes/e2e/features/validation/types.go index e0870e79733..720c00ea9a0 100644 --- a/test/kubernetes/e2e/features/validation/types.go +++ b/test/kubernetes/e2e/features/validation/types.go @@ -45,6 +45,14 @@ var ( VSTransformationHeaderText = filepath.Join(util.MustGetThisDir(), "testdata", "transformation", "vs-transform-header-text.yaml") VSTransformationSingleReplace = filepath.Join(util.MustGetThisDir(), "testdata", "transformation", "vs-transform-single-replace.yaml") + // Split webhook validation + BasicUpstream = filepath.Join(util.MustGetThisDir(), "testdata", "split-webhook", "basic-upstream.yaml") + + GlooFailurePolicyFailValues = filepath.Join(util.MustGetThisDir(), "testdata", "split-webhook", "gloo-webhook-failure-policy-fail-values.yaml") + KubeFailurePolicyFailValues = filepath.Join(util.MustGetThisDir(), "testdata", "split-webhook", "kube-webhook-failure-policy-fail-values.yaml") + GlooFailurePolicyIgnoreValues = filepath.Join(util.MustGetThisDir(), "testdata", "split-webhook", "gloo-webhook-failure-policy-ignore-values.yaml") + KubeFailurePolicyIgnoreValues = filepath.Join(util.MustGetThisDir(), "testdata", "split-webhook", "kube-webhook-failure-policy-ignore-values.yaml") + ExpectedUpstreamResp = &testmatchers.HttpResponse{ StatusCode: http.StatusOK, Body: gomega.ContainSubstring("Welcome to nginx!"), diff --git a/test/kubernetes/e2e/features/validation/validation_reject_invalid/suite.go b/test/kubernetes/e2e/features/validation/validation_reject_invalid/suite.go index 4e1cb9693ec..510bcee937e 100644 --- a/test/kubernetes/e2e/features/validation/validation_reject_invalid/suite.go +++ b/test/kubernetes/e2e/features/validation/validation_reject_invalid/suite.go @@ -117,7 +117,7 @@ func (s *testingSuite) TestVirtualServiceWithSecretDeletion() { // failing to delete a secret that is in use output, err := s.testInstallation.Actions.Kubectl().DeleteFileWithOutput(s.ctx, validation.Secret, "-n", s.testInstallation.Metadata.InstallNamespace) s.Assert().Error(err) - s.Assert().Contains(output, fmt.Sprintf(`admission webhook "gloo.%s.svc" denied the request`, s.testInstallation.Metadata.InstallNamespace)) + s.Assert().Contains(output, fmt.Sprintf(`admission webhook "kube.%s.svc" denied the request`, s.testInstallation.Metadata.InstallNamespace)) s.Assert().Contains(output, fmt.Sprintf("failed validating the deletion of resource")) s.Assert().Contains(output, fmt.Sprintf("SSL secret not found: list did not find secret %s.tls-secret", s.testInstallation.Metadata.InstallNamespace)) diff --git a/test/kubernetes/e2e/features/validation/validation_strict_warnings/suite.go b/test/kubernetes/e2e/features/validation/validation_strict_warnings/suite.go index 118de06adbb..1c8ca461b6d 100644 --- a/test/kubernetes/e2e/features/validation/validation_strict_warnings/suite.go +++ b/test/kubernetes/e2e/features/validation/validation_strict_warnings/suite.go @@ -116,7 +116,7 @@ func (s *testingSuite) TestVirtualServiceWithSecretDeletion() { // failing to delete a secret that is in use output, err := s.testInstallation.Actions.Kubectl().DeleteFileWithOutput(s.ctx, validation.Secret, "-n", s.testInstallation.Metadata.InstallNamespace) s.Assert().Error(err) - s.Assert().Contains(output, fmt.Sprintf(`admission webhook "gloo.%s.svc" denied the request`, s.testInstallation.Metadata.InstallNamespace)) + s.Assert().Contains(output, fmt.Sprintf(`admission webhook "kube.%s.svc" denied the request`, s.testInstallation.Metadata.InstallNamespace)) s.Assert().Contains(output, fmt.Sprintf("failed validating the deletion of resource")) s.Assert().Contains(output, fmt.Sprintf("SSL secret not found: list did not find secret %s.tls-secret", s.testInstallation.Metadata.InstallNamespace)) diff --git a/test/kubernetes/e2e/tests/validation_strict_tests.go b/test/kubernetes/e2e/tests/validation_strict_tests.go index ddf449e9719..f9b8f846f10 100644 --- a/test/kubernetes/e2e/tests/validation_strict_tests.go +++ b/test/kubernetes/e2e/tests/validation_strict_tests.go @@ -2,6 +2,7 @@ package tests import ( "github.com/solo-io/gloo/test/kubernetes/e2e" + "github.com/solo-io/gloo/test/kubernetes/e2e/features/validation/split_webhook" "github.com/solo-io/gloo/test/kubernetes/e2e/features/validation/validation_reject_invalid" "github.com/solo-io/gloo/test/kubernetes/e2e/features/validation/validation_strict_warnings" ) @@ -11,6 +12,7 @@ func ValidationStrictSuiteRunner() e2e.SuiteRunner { validationSuiteRunner.Register("ValidationStrictWarnings", validation_strict_warnings.NewTestingSuite) validationSuiteRunner.Register("ValidationRejectInvalid", validation_reject_invalid.NewTestingSuite) + validationSuiteRunner.Register("ValidationSplitWebhook", split_webhook.NewTestingSuite) return validationSuiteRunner }