diff --git a/api/v1beta2/sparkapplication_types.go b/api/v1beta2/sparkapplication_types.go index 4a6f6545b..396b7e6f8 100644 --- a/api/v1beta2/sparkapplication_types.go +++ b/api/v1beta2/sparkapplication_types.go @@ -142,6 +142,9 @@ type SparkApplicationSpec struct { // scheduler backend since Spark 3.0. // +optional DynamicAllocation *DynamicAllocation `json:"dynamicAllocation,omitempty"` + // Kerberos configures Kerberos authentication for Hadoop access. + // +optional + Kerberos *KerberosSpec `json:"kerberos,omitempty"` } // SparkApplicationStatus defines the observed state of SparkApplication @@ -604,6 +607,9 @@ const ( // SecretTypeHadoopDelegationToken is for secrets from an Hadoop delegation token that needs the // environment variable HADOOP_TOKEN_FILE_LOCATION. SecretTypeHadoopDelegationToken SecretType = "HadoopDelegationToken" + // SecretTypeKerberosKeytab is for secrets from a Kerberos keytab file that needs the + // environment variable KRB5_KEYTAB_FILE. + SecretTypeKerberosKeytab SecretType = "KerberosKeytab" // SecretTypeGeneric is for secrets that needs no special handling. SecretTypeGeneric SecretType = "Generic" ) @@ -717,3 +723,41 @@ type DynamicAllocation struct { // +optional ShuffleTrackingTimeout *int64 `json:"shuffleTrackingTimeout,omitempty"` } + +// KerberosSpec defines the Kerberos authentication configuration for Hadoop access. +type KerberosSpec struct { + // Principal is the Kerberos principal name for authentication. + // +optional + Principal *string `json:"principal,omitempty"` + // KeytabSecret is the name of the secret containing the Kerberos keytab file. + // +optional + KeytabSecret *string `json:"keytabSecret,omitempty"` + // KeytabFile is the path to the keytab file within the keytab secret. + // Defaults to "krb5.keytab" if not specified. + // +optional + KeytabFile *string `json:"keytabFile,omitempty"` + // ConfigSecret is the name of the secret containing the Kerberos configuration file (krb5.conf). 
+ // +optional + ConfigSecret *string `json:"configSecret,omitempty"` + // ConfigFile is the path to the krb5.conf file within the config secret. + // Defaults to "krb5.conf" if not specified. + // +optional + ConfigFile *string `json:"configFile,omitempty"` + // Realm is the Kerberos realm. This is optional and can be inferred from the principal. + // +optional + Realm *string `json:"realm,omitempty"` + // KDC is the Key Distribution Center address. + // +optional + KDC *string `json:"kdc,omitempty"` + // RenewalCredentials specifies the credential renewal strategy. + // Valid values are "keytab" (default) and "ccache". + // "keytab" enables automatic renewal using the provided keytab. + // "ccache" uses existing ticket cache (requires manual ticket management). + // +optional + // +kubebuilder:validation:Enum={keytab,ccache} + RenewalCredentials *string `json:"renewalCredentials,omitempty"` + // EnabledServices specifies which Hadoop services should have Kerberos credentials enabled. + // Defaults to ["hadoopfs", "hbase", "hive"] if not specified. + // +optional + EnabledServices []string `json:"enabledServices,omitempty"` +} diff --git a/api/v1beta2/zz_generated.deepcopy.go b/api/v1beta2/zz_generated.deepcopy.go index a490b0eb8..273f5330e 100644 --- a/api/v1beta2/zz_generated.deepcopy.go +++ b/api/v1beta2/zz_generated.deepcopy.go @@ -367,6 +367,66 @@ func (in *GPUSpec) DeepCopy() *GPUSpec { return out } +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. 
+func (in *KerberosSpec) DeepCopyInto(out *KerberosSpec) { + *out = *in + if in.Principal != nil { + in, out := &in.Principal, &out.Principal + *out = new(string) + **out = **in + } + if in.KeytabSecret != nil { + in, out := &in.KeytabSecret, &out.KeytabSecret + *out = new(string) + **out = **in + } + if in.KeytabFile != nil { + in, out := &in.KeytabFile, &out.KeytabFile + *out = new(string) + **out = **in + } + if in.ConfigSecret != nil { + in, out := &in.ConfigSecret, &out.ConfigSecret + *out = new(string) + **out = **in + } + if in.ConfigFile != nil { + in, out := &in.ConfigFile, &out.ConfigFile + *out = new(string) + **out = **in + } + if in.Realm != nil { + in, out := &in.Realm, &out.Realm + *out = new(string) + **out = **in + } + if in.KDC != nil { + in, out := &in.KDC, &out.KDC + *out = new(string) + **out = **in + } + if in.RenewalCredentials != nil { + in, out := &in.RenewalCredentials, &out.RenewalCredentials + *out = new(string) + **out = **in + } + if in.EnabledServices != nil { + in, out := &in.EnabledServices, &out.EnabledServices + *out = make([]string, len(*in)) + copy(*out, *in) + } +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new KerberosSpec. +func (in *KerberosSpec) DeepCopy() *KerberosSpec { + if in == nil { + return nil + } + out := new(KerberosSpec) + in.DeepCopyInto(out) + return out +} + // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. func (in *MonitoringSpec) DeepCopyInto(out *MonitoringSpec) { *out = *in @@ -840,6 +900,11 @@ func (in *SparkApplicationSpec) DeepCopyInto(out *SparkApplicationSpec) { *out = new(DynamicAllocation) (*in).DeepCopyInto(*out) } + if in.Kerberos != nil { + in, out := &in.Kerberos, &out.Kerberos + *out = new(KerberosSpec) + (*in).DeepCopyInto(*out) + } } // DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new SparkApplicationSpec. 
diff --git a/charts/spark-operator-chart/README.md b/charts/spark-operator-chart/README.md index 6e2fceda4..4ebd9bdf8 100644 --- a/charts/spark-operator-chart/README.md +++ b/charts/spark-operator-chart/README.md @@ -173,6 +173,12 @@ See [helm uninstall](https://helm.sh/docs/helm/helm_uninstall) for command docum | spark.serviceAccount.automountServiceAccountToken | bool | `true` | Auto-mount service account token to the spark applications pods. | | spark.rbac.create | bool | `true` | Specifies whether to create RBAC resources for spark applications. | | spark.rbac.annotations | object | `{}` | Optional annotations for the spark application RBAC resources. | +| spark.kerberos.enable | bool | `false` | Enable Kerberos authentication support for Spark applications. | +| spark.kerberos.defaultPrincipal | string | `""` | Default Kerberos principal for authentication (can be overridden per application). Example: spark@EXAMPLE.COM | +| spark.kerberos.defaultRealm | string | `""` | Default Kerberos realm (can be overridden per application). Example: EXAMPLE.COM | +| spark.kerberos.defaultKDC | string | `""` | Default Kerberos KDC address (can be overridden per application). Example: kdc.example.com:88 | +| spark.kerberos.defaultKeytabSecret | string | `""` | Name of the secret containing the default Kerberos keytab file. This secret should contain a file named 'krb5.keytab' | +| spark.kerberos.defaultConfigSecret | string | `""` | Name of the secret containing the default Kerberos configuration (krb5.conf). This secret should contain a file named 'krb5.conf' | | prometheus.metrics.enable | bool | `true` | Specifies whether to enable prometheus metrics scraping. | | prometheus.metrics.port | int | `8080` | Metrics port. | | prometheus.metrics.portName | string | `"metrics"` | Metrics port name. 
| diff --git a/charts/spark-operator-chart/crds/sparkoperator.k8s.io_scheduledsparkapplications.yaml b/charts/spark-operator-chart/crds/sparkoperator.k8s.io_scheduledsparkapplications.yaml index 0a2d9c7f7..59ad4460f 100644 --- a/charts/spark-operator-chart/crds/sparkoperator.k8s.io_scheduledsparkapplications.yaml +++ b/charts/spark-operator-chart/crds/sparkoperator.k8s.io_scheduledsparkapplications.yaml @@ -10342,6 +10342,57 @@ spec: items: type: string type: array + kerberos: + description: Kerberos configures Kerberos authentication for Hadoop + access. + properties: + configFile: + description: |- + ConfigFile is the path to the krb5.conf file within the config secret. + Defaults to "krb5.conf" if not specified. + type: string + configSecret: + description: ConfigSecret is the name of the secret containing + the Kerberos configuration file (krb5.conf). + type: string + enabledServices: + description: |- + EnabledServices specifies which Hadoop services should have Kerberos credentials enabled. + Defaults to ["hadoopfs", "hbase", "hive"] if not specified. + items: + type: string + type: array + kdc: + description: KDC is the Key Distribution Center address. + type: string + keytabFile: + description: |- + KeytabFile is the path to the keytab file within the keytab secret. + Defaults to "krb5.keytab" if not specified. + type: string + keytabSecret: + description: KeytabSecret is the name of the secret containing + the Kerberos keytab file. + type: string + principal: + description: Principal is the Kerberos principal name for + authentication. + type: string + realm: + description: Realm is the Kerberos realm. This is optional + and can be inferred from the principal. + type: string + renewalCredentials: + description: |- + RenewalCredentials specifies the credential renewal strategy. + Valid values are "keytab" (default) and "ccache". + "keytab" enables automatic renewal using the provided keytab. 
+ "ccache" uses existing ticket cache (requires manual ticket management). + enum: + - keytab + - ccache + type: string + type: object mainApplicationFile: description: MainFile is the path to a bundled JAR, Python, or R file of the application. diff --git a/charts/spark-operator-chart/crds/sparkoperator.k8s.io_sparkapplications.yaml b/charts/spark-operator-chart/crds/sparkoperator.k8s.io_sparkapplications.yaml index c3d4c59ec..544159c96 100644 --- a/charts/spark-operator-chart/crds/sparkoperator.k8s.io_sparkapplications.yaml +++ b/charts/spark-operator-chart/crds/sparkoperator.k8s.io_sparkapplications.yaml @@ -10260,6 +10260,56 @@ spec: items: type: string type: array + kerberos: + description: Kerberos configures Kerberos authentication for Hadoop + access. + properties: + configFile: + description: |- + ConfigFile is the path to the krb5.conf file within the config secret. + Defaults to "krb5.conf" if not specified. + type: string + configSecret: + description: ConfigSecret is the name of the secret containing + the Kerberos configuration file (krb5.conf). + type: string + enabledServices: + description: |- + EnabledServices specifies which Hadoop services should have Kerberos credentials enabled. + Defaults to ["hadoopfs", "hbase", "hive"] if not specified. + items: + type: string + type: array + kdc: + description: KDC is the Key Distribution Center address. + type: string + keytabFile: + description: |- + KeytabFile is the path to the keytab file within the keytab secret. + Defaults to "krb5.keytab" if not specified. + type: string + keytabSecret: + description: KeytabSecret is the name of the secret containing + the Kerberos keytab file. + type: string + principal: + description: Principal is the Kerberos principal name for authentication. + type: string + realm: + description: Realm is the Kerberos realm. This is optional and + can be inferred from the principal. 
+ type: string + renewalCredentials: + description: |- + RenewalCredentials specifies the credential renewal strategy. + Valid values are "keytab" (default) and "ccache". + "keytab" enables automatic renewal using the provided keytab. + "ccache" uses existing ticket cache (requires manual ticket management). + enum: + - keytab + - ccache + type: string + type: object mainApplicationFile: description: MainFile is the path to a bundled JAR, Python, or R file of the application. diff --git a/charts/spark-operator-chart/values.yaml b/charts/spark-operator-chart/values.yaml index 7cf7ba13b..bd53f1fac 100644 --- a/charts/spark-operator-chart/values.yaml +++ b/charts/spark-operator-chart/values.yaml @@ -409,6 +409,31 @@ spark: # -- Optional annotations for the spark application RBAC resources. annotations: {} + # Kerberos configuration for Spark applications + kerberos: + # -- Enable Kerberos authentication support for Spark applications. + enable: false + + # -- Default Kerberos principal for authentication (can be overridden per application). + # Example: spark@EXAMPLE.COM + defaultPrincipal: "" + + # -- Default Kerberos realm (can be overridden per application). + # Example: EXAMPLE.COM + defaultRealm: "" + + # -- Default Kerberos KDC address (can be overridden per application). + # Example: kdc.example.com:88 + defaultKDC: "" + + # -- Name of the secret containing the default Kerberos keytab file. + # This secret should contain a file named 'krb5.keytab' + defaultKeytabSecret: "" + + # -- Name of the secret containing the default Kerberos configuration (krb5.conf). + # This secret should contain a file named 'krb5.conf' + defaultConfigSecret: "" + prometheus: metrics: # -- Specifies whether to enable prometheus metrics scraping. 
diff --git a/config/crd/bases/sparkoperator.k8s.io_scheduledsparkapplications.yaml b/config/crd/bases/sparkoperator.k8s.io_scheduledsparkapplications.yaml index 0a2d9c7f7..59ad4460f 100644 --- a/config/crd/bases/sparkoperator.k8s.io_scheduledsparkapplications.yaml +++ b/config/crd/bases/sparkoperator.k8s.io_scheduledsparkapplications.yaml @@ -10342,6 +10342,57 @@ spec: items: type: string type: array + kerberos: + description: Kerberos configures Kerberos authentication for Hadoop + access. + properties: + configFile: + description: |- + ConfigFile is the path to the krb5.conf file within the config secret. + Defaults to "krb5.conf" if not specified. + type: string + configSecret: + description: ConfigSecret is the name of the secret containing + the Kerberos configuration file (krb5.conf). + type: string + enabledServices: + description: |- + EnabledServices specifies which Hadoop services should have Kerberos credentials enabled. + Defaults to ["hadoopfs", "hbase", "hive"] if not specified. + items: + type: string + type: array + kdc: + description: KDC is the Key Distribution Center address. + type: string + keytabFile: + description: |- + KeytabFile is the path to the keytab file within the keytab secret. + Defaults to "krb5.keytab" if not specified. + type: string + keytabSecret: + description: KeytabSecret is the name of the secret containing + the Kerberos keytab file. + type: string + principal: + description: Principal is the Kerberos principal name for + authentication. + type: string + realm: + description: Realm is the Kerberos realm. This is optional + and can be inferred from the principal. + type: string + renewalCredentials: + description: |- + RenewalCredentials specifies the credential renewal strategy. + Valid values are "keytab" (default) and "ccache". + "keytab" enables automatic renewal using the provided keytab. + "ccache" uses existing ticket cache (requires manual ticket management). 
+ enum: + - keytab + - ccache + type: string + type: object mainApplicationFile: description: MainFile is the path to a bundled JAR, Python, or R file of the application. diff --git a/config/crd/bases/sparkoperator.k8s.io_sparkapplications.yaml b/config/crd/bases/sparkoperator.k8s.io_sparkapplications.yaml index c3d4c59ec..544159c96 100644 --- a/config/crd/bases/sparkoperator.k8s.io_sparkapplications.yaml +++ b/config/crd/bases/sparkoperator.k8s.io_sparkapplications.yaml @@ -10260,6 +10260,56 @@ spec: items: type: string type: array + kerberos: + description: Kerberos configures Kerberos authentication for Hadoop + access. + properties: + configFile: + description: |- + ConfigFile is the path to the krb5.conf file within the config secret. + Defaults to "krb5.conf" if not specified. + type: string + configSecret: + description: ConfigSecret is the name of the secret containing + the Kerberos configuration file (krb5.conf). + type: string + enabledServices: + description: |- + EnabledServices specifies which Hadoop services should have Kerberos credentials enabled. + Defaults to ["hadoopfs", "hbase", "hive"] if not specified. + items: + type: string + type: array + kdc: + description: KDC is the Key Distribution Center address. + type: string + keytabFile: + description: |- + KeytabFile is the path to the keytab file within the keytab secret. + Defaults to "krb5.keytab" if not specified. + type: string + keytabSecret: + description: KeytabSecret is the name of the secret containing + the Kerberos keytab file. + type: string + principal: + description: Principal is the Kerberos principal name for authentication. + type: string + realm: + description: Realm is the Kerberos realm. This is optional and + can be inferred from the principal. + type: string + renewalCredentials: + description: |- + RenewalCredentials specifies the credential renewal strategy. + Valid values are "keytab" (default) and "ccache". + "keytab" enables automatic renewal using the provided keytab. 
+ "ccache" uses existing ticket cache (requires manual ticket management). + enum: + - keytab + - ccache + type: string + type: object mainApplicationFile: description: MainFile is the path to a bundled JAR, Python, or R file of the application. diff --git a/docs/api-docs.md b/docs/api-docs.md index 861c507cf..d2da96455 100644 --- a/docs/api-docs.md +++ b/docs/api-docs.md @@ -643,6 +643,20 @@ DynamicAllocation scheduler backend since Spark 3.0.

+ + +kerberos
+ + +KerberosSpec + + + + +(Optional) +

Kerberos configures Kerberos authentication for Hadoop access.

+ + @@ -1609,6 +1623,138 @@ int64 +

KerberosSpec +

+

+(Appears on:SparkApplicationSpec) +

+
+

KerberosSpec defines the Kerberos authentication configuration for Hadoop access.

+
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
FieldDescription
+principal
+ +string + +
+(Optional) +

Principal is the Kerberos principal name for authentication.

+
+keytabSecret
+ +string + +
+(Optional) +

KeytabSecret is the name of the secret containing the Kerberos keytab file.

+
+keytabFile
+ +string + +
+(Optional) +

KeytabFile is the path to the keytab file within the keytab secret. +Defaults to “krb5.keytab” if not specified.

+
+configSecret
+ +string + +
+(Optional) +

ConfigSecret is the name of the secret containing the Kerberos configuration file (krb5.conf).

+
+configFile
+ +string + +
+(Optional) +

ConfigFile is the path to the krb5.conf file within the config secret. +Defaults to “krb5.conf” if not specified.

+
+realm
+ +string + +
+(Optional) +

Realm is the Kerberos realm. This is optional and can be inferred from the principal.

+
+kdc
+ +string + +
+(Optional) +

KDC is the Key Distribution Center address.

+
+renewalCredentials
+ +string + +
+(Optional) +

RenewalCredentials specifies the credential renewal strategy. +Valid values are “keytab” (default) and “ccache”. +“keytab” enables automatic renewal using the provided keytab. +“ccache” uses existing ticket cache (requires manual ticket management).

+
+enabledServices
+ +[]string + +
+(Optional) +

EnabledServices specifies which Hadoop services should have Kerberos credentials enabled. +Defaults to [“hadoopfs”, “hbase”, “hive”] if not specified.

+

MonitoringSpec

@@ -2310,6 +2456,10 @@ the environment variable GOOGLE_APPLICATION_CREDENTIALS.

SecretTypeHadoopDelegationToken is for secrets from an Hadoop delegation token that needs the environment variable HADOOP_TOKEN_FILE_LOCATION.

+

"KerberosKeytab"

+

SecretTypeKerberosKeytab is for secrets from a Kerberos keytab file that needs the +environment variable KRB5_KEYTAB_FILE.

+

SparkApplicationSpec @@ -2736,6 +2886,20 @@ DynamicAllocation scheduler backend since Spark 3.0.

+ + +kerberos
+ + +KerberosSpec + + + + +(Optional) +

Kerberos configures Kerberos authentication for Hadoop access.

+ +

SparkApplicationStatus diff --git a/examples/spark-pi-kerberos.yaml b/examples/spark-pi-kerberos.yaml new file mode 100644 index 000000000..b2dbdffed --- /dev/null +++ b/examples/spark-pi-kerberos.yaml @@ -0,0 +1,120 @@ +# +# Copyright 2025 The Kubeflow authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +# Example SparkApplication with Kerberos authentication for secure Hadoop access +# Prerequisites: +# 1. Create secrets containing keytab and krb5.conf files: +# kubectl create secret generic spark-kerberos-keytab --from-file=krb5.keytab=/path/to/spark.keytab +# kubectl create secret generic spark-kerberos-config --from-file=krb5.conf=/path/to/krb5.conf +# 2. 
Ensure HDFS and other Hadoop services are configured for Kerberos authentication
+
+apiVersion: "sparkoperator.k8s.io/v1beta2"
+kind: SparkApplication
+metadata:
+  name: spark-pi-kerberos
+  namespace: default
+spec:
+  type: Scala
+  mode: cluster
+  image: docker.io/library/spark:4.0.0
+  imagePullPolicy: Always
+  mainClass: org.apache.spark.examples.SparkPi
+  mainApplicationFile: "local:///opt/spark/examples/jars/spark-examples_2.13-4.0.0.jar"
+  sparkVersion: "4.0.0"
+  restartPolicy:
+    type: Never
+
+  # Kerberos configuration for secure Hadoop access
+  kerberos:
+    # Kerberos principal (user@REALM)
+    principal: "spark@EXAMPLE.COM"
+    # Realm (optional, can be inferred from principal)
+    realm: "EXAMPLE.COM"
+    # KDC address (optional)
+    kdc: "kdc.example.com:88"
+    # Secret containing keytab file
+    keytabSecret: "spark-kerberos-keytab"
+    # keytabFile: "krb5.keytab" # Default, can be customized
+    # Secret containing krb5.conf
+    configSecret: "spark-kerberos-config"
+    # configFile: "krb5.conf" # Default, can be customized
+    # Credential renewal strategy for long-running applications (Spark 4+)
+    renewalCredentials: "keytab" # or "ccache" for ticket cache
+    # Services that should have Kerberos credentials enabled
+    enabledServices: ["hadoopfs", "hbase", "hive"]
+
+  # Hadoop configuration for Kerberos-enabled HDFS
+  hadoopConf:
+    # Enable Kerberos authentication
+    "hadoop.security.authentication": "kerberos"
+    "hadoop.security.authorization": "true"
+    # HDFS configuration for Kerberos
+    "dfs.nameservices": "hdfs-cluster"
+    "dfs.ha.namenodes.hdfs-cluster": "nn1,nn2"
+    "dfs.namenode.rpc-address.hdfs-cluster.nn1": "namenode1.example.com:8020"
+    "dfs.namenode.rpc-address.hdfs-cluster.nn2": "namenode2.example.com:8020"
+    "dfs.client.failover.proxy.provider.hdfs-cluster": "org.apache.hadoop.hdfs.server.namenode.ha.ConfiguredFailoverProxyProvider"
+
+  # Spark configuration
+  sparkConf:
+    # SQL warehouse access configuration (if using Hive)
+    "spark.sql.warehouse.dir": 
"hdfs://hdfs-cluster/user/hive/warehouse" + "spark.sql.hive.metastore.version": "3.1.2" + "spark.sql.hive.metastore.jars": "builtin" + # Enable adaptive query execution for better performance + "spark.sql.adaptive.enabled": "true" + "spark.sql.adaptive.coalescePartitions.enabled": "true" + # Enable Kerberos for additional services (optional) + "spark.security.credentials.yarn.enabled": "true" + + driver: + cores: 1 + coreLimit: "1200m" + memory: "512m" + labels: + version: 4.0.0 + serviceAccount: spark-operator-spark + # Mount Kerberos keytab as secret + secrets: + - name: "spark-kerberos-keytab" + path: "/etc/kerberos/keytab" + secretType: "KerberosKeytab" + - name: "spark-kerberos-config" + path: "/etc/kerberos/conf" + secretType: "Generic" + # Set environment variables for Kerberos + env: + - name: "KRB5_CONFIG" + value: "/etc/kerberos/conf/krb5.conf" + + executor: + cores: 1 + instances: 1 + memory: "512m" + labels: + version: 4.0.0 + # Mount Kerberos keytab as secret + secrets: + - name: "spark-kerberos-keytab" + path: "/etc/kerberos/keytab" + secretType: "KerberosKeytab" + - name: "spark-kerberos-config" + path: "/etc/kerberos/conf" + secretType: "Generic" + # Set environment variables for Kerberos + env: + - name: "KRB5_CONFIG" + value: "/etc/kerberos/conf/krb5.conf" diff --git a/go.mod b/go.mod index dacc722d6..402d9869d 100644 --- a/go.mod +++ b/go.mod @@ -23,6 +23,7 @@ require ( k8s.io/apiextensions-apiserver v0.33.3 k8s.io/apimachinery v0.33.3 k8s.io/client-go v0.33.3 + k8s.io/klog/v2 v2.130.1 k8s.io/utils v0.0.0-20241104100929-3ea5e8cea738 sigs.k8s.io/controller-runtime v0.20.4 sigs.k8s.io/scheduler-plugins v0.31.8 @@ -149,7 +150,6 @@ require ( k8s.io/code-generator v0.32.5 // indirect k8s.io/component-base v0.33.3 // indirect k8s.io/gengo/v2 v2.0.0-20240911193312-2b36238f13e9 // indirect - k8s.io/klog/v2 v2.130.1 // indirect k8s.io/kube-openapi v0.0.0-20250318190949-c8a335a9a2ff // indirect k8s.io/kubectl v0.33.3 // indirect oras.land/oras-go/v2 v2.6.0 
// indirect diff --git a/internal/controller/sparkapplication/submission.go b/internal/controller/sparkapplication/submission.go index dc7aae8e0..265a84f5a 100644 --- a/internal/controller/sparkapplication/submission.go +++ b/internal/controller/sparkapplication/submission.go @@ -24,6 +24,7 @@ import ( "path/filepath" "strings" + "k8s.io/klog/v2" "sigs.k8s.io/controller-runtime/pkg/log" "github.com/kubeflow/spark-operator/v2/api/v1beta2" @@ -102,6 +103,7 @@ func buildSparkSubmitArgs(app *v1beta2.SparkApplication) ([]string, error) { submissionWaitAppCompletionOption, sparkConfOption, hadoopConfOption, + kerberosConfOption, driverPodTemplateOption, driverPodNameOption, driverConfOption, @@ -420,9 +422,37 @@ func driverConfOption(app *v1beta2.SparkApplication) ([]string, error) { // driverSecretOption returns a list of spark-submit arguments for mounting secrets to driver pod. func driverSecretOption(app *v1beta2.SparkApplication) ([]string, error) { var args []string - for _, secret := range app.Spec.Driver.Secrets { + secrets := make([]v1beta2.SecretInfo, 0, len(app.Spec.Driver.Secrets)) + + // Add explicit driver secrets + secrets = append(secrets, app.Spec.Driver.Secrets...) 
+ + // Auto-add Kerberos secrets if Kerberos is configured + if app.Spec.Kerberos != nil { + klog.V(2).Infof("Kerberos configuration found for app %s, auto-adding secrets", app.Name) + if app.Spec.Kerberos.KeytabSecret != nil { + klog.V(2).Infof("Adding keytab secret %s for driver", *app.Spec.Kerberos.KeytabSecret) + secrets = append(secrets, v1beta2.SecretInfo{ + Name: *app.Spec.Kerberos.KeytabSecret, + Path: common.DefaultKerberosKeytabMountPath, + Type: v1beta2.SecretTypeKerberosKeytab, + }) + } + if app.Spec.Kerberos.ConfigSecret != nil { + klog.V(2).Infof("Adding config secret %s for driver", *app.Spec.Kerberos.ConfigSecret) + secrets = append(secrets, v1beta2.SecretInfo{ + Name: *app.Spec.Kerberos.ConfigSecret, + Path: common.DefaultKerberosConfigMountPath, + Type: v1beta2.SecretTypeGeneric, + }) + } + } + + klog.V(2).Infof("Processing %d driver secrets for app %s", len(secrets), app.Name) + for _, secret := range secrets { property := fmt.Sprintf(common.SparkKubernetesDriverSecretsTemplate, secret.Name) args = append(args, "--conf", fmt.Sprintf("%s=%s", property, secret.Path)) + klog.V(2).Infof("Added driver secret config: %s=%s", property, secret.Path) switch secret.Type { case v1beta2.SecretTypeGCPServiceAccount: property := fmt.Sprintf(common.SparkKubernetesDriverEnvTemplate, common.EnvGoogleApplicationCredentials) @@ -432,8 +462,25 @@ func driverSecretOption(app *v1beta2.SparkApplication) ([]string, error) { property := fmt.Sprintf(common.SparkKubernetesDriverEnvTemplate, common.EnvHadoopTokenFileLocation) conf := fmt.Sprintf("%s=%s", property, filepath.Join(secret.Path, common.HadoopDelegationTokenFileName)) args = append(args, "--conf", conf) + case v1beta2.SecretTypeKerberosKeytab: + property := fmt.Sprintf(common.SparkKubernetesDriverEnvTemplate, common.EnvKerberosKeytabFile) + conf := fmt.Sprintf("%s=%s", property, filepath.Join(secret.Path, common.KerberosKeytabFileName)) + args = append(args, "--conf", conf) + // 
Set Kerberos environment variables for Spark delegation token manager + if app.Spec.Kerberos != nil && app.Spec.Kerberos.Principal != nil { + principalProperty := fmt.Sprintf(common.SparkKubernetesDriverEnvTemplate, "KRB5_PRINCIPAL") + principalConf := fmt.Sprintf("%s=%s", principalProperty, *app.Spec.Kerberos.Principal) + args = append(args, "--conf", principalConf) + + // Set Spark keytab configuration via environment variable override at runtime. NOTE(review): this hardcodes the default keytab file name and ignores Kerberos.KeytabFile; honor the override as kerberosConfOption does — confirm. + keytabPath := filepath.Join(secret.Path, common.KerberosKeytabFileName) + sparkKeytabProperty := fmt.Sprintf(common.SparkKubernetesDriverEnvTemplate, "SPARK_CONF_SPARK_KERBEROS_KEYTAB") + sparkKeytabConf := fmt.Sprintf("%s=%s", sparkKeytabProperty, keytabPath) + args = append(args, "--conf", sparkKeytabConf) + } } + klog.V(2).Infof("Driver secret options generated: %v", args) + return args, nil } @@ -757,9 +804,37 @@ func executorSecretOption(app *v1beta2.SparkApplication) ([]string, error) { var args []string - for _, secret := range app.Spec.Executor.Secrets { + secrets := make([]v1beta2.SecretInfo, 0, len(app.Spec.Executor.Secrets)) + + // Add explicit executor secrets + secrets = append(secrets, app.Spec.Executor.Secrets...) 
+ + // Auto-add Kerberos secrets if Kerberos is configured + if app.Spec.Kerberos != nil { + klog.V(2).Infof("Kerberos configuration found for app %s, auto-adding secrets to executor", app.Name) + if app.Spec.Kerberos.KeytabSecret != nil { + klog.V(2).Infof("Adding keytab secret %s for executor", *app.Spec.Kerberos.KeytabSecret) + secrets = append(secrets, v1beta2.SecretInfo{ + Name: *app.Spec.Kerberos.KeytabSecret, + Path: common.DefaultKerberosKeytabMountPath, + Type: v1beta2.SecretTypeKerberosKeytab, + }) + } + if app.Spec.Kerberos.ConfigSecret != nil { + klog.V(2).Infof("Adding config secret %s for executor", *app.Spec.Kerberos.ConfigSecret) + secrets = append(secrets, v1beta2.SecretInfo{ + Name: *app.Spec.Kerberos.ConfigSecret, + Path: common.DefaultKerberosConfigMountPath, + Type: v1beta2.SecretTypeGeneric, + }) + } + } + + klog.V(2).Infof("Processing %d executor secrets for app %s", len(secrets), app.Name) + for _, secret := range secrets { property := fmt.Sprintf(common.SparkKubernetesExecutorSecretsTemplate, secret.Name) args = append(args, "--conf", fmt.Sprintf("%s=%s", property, secret.Path)) + klog.V(2).Infof("Added executor secret config: %s=%s", property, secret.Path) switch secret.Type { case v1beta2.SecretTypeGCPServiceAccount: property := fmt.Sprintf(common.SparkExecutorEnvTemplate, common.EnvGoogleApplicationCredentials) @@ -769,8 +844,19 @@ func executorSecretOption(app *v1beta2.SparkApplication) ([]string, error) { property := fmt.Sprintf(common.SparkExecutorEnvTemplate, common.EnvHadoopTokenFileLocation) args = append(args, "--conf", fmt.Sprintf("%s=%s", property, filepath.Join(secret.Path, common.HadoopDelegationTokenFileName))) + case v1beta2.SecretTypeKerberosKeytab: + property := fmt.Sprintf(common.SparkExecutorEnvTemplate, common.EnvKerberosKeytabFile) + args = append(args, "--conf", fmt.Sprintf("%s=%s", property, + filepath.Join(secret.Path, common.KerberosKeytabFileName))) + // Also set the Kerberos 
principal environment variable for executors + if app.Spec.Kerberos != nil && app.Spec.Kerberos.Principal != nil { + principalProperty := fmt.Sprintf(common.SparkExecutorEnvTemplate, "KRB5_PRINCIPAL") + principalConf := fmt.Sprintf("%s=%s", principalProperty, *app.Spec.Kerberos.Principal) + args = append(args, "--conf", principalConf) + } } + klog.V(2).Infof("Executor secret options generated: %v", args) + return args, nil } @@ -1098,3 +1184,81 @@ func executorPodTemplateOption(app *v1beta2.SparkApplication) ([]string, error) } return args, nil } + +// kerberosConfOption returns Kerberos-specific configuration arguments. +func kerberosConfOption(app *v1beta2.SparkApplication) ([]string, error) { + if app.Spec.Kerberos == nil { + return nil, nil + } + + var args []string + kerberos := app.Spec.Kerberos + + // Set the Kerberos principal and enable keytab-based authentication + if kerberos.Principal != nil { + // Hadoop-level Kerberos configuration + args = append(args, "--conf", "spark.hadoop.hadoop.security.authentication=kerberos") + args = append(args, "--conf", "spark.hadoop.hadoop.security.authorization=true") + + // Set Spark Kerberos principal for HadoopDelegationTokenManager + args = append(args, "--conf", fmt.Sprintf("spark.kerberos.principal=%s", *kerberos.Principal)) + + // Set credential renewal strategy + renewalCredentials := "keytab" // Default for Spark 4+ + if kerberos.RenewalCredentials != nil { + renewalCredentials = *kerberos.RenewalCredentials + } + args = append(args, "--conf", fmt.Sprintf("spark.kerberos.renewal.credentials=%s", renewalCredentials)) + + // Enable delegation token retrieval for Hadoop services + enabledServices := []string{"hadoopfs", "hbase", "hive"} // Default services + if len(kerberos.EnabledServices) > 0 { + enabledServices = kerberos.EnabledServices + } + + for _, service := range enabledServices { + args = append(args, "--conf", fmt.Sprintf("spark.security.credentials.%s.enabled=true", service)) + } + } + + // Set 
keytab configuration for Spark 4 Kerberos support + if kerberos.KeytabSecret != nil && kerberos.Principal != nil { + keytabFileName := common.KerberosKeytabFileName + if kerberos.KeytabFile != nil { + keytabFileName = *kerberos.KeytabFile + } + keytabPath := fmt.Sprintf("%s/%s", common.DefaultKerberosKeytabMountPath, keytabFileName) + + // For Spark 4 validation, we need to specify the keytab path that will be available + // in the driver pod. The operator ensures the secret is mounted at this location. + args = append(args, "--conf", fmt.Sprintf("spark.kerberos.keytab=%s", keytabPath)) + + // Hadoop Kerberos configuration (for backward compatibility and direct Hadoop access) + args = append(args, "--conf", fmt.Sprintf("spark.hadoop.hadoop.kerberos.principal=%s", *kerberos.Principal)) + args = append(args, "--conf", fmt.Sprintf("spark.hadoop.hadoop.kerberos.keytab=%s", keytabPath)) + } + + // Set Kerberos config file path + if kerberos.ConfigSecret != nil { + configFileName := common.KerberosConfigFileName + if kerberos.ConfigFile != nil { + configFileName = *kerberos.ConfigFile + } + configPath := fmt.Sprintf("%s/%s", common.DefaultKerberosConfigMountPath, configFileName) + + // Set Java system property for Kerberos configuration + args = append(args, "--conf", fmt.Sprintf("spark.hadoop.java.security.krb5.conf=%s", configPath)) + + // Set driver and executor JVM options for krb5.conf. NOTE(review): assigning extraJavaOptions here overwrites any user-supplied spark.driver/executor.extraJavaOptions from SparkConf; append to existing values instead — confirm merge semantics. + args = append(args, "--conf", fmt.Sprintf("spark.driver.extraJavaOptions=-Djava.security.krb5.conf=%s", configPath)) + args = append(args, "--conf", fmt.Sprintf("spark.executor.extraJavaOptions=-Djava.security.krb5.conf=%s", configPath)) + } + + // Configure HDFS access for Kerberos (if Hadoop configuration is present) + if len(app.Spec.HadoopConf) > 0 { + // Enable Kerberos for HDFS access. NOTE(review): Spark documents spark.kerberos.access.hadoopFileSystems as a comma-separated list of filesystem URIs (e.g. hdfs://nn:8020), not a bare scheme — verify "hdfs" is honored. + args = append(args, "--conf", "spark.kerberos.access.hadoopFileSystems=hdfs") + } + + return args, nil +} diff --git a/internal/controller/sparkapplication/submission_kerberos_test.go 
b/internal/controller/sparkapplication/submission_kerberos_test.go new file mode 100644 index 000000000..ea69c23c4 --- /dev/null +++ b/internal/controller/sparkapplication/submission_kerberos_test.go @@ -0,0 +1,145 @@ +/* +Copyright 2025 The Kubeflow authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + https://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package sparkapplication + +import ( + "reflect" + "testing" + + "github.com/kubeflow/spark-operator/v2/api/v1beta2" + "github.com/kubeflow/spark-operator/v2/pkg/common" +) + +func TestKerberosConfOption(t *testing.T) { + tests := []struct { + name string + app *v1beta2.SparkApplication + expected []string + }{ + { + name: "no kerberos configuration", + app: &v1beta2.SparkApplication{ + Spec: v1beta2.SparkApplicationSpec{}, + }, + expected: nil, + }, + { + name: "basic kerberos configuration with keytab", + app: &v1beta2.SparkApplication{ + Spec: v1beta2.SparkApplicationSpec{ + Kerberos: &v1beta2.KerberosSpec{ + Principal: stringPtr("spark@EXAMPLE.COM"), + KeytabSecret: stringPtr("spark-keytab"), + ConfigSecret: stringPtr("spark-config"), + }, + }, + }, + expected: []string{ + "--conf", "spark.hadoop.hadoop.security.authentication=kerberos", + "--conf", "spark.hadoop.hadoop.security.authorization=true", + "--conf", "spark.kerberos.principal=spark@EXAMPLE.COM", + "--conf", "spark.kerberos.renewal.credentials=keytab", + "--conf", "spark.security.credentials.hadoopfs.enabled=true", + "--conf", "spark.security.credentials.hbase.enabled=true", + "--conf", 
"spark.security.credentials.hive.enabled=true", + "--conf", "spark.kerberos.keytab=" + common.DefaultKerberosKeytabMountPath + "/" + common.KerberosKeytabFileName, + "--conf", "spark.hadoop.hadoop.kerberos.principal=spark@EXAMPLE.COM", + "--conf", "spark.hadoop.hadoop.kerberos.keytab=" + common.DefaultKerberosKeytabMountPath + "/" + common.KerberosKeytabFileName, + "--conf", "spark.hadoop.java.security.krb5.conf=" + common.DefaultKerberosConfigMountPath + "/" + common.KerberosConfigFileName, + "--conf", "spark.driver.extraJavaOptions=-Djava.security.krb5.conf=" + common.DefaultKerberosConfigMountPath + "/" + common.KerberosConfigFileName, + "--conf", "spark.executor.extraJavaOptions=-Djava.security.krb5.conf=" + common.DefaultKerberosConfigMountPath + "/" + common.KerberosConfigFileName, + }, + }, + { + name: "kerberos with custom renewal credentials and enabled services", + app: &v1beta2.SparkApplication{ + Spec: v1beta2.SparkApplicationSpec{ + Kerberos: &v1beta2.KerberosSpec{ + Principal: stringPtr("spark@EXAMPLE.COM"), + KeytabSecret: stringPtr("spark-keytab"), + ConfigSecret: stringPtr("spark-config"), + RenewalCredentials: stringPtr("ccache"), + EnabledServices: []string{"hadoopfs", "yarn"}, + }, + }, + }, + expected: []string{ + "--conf", "spark.hadoop.hadoop.security.authentication=kerberos", + "--conf", "spark.hadoop.hadoop.security.authorization=true", + "--conf", "spark.kerberos.principal=spark@EXAMPLE.COM", + "--conf", "spark.kerberos.renewal.credentials=ccache", + "--conf", "spark.security.credentials.hadoopfs.enabled=true", + "--conf", "spark.security.credentials.yarn.enabled=true", + "--conf", "spark.kerberos.keytab=" + common.DefaultKerberosKeytabMountPath + "/" + common.KerberosKeytabFileName, + "--conf", "spark.hadoop.hadoop.kerberos.principal=spark@EXAMPLE.COM", + "--conf", "spark.hadoop.hadoop.kerberos.keytab=" + common.DefaultKerberosKeytabMountPath + "/" + common.KerberosKeytabFileName, + "--conf", "spark.hadoop.java.security.krb5.conf=" + 
common.DefaultKerberosConfigMountPath + "/" + common.KerberosConfigFileName, + "--conf", "spark.driver.extraJavaOptions=-Djava.security.krb5.conf=" + common.DefaultKerberosConfigMountPath + "/" + common.KerberosConfigFileName, + "--conf", "spark.executor.extraJavaOptions=-Djava.security.krb5.conf=" + common.DefaultKerberosConfigMountPath + "/" + common.KerberosConfigFileName, + }, + }, + { + name: "kerberos with hadoop configuration enables HDFS access", + app: &v1beta2.SparkApplication{ + Spec: v1beta2.SparkApplicationSpec{ + Kerberos: &v1beta2.KerberosSpec{ + Principal: stringPtr("spark@EXAMPLE.COM"), + KeytabSecret: stringPtr("spark-keytab"), + ConfigSecret: stringPtr("spark-config"), + }, + HadoopConf: map[string]string{ + "hadoop.security.authentication": "kerberos", + }, + }, + }, + expected: []string{ + "--conf", "spark.hadoop.hadoop.security.authentication=kerberos", + "--conf", "spark.hadoop.hadoop.security.authorization=true", + "--conf", "spark.kerberos.principal=spark@EXAMPLE.COM", + "--conf", "spark.kerberos.renewal.credentials=keytab", + "--conf", "spark.security.credentials.hadoopfs.enabled=true", + "--conf", "spark.security.credentials.hbase.enabled=true", + "--conf", "spark.security.credentials.hive.enabled=true", + "--conf", "spark.kerberos.keytab=" + common.DefaultKerberosKeytabMountPath + "/" + common.KerberosKeytabFileName, + "--conf", "spark.hadoop.hadoop.kerberos.principal=spark@EXAMPLE.COM", + "--conf", "spark.hadoop.hadoop.kerberos.keytab=" + common.DefaultKerberosKeytabMountPath + "/" + common.KerberosKeytabFileName, + "--conf", "spark.hadoop.java.security.krb5.conf=" + common.DefaultKerberosConfigMountPath + "/" + common.KerberosConfigFileName, + "--conf", "spark.driver.extraJavaOptions=-Djava.security.krb5.conf=" + common.DefaultKerberosConfigMountPath + "/" + common.KerberosConfigFileName, + "--conf", "spark.executor.extraJavaOptions=-Djava.security.krb5.conf=" + common.DefaultKerberosConfigMountPath + "/" + 
common.KerberosConfigFileName, + "--conf", "spark.kerberos.access.hadoopFileSystems=hdfs", + }, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + result, err := kerberosConfOption(tt.app) + if err != nil { + t.Errorf("kerberosConfOption() error = %v", err) + return + } + if !reflect.DeepEqual(result, tt.expected) { + t.Errorf("kerberosConfOption() = %v, expected %v", result, tt.expected) + } + }) + } +} + +// Helper function to create string pointers +func stringPtr(s string) *string { + return &s +} diff --git a/pkg/common/spark.go b/pkg/common/spark.go index 1d043137c..7a8f2d77e 100644 --- a/pkg/common/spark.go +++ b/pkg/common/spark.go @@ -394,4 +394,24 @@ const ( // delegation token. This name is added to the delegation token secret mount path to // form the path to the file referred to by HADOOP_TOKEN_FILE_LOCATION. HadoopDelegationTokenFileName = "hadoop.token" + + // EnvKerberosKeytabFile is the environment variable for specifying the location + // where the Kerberos keytab file is located. + EnvKerberosKeytabFile = "KRB5_KEYTAB_FILE" + + // EnvKerberosConfig is the environment variable for specifying the location + // where the Kerberos configuration file (krb5.conf) is located. + EnvKerberosConfig = "KRB5_CONFIG" + + // KerberosKeytabFileName is the default name of the Kerberos keytab file. + KerberosKeytabFileName = "krb5.keytab" + + // KerberosConfigFileName is the default name of the Kerberos configuration file. + KerberosConfigFileName = "krb5.conf" + + // DefaultKerberosKeytabMountPath is the default path where Kerberos keytab secrets are mounted. + DefaultKerberosKeytabMountPath = "/etc/kerberos/keytab" + + // DefaultKerberosConfigMountPath is the default path where Kerberos config secrets are mounted. + DefaultKerberosConfigMountPath = "/etc/kerberos/conf" )