From 00d19abc98bbf5fa8cef0bc277c48eb1007534f5 Mon Sep 17 00:00:00 2001 From: Alex Aizman Date: Tue, 5 Dec 2023 10:37:16 -0500 Subject: [PATCH] in-cluster K8s environment: prune, comment, and document * Changes: * K8s node name: get rid of `K8S_NODE_NAME` - use `MY_NODE` instead * K8s pod name: `MY_POD` and `HOSTNAME` - in that sequence, with checks * K8s namespace: `K8S_NS` and `POD_NAMESPACE` - ditto Signed-off-by: Alex Aizman --- api/env/ais.go | 15 ++++- api/env/authn.go | 1 + cmn/k8s/client.go | 16 ++++- cmn/k8s/init.go | 64 ++++++++++++++----- .../kube_templates/aisproxy_deployment.yml | 2 +- .../kube_templates/aistarget_deployment.yml | 2 +- docs/environment-vars.md | 6 +- docs/http_api.md | 17 ++--- 8 files changed, 87 insertions(+), 36 deletions(-) diff --git a/api/env/ais.go b/api/env/ais.go index 71d1847ea24..8e8fac94a34 100644 --- a/api/env/ais.go +++ b/api/env/ais.go @@ -4,6 +4,8 @@ */ package env +// See also: docs/environment-vars.md + var ( AIS = struct { Endpoint string @@ -19,7 +21,9 @@ var ( NumTarget string NumProxy string // K8s - K8sPod string + K8sPod string + K8sNode string + K8sNamespace string }{ // the way to designate primary when cluster's starting up Endpoint: "AIS_ENDPOINT", @@ -41,7 +45,12 @@ var ( NumTarget: "NUM_TARGET", NumProxy: "NUM_PROXY", - // via ais-k8s repo (see ais-k8s/operator/pkg/resources/cmn/env.go) - K8sPod: "MY_POD", + // via ais-k8s repo + // see also: + // * https://github.com/NVIDIA/ais-k8s/blob/master/operator/pkg/resources/cmn/env.go + // * docs/environment-vars.md + K8sPod: "MY_POD", + K8sNode: "MY_NODE", + K8sNamespace: "K8S_NS", } ) diff --git a/api/env/authn.go b/api/env/authn.go index 1699a9f3fb6..f03d39387e8 100644 --- a/api/env/authn.go +++ b/api/env/authn.go @@ -5,6 +5,7 @@ package env // authn environment variables +// see also: docs/environment-vars.md var ( AuthN = struct { diff --git a/cmn/k8s/client.go b/cmn/k8s/client.go index 70ec72bed39..0d42368c1ab 100644 --- a/cmn/k8s/client.go +++ 
b/cmn/k8s/client.go @@ -10,6 +10,7 @@ import ( "os" "strings" + "github.com/NVIDIA/aistore/api/env" "github.com/NVIDIA/aistore/cmn/debug" corev1 "k8s.io/api/core/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" @@ -67,13 +68,22 @@ func _initClient() { // Retrieve pod namespace // See: -// - https://github.com/kubernetes/kubernetes/blob/master/staging/src/k8s.io/client-go/tools/clientcmd/client_config.go +// - topic: "how to get current namespace of an in-cluster go Kubernetes client" // - https://kubernetes.io/docs/concepts/overview/working-with-objects/namespaces/ -// - https://kubernetes.io/docs/tasks/access-application-cluster/access-cluster/#accessing-the-api-from-a-pod. func _namespace() (namespace string) { - if namespace = os.Getenv("POD_NAMESPACE"); namespace != "" { + // production + if namespace = os.Getenv(env.AIS.K8sNamespace); namespace != "" { + debug.Func(func() { + ns := os.Getenv(defaultNamespaceEnv) + debug.Assertf(ns == "" || ns == namespace, "%q vs %q", ns, namespace) + }) return } + // otherwise, try default env var + if namespace = os.Getenv(defaultNamespaceEnv); namespace != "" { + return + } + // finally, last resort kludge if ns, err := os.ReadFile("/var/run/secrets/kubernetes.io/serviceaccount/namespace"); err == nil { if namespace = strings.TrimSpace(string(ns)); len(namespace) > 0 { return diff --git a/cmn/k8s/init.go b/cmn/k8s/init.go index 90fb1f395af..8526550b82b 100644 --- a/cmn/k8s/init.go +++ b/cmn/k8s/init.go @@ -7,53 +7,69 @@ package k8s import ( "errors" "os" + "strings" + "github.com/NVIDIA/aistore/api/env" + "github.com/NVIDIA/aistore/cmn/debug" "github.com/NVIDIA/aistore/cmn/nlog" v1 "k8s.io/api/core/v1" ) const ( - // env var names - k8sPodNameEnv = "HOSTNAME" - k8sNodeNameEnv = "K8S_NODE_NAME" + defaultPodNameEnv = "HOSTNAME" + defaultNamespaceEnv = "POD_NAMESPACE" +) - // misc. 
+const ( Default = "default" Pod = "pod" Svc = "svc" ) -var NodeName string // assign upon successful initialization +const nonK8s = "non-Kubernetes deployment" -var ErrK8sRequired = errors.New("the operation requires Kubernetes") +var ( + NodeName string // assign upon successful initialization + + ErrK8sRequired = errors.New("the operation requires Kubernetes") +) func Init() { - var ( - pod *v1.Pod - nodeName = os.Getenv(k8sNodeNameEnv) - podName = os.Getenv(k8sPodNameEnv) - ) _initClient() client, err := GetClient() if err != nil { - nlog.Infof("K8s client nil => non-Kubernetes deployment: (%s: %q, %s: %q)", k8sPodNameEnv, podName, k8sNodeNameEnv, nodeName) + nlog.Infoln(nonK8s, "(init k8s-client returned:", _short(err)+")") return } - nlog.Infof("Checking (%s: %q, %s: %q)", k8sPodNameEnv, podName, k8sNodeNameEnv, nodeName) - // if specified, `k8sNodeNameEnv` takes precedence: proceed directly to check + var ( + pod *v1.Pod + nodeName = os.Getenv(env.AIS.K8sNode) + podName = os.Getenv(env.AIS.K8sPod) + ) + if podName != "" { + debug.Func(func() { + pn := os.Getenv(defaultPodNameEnv) + debug.Assertf(pn == "" || pn == podName, "%q vs %q", pn, podName) + }) + } else { + podName = os.Getenv(defaultPodNameEnv) + } + nlog.Infof("Checking K8s pod: %q, node: %q", podName, nodeName) + + // node name specified - proceed directly to check if nodeName != "" { goto checkNode } if podName == "" { - nlog.Infoln("K8s environment (above) not set => non-Kubernetes deployment") + nlog.Infoln("K8s environment (above) not set =>", nonK8s) return } // check POD pod, err = client.Pod(podName) if err != nil { - nlog.Errorf("Failed to get K8s pod %q: %v (tip: try setting %q env variable)", podName, err, k8sNodeNameEnv) + nlog.Errorf("Failed to get K8s pod %q: %v", podName, err) return } nodeName = pod.Spec.NodeName @@ -63,7 +79,7 @@ func Init() { checkNode: // always check Node node, err := client.Node(nodeName) if err != nil { - nlog.Errorf("Failed to get K8s node %q: %v (tip: try 
setting %q env variable)", nodeName, err, k8sNodeNameEnv) + nlog.Errorf("Failed to get K8s node %q: %v", nodeName, err) return } @@ -83,3 +99,17 @@ func _ppvols(volumes []v1.Volume) { } func IsK8s() bool { return NodeName != "" } + +func _short(err error) string { + const max = 20 + msg := err.Error() + idx := strings.IndexByte(msg, ',') + switch { + case len(msg) < max: + return msg + case idx > max: + return msg[:idx] + default: + return msg[:max] + } +} diff --git a/deploy/dev/k8s/kube_templates/aisproxy_deployment.yml b/deploy/dev/k8s/kube_templates/aisproxy_deployment.yml index 46befb62764..fe1a88d39b7 100644 --- a/deploy/dev/k8s/kube_templates/aisproxy_deployment.yml +++ b/deploy/dev/k8s/kube_templates/aisproxy_deployment.yml @@ -27,7 +27,7 @@ spec: failureThreshold: 5 periodSeconds: 10 env: - - name: K8S_NODE_NAME + - name: MY_NODE valueFrom: fieldRef: fieldPath: spec.nodeName diff --git a/deploy/dev/k8s/kube_templates/aistarget_deployment.yml b/deploy/dev/k8s/kube_templates/aistarget_deployment.yml index 98cf082fe23..fb5c51326b9 100644 --- a/deploy/dev/k8s/kube_templates/aistarget_deployment.yml +++ b/deploy/dev/k8s/kube_templates/aistarget_deployment.yml @@ -30,7 +30,7 @@ spec: # Required for hostport to operate, or implement a full pod security policy privileged: true env: - - name: K8S_NODE_NAME + - name: MY_NODE valueFrom: fieldRef: fieldPath: spec.nodeName diff --git a/docs/environment-vars.md b/docs/environment-vars.md index 06984cfa378..e661811972b 100644 --- a/docs/environment-vars.md +++ b/docs/environment-vars.md @@ -115,9 +115,9 @@ See also: | name | comment | | ---- | ------- | -| `MY_POD` | Kubernetes POD name | -| `K8S_NODE_NAME` | Kubernetes node name | -| `POD_NAMESPACE` | Kubernetes namespace | +| `MY_POD` and `HOSTNAME` | Kubernetes POD name. 
`MY_POD` is used in [production](https://github.com/NVIDIA/ais-k8s/blob/master/operator/pkg/resources/cmn/env.go); `HOSTNAME`, on the other hand, is usually considered a Kubernetes default | +| `MY_NODE` | Kubernetes node name | +| `K8S_NS` and `POD_NAMESPACE` | Kubernetes namespace. `K8S_NS` is used in [production](https://github.com/NVIDIA/ais-k8s/blob/master/operator/pkg/resources/cmn/env.go), while `POD_NAMESPACE` is used in development | Kubernetes POD name is also reported via `ais show cluster` CLI - when it is a Kubernetes deployment, e.g.: diff --git a/docs/http_api.md b/docs/http_api.md index 6f3a730064c..c0e42ec4420 100644 --- a/docs/http_api.md +++ b/docs/http_api.md @@ -669,15 +669,16 @@ Date: Tue, 08 Nov 2022 17:03:03 GMT Content-Type: text/plain; charset=utf-8 Transfer-Encoding: chunked -Started up at 2022/11/08 11:33:54, host u2204, go1.19.3 for linux/amd64 -I 11:33:54.537821 config.go:1774 log.dir: "/tmp/ais/1/log"; l4.proto: tcp; pub port: 8081; verbosity: 3 -I 11:33:54.537990 config.go:1776 config: "/root/.ais1/.ais.conf"; stats_time: 10s; authentication: false; backends: [ais aws gcp] -I 11:33:54.538001 daemon.go:177 Version 3.12-rc1.c5a523de4, build time 2022-11-08T11:33:50-0500, debug true -I 11:33:54.538005 daemon.go:185 CPUs(16, runtime=16) -I 11:33:54.538165 util.go:39 Verifying type of deployment (HOSTNAME: "", K8S_NODE_NAME: "") -I 11:33:54.538169 util.go:46 Couldn't initiate a K8s client, assuming non-Kubernetes deployment +Started up at 2023/11/08 02:34:35, host ais-target-13, go1.21.4 for linux/amd64 +W 02:34:35.701629 config:1238 control and data share one intra-cluster network (ais-target-13.ais.svc.cluster.local) +I 02:34:35.701785 config:1755 log.dir: "/var/log/ais"; l4.proto: tcp; pub port: 51081; verbosity: 3 +I 02:34:35.701791 config:1757 config: "/etc/ais/.ais.conf"; stats_time: 10s; authentication: false; backends: [aws] +I 02:34:35.701811 daemon:195 Version 3.21.1.4ce0e0b, build time 2023-11-08T00:05:16+0000, debug false, CPUs(256, runtime=256), containerized +I 02:34:35.702060 init:42 Checking (HOSTNAME: "ais-target-13") +I 
02:34:35.721086 init:60 K8s spec: NodeName 10.0.140.13 Hostname ais-target-13 HostNetwork false + ... -I 11:33:55.564338 htrun.go:1637 t[CJet8081]: joined cluster via http://127.0.0.1:9080 +I 02:34:54.772574 htrun:1916 t[DfooZbarT] via primary health: cluster startup Ok, Smap v34[t=10, p=10] ... ```