Skip to content

Commit

Permalink
in-cluster K8s environment: prune, comment, and document
Browse files Browse the repository at this point in the history
* Changes:
  * K8s node name: get rid of `K8S_NODE_NAME`   - use `MY_NODE` instead
  * K8s pod name:  `MY_POD` and `HOSTNAME`      - in that sequence, with checks
  * K8s namespace: `K8S_NS` and `POD_NAMESPACE` - ditto

Signed-off-by: Alex Aizman <[email protected]>
  • Loading branch information
alex-aizman committed Dec 5, 2023
1 parent 571246e commit 00d19ab
Show file tree
Hide file tree
Showing 8 changed files with 87 additions and 36 deletions.
15 changes: 12 additions & 3 deletions api/env/ais.go
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,8 @@
*/
package env

// See also: docs/environment-vars.md

var (
AIS = struct {
Endpoint string
Expand All @@ -19,7 +21,9 @@ var (
NumTarget string
NumProxy string
// K8s
K8sPod string
K8sPod string
K8sNode string
K8sNamespace string
}{
// the way to designate primary when cluster's starting up
Endpoint: "AIS_ENDPOINT",
Expand All @@ -41,7 +45,12 @@ var (
NumTarget: "NUM_TARGET",
NumProxy: "NUM_PROXY",

// via ais-k8s repo (see ais-k8s/operator/pkg/resources/cmn/env.go)
K8sPod: "MY_POD",
// via ais-k8s repo
// see also:
// * https://github.com/NVIDIA/ais-k8s/blob/master/operator/pkg/resources/cmn/env.go
// * docs/environment-vars.md
K8sPod: "MY_POD",
K8sNode: "MY_NODE",
K8sNamespace: "K8S_NS",
}
)
1 change: 1 addition & 0 deletions api/env/authn.go
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
package env

// authn environment variables
// see also: docs/environment-vars.md

var (
AuthN = struct {
Expand Down
16 changes: 13 additions & 3 deletions cmn/k8s/client.go
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ import (
"os"
"strings"

"github.com/NVIDIA/aistore/api/env"
"github.com/NVIDIA/aistore/cmn/debug"
corev1 "k8s.io/api/core/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
Expand Down Expand Up @@ -67,13 +68,22 @@ func _initClient() {

// Retrieve pod namespace
// See:
// - https://github.com/kubernetes/kubernetes/blob/master/staging/src/k8s.io/client-go/tools/clientcmd/client_config.go
// - topic: "how to get current namespace of an in-cluster go Kubernetes client"
// - https://kubernetes.io/docs/concepts/overview/working-with-objects/namespaces/
// - https://kubernetes.io/docs/tasks/access-application-cluster/access-cluster/#accessing-the-api-from-a-pod.
func _namespace() (namespace string) {
if namespace = os.Getenv("POD_NAMESPACE"); namespace != "" {
// production
if namespace = os.Getenv(env.AIS.K8sNamespace); namespace != "" {
debug.Func(func() {
ns := os.Getenv(defaultNamespaceEnv)
debug.Assertf(ns == "" || ns == namespace, "%q vs %q", ns, namespace)
})
return
}
// otherwise, try default env var
if namespace = os.Getenv(defaultNamespaceEnv); namespace != "" {
return
}
// finally, last resort kludge
if ns, err := os.ReadFile("/var/run/secrets/kubernetes.io/serviceaccount/namespace"); err == nil {
if namespace = strings.TrimSpace(string(ns)); len(namespace) > 0 {
return
Expand Down
64 changes: 47 additions & 17 deletions cmn/k8s/init.go
Original file line number Diff line number Diff line change
Expand Up @@ -7,53 +7,69 @@ package k8s
import (
"errors"
"os"
"strings"

"github.com/NVIDIA/aistore/api/env"
"github.com/NVIDIA/aistore/cmn/debug"
"github.com/NVIDIA/aistore/cmn/nlog"
v1 "k8s.io/api/core/v1"
)

const (
// env var names
k8sPodNameEnv = "HOSTNAME"
k8sNodeNameEnv = "K8S_NODE_NAME"
defaultPodNameEnv = "HOSTNAME"
defaultNamespaceEnv = "POD_NAMESPACE"
)

// misc.
const (
Default = "default"
Pod = "pod"
Svc = "svc"
)

var NodeName string // assign upon successful initialization
const nonK8s = "non-Kubernetes deployment"

var ErrK8sRequired = errors.New("the operation requires Kubernetes")
var (
NodeName string // assign upon successful initialization

ErrK8sRequired = errors.New("the operation requires Kubernetes")
)

func Init() {
var (
pod *v1.Pod
nodeName = os.Getenv(k8sNodeNameEnv)
podName = os.Getenv(k8sPodNameEnv)
)
_initClient()
client, err := GetClient()
if err != nil {
nlog.Infof("K8s client nil => non-Kubernetes deployment: (%s: %q, %s: %q)", k8sPodNameEnv, podName, k8sNodeNameEnv, nodeName)
nlog.Infoln(nonK8s, "(init k8s-client returned:", _short(err)+")")
return
}
nlog.Infof("Checking (%s: %q, %s: %q)", k8sPodNameEnv, podName, k8sNodeNameEnv, nodeName)

// if specified, `k8sNodeNameEnv` takes precedence: proceed directly to check
var (
pod *v1.Pod
nodeName = os.Getenv(env.AIS.K8sNode)
podName = os.Getenv(env.AIS.K8sPod)
)
if podName != "" {
debug.Func(func() {
pn := os.Getenv(defaultPodNameEnv)
debug.Assertf(pn == "" || pn == podName, "%q vs %q", pn, podName)
})
} else {
podName = os.Getenv(defaultPodNameEnv)
}
nlog.Infof("Checking K8s pod: %q, node: %q", podName, nodeName)

// node name specified - proceed directly to check
if nodeName != "" {
goto checkNode
}
if podName == "" {
nlog.Infoln("K8s environment (above) not set => non-Kubernetes deployment")
nlog.Infoln("K8s environment (above) not set =>", nonK8s)
return
}

// check POD
pod, err = client.Pod(podName)
if err != nil {
nlog.Errorf("Failed to get K8s pod %q: %v (tip: try setting %q env variable)", podName, err, k8sNodeNameEnv)
nlog.Errorf("Failed to get K8s pod %q: %v", podName, err)
return
}
nodeName = pod.Spec.NodeName
Expand All @@ -63,7 +79,7 @@ func Init() {
checkNode: // always check Node
node, err := client.Node(nodeName)
if err != nil {
nlog.Errorf("Failed to get K8s node %q: %v (tip: try setting %q env variable)", nodeName, err, k8sNodeNameEnv)
nlog.Errorf("Failed to get K8s node %q: %v", nodeName, err)
return
}

Expand All @@ -83,3 +99,17 @@ func _ppvols(volumes []v1.Volume) {
}

// IsK8s reports whether NodeName is set (non-empty);
// NodeName gets assigned upon successful K8s initialization.
func IsK8s() bool {
	return len(NodeName) > 0
}

// _short abbreviates an error message so that it fits a single log line:
//   - a message shorter than the limit is returned as is;
//   - otherwise, if the first comma is located beyond the limit,
//     the result is everything up to (and not including) that comma;
//   - in all other cases, the result is the first `limit` bytes of the message.
func _short(err error) string {
	const limit = 20
	text := err.Error()
	if len(text) < limit {
		return text
	}
	if comma := strings.IndexByte(text, ','); comma > limit {
		return text[:comma]
	}
	return text[:limit]
}
2 changes: 1 addition & 1 deletion deploy/dev/k8s/kube_templates/aisproxy_deployment.yml
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ spec:
failureThreshold: 5
periodSeconds: 10
env:
- name: K8S_NODE_NAME
- name: MY_NODE
valueFrom:
fieldRef:
fieldPath: spec.nodeName
Expand Down
2 changes: 1 addition & 1 deletion deploy/dev/k8s/kube_templates/aistarget_deployment.yml
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@ spec:
# Required for hostport to operate, or implement a full pod security policy
privileged: true
env:
- name: K8S_NODE_NAME
- name: MY_NODE
valueFrom:
fieldRef:
fieldPath: spec.nodeName
Expand Down
6 changes: 3 additions & 3 deletions docs/environment-vars.md
Original file line number Diff line number Diff line change
Expand Up @@ -115,9 +115,9 @@ See also:

| name | comment |
| ---- | ------- |
| `MY_POD` | Kubernetes POD name |
| `K8S_NODE_NAME` | Kubernetes node name |
| `POD_NAMESPACE` | Kubernetes namespace |
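| `MY_POD` and `HOSTNAME` | Kubernetes POD name. `MY_POD` is used in [production](https://github.com/NVIDIA/ais-k8s/blob/master/operator/pkg/resources/cmn/env.go) and, when set, takes precedence; `HOSTNAME`, on the other hand, is usually considered a Kubernetes default and is used as a fallback |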
| `MY_NODE` | Kubernetes node name |
| `K8S_NS` and `POD_NAMESPACE` | Kubernetes namespace. `K8S_NS` is used in [production](https://github.com/NVIDIA/ais-k8s/blob/master/operator/pkg/resources/cmn/env.go) and, when set, takes precedence, while `POD_NAMESPACE` is used in development |

Kubernetes POD name is also reported via `ais show cluster` CLI - when it is a Kubernetes deployment, e.g.:

Expand Down
17 changes: 9 additions & 8 deletions docs/http_api.md
Original file line number Diff line number Diff line change
Expand Up @@ -669,15 +669,16 @@ Date: Tue, 08 Nov 2022 17:03:03 GMT
Content-Type: text/plain; charset=utf-8
Transfer-Encoding: chunked

Started up at 2022/11/08 11:33:54, host u2204, go1.19.3 for linux/amd64
I 11:33:54.537821 config.go:1774 log.dir: "/tmp/ais/1/log"; l4.proto: tcp; pub port: 8081; verbosity: 3
I 11:33:54.537990 config.go:1776 config: "/root/.ais1/.ais.conf"; stats_time: 10s; authentication: false; backends: [ais aws gcp]
I 11:33:54.538001 daemon.go:177 Version 3.12-rc1.c5a523de4, build time 2022-11-08T11:33:50-0500, debug true
I 11:33:54.538005 daemon.go:185 CPUs(16, runtime=16)
I 11:33:54.538165 util.go:39 Verifying type of deployment (HOSTNAME: "", K8S_NODE_NAME: "")
I 11:33:54.538169 util.go:46 Couldn't initiate a K8s client, assuming non-Kubernetes deployment
Started up at 2023/11/08 02:34:35, host ais-target-13, go1.21.4 for linux/amd64
W 02:34:35.701629 config:1238 control and data share one intra-cluster network (ais-target-13.ais.svc.cluster.local)
I 02:34:35.701785 config:1755 log.dir: "/var/log/ais"; l4.proto: tcp; pub port: 51081; verbosity: 3
I 02:34:35.701791 config:1757 config: "/etc/ais/.ais.conf"; stats_time: 10s; authentication: false; backends: [aws]
I 02:34:35.701811 daemon:195 Version 3.21.1.4ce0e0b, build time 2023-11-08T00:05:16+0000, debug false, CPUs(256, runtime=256), containerized
I 02:34:35.702060 init:42 Checking (HOSTNAME: "ais-target-13")
I 02:34:35.721086 init:60 K8s spec: NodeName 10.0.140.13 Hostname ais-target-13 HostNetwork false

...
I 11:33:55.564338 htrun.go:1637 t[CJet8081]: joined cluster via http://127.0.0.1:9080
I 02:34:54.772574 htrun:1916 t[DfooZbarT] via primary health: cluster startup Ok, Smap v34[t=10, p=10]
...
```

Expand Down

0 comments on commit 00d19ab

Please sign in to comment.