Skip to content

Commit f8efb00

Browse files
authored
fix k8s event patch permission and add test (#480)
1 parent 5a844ac commit f8efb00

File tree

3 files changed

+125
-0
lines changed

3 files changed

+125
-0
lines changed

config/helm/aws-node-termination-handler/templates/clusterrole.yaml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -44,4 +44,5 @@ rules:
4444
- events
4545
verbs:
4646
- create
47+
- patch
4748
{{- end }}

test/e2e/emit-events-test

Lines changed: 123 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,123 @@
1+
#!/bin/bash
2+
set -euo pipefail
3+
4+
# Available env vars:
5+
# $TMP_DIR
6+
# $CLUSTER_NAME
7+
# $KUBECONFIG
8+
# $NODE_TERMINATION_HANDLER_DOCKER_REPO
9+
# $NODE_TERMINATION_HANDLER_DOCKER_TAG
10+
# $WEBHOOK_DOCKER_REPO
11+
# $WEBHOOK_DOCKER_TAG
12+
# $AEMM_URL
13+
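# $AEMM_VERSION
# Also read below without being defined in this script (required, since
# `set -u` is active): $AEMM_DL_URL and $IMDS_PORT. A `retry` helper is
# called as well and must be provided by the caller's environment.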
14+
15+
#######################################
# Print the test-failure banner and terminate the script.
# Globals:   CLUSTER_NAME (read)
# Arguments: $1 - exit status to terminate with (optional, defaults to 1)
# Outputs:   the failure banner on stdout
#######################################
function fail_and_exit {
  echo "❌ K8s Emit Events Test failed $CLUSTER_NAME"
  # Quoted so the status always reaches `exit` as exactly one word.
  exit "${1:-1}"
}
19+
20+
echo "Starting K8s Emit Events Test for Node Termination Handler"

# Physical (symlink-resolved) directory containing this script.
# `&&` is used instead of `;` because `set -e` is not inherited by command
# substitutions: with `;` a failed cd would be ignored and SCRIPTPATH would
# silently become the caller's working directory. With `&&` the substitution
# fails, the assignment fails, and `set -e` aborts the script.
SCRIPTPATH="$(cd "$(dirname "$0")" && pwd -P)"

# Helm flags appended to every chart installation further down.
common_helm_args=()
if [[ "${TEST_WINDOWS-}" == "true" ]]; then
  common_helm_args+=(--set targetNodeOs="windows")
fi
if [[ -n "${NTH_WORKER_LABEL-}" ]]; then
  common_helm_args+=(--set nodeSelector."$NTH_WORKER_LABEL")
fi
27+
28+
# Install or upgrade the aws-node-termination-handler chart in kube-system
# with spot-interruption draining, scheduled-event draining and Kubernetes
# event emission all enabled.
anth_helm_args=(upgrade --install "$CLUSTER_NAME-anth")
anth_helm_args+=("$SCRIPTPATH/../../config/helm/aws-node-termination-handler/")
anth_helm_args+=(--force --namespace kube-system)
# Uses http://$AEMM_URL:$IMDS_PORT when INSTANCE_METADATA_URL is unset or empty.
anth_helm_args+=(--set instanceMetadataURL="${INSTANCE_METADATA_URL:-"http://$AEMM_URL:$IMDS_PORT"}")
anth_helm_args+=(--set image.repository="$NODE_TERMINATION_HANDLER_DOCKER_REPO")
anth_helm_args+=(--set image.tag="$NODE_TERMINATION_HANDLER_DOCKER_TAG")
anth_helm_args+=(--set enableSpotInterruptionDraining="true")
anth_helm_args+=(--set enableScheduledEventDraining="true")
anth_helm_args+=(--set emitKubernetesEvents="true")

# Optional image pull policy override.
if [[ -n "${NODE_TERMINATION_HANDLER_DOCKER_PULL_POLICY-}" ]]; then
  anth_helm_args+=(--set image.pullPolicy="$NODE_TERMINATION_HANDLER_DOCKER_PULL_POLICY")
fi
# Append the shared flags only when there are any.
if (( ${#common_helm_args[@]} > 0 )); then
  anth_helm_args+=("${common_helm_args[@]}")
fi

# Trace only the helm invocation itself.
set -x
helm "${anth_helm_args[@]}"
set +x
50+
51+
# Install or upgrade the webhook-test-proxy chart in the default namespace;
# --wait makes helm block until the release is ready.
emtp_helm_args=(upgrade --install "$CLUSTER_NAME-emtp")
emtp_helm_args+=("$SCRIPTPATH/../../config/helm/webhook-test-proxy/")
emtp_helm_args+=(--wait --namespace default)
emtp_helm_args+=(--set webhookTestProxy.image.repository="$WEBHOOK_DOCKER_REPO")
emtp_helm_args+=(--set webhookTestProxy.image.tag="$WEBHOOK_DOCKER_TAG")

# Optional image pull policy override.
if [[ -n "${WEBHOOK_DOCKER_PULL_POLICY-}" ]]; then
  emtp_helm_args+=(--set webhookTestProxy.image.pullPolicy="$WEBHOOK_DOCKER_PULL_POLICY")
fi
# Append the shared flags only when there are any.
if (( ${#common_helm_args[@]} > 0 )); then
  emtp_helm_args+=("${common_helm_args[@]}")
fi

# Trace only the helm invocation itself.
set -x
helm "${emtp_helm_args[@]}"
set +x
69+
70+
# Install or upgrade the chart located at $AEMM_DL_URL in the default
# namespace, with IMDSv2 enabled and its service exposed on $IMDS_PORT.
aemm_helm_args=(upgrade --install "$CLUSTER_NAME-aemm" "$AEMM_DL_URL")
aemm_helm_args+=(--wait --namespace default)
aemm_helm_args+=(--set aemm.IMDSv2="true")
aemm_helm_args+=(--set servicePort="$IMDS_PORT")

# Append the shared flags only when there are any.
if (( ${#common_helm_args[@]} > 0 )); then
  aemm_helm_args+=("${common_helm_args[@]}")
fi

# NOTE(review): `retry` is not defined in this script; presumably it re-runs
# the command up to 5 times - confirm against its definition in the caller.
set -x
retry 5 helm "${aemm_helm_args[@]}"
set +x
86+
87+
# Polling budget shared by the loops below: at most TAINT_CHECK_CYCLES
# attempts with TAINT_CHECK_SLEEP seconds between them.
TAINT_CHECK_CYCLES=15
TAINT_CHECK_SLEEP=15

DEPLOYED=0

# Wait until the regular-pod-test deployment reports no unavailable replicas.
for ((i = 1; i <= TAINT_CHECK_CYCLES; i++)); do
  # The kubectl exit status is checked explicitly: a failed call produces
  # empty output, and an empty string compares equal to 0 inside [[ -eq ]],
  # which would otherwise be reported as a ready deployment. An empty value
  # from a *successful* call is still treated as zero unavailable replicas.
  if unavailable=$(kubectl get deployments regular-pod-test -o jsonpath='{.status.unavailableReplicas}') \
    && [[ "${unavailable:-0}" -eq 0 ]]; then
    echo "✅ Verified regular-pod-test pod was scheduled and started!"
    DEPLOYED=1
    break
  fi
  echo "Setup Loop $i/$TAINT_CHECK_CYCLES, sleeping for $TAINT_CHECK_SLEEP seconds"
  sleep "$TAINT_CHECK_SLEEP"
done

if [[ $DEPLOYED -eq 0 ]]; then
  echo "❌ regular-pod-test pod deployment failed"
  fail_and_exit 2
fi
106+
107+
# Poll the Kubernetes event list until the CordonAndDrain event for the test
# node shows up; exit 0 on the first match, fail once the budget is used up.
test_node="${TEST_NODE:-$CLUSTER_NAME-worker}"
for ((i = 1; i <= TAINT_CHECK_CYCLES; i++)); do
  # `tr -s " "` collapses the column padding so the message can be matched
  # with single spaces. grep output is discarded via /dev/null instead of -q
  # on purpose: -q may exit at the first match, and with `pipefail` an
  # upstream SIGPIPE would then make the whole pipeline report failure.
  if kubectl get events | tr -s " " | grep "CordonAndDrain node/${test_node} Node successfully cordoned and drained" >/dev/null; then
    echo "✅ Verified CordonAndDrain was emitted as a k8s event!"
    echo "✅ K8s Emit Events Test Passed $CLUSTER_NAME! ✅"
    exit 0
  fi

  echo "Assertion Loop $i/$TAINT_CHECK_CYCLES, sleeping for $TAINT_CHECK_SLEEP seconds"
  sleep "$TAINT_CHECK_SLEEP"
done

echo "❌ k8s CordonAndDrain event was not emitted to k8s"
# fail_and_exit prints the "K8s Emit Events Test failed" banner itself, so it
# is not echoed separately here.
fail_and_exit 1

test/k8s-local-cluster-test/run-test

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -91,6 +91,7 @@ function reset_cluster {
9191
kubectl taint node "$node" aws-node-termination-handler/rebalance-recommendation- || true
9292
done
9393
remove_labels || :
94+
kubectl delete events --all
9495
sleep 2
9596
}
9697

0 commit comments

Comments
 (0)