Skip to content

Commit 23d0153

Browse files
authored
Log all pods on a node when cordoning the node (#259)
1 parent c9813c2 commit 23d0153

File tree

3 files changed

+50
-0
lines changed

3 files changed

+50
-0
lines changed

cmd/node-termination-handler.go

Lines changed: 4 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -221,6 +221,10 @@ func drainOrCordonIfNecessary(interruptionEventStore *interruptioneventstore.Sto
221221
os.Exit(1)
222222
}
223223
log.Log().Str("node_name", nodeName).Msg("Node successfully cordoned")
224+
err = node.LogPods(nodeName)
225+
if err != nil {
226+
log.Log().Err(err).Msg("There was a problem while trying to log all pod names on the node")
227+
}
224228
metrics.NodeActionsInc("cordon", nodeName, err)
225229
} else {
226230
err := node.CordonAndDrain(nodeName)

pkg/node/node.go

Lines changed: 22 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -22,6 +22,7 @@ import (
2222

2323
"github.com/aws/aws-node-termination-handler/pkg/config"
2424
"github.com/aws/aws-node-termination-handler/pkg/uptime"
25+
"github.com/rs/zerolog"
2526
"github.com/rs/zerolog/log"
2627
corev1 "k8s.io/api/core/v1"
2728
"k8s.io/apimachinery/pkg/api/errors"
@@ -286,6 +287,21 @@ func (n Node) TaintSpotItn(nodeName string, eventID string) error {
286287
return addTaint(k8sNode, n, SpotInterruptionTaint, eventID, corev1.TaintEffectNoSchedule)
287288
}
288289

290+
// LogPods logs all the pod names on a node
291+
func (n Node) LogPods(nodeName string) error {
292+
podList, err := n.fetchAllPods(nodeName)
293+
if err != nil {
294+
return fmt.Errorf("Unable to fetch all pods from API: %w", err)
295+
}
296+
podNamesArr := zerolog.Arr()
297+
for _, pod := range podList.Items {
298+
podNamesArr = podNamesArr.Str(pod.Name)
299+
}
300+
log.Log().Array("pod_names", podNamesArr).Str("node_name", nodeName).Msg("Pods on node")
301+
302+
return nil
303+
}
304+
289305
// TaintScheduledMaintenance adds the scheduled maintenance taint onto a node
290306
func (n Node) TaintScheduledMaintenance(nodeName string, eventID string) error {
291307
if !n.nthConfig.TaintNode {
@@ -399,6 +415,12 @@ func (n Node) fetchKubernetesNode(nodeName string) (*corev1.Node, error) {
399415
return n.drainHelper.Client.CoreV1().Nodes().Get(nodeName, metav1.GetOptions{})
400416
}
401417

418+
func (n Node) fetchAllPods(nodeName string) (*corev1.PodList, error) {
419+
return n.drainHelper.Client.CoreV1().Pods("").List(metav1.ListOptions{
420+
FieldSelector: "spec.nodeName=" + nodeName,
421+
})
422+
}
423+
402424
func getDrainHelper(nthConfig config.Config) (*drain.Helper, error) {
403425
drainHelper := &drain.Helper{
404426
Client: &kubernetes.Clientset{},

pkg/node/node_test.go

Lines changed: 24 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -240,6 +240,30 @@ func TestMarkForUncordonAfterRebootAddActionLabelFailure(t *testing.T) {
240240
h.Assert(t, err != nil, "Failed to return error on MarkForUncordonAfterReboot failing to add action Label")
241241
}
242242

243+
func TestLogPods(t *testing.T) {
244+
resetFlagsForTest()
245+
246+
client := fake.NewSimpleClientset(
247+
&v1.Pod{
248+
ObjectMeta: metav1.ObjectMeta{
249+
Name: "myPod",
250+
Labels: map[string]string{
251+
"spec.nodeName": nodeName,
252+
},
253+
},
254+
},
255+
&v1.Node{
256+
ObjectMeta: metav1.ObjectMeta{
257+
Name: nodeName,
258+
},
259+
},
260+
)
261+
262+
tNode := getNode(t, getDrainHelper(client))
263+
err := tNode.LogPods(nodeName)
264+
h.Ok(t, err)
265+
}
266+
243267
func TestIsLableledWithActionFailure(t *testing.T) {
244268
resetFlagsForTest()
245269

0 commit comments

Comments
 (0)