Skip to content

Commit

Permalink
[CNM] Fix counters for failed connections telemetry (#34446)
Browse files — browse the repository at this point in the history
Co-authored-by: hmahmood <[email protected]>
  • Loading branch information
akarpz and hmahmood authored Feb 26, 2025
1 parent 980a9e9 commit bd44a13
Show file tree
Hide file tree
Showing 2 changed files with 31 additions and 20 deletions.
46 changes: 30 additions & 16 deletions pkg/network/tracer/connection/ebpf_tracer.go
Original file line number Diff line number Diff line change
Expand Up @@ -126,10 +126,12 @@ var EbpfTracerTelemetry = struct {
type ebpfTracer struct {
m *ddebpf.Manager

conns *maps.GenericMap[netebpf.ConnTuple, netebpf.ConnStats]
tcpStats *maps.GenericMap[netebpf.ConnTuple, netebpf.TCPStats]
tcpRetransmits *maps.GenericMap[netebpf.ConnTuple, uint32]
config *config.Config
conns *maps.GenericMap[netebpf.ConnTuple, netebpf.ConnStats]
tcpStats *maps.GenericMap[netebpf.ConnTuple, netebpf.TCPStats]
tcpRetransmits *maps.GenericMap[netebpf.ConnTuple, uint32]
ebpfTelemetryMap *maps.GenericMap[uint32, netebpf.Telemetry]
tcpFailuresTelemetryMap *maps.GenericMap[int32, uint64]
config *config.Config

// tcp_close events
closeConsumer *tcpCloseConsumer
Expand All @@ -147,6 +149,8 @@ type ebpfTracer struct {
ebpfTracerType TracerType

ch *cookieHasher

lastTCPFailureTelemetry map[int32]uint64
}

// NewTracer creates a new tracer
Expand Down Expand Up @@ -199,8 +203,9 @@ func newEbpfTracer(config *config.Config, _ telemetryComponent.Component) (Trace
}

tr := &ebpfTracer{
removeTuple: &netebpf.ConnTuple{},
ch: newCookieHasher(),
removeTuple: &netebpf.ConnTuple{},
ch: newCookieHasher(),
lastTCPFailureTelemetry: make(map[int32]uint64),
}

connCloseEventHandler, err := initClosedConnEventHandler(config, tr.closedPerfCallback, connPool, extractor)
Expand Down Expand Up @@ -271,6 +276,16 @@ func newEbpfTracer(config *config.Config, _ telemetryComponent.Component) (Trace
return nil, fmt.Errorf("error retrieving the bpf %s map: %s", probes.TCPRetransmitsMap, err)
}

tr.ebpfTelemetryMap, err = maps.GetMap[uint32, netebpf.Telemetry](m.Manager, probes.TelemetryMap)
if err != nil {
log.Warnf("error retrieving telemetry map: %s", err)
}

tr.tcpFailuresTelemetryMap, err = maps.GetMap[int32, uint64](m.Manager, probes.TCPFailureTelemetry)
if err != nil {
log.Warnf("error retrieving tcp failure telemetry map: %s", err)
}

return tr, nil
}

Expand Down Expand Up @@ -534,15 +549,13 @@ func (t *ebpfTracer) Remove(conn *network.ConnectionStats) error {
}

func (t *ebpfTracer) getEBPFTelemetry() *netebpf.Telemetry {
var zero uint32
mp, err := maps.GetMap[uint32, netebpf.Telemetry](t.m.Manager, probes.TelemetryMap)
if err != nil {
log.Warnf("error retrieving telemetry map: %s", err)
if t.ebpfTelemetryMap == nil {
return nil
}

var zero uint32
tm := &netebpf.Telemetry{}
if err := mp.Lookup(&zero, tm); err != nil {
if err := t.ebpfTelemetryMap.Lookup(&zero, tm); err != nil {
// This can happen if we haven't initialized the telemetry object yet
// so let's just use a trace log
if log.ShouldLog(log.TraceLvl) {
Expand All @@ -554,12 +567,11 @@ func (t *ebpfTracer) getEBPFTelemetry() *netebpf.Telemetry {
}

func (t *ebpfTracer) getTCPFailureTelemetry() map[int32]uint64 {
mp, err := maps.GetMap[int32, uint64](t.m.Manager, probes.TCPFailureTelemetry)
if err != nil {
log.Warnf("error retrieving tcp failure telemetry map: %s", err)
if t.tcpFailuresTelemetryMap == nil {
return nil
}
it := mp.IterateWithBatchSize(100)

it := t.tcpFailuresTelemetryMap.IterateWithBatchSize(100)
var key int32
var val uint64
result := make(map[int32]uint64)
Expand All @@ -569,7 +581,9 @@ func (t *ebpfTracer) getTCPFailureTelemetry() map[int32]uint64 {
log.Warnf("error retrieving tcp failure telemetry map: %s", err)
return nil
}
result[key] = val

result[key] = val - t.lastTCPFailureTelemetry[key]
t.lastTCPFailureTelemetry[key] = val
}
return result
}
Expand Down
5 changes: 1 addition & 4 deletions pkg/network/tracer/connection/ebpf_tracer_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -13,16 +13,13 @@ import (

"github.com/stretchr/testify/require"

"github.com/DataDog/datadog-agent/pkg/ebpf/maps"
"github.com/DataDog/datadog-agent/pkg/network/config"
"github.com/DataDog/datadog-agent/pkg/network/ebpf/probes"
)

// TestFailedConnectionTelemetryMapLoads builds a tracer via newEbpfTracer with
// a config from config.New() and checks that the TCP failure telemetry map can
// be obtained from it. The test fails immediately if the tracer cannot be
// created.
func TestFailedConnectionTelemetryMapLoads(t *testing.T) {
tr, err := newEbpfTracer(config.New(), nil)
require.NoError(t, err, "could not load tracer")
// Register tr.Stop so the tracer is stopped when the test finishes.
t.Cleanup(tr.Stop)

// NOTE(review): the direct maps.GetMap lookup and the check of the
// tcpFailuresTelemetryMap field below appear to verify the same map. This
// view looks like a flattened diff, so confirm which of the two assertions
// is the current one before relying on both.
_, err = maps.GetMap[int32, uint64](tr.(*ebpfTracer).m.Manager, probes.TCPFailureTelemetry)
require.NoError(t, err, "error loading tcp failure telemetry map")
require.NotNil(t, tr.(*ebpfTracer).tcpFailuresTelemetryMap, "error loading tcp failure telemetry map")
}

0 comments on commit bd44a13

Please sign in to comment.