Skip to content

Commit ff3269e

Browse files
authored
Adds windows collection of failed conns. (#26617)
1 parent a0f8078 commit ff3269e

8 files changed

+180
-110
lines changed

pkg/network/driver/ddnpmapi.h

+3-1
Original file line numberDiff line numberDiff line change
@@ -241,7 +241,9 @@ typedef enum _ConnectionStatus {
241241
CONN_STAT_ATTEMPTED,
242242
CONN_STAT_ESTABLISHED,
243243
CONN_STAT_ACKRST,
244-
CONN_STAT_TIMEOUT
244+
CONN_STAT_TIMEOUT,
245+
CONN_STAT_EST_SENT_RST,
246+
CONN_STAT_EST_RECV_RST
245247
} CONNECTION_STATUS;
246248

247249
typedef struct _tcpFlowData {

pkg/network/driver/types.go

+12
Original file line numberDiff line numberDiff line change
@@ -68,6 +68,18 @@ const (
6868
TCPFlowEstablishedMask = C.TCP_FLOW_ESTABLISHED_MASK
6969
)
7070

71+
// ConnectionStatus is the Go type for the driver's C enum _ConnectionStatus
// (typedef'd as CONNECTION_STATUS in ddnpmapi.h).
type ConnectionStatus C.enum__ConnectionStatus
72+
73+
// Go names for the CONN_STAT_* values of the driver's connection status enum.
// FlowToConnStat maps ACKRST, Timeout and SentRst/RecvRst to the failure codes
// 111, 110 and 104 respectively.
const (
74+
ConnectionStatusUnknown ConnectionStatus = C.CONN_STAT_UNKNOWN
75+
ConnectionStatusAttempted ConnectionStatus = C.CONN_STAT_ATTEMPTED
76+
ConnectionStatusEstablished ConnectionStatus = C.CONN_STAT_ESTABLISHED
77+
ConnectionStatusACKRST ConnectionStatus = C.CONN_STAT_ACKRST
78+
ConnectionStatusTimeout ConnectionStatus = C.CONN_STAT_TIMEOUT
79+
ConnectionStatusSentRst ConnectionStatus = C.CONN_STAT_EST_SENT_RST
80+
ConnectionStatusRecvRst ConnectionStatus = C.CONN_STAT_EST_RECV_RST
81+
)
82+
7183
const (
7284
DirectionInbound = C.DIRECTION_INBOUND
7385
DirectionOutbound = C.DIRECTION_OUTBOUND

pkg/network/driver/types_windows.go

+12
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

pkg/network/event_windows.go

+13
Original file line numberDiff line numberDiff line change
@@ -119,9 +119,22 @@ func FlowToConnStat(cs *ConnectionStats, flow *driver.PerFlowData, enableMonoton
119119

120120
tf := flow.TCPFlow()
121121
if tf != nil {
122+
cs.TCPFailures = make(map[uint32]uint32)
122123
cs.Monotonic.Retransmits = uint32(tf.RetransmitCount)
123124
cs.RTT = uint32(tf.SRTT)
124125
cs.RTTVar = uint32(tf.RttVariance)
126+
127+
switch driver.ConnectionStatus(tf.ConnectionStatus) {
128+
case driver.ConnectionStatusACKRST:
129+
cs.TCPFailures[111] = 1 // ECONNREFUSED in posix is 111
130+
case driver.ConnectionStatusTimeout:
131+
cs.TCPFailures[110] = 1 // ETIMEDOUT in posix is 110
132+
case driver.ConnectionStatusSentRst:
133+
fallthrough
134+
case driver.ConnectionStatusRecvRst:
135+
cs.TCPFailures[104] = 1 // ECONNRESET in posix is 104
136+
}
137+
125138
}
126139

127140
if isTCPFlowEstablished(flow) {

pkg/network/tracer/tracer_linux_test.go

+8-105
Original file line numberDiff line numberDiff line change
@@ -2368,15 +2368,21 @@ var failedConnectionsBuildModes = map[ebpftest.BuildMode]struct{}{
23682368
ebpftest.RuntimeCompiled: {},
23692369
}
23702370

2371-
func (s *TracerSuite) TestTCPFailureConnectionTimeout() {
2372-
t := s.T()
2371+
func checkSkipFailureConnectionsTests(t *testing.T) {
23732372
if _, ok := failedConnectionsBuildModes[ebpftest.GetBuildMode()]; !ok {
23742373
t.Skip("Skipping test on unsupported build mode: ", ebpftest.GetBuildMode())
23752374
}
2375+
2376+
}
2377+
func (s *TracerSuite) TestTCPFailureConnectionTimeout() {
2378+
t := s.T()
2379+
2380+
checkSkipFailureConnectionsTests(t)
23762381
// TODO: remove this check when we fix this test on kernels < 4.19
23772382
if kv < kernel.VersionCode(4, 19, 0) {
23782383
t.Skip("Skipping test on kernels < 4.19")
23792384
}
2385+
23802386
setupDropTrafficRule(t)
23812387
cfg := testConfig()
23822388
cfg.TCPFailedConnectionsEnabled = true
@@ -2427,109 +2433,6 @@ func (s *TracerSuite) TestTCPFailureConnectionTimeout() {
24272433
}, 3*time.Second, 1000*time.Millisecond, "Failed connection not recorded properly")
24282434
}
24292435

2430-
func (s *TracerSuite) TestTCPFailureConnectionRefused() {
2431-
t := s.T()
2432-
if _, ok := failedConnectionsBuildModes[ebpftest.GetBuildMode()]; !ok {
2433-
t.Skip("Skipping test on unsupported build mode: ", ebpftest.GetBuildMode())
2434-
}
2435-
cfg := testConfig()
2436-
cfg.TCPFailedConnectionsEnabled = true
2437-
tr := setupTracer(t, cfg)
2438-
2439-
// try to connect to a port where no server is accepting connections
2440-
srvAddr := "127.0.0.1:9998"
2441-
conn, err := net.Dial("tcp", srvAddr)
2442-
if err == nil {
2443-
conn.Close() // If the connection unexpectedly succeeds, close it immediately.
2444-
require.Fail(t, "expected connection to be refused, but it succeeded")
2445-
}
2446-
require.Error(t, err, "expected connection refused error but got none")
2447-
2448-
// Check if the connection was recorded as refused
2449-
require.Eventually(t, func() bool {
2450-
conns := getConnections(t, tr)
2451-
// Check for the refusal record
2452-
return findFailedConnectionByRemoteAddr(srvAddr, conns, 111)
2453-
}, 3*time.Second, 100*time.Millisecond, "Failed connection not recorded properly")
2454-
}
2455-
2456-
func (s *TracerSuite) TestTCPFailureConnectionReset() {
2457-
t := s.T()
2458-
if _, ok := failedConnectionsBuildModes[ebpftest.GetBuildMode()]; !ok {
2459-
t.Skip("Skipping test on unsupported build mode: ", ebpftest.GetBuildMode())
2460-
}
2461-
cfg := testConfig()
2462-
cfg.TCPFailedConnectionsEnabled = true
2463-
tr := setupTracer(t, cfg)
2464-
2465-
srv := NewTCPServer(func(c net.Conn) {
2466-
if tcpConn, ok := c.(*net.TCPConn); ok {
2467-
tcpConn.SetLinger(0)
2468-
buf := make([]byte, 10)
2469-
_, _ = c.Read(buf)
2470-
time.Sleep(10 * time.Millisecond)
2471-
}
2472-
c.Close()
2473-
})
2474-
2475-
require.NoError(t, srv.Run(), "error running server")
2476-
t.Cleanup(srv.Shutdown)
2477-
2478-
serverAddr := srv.ln.Addr()
2479-
c, err := net.Dial("tcp", serverAddr.String())
2480-
require.NoError(t, err, "could not connect to server: ", err)
2481-
2482-
// Write to the server and expect a reset
2483-
_, writeErr := c.Write([]byte("ping"))
2484-
if writeErr != nil {
2485-
t.Log("Write error:", writeErr)
2486-
}
2487-
2488-
// Read from server to ensure that the server has a chance to reset the connection
2489-
_, readErr := c.Read(make([]byte, 4))
2490-
require.Error(t, readErr, "expected connection reset error but got none")
2491-
2492-
// Check if the connection was recorded as reset
2493-
require.Eventually(t, func() bool {
2494-
conns := getConnections(t, tr)
2495-
// 104 is the errno for ECONNRESET
2496-
return findFailedConnection(t, c.LocalAddr().String(), serverAddr.String(), conns, 104)
2497-
}, 3*time.Second, 100*time.Millisecond, "Failed connection not recorded properly")
2498-
2499-
require.NoError(t, c.Close(), "error closing client connection")
2500-
}
2501-
2502-
// findFailedConnection is a utility function to find a failed connection based on specific TCP error codes
2503-
func findFailedConnection(t *testing.T, local, remote string, conns *network.Connections, errorCode uint32) bool {
2504-
// Extract the address and port from the net.Addr types
2505-
localAddrPort, err := netip.ParseAddrPort(local)
2506-
if err != nil {
2507-
t.Logf("Failed to parse local address: %v", err)
2508-
return false
2509-
}
2510-
remoteAddrPort, err := netip.ParseAddrPort(remote)
2511-
if err != nil {
2512-
t.Logf("Failed to parse remote address: %v", err)
2513-
return false
2514-
}
2515-
2516-
failureFilter := func(cs network.ConnectionStats) bool {
2517-
localMatch := netip.AddrPortFrom(cs.Source.Addr, cs.SPort) == localAddrPort
2518-
remoteMatch := netip.AddrPortFrom(cs.Dest.Addr, cs.DPort) == remoteAddrPort
2519-
return localMatch && remoteMatch && cs.TCPFailures[errorCode] > 0
2520-
}
2521-
2522-
return network.FirstConnection(conns, failureFilter) != nil
2523-
}
2524-
2525-
// for some failed connections we don't know the local addr/port so we need to search by remote addr only
2526-
func findFailedConnectionByRemoteAddr(remoteAddr string, conns *network.Connections, errorCode uint32) bool {
2527-
failureFilter := func(cs network.ConnectionStats) bool {
2528-
return netip.MustParseAddrPort(remoteAddr) == netip.AddrPortFrom(cs.Dest.Addr, cs.DPort) && cs.TCPFailures[errorCode] > 0
2529-
}
2530-
return network.FirstConnection(conns, failureFilter) != nil
2531-
}
2532-
25332436
func setupDropTrafficRule(tb testing.TB) (ns string) {
25342437
state := testutil.IptablesSave(tb)
25352438
tb.Cleanup(func() {

pkg/network/tracer/tracer_test.go

+104
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@ import (
1818
"math/rand"
1919
"net"
2020
nethttp "net/http"
21+
"net/netip"
2122
"os"
2223
"runtime"
2324
"strconv"
@@ -1253,3 +1254,106 @@ func (s *TracerSuite) TestTCPDirection() {
12531254
conn = incomingConns[0]
12541255
assert.Equal(t, conn.Direction, network.INCOMING, "connection direction must be incoming: %s", conn)
12551256
}
1257+
1258+
func (s *TracerSuite) TestTCPFailureConnectionRefused() {
1259+
t := s.T()
1260+
1261+
checkSkipFailureConnectionsTests(t)
1262+
1263+
cfg := testConfig()
1264+
cfg.TCPFailedConnectionsEnabled = true
1265+
tr := setupTracer(t, cfg)
1266+
1267+
// try to connect to a port where no server is accepting connections
1268+
srvAddr := "127.0.0.1:9998"
1269+
conn, err := net.Dial("tcp", srvAddr)
1270+
if err == nil {
1271+
conn.Close() // If the connection unexpectedly succeeds, close it immediately.
1272+
require.Fail(t, "expected connection to be refused, but it succeeded")
1273+
}
1274+
require.Error(t, err, "expected connection refused error but got none")
1275+
1276+
// Check if the connection was recorded as refused
1277+
require.Eventually(t, func() bool {
1278+
conns := getConnections(t, tr)
1279+
// Check for the refusal record
1280+
return findFailedConnectionByRemoteAddr(srvAddr, conns, 111)
1281+
}, 3*time.Second, 100*time.Millisecond, "Failed connection not recorded properly")
1282+
}
1283+
1284+
func (s *TracerSuite) TestTCPFailureConnectionReset() {
1285+
t := s.T()
1286+
1287+
checkSkipFailureConnectionsTests(t)
1288+
1289+
cfg := testConfig()
1290+
cfg.TCPFailedConnectionsEnabled = true
1291+
tr := setupTracer(t, cfg)
1292+
1293+
srv := NewTCPServer(func(c net.Conn) {
1294+
if tcpConn, ok := c.(*net.TCPConn); ok {
1295+
tcpConn.SetLinger(0)
1296+
buf := make([]byte, 10)
1297+
_, _ = c.Read(buf)
1298+
time.Sleep(10 * time.Millisecond)
1299+
}
1300+
c.Close()
1301+
})
1302+
1303+
require.NoError(t, srv.Run(), "error running server")
1304+
t.Cleanup(srv.Shutdown)
1305+
1306+
serverAddr := srv.ln.Addr()
1307+
c, err := net.Dial("tcp", serverAddr.String())
1308+
require.NoError(t, err, "could not connect to server: ", err)
1309+
1310+
// Write to the server and expect a reset
1311+
_, writeErr := c.Write([]byte("ping"))
1312+
if writeErr != nil {
1313+
t.Log("Write error:", writeErr)
1314+
}
1315+
1316+
// Read from server to ensure that the server has a chance to reset the connection
1317+
_, readErr := c.Read(make([]byte, 4))
1318+
require.Error(t, readErr, "expected connection reset error but got none")
1319+
1320+
// Check if the connection was recorded as reset
1321+
require.Eventually(t, func() bool {
1322+
conns := getConnections(t, tr)
1323+
// 104 is the errno for ECONNRESET
1324+
return findFailedConnection(t, c.LocalAddr().String(), serverAddr.String(), conns, 104)
1325+
}, 3*time.Second, 100*time.Millisecond, "Failed connection not recorded properly")
1326+
1327+
require.NoError(t, c.Close(), "error closing client connection")
1328+
}
1329+
1330+
// findFailedConnection is a utility function to find a failed connection based on specific TCP error codes
1331+
func findFailedConnection(t *testing.T, local, remote string, conns *network.Connections, errorCode uint32) bool { // nolint:unused
1332+
// Extract the address and port from the net.Addr types
1333+
localAddrPort, err := netip.ParseAddrPort(local)
1334+
if err != nil {
1335+
t.Logf("Failed to parse local address: %v", err)
1336+
return false
1337+
}
1338+
remoteAddrPort, err := netip.ParseAddrPort(remote)
1339+
if err != nil {
1340+
t.Logf("Failed to parse remote address: %v", err)
1341+
return false
1342+
}
1343+
1344+
failureFilter := func(cs network.ConnectionStats) bool {
1345+
localMatch := netip.AddrPortFrom(cs.Source.Addr, cs.SPort) == localAddrPort
1346+
remoteMatch := netip.AddrPortFrom(cs.Dest.Addr, cs.DPort) == remoteAddrPort
1347+
return localMatch && remoteMatch && cs.TCPFailures[errorCode] > 0
1348+
}
1349+
1350+
return network.FirstConnection(conns, failureFilter) != nil
1351+
}
1352+
1353+
// for some failed connections we don't know the local addr/port so we need to search by remote addr only
1354+
func findFailedConnectionByRemoteAddr(remoteAddr string, conns *network.Connections, errorCode uint32) bool {
1355+
failureFilter := func(cs network.ConnectionStats) bool {
1356+
return netip.MustParseAddrPort(remoteAddr) == netip.AddrPortFrom(cs.Dest.Addr, cs.DPort) && cs.TCPFailures[errorCode] > 0
1357+
}
1358+
return network.FirstConnection(conns, failureFilter) != nil
1359+
}

pkg/network/tracer/tracer_windows_test.go

+24
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,9 @@
88
package tracer
99

1010
import (
11+
"github.com/DataDog/datadog-agent/pkg/network/testutil"
12+
"testing"
13+
1114
sysconfigtypes "github.com/DataDog/datadog-agent/cmd/system-probe/config/types"
1215
"github.com/DataDog/datadog-agent/pkg/network/config"
1316
"github.com/DataDog/datadog-agent/pkg/network/driver"
@@ -30,3 +33,24 @@ func testConfig() *config.Config {
3033
cfg := config.New()
3134
return cfg
3235
}
36+
37+
// nolint:unused // this function currently unused but will be.
38+
func setupDropTrafficRule(tb testing.TB) (ns string) {
39+
//
40+
// note. This does not seem to function as advertised; localhost traffic is not being
41+
// blocked. More testing is necessary.
42+
tb.Cleanup(func() {
43+
cmds := []string{
44+
"powershell -c \"Remove-NetFirewallRule -DisplayName 'Datadog Test Rule'\"",
45+
}
46+
testutil.RunCommands(tb, cmds, false)
47+
})
48+
cmds := []string{
49+
"powershell -c \"New-NetFirewallRule -DisplayName 'Datadog Test Rule' -Direction Outbound -Action Block -Profile Any -RemotePort 10000 -Protocol TCP\"",
50+
}
51+
testutil.RunCommands(tb, cmds, false)
52+
return
53+
}
54+
55+
// checkSkipFailureConnectionsTests is intentionally empty in this Windows test
// file: it ignores its argument and never skips, so the failed-connection tests
// that call it always run here.
func checkSkipFailureConnectionsTests(_ *testing.T) {
56+
}

release.json

+4-4
Original file line numberDiff line numberDiff line change
@@ -13,8 +13,8 @@
1313
"JMXFETCH_HASH": "6c5d4ae2f858aa57fac7f92d6615931bf95f9a99c92979090f2de9d4b3c1f0d3",
1414
"MACOS_BUILD_VERSION": "master",
1515
"WINDOWS_DDNPM_DRIVER": "release-signed",
16-
"WINDOWS_DDNPM_VERSION": "2.6.0",
17-
"WINDOWS_DDNPM_SHASUM": "b1611ad4ceb8366c88767aeb638abefb226081efbf546b8b886952dd1b18ec05",
16+
"WINDOWS_DDNPM_VERSION": "2.7.0",
17+
"WINDOWS_DDNPM_SHASUM": "de6a2f437b906d1d0f3cfc9222c7f686b3d69726355c940476448a34535064c8",
1818
"SECURITY_AGENT_POLICIES_VERSION": "master",
1919
"WINDOWS_DDPROCMON_DRIVER": "release-signed",
2020
"WINDOWS_DDPROCMON_VERSION": "1.0.1",
@@ -32,8 +32,8 @@
3232
"JMXFETCH_HASH": "6c5d4ae2f858aa57fac7f92d6615931bf95f9a99c92979090f2de9d4b3c1f0d3",
3333
"MACOS_BUILD_VERSION": "master",
3434
"WINDOWS_DDNPM_DRIVER": "release-signed",
35-
"WINDOWS_DDNPM_VERSION": "2.6.0",
36-
"WINDOWS_DDNPM_SHASUM": "b1611ad4ceb8366c88767aeb638abefb226081efbf546b8b886952dd1b18ec05",
35+
"WINDOWS_DDNPM_VERSION": "2.7.0",
36+
"WINDOWS_DDNPM_SHASUM": "de6a2f437b906d1d0f3cfc9222c7f686b3d69726355c940476448a34535064c8",
3737
"SECURITY_AGENT_POLICIES_VERSION": "master",
3838
"WINDOWS_DDPROCMON_DRIVER": "release-signed",
3939
"WINDOWS_DDPROCMON_VERSION": "1.0.1",

0 commit comments

Comments
 (0)