From 4939e0aec0785b7ab69bc7ba3f24d7bd46ddd07b Mon Sep 17 00:00:00 2001 From: James Thomas Date: Thu, 14 Nov 2024 22:49:44 -0500 Subject: [PATCH 1/3] sets IP_DONTFRAG (mac) and IP_PMTUDISC_PROBE (linux and windows). adds udx-level packets-dropped-by-kernel stat (linux) --- include/udx.h | 2 ++ src/io.h | 3 +++ src/io_posix.c | 36 +++++++++++++++++++++---- src/io_win.c | 14 ++++++++++ src/udx.c | 10 +++++++ test/stream-write-read-perf.c | 2 ++ test/stream-write-read-receive-window.c | 2 +- 7 files changed, 63 insertions(+), 6 deletions(-) diff --git a/include/udx.h b/include/udx.h index 7b52293..3e3d991 100644 --- a/include/udx.h +++ b/include/udx.h @@ -119,6 +119,8 @@ struct udx_s { uint64_t packets_rx; uint64_t packets_tx; + + int64_t packets_dropped_by_kernel; }; struct udx_queue_node_s { diff --git a/src/io.h b/src/io.h index 12f0ca6..76f1279 100644 --- a/src/io.h +++ b/src/io.h @@ -15,4 +15,7 @@ udx__recvmsg (udx_socket_t *handle, uv_buf_t *buf, struct sockaddr *addr, int ad int udx__udp_set_rxq_ovfl (uv_os_sock_t fd); +int +udx__udp_set_dontfrag (uv_os_sock_t fd, bool is_ipv6); + #endif // UDX_IO_H diff --git a/src/io_posix.c b/src/io_posix.c index 048a2a1..d9eabbc 100644 --- a/src/io_posix.c +++ b/src/io_posix.c @@ -1,9 +1,5 @@ #define _GNU_SOURCE -#if defined(__linux__) || defined(__FreeBSD__) -#define UDX_PLATFORM_HAS_SENDMMSG -#endif - #include #include #include @@ -23,6 +19,19 @@ udx__get_link_mtu (const struct sockaddr *addr) { return -1; } +int +udx__udp_set_dontfrag (uv_os_sock_t fd, bool is_ipv6) { + int val = 1; + int rc; + if (is_ipv6) { + rc = setsockopt(fd, IPPROTO_IPV6, IPV6_DONTFRAG, &val, sizeof(val)); + } else { + rc = setsockopt(fd, IPPROTO_IP, IP_DONTFRAG, &val, sizeof(val)); + } + + return rc; +} + #else int @@ -55,6 +64,21 @@ udx__get_link_mtu (const struct sockaddr *addr) { close(s); return mtu; } + +int +udx__udp_set_dontfrag (uv_os_sock_t fd, bool is_ipv6) { + int rc; + if (is_ipv6) { + int val = IPV6_PMTUDISC_PROBE; + rc = 
setsockopt(fd, IPPROTO_IPV6, IPV6_MTU_DISCOVER, &val, sizeof(val)); + } else { + int val = IP_PMTUDISC_PROBE; + rc = setsockopt(fd, IPPROTO_IP, IP_MTU_DISCOVER, &val, sizeof(val)); + } + + return rc; +} + #endif ssize_t @@ -111,11 +135,13 @@ udx__recvmsg (udx_socket_t *handle, uv_buf_t *buf, struct sockaddr *addr, int ad for (struct cmsghdr *cmsg = CMSG_FIRSTHDR(&h); cmsg != NULL; cmsg = CMSG_NXTHDR(&h, cmsg)) { if (cmsg->cmsg_level == SOL_SOCKET && cmsg->cmsg_type == SO_RXQ_OVFL) { - packets_dropped_by_kernel = *(uint32_t *) CMSG_DATA(cmsg); + memcpy(&packets_dropped_by_kernel, CMSG_DATA(cmsg), sizeof(packets_dropped_by_kernel)); } } if (packets_dropped_by_kernel) { + uint32_t delta = packets_dropped_by_kernel - handle->packets_dropped_by_kernel; + handle->udx->packets_dropped_by_kernel += delta; handle->packets_dropped_by_kernel = packets_dropped_by_kernel; } } diff --git a/src/io_win.c b/src/io_win.c index 630f76e..b852b38 100644 --- a/src/io_win.c +++ b/src/io_win.c @@ -87,3 +87,17 @@ udx__udp_set_rxq_ovfl (uv_os_sock_t fd) { UDX_UNUSED(fd); return -1; } + +int +udx__udp_set_dontfrag (uv_os_sock_t fd, bool is_ipv6) { + int rc; + if (is_ipv6) { + int val = IPV6_PMTUDISC_PROBE; + rc = setsockopt(fd, IPPROTO_IPV6, IPV6_MTU_DISCOVER, &val, sizeof(val)); + } else { + int val = IP_PMTUDISC_PROBE; + rc = setsockopt(fd, IPPROTO_IP, IP_MTU_DISCOVER, &val, sizeof(val)); + } + + return rc; +} diff --git a/src/udx.c b/src/udx.c index 08538f7..f5bfd93 100644 --- a/src/udx.c +++ b/src/udx.c @@ -1965,6 +1965,7 @@ udx_init (uv_loop_t *loop, udx_t *udx) { udx->packets_rx = 0; udx->packets_tx = 0; + udx->packets_dropped_by_kernel = -1; udx->loop = loop; return 0; @@ -2089,6 +2090,15 @@ udx_socket_bind (udx_socket_t *socket, const struct sockaddr *addr, unsigned int if (!err) { socket->cmsg_wanted = true; socket->packets_dropped_by_kernel = 0; + + if (socket->udx->packets_dropped_by_kernel == -1) { + socket->udx->packets_dropped_by_kernel = 0; + } + } + + err = 
udx__udp_set_dontfrag((uv_os_sock_t) fd, socket->family == 6); + if (err) { + debug_printf("udx: failed to set IP Don't Fragment socket option\n"); } socket->status |= UDX_SOCKET_BOUND; diff --git a/test/stream-write-read-perf.c b/test/stream-write-read-perf.c index bb91e60..02e13b2 100644 --- a/test/stream-write-read-perf.c +++ b/test/stream-write-read-perf.c @@ -152,6 +152,8 @@ main () { if (asock.packets_dropped_by_kernel != -1 && bsock.packets_dropped_by_kernel != -1) { printf("stats: socket a: packets_dropped=%" PRIi64 "\n", asock.packets_dropped_by_kernel); printf("stats: socket b: packets_dropped=%" PRIi64 "\n", bsock.packets_dropped_by_kernel); + + assert(asock.packets_dropped_by_kernel + bsock.packets_dropped_by_kernel == udx.packets_dropped_by_kernel); } return 0; diff --git a/test/stream-write-read-receive-window.c b/test/stream-write-read-receive-window.c index 49a521e..5889139 100644 --- a/test/stream-write-read-receive-window.c +++ b/test/stream-write-read-receive-window.c @@ -129,7 +129,7 @@ main () { assert(e == 0); int data_sz = UDX_MTU_MAX * 6; - uint8_t *data = malloc(data_sz); + char *data = malloc(data_sz); uv_buf_t buf = uv_buf_init(data, data_sz); e = udx_stream_write(req, &send_stream, &buf, 1, on_ack); From 2796d29b951ba2dc6eb79dc0300bfbc45b638018 Mon Sep 17 00:00:00 2001 From: James Thomas Date: Fri, 15 Nov 2024 00:26:33 -0500 Subject: [PATCH 2/3] set feature flag for IPV6 options on Apple --- src/io_posix.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/io_posix.c b/src/io_posix.c index d9eabbc..bc06f20 100644 --- a/src/io_posix.c +++ b/src/io_posix.c @@ -1,5 +1,9 @@ #define _GNU_SOURCE +#if defined(__APPLE__) +#define __APPLE_USE_RFC_3542 +#endif + #include #include #include From 5e81c9040136856b5ad2513d0d4976927a8a0c4e Mon Sep 17 00:00:00 2001 From: James Thomas Date: Fri, 15 Nov 2024 14:34:04 -0500 Subject: [PATCH 3/3] windows IPV6_PMTUDISC_* options are IP_PMTUDISC_NOT_SET, IP_PMTUDISC_DO, IP_PMTUDISC_DONT and IP_PMTUDISC_PROBE 
--- src/io_win.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/io_win.c b/src/io_win.c index b852b38..b5cd0ce 100644 --- a/src/io_win.c +++ b/src/io_win.c @@ -91,11 +91,10 @@ udx__udp_set_rxq_ovfl (uv_os_sock_t fd) { int udx__udp_set_dontfrag (uv_os_sock_t fd, bool is_ipv6) { int rc; + int val = IP_PMTUDISC_PROBE; if (is_ipv6) { - int val = IPV6_PMTUDISC_PROBE; rc = setsockopt(fd, IPPROTO_IPV6, IPV6_MTU_DISCOVER, &val, sizeof(val)); } else { - int val = IP_PMTUDISC_PROBE; rc = setsockopt(fd, IPPROTO_IP, IP_MTU_DISCOVER, &val, sizeof(val)); }