diff --git a/include/udx.h b/include/udx.h index fac5e28..36ce713 100644 --- a/include/udx.h +++ b/include/udx.h @@ -119,6 +119,8 @@ struct udx_s { uint64_t packets_rx; uint64_t packets_tx; + + int64_t packets_dropped_by_kernel; }; struct udx_queue_node_s { diff --git a/src/io.h b/src/io.h index 12f0ca6..76f1279 100644 --- a/src/io.h +++ b/src/io.h @@ -15,4 +15,7 @@ udx__recvmsg (udx_socket_t *handle, uv_buf_t *buf, struct sockaddr *addr, int ad int udx__udp_set_rxq_ovfl (uv_os_sock_t fd); +int +udx__udp_set_dontfrag (uv_os_sock_t fd, bool is_ipv6); + #endif // UDX_IO_H diff --git a/src/io_posix.c b/src/io_posix.c index 048a2a1..bc06f20 100644 --- a/src/io_posix.c +++ b/src/io_posix.c @@ -1,7 +1,7 @@ #define _GNU_SOURCE -#if defined(__linux__) || defined(__FreeBSD__) -#define UDX_PLATFORM_HAS_SENDMMSG +#if defined(__APPLE__) +#define __APPLE_USE_RFC_3542 #endif #include @@ -23,6 +23,19 @@ udx__get_link_mtu (const struct sockaddr *addr) { return -1; } +int +udx__udp_set_dontfrag (uv_os_sock_t fd, bool is_ipv6) { + int val = 1; + int rc; + if (is_ipv6) { + rc = setsockopt(fd, IPPROTO_IPV6, IPV6_DONTFRAG, &val, sizeof(val)); + } else { + rc = setsockopt(fd, IPPROTO_IP, IP_DONTFRAG, &val, sizeof(val)); + } + + return rc; +} + #else int @@ -55,6 +68,21 @@ udx__get_link_mtu (const struct sockaddr *addr) { close(s); return mtu; } + +int +udx__udp_set_dontfrag (uv_os_sock_t fd, bool is_ipv6) { + int rc; + if (is_ipv6) { + int val = IPV6_PMTUDISC_PROBE; + rc = setsockopt(fd, IPPROTO_IPV6, IPV6_MTU_DISCOVER, &val, sizeof(val)); + } else { + int val = IP_PMTUDISC_PROBE; + rc = setsockopt(fd, IPPROTO_IP, IP_MTU_DISCOVER, &val, sizeof(val)); + } + + return rc; +} + #endif ssize_t @@ -111,11 +139,13 @@ udx__recvmsg (udx_socket_t *handle, uv_buf_t *buf, struct sockaddr *addr, int ad for (struct cmsghdr *cmsg = CMSG_FIRSTHDR(&h); cmsg != NULL; cmsg = CMSG_NXTHDR(&h, cmsg)) { if (cmsg->cmsg_level == SOL_SOCKET && cmsg->cmsg_type == SO_RXQ_OVFL) { - packets_dropped_by_kernel = *(uint32_t *) CMSG_DATA(cmsg); + memcpy(&packets_dropped_by_kernel, CMSG_DATA(cmsg), sizeof(packets_dropped_by_kernel)); } } if (packets_dropped_by_kernel) { + uint32_t delta = packets_dropped_by_kernel - handle->packets_dropped_by_kernel; + handle->udx->packets_dropped_by_kernel += delta; handle->packets_dropped_by_kernel = packets_dropped_by_kernel; } } diff --git a/src/io_win.c b/src/io_win.c index 630f76e..b5cd0ce 100644 --- a/src/io_win.c +++ b/src/io_win.c @@ -87,3 +87,16 @@ udx__udp_set_rxq_ovfl (uv_os_sock_t fd) { UDX_UNUSED(fd); return -1; } + +int +udx__udp_set_dontfrag (uv_os_sock_t fd, bool is_ipv6) { + int rc; + int val = IP_PMTUDISC_PROBE; + if (is_ipv6) { + rc = setsockopt(fd, IPPROTO_IPV6, IPV6_MTU_DISCOVER, &val, sizeof(val)); + } else { + rc = setsockopt(fd, IPPROTO_IP, IP_MTU_DISCOVER, &val, sizeof(val)); + } + + return rc; +} diff --git a/src/udx.c b/src/udx.c index 5bfc456..8557d66 100644 --- a/src/udx.c +++ b/src/udx.c @@ -1956,6 +1956,7 @@ udx_init (uv_loop_t *loop, udx_t *udx) { udx->packets_rx = 0; udx->packets_tx = 0; + udx->packets_dropped_by_kernel = -1; udx->loop = loop; return 0; @@ -2080,6 +2081,15 @@ udx_socket_bind (udx_socket_t *socket, const struct sockaddr *addr, unsigned int if (!err) { socket->cmsg_wanted = true; socket->packets_dropped_by_kernel = 0; + + if (socket->udx->packets_dropped_by_kernel == -1) { + socket->udx->packets_dropped_by_kernel = 0; + } + } + + err = udx__udp_set_dontfrag((uv_os_sock_t) fd, socket->family == 6); + if (err) { + debug_printf("udx: failed to set IP Don't Fragment socket option\n"); } socket->status |= UDX_SOCKET_BOUND; diff --git a/test/stream-write-read-perf.c b/test/stream-write-read-perf.c index bb91e60..02e13b2 100644 --- a/test/stream-write-read-perf.c +++ b/test/stream-write-read-perf.c @@ -152,6 +152,8 @@ main () { if (asock.packets_dropped_by_kernel != -1 && bsock.packets_dropped_by_kernel != -1) { printf("stats: socket a: packets_dropped=%" PRIi64 "\n", asock.packets_dropped_by_kernel); printf("stats: socket b: packets_dropped=%" PRIi64 "\n", bsock.packets_dropped_by_kernel); + + assert(asock.packets_dropped_by_kernel + bsock.packets_dropped_by_kernel == udx.packets_dropped_by_kernel); } return 0; diff --git a/test/stream-write-read-receive-window.c b/test/stream-write-read-receive-window.c index 49a521e..5889139 100644 --- a/test/stream-write-read-receive-window.c +++ b/test/stream-write-read-receive-window.c @@ -129,7 +129,7 @@ main () { assert(e == 0); int data_sz = UDX_MTU_MAX * 6; - uint8_t *data = malloc(data_sz); + char *data = malloc(data_sz); uv_buf_t buf = uv_buf_init(data, data_sz); e = udx_stream_write(req, &send_stream, &buf, 1, on_ack);