diff --git a/.cirrus.yml b/.cirrus.yml index 20fe346..375ec01 100644 --- a/.cirrus.yml +++ b/.cirrus.yml @@ -6,10 +6,18 @@ task: freebsd_instance: matrix: image: freebsd-12-0-release-amd64 - image: freebsd-11-2-release-amd64 + image: freebsd-11-3-stable-amd64-v20190808 install_script: - sed -i.bak -e 's,pkg+http://pkg.FreeBSD.org/\${ABI}/quarterly,pkg+http://pkg.FreeBSD.org/\${ABI}/latest,' /etc/pkg/FreeBSD.conf - pkg upgrade -y - - script: | - make -j 4 + - pkg install -y cmake + build_script: + - pwd + - mkdir build + - cd build + - cmake .. -DCMAKE_BUILD_TYPE=Release + - cmake --build . -- -j4 + test_script: + - pwd + - cd build + - ctest -VV --output-on-failure diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..a890447 --- /dev/null +++ b/.gitignore @@ -0,0 +1,2 @@ +build*/ +.vscode diff --git a/CMakeLists.txt b/CMakeLists.txt index 91590d3..73a3d77 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -1,8 +1,76 @@ -cmake_minimum_required(VERSION 3.9) -project(epoll-shim C) +cmake_minimum_required(VERSION 3.14) +project(epoll-shim LANGUAGES C) -set(CMAKE_C_STANDARD 99) +option(BUILD_SHARED_LIBS "build libepoll-shim as shared lib" ON) +option(ENABLE_COMPILER_WARNINGS "enable compiler warnings" OFF) + +if(ENABLE_COMPILER_WARNINGS) + add_compile_options("-Wall" + "-Wextra" + "-Wconversion" + "-Wsign-conversion" + "-Wmissing-prototypes" + "-pedantic" + "-Werror=implicit-function-declaration" + "-Werror=return-type" + "-Werror=incompatible-pointer-types") +endif() + +include(CTest) + +set(CMAKE_C_STANDARD 11) set(CMAKE_C_EXTENSIONS ON) add_subdirectory(src) -add_subdirectory(test) + +if(PROJECT_SOURCE_DIR STREQUAL CMAKE_SOURCE_DIR) + set(_namespace "${PROJECT_NAME}") + + if(BUILD_TESTING) + file(WRITE "${PROJECT_BINARY_DIR}/${PROJECT_NAME}-config.cmake" + "add_library(${_namespace}::epoll-shim ALIAS epoll-shim)\n") + set(${PROJECT_NAME}_DIR "${PROJECT_BINARY_DIR}") + if(ENABLE_COMPILER_WARNINGS) + add_compile_options("-w") + endif() + 
add_subdirectory(test) + endif() + + include(GNUInstallDirs) + + set(CMAKE_INSTALL_PKGCONFIGDIR + "libdata/pkgconfig" + CACHE PATH "Installation directory for pkgconfig (.pc) files") + mark_as_advanced(CMAKE_INSTALL_PKGCONFIGDIR) + set(CMAKE_INSTALL_CMAKEBASEDIR + "${CMAKE_INSTALL_LIBDIR}/cmake/${PROJECT_NAME}" + CACHE PATH "Installation directory for CMake config (.cmake) files") + mark_as_advanced(CMAKE_INSTALL_CMAKEBASEDIR) + + configure_file("${PROJECT_SOURCE_DIR}/${PROJECT_NAME}.pc.cmakein" + "${PROJECT_BINARY_DIR}/${PROJECT_NAME}.pc" @ONLY) + install(FILES "${PROJECT_BINARY_DIR}/${PROJECT_NAME}.pc" + DESTINATION "${CMAKE_INSTALL_PKGCONFIGDIR}") + + set(CMAKE_INSTALL_INCLUDEDIR "${CMAKE_INSTALL_INCLUDEDIR}/libepoll-shim") + install(TARGETS epoll-shim + EXPORT ${PROJECT_NAME}-targets + LIBRARY + INCLUDES DESTINATION "${CMAKE_INSTALL_INCLUDEDIR}") + install(DIRECTORY "${PROJECT_SOURCE_DIR}/include/" TYPE INCLUDE) + + install(EXPORT ${PROJECT_NAME}-targets + NAMESPACE "${_namespace}::" + DESTINATION "${CMAKE_INSTALL_CMAKEBASEDIR}") + if(NOT BUILD_SHARED_LIBS) + set(_deps "include(CMakeFindDependencyMacro)\n" + "set(THREADS_PREFER_PTHREAD_FLAG ON)\n" + "find_dependency(Threads)\n") + endif() + file( + WRITE "${PROJECT_BINARY_DIR}/${PROJECT_NAME}-config.cmake" + ${_deps} + "include(\"\${CMAKE_CURRENT_LIST_DIR}/${PROJECT_NAME}-targets.cmake\")\n") + install(FILES "${PROJECT_BINARY_DIR}/${PROJECT_NAME}-config.cmake" + DESTINATION "${CMAKE_INSTALL_CMAKEBASEDIR}") +endif() diff --git a/Makefile b/Makefile deleted file mode 100644 index ca5c218..0000000 --- a/Makefile +++ /dev/null @@ -1,27 +0,0 @@ -LIB= epoll-shim -SHLIB_MAJOR= 0 -SRCS= src/epoll.c src/timerfd.c src/signalfd.c src/common.c -INCS= include/sys/epoll.h include/sys/timerfd.h include/sys/signalfd.h -VERSION_MAP= Version.map - -FILES= src/epoll-shim.pc -FILESDIR= ${LIBDATADIR}/pkgconfig - -LIBDIR= /usr/local/lib -INCSDIR= /usr/local/include/libepoll-shim/sys -LIBDATADIR= /usr/local/libdata - -CFLAGS+= 
-I${.CURDIR}/include -pthread -Wall -Wextra -Wno-missing-prototypes -Wno-padded -Wno-missing-variable-declarations -Wno-thread-safety-analysis -LDFLAGS+= -pthread -lrt - -src/epoll-shim.pc: src/epoll-shim.pc.in - sed -e 's,@CMAKE_INSTALL_PREFIX@,/usr/local,' \ - -e 's,@PROJECT_VERSION@,,' \ - $> >$@ - -distrib-dirs: - mkdir -p "${DESTDIR}/${LIBDIR}" - mkdir -p "${DESTDIR}/${INCSDIR}" - mkdir -p "${DESTDIR}/${FILESDIR}" - -.include diff --git a/README.md b/README.md index f7ce4c3..685ea8c 100644 --- a/README.md +++ b/README.md @@ -1,4 +1,44 @@ -# epoll-shim -Small epoll implementation using kqueue; includes all features needed for libinput/libevdev - -This repo is used by the port `devel/libepoll-shim` +epoll-shim +========== + +This is a small library that implements epoll on top of kqueue. +It has been successfully used to port libinput, libevdev, Wayland and more +software to FreeBSD: https://www.freshports.org/devel/libepoll-shim/ + +It may be useful for porting other software that uses epoll as well. + +There are some tests inside `test/`. They should also compile under Linux and +can be used to verify proper epoll behavior. + +However, this library contains some very ugly hacks and workarounds. For +example: + - When using timerfd, signalfd or eventfd, `read`, `write` and `close` are + redefined as macros to internal helper functions. This is needed as there + is some internal context that has to be freed properly. This means that + you shouldn't create a timerfd/signalfd in one part of a program and close + it in a different part where `sys/timerfd.h` isn't included. The context + would leak. Luckily, software such as `libinput` behaves very nicely and + puts all timerfd related code in a single source file. + - There is exactly one static int reserved for fds that can be polled but are + not supported by kqueue under FreeBSD. This includes graphics or sound + devices under `/dev`. 
You can only have one of them throughout all epoll + instances in your process! + + +Installation +------------ + +Run the following commands to build libepoll-shim: + + $ mkdir build + $ cd build + $ cmake .. -DCMAKE_BUILD_TYPE=RelWithDebInfo + $ cmake --build . + +To run the tests: + + $ ctest --output-on-failure + +To install (as root): + + # cmake --build . --target install diff --git a/Version.map b/Version.map index e9ed66f..27685b4 100644 --- a/Version.map +++ b/Version.map @@ -2,6 +2,7 @@ global: epoll_shim_close; epoll_shim_read; + epoll_shim_write; epoll_create; epoll_create1; epoll_ctl; @@ -9,5 +10,8 @@ signalfd; timerfd_create; timerfd_settime; + eventfd; + eventfd_read; + eventfd_write; local: *; }; diff --git a/src/epoll-shim.pc.in b/epoll-shim.pc.cmakein similarity index 60% rename from src/epoll-shim.pc.in rename to epoll-shim.pc.cmakein index f4fb1ab..62388db 100644 --- a/src/epoll-shim.pc.in +++ b/epoll-shim.pc.cmakein @@ -1,12 +1,12 @@ prefix=@CMAKE_INSTALL_PREFIX@ exec_prefix=${prefix} -libdir=${exec_prefix}/lib -includedir=${prefix}/include +libdir=${exec_prefix}/@CMAKE_INSTALL_LIBDIR@ +includedir=${prefix}/@CMAKE_INSTALL_INCLUDEDIR@ Name: epoll-shim -URL: https://github.com/FreeBSDDesktop/epoll-shim +URL: https://github.com/jiixyj/epoll-shim Description: Small epoll implementation using kqueue -Version: @PROJECT_VERSION@ +Version: Libs: -L${libdir} -lepoll-shim Libs.private: -pthread -lrt Cflags: -I${includedir}/libepoll-shim diff --git a/include/sys/epoll.h b/include/sys/epoll.h index 4894d89..8ecac06 100644 --- a/include/sys/epoll.h +++ b/include/sys/epoll.h @@ -5,11 +5,9 @@ extern "C" { #endif -/* include the same file as musl */ -#include /* IWYU pragma: keep */ - -#include #include +#include +#include #if 0 #define __NEED_sigset_t @@ -25,6 +23,7 @@ enum EPOLL_EVENTS { __EPOLL_DUMMY }; #define EPOLLPRI 0x002 #define EPOLLOUT 0x004 #define EPOLLRDNORM 0x040 +#define EPOLLNVAL 0x020 #define EPOLLRDBAND 0x080 #define EPOLLWRNORM 0x100 #define 
EPOLLWRBAND 0x200 @@ -32,6 +31,7 @@ enum EPOLL_EVENTS { __EPOLL_DUMMY }; #define EPOLLERR 0x008 #define EPOLLHUP 0x010 #define EPOLLRDHUP 0x2000 +#define EPOLLEXCLUSIVE (1U<<28) #define EPOLLWAKEUP (1U<<29) #define EPOLLONESHOT (1U<<30) #define EPOLLET (1U<<31) @@ -51,22 +51,30 @@ struct epoll_event { uint32_t events; epoll_data_t data; } -#if defined(__amd64__) -__attribute__((packed)) +#ifdef __x86_64__ +__attribute__ ((__packed__)) #endif ; -int epoll_create(int /*size*/); -int epoll_create1(int /*flags*/); -int epoll_ctl( - int /*fd*/, int /*op*/, int /*fd2*/, struct epoll_event * /*ev*/); -int epoll_wait( - int /*fd*/, struct epoll_event * /*ev*/, int /*cnt*/, int /*to*/); + +int epoll_create(int); +int epoll_create1(int); +int epoll_ctl(int, int, int, struct epoll_event *); +int epoll_wait(int, struct epoll_event *, int, int); #if 0 int epoll_pwait(int, struct epoll_event *, int, int, const sigset_t *); #endif +#ifndef SHIM_SYS_SHIM_HELPERS +#define SHIM_SYS_SHIM_HELPERS +#include /* IWYU pragma: keep */ + +extern int epoll_shim_close(int); +#define close epoll_shim_close +#endif + + #ifdef __cplusplus } #endif diff --git a/include/sys/eventfd.h b/include/sys/eventfd.h new file mode 100644 index 0000000..6cb5426 --- /dev/null +++ b/include/sys/eventfd.h @@ -0,0 +1,47 @@ +#ifndef SHIM_SYS_EVENTFD_H +#define SHIM_SYS_EVENTFD_H + +#ifdef __cplusplus +extern "C" { +#endif + +#include +#include + +typedef uint64_t eventfd_t; + +#define EFD_SEMAPHORE 1 +#define EFD_CLOEXEC O_CLOEXEC +#define EFD_NONBLOCK O_NONBLOCK + +int eventfd(unsigned int, int); +int eventfd_read(int, eventfd_t *); +int eventfd_write(int, eventfd_t); + + +#ifndef SHIM_SYS_SHIM_HELPERS +#define SHIM_SYS_SHIM_HELPERS +#include /* IWYU pragma: keep */ + +extern int epoll_shim_close(int); +#define close epoll_shim_close +#endif + +#ifndef SHIM_SYS_SHIM_HELPERS_READ +#define SHIM_SYS_SHIM_HELPERS_READ +extern ssize_t epoll_shim_read(int, void *, size_t); +#define read epoll_shim_read +#endif + 
+#ifndef SHIM_SYS_SHIM_HELPERS_WRITE +#define SHIM_SYS_SHIM_HELPERS_WRITE +extern ssize_t epoll_shim_write(int, void const*, size_t); +#define write epoll_shim_write +#endif + + +#ifdef __cplusplus +} +#endif + +#endif /* sys/eventfd.h */ diff --git a/include/sys/signalfd.h b/include/sys/signalfd.h index 0a3f273..162fae5 100644 --- a/include/sys/signalfd.h +++ b/include/sys/signalfd.h @@ -45,16 +45,22 @@ struct signalfd_siginfo { uint8_t pad[128-12*4-4*8-2]; }; + #ifndef SHIM_SYS_SHIM_HELPERS #define SHIM_SYS_SHIM_HELPERS #include /* IWYU pragma: keep */ -extern int epoll_shim_close(int /*fd*/); -extern ssize_t epoll_shim_read(int /*fd*/, void * /*buf*/, size_t /*nbytes*/); -#define read epoll_shim_read +extern int epoll_shim_close(int); #define close epoll_shim_close #endif +#ifndef SHIM_SYS_SHIM_HELPERS_READ +#define SHIM_SYS_SHIM_HELPERS_READ +extern ssize_t epoll_shim_read(int, void *, size_t); +#define read epoll_shim_read +#endif + + #ifdef __cplusplus } #endif diff --git a/include/sys/timerfd.h b/include/sys/timerfd.h index 7b84699..f5e7797 100644 --- a/include/sys/timerfd.h +++ b/include/sys/timerfd.h @@ -22,16 +22,22 @@ int timerfd_settime(int /*fd*/, int /*flags*/, int timerfd_gettime(int, struct itimerspec *); #endif + #ifndef SHIM_SYS_SHIM_HELPERS #define SHIM_SYS_SHIM_HELPERS #include /* IWYU pragma: keep */ -extern int epoll_shim_close(int /*fd*/); -extern ssize_t epoll_shim_read(int /*fd*/, void * /*buf*/, size_t /*nbytes*/); -#define read epoll_shim_read +extern int epoll_shim_close(int); #define close epoll_shim_close #endif +#ifndef SHIM_SYS_SHIM_HELPERS_READ +#define SHIM_SYS_SHIM_HELPERS_READ +extern ssize_t epoll_shim_read(int, void *, size_t); +#define read epoll_shim_read +#endif + + #ifdef __cplusplus } #endif diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 905a795..e5c91ae 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -1,20 +1,21 @@ set(THREADS_PREFER_PTHREAD_FLAG ON) find_package(Threads REQUIRED) -set(INCLUDES_DIR 
"${CMAKE_CURRENT_SOURCE_DIR}/../include") +add_library(epoll-shim + epoll_shim_ctx.c + epoll.c + epollfd_ctx.c + timerfd.c + timerfd_ctx.c + signalfd.c + signalfd_ctx.c + eventfd.c + eventfd_ctx.c) +target_link_libraries(epoll-shim PRIVATE Threads::Threads rt) +target_include_directories( + epoll-shim + PUBLIC $) -add_library(epoll-shim SHARED epoll.c timerfd.c signalfd.c common.c) -target_link_libraries(epoll-shim PUBLIC Threads::Threads rt) -target_include_directories(epoll-shim PUBLIC "${INCLUDES_DIR}") - -set_target_properties(epoll-shim PROPERTIES - LINK_FLAGS "-Wl,--version-script=${CMAKE_CURRENT_SOURCE_DIR}/../Version.map") +target_link_options(epoll-shim PRIVATE + "LINKER:--version-script=${PROJECT_SOURCE_DIR}/Version.map") set_target_properties(epoll-shim PROPERTIES SOVERSION 0) - -configure_file("epoll-shim.pc.in" "epoll-shim.pc" @ONLY) - -install(TARGETS epoll-shim LIBRARY DESTINATION lib) -install(FILES "${INCLUDES_DIR}/sys/epoll.h" DESTINATION include/libepoll-shim/sys) -install(FILES "${INCLUDES_DIR}/sys/signalfd.h" DESTINATION include/libepoll-shim/sys) -install(FILES "${INCLUDES_DIR}/sys/timerfd.h" DESTINATION include/libepoll-shim/sys) -install(FILES "epoll-shim.pc" DESTINATION lib/pkgconfig) diff --git a/src/common.c b/src/common.c deleted file mode 100644 index 6c6eddc..0000000 --- a/src/common.c +++ /dev/null @@ -1,52 +0,0 @@ -#include - -#include -#include -#include - -struct signalfd_context; -struct timerfd_context; - -extern pthread_mutex_t timerfd_context_mtx; -extern struct timerfd_context *get_timerfd_context(int fd, bool create_new); -extern ssize_t timerfd_read( - struct timerfd_context *, void *buf, size_t nbytes); -extern int timerfd_close(struct timerfd_context *); - -extern pthread_mutex_t signalfd_context_mtx; -extern struct signalfd_context *get_signalfd_context(int fd, bool create_new); -extern ssize_t signalfd_read( - struct signalfd_context *, void *buf, size_t nbytes); -extern int signalfd_close(struct signalfd_context *); - 
-#define WRAP(context, return_type, call, unlock_after_call) \ - if (fd >= 0) { \ - pthread_mutex_lock(&context##_mtx); \ - struct context *ctx = get_##context(fd, false); \ - if (ctx) { \ - return_type ret = (call); \ - if (unlock_after_call) { \ - pthread_mutex_unlock(&context##_mtx); \ - } \ - return ret; \ - } \ - pthread_mutex_unlock(&context##_mtx); \ - } - -int -epoll_shim_close(int fd) -{ - WRAP(timerfd_context, int, timerfd_close(ctx), true) - WRAP(signalfd_context, int, signalfd_close(ctx), true) - - return close(fd); -} - -ssize_t -epoll_shim_read(int fd, void *buf, size_t nbytes) -{ - WRAP(timerfd_context, ssize_t, timerfd_read(ctx, buf, nbytes), false) - WRAP(signalfd_context, ssize_t, signalfd_read(ctx, buf, nbytes), false) - - return read(fd, buf, nbytes); -} diff --git a/src/epoll.c b/src/epoll.c index 771a7f8..bd5a8be 100644 --- a/src/epoll.c +++ b/src/epoll.c @@ -1,343 +1,126 @@ #include #include -#include -#include +#include +#include +#include #include #include #include #include -int -epoll_create(int size) -{ - (void)size; +#include "epoll_shim_ctx.h" - errno = EINVAL; - return -1; -} +// TODO(jan): Remove this once the definition is exposed in <sys/time.h> in +// all supported FreeBSD versions. 
+#ifndef timespecsub +#define timespecsub(tsp, usp, vsp) \ + do { \ + (vsp)->tv_sec = (tsp)->tv_sec - (usp)->tv_sec; \ + (vsp)->tv_nsec = (tsp)->tv_nsec - (usp)->tv_nsec; \ + if ((vsp)->tv_nsec < 0) { \ + (vsp)->tv_sec--; \ + (vsp)->tv_nsec += 1000000000L; \ + } \ + } while (0) +#endif -int -epoll_create1(int flags) +static errno_t +epollfd_close(FDContextMapNode *node) { - if (flags != EPOLL_CLOEXEC) { - errno = EINVAL; - return -1; - } - - return kqueue(); + return epollfd_ctx_terminate(&node->ctx.epollfd); } -static int poll_fd = -1; -static int poll_epoll_fd = -1; -static void *poll_ptr; +static FDContextVTable const epollfd_vtable = { + .read_fun = fd_context_default_read, + .write_fun = fd_context_default_write, + .close_fun = epollfd_close, +}; -#define KEY_BITS (20) -#define VAL_BITS (32 - KEY_BITS) -static int -kqueue_save_state(int kq, uint32_t key, uint16_t val) +static FDContextMapNode * +epoll_create_impl(errno_t *ec) { - struct kevent kev[VAL_BITS * 2]; - int n = 0; - int i; - int oe, e; - - if ((key & ~(((uint32_t)1 << KEY_BITS) - 1)) || - (val & ~(((uint16_t)1 << VAL_BITS) - 1))) { - return (-EINVAL); - } + FDContextMapNode *node; - for (i = 0; i < VAL_BITS; ++i) { - uint32_t info_bit = (uint32_t)1 << i; - uint32_t kev_key = key | (info_bit << KEY_BITS); - EV_SET(&kev[n], kev_key, EVFILT_USER, EV_ADD, 0, 0, 0); - ++n; - if (!(val & info_bit)) { - EV_SET(&kev[n], kev_key, EVFILT_USER, /**/ - EV_DELETE, 0, 0, 0); - ++n; - } + node = epoll_shim_ctx_create_node(&epoll_shim_ctx, ec); + if (!node) { + return NULL; } - oe = errno; - if ((n = kevent(kq, kev, n, NULL, 0, NULL)) < 0) { - e = errno; - errno = oe; - return (-e); - } + node->flags = 0; - return (0); -} - -static int -kqueue_load_state(int kq, uint32_t key, uint16_t *val) -{ - struct kevent kev[VAL_BITS]; - int n = 0; - int i; - uint16_t nval = 0; - int oe, e; - - if ((key & ~(((uint32_t)1 << KEY_BITS) - 1))) { - return (-EINVAL); + if ((*ec = epollfd_ctx_init(&node->ctx.epollfd, /**/ + 
node->fd)) != 0) { + goto fail; } - for (i = 0; i < VAL_BITS; ++i) { - uint32_t info_bit = (uint32_t)1 << i; - uint32_t kev_key = key | (info_bit << KEY_BITS); - EV_SET(&kev[i], kev_key, EVFILT_USER, EV_RECEIPT, 0, 0, 0); - } - - oe = errno; - if ((n = kevent(kq, kev, VAL_BITS, kev, VAL_BITS, NULL)) < 0) { - e = errno; - errno = oe; - return (-e); - } - - for (i = 0; i < n; ++i) { - if (!(kev[i].flags & EV_ERROR)) { - return (-EINVAL); - } + node->vtable = &epollfd_vtable; + return node; - if (kev[i].data == 0) { - nval |= (uint32_t)1 << i; - } else if (kev[i].data != ENOENT) { - return (-EINVAL); - } - } - - *val = nval; - - return (0); +fail: + epoll_shim_ctx_remove_node_explicit(&epoll_shim_ctx, node); + (void)fd_context_map_node_destroy(node); + return NULL; } -#define KQUEUE_STATE_REGISTERED 0x1u -#define KQUEUE_STATE_EPOLLIN 0x2u -#define KQUEUE_STATE_EPOLLOUT 0x4u -#define KQUEUE_STATE_EPOLLRDHUP 0x8u -#define KQUEUE_STATE_NYCSS 0x10u -#define KQUEUE_STATE_ISFIFO 0x20u -#define KQUEUE_STATE_ISSOCK 0x40u - static int -is_not_yet_connected_stream_socket(int s) +epoll_create_common(void) { + FDContextMapNode *node; + errno_t ec; - { - int val; - socklen_t length = sizeof(int); - - if (getsockopt(s, SOL_SOCKET, SO_ACCEPTCONN, /**/ - &val, &length) == 0 && - val) { - return 0; - } - } - - { - int type; - socklen_t length = sizeof(int); - - if (getsockopt(s, SOL_SOCKET, SO_TYPE, &type, &length) == 0 && - (type == SOCK_STREAM || type == SOCK_SEQPACKET)) { - struct sockaddr name; - socklen_t namelen = 0; - if (getpeername(s, &name, &namelen) < 0 && - errno == ENOTCONN) { - return 1; - } - } + node = epoll_create_impl(&ec); + if (!node) { + errno = ec; + return -1; } - return 0; + return node->fd; } int -epoll_ctl(int fd, int op, int fd2, struct epoll_event *ev) +epoll_create(int size) { - struct kevent kev[2]; - uint16_t flags; - int e; - - if ((!ev && op != EPOLL_CTL_DEL) || - (ev && - ((ev->events & - ~(EPOLLIN | EPOLLOUT | EPOLLHUP | EPOLLRDHUP | EPOLLERR)) - /* 
the user should really set one of EPOLLIN or EPOLLOUT - * so that EPOLLHUP and EPOLLERR work. Don't make this a - * hard error for now, though. */ - /* || !(ev->events & (EPOLLIN | EPOLLOUT)) */))) { + if (size <= 0) { errno = EINVAL; - return (-1); - } - - if (fd2 < 0 || ((uint32_t)fd2 & ~(((uint32_t)1 << KEY_BITS) - 1))) { - errno = EBADF; - return (-1); - } - - if ((e = kqueue_load_state(fd, fd2, &flags)) < 0) { - errno = e; - return (-1); + return -1; } - if (op == EPOLL_CTL_ADD) { - if (flags & KQUEUE_STATE_REGISTERED) { - errno = EEXIST; - return (-1); - } - - EV_SET(&kev[0], fd2, EVFILT_READ, - EV_ADD | (ev->events & EPOLLIN ? 0 : EV_DISABLE), 0, 0, - ev->data.ptr); - EV_SET(&kev[1], fd2, EVFILT_WRITE, - EV_ADD | (ev->events & EPOLLOUT ? 0 : EV_DISABLE), 0, 0, - ev->data.ptr); - - flags = KQUEUE_STATE_REGISTERED; - -#define SET_FLAG(flag) \ - do { \ - if (ev->events & (flag)) { \ - flags |= KQUEUE_STATE_##flag; \ - } \ - } while (0) - - SET_FLAG(EPOLLIN); - SET_FLAG(EPOLLOUT); - SET_FLAG(EPOLLRDHUP); - -#undef SET_FLAG - - } else if (op == EPOLL_CTL_DEL) { - if (poll_fd == fd2 && fd == poll_epoll_fd) { - poll_fd = -1; - poll_epoll_fd = -1; - poll_ptr = NULL; - return 0; - } - - if (!(flags & KQUEUE_STATE_REGISTERED)) { - errno = ENOENT; - return (-1); - } - - EV_SET(&kev[0], fd2, EVFILT_READ, EV_DELETE, 0, 0, 0); - EV_SET(&kev[1], fd2, EVFILT_WRITE, EV_DELETE, 0, 0, 0); - - flags = 0; - } else if (op == EPOLL_CTL_MOD) { - if (!(flags & KQUEUE_STATE_REGISTERED)) { - errno = ENOENT; - return (-1); - } - - EV_SET(&kev[0], fd2, EVFILT_READ, - ev->events & EPOLLIN ? EV_ENABLE : EV_DISABLE, 0, 0, - ev->data.ptr); - EV_SET(&kev[1], fd2, EVFILT_WRITE, - ev->events & EPOLLOUT ? 
EV_ENABLE : EV_DISABLE, 0, 0, - ev->data.ptr); - -#define SET_FLAG(flag) \ - do { \ - if (ev->events & (flag)) { \ - flags |= KQUEUE_STATE_##flag; \ - } else { \ - flags &= ~KQUEUE_STATE_##flag; \ - } \ - } while (0) - - SET_FLAG(EPOLLIN); - SET_FLAG(EPOLLOUT); - SET_FLAG(EPOLLRDHUP); - -#undef SET_FLAG + return epoll_create_common(); +} - } else { +int +epoll_create1(int flags) +{ + if (flags & ~EPOLL_CLOEXEC) { errno = EINVAL; - return (-1); + return -1; } - for (int i = 0; i < 2; ++i) { - kev[i].flags |= EV_RECEIPT; - } + return epoll_create_common(); +} - int ret = kevent(fd, kev, 2, kev, 2, NULL); - if (ret < 0) { - return -1; - } +int +epoll_ctl(int fd, int op, int fd2, struct epoll_event *ev) +{ + errno_t ec; + FDContextMapNode *node; - if (ret != 2) { + node = epoll_shim_ctx_find_node(&epoll_shim_ctx, fd); + if (!node || node->vtable != &epollfd_vtable) { errno = EINVAL; return -1; } - for (int i = 0; i < 2; ++i) { - if (!(kev[i].flags & EV_ERROR)) { - errno = EINVAL; - return -1; - } - - if (kev[i].data == ENODEV && poll_fd < 0) { - poll_fd = fd2; - poll_epoll_fd = fd; - poll_ptr = ev->data.ptr; - return 0; - } - - /* - * Ignore EVFILT_WRITE registration EINVAL errors (some fd - * types such as kqueues themselves don't support it). - * Also ignore ENOENT -- this happens when trying to remove a - * previously added fd where the EVFILT_WRITE registration - * failed. 
- */ - if (i == 1 && - (kev[i].data == EINVAL || kev[i].data == ENOENT)) { - continue; - } - - if (kev[i].data != 0) { - errno = kev[i].data; - return -1; - } - } - - if (op != EPOLL_CTL_DEL && is_not_yet_connected_stream_socket(fd2)) { - EV_SET(&kev[0], fd2, EVFILT_READ, EV_ENABLE | EV_FORCEONESHOT, - 0, 0, ev->data.ptr); - if (kevent(fd, kev, 1, NULL, 0, NULL) < 0) { - return -1; - } - - flags |= KQUEUE_STATE_NYCSS; - } - - struct stat statbuf; - if (fstat(fd2, &statbuf) < 0) { + if ((ec = epollfd_ctx_ctl(&node->ctx.epollfd, op, fd2, ev)) != 0) { + errno = ec; return -1; } - if (S_ISFIFO(statbuf.st_mode)) { - flags |= KQUEUE_STATE_ISFIFO; - } else if (S_ISSOCK(statbuf.st_mode)) { - flags |= KQUEUE_STATE_ISSOCK; - } - - if ((e = kqueue_save_state(fd, fd2, flags)) < 0) { - errno = e; - return (-1); - } - return 0; } -#undef VAL_BITS -#undef KEY_BITS - #if 0 int epoll_pwait( @@ -352,215 +135,96 @@ epoll_pwait( } #endif -int -epoll_wait(int fd, struct epoll_event *ev, int cnt, int to) +static errno_t +epollfd_ctx_wait_or_block(EpollFDCtx *epollfd, struct epoll_event *ev, int cnt, + int *actual_cnt, int to) { - if (cnt < 1 || cnt > 32) { - errno = EINVAL; - return -1; - } + struct timespec deadline; - if (poll_fd != -1 && fd == poll_epoll_fd) { - struct pollfd pfds[2]; - pfds[0].fd = poll_fd; - pfds[0].events = POLLIN; - pfds[1].fd = fd; - pfds[1].events = POLLIN; - int ret = poll(pfds, 2, to); - if (ret <= 0) { - return ret; + if (to > 0) { + if (clock_gettime(CLOCK_MONOTONIC, &deadline) < 0) { + return errno; } - if (pfds[0].revents & POLLIN) { - ev[0].events = EPOLLIN; - ev[0].data.ptr = poll_ptr; - return 1; + + if (__builtin_add_overflow(deadline.tv_sec, to / 1000 + 1, + &deadline.tv_sec)) { + return EINVAL; } - to = 0; - } + deadline.tv_sec -= 1; - struct timespec timeout = {0, 0}; - if (to > 0) { - timeout.tv_sec = to / 1000; - timeout.tv_nsec = (to % 1000) * 1000 * 1000; + deadline.tv_nsec += (to % 1000) * 1000000L; + if (deadline.tv_nsec >= 1000000000) { + 
deadline.tv_nsec -= 1000000000; + deadline.tv_sec += 1; + } } - struct timespec *ptimeout = NULL; - if (to >= 0) { - ptimeout = &timeout; - } + for (;;) { + errno_t ec = epollfd_ctx_wait(epollfd, ev, cnt, actual_cnt); + if (ec || *actual_cnt || to == 0) { + return ec; + } -again:; - struct kevent evlist[32]; - int ret = kevent(fd, NULL, 0, evlist, cnt, ptimeout); - if (ret < 0) { - return -1; - } + struct timespec current_time; + struct timespec timeout; - int j = 0; - - for (int i = 0; i < ret; ++i) { - int events = 0; - if (evlist[i].filter == EVFILT_READ) { - events |= EPOLLIN; - if (evlist[i].flags & EV_ONESHOT) { - uint16_t flags = 0; - kqueue_load_state(fd, evlist[i].ident, &flags); - - if (flags & KQUEUE_STATE_NYCSS) { - if (is_not_yet_connected_stream_socket( - evlist[i].ident)) { - - events = EPOLLHUP; - if (flags & - KQUEUE_STATE_EPOLLOUT) { - events |= EPOLLOUT; - } - - struct kevent nkev[2]; - EV_SET(&nkev[0], - evlist[i].ident, - EVFILT_READ, EV_ADD, /**/ - 0, 0, evlist[i].udata); - EV_SET(&nkev[1], - evlist[i].ident, - EVFILT_READ, - EV_ENABLE | - EV_FORCEONESHOT, - 0, 0, evlist[i].udata); - - kevent(fd, nkev, 2, NULL, 0, - NULL); - } else { - flags &= ~KQUEUE_STATE_NYCSS; - - struct kevent nkev[2]; - EV_SET(&nkev[0], - evlist[i].ident, - EVFILT_READ, EV_ADD, /**/ - 0, 0, evlist[i].udata); - EV_SET(&nkev[1], - evlist[i].ident, - EVFILT_READ, - flags & KQUEUE_STATE_EPOLLIN - ? 
EV_ENABLE - : EV_DISABLE, - 0, 0, evlist[i].udata); - - kevent(fd, nkev, 2, NULL, 0, - NULL); - kqueue_save_state(fd, - evlist[i].ident, flags); - - continue; - } - } + if (to > 0) { + if (clock_gettime(CLOCK_MONOTONIC, /**/ + &current_time) < 0) { + return errno; } - } else if (evlist[i].filter == EVFILT_WRITE) { - events |= EPOLLOUT; - } - - if (evlist[i].flags & EV_ERROR) { - events |= EPOLLERR; - } - if (evlist[i].flags & EV_EOF) { - if (evlist[i].fflags) { - events |= EPOLLERR; + timespecsub(&deadline, &current_time, &timeout); + if (timeout.tv_sec < 0) { + timeout.tv_sec = 0; + timeout.tv_nsec = 0; } - uint16_t flags = 0; - kqueue_load_state(fd, evlist[i].ident, &flags); - - int epoll_event; - - if (flags & KQUEUE_STATE_ISFIFO) { - if (evlist[i].filter == EVFILT_READ) { - epoll_event = EPOLLHUP; - if (evlist[i].data == 0) { - events &= ~EPOLLIN; - } - } else if (evlist[i].filter == EVFILT_WRITE) { - epoll_event = EPOLLERR; - } else { - /* should not happen */ - return -1; - } - } else if (flags & KQUEUE_STATE_ISSOCK) { - if (evlist[i].filter == EVFILT_READ) { - /* do some special EPOLLRDHUP handling - * for sockets */ - - /* if we are reading, we just know for - * sure that we can't receive any more, - * so use EPOLLIN/EPOLLRDHUP per - * default */ - epoll_event = EPOLLIN; - - if (flags & KQUEUE_STATE_EPOLLRDHUP) { - epoll_event |= EPOLLRDHUP; - } - } else if (evlist[i].filter == EVFILT_WRITE) { - epoll_event = EPOLLOUT; - } else { - /* should not happen */ - return -1; - } - - struct pollfd pfd = {.fd = evlist[i].ident, - .events = POLLIN | POLLOUT | POLLHUP}; - - if (poll(&pfd, 1, 0) == 1) { - if (pfd.revents & POLLHUP) { - /* - * We need to set these flags - * so that readers still have a - * chance to read the last data - * from the socket. This is - * very important to preserve - * Linux poll/epoll semantics - * when coming from an - * EVFILT_WRITE event. 
- */ - if (flags & - KQUEUE_STATE_EPOLLIN) { - epoll_event |= EPOLLIN; - } - if (flags & - KQUEUE_STATE_EPOLLRDHUP) { - epoll_event |= - EPOLLRDHUP; - } - - epoll_event |= EPOLLHUP; - } - - /* might as well steal flags from the - * poll call while we're here */ - - if ((pfd.revents & POLLIN) && - (flags & KQUEUE_STATE_EPOLLIN)) { - epoll_event |= EPOLLIN; - } - - if ((pfd.revents & POLLOUT) && - (flags & KQUEUE_STATE_EPOLLOUT)) { - epoll_event |= EPOLLOUT; - } - } - } else { - epoll_event = EPOLLHUP; + to = (int)((timeout.tv_sec * 1000) + + (timeout.tv_nsec / 1000000) + + !!(timeout.tv_nsec % 1000000)); + + if (to == 0) { + continue; } + } + + assert(to != 0); - events |= epoll_event; + /* + * We should add a notification mechanism when a new poll-only + * fd gets registered when this thread sleeps... + */ + struct pollfd pfds[2]; + (void)pthread_mutex_lock(&epollfd->mutex); + pfds[0] = epollfd->pfds[0]; + pfds[1] = epollfd->pfds[1]; + (void)pthread_mutex_unlock(&epollfd->mutex); + + if (poll(pfds, 2, MAX(to, -1)) < 0) { + return errno; } - ev[j].events = events; - ev[j].data.ptr = evlist[i].udata; - ++j; } +} - if (ret && j == 0) { - goto again; +int +epoll_wait(int fd, struct epoll_event *ev, int cnt, int to) +{ + errno_t ec; + FDContextMapNode *node; + + node = epoll_shim_ctx_find_node(&epoll_shim_ctx, fd); + if (!node || node->vtable != &epollfd_vtable) { + errno = EINVAL; + return -1; + } + + int actual_cnt; + if ((ec = epollfd_ctx_wait_or_block(&node->ctx.epollfd, ev, cnt, + &actual_cnt, to)) != 0) { + errno = ec; + return -1; } - return j; + return actual_cnt; } diff --git a/src/epoll_shim_ctx.c b/src/epoll_shim_ctx.c new file mode 100644 index 0000000..82a3dad --- /dev/null +++ b/src/epoll_shim_ctx.c @@ -0,0 +1,278 @@ +#include "epoll_shim_ctx.h" + +#include + +#include +#include +#include + +static void +fd_context_map_node_init(FDContextMapNode *node, int kq) +{ + node->fd = kq; + node->vtable = NULL; +} + +static FDContextMapNode * 
+fd_context_map_node_create(int kq, errno_t *ec) +{ + FDContextMapNode *node; + + node = malloc(sizeof(FDContextMapNode)); + if (!node) { + *ec = errno; + return NULL; + } + + fd_context_map_node_init(node, kq); + return node; +} + +static errno_t +fd_context_map_node_terminate(FDContextMapNode *node, bool close_fd) +{ + errno_t ec = node->vtable ? node->vtable->close_fun(node) : 0; + + if (close_fd && close(node->fd) < 0) { + ec = ec ? ec : errno; + } + + return ec; +} + +errno_t +fd_context_map_node_destroy(FDContextMapNode *node) +{ + errno_t ec = fd_context_map_node_terminate(node, true); + free(node); + return ec; +} + +/**/ + +errno_t +fd_context_default_read(FDContextMapNode *node, /**/ + void *buf, size_t nbytes, size_t *bytes_transferred) +{ + (void)node; + (void)buf; + (void)nbytes; + (void)bytes_transferred; + + return EINVAL; +} + +errno_t +fd_context_default_write(FDContextMapNode *node, /**/ + void const *buf, size_t nbytes, size_t *bytes_transferred) +{ + (void)node; + (void)buf; + (void)nbytes; + (void)bytes_transferred; + + return EINVAL; +} + +/**/ + +static int +fd_context_map_node_cmp(FDContextMapNode *e1, FDContextMapNode *e2) +{ + return (e1->fd < e2->fd) ? -1 : (e1->fd > e2->fd); +} + +RB_GENERATE_STATIC(fd_context_map_, fd_context_map_node_, entry, + fd_context_map_node_cmp) + +EpollShimCtx epoll_shim_ctx = { + .fd_context_map = RB_INITIALIZER(&fd_context_map), + .mutex = PTHREAD_MUTEX_INITIALIZER, +}; + +static FDContextMapNode * +epoll_shim_ctx_create_node_impl(EpollShimCtx *epoll_shim_ctx, int kq, + errno_t *ec) +{ + FDContextMapNode *node; + + { + FDContextMapNode find; + find.fd = kq; + + node = RB_FIND(fd_context_map_, /**/ + &epoll_shim_ctx->fd_context_map, &find); + } + + if (node) { + /* + * If we get here, someone must have already closed the old fd + * with a normal 'close()' call, i.e. not with our + * 'epoll_shim_close()' wrapper. The fd inside the node + * refers now to the new kq we are currently creating. 
We + * must not close it, but we must clean up the old context + * object! + */ + (void)fd_context_map_node_terminate(node, false); + fd_context_map_node_init(node, kq); + } else { + node = fd_context_map_node_create(kq, ec); + if (!node) { + return NULL; + } + + if (RB_INSERT(fd_context_map_, &epoll_shim_ctx->fd_context_map, + node)) { + assert(0); + } + } + + return node; +} + +FDContextMapNode * +epoll_shim_ctx_create_node(EpollShimCtx *epoll_shim_ctx, errno_t *ec) +{ + FDContextMapNode *node; + + int kq = kqueue(); + if (kq < 0) { + *ec = errno; + return NULL; + } + + (void)pthread_mutex_lock(&epoll_shim_ctx->mutex); + node = epoll_shim_ctx_create_node_impl(epoll_shim_ctx, kq, ec); + (void)pthread_mutex_unlock(&epoll_shim_ctx->mutex); + + if (!node) { + close(kq); + } + + return node; +} + +static FDContextMapNode * +epoll_shim_ctx_find_node_impl(EpollShimCtx *epoll_shim_ctx, int fd) +{ + FDContextMapNode *node; + + FDContextMapNode find; + find.fd = fd; + + node = RB_FIND(fd_context_map_, /**/ + &epoll_shim_ctx->fd_context_map, &find); + + return node; +} + +FDContextMapNode * +epoll_shim_ctx_find_node(EpollShimCtx *epoll_shim_ctx, int fd) +{ + FDContextMapNode *node; + + (void)pthread_mutex_lock(&epoll_shim_ctx->mutex); + node = epoll_shim_ctx_find_node_impl(epoll_shim_ctx, fd); + (void)pthread_mutex_unlock(&epoll_shim_ctx->mutex); + + return node; +} + +FDContextMapNode * +epoll_shim_ctx_remove_node(EpollShimCtx *epoll_shim_ctx, int fd) +{ + FDContextMapNode *node; + + (void)pthread_mutex_lock(&epoll_shim_ctx->mutex); + node = epoll_shim_ctx_find_node_impl(epoll_shim_ctx, fd); + if (node) { + RB_REMOVE(fd_context_map_, /**/ + &epoll_shim_ctx->fd_context_map, node); + } + (void)pthread_mutex_unlock(&epoll_shim_ctx->mutex); + + return node; +} + +void +epoll_shim_ctx_remove_node_explicit(EpollShimCtx *epoll_shim_ctx, + FDContextMapNode *node) +{ + (void)pthread_mutex_lock(&epoll_shim_ctx->mutex); + RB_REMOVE(fd_context_map_, /**/ + 
&epoll_shim_ctx->fd_context_map, node); + (void)pthread_mutex_unlock(&epoll_shim_ctx->mutex); +} + +/**/ + +int +epoll_shim_close(int fd) +{ + FDContextMapNode *node; + + node = epoll_shim_ctx_remove_node(&epoll_shim_ctx, fd); + if (!node) { + return close(fd); + } + + errno_t ec = fd_context_map_node_destroy(node); + if (ec != 0) { + errno = ec; + return -1; + } + + return 0; +} + +ssize_t +epoll_shim_read(int fd, void *buf, size_t nbytes) +{ + FDContextMapNode *node; + + node = epoll_shim_ctx_find_node(&epoll_shim_ctx, fd); + if (!node) { + return read(fd, buf, nbytes); + } + + if (nbytes > SSIZE_MAX) { + errno = EINVAL; + return -1; + } + + size_t bytes_transferred; + errno_t ec = node->vtable->read_fun(node, /**/ + buf, nbytes, &bytes_transferred); + if (ec != 0) { + errno = ec; + return -1; + } + + return (ssize_t)bytes_transferred; +} + +ssize_t +epoll_shim_write(int fd, void const *buf, size_t nbytes) +{ + FDContextMapNode *node; + + node = epoll_shim_ctx_find_node(&epoll_shim_ctx, fd); + if (!node) { + return write(fd, buf, nbytes); + } + + if (nbytes > SSIZE_MAX) { + errno = EINVAL; + return -1; + } + + size_t bytes_transferred; + errno_t ec = node->vtable->write_fun(node, /**/ + buf, nbytes, &bytes_transferred); + if (ec != 0) { + errno = ec; + return -1; + } + + return (ssize_t)bytes_transferred; +} diff --git a/src/epoll_shim_ctx.h b/src/epoll_shim_ctx.h new file mode 100644 index 0000000..d0497e8 --- /dev/null +++ b/src/epoll_shim_ctx.h @@ -0,0 +1,74 @@ +#ifndef EPOLL_SHIM_CTX_H_ +#define EPOLL_SHIM_CTX_H_ + +#include + +#include + +#include "epollfd_ctx.h" +#include "eventfd_ctx.h" +#include "signalfd_ctx.h" +#include "timerfd_ctx.h" + +struct fd_context_map_node_; +typedef struct fd_context_map_node_ FDContextMapNode; + +typedef errno_t (*fd_context_read_fun)(FDContextMapNode *node, /**/ + void *buf, size_t nbytes, size_t *bytes_transferred); +typedef errno_t (*fd_context_write_fun)(FDContextMapNode *node, /**/ + const void *buf, size_t nbytes, 
size_t *bytes_transferred); +typedef errno_t (*fd_context_close_fun)(FDContextMapNode *node); + +typedef struct { + fd_context_read_fun read_fun; + fd_context_write_fun write_fun; + fd_context_close_fun close_fun; +} FDContextVTable; + +errno_t fd_context_default_read(FDContextMapNode *node, /**/ + void *buf, size_t nbytes, size_t *bytes_transferred); +errno_t fd_context_default_write(FDContextMapNode *node, /**/ + void const *buf, size_t nbytes, size_t *bytes_transferred); + +struct fd_context_map_node_ { + RB_ENTRY(fd_context_map_node_) entry; + int fd; + int flags; + union { + EpollFDCtx epollfd; + EventFDCtx eventfd; + TimerFDCtx timerfd; + SignalFDCtx signalfd; + } ctx; + FDContextVTable const *vtable; +}; + +errno_t fd_context_map_node_destroy(FDContextMapNode *node); + +/**/ + +typedef RB_HEAD(fd_context_map_, fd_context_map_node_) FDContextMap; + +typedef struct { + FDContextMap fd_context_map; + pthread_mutex_t mutex; +} EpollShimCtx; + +extern EpollShimCtx epoll_shim_ctx; + +FDContextMapNode *epoll_shim_ctx_create_node(EpollShimCtx *epoll_shim_ctx, + errno_t *ec); +FDContextMapNode *epoll_shim_ctx_find_node(EpollShimCtx *epoll_shim_ctx, + int fd); +FDContextMapNode *epoll_shim_ctx_remove_node(EpollShimCtx *epoll_shim_ctx, + int fd); +void epoll_shim_ctx_remove_node_explicit(EpollShimCtx *epoll_shim_ctx, + FDContextMapNode *node); + +/**/ + +int epoll_shim_close(int fd); +ssize_t epoll_shim_read(int fd, void *buf, size_t nbytes); +ssize_t epoll_shim_write(int fd, void const *buf, size_t nbytes); + +#endif diff --git a/src/epollfd_ctx.c b/src/epollfd_ctx.c new file mode 100644 index 0000000..42547ed --- /dev/null +++ b/src/epollfd_ctx.c @@ -0,0 +1,626 @@ +#include "epollfd_ctx.h" + +#include + +#include +#include +#include + +#include + +#include + +static int +fd_cmp(RegisteredFDsNode *e1, RegisteredFDsNode *e2) +{ + return (e1->fd < e2->fd) ? 
-1 : (e1->fd > e2->fd); +} + +RB_GENERATE_STATIC(registered_fds_set_, registered_fds_node_, entry, fd_cmp) + +errno_t +epollfd_ctx_init(EpollFDCtx *epollfd, int kq) +{ + errno_t ec; + + *epollfd = (EpollFDCtx){ + .kq = kq, + .registered_fds = RB_INITIALIZER(&registered_fds), + }; + + epollfd->pfds[0].fd = -1; + epollfd->pfds[1] = (struct pollfd){.fd = kq, .events = POLLIN}; + + if ((ec = pthread_mutex_init(&epollfd->mutex, NULL)) != 0) { + return ec; + } + + return 0; +} + +errno_t +epollfd_ctx_terminate(EpollFDCtx *epollfd) +{ + errno_t ec = 0; + errno_t ec_local = 0; + + ec_local = pthread_mutex_destroy(&epollfd->mutex); + ec = ec ? ec : ec_local; + + return ec; +} + +#define KEY_BITS (20) +#define VAL_BITS (32 - KEY_BITS) +static errno_t +kqueue_save_state(int kq, uint32_t key, uint16_t val) +{ + struct kevent kev[VAL_BITS * 2]; + int n = 0; + int i; + int oe, ec; + + if ((key & ~(((uint32_t)1 << KEY_BITS) - 1)) || + (val & ~(((uint16_t)1 << VAL_BITS) - 1))) { + return (EINVAL); + } + + for (i = 0; i < VAL_BITS; ++i) { + uint32_t info_bit = (uint32_t)1 << i; + uint32_t kev_key = key | (info_bit << KEY_BITS); + EV_SET(&kev[n], kev_key, EVFILT_USER, EV_ADD, 0, 0, 0); + ++n; + if (!(val & info_bit)) { + EV_SET(&kev[n], kev_key, EVFILT_USER, /**/ + EV_DELETE, 0, 0, 0); + ++n; + } + } + + oe = errno; + if ((n = kevent(kq, kev, n, NULL, 0, NULL)) < 0) { + ec = errno; + errno = oe; + return (ec); + } + + return (0); +} + +static errno_t +kqueue_load_state(int kq, uint32_t key, uint16_t *val) +{ + struct kevent kev[VAL_BITS]; + int n = 0; + int i; + uint16_t nval = 0; + int oe, ec; + + if ((key & ~(((uint32_t)1 << KEY_BITS) - 1))) { + return (EINVAL); + } + + for (i = 0; i < VAL_BITS; ++i) { + uint32_t info_bit = (uint32_t)1 << i; + uint32_t kev_key = key | (info_bit << KEY_BITS); + EV_SET(&kev[i], kev_key, EVFILT_USER, EV_RECEIPT, 0, 0, 0); + } + + oe = errno; + if ((n = kevent(kq, kev, VAL_BITS, kev, VAL_BITS, NULL)) < 0) { + ec = errno; + errno = oe; + return (ec); + } 
+ + for (i = 0; i < n; ++i) { + if (!(kev[i].flags & EV_ERROR)) { + return (EINVAL); + } + + if (kev[i].data == 0) { + nval |= (uint32_t)1 << i; + } else if (kev[i].data != ENOENT) { + return (EINVAL); + } + } + + *val = nval; + + return (0); +} + +#define KQUEUE_STATE_REGISTERED 0x1u +#define KQUEUE_STATE_EPOLLIN 0x2u +#define KQUEUE_STATE_EPOLLOUT 0x4u +#define KQUEUE_STATE_EPOLLRDHUP 0x8u +#define KQUEUE_STATE_NYCSS 0x10u +#define KQUEUE_STATE_ISFIFO 0x20u +#define KQUEUE_STATE_ISSOCK 0x40u + +static int +is_not_yet_connected_stream_socket(int s) +{ + + { + int val; + socklen_t length = sizeof(int); + + if (getsockopt(s, SOL_SOCKET, SO_ACCEPTCONN, /**/ + &val, &length) == 0 && + val) { + return 0; + } + } + + { + int type; + socklen_t length = sizeof(int); + + if (getsockopt(s, SOL_SOCKET, SO_TYPE, &type, &length) == 0 && + (type == SOCK_STREAM || type == SOCK_SEQPACKET)) { + struct sockaddr name; + socklen_t namelen = 0; + if (getpeername(s, &name, &namelen) < 0 && + errno == ENOTCONN) { + return 1; + } + } + } + + return 0; +} + +static errno_t +epollfd_ctx_ctl_impl(EpollFDCtx *epollfd, int op, int fd2, + struct epoll_event *ev) +{ + struct kevent kev[2]; + uint16_t flags; + errno_t ec; + + if ((!ev && op != EPOLL_CTL_DEL) || + (ev && + ((ev->events & + ~(uint32_t)(EPOLLIN | EPOLLOUT | EPOLLHUP /**/ + | EPOLLRDHUP | EPOLLERR)) + /* the user should really set one of EPOLLIN or EPOLLOUT + * so that EPOLLHUP and EPOLLERR work. Don't make this a + * hard error for now, though. */ + /* || !(ev->events & (EPOLLIN | EPOLLOUT)) */))) { + return EINVAL; + } + + if (fd2 < 0 || ((uint32_t)fd2 & ~(((uint32_t)1 << KEY_BITS) - 1))) { + return EBADF; + } + + if ((ec = kqueue_load_state(epollfd->kq, /**/ + (uint32_t)fd2, &flags)) != 0) { + return ec; + } + + if (op == EPOLL_CTL_ADD) { + if (flags & KQUEUE_STATE_REGISTERED) { + return EEXIST; + } + + EV_SET(&kev[0], fd2, EVFILT_READ, + EV_ADD | (ev->events & EPOLLIN ? 
0 : EV_DISABLE), 0, 0, + ev->data.ptr); + EV_SET(&kev[1], fd2, EVFILT_WRITE, + EV_ADD | (ev->events & EPOLLOUT ? 0 : EV_DISABLE), 0, 0, + ev->data.ptr); + + flags = KQUEUE_STATE_REGISTERED; + +#define SET_FLAG(flag) \ + do { \ + if (ev->events & (flag)) { \ + flags |= KQUEUE_STATE_##flag; \ + } \ + } while (0) + + SET_FLAG(EPOLLIN); + SET_FLAG(EPOLLOUT); + SET_FLAG(EPOLLRDHUP); + +#undef SET_FLAG + + } else if (op == EPOLL_CTL_DEL) { + if (!(flags & KQUEUE_STATE_REGISTERED)) { + return ENOENT; + } + + if (fd2 >= 0 && fd2 == epollfd->pfds[0].fd) { + epollfd->pfds[0].fd = -1; + kqueue_save_state(epollfd->kq, (uint32_t)fd2, 0); + return 0; + } + + EV_SET(&kev[0], fd2, EVFILT_READ, EV_DELETE, 0, 0, 0); + EV_SET(&kev[1], fd2, EVFILT_WRITE, EV_DELETE, 0, 0, 0); + + flags = 0; + } else if (op == EPOLL_CTL_MOD) { + if (!(flags & KQUEUE_STATE_REGISTERED)) { + return ENOENT; + } + + EV_SET(&kev[0], fd2, EVFILT_READ, + ev->events & EPOLLIN ? EV_ENABLE : EV_DISABLE, 0, 0, + ev->data.ptr); + EV_SET(&kev[1], fd2, EVFILT_WRITE, + ev->events & EPOLLOUT ? EV_ENABLE : EV_DISABLE, 0, 0, + ev->data.ptr); + +#define SET_FLAG(flag) \ + do { \ + if (ev->events & (flag)) { \ + flags |= KQUEUE_STATE_##flag; \ + } else { \ + flags &= ~KQUEUE_STATE_##flag; \ + } \ + } while (0) + + SET_FLAG(EPOLLIN); + SET_FLAG(EPOLLOUT); + SET_FLAG(EPOLLRDHUP); + +#undef SET_FLAG + + } else { + return EINVAL; + } + + for (int i = 0; i < 2; ++i) { + kev[i].flags |= EV_RECEIPT; + } + + int ret = kevent(epollfd->kq, kev, 2, kev, 2, NULL); + if (ret < 0) { + return errno; + } + + if (ret != 2) { + return EINVAL; + } + + for (int i = 0; i < 2; ++i) { + if (!(kev[i].flags & EV_ERROR)) { + return EINVAL; + } + + /* Check for fds that only support poll. */ + if (kev[i].data == ENODEV && fd2 >= 0 && + !(ev->events & ~(uint32_t)(EPOLLIN | EPOLLOUT)) && + (epollfd->pfds[0].fd < 0 || epollfd->pfds[0].fd == fd2)) { + epollfd->pfds[0] = (struct pollfd){ + .fd = fd2, + .events = ((ev->events & EPOLLIN) ? 
POLLIN : 0) | + ((ev->events & EPOLLOUT) ? POLLOUT : 0), + }; + epollfd->pollfd_data = ev->data; + goto out; + } + + /* + * Ignore EVFILT_WRITE registration EINVAL errors (some fd + * types such as kqueues themselves don't support it). + * Also ignore ENOENT -- this happens when trying to remove a + * previously added fd where the EVFILT_WRITE registration + * failed. + */ + if (i == 1 && + (kev[i].data == EINVAL || kev[i].data == ENOENT)) { + continue; + } + + if (kev[i].data != 0) { + if (i == 0 && + (kev[i].data == ENOENT || kev[i].data == EBADF)) { + kqueue_save_state(epollfd->kq, /**/ + (uint32_t)fd2, 0); + } + return (int)kev[i].data; + } + } + + if (op != EPOLL_CTL_DEL && is_not_yet_connected_stream_socket(fd2)) { + EV_SET(&kev[0], fd2, EVFILT_READ, EV_ENABLE | EV_FORCEONESHOT, + 0, 0, ev->data.ptr); + if (kevent(epollfd->kq, kev, 1, NULL, 0, NULL) < 0) { + return errno; + } + + flags |= KQUEUE_STATE_NYCSS; + } + + if (op == EPOLL_CTL_ADD) { + struct stat statbuf; + if (fstat(fd2, &statbuf) < 0) { + ec = errno; + /* If the fstat fails for some reason we must clear + * internal state to avoid EEXIST errors in future + * calls to epoll_ctl. 
*/ + (void)kqueue_save_state(epollfd->kq, (uint32_t)fd2, 0); + return ec; + } + + if (S_ISFIFO(statbuf.st_mode)) { + flags |= KQUEUE_STATE_ISFIFO; + } else if (S_ISSOCK(statbuf.st_mode)) { + flags |= KQUEUE_STATE_ISSOCK; + } + } + +out: + if ((ec = kqueue_save_state(epollfd->kq, (uint32_t)fd2, flags)) != 0) { + return ec; + } + + return 0; +} + +#undef VAL_BITS +#undef KEY_BITS + +errno_t +epollfd_ctx_ctl(EpollFDCtx *epollfd, int op, int fd2, struct epoll_event *ev) +{ + errno_t ec; + + (void)pthread_mutex_lock(&epollfd->mutex); + ec = epollfd_ctx_ctl_impl(epollfd, op, fd2, ev); + (void)pthread_mutex_unlock(&epollfd->mutex); + + return ec; +} + +#define SUPPORTED_POLLFLAGS (POLLIN | POLLOUT | POLLERR | POLLHUP | POLLNVAL) + +static uint32_t +poll_to_epoll(int flags) +{ + uint32_t epoll_flags = 0; + + if (flags & POLLIN) { + epoll_flags |= EPOLLIN; + } + if (flags & POLLOUT) { + epoll_flags |= EPOLLOUT; + } + if (flags & POLLERR) { + epoll_flags |= EPOLLERR; + } + if (flags & POLLHUP) { + epoll_flags |= EPOLLHUP; + } + if (flags & POLLNVAL) { + epoll_flags |= EPOLLNVAL; + } + + return epoll_flags; +} + +static errno_t +epollfd_ctx_wait_impl(EpollFDCtx *epollfd, struct epoll_event *ev, int cnt, + int *actual_cnt) +{ + if (cnt < 1 || cnt > 32) { + return EINVAL; + } + + int ret = poll(epollfd->pfds, 2, 0); + if (ret < 0) { + return errno; + } + if (ret == 0) { + *actual_cnt = 0; + return 0; + } + + if (epollfd->pfds[0].revents & SUPPORTED_POLLFLAGS) { + ev[0].events = poll_to_epoll(epollfd->pfds[0].revents); + ev[0].data = epollfd->pollfd_data; + *actual_cnt = 1; + return 0; + } + +again:; + struct kevent evlist[32]; + ret = kevent(epollfd->kq, NULL, 0, evlist, cnt, + &(struct timespec){0, 0}); + if (ret < 0) { + return errno; + } + + int j = 0; + + for (int i = 0; i < ret; ++i) { + int events = 0; + if (evlist[i].filter == EVFILT_READ) { + events |= EPOLLIN; + if (evlist[i].flags & EV_ONESHOT) { + uint16_t flags = 0; + kqueue_load_state(epollfd->kq, + 
(uint32_t)evlist[i].ident, &flags); + + if (flags & KQUEUE_STATE_NYCSS) { + if (is_not_yet_connected_stream_socket( + (int)evlist[i].ident)) { + + events = EPOLLHUP; + if (flags & + KQUEUE_STATE_EPOLLOUT) { + events |= EPOLLOUT; + } + + struct kevent nkev[2]; + EV_SET(&nkev[0], + evlist[i].ident, + EVFILT_READ, EV_ADD, /**/ + 0, 0, evlist[i].udata); + EV_SET(&nkev[1], + evlist[i].ident, + EVFILT_READ, + EV_ENABLE | + EV_FORCEONESHOT, + 0, 0, evlist[i].udata); + + kevent(epollfd->kq, nkev, 2, + NULL, 0, NULL); + } else { + flags &= ~KQUEUE_STATE_NYCSS; + + struct kevent nkev[2]; + EV_SET(&nkev[0], + evlist[i].ident, + EVFILT_READ, EV_ADD, /**/ + 0, 0, evlist[i].udata); + EV_SET(&nkev[1], + evlist[i].ident, + EVFILT_READ, + flags & KQUEUE_STATE_EPOLLIN + ? EV_ENABLE + : EV_DISABLE, + 0, 0, evlist[i].udata); + + kevent(epollfd->kq, nkev, 2, + NULL, 0, NULL); + kqueue_save_state(epollfd->kq, + (uint32_t)evlist[i].ident, + flags); + + continue; + } + } + } + } else if (evlist[i].filter == EVFILT_WRITE) { + events |= EPOLLOUT; + } + + if (evlist[i].flags & EV_ERROR) { + events |= EPOLLERR; + } + + if (evlist[i].flags & EV_EOF) { + if (evlist[i].fflags) { + events |= EPOLLERR; + } + + uint16_t flags = 0; + kqueue_load_state(epollfd->kq, + (uint32_t)evlist[i].ident, &flags); + + int epoll_event; + + if (flags & KQUEUE_STATE_ISFIFO) { + if (evlist[i].filter == EVFILT_READ) { + epoll_event = EPOLLHUP; + if (evlist[i].data == 0) { + events &= ~EPOLLIN; + } + } else if (evlist[i].filter == EVFILT_WRITE) { + epoll_event = EPOLLERR; + } else { + /* should not happen */ + assert(0); + return EIO; + } + } else if (flags & KQUEUE_STATE_ISSOCK) { + if (evlist[i].filter == EVFILT_READ) { + /* do some special EPOLLRDHUP handling + * for sockets */ + + /* if we are reading, we just know for + * sure that we can't receive any more, + * so use EPOLLIN/EPOLLRDHUP per + * default */ + epoll_event = EPOLLIN; + + if (flags & KQUEUE_STATE_EPOLLRDHUP) { + epoll_event |= EPOLLRDHUP; + } + } 
else if (evlist[i].filter == EVFILT_WRITE) { + epoll_event = EPOLLOUT; + } else { + /* should not happen */ + assert(0); + return EIO; + } + + struct pollfd pfd = { + .fd = (int)evlist[i].ident, + .events = POLLIN | POLLOUT | POLLHUP, + }; + + if (poll(&pfd, 1, 0) == 1) { + if (pfd.revents & POLLHUP) { + /* + * We need to set these flags + * so that readers still have a + * chance to read the last data + * from the socket. This is + * very important to preserve + * Linux poll/epoll semantics + * when coming from an + * EVFILT_WRITE event. + */ + if (flags & + KQUEUE_STATE_EPOLLIN) { + epoll_event |= EPOLLIN; + } + if (flags & + KQUEUE_STATE_EPOLLRDHUP) { + epoll_event |= + EPOLLRDHUP; + } + + epoll_event |= EPOLLHUP; + } + + /* might as well steal flags from the + * poll call while we're here */ + + if ((pfd.revents & POLLIN) && + (flags & KQUEUE_STATE_EPOLLIN)) { + epoll_event |= EPOLLIN; + } + + if ((pfd.revents & POLLOUT) && + (flags & KQUEUE_STATE_EPOLLOUT)) { + epoll_event |= EPOLLOUT; + } + } + } else { + epoll_event = EPOLLHUP; + } + + events |= epoll_event; + } + ev[j].events = (uint32_t)events; + ev[j].data.ptr = evlist[i].udata; + ++j; + } + + if (ret && j == 0) { + goto again; + } + + *actual_cnt = j; + return 0; +} + +errno_t +epollfd_ctx_wait(EpollFDCtx *epollfd, struct epoll_event *ev, int cnt, + int *actual_cnt) +{ + errno_t ec; + + (void)pthread_mutex_lock(&epollfd->mutex); + ec = epollfd_ctx_wait_impl(epollfd, ev, cnt, actual_cnt); + (void)pthread_mutex_unlock(&epollfd->mutex); + + return ec; +} diff --git a/src/epollfd_ctx.h b/src/epollfd_ctx.h new file mode 100644 index 0000000..fec09bd --- /dev/null +++ b/src/epollfd_ctx.h @@ -0,0 +1,44 @@ +#ifndef EPOLLFD_CTX_H_ +#define EPOLLFD_CTX_H_ + +#define SHIM_SYS_SHIM_HELPERS +#include + +#include + +#include +#include + +#include +#include + +struct registered_fds_node_; +typedef struct registered_fds_node_ RegisteredFDsNode; + +struct registered_fds_node_ { + RB_ENTRY(registered_fds_node_) entry; + 
int fd; + uint16_t flags; +}; + +typedef RB_HEAD(registered_fds_set_, registered_fds_node_) RegisteredFDsSet; + +typedef struct { + int kq; // non owning + pthread_mutex_t mutex; + + struct pollfd pfds[2]; + epoll_data_t pollfd_data; + + RegisteredFDsSet registered_fds; +} EpollFDCtx; + +errno_t epollfd_ctx_init(EpollFDCtx *epollfd, int kq); +errno_t epollfd_ctx_terminate(EpollFDCtx *epollfd); + +errno_t epollfd_ctx_ctl(EpollFDCtx *epollfd, int op, int fd2, + struct epoll_event *ev); +errno_t epollfd_ctx_wait(EpollFDCtx *epollfd, struct epoll_event *ev, int cnt, + int *actual_cnt); + +#endif diff --git a/src/eventfd.c b/src/eventfd.c new file mode 100644 index 0000000..31d0e3d --- /dev/null +++ b/src/eventfd.c @@ -0,0 +1,189 @@ +#include +#undef read +#undef write +#undef close + +#include + +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include + +#include "epoll_shim_ctx.h" + +static errno_t +eventfd_ctx_read_or_block(EventFDCtx *eventfd_ctx, uint64_t *value, + bool nonblock) +{ + for (;;) { + errno_t ec = eventfd_ctx_read(eventfd_ctx, value); + if (nonblock || ec != EAGAIN) { + return (ec); + } + + struct pollfd pfd = {.fd = eventfd_ctx->kq_, .events = POLLIN}; + if (poll(&pfd, 1, -1) < 0) { + return (errno); + } + } +} + +static errno_t +eventfd_helper_read(FDContextMapNode *node, void *buf, size_t nbytes, + size_t *bytes_transferred) +{ + if (nbytes != sizeof(uint64_t)) { + return EINVAL; + } + + uint64_t value; + errno_t ec; + if ((ec = eventfd_ctx_read_or_block(&node->ctx.eventfd, &value, + node->flags & EFD_NONBLOCK)) != 0) { + return ec; + } + + memcpy(buf, &value, sizeof(value)); + *bytes_transferred = sizeof(value); + return 0; +} + +static errno_t +eventfd_helper_write(FDContextMapNode *node, void const *buf, size_t nbytes, + size_t *bytes_transferred) +{ + if (nbytes != sizeof(uint64_t)) { + return EINVAL; + } + + uint64_t value; + memcpy(&value, buf, sizeof(uint64_t)); + + errno_t ec; + if ((ec = 
eventfd_ctx_write(&node->ctx.eventfd, value)) != 0) { + return ec; + } + + *bytes_transferred = sizeof(value); + return 0; +} + +static errno_t +eventfd_close(FDContextMapNode *node) +{ + return eventfd_ctx_terminate(&node->ctx.eventfd); +} + +static FDContextVTable const eventfd_vtable = { + .read_fun = eventfd_helper_read, + .write_fun = eventfd_helper_write, + .close_fun = eventfd_close, +}; + +static FDContextMapNode * +eventfd_impl(unsigned int initval, int flags, errno_t *ec) +{ + FDContextMapNode *node; + + if (flags & ~(EFD_SEMAPHORE | EFD_CLOEXEC | EFD_NONBLOCK)) { + *ec = EINVAL; + return NULL; + } + + /* + * Don't check that EFD_CLOEXEC is set -- but our kqueue based eventfd + * will always be CLOEXEC. + */ + + node = epoll_shim_ctx_create_node(&epoll_shim_ctx, ec); + if (!node) { + return NULL; + } + + node->flags = flags; + + int ctx_flags = 0; + if (flags & EFD_SEMAPHORE) { + ctx_flags |= EVENTFD_CTX_FLAG_SEMAPHORE; + } + + if ((*ec = eventfd_ctx_init(&node->ctx.eventfd, /**/ + node->fd, initval, ctx_flags)) != 0) { + goto fail; + } + + node->vtable = &eventfd_vtable; + return node; + +fail: + epoll_shim_ctx_remove_node_explicit(&epoll_shim_ctx, node); + (void)fd_context_map_node_destroy(node); + return NULL; +} + +int +eventfd(unsigned int initval, int flags) +{ + FDContextMapNode *node; + errno_t ec; + + node = eventfd_impl(initval, flags, &ec); + if (!node) { + errno = ec; + return -1; + } + + return node->fd; +} + +int +eventfd_read(int fd, eventfd_t *value) +{ + FDContextMapNode *node; + + node = epoll_shim_ctx_find_node(&epoll_shim_ctx, fd); + if (!node || node->vtable != &eventfd_vtable) { + errno = EBADF; + return -1; + } + + size_t bytes_transferred; + errno_t ec; + if ((ec = eventfd_helper_read(node, value, sizeof(*value), + &bytes_transferred)) != 0) { + errno = ec; + return -1; + } + + return 0; +} + +int +eventfd_write(int fd, eventfd_t value) +{ + FDContextMapNode *node; + + node = epoll_shim_ctx_find_node(&epoll_shim_ctx, fd); + if 
(!node || node->vtable != &eventfd_vtable) { + errno = EBADF; + return -1; + } + + size_t bytes_transferred; + errno_t ec; + if ((ec = eventfd_helper_write(node, &value, sizeof(value), + &bytes_transferred)) != 0) { + errno = ec; + return -1; + } + + return 0; +} diff --git a/src/eventfd_ctx.c b/src/eventfd_ctx.c new file mode 100644 index 0000000..52a1057 --- /dev/null +++ b/src/eventfd_ctx.c @@ -0,0 +1,137 @@ +#include "eventfd_ctx.h" + +#include + +#include +#include + +#include +#include + +#include + +static_assert(sizeof(unsigned int) < sizeof(uint64_t), ""); + +errno_t +eventfd_ctx_init(EventFDCtx *eventfd, int kq, unsigned int counter, int flags) +{ + if (flags & ~(EVENTFD_CTX_FLAG_SEMAPHORE)) { + return (EINVAL); + } + + *eventfd = (EventFDCtx){ + .kq_ = kq, + .flags_ = flags, + .counter_ = counter, + }; + + struct kevent kevs[1]; + + EV_SET(&kevs[0], 0, EVFILT_USER, EV_ADD | EV_CLEAR, 0, 0, 0); + if (kevent(eventfd->kq_, kevs, nitems(kevs), NULL, 0, NULL) < 0) { + errno_t err = errno; + return (err); + } + + if (counter > 0) { + struct kevent kevs[1]; + EV_SET(&kevs[0], 0, EVFILT_USER, 0, NOTE_TRIGGER, 0, 0); + + if (kevent(eventfd->kq_, kevs, nitems(kevs), /**/ + NULL, 0, NULL) < 0) { + return (errno); + } + } + + return (0); +} + +errno_t +eventfd_ctx_terminate(EventFDCtx *eventfd) +{ + (void)eventfd; + return (0); +} + +errno_t +eventfd_ctx_write(EventFDCtx *eventfd, uint64_t value) +{ + if (value == UINT64_MAX) { + return (EINVAL); + } + + for (;;) { + uint_least64_t current_value = atomic_load(&eventfd->counter_); + + uint_least64_t new_value; + if (__builtin_add_overflow(current_value, value, &new_value) || + new_value > UINT64_MAX - 1) { + return (EAGAIN); + } + + if (atomic_compare_exchange_weak(&eventfd->counter_, + &current_value, new_value)) { + break; + } + } + + struct kevent kevs[1]; + EV_SET(&kevs[0], 0, EVFILT_USER, 0, NOTE_TRIGGER, 0, 0); + + if (kevent(eventfd->kq_, kevs, nitems(kevs), NULL, 0, NULL) < 0) { + return (errno); + } + + return 
(0); +} + +errno_t +eventfd_ctx_read(EventFDCtx *eventfd, uint64_t *value) +{ + uint_least64_t current_value; + + for (;;) { + current_value = atomic_load(&eventfd->counter_); + if (current_value == 0) { + return (EAGAIN); + } + + uint_least64_t new_value = + (eventfd->flags_ & EVENTFD_CTX_FLAG_SEMAPHORE) + ? current_value - 1 + : 0; + + if (new_value == 0) { + struct kevent kevs[32]; + int n; + + while ((n = kevent(eventfd->kq_, NULL, 0, /**/ + kevs, nitems(kevs), + &(struct timespec){0, 0})) > 0) { + } + if (n < 0) { + return (errno); + } + } + + if (atomic_compare_exchange_weak(&eventfd->counter_, + &current_value, new_value)) { + break; + } + + if (new_value == 0 && current_value > 0) { + struct kevent kevs[1]; + EV_SET(&kevs[0], 0, EVFILT_USER, /**/ + 0, NOTE_TRIGGER, 0, 0); + + if (kevent(eventfd->kq_, kevs, nitems(kevs), /**/ + NULL, 0, NULL) < 0) { + return (errno); + } + } + } + + *value = + (eventfd->flags_ & EVENTFD_CTX_FLAG_SEMAPHORE) ? 1 : current_value; + return (0); +} diff --git a/src/eventfd_ctx.h b/src/eventfd_ctx.h new file mode 100644 index 0000000..a8606a2 --- /dev/null +++ b/src/eventfd_ctx.h @@ -0,0 +1,24 @@ +#ifndef EVENTFD_CTX_H_ +#define EVENTFD_CTX_H_ + +#include +#include +#include +#include + +#define EVENTFD_CTX_FLAG_SEMAPHORE (1 << 0) + +typedef struct { + int kq_; // non owning + int flags_; + atomic_uint_least64_t counter_; +} EventFDCtx; + +errno_t eventfd_ctx_init(EventFDCtx *eventfd, int kq, unsigned int counter, + int flags); +errno_t eventfd_ctx_terminate(EventFDCtx *eventfd); + +errno_t eventfd_ctx_write(EventFDCtx *eventfd, uint64_t value); +errno_t eventfd_ctx_read(EventFDCtx *eventfd, uint64_t *value); + +#endif diff --git a/src/signalfd.c b/src/signalfd.c index c85d1e3..597f915 100644 --- a/src/signalfd.c +++ b/src/signalfd.c @@ -7,6 +7,7 @@ #include #include +#include #include #include #include @@ -15,129 +16,107 @@ #include #include -struct signalfd_context { - int fd; - int flags; - struct signalfd_context *next; -}; - 
-static struct signalfd_context *signalfd_contexts; -pthread_mutex_t signalfd_context_mtx = PTHREAD_MUTEX_INITIALIZER; +#include "epoll_shim_ctx.h" -struct signalfd_context * -get_signalfd_context(int fd, bool create_new) +static errno_t +signalfd_ctx_read_or_block(SignalFDCtx *signalfd_ctx, uint32_t *value, + bool nonblock) { - for (struct signalfd_context *ctx = signalfd_contexts; ctx; - ctx = ctx->next) { - if (fd == ctx->fd) { - return ctx; + for (;;) { + errno_t ec = signalfd_ctx_read(signalfd_ctx, value); + if (nonblock || ec != EAGAIN) { + return (ec); } - } - if (create_new) { - struct signalfd_context *new_ctx = - calloc(1, sizeof(struct signalfd_context)); - if (!new_ctx) { - return NULL; + struct pollfd pfd = {.fd = signalfd_ctx->kq, .events = POLLIN}; + if (poll(&pfd, 1, -1) < 0) { + return (errno); } - new_ctx->fd = -1; - new_ctx->next = signalfd_contexts; - signalfd_contexts = new_ctx; - return new_ctx; } - - return NULL; } -static int -signalfd_impl(int fd, const sigset_t *sigs, int flags) +static errno_t +signalfd_read(FDContextMapNode *node, void *buf, size_t nbytes, + size_t *bytes_transferred) { - if (fd != -1) { - errno = EINVAL; - return -1; + // TODO(jan): fix this to read multiple signals + if (nbytes != sizeof(struct signalfd_siginfo)) { + return EINVAL; } - if (flags & ~(SFD_NONBLOCK | SFD_CLOEXEC)) { - errno = EINVAL; - return -1; + uint32_t signo; + errno_t ec; + if ((ec = signalfd_ctx_read_or_block(&node->ctx.signalfd, &signo, + node->flags & SFD_NONBLOCK)) != 0) { + return ec; } - struct signalfd_context *ctx = get_signalfd_context(-1, true); - if (!ctx) { - errno = EMFILE; - return -1; + struct signalfd_siginfo siginfo = {.ssi_signo = signo}; + memcpy(buf, &siginfo, sizeof(siginfo)); + + *bytes_transferred = sizeof(siginfo); + return 0; +} + +static errno_t +signalfd_close(FDContextMapNode *node) +{ + return signalfd_ctx_terminate(&node->ctx.signalfd); +} + +static FDContextVTable const signalfd_vtable = { + .read_fun = signalfd_read, 
+ .write_fun = fd_context_default_write, + .close_fun = signalfd_close, +}; + +static FDContextMapNode * +signalfd_impl(int fd, const sigset_t *sigs, int flags, errno_t *ec) +{ + FDContextMapNode *node; + + if (fd != -1) { + *ec = EINVAL; + return NULL; } - ctx->fd = kqueue(); - if (ctx->fd < 0) { - return -1; + if (flags & ~(SFD_NONBLOCK | SFD_CLOEXEC)) { + *ec = EINVAL; + return NULL; } - ctx->flags = flags; + node = epoll_shim_ctx_create_node(&epoll_shim_ctx, ec); + if (!node) { + return NULL; + } - struct kevent kevs[_SIG_MAXSIG]; - int n = 0; + node->flags = flags; - for (int i = 1; i <= _SIG_MAXSIG; ++i) { - if (sigismember(sigs, i)) { - EV_SET(&kevs[n++], i, EVFILT_SIGNAL, EV_ADD, 0, 0, 0); - } + if ((*ec = signalfd_ctx_init(&node->ctx.signalfd, /**/ + node->fd, sigs)) != 0) { + goto fail; } - int ret = kevent(ctx->fd, kevs, n, NULL, 0, NULL); - if (ret < 0) { - close(ctx->fd); - ctx->fd = -1; - return -1; - } + node->vtable = &signalfd_vtable; + return node; - return ctx->fd; +fail: + epoll_shim_ctx_remove_node_explicit(&epoll_shim_ctx, node); + (void)fd_context_map_node_destroy(node); + return NULL; } int signalfd(int fd, const sigset_t *sigs, int flags) { - pthread_mutex_lock(&signalfd_context_mtx); - int ret = signalfd_impl(fd, sigs, flags); - pthread_mutex_unlock(&signalfd_context_mtx); - return ret; -} - -ssize_t -signalfd_read(struct signalfd_context *ctx, void *buf, size_t nbytes) -{ - int fd = ctx->fd; - int flags = ctx->flags; - pthread_mutex_unlock(&signalfd_context_mtx); + FDContextMapNode *node; + errno_t ec; - // TODO(jan): fix this to read multiple signals - if (nbytes != sizeof(struct signalfd_siginfo)) { - errno = EINVAL; + node = signalfd_impl(fd, sigs, flags, &ec); + if (!node) { + errno = ec; return -1; } - struct timespec timeout = {0, 0}; - struct kevent kev; - int ret = kevent( - fd, NULL, 0, &kev, 1, (flags & SFD_NONBLOCK) ? 
&timeout : NULL); - if (ret == -1) { - return -1; - } - if (ret == 0) { - errno = EAGAIN; - return -1; - } - - memset(buf, '\0', nbytes); - struct signalfd_siginfo *sig_buf = buf; - sig_buf->ssi_signo = (uint32_t)kev.ident; - return (ssize_t)nbytes; -} - -int -signalfd_close(struct signalfd_context *ctx) -{ - int ret = close(ctx->fd); - ctx->fd = -1; - return ret; + return node->fd; } diff --git a/src/signalfd_ctx.c b/src/signalfd_ctx.c new file mode 100644 index 0000000..15a5485 --- /dev/null +++ b/src/signalfd_ctx.c @@ -0,0 +1,59 @@ +#include "signalfd_ctx.h" + +#include + +#include + +#include +#include + +#include + +errno_t +signalfd_ctx_init(SignalFDCtx *signalfd, int kq, const sigset_t *sigs) +{ + *signalfd = (SignalFDCtx){.kq = kq}; + + struct kevent kevs[_SIG_MAXSIG]; + int n = 0; + + for (int i = 1; i <= _SIG_MAXSIG; ++i) { + if (sigismember(sigs, i)) { + EV_SET(&kevs[n++], i, EVFILT_SIGNAL, EV_ADD, 0, 0, 0); + } + } + + n = kevent(signalfd->kq, kevs, n, NULL, 0, NULL); + if (n < 0) { + return errno; + } + + return 0; +} + +errno_t +signalfd_ctx_terminate(SignalFDCtx *signalfd) +{ + (void)signalfd; + + return (0); +} + +errno_t +signalfd_ctx_read(SignalFDCtx *signalfd, uint32_t *ident) +{ + struct kevent kev; + + int n = kevent(signalfd->kq, NULL, 0, /**/ + &kev, 1, &(struct timespec){0, 0}); + if (n < 0) { + return errno; + } + + if (n == 0) { + return EAGAIN; + } + + *ident = (uint32_t)kev.ident; + return 0; +} diff --git a/src/signalfd_ctx.h b/src/signalfd_ctx.h new file mode 100644 index 0000000..742dd9f --- /dev/null +++ b/src/signalfd_ctx.h @@ -0,0 +1,17 @@ +#ifndef SIGNALFD_CTX_H_ +#define SIGNALFD_CTX_H_ + +#include +#include +#include + +typedef struct { + int kq; // non owning +} SignalFDCtx; + +errno_t signalfd_ctx_init(SignalFDCtx *signalfd, int kq, const sigset_t *sigs); +errno_t signalfd_ctx_terminate(SignalFDCtx *signalfd); + +errno_t signalfd_ctx_read(SignalFDCtx *signalfd, uint32_t *ident); + +#endif diff --git a/src/timerfd.c 
b/src/timerfd.c index b42a3ab..4148ca2 100644 --- a/src/timerfd.c +++ b/src/timerfd.c @@ -3,234 +3,166 @@ #undef close #include +#include +#include #include #include +#include #include -#include #include #include #include #include -struct timerfd_context { - int fd; - pthread_t worker; - timer_t timer; - int flags; - struct timerfd_context *next; -}; - -static struct timerfd_context *timerfd_contexts; -pthread_mutex_t timerfd_context_mtx = PTHREAD_MUTEX_INITIALIZER; +#include "epoll_shim_ctx.h" -struct timerfd_context * -get_timerfd_context(int fd, bool create_new) +static errno_t +timerfd_ctx_read_or_block(TimerFDCtx *timerfd, uint64_t *value, bool nonblock) { - for (struct timerfd_context *ctx = timerfd_contexts; ctx; - ctx = ctx->next) { - if (fd == ctx->fd) { - return ctx; + for (;;) { + errno_t ec = timerfd_ctx_read(timerfd, value); + if (nonblock || ec != EAGAIN) { + return (ec); } - } - if (create_new) { - struct timerfd_context *new_ctx = - calloc(1, sizeof(struct timerfd_context)); - if (!new_ctx) { - return NULL; + struct pollfd pfd = {.fd = timerfd->kq, .events = POLLIN}; + if (poll(&pfd, 1, -1) < 0) { + return (errno); } - new_ctx->fd = -1; - new_ctx->next = timerfd_contexts; - timerfd_contexts = new_ctx; - return new_ctx; } - - return NULL; } -static void * -worker_function(void *arg) +static errno_t +timerfd_read(FDContextMapNode *node, void *buf, size_t nbytes, + size_t *bytes_transferred) { - struct timerfd_context *ctx = arg; - - siginfo_t info; - sigset_t rt_set; - sigset_t block_set; - - sigemptyset(&rt_set); - sigaddset(&rt_set, SIGRTMIN); - sigaddset(&rt_set, SIGRTMIN + 1); - - sigfillset(&block_set); + if (nbytes < sizeof(uint64_t)) { + return EINVAL; + } - (void)pthread_sigmask(SIG_BLOCK, &block_set, NULL); + errno_t ec; + uint64_t nr_expired; + if ((ec = timerfd_ctx_read_or_block(&node->ctx.timerfd, &nr_expired, + node->flags & TFD_NONBLOCK)) != 0) { + return ec; + } - struct kevent kev; - EV_SET(&kev, 0, EVFILT_USER, 0, NOTE_TRIGGER, 0, - 
(void *)(intptr_t)pthread_getthreadid_np()); - (void)kevent(ctx->fd, &kev, 1, NULL, 0, NULL); + memcpy(buf, &nr_expired, sizeof(uint64_t)); - for (;;) { - if (sigwaitinfo(&rt_set, &info) != SIGRTMIN) { - break; - } - EV_SET(&kev, 0, EVFILT_USER, 0, NOTE_TRIGGER, 0, - (void *)(intptr_t)timer_getoverrun(ctx->timer)); - (void)kevent(ctx->fd, &kev, 1, NULL, 0, NULL); - } + *bytes_transferred = sizeof(uint64_t); + return 0; +} - return NULL; +static errno_t +timerfd_close(FDContextMapNode *node) +{ + return timerfd_ctx_terminate(&node->ctx.timerfd); } -static int -timerfd_create_impl(int clockid, int flags) +static FDContextVTable const timerfd_vtable = { + .read_fun = timerfd_read, + .write_fun = fd_context_default_write, + .close_fun = timerfd_close, +}; + +static FDContextMapNode * +timerfd_create_impl(int clockid, int flags, errno_t *ec) { + FDContextMapNode *node; + if (clockid != CLOCK_MONOTONIC && clockid != CLOCK_REALTIME) { - return EINVAL; + *ec = EINVAL; + return NULL; } if (flags & ~(TFD_CLOEXEC | TFD_NONBLOCK)) { - return EINVAL; + *ec = EINVAL; + return NULL; } - struct timerfd_context *ctx = get_timerfd_context(-1, true); - if (!ctx) { - errno = ENOMEM; - return -1; - } - - ctx->fd = kqueue(); - if (ctx->fd < 0) { - return -1; + node = epoll_shim_ctx_create_node(&epoll_shim_ctx, ec); + if (!node) { + return NULL; } - ctx->flags = flags; + node->flags = flags; - struct kevent kev; - EV_SET(&kev, 0, EVFILT_USER, EV_ADD | EV_CLEAR, 0, 0, 0); - if (kevent(ctx->fd, &kev, 1, NULL, 0, NULL) < 0) { - close(ctx->fd); - ctx->fd = -1; - return -1; + if ((*ec = timerfd_ctx_init(&node->ctx.timerfd, /**/ + node->fd, clockid)) != 0) { + goto fail; } - if (pthread_create(&ctx->worker, NULL, worker_function, ctx) < 0) { - close(ctx->fd); - ctx->fd = -1; - return -1; - } + node->vtable = &timerfd_vtable; + return node; - int ret = kevent(ctx->fd, NULL, 0, &kev, 1, NULL); - if (ret < 0) { - pthread_kill(ctx->worker, SIGRTMIN + 1); - pthread_join(ctx->worker, NULL); - 
close(ctx->fd); - ctx->fd = -1; - return -1; - } - - int tid = (int)(intptr_t)kev.udata; - - struct sigevent sigev = {.sigev_notify = SIGEV_THREAD_ID, - .sigev_signo = SIGRTMIN, - .sigev_notify_thread_id = tid}; - - if (timer_create(clockid, &sigev, &ctx->timer) < 0) { - pthread_kill(ctx->worker, SIGRTMIN + 1); - pthread_join(ctx->worker, NULL); - close(ctx->fd); - ctx->fd = -1; - return -1; - } - - return ctx->fd; +fail: + epoll_shim_ctx_remove_node_explicit(&epoll_shim_ctx, node); + (void)fd_context_map_node_destroy(node); + return NULL; } int timerfd_create(int clockid, int flags) { - pthread_mutex_lock(&timerfd_context_mtx); - int ret = timerfd_create_impl(clockid, flags); - pthread_mutex_unlock(&timerfd_context_mtx); - return ret; -} - -static int -timerfd_settime_impl( - int fd, int flags, const struct itimerspec *new, struct itimerspec *old) -{ - struct timerfd_context *ctx = get_timerfd_context(fd, false); - if (!ctx) { - errno = EINVAL; - return -1; - } + FDContextMapNode *node; + errno_t ec; - if (flags & ~(TFD_TIMER_ABSTIME)) { - errno = EINVAL; + node = timerfd_create_impl(clockid, flags, &ec); + if (!node) { + errno = ec; return -1; } - return timer_settime(ctx->timer, - (flags & TFD_TIMER_ABSTIME) ? 
TIMER_ABSTIME : 0, new, old); + return node->fd; } -int -timerfd_settime( - int fd, int flags, const struct itimerspec *new, struct itimerspec *old) +static errno_t +timerfd_settime_impl(int fd, int flags, const struct itimerspec *new, + struct itimerspec *old) { - pthread_mutex_lock(&timerfd_context_mtx); - int ret = timerfd_settime_impl(fd, flags, new, old); - pthread_mutex_unlock(&timerfd_context_mtx); - return ret; -} + errno_t ec; + FDContextMapNode *node; -#if 0 -int timerfd_gettime(int fd, struct itimerspec *cur) -{ - return syscall(SYS_timerfd_gettime, fd, cur); -} -#endif + if (!new) { + return EFAULT; + } -ssize_t -timerfd_read(struct timerfd_context *ctx, void *buf, size_t nbytes) -{ - int fd = ctx->fd; - int flags = ctx->flags; - pthread_mutex_unlock(&timerfd_context_mtx); + node = epoll_shim_ctx_find_node(&epoll_shim_ctx, fd); + if (!node || node->vtable != &timerfd_vtable) { + return EINVAL; + } - if (nbytes < sizeof(uint64_t)) { - errno = EINVAL; - return -1; + if (flags & ~(TFD_TIMER_ABSTIME)) { + return EINVAL; } - struct timespec timeout = {0, 0}; - struct kevent kev; - int ret = kevent( - fd, NULL, 0, &kev, 1, (flags & TFD_NONBLOCK) ? &timeout : NULL); - if (ret == -1) { - return -1; + if ((ec = timerfd_ctx_settime(&node->ctx.timerfd, + (flags & TFD_TIMER_ABSTIME) ? 
TIMER_ABSTIME : 0, /**/ + new, old)) != 0) { + return ec; } - if (ret == 0) { - errno = EAGAIN; + return 0; +} + +int +timerfd_settime(int fd, int flags, const struct itimerspec *new, + struct itimerspec *old) +{ + errno_t ec = timerfd_settime_impl(fd, flags, new, old); + if (ec != 0) { + errno = ec; return -1; } - uint64_t nr_expired = 1 + (uint64_t)kev.udata; - memcpy(buf, &nr_expired, sizeof(uint64_t)); - - return sizeof(uint64_t); + return 0; } -int -timerfd_close(struct timerfd_context *ctx) +#if 0 +int timerfd_gettime(int fd, struct itimerspec *cur) { - timer_delete(ctx->timer); - pthread_kill(ctx->worker, SIGRTMIN + 1); - pthread_join(ctx->worker, NULL); - int ret = close(ctx->fd); - ctx->fd = -1; - return ret; + return syscall(SYS_timerfd_gettime, fd, cur); } +#endif diff --git a/src/timerfd_ctx.c b/src/timerfd_ctx.c new file mode 100644 index 0000000..949f3e1 --- /dev/null +++ b/src/timerfd_ctx.c @@ -0,0 +1,304 @@ +#include "timerfd_ctx.h" + +#include + +#include +#include + +#include +#include + +#include + +static void * +worker_function(void *arg) +{ + TimerFDCtx *ctx = arg; + + uint64_t total_expirations = 0; + + siginfo_t info; + sigset_t rt_set; + sigset_t block_set; + + sigemptyset(&rt_set); + sigaddset(&rt_set, SIGRTMIN); + sigaddset(&rt_set, SIGRTMIN + 1); + + sigfillset(&block_set); + + (void)pthread_sigmask(SIG_BLOCK, &block_set, NULL); + + struct kevent kev; + EV_SET(&kev, 0, EVFILT_USER, 0, NOTE_TRIGGER, 0, + (void *)(intptr_t)pthread_getthreadid_np()); + (void)kevent(ctx->kq, &kev, 1, NULL, 0, NULL); + + for (;;) { + if (sigwaitinfo(&rt_set, &info) != SIGRTMIN) { + break; + } + int overrun = timer_getoverrun(ctx->complx.timer); + total_expirations += 1 + (uint64_t)MAX(0, overrun); + EV_SET(&kev, 0, EVFILT_USER, 0, NOTE_TRIGGER, 0, + (void *)(uintptr_t)total_expirations); + (void)kevent(ctx->kq, &kev, 1, NULL, 0, NULL); + } + + return NULL; +} + +static errno_t +upgrade_to_complex_timer(TimerFDCtx *ctx, int clockid) +{ + errno_t ec; + + if 
(ctx->kind == TIMERFD_KIND_COMPLEX) { + return 0; + } + + if (ctx->kind == TIMERFD_KIND_SIMPLE) { + struct kevent kev[1]; + EV_SET(&kev[0], 0, EVFILT_TIMER, EV_DELETE, 0, 0, 0); + (void)kevent(ctx->kq, kev, nitems(kev), NULL, 0, NULL); + + ctx->kind = TIMERFD_KIND_UNDETERMINED; + } + + assert(ctx->kind == TIMERFD_KIND_UNDETERMINED); + + struct kevent kev; + EV_SET(&kev, 0, EVFILT_USER, EV_ADD | EV_CLEAR, 0, 0, 0); + if (kevent(ctx->kq, &kev, 1, NULL, 0, NULL) < 0) { + assert(errno != 0); + return errno; + } + + if ((ec = pthread_create(&ctx->complx.worker, /**/ + NULL, worker_function, ctx)) != 0) { + return ec; + } + + if (kevent(ctx->kq, NULL, 0, &kev, 1, NULL) < 0) { + goto out; + } + + int tid = (int)(intptr_t)kev.udata; + + struct sigevent sigev = { + .sigev_notify = SIGEV_THREAD_ID, + .sigev_signo = SIGRTMIN, + .sigev_notify_thread_id = tid, + }; + + if (timer_create(clockid, &sigev, &ctx->complx.timer) < 0) { + goto out; + } + + ctx->complx.current_expirations = 0; + ctx->kind = TIMERFD_KIND_COMPLEX; + return 0; + +out: + ec = errno; + assert(ec != 0); + pthread_kill(ctx->complx.worker, SIGRTMIN + 1); + pthread_join(ctx->complx.worker, NULL); + return ec; +} + +errno_t +timerfd_ctx_init(TimerFDCtx *timerfd, int kq, int clockid) +{ + errno_t ec; + + if (clockid != CLOCK_MONOTONIC && clockid != CLOCK_REALTIME) { + return EINVAL; + } + + *timerfd = (TimerFDCtx){.kq = kq, .kind = TIMERFD_KIND_UNDETERMINED}; + + if ((ec = pthread_mutex_init(&timerfd->mutex, NULL)) != 0) { + return ec; + } + + if (clockid == CLOCK_REALTIME) { + if ((ec = upgrade_to_complex_timer(timerfd, /**/ + CLOCK_REALTIME)) != 0) { + (void)pthread_mutex_destroy(&timerfd->mutex); + return ec; + } + } + + return 0; +} + +errno_t +timerfd_ctx_terminate(TimerFDCtx *timerfd) +{ + errno_t ec = 0; + errno_t ec_local = 0; + + if (timerfd->kind == TIMERFD_KIND_COMPLEX) { + if (timer_delete(timerfd->complx.timer) < 0 && ec == 0) { + ec = errno; + } + ec_local = pthread_kill(timerfd->complx.worker, 
SIGRTMIN + 1); + ec = ec ? ec : ec_local; + ec_local = pthread_join(timerfd->complx.worker, NULL); + ec = ec ? ec : ec_local; + } + + ec_local = pthread_mutex_destroy(&timerfd->mutex); + ec = ec ? ec : ec_local; + + return ec; +} + +static errno_t +timerfd_ctx_settime_impl(TimerFDCtx *timerfd, int flags, + const struct itimerspec *new, struct itimerspec *old) +{ + errno_t ec; + + if (flags & ~(TIMER_ABSTIME)) { + return EINVAL; + } + + if ((flags & TIMER_ABSTIME) || + ((new->it_interval.tv_sec != 0 || new->it_interval.tv_nsec != 0) && + (new->it_interval.tv_sec != new->it_value.tv_sec || + new->it_interval.tv_nsec != new->it_value.tv_nsec))) { + if ((ec = upgrade_to_complex_timer(timerfd, /**/ + CLOCK_MONOTONIC)) != 0) { + return ec; + } + } + + if (timerfd->kind == TIMERFD_KIND_COMPLEX) { + if (timer_settime(timerfd->complx.timer, /**/ + flags, new, old) < 0) { + return errno; + } + } else { + struct kevent kev[1]; + int oneshot_flag; + int64_t micros; + + if (old) { + *old = timerfd->simple.current_itimerspec; + } + + if (new->it_value.tv_sec == 0 && new->it_value.tv_nsec == 0) { + struct kevent kev[1]; + EV_SET(&kev[0], 0, EVFILT_TIMER, EV_DELETE, 0, 0, 0); + (void)kevent(timerfd->kq, kev, nitems(kev), NULL, 0, + NULL); + } else { + if (__builtin_mul_overflow(new->it_value.tv_sec, + 1000000, &micros) || + __builtin_add_overflow(micros, + new->it_value.tv_nsec / 1000, &micros)) { + return EOVERFLOW; + } + + if ((new->it_value.tv_nsec % 1000) && + __builtin_add_overflow(micros, 1, &micros)) { + return EOVERFLOW; + } + + if (new->it_interval.tv_sec == 0 && + new->it_interval.tv_nsec == 0) { + oneshot_flag = EV_ONESHOT; + } else { + oneshot_flag = 0; + } + + EV_SET(&kev[0], 0, EVFILT_TIMER, EV_ADD | oneshot_flag, + NOTE_USECONDS, micros, 0); + + if (kevent(timerfd->kq, kev, nitems(kev), /**/ + NULL, 0, NULL) < 0) { + return errno; + } + } + + timerfd->simple.current_itimerspec = *new; + timerfd->kind = TIMERFD_KIND_SIMPLE; + } + + return 0; +} + +errno_t 
+timerfd_ctx_settime(TimerFDCtx *timerfd, int flags, + const struct itimerspec *new, struct itimerspec *old) +{ + errno_t ec; + + (void)pthread_mutex_lock(&timerfd->mutex); + ec = timerfd_ctx_settime_impl(timerfd, flags, new, old); + (void)pthread_mutex_unlock(&timerfd->mutex); + + return ec; +} + +static errno_t +timerfd_ctx_read_impl(TimerFDCtx *timerfd, uint64_t *value) +{ + uint64_t nr_expired; + + for (;;) { + struct kevent kev; + + int n = kevent(timerfd->kq, NULL, 0, &kev, 1, + &(struct timespec){0, 0}); + if (n < 0) { + return errno; + } + + if (n == 0) { + return EAGAIN; + } + + nr_expired = 0; + + if (timerfd->kind == TIMERFD_KIND_COMPLEX) { + uint64_t expired_new = (uint64_t)kev.udata; + + assert(expired_new && kev.filter == EVFILT_USER); + + if (expired_new > + timerfd->complx.current_expirations) { + nr_expired = expired_new - + timerfd->complx.current_expirations; + timerfd->complx.current_expirations = + expired_new; + } + } else { + assert(!kev.udata && kev.filter == EVFILT_TIMER && + kev.data >= 0); + + nr_expired = (uint64_t)kev.data; + } + + if (nr_expired != 0) { + break; + } + } + + *value = nr_expired; + return 0; +} + +errno_t +timerfd_ctx_read(TimerFDCtx *timerfd, uint64_t *value) +{ + errno_t ec; + + (void)pthread_mutex_lock(&timerfd->mutex); + ec = timerfd_ctx_read_impl(timerfd, value); + (void)pthread_mutex_unlock(&timerfd->mutex); + + return ec; +} diff --git a/src/timerfd_ctx.h b/src/timerfd_ctx.h new file mode 100644 index 0000000..ef3d78e --- /dev/null +++ b/src/timerfd_ctx.h @@ -0,0 +1,44 @@ +#ifndef TIMERFD_CTX_H_ +#define TIMERFD_CTX_H_ + +#include + +#include +#include +#include +#include + +#include + +enum timerfd_kind { + TIMERFD_KIND_UNDETERMINED, + TIMERFD_KIND_SIMPLE, + TIMERFD_KIND_COMPLEX, +}; + +typedef struct { + int kq; // non owning + int flags; + pthread_mutex_t mutex; + enum timerfd_kind kind; + union { + struct { + struct itimerspec current_itimerspec; + } simple; + struct { + pthread_t worker; + timer_t timer; + 
uint64_t current_expirations; + } complx; + }; +} TimerFDCtx; + +errno_t timerfd_ctx_init(TimerFDCtx *timerfd, int kq, int clockid); +errno_t timerfd_ctx_terminate(TimerFDCtx *timerfd); + +errno_t timerfd_ctx_settime(TimerFDCtx *timerfd, int flags, + const struct itimerspec *new, struct itimerspec *old); + +errno_t timerfd_ctx_read(TimerFDCtx *timerfd, uint64_t *value); + +#endif diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt index dbac513..9410f50 100644 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -1,9 +1,51 @@ +cmake_minimum_required(VERSION 3.13) +project(epoll-shim-tests LANGUAGES C) + +include(CTest) + +if(NOT CMAKE_SYSTEM_NAME STREQUAL "Linux") + find_package(epoll-shim REQUIRED) +else() + add_library(epoll-shim INTERFACE) + add_library(epoll-shim::epoll-shim ALIAS epoll-shim) +endif() + +set(THREADS_PREFER_PTHREAD_FLAG ON) +find_package(Threads REQUIRED) + add_executable(epoll-test epoll-test.c) -target_link_libraries(epoll-test PRIVATE epoll-shim) +target_link_libraries(epoll-test + PRIVATE epoll-shim::epoll-shim Threads::Threads) +add_test(NAME epoll-test COMMAND epoll-test) add_executable(expire-five expire-five.c) -target_link_libraries(expire-five PRIVATE epoll-shim) +target_link_libraries(expire-five PRIVATE epoll-shim::epoll-shim) +add_test(NAME expire-five COMMAND expire-five) + +add_executable(many-timers many-timers.c) +target_link_libraries(many-timers PRIVATE epoll-shim::epoll-shim) +add_test(NAME many-timers COMMAND many-timers) + +add_executable(tst-eventfd tst-eventfd.c) +target_link_libraries(tst-eventfd + PRIVATE epoll-shim::epoll-shim Threads::Threads) +add_test(NAME tst-eventfd COMMAND tst-eventfd) + +add_executable(perf-many-fds perf-many-fds.c) +target_link_libraries(perf-many-fds PRIVATE epoll-shim::epoll-shim) +add_test(NAME perf-many-fds COMMAND perf-many-fds) + +if(NOT CMAKE_SYSTEM_NAME STREQUAL "Linux") + add_executable(kqueue-state kqueue-state.c) + target_include_directories(kqueue-state + PRIVATE 
"${CMAKE_CURRENT_LIST_DIR}/../include") + target_link_options(kqueue-state PRIVATE + "LINKER:--unresolved-symbols=ignore-all") + add_test(NAME kqueue-state COMMAND kqueue-state) -add_executable(kqueue-state kqueue-state.c) -target_include_directories(kqueue-state PRIVATE - "${CMAKE_CURRENT_SOURCE_DIR}/../include") + add_executable(eventfd-ctx-test eventfd-ctx-test.c) + target_link_libraries(eventfd-ctx-test PRIVATE Threads::Threads) + target_include_directories(eventfd-ctx-test + PRIVATE "${CMAKE_CURRENT_LIST_DIR}/../src") + add_test(NAME eventfd-ctx-test COMMAND eventfd-ctx-test) +endif() diff --git a/test/epoll-test.c b/test/epoll-test.c index 7746137..05ceab0 100644 --- a/test/epoll-test.c +++ b/test/epoll-test.c @@ -8,10 +8,12 @@ #include #include +#include #include -#include #include +#include + #include #include @@ -21,14 +23,22 @@ #include #include +/* Uncomment this if you want the interactive test. */ +/* #define INTERACTIVE_TESTS */ + #define XSTR(a) STR(a) #define STR(a) #a #define TEST(fun) \ if ((fun) != 0) { \ - printf(STR((fun)) " failed\n"); \ + fprintf(stderr, STR((fun)) " failed\n"); \ + r = 1; \ } else { \ - printf(STR((fun)) " successful\n"); \ + fprintf(stderr, STR((fun)) " successful\n"); \ + if (check_for_fd_leaks() != 0) { \ + fprintf(stderr, "but there was a fd leak...\n"); \ + r = 1; \ + }; \ } static int @@ -496,13 +506,14 @@ test11() return -1; } - int fd = open("/dev/dri/card0", O_RDWR | O_CLOEXEC); + int fd = open("/dev/dsp", O_WRONLY | O_CLOEXEC); if (fd < 0) { - return -1; + // Don't fail the test when there is no sound card. 
+ goto out; } struct epoll_event event; - event.events = EPOLLIN | EPOLLOUT; + event.events = EPOLLOUT; event.data.fd = fd; if (epoll_ctl(ep, EPOLL_CTL_ADD, fd, &event) < 0) { @@ -510,7 +521,11 @@ test11() } struct epoll_event event_result; - if (epoll_wait(ep, &event_result, 1, 300) != 0) { + if (epoll_wait(ep, &event_result, 1, 300) != 1) { + return -1; + } + + if (event_result.events != EPOLLOUT || event_result.data.fd != fd) { return -1; } @@ -518,7 +533,12 @@ test11() return -1; } + if (epoll_ctl(ep, EPOLL_CTL_DEL, fd, NULL) >= 0 || errno != ENOENT) { + return -1; + } + close(fd); +out: close(ep); return 0; } @@ -759,9 +779,12 @@ test15() return 0; } +static int fd_leak_test_a; +static int fd_leak_test_b; static int -testxx() +check_for_fd_leaks() { + int r = 0; /* test that all fds of previous tests have been closed successfully */ int fds[3]; @@ -769,13 +792,14 @@ testxx() return -1; } - if (fds[0] != 3 || fds[1] != 4) { - return -1; + if (fds[0] != fd_leak_test_a || fds[1] != fd_leak_test_b) { + r = -1; } close(fds[0]); close(fds[1]); - return 0; + + return r; } static void * @@ -1120,6 +1144,7 @@ test20(int (*fd_fun)(int fds[3])) // fprintf(stderr, "got %d\n", (int)c); } else if (event_result.events == EPOLLOUT) { + write(event.data.fd, &c, 1); // continue } else if (fd_fun == fd_domain_socket && (event_result.events & (EPOLLOUT | EPOLLHUP)) == @@ -1451,9 +1476,198 @@ test_recursive_register() return 0; } +static int +test_remove_closed() +{ + int ep = epoll_create1(EPOLL_CLOEXEC); + if (ep < 0) { + return -1; + } + + int fds[3]; + if (fd_pipe(fds) < 0) { + return -1; + } + + struct epoll_event event = {0}; + event.events = EPOLLIN; + + if (epoll_ctl(ep, EPOLL_CTL_ADD, fds[0], &event) < 0) { + return -1; + } + + close(fds[0]); + close(fds[1]); + + // Trying to delete an event that was already deleted by closing the + // associated fd should fail. 
+ if (epoll_ctl(ep, EPOLL_CTL_DEL, fds[0], &event) != -1) { + return -1; + } + + close(ep); + return 0; +} + +static int +test_same_fd_value() +{ + int ep = epoll_create1(EPOLL_CLOEXEC); + if (ep < 0) { + return -1; + } + + int fds[3]; + if (fd_pipe(fds) < 0) { + return -1; + } + + struct epoll_event event = {0}; + event.events = EPOLLIN; + + if (epoll_ctl(ep, EPOLL_CTL_ADD, fds[0], &event) < 0) { + return -1; + } + + int ret; + close(fds[0]); + close(fds[1]); + + // Note: This wouldn't be needed under Linux as the close() calls above + // properly removes the descriptor from the epoll instance. However, in + // our epoll emulation we cannot (yet?) reliably detect if a descriptor + // has been closed before it is deleted from the epoll instance. + // See also: https://github.com/jiixyj/epoll-shim/pull/7 + if (epoll_ctl(ep, EPOLL_CTL_DEL, fds[0], &event) != -1) { + return -1; + } + + // Creating new pipe. The file descriptors will have the same numerical + // values as the previous ones. + if (fd_pipe(fds) < 0) { + return -1; + } + + // If status of closed fds would not be cleared, adding an event with + // the fd that has the same numerical value as the closed one would + // fail. 
+ struct epoll_event event2 = {0}; + event2.events = EPOLLIN; + if ((ret = epoll_ctl(ep, EPOLL_CTL_ADD, fds[0], &event2)) < 0) { + return -1; + } + + pthread_t writer_thread; + pthread_create(&writer_thread, NULL, sleep_then_write, + (void *)(intptr_t)(fds[1])); + + if ((ret = epoll_wait(ep, &event, 1, 300)) != 1) { + return -1; + } + + pthread_join(writer_thread, NULL); + + close(ep); + close(fds[0]); + close(fds[1]); + return 0; +} + +static int +test_invalid_writes() +{ + sigset_t mask; + int fd; + + sigemptyset(&mask); + sigaddset(&mask, SIGINT); + + if (sigprocmask(SIG_BLOCK, &mask, NULL) < 0) { + return -1; + } + + char dummy = 0; + + { + fd = signalfd(-1, &mask, 0); + if (fd < 0) { + return -1; + } + + if (write(fd, &dummy, 1) >= 0) { + return -1; + } + + if (errno != EINVAL) { + return -1; + } + + close(fd); + } + + { + fd = timerfd_create(CLOCK_MONOTONIC, 0); + if (fd < 0) { + return -1; + } + + if (write(fd, &dummy, 1) >= 0) { + return -1; + } + + if (errno != EINVAL) { + return -1; + } + + close(fd); + } + + { + fd = epoll_create1(EPOLL_CLOEXEC); + if (fd < 0) { + return -1; + } + + if (write(fd, &dummy, 1) >= 0) { + return -1; + } + + if (errno != EINVAL) { + return -1; + } + + if (read(fd, &dummy, 1) >= 0) { + return -1; + } + + if (errno != EINVAL) { + return -1; + } + + close(fd); + } + + return 0; +} + int main() { + int r = 0; + + /* We check for fd leaks after each test. Remember fd numbers for + * checking here. 
*/ + { + int fds[3]; + if (fd_pipe(fds) < 0) { + return 1; + } + fd_leak_test_a = fds[0]; + fd_leak_test_b = fds[1]; + close(fds[0]); + close(fds[1]); + } + TEST(test1()); TEST(test2()); TEST(test3()); @@ -1483,10 +1697,14 @@ main() TEST(test20(fd_domain_socket)); TEST(test21()); // TEST(test22()); +#ifdef INTERACTIVE_TESTS TEST(test23()); +#endif TEST(test24(fd_tcp_socket)); TEST(test_recursive_register()); + TEST(test_remove_closed()); + TEST(test_same_fd_value()); + TEST(test_invalid_writes()); - TEST(testxx()); - return 0; + return r; } diff --git a/test/eventfd-ctx-test.c b/test/eventfd-ctx-test.c new file mode 100644 index 0000000..b3a56dd --- /dev/null +++ b/test/eventfd-ctx-test.c @@ -0,0 +1,233 @@ +#include + +#include +#include + +#include +#include +#include +#include + +#include "eventfd_ctx.h" + +#include "eventfd_ctx.c" + +#define REQUIRE(x) \ + do { \ + if (!(x)) { \ + fprintf(stderr, "failed assertion: %d\n", __LINE__); \ + abort(); \ + } \ + } while (0) + +static void +tc_init_terminate(void) +{ + int kq; + EventFDCtx eventfd; + + REQUIRE((kq = kqueue()) >= 0); + REQUIRE(eventfd_ctx_init(&eventfd, kq, 0, + EVENTFD_CTX_FLAG_SEMAPHORE) == 0); + { + struct pollfd pfd = {.fd = kq, .events = POLLIN}; + REQUIRE(poll(&pfd, 1, 0) == 0); + } + REQUIRE(eventfd_ctx_terminate(&eventfd) == 0); + REQUIRE(close(kq) == 0); + + REQUIRE((kq = kqueue()) >= 0); + REQUIRE(eventfd_ctx_init(&eventfd, kq, 1, + EVENTFD_CTX_FLAG_SEMAPHORE) == 0); + { + struct pollfd pfd = {.fd = kq, .events = POLLIN}; + REQUIRE(poll(&pfd, 1, 0) == 1); + REQUIRE(pfd.revents == POLLIN); + } + REQUIRE(eventfd_ctx_terminate(&eventfd) == 0); + REQUIRE(close(kq) == 0); +} + +static void +tc_simple_write(void) +{ + int kq; + EventFDCtx eventfd; + + REQUIRE((kq = kqueue()) >= 0); + REQUIRE(eventfd_ctx_init(&eventfd, kq, 0, 0) == 0); + { + REQUIRE(eventfd_ctx_write(&eventfd, UINT64_MAX) == EINVAL); + REQUIRE(eventfd_ctx_write(&eventfd, UINT64_MAX - 1) == 0); + REQUIRE(eventfd_ctx_write(&eventfd, 1) 
== EAGAIN); + REQUIRE(eventfd_ctx_write(&eventfd, 1) == EAGAIN); + + struct pollfd pfd = {.fd = kq, .events = POLLIN}; + REQUIRE(poll(&pfd, 1, 0) == 1); + REQUIRE(pfd.revents == POLLIN); + + uint64_t value; + REQUIRE(eventfd_ctx_read(&eventfd, &value) == 0); + REQUIRE(value == UINT64_MAX - 1); + + REQUIRE(poll(&pfd, 1, 0) == 0); + } + REQUIRE(eventfd_ctx_terminate(&eventfd) == 0); + REQUIRE(close(kq) == 0); +} + +static void +tc_simple_read(void) +{ + int kq; + EventFDCtx eventfd; + uint64_t value; + + REQUIRE((kq = kqueue()) >= 0); + REQUIRE(eventfd_ctx_init(&eventfd, kq, 3, + EVENTFD_CTX_FLAG_SEMAPHORE) == 0); + { + struct pollfd pfd = {.fd = kq, .events = POLLIN}; + REQUIRE(poll(&pfd, 1, 0) == 1); + REQUIRE(pfd.revents == POLLIN); + + REQUIRE(eventfd_ctx_read(&eventfd, &value) == 0); + REQUIRE(value == 1); + REQUIRE(eventfd_ctx_read(&eventfd, &value) == 0); + REQUIRE(value == 1); + REQUIRE(eventfd_ctx_read(&eventfd, &value) == 0); + REQUIRE(value == 1); + REQUIRE(eventfd_ctx_read(&eventfd, &value) == EAGAIN); + + REQUIRE(poll(&pfd, 1, 0) == 0); + } + REQUIRE(eventfd_ctx_terminate(&eventfd) == 0); + REQUIRE(close(kq) == 0); +} + +static void +tc_simple_write_read(void) +{ + int kq; + EventFDCtx eventfd; + uint64_t value; + + REQUIRE((kq = kqueue()) >= 0); + REQUIRE(eventfd_ctx_init(&eventfd, kq, 0, + EVENTFD_CTX_FLAG_SEMAPHORE) == 0); + { + struct pollfd pfd = {.fd = kq, .events = POLLIN}; + REQUIRE(poll(&pfd, 1, 0) == 0); + + REQUIRE(eventfd_ctx_write(&eventfd, 2) == 0); + + REQUIRE(poll(&pfd, 1, 0) == 1); + REQUIRE(pfd.revents == POLLIN); + + REQUIRE(eventfd_ctx_read(&eventfd, &value) == 0); + REQUIRE(value == 1); + REQUIRE(eventfd_ctx_read(&eventfd, &value) == 0); + REQUIRE(value == 1); + REQUIRE(eventfd_ctx_read(&eventfd, &value) == EAGAIN); + + REQUIRE(poll(&pfd, 1, 0) == 0); + } + REQUIRE(eventfd_ctx_terminate(&eventfd) == 0); + REQUIRE(close(kq) == 0); +} + +typedef struct { + EventFDCtx *eventfd; + int signal_pipe[2]; +} ReadThreadArgs; + +static 
atomic_int read_counter; + +static void * +read_fun(void *arg) +{ + ReadThreadArgs *args = arg; + EventFDCtx *eventfd = args->eventfd; + + for (;;) { + uint64_t value; + errno_t err; + + if ((err = eventfd_ctx_read(eventfd, &value)) == 0) { + int current_counter = + atomic_fetch_add(&read_counter, 1); + + if (current_counter % 10 == 0 && + current_counter < 100) { + REQUIRE(eventfd_ctx_write(eventfd, /**/ + 10) == 0); + } + + continue; + } + + REQUIRE(err == EAGAIN); + + struct pollfd pfds[2] = {/**/ + {.fd = eventfd->kq_, .events = POLLIN}, + {.fd = args->signal_pipe[0], .events = POLLIN}}; + REQUIRE(poll(pfds, nitems(pfds), -1) > 0); + + if (pfds[1].revents) { + break; + } + } + + return (NULL); +} + +static void +tc_threads_read(void) +{ + int kq; + EventFDCtx eventfd; + pthread_t threads[4]; + ReadThreadArgs thread_args[4]; + + for (int i = 0; i < 1000; ++i) { + read_counter = 0; + REQUIRE((kq = kqueue()) >= 0); + REQUIRE(eventfd_ctx_init(&eventfd, kq, 0, + EVENTFD_CTX_FLAG_SEMAPHORE) == 0); + + uint64_t counter_val = 100; + + for (int i = 0; i < (int)nitems(threads); ++i) { + thread_args[i].eventfd = &eventfd; + REQUIRE(pipe2(thread_args[i].signal_pipe, + O_CLOEXEC | O_NONBLOCK) == 0); + REQUIRE(pthread_create(&threads[i], NULL, /**/ + read_fun, &thread_args[i]) == 0); + } + + REQUIRE(eventfd_ctx_write(&eventfd, counter_val) == 0); + + while (atomic_load(&read_counter) != 2 * (int)counter_val) { + } + + for (int i = 0; i < (int)nitems(threads); ++i) { + REQUIRE(close(thread_args[i].signal_pipe[1]) == 0); + REQUIRE(pthread_join(threads[i], NULL) == 0); + REQUIRE(close(thread_args[i].signal_pipe[0]) == 0); + } + + REQUIRE(eventfd_ctx_terminate(&eventfd) == 0); + REQUIRE(close(kq) == 0); + REQUIRE(read_counter == 2 * counter_val); + } +} + +int +main() +{ + tc_init_terminate(); + tc_simple_write(); + tc_simple_read(); + tc_simple_write_read(); + tc_threads_read(); +} diff --git a/test/kqueue-state.c b/test/kqueue-state.c index 1dc725f..cf6be5d 100644 --- 
a/test/kqueue-state.c +++ b/test/kqueue-state.c @@ -1,3 +1,5 @@ +#include + #include #include @@ -6,13 +8,13 @@ #include -#include "../src/epoll.c" +#include "../src/epollfd_ctx.c" int main() { int kq; - int e; + errno_t ec; uint16_t retval; kq = kqueue(); @@ -20,30 +22,30 @@ main() err(1, "kqueue"); } - if ((e = kqueue_save_state(kq, 42, 0xfffu)) < 0) { - errno = -e; + if ((ec = kqueue_save_state(kq, 42, 0xfffu)) != 0) { + errno = ec; err(1, "kqueue_save_state"); } - if ((e = kqueue_save_state(kq, 42, 0xf0fu)) < 0) { - errno = -e; + if ((ec = kqueue_save_state(kq, 42, 0xf0fu)) != 0) { + errno = ec; err(1, "kqueue_save_state"); } - if ((e = kqueue_save_state(kq, 41, 0x123u)) < 0) { - errno = -e; + if ((ec = kqueue_save_state(kq, 41, 0x123u)) != 0) { + errno = ec; err(1, "kqueue_save_state"); } - if ((e = kqueue_load_state(kq, 42, &retval)) < 0) { - errno = -e; + if ((ec = kqueue_load_state(kq, 42, &retval)) != 0) { + errno = ec; err(1, "kqueue_load_state"); } fprintf(stderr, "got %x, expected %x\n", (unsigned)retval, 0xf0fu); - if ((e = kqueue_load_state(kq, 41, &retval)) < 0) { - errno = -e; + if ((ec = kqueue_load_state(kq, 41, &retval)) != 0) { + errno = ec; err(1, "kqueue_load_state"); } diff --git a/test/many-timers.c b/test/many-timers.c new file mode 100644 index 0000000..eb6e16e --- /dev/null +++ b/test/many-timers.c @@ -0,0 +1,273 @@ +#include + +#ifndef __linux__ +#include +#include +#endif + +#include +#include +#include + +#include +#include +#include +#include +#include + +#include +#include +#include + +#include + +#define REQUIRE(x) \ + do { \ + if (!(x)) { \ + fprintf(stderr, "REQUIRE in line %d failed.\n", \ + __LINE__); \ + exit(EXIT_FAILURE); \ + } \ + } while (0) + +// TODO(jan): Remove this once the definition is exposed in <sys/time.h> in +// all supported FreeBSD versions. 
+#ifndef timespecsub
+#define timespecsub(tsp, usp, vsp) \
+	do { \
+		(vsp)->tv_sec = (tsp)->tv_sec - (usp)->tv_sec; \
+		(vsp)->tv_nsec = (tsp)->tv_nsec - (usp)->tv_nsec; \
+		if ((vsp)->tv_nsec < 0) { \
+			(vsp)->tv_sec--; \
+			(vsp)->tv_nsec += 1000000000L; \
+		} \
+	} while (0)
+#endif /* timespecsub: *vsp = *tsp - *usp, borrowing a second if tv_nsec < 0 */
+/* nitems(x): number of elements of the array x. */
+#ifndef nitems
+#define nitems(x) (sizeof((x)) / sizeof((x)[0]))
+#endif
+/* Probe: does deleting EVFILT_TIMER ident 0 on fd succeed? Closes fd! */
+#ifndef __linux__
+static bool
+is_fast_timer(int fd)
+{
+	struct kevent kev[1];
+	EV_SET(&kev[0], 0, EVFILT_TIMER, EV_DELETE, 0, 0, 0);
+
+	bool is_fast = kevent(fd, kev, nitems(kev), NULL, 0, NULL) == 0;
+	close(fd); /* the descriptor is consumed; callers must not reuse it */
+	return (is_fast);
+}
+#endif
+/* Creates 1024 timerfds and runs five timing scenarios on the first five. */
+int
+main(void)
+{
+	int timer_fds[1024];
+	int i;
+
+	for (i = 0; i < (int)nitems(timer_fds); ++i) {
+		REQUIRE((timer_fds[i] = timerfd_create(CLOCK_MONOTONIC, /**/
+			    TFD_CLOEXEC | TFD_NONBLOCK)) >= 0);
+	}
+
+	struct pollfd pfd;
+	struct timespec b, e;
+	struct itimerspec time;
+	int timerfd;
+
+	{ /* Case 1: one-shot 100 ms timer; poll must wake in [100, 150) ms. */
+		timerfd = timer_fds[0];
+
+		time = (struct itimerspec){
+		    .it_value.tv_sec = 0,
+		    .it_value.tv_nsec = 100000000,
+		};
+
+		REQUIRE(timerfd_settime(timerfd, 0, &time, NULL) == 0);
+
+		pfd = (struct pollfd){.fd = timerfd, .events = POLLIN};
+		REQUIRE(clock_gettime(CLOCK_MONOTONIC, &b) == 0);
+		REQUIRE(poll(&pfd, 1, -1) == 1);
+		REQUIRE(clock_gettime(CLOCK_MONOTONIC, &e) == 0);
+		timespecsub(&e, &b, &e);
+		REQUIRE(e.tv_sec == 0 && e.tv_nsec >= 100000000 &&
+		    e.tv_nsec < 150000000);
+
+#ifndef __linux__
+		REQUIRE(is_fast_timer(timerfd));
+#endif
+	}
+
+	{ /* Case 2: periodic timer, value == interval == 100 ms. */
+		timerfd = timer_fds[1];
+
+		time = (struct itimerspec){
+		    .it_value.tv_sec = 0,
+		    .it_value.tv_nsec = 100000000,
+		    .it_interval.tv_sec = 0,
+		    .it_interval.tv_nsec = 100000000,
+		};
+
+		REQUIRE(timerfd_settime(timerfd, 0, &time, NULL) == 0);
+
+		pfd = (struct pollfd){.fd = timerfd, .events = POLLIN};
+		REQUIRE(clock_gettime(CLOCK_MONOTONIC, &b) == 0);
+		REQUIRE(poll(&pfd, 1, -1) == 1);
+		REQUIRE(clock_gettime(CLOCK_MONOTONIC, &e) == 0);
+		timespecsub(&e, &b, &e);
+		REQUIRE(e.tv_sec == 0 && e.tv_nsec >= 100000000 &&
+		    e.tv_nsec < 150000000);
+
+		REQUIRE(poll(&pfd, 1, -1) == 1); /* still readable: not yet read */
+		uint64_t timeouts;
+		REQUIRE(read(timerfd, &timeouts, sizeof(timeouts)) ==
+		    (ssize_t)sizeof(timeouts));
+		REQUIRE(timeouts == 1);
+
+		usleep(230000); /* spans two further 100 ms periods */
+
+		REQUIRE(read(timerfd, &timeouts, sizeof(timeouts)) ==
+		    (ssize_t)sizeof(timeouts));
+		REQUIRE(timeouts == 2);
+
+#ifndef __linux__
+		REQUIRE(is_fast_timer(timerfd));
+#endif
+	}
+
+	{ /* Case 3: interval differs from value by 1 ns; expected NOT fast. */
+		timerfd = timer_fds[2];
+
+		time = (struct itimerspec){
+		    .it_value.tv_sec = 0,
+		    .it_value.tv_nsec = 100000000,
+		    .it_interval.tv_sec = 0,
+		    .it_interval.tv_nsec = 100000001,
+		};
+
+		REQUIRE(timerfd_settime(timerfd, 0, &time, NULL) == 0);
+
+		pfd = (struct pollfd){.fd = timerfd, .events = POLLIN};
+		REQUIRE(clock_gettime(CLOCK_MONOTONIC, &b) == 0);
+		REQUIRE(poll(&pfd, 1, -1) == 1);
+		REQUIRE(clock_gettime(CLOCK_MONOTONIC, &e) == 0);
+		timespecsub(&e, &b, &e);
+		REQUIRE(e.tv_sec == 0 && e.tv_nsec >= 100000000 &&
+		    e.tv_nsec < 150000000);
+
+		REQUIRE(poll(&pfd, 1, -1) == 1); /* still readable: not yet read */
+		uint64_t timeouts;
+		REQUIRE(read(timerfd, &timeouts, sizeof(timeouts)) ==
+		    (ssize_t)sizeof(timeouts));
+		REQUIRE(timeouts == 1);
+
+		usleep(230000); /* spans two further periods */
+
+		REQUIRE(read(timerfd, &timeouts, sizeof(timeouts)) ==
+		    (ssize_t)sizeof(timeouts));
+		REQUIRE(timeouts == 2);
+
+#ifndef __linux__
+		REQUIRE(!is_fast_timer(timerfd));
+#endif
+	}
+
+	{ /* Case 4: re-arm with 50 ms after first expiry; total [150, 200) ms. */
+		timerfd = timer_fds[3];
+
+		time = (struct itimerspec){
+		    .it_value.tv_sec = 0,
+		    .it_value.tv_nsec = 100000000,
+		    .it_interval.tv_sec = 0,
+		    .it_interval.tv_nsec = 100000000,
+		};
+
+		REQUIRE(timerfd_settime(timerfd, 0, &time, NULL) == 0);
+
+		REQUIRE(clock_gettime(CLOCK_MONOTONIC, &b) == 0);
+
+		pfd = (struct pollfd){.fd = timerfd, .events = POLLIN};
+		REQUIRE(poll(&pfd, 1, -1) == 1);
+
+		time = (struct itimerspec){
+		    .it_value.tv_sec = 0,
+		    .it_value.tv_nsec = 50000000,
+		    .it_interval.tv_sec = 0,
+		    .it_interval.tv_nsec = 50000000,
+		};
+
+		REQUIRE(timerfd_settime(timerfd, 0, &time, NULL) == 0);
+		REQUIRE(poll(&pfd, 1, -1) == 1);
+
+		uint64_t timeouts;
+		REQUIRE(read(timerfd, &timeouts, sizeof(timeouts)) ==
+		    (ssize_t)sizeof(timeouts));
+		REQUIRE(timeouts == 1); /* re-arming discarded the old expiry */
+
+		REQUIRE(clock_gettime(CLOCK_MONOTONIC, &e) == 0);
+		timespecsub(&e, &b, &e);
+		fprintf(stderr, "line %d: %ld/%ld\n", __LINE__, /**/
+		    (long)e.tv_sec, (long)e.tv_nsec);
+		REQUIRE(e.tv_sec == 0 && e.tv_nsec >= 150000000 &&
+		    e.tv_nsec < 200000000);
+
+#ifndef __linux__
+		REQUIRE(is_fast_timer(timerfd));
+#endif
+	}
+
+	{ /* Case 5: disarm (all-zero spec), poll must time out, then re-arm. */
+		timerfd = timer_fds[4];
+
+		time = (struct itimerspec){
+		    .it_value.tv_sec = 0,
+		    .it_value.tv_nsec = 100000000,
+		    .it_interval.tv_sec = 0,
+		    .it_interval.tv_nsec = 100000000,
+		};
+
+		REQUIRE(timerfd_settime(timerfd, 0, &time, NULL) == 0);
+
+		REQUIRE(clock_gettime(CLOCK_MONOTONIC, &b) == 0);
+
+		pfd = (struct pollfd){.fd = timerfd, .events = POLLIN};
+		REQUIRE(poll(&pfd, 1, -1) == 1);
+
+		uint64_t timeouts;
+		REQUIRE(read(timerfd, &timeouts, sizeof(timeouts)) ==
+		    (ssize_t)sizeof(timeouts));
+		REQUIRE(timeouts == 1);
+
+		time = (struct itimerspec){
+		    .it_value.tv_sec = 0,
+		    .it_value.tv_nsec = 0,
+		    .it_interval.tv_sec = 0,
+		    .it_interval.tv_nsec = 0,
+		};
+
+		REQUIRE(timerfd_settime(timerfd, 0, &time, NULL) == 0);
+		REQUIRE(poll(&pfd, 1, 200) == 0); /* 200 ms timeout, no event */
+
+		REQUIRE(clock_gettime(CLOCK_MONOTONIC, &e) == 0);
+		timespecsub(&e, &b, &e);
+
+		fprintf(stderr, "line %d: %ld/%ld\n", __LINE__, /**/
+		    (long)e.tv_sec, (long)e.tv_nsec);
+		REQUIRE(e.tv_sec == 0 && e.tv_nsec >= 300000000 &&
+		    e.tv_nsec < 350000000);
+
+		time = (struct itimerspec){
+		    .it_value.tv_sec = 1,
+		    .it_value.tv_nsec = 0,
+		    .it_interval.tv_sec = 1,
+		    .it_interval.tv_nsec = 0,
+		};
+		REQUIRE(timerfd_settime(timerfd, 0, &time, NULL) == 0);
+
+#ifndef __linux__
+		REQUIRE(is_fast_timer(timerfd));
+#endif
+	}
+}
diff --git a/test/perf-many-fds.c b/test/perf-many-fds.c
new file mode 100644
index 0000000..9698992
--- /dev/null
+++ b/test/perf-many-fds.c
@@ -0,0 +1,43 @@
+#include <sys/eventfd.h>
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <time.h>
+/* REQUIRE(x): report the failing source line on stderr and exit if !x. */
+#define REQUIRE(x) \
+	do { \
+		if (!(x)) { \
+			fprintf(stderr, "REQUIRE in line %d failed.\n", \
+			    __LINE__); \
+			exit(EXIT_FAILURE); \
+		} \
+	} while (0)
+/* Number of eventfds kept open while the write loop runs. */
+#define NR_EVENTFDS (20000)
+/* Opens NR_EVENTFDS eventfds, then times 2,000,000 writes to the first. */
+int
+main(void)
+{
+	struct timespec time1;
+	struct timespec time2;
+
+	REQUIRE(clock_gettime(CLOCK_MONOTONIC, &time1) == 0);
+
+	int *eventfds = malloc(NR_EVENTFDS * sizeof(int));
+	REQUIRE(eventfds);
+
+	for (long i = 0; i < NR_EVENTFDS; ++i) {
+		eventfds[i] = eventfd(0, EFD_CLOEXEC | EFD_NONBLOCK);
+		REQUIRE(eventfds[i] >= 0);
+	}
+
+	for (long i = 0; i < 2000000; ++i) {
+		REQUIRE(eventfd_write(eventfds[0], 1) == 0);
+		if (i % 10000 == 0) { /* progress dot every 10000 writes */
+			fprintf(stderr, ".");
+		}
+	}
+
+	REQUIRE(clock_gettime(CLOCK_MONOTONIC, &time2) == 0);
+	REQUIRE(time2.tv_sec - time1.tv_sec < 15); /* whole-second budget */
+}
diff --git a/test/tst-eventfd.c b/test/tst-eventfd.c
new file mode 100644
index 0000000..5e9d831
--- /dev/null
+++ b/test/tst-eventfd.c
@@ -0,0 +1,573 @@
+/* clang-format off */
+
+/*
+ * Based on:
+ * https://raw.githubusercontent.com/cloudius-systems/osv/master/tests/tst-eventfd.cc
+ *
+ * https://github.com/cloudius-systems/osv
+ * https://github.com/cloudius-systems/osv/blob/master/LICENSE
+ */
+
+#if 0
+Copyright (C) 2013 Cloudius Systems, Ltd.
+
+Parts are copyright by other contributors. Please refer to copyright notices
+in the individual source files, and to the git commit log, for a more accurate
+list of copyright holders.
+
+OSv is open-source software, distributed under the 3-clause BSD license:
+
+    Redistribution and use in source and binary forms, with or without
+    modification, are permitted provided that the following conditions are met:
+
+    * Redistributions of source code must retain the above copyright notice,
+      this list of conditions and the following disclaimer.
+
+    * Redistributions in binary form must reproduce the above copyright notice,
+      this list of conditions and the following disclaimer in the documentation
+      and/or other materials provided with the distribution.
+
+    * Neither the name of the Cloudius Systems, Ltd. nor the names of its
+      contributors may be used to endorse or promote products derived from this
+      software without specific prior written permission.
+
+    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+    AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+    IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+    DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+    FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+    DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+    SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+    CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+    OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+    OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+This project also includes source code adopted and adapted from four other
+open-source projects - FreeBSD, OpenSolaris, Prex and Musl. These projects have
+their own licenses. Please refer to the files documentation/LICENSE-*
+for the licenses and copyright statements of these projects.
+#endif
+
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+/* Scratch file that api_test() creates and unlinks. */
+#define TMP_FILE "/tmp/f1"
+/* Print msg with the current errno text (perror) and exit with failure. */
+#define handle_perror(msg) \
+    do { perror(msg); printf("\n"); exit(EXIT_FAILURE); } while (0)
+/* Print msg to stderr and exit with failure; use when errno is not relevant. */
+#define handle_error(msg) \
+    do { fprintf(stderr, "%s\n", msg); exit(EXIT_FAILURE); } while (0)
+/* Initial value is read back once, then EAGAIN, then a write/read round trip. */
+int simple_test(void)
+{
+    int efd;
+    uint64_t c;
+    uint64_t u;
+    ssize_t s;
+
+    printf("eventfd: running basic test: ");
+    fflush(stdout);
+    c = 5;
+    efd = eventfd(c, EFD_CLOEXEC | EFD_NONBLOCK);
+    if (efd == -1) {
+        handle_perror("eventfd");
+    }
+
+    s = read(efd, &u, sizeof(u));
+    if (s != sizeof(u)) {
+        handle_perror("read");
+    }
+
+    if (c != u) {
+        handle_error("Incorrect value read.");
+    }
+
+    s = read(efd, &u, sizeof(u));
+    if (s < 0) {
+        /* The first read consumed the counter, so this non-blocking */
+        /* read has nothing to return and must fail with EAGAIN.     */
+        if (errno != EAGAIN) {
+            handle_error("EAGAIN expected");
+        }
+    } else {
+        handle_error("read failure and EAGAIN expected");
+    }
+
+    s = write(efd, &c, sizeof(c));
+    if (s != sizeof(c)) {
+        handle_perror("write");
+    }
+
+    s = read(efd, &u, sizeof(u));
+    if (s != sizeof(u)) {
+        handle_perror("read");
+    }
+
+    if (c != u) {
+        handle_error("Incorrect value read."); /* errno is not meaningful here */
+    }
+
+    close(efd);
+    printf(" PASS\n");
+    fflush(stdout);
+    return (0);
+}
+/* EFD_SEMAPHORE: initial value 5 must yield five reads of 1, then EAGAIN. */
+int semaphore_test(void)
+{
+    int efd;
+    uint64_t c;
+    uint64_t u;
+    ssize_t s;
+    uint64_t i;
+
+    printf("eventfd: Running semaphore_test: ");
+    fflush(stdout);
+    c = 5;
+    efd = eventfd(c, EFD_CLOEXEC | EFD_NONBLOCK | EFD_SEMAPHORE);
+    if (efd == -1) {
+        handle_perror("eventfd");
+    }
+
+    for (i = 0; i < c; i++) {
+        s = read(efd, &u, sizeof(u));
+        if (s != sizeof(u)) {
+            handle_perror("read");
+        }
+
+        if (u != 1) {
+            handle_error("Semaphore read count 1 expected.");
+        }
+    }
+
+    s = read(efd, &u, sizeof(u));
+    if (s < 0) {
+        if (errno != EAGAIN) {
+            handle_error("EAGAIN expected");
+        }
+    } else {
+        handle_error("read failure and EAGAIN expected");
+    }
+
+    printf(" PASS\n");
+    fflush(stdout);
+    close(efd);
+    return (0);
+}
+/* Per-writer arguments: add ev_count to efd, loop times. */
+struct thread_data {
+    uint64_t ev_count;
+    int loop;
+    int efd;
+};
+/* Thread body: writes td->ev_count to td->efd td->loop times, 100 us apart. */
+void *thread_write(void *arg)
+{
+    struct thread_data *td = (struct thread_data *) arg;
+    ssize_t s;
+    int i;
+
+    for (i = 0; i < td->loop; i++) {
+        s = write(td->efd, &td->ev_count, sizeof(td->ev_count));
+        if (s != sizeof(td->ev_count)) {
+            handle_perror("write");
+        }
+        usleep(100);
+    }
+    return (NULL);
+}
+/* One writer thread per prime; the reader must receive exactly the total. */
+int threaded_test(void)
+{
+    uint64_t count[] = {
+        2, 3, 5, 7, 11, 13, 17, 19, 23, 29,
+        31, 37, 41, 43, 47, 53, 59, 61, 67, 71,
+        73, 79, 83, 89, 97, 101, 103, 107, 109, 113,
+        127, 131, 137, 139, 149, 151, 157, 163, 167, 173,
+        179, 181, 191, 193, 197, 199, 211, 223, 227, 229,
+        233, 239, 241, 251, 257, 263, 269, 271, 277, 281,
+        283, 293, 307, 311, 313, 317, 331, 337, 347, 349,
+        353, 359, 367, 373, 379, 383, 389, 397, 401, 409,
+        419, 421, 431, 433, 439, 443, 449, 457, 461, 463,
+        467, 479, 487, 491, 499, 503, 509, 521, 523, 541,
+        547, 557, 563, 569, 571, 577, 587, 593, 599, 601,
+        607, 613, 617, 619, 631, 641, 643, 647, 653, 659,
+        661, 673, 677, 683, 691, 701, 709, 719, 727, 733,
+        739, 743, 751, 757, 761, 769, 773, 787, 797, 809,
+        811, 821, 823, 827, 829, 839, 853, 857, 859, 863,
+        877, 881, 883, 887, 907, 911, 919, 929, 937, 941,
+        947, 953, 967, 971, 977, 983, 991, 997, 1009,
+    };
+    const int LOOP = 1000;
+    const int THREADS = sizeof(count) / sizeof(count[0]);
+    int efd;
+    pthread_t thread[THREADS];
+    struct thread_data td[THREADS];
+    uint64_t total;
+    int i;
+    int rc;
+    ssize_t s;
+    uint64_t u;
+    uint64_t v;
+
+    printf("eventfd: running simple threaded test: ");
+    fflush(stdout);
+    efd = eventfd(0, EFD_CLOEXEC);
+    if (efd == -1) {
+        handle_perror("eventfd");
+    }
+
+    total = 0;
+    for (i = 0; i < THREADS; i++) {
+        td[i].efd = efd;
+        td[i].ev_count = count[i];
+        td[i].loop = LOOP;
+        total += (count[i] * LOOP);
+    }
+
+    for (i = 0; i < THREADS; i++) {
+        rc = pthread_create(&thread[i], NULL, thread_write, &td[i]);
+        if (rc != 0) { /* pthread_* return the error number; errno is unset */
+            errno = rc; handle_perror("pthread_create");
+        }
+    }
+
+    v = 0;
+    while (v < total) { /* stop on overshoot too, instead of blocking forever */
+        s = read(efd, &u, sizeof(u));
+        if (s != sizeof(u)) {
+            handle_perror("read");
+        }
+
+        v += u;
+    }
+
+    if (v != total) { /* reachable only if more than total was read */
+        handle_error("Unexpected value read");
+    }
+
+    /* verify all threads have finished */
+    for (i = 0; i < THREADS; i++) {
+        rc = pthread_join(thread[i], NULL);
+        if (rc != 0) { /* pthread_* return the error number; errno is unset */
+            errno = rc; handle_perror("pthread_join");
+        }
+    }
+    close(efd);
+    printf(" PASS\n");
+    fflush(stdout);
+    return (0);
+}
+/* poll() readiness for empty, non-empty and nearly-full counters. */
+int poll_test(void)
+{
+    int efd;
+    ssize_t s;
+    uint64_t c;
+    uint64_t u;
+    struct pollfd pfd[2];
+    int rc;
+
+    printf("eventfd: running poll test: ");
+    fflush(stdout);
+    efd = eventfd(0, EFD_CLOEXEC | EFD_NONBLOCK);
+    if (efd == -1) {
+        handle_perror("eventfd");
+    }
+
+    s = read(efd, &c, sizeof(c));
+    if (s < 0) {
+        if (errno != EAGAIN) {
+            handle_error("EAGAIN expected");
+        }
+    } else {
+        handle_error("read failure and EAGAIN expected");
+    }
+
+    /* no event count - read poll */
+    pfd[0].fd = efd;
+    pfd[0].events = POLLIN;
+    rc = poll(pfd, 1, 10);
+    if (rc != 0) {
+        handle_error("expected timeout.\n");
+    }
+
+    if ((pfd[0].revents & POLLIN) != 0) {
+        handle_error("no read event - POLLIN must not be set");
+    }
+
+    if ((pfd[0].revents & POLLOUT) != 0) {
+        handle_error("write event on read fd is not expected");
+    }
+
+#ifndef __FreeBSD__
+    /* no event count - write poll */
+    pfd[0].fd = efd;
+    pfd[0].events = POLLOUT;
+    rc = poll(pfd, 1, -1);
+    if (rc != 1 || ((pfd[0].revents & POLLOUT) == 0)) {
+        handle_error("expected write event.");
+    }
+
+    /* combined read - write poll */
+    pfd[0].fd = efd;
+    pfd[0].events = POLLOUT;
+    pfd[1].fd = efd;
+    pfd[1].events = POLLIN;
+    rc = poll(pfd, 2, -1);
+    if (rc == 1) {
+        if ((pfd[0].revents & POLLOUT) == 0) {
+            handle_error("expected write event");
+        }
+
+        if ((pfd[0].revents & POLLIN) != 0) {
+            handle_error("read event on write fd is not expected");
+        }
+
+        if (((pfd[1].revents & POLLOUT) != 0) ||
+            ((pfd[1].revents & POLLIN) != 0) ) {
+            handle_error("expected no events on read fd.");
+        }
+    } else {
+        handle_error("one event expected.");
+    }
+#endif
+
+    /* write to event and check read poll */
+    c = 1;
+    s = write(efd, &c, sizeof(c));
+    if (s != sizeof(c)) { /* compare with the payload size, not sizeof(ssize_t) */
+        handle_perror("write");
+    }
+
+#ifndef __FreeBSD__
+    pfd[0].fd = efd;
+    pfd[0].events = POLLOUT;
+    pfd[1].fd = efd;
+    pfd[1].events = POLLIN;
+    rc = poll(pfd, 2, -1);
+    if (rc == 2) {
+        if ((pfd[0].revents & POLLOUT) == 0) {
+            handle_error("expected write event");
+        }
+
+        if ((pfd[0].revents & POLLIN) != 0) {
+            handle_error("read event on write fd is not expected");
+        }
+
+        if ((pfd[1].revents & POLLOUT) != 0) {
+            handle_error("write event on read fd is not expected");
+        }
+
+        if ((pfd[1].revents & POLLIN) == 0) {
+            handle_error("expected read event");
+        }
+    } else {
+        handle_error("expected two event.");
+    }
+#endif
+
+    s = read(efd, &u, sizeof(u));
+    if (s != sizeof(u)) {
+        handle_perror("read");
+    }
+
+    /* max value boundary condition checking */
+    c = ULLONG_MAX - 2;
+    s = write(efd, &c, sizeof(c));
+    if (s != sizeof(c)) {
+        handle_perror("write");
+    }
+
+#ifndef __FreeBSD__
+    pfd[0].fd = efd;
+    pfd[0].events = POLLOUT;
+    pfd[1].fd = efd;
+    pfd[1].events = POLLIN;
+    rc = poll(pfd, 2, -1);
+    if (rc == 2) {
+        if ((pfd[0].revents & POLLOUT) == 0) {
+            handle_error("expected write event");
+        }
+
+        if ((pfd[0].revents & POLLIN) != 0) {
+            handle_error("read event on write fd is not expected");
+        }
+
+        if ((pfd[1].revents & POLLOUT) != 0) {
+            handle_error("write event on read fd is not expected");
+        }
+
+        if ((pfd[1].revents & POLLIN) == 0) {
+            handle_error("expected read event");
+        }
+    } else {
+        handle_error("expected two event.");
+    }
+#endif
+
+    c = 2; /* would push the counter past its maximum, so EAGAIN is expected */
+    s = write(efd, &c, sizeof(c));
+    if (s < 0) {
+        if (errno != EAGAIN) {
+            handle_error("write - expected EAGAIN");
+        }
+    } else {
+        handle_error("write failure and EAGAIN expected");
+    }
+
+    c = 1; /* counter becomes ULLONG_MAX - 1, which the final read checks */
+    s = write(efd, &c, sizeof(c));
+    if (s != sizeof(c)) {
+        handle_perror("write");
+    }
+
+    pfd[0].fd = efd;
+    pfd[0].events = POLLOUT;
+    pfd[1].fd = efd;
+    pfd[1].events = POLLIN;
+    rc = poll(pfd, 2, -1);
+    if (rc == 1) {
+        if ((pfd[0].revents & POLLOUT) != 0) {
+            handle_error("write event not expected");
+        }
+
+        if ((pfd[0].revents & POLLIN) != 0) {
+            handle_error("read event on write fd is not expected");
+        }
+
+        if ((pfd[1].revents & POLLOUT) != 0) {
+            handle_error("write event on read fd is not expected");
+        }
+
+        if ((pfd[1].revents & POLLIN) == 0) {
+            handle_error("expected read event");
+        }
+    } else {
+        handle_error("one event expected."); /* this branch requires rc == 1 */
+    }
+
+    c = 1;
+    s = write(efd, &c, sizeof(c));
+    if (s < 0) {
+        if (errno != EAGAIN) {
+            handle_error("write - expected EAGAIN");
+        }
+    } else {
+        handle_error("write failure and EAGAIN expected");
+    }
+
+    s = read(efd, &u, sizeof(u));
+    if (s != sizeof(u)) {
+        handle_perror("read");
+    }
+
+    if (u != ULLONG_MAX - 1) {
+        handle_error("Incorrect value read");
+    }
+
+    close(efd);
+    printf(" PASS\n");
+    fflush(stdout);
+    return (0);
+}
+/* eventfd_read()/eventfd_write() on a live fd, a closed fd and a plain file. */
+int api_test(void)
+{
+    int efd;
+    int rc;
+    eventfd_t v;
+    eventfd_t u;
+    int fd;
+
+    printf("eventfd: running API test: ");
+    fflush(stdout);
+    efd = eventfd(0, EFD_CLOEXEC | EFD_NONBLOCK);
+    if (efd == -1) {
+        handle_perror("eventfd");
+    }
+
+    rc = eventfd_read(efd, &v);
+    if (rc < 0) {
+        if (errno != EAGAIN) {
+            handle_perror("eventfd_read");
+        }
+    }
+
+    u = 10;
+    rc = eventfd_write(efd, u);
+    if (rc < 0) {
+        handle_perror("eventfd_write");
+    }
+
+    rc = eventfd_read(efd, &v);
+    if (rc < 0) {
+        handle_perror("eventfd_read");
+    }
+
+    close(efd);
+
+    /* check errors */
+    rc = eventfd_read(efd, &v); /* efd is closed: only EBADF is tolerated */
+    if (rc < 0) {
+        if (errno != EBADF) {
+            handle_perror("eventfd_read");
+        }
+    }
+
+    rc = eventfd_write(efd, u);
+    if (rc < 0) {
+        if (errno != EBADF) {
+            handle_perror("eventfd_write");
+        }
+    }
+
+    fd = creat(TMP_FILE, 0777); /* a regular file, not an eventfd */
+    if (fd < 0) {
+        handle_perror("open");
+    }
+
+    rc = eventfd_read(fd, &v);
+    if (rc < 0) {
+        if (errno != EBADF) {
+            handle_perror("eventfd_read");
+        }
+    }
+
+    rc = eventfd_write(fd, u);
+    if (rc < 0) {
+        if (errno != EBADF) {
+            handle_perror("eventfd_write");
+        }
+    }
+    close(fd);
+    unlink(TMP_FILE);
+    printf(" PASS\n");
+    fflush(stdout);
+    return (0);
+}
+/* Runs every test in order; each one exits the process on failure. */
+int main(void)
+{
+    simple_test();
+    semaphore_test();
+    threaded_test();
+    poll_test();
+    api_test();
+    return (0);
+}