From 0f8cdb70c27e8c9f921b91a5cf0e4950139cc4a4 Mon Sep 17 00:00:00 2001
From: Francesco Lavra
Date: Fri, 18 Oct 2024 16:09:40 +0200
Subject: [PATCH] epoll: add support for polling on epoll file descriptors

Closes #1192.
---
Review notes (below the "---" marker, so not part of the commit message):
 - test_poll_nested: the exception set handed to select() was never
   populated because FD_SET() targeted wfds a second time after
   FD_ZERO(&efds); both occurrences now target efds. The hunk line counts
   are unchanged, but the post-image blob hash on the "index" line of
   test/runtime/epoll.c no longer matches; regenerate the patch if that
   matters.
 - The header names of the #include directives in the test/runtime/epoll.c
   hunk are missing from this copy of the patch and must be restored before
   it can be applied. The added one is presumably <poll.h> (struct pollfd is
   used by the new test) -- confirm against the original commit.
 - NOTE(review): epoll_ctl() only rejects adding an epoll descriptor to
   itself. With two instances registered in each other, epoll_events()
   appears to re-enter itself through apply(f->events, t) without a visible
   bound -- confirm whether a cycle/depth check is needed (Linux reports
   ELOOP for this case).
 - NOTE(review): epoll_events() reports EPOLLIN whenever any registered,
   non-zombie descriptor has a reportable event, whatever that event is;
   the new test relies on this (a writable socket makes polled_efd readable).

 src/unix/poll.c      | 27 ++++++++++++++++++-
 test/runtime/epoll.c | 63 ++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 89 insertions(+), 1 deletion(-)

diff --git a/src/unix/poll.c b/src/unix/poll.c
index 5aed22510..31d135e1d 100644
--- a/src/unix/poll.c
+++ b/src/unix/poll.c
@@ -63,6 +63,7 @@ struct epoll {
     struct list blocked_head; /* an epoll_blocked per thread (in epoll_wait) */
     struct refcount refcount;
     enum epoll_type epoll_type;
+    closure_struct(fdesc_events, fd_events);
     closure_struct(fdesc_close, close);
     closure_struct(thunk, free);
     heap h;
@@ -285,6 +286,30 @@ void epoll_finish(epoll e)
     refcount_release(&e->refcount);
 }
 
+closure_func_basic(fdesc_events, u32, epoll_events,
+                   thread t)
+{
+    epoll e = struct_from_closure(epoll, fd_events);
+    u32 events = 0;
+    spin_rlock(&e->fds_lock);
+    bitmap_foreach_set(e->fds, fd) {
+        epollfd efd = vector_get(e->events, fd);
+        if (efd->zombie)
+            continue;
+        spin_lock(&efd->lock);
+        if (efd->registered) {
+            fdesc f = efd->f;
+            events = apply(f->events, t) & (efd->eventmask | POLL_EXCEPTIONS);
+            events = report_from_notify_events(efd, events);
+        }
+        spin_unlock(&efd->lock);
+        if (events)
+            break;
+    }
+    spin_runlock(&e->fds_lock);
+    return events ? EPOLLIN : 0;
+}
+
 closure_func_basic(fdesc_close, sysreturn, epoll_close,
                    context ctx, io_completion completion)
 {
@@ -301,6 +326,7 @@ sysreturn epoll_create(int flags)
     if (e == INVALID_ADDRESS)
         return -ENOMEM;
     init_fdesc(e->h, &e->f, FDESC_TYPE_EPOLL);
+    e->f.events = init_closure_func(&e->fd_events, fdesc_events, epoll_events);
     e->f.close = init_closure_func(&e->close, fdesc_close, epoll_close);
     u64 fd = allocate_fd(current->p, e);
     if (fd == INVALID_PHYSICAL) {
@@ -633,7 +659,6 @@ sysreturn epoll_ctl(int epfd, int op, int fd, struct epoll_event *event)
     if (rv)
         return rv;
 
-    /* XXX verify that fd is not an epoll instance*/
     epoll e = resolve_fd(current->p, epfd);
     if ((e->f.type != FDESC_TYPE_EPOLL) || (f == &e->f)) {
         rv = -EINVAL;
diff --git a/test/runtime/epoll.c b/test/runtime/epoll.c
index 2f7ae5a37..09c586861 100644
--- a/test/runtime/epoll.c
+++ b/test/runtime/epoll.c
@@ -7,6 +7,7 @@
 #include
 #include
 #include
+#include
 
 #include "../test_utils.h"
 
@@ -312,6 +313,67 @@ static void test_epollexclusive(void)
     close(evfd);
 }
 
+static void test_poll_nested(void)
+{
+    int sock_fd, polled_efd, efd;
+    struct pollfd pfd;
+    fd_set rfds, wfds, efds;
+    struct timeval tv;
+    struct epoll_event event;
+
+    polled_efd = epoll_create1(0);
+    if (polled_efd < 0)
+        test_perror("epoll_create1");
+    efd = epoll_create1(0);
+    if (efd < 0)
+        test_perror("epoll_create1");
+    event.events = EPOLLIN | EPOLLOUT;
+    event.data.fd = polled_efd;
+    test_assert(epoll_ctl(efd, EPOLL_CTL_ADD, polled_efd, &event) == 0);
+
+    /* try to put an epoll file descriptor into its own file descriptor set */
+    test_assert((epoll_ctl(efd, EPOLL_CTL_ADD, efd, &event) == -1) && (errno == EINVAL));
+
+    /* polled_efd is not ready for I/O */
+    pfd.fd = polled_efd;
+    pfd.events = POLLIN | POLLOUT;
+    test_assert(poll(&pfd, 1, 0) == 0);
+    FD_ZERO(&rfds);
+    FD_SET(polled_efd, &rfds);
+    FD_ZERO(&wfds);
+    FD_SET(polled_efd, &wfds);
+    FD_ZERO(&efds);
+    FD_SET(polled_efd, &efds);
+    tv.tv_sec = tv.tv_usec = 0;
+    test_assert(select(polled_efd + 1, &rfds, &wfds, &efds, &tv) == 0);
+    event.events = EPOLLIN | EPOLLOUT;
+    test_assert(epoll_wait(efd, &event, 1, 0) == 0);
+
+    /* add a socket file descriptor to polled_efd to make it ready for I/O */
+    sock_fd = socket(AF_INET, SOCK_DGRAM, 0);
+    if (sock_fd < 0)
+        test_perror("socket");
+    event.events = EPOLLIN | EPOLLOUT;
+    test_assert(epoll_ctl(polled_efd, EPOLL_CTL_ADD, sock_fd, &event) == 0);
+
+    /* polled_efd is ready for I/O */
+    test_assert((poll(&pfd, 1, 0) == 1) && (pfd.revents == POLLIN));
+    FD_ZERO(&rfds);
+    FD_SET(polled_efd, &rfds);
+    FD_ZERO(&wfds);
+    FD_SET(polled_efd, &wfds);
+    FD_ZERO(&efds);
+    FD_SET(polled_efd, &efds);
+    test_assert(select(polled_efd + 1, &rfds, &wfds, &efds, &tv) == 1);
+    test_assert(FD_ISSET(polled_efd, &rfds));
+    test_assert(epoll_wait(efd, &event, 1, 0) == 1);
+    test_assert((event.data.fd == polled_efd) && (event.events == EPOLLIN));
+
+    close(efd);
+    close(polled_efd);
+    close(sock_fd);
+}
+
 int main(int argc, char **argv)
 {
     test_ctl();
@@ -319,6 +381,7 @@ int main(int argc, char **argv)
     test_edgetrigger();
     test_eventfd_et();
     test_epollexclusive();
+    test_poll_nested();
 
     printf("test passed\n");
     return EXIT_SUCCESS;