From 3a1c15b03a77a97bd86c44848b6aa9060d34775a Mon Sep 17 00:00:00 2001 From: lind Date: Mon, 14 Oct 2024 22:00:44 +0000 Subject: [PATCH 1/2] rewrite poll syscall --- src/interface/comm.rs | 89 +++++++ src/safeposix/net.rs | 35 +++ src/safeposix/syscalls/net_calls.rs | 368 +++++++++++++++++++++++----- 3 files changed, 437 insertions(+), 55 deletions(-) diff --git a/src/interface/comm.rs b/src/interface/comm.rs index 23cf655a0..85b1dc12a 100644 --- a/src/interface/comm.rs +++ b/src/interface/comm.rs @@ -639,3 +639,92 @@ pub fn kernel_select( return result; } + + +// Implementations of select related FD_SET structure +pub struct PollFd(pub libc::pollfd); + +impl PollFd { + pub fn new_with_fd(fd: i32) -> PollFd { + let raw_pollfd = libc::pollfd { + fd, + events: 0, + revents: 0, + }; + PollFd(raw_pollfd) + } + + // set the event in pollfd + pub fn set_event(&mut self, event: i16) { + self.0.events |= event; + } + + pub fn get_revent(&self) -> i16 { + self.0.revents + } + + pub fn get_fd(&self) -> i32 { + self.0.fd + } +} + +// for unwrapping in kernel_poll +// fn to_pollfd_ptr(opt: &mut [PollFd]) -> *mut libc::pollfd { +// let length = opt.len(); + +// // Convert your PollFd structs into libc::pollfd pointers +// let mut poll_fds: Vec = opt.iter_mut() +// .map(|poll_fd| poll_fd.0) // Directly use the value +// .collect(); + +// // println!("tmp:"); +// // for pollfd in &poll_fds { +// // println!("fd: {}, events: {}, revents: {}", pollfd.fd, pollfd.events, pollfd.revents); +// // } + +// // Get a raw pointer to the first element of the vector +// let ret = poll_fds.as_mut_ptr(); + +// // Ensure the vector is not dropped +// std::mem::forget(poll_fds); // Prevent drop + +// let slice = unsafe { std::slice::from_raw_parts(ret, length) }; + +// // println!("converted raw pollfd:"); +// // for pollfd in slice { +// // println!("fd: {}, events: {}, revents: {}", pollfd.fd, pollfd.events, pollfd.revents); +// // } + +// ret +// } + + +pub fn kernel_poll( + fds: &mut [PollFd], + nfds: u64, +) -> i32 { + let mut poll_fds: Vec = fds.iter_mut() + .map(|poll_fd| poll_fd.0) // Directly use the value + .collect(); + + // Call libc::poll and store the result + let result = unsafe { + // Create a timeval struct with zero timeout + let kpoll_timeout: i32 = 0; + + let ret = libc::poll( + poll_fds.as_mut_ptr(), + nfds as u64, + kpoll_timeout + ); + + for (index, pollfd) in poll_fds.iter().enumerate() { + let item = fds.get_mut(index).unwrap(); + item.0.revents = pollfd.revents; + } + + ret + }; + + return result; +} \ No newline at end of file diff --git a/src/safeposix/net.rs b/src/safeposix/net.rs index 9acd68bab..e7b6e7798 100644 --- a/src/safeposix/net.rs +++ b/src/safeposix/net.rs @@ -573,3 +573,38 @@ pub fn update_readfds_from_kernel_select( } return kernel_ret; } + +pub struct PollInetInfo { + pub rawfd_lindfd_index_tuples: interface::RustHashMap, + pub kernel_pollfd: Vec, +} + +impl PollInetInfo { + pub fn new() -> Self { + PollInetInfo { + rawfd_lindfd_index_tuples: interface::RustHashMap::new(), + kernel_pollfd: Vec::new(), + } + } +} + +pub fn update_pollstruct_from_kernel_poll( + pollfds: &mut [interface::PollStruct], + inet_info: &mut PollInetInfo, +) -> i32 { + let kernel_ret; + // note that this poll call always have timeout = 0, so it doesn't block + let nfds = inet_info.kernel_pollfd.len() as u64; + kernel_ret = interface::kernel_poll( + inet_info.kernel_pollfd.as_mut_slice(), + nfds + ); + if kernel_ret > 0 { + for pollfd in &inet_info.kernel_pollfd { + let index = (*inet_info.rawfd_lindfd_index_tuples.get(&pollfd.get_fd()).unwrap()) as usize; + let pollstruct = pollfds.get_mut(index).unwrap(); + pollstruct.revents = pollfd.get_revent(); + } + } + return kernel_ret; +} \ No newline at end of file diff --git a/src/safeposix/syscalls/net_calls.rs b/src/safeposix/syscalls/net_calls.rs index 90f738d53..b55b76234 100644 --- a/src/safeposix/syscalls/net_calls.rs +++ b/src/safeposix/syscalls/net_calls.rs @@ -4482,6 +4482,141 @@ impl Cage { /// /// ### Panics /// No panic is expected from this syscall + // pub fn poll_syscall( + // &self, + // fds: &mut [PollStruct], + // timeout: Option, + // ) -> i32 { + // // timeout is supposed to be in milliseconds + + // // current implementation of poll_syscall is based on select_syscall + // // which gives several issues: + // // 1. according to standards, select_syscall should only support file descriptor + // // that is smaller than 1024, while poll_syscall should not have such + // // limitation but our implementation of poll_syscall is actually calling + // // select_syscall directly which would mean poll_syscall would also have the + // // 1024 maximum size limitation However, rustposix itself only support file + // // descriptor that is smaller than 1024 which solves this issue automatically + // // in an interesting way + // // 2. current implementation of poll_syscall is very inefficient, that it passes + // // each of the file descriptor into select_syscall one by one. A better + // // solution might be transforming pollstruct into fdsets and pass into + // // select_syscall once (TODO). A even more efficienct way would be completely + // // rewriting poll_syscall so it does not depend on select_syscall anymore. + // // This is also how Linux does for poll_syscall since Linux claims that poll + // // have a better performance than select. + // // 3. several revent value such as POLLERR (which should be set when pipe is + // // broken), or POLLHUP (when peer closed its channel) are not possible to + // // monitor. Since select_syscall does not have these features, so our + // // poll_syscall, which derived from select_syscall, would subsequently not be + // // able to support these features. + + // let mut return_code: i32 = 0; + // let start_time = interface::starttimer(); + + // let end_time = match timeout { + // Some(time) => time, + // None => interface::RustDuration::MAX, + // }; + + // // according to standard, we should clear all revents + // for structpoll in &mut *fds { + // structpoll.revents = 0; + // } + + // // we loop until either timeout + // // or any of the file descriptor is ready + // loop { + // // iterate through each file descriptor + // for structpoll in &mut *fds { + // // get the file descriptor + // let fd = structpoll.fd; + + // // according to standard, we should ignore all file descriptor + // // that is smaller than 0 + // if fd < 0 { + // continue; + // } + + // // get the associated events to monitor + // let events = structpoll.events; + + // // init FdSet structures + // let reads = &mut interface::FdSet::new(); + // let writes = &mut interface::FdSet::new(); + // let errors = &mut interface::FdSet::new(); + + // // POLLIN for readable fd + // if events & POLLIN > 0 { + // reads.set(fd) + // } + // // POLLOUT for writable fd + // if events & POLLOUT > 0 { + // writes.set(fd) + // } + // // POLLPRI for except fd + // if events & POLLPRI > 0 { + // errors.set(fd) + // } + + // // this mask is used for storing final revent result + // let mut mask: i16 = 0; + + // // here we just call select_syscall with timeout of zero, + // // which essentially just check each fd set once then return + // // NOTE that the nfds argument is highest fd + 1 + // let selectret = Self::select_syscall( + // &self, + // fd + 1, + // Some(reads), + // Some(writes), + // Some(errors), + // Some(interface::RustDuration::ZERO), + // ); + // // if there is any file descriptor ready + // if selectret > 0 { + // // is the file descriptor ready to read? + // mask |= if !reads.is_empty() { POLLIN } else { 0 }; + // // is the file descriptor ready to write? + // mask |= if !writes.is_empty() { POLLOUT } else { 0 }; + // // is there any exception conditions on the file descriptor? + // mask |= if !errors.is_empty() { POLLPRI } else { 0 }; + // // this file descriptor is ready for something, + // // increment the return value + // return_code += 1; + // } else if selectret < 0 { + // // if there is any error, first check if the error + // // is EBADF, which refers to invalid file descriptor error + // // in this case, we should set POLLNVAL to revent + // if selectret == -(Errno::EBADF as i32) { + // mask |= POLLNVAL; + // // according to standard, return value is the number of fds + // // with non-zero revent, which may indicate an error as well + // return_code += 1; + // } else { + // return selectret; + // } + // } + // // set the revents + // structpoll.revents = mask; + // } + + // // we break if there is any file descriptor ready + // // or timeout is reached + // if return_code != 0 || interface::readtimer(start_time) > end_time { + // break; + // } else { + // // otherwise, check for signal and loop again + // if interface::sigcheck() { + // return syscall_error(Errno::EINTR, "poll", "interrupted function call"); + // } + // // We yield to let other threads continue if we've found no ready descriptors + // interface::lind_yield(); + // } + // } + // return return_code; + // } + pub fn poll_syscall( &self, fds: &mut [PollStruct], @@ -4524,11 +4659,15 @@ impl Cage { structpoll.revents = 0; } + // For INET: prepare the data structures for the kernel_poll's use + let mut inet_info = PollInetInfo::new(); + let mut first_iteration = true; + // we loop until either timeout // or any of the file descriptor is ready loop { // iterate through each file descriptor - for structpoll in &mut *fds { + for (index, structpoll) in fds.iter_mut().enumerate() { // get the file descriptor let fd = structpoll.fd; @@ -4541,64 +4680,183 @@ impl Cage { // get the associated events to monitor let events = structpoll.events; - // init FdSet structures - let reads = &mut interface::FdSet::new(); - let writes = &mut interface::FdSet::new(); - let errors = &mut interface::FdSet::new(); + // try to get the FileDescriptor Object from fd number + // if the fd exists, do further processing based on the file descriptor type + // otherwise, raise an error + let checkedfd = self.get_filedescriptor(fd).unwrap(); + let unlocked_fd = checkedfd.read(); + if let Some(filedesc_enum) = &*unlocked_fd { + match filedesc_enum { + Socket(ref sockfdobj) => { + let mut mask = 0; - // POLLIN for readable fd - if events & POLLIN > 0 { - reads.set(fd) - } - // POLLOUT for writable fd - if events & POLLOUT > 0 { - writes.set(fd) - } - // POLLPRI for except fd - if events & POLLPRI > 0 { - errors.set(fd) - } + // sockethandle lock with read access + let sock_tmp = sockfdobj.handle.clone(); + let sockhandle = sock_tmp.read(); + let mut newconnection = false; + match sockhandle.domain { + AF_UNIX => { + if events & POLLIN > 0 { + if sockhandle.state == ConnState::LISTEN { + // if connection state is LISTEN + // then check if there are any pending connections + + // get the path of the socket + let localpathbuf = normpath( + convpath(sockhandle.localaddr.unwrap().path()), + self, + ); + // check if there is any connections associated with the path + let dsconnobj = + NET_METADATA.domsock_accept_table.get(&localpathbuf); + if dsconnobj.is_some() { + // we have a connecting domain socket, return as readable to + // be accepted + mask |= POLLIN; + } + } else if sockhandle.state == ConnState::CONNECTED || newconnection { + // otherwise, the connection is already established + // check if the pipe has any thing + let sockinfo = &sockhandle.unix_info.as_ref().unwrap(); + let receivepipe = sockinfo.receivepipe.as_ref().unwrap(); + if receivepipe.check_select_read() { + mask |= POLLIN; + } + } + } + if events & POLLOUT > 0 { + let sockinfo = &sockhandle.unix_info.as_ref().unwrap(); + let receivepipe = sockinfo.receivepipe.as_ref().unwrap(); + if receivepipe.check_select_write() { + mask |= POLLOUT; + } + } + // we should handle INPROGRESS state as well, but nonblocking connect for + // domain socket is a half-broken feature right now, so we might want to + // add this after it is fixed + } + AF_INET | AF_INET6 => { + if events & POLLOUT > 0 { + // For AF_INET or AF_INET6 socket, currently we still rely on kernel + // implementation, so here we simply + // call check_rawconnection with innersocket if connection state + // is INPROGRESS (non-blocking AF_INET/AF_INET6 socket connection) + if sockhandle.state == ConnState::INPROGRESS + && sockhandle + .innersocket + .as_ref() + .unwrap() + .check_rawconnection() + { + newconnection = true; + // sockhandle.state = ConnState::CONNECTED; + } + } - // this mask is used for storing final revent result - let mut mask: i16 = 0; - - // here we just call select_syscall with timeout of zero, - // which essentially just check each fd set once then return - // NOTE that the nfds argument is highest fd + 1 - let selectret = Self::select_syscall( - &self, - fd + 1, - Some(reads), - Some(writes), - Some(errors), - Some(interface::RustDuration::ZERO), - ); - // if there is any file descriptor ready - if selectret > 0 { - // is the file descriptor ready to read? - mask |= if !reads.is_empty() { POLLIN } else { 0 }; - // is the file descriptor ready to write? - mask |= if !writes.is_empty() { POLLOUT } else { 0 }; - // is there any exception conditions on the file descriptor? - mask |= if !errors.is_empty() { POLLPRI } else { 0 }; - // this file descriptor is ready for something, - // increment the return value - return_code += 1; - } else if selectret < 0 { - // if there is any error, first check if the error - // is EBADF, which refers to invalid file descriptor error - // in this case, we should set POLLNVAL to revent - if selectret == -(Errno::EBADF as i32) { - mask |= POLLNVAL; - // according to standard, return value is the number of fds - // with non-zero revent, which may indicate an error as well - return_code += 1; - } else { - return selectret; + if sockfdobj.rawfd < 0 { + continue; + } + + if first_iteration { + // push to inet poll_fds + let mut new_pollfd = interface::PollFd::new_with_fd(sockfdobj.rawfd); + new_pollfd.set_event(events); + inet_info.kernel_pollfd.push(new_pollfd); + inet_info.rawfd_lindfd_index_tuples.insert(sockfdobj.rawfd, index as i32); + } + } + _ => { + return syscall_error(Errno::EINVAL, "poll", "Unsupported domain") + } + } + + // non-blocking AF_INET/AF_INET6 socket connection now established + // change the state to connected + if newconnection { + let mut newconnhandle = sock_tmp.write(); + newconnhandle.state = ConnState::CONNECTED; + } + + // set the revents + if mask != 0 { + structpoll.revents = mask; + return_code += 1; + } + } + + // we always say streams are writable? + Stream(_) => { + let mut mask = 0; + if events & POLLIN > 0 { + // doing nothing here, since we always + // say streams are not readable + } + if events & POLLOUT > 0 { + mask |= POLLOUT; + } + // set the revents + if mask != 0 { + structpoll.revents = mask; + return_code += 1; + } + } + + Pipe(pipefdobj) => { + let mut mask = 0; + if events & POLLIN > 0 { + if pipefdobj.pipe.check_select_read() { + // set the mask + mask |= POLLIN; + } + } + if events & POLLOUT > 0 { + if pipefdobj.pipe.check_select_write() { + // set the mask + mask |= POLLOUT; + } + } + // set the revents + if mask != 0 { + structpoll.revents = mask; + return_code += 1; + } + } + + // these file writes never block + _ => { + let mut mask = 0; + if events & POLLIN > 0 { + // set the mask + mask |= POLLIN; + } + if events & POLLOUT > 0 { + // set the mask + mask |= POLLOUT; + } + // set the revents + if mask != 0 { + structpoll.revents = mask; + return_code += 1; + } + } } + } else { + // handle invalid file descriptor + structpoll.revents = POLLNVAL; + return_code += 1; + } + // if return_code != 0 { + // return return_code; + // } + } + first_iteration = false; + + if inet_info.kernel_pollfd.len() != 0 { + let kernel_ret = update_pollstruct_from_kernel_poll(fds, &mut inet_info); + if kernel_ret < 0 { + return kernel_ret; } - // set the revents - structpoll.revents = mask; + return_code += kernel_ret; } // we break if there is any file descriptor ready From 2492ab2e7a36d520b95a071ba9642c6f1cb29a14 Mon Sep 17 00:00:00 2001 From: lind Date: Mon, 14 Oct 2024 22:17:33 +0000 Subject: [PATCH 2/2] clean up --- src/interface/comm.rs | 36 +----- src/safeposix/net.rs | 3 + src/safeposix/syscalls/net_calls.rs | 184 ++++------------------------ 3 files changed, 30 insertions(+), 193 deletions(-) diff --git a/src/interface/comm.rs b/src/interface/comm.rs index 85b1dc12a..ad221ce79 100644 --- a/src/interface/comm.rs +++ b/src/interface/comm.rs @@ -641,7 +641,7 @@ pub fn kernel_select( } -// Implementations of select related FD_SET structure +// Implementations of poll related pollfd structure pub struct PollFd(pub libc::pollfd); impl PollFd { @@ -668,41 +668,11 @@ impl PollFd { } } -// for unwrapping in kernel_poll -// fn to_pollfd_ptr(opt: &mut [PollFd]) -> *mut libc::pollfd { -// let length = opt.len(); - -// // Convert your PollFd structs into libc::pollfd pointers -// let mut poll_fds: Vec = opt.iter_mut() -// .map(|poll_fd| poll_fd.0) // Directly use the value -// .collect(); - -// // println!("tmp:"); -// // for pollfd in &poll_fds { -// // println!("fd: {}, events: {}, revents: {}", pollfd.fd, pollfd.events, pollfd.revents); -// // } - -// // Get a raw pointer to the first element of the vector -// let ret = poll_fds.as_mut_ptr(); - -// // Ensure the vector is not dropped -// std::mem::forget(poll_fds); // Prevent drop - -// let slice = unsafe { std::slice::from_raw_parts(ret, length) }; - -// // println!("converted raw pollfd:"); -// // for pollfd in slice { -// // println!("fd: {}, events: {}, revents: {}", pollfd.fd, pollfd.events, pollfd.revents); -// // } - -// ret -// } - - pub fn kernel_poll( fds: &mut [PollFd], nfds: u64, ) -> i32 { + // convert [PollFd] to Vec let mut poll_fds: Vec = fds.iter_mut() .map(|poll_fd| poll_fd.0) // Directly use the value .collect(); @@ -712,12 +682,14 @@ pub fn kernel_poll( // Create a timeval struct with zero timeout let kpoll_timeout: i32 = 0; + // do the libc poll let ret = libc::poll( poll_fds.as_mut_ptr(), nfds as u64, kpoll_timeout ); + // convert the result back to [PollFd] for (index, pollfd) in poll_fds.iter().enumerate() { let item = fds.get_mut(index).unwrap(); item.0.revents = pollfd.revents; diff --git a/src/safeposix/net.rs b/src/safeposix/net.rs index e7b6e7798..390bcfa2c 100644 --- a/src/safeposix/net.rs +++ b/src/safeposix/net.rs @@ -595,14 +595,17 @@ pub fn update_pollstruct_from_kernel_poll( let kernel_ret; // note that this poll call always have timeout = 0, so it doesn't block let nfds = inet_info.kernel_pollfd.len() as u64; + // do the kernel poll kernel_ret = interface::kernel_poll( inet_info.kernel_pollfd.as_mut_slice(), nfds ); if kernel_ret > 0 { + // fill the pollfds with kernel poll result for pollfd in &inet_info.kernel_pollfd { let index = (*inet_info.rawfd_lindfd_index_tuples.get(&pollfd.get_fd()).unwrap()) as usize; let pollstruct = pollfds.get_mut(index).unwrap(); + // set revents pollstruct.revents = pollfd.get_revent(); } } diff --git a/src/safeposix/syscalls/net_calls.rs b/src/safeposix/syscalls/net_calls.rs index b55b76234..75f583b7b 100644 --- a/src/safeposix/syscalls/net_calls.rs +++ b/src/safeposix/syscalls/net_calls.rs @@ -4482,141 +4482,6 @@ impl Cage { /// /// ### Panics /// No panic is expected from this syscall - // pub fn poll_syscall( - // &self, - // fds: &mut [PollStruct], - // timeout: Option, - // ) -> i32 { - // // timeout is supposed to be in milliseconds - - // // current implementation of poll_syscall is based on select_syscall - // // which gives several issues: - // // 1. according to standards, select_syscall should only support file descriptor - // // that is smaller than 1024, while poll_syscall should not have such - // // limitation but our implementation of poll_syscall is actually calling - // // select_syscall directly which would mean poll_syscall would also have the - // // 1024 maximum size limitation However, rustposix itself only support file - // // descriptor that is smaller than 1024 which solves this issue automatically - // // in an interesting way - // // 2. current implementation of poll_syscall is very inefficient, that it passes - // // each of the file descriptor into select_syscall one by one. A better - // // solution might be transforming pollstruct into fdsets and pass into - // // select_syscall once (TODO). A even more efficienct way would be completely - // // rewriting poll_syscall so it does not depend on select_syscall anymore. - // // This is also how Linux does for poll_syscall since Linux claims that poll - // // have a better performance than select. - // // 3. several revent value such as POLLERR (which should be set when pipe is - // // broken), or POLLHUP (when peer closed its channel) are not possible to - // // monitor. Since select_syscall does not have these features, so our - // // poll_syscall, which derived from select_syscall, would subsequently not be - // // able to support these features. - - // let mut return_code: i32 = 0; - // let start_time = interface::starttimer(); - - // let end_time = match timeout { - // Some(time) => time, - // None => interface::RustDuration::MAX, - // }; - - // // according to standard, we should clear all revents - // for structpoll in &mut *fds { - // structpoll.revents = 0; - // } - - // // we loop until either timeout - // // or any of the file descriptor is ready - // loop { - // // iterate through each file descriptor - // for structpoll in &mut *fds { - // // get the file descriptor - // let fd = structpoll.fd; - - // // according to standard, we should ignore all file descriptor - // // that is smaller than 0 - // if fd < 0 { - // continue; - // } - - // // get the associated events to monitor - // let events = structpoll.events; - - // // init FdSet structures - // let reads = &mut interface::FdSet::new(); - // let writes = &mut interface::FdSet::new(); - // let errors = &mut interface::FdSet::new(); - - // // POLLIN for readable fd - // if events & POLLIN > 0 { - // reads.set(fd) - // } - // // POLLOUT for writable fd - // if events & POLLOUT > 0 { - // writes.set(fd) - // } - // // POLLPRI for except fd - // if events & POLLPRI > 0 { - // errors.set(fd) - // } - - // // this mask is used for storing final revent result - // let mut mask: i16 = 0; - - // // here we just call select_syscall with timeout of zero, - // // which essentially just check each fd set once then return - // // NOTE that the nfds argument is highest fd + 1 - // let selectret = Self::select_syscall( - // &self, - // fd + 1, - // Some(reads), - // Some(writes), - // Some(errors), - // Some(interface::RustDuration::ZERO), - // ); - // // if there is any file descriptor ready - // if selectret > 0 { - // // is the file descriptor ready to read? - // mask |= if !reads.is_empty() { POLLIN } else { 0 }; - // // is the file descriptor ready to write? - // mask |= if !writes.is_empty() { POLLOUT } else { 0 }; - // // is there any exception conditions on the file descriptor? - // mask |= if !errors.is_empty() { POLLPRI } else { 0 }; - // // this file descriptor is ready for something, - // // increment the return value - // return_code += 1; - // } else if selectret < 0 { - // // if there is any error, first check if the error - // // is EBADF, which refers to invalid file descriptor error - // // in this case, we should set POLLNVAL to revent - // if selectret == -(Errno::EBADF as i32) { - // mask |= POLLNVAL; - // // according to standard, return value is the number of fds - // // with non-zero revent, which may indicate an error as well - // return_code += 1; - // } else { - // return selectret; - // } - // } - // // set the revents - // structpoll.revents = mask; - // } - - // // we break if there is any file descriptor ready - // // or timeout is reached - // if return_code != 0 || interface::readtimer(start_time) > end_time { - // break; - // } else { - // // otherwise, check for signal and loop again - // if interface::sigcheck() { - // return syscall_error(Errno::EINTR, "poll", "interrupted function call"); - // } - // // We yield to let other threads continue if we've found no ready descriptors - // interface::lind_yield(); - // } - // } - // return return_code; - // } - pub fn poll_syscall( &self, fds: &mut [PollStruct], @@ -4624,27 +4489,9 @@ impl Cage { ) -> i32 { // timeout is supposed to be in milliseconds - // current implementation of poll_syscall is based on select_syscall - // which gives several issues: - // 1. according to standards, select_syscall should only support file descriptor - // that is smaller than 1024, while poll_syscall should not have such - // limitation but our implementation of poll_syscall is actually calling - // select_syscall directly which would mean poll_syscall would also have the - // 1024 maximum size limitation However, rustposix itself only support file - // descriptor that is smaller than 1024 which solves this issue automatically - // in an interesting way - // 2. current implementation of poll_syscall is very inefficient, that it passes - // each of the file descriptor into select_syscall one by one. A better - // solution might be transforming pollstruct into fdsets and pass into - // select_syscall once (TODO). A even more efficienct way would be completely - // rewriting poll_syscall so it does not depend on select_syscall anymore. - // This is also how Linux does for poll_syscall since Linux claims that poll - // have a better performance than select. - // 3. several revent value such as POLLERR (which should be set when pipe is - // broken), or POLLHUP (when peer closed its channel) are not possible to - // monitor. Since select_syscall does not have these features, so our - // poll_syscall, which derived from select_syscall, would subsequently not be - // able to support these features. + // To-do: several revent value such as POLLERR (which should be set when pipe is + // broken), or POLLHUP (when peer closed its channel) are currently not + // monitored. let mut return_code: i32 = 0; let start_time = interface::starttimer(); @@ -4661,6 +4508,11 @@ impl Cage { // For INET: prepare the data structures for the kernel_poll's use let mut inet_info = PollInetInfo::new(); + + // we record whether the first iteration or not + // since inet_info only need to be filled up once + // so during the second iteration or so, the step + // could be skipped for optmization let mut first_iteration = true; // we loop until either timeout @@ -4688,6 +4540,7 @@ impl Cage { if let Some(filedesc_enum) = &*unlocked_fd { match filedesc_enum { Socket(ref sockfdobj) => { + // in case of socket let mut mask = 0; // sockethandle lock with read access @@ -4696,6 +4549,7 @@ impl Cage { let mut newconnection = false; match sockhandle.domain { AF_UNIX => { + // in case of domain socket if events & POLLIN > 0 { if sockhandle.state == ConnState::LISTEN { // if connection state is LISTEN @@ -4720,6 +4574,7 @@ impl Cage { let sockinfo = &sockhandle.unix_info.as_ref().unwrap(); let receivepipe = sockinfo.receivepipe.as_ref().unwrap(); if receivepipe.check_select_read() { + //set the mask mask |= POLLIN; } } @@ -4728,6 +4583,7 @@ impl Cage { let sockinfo = &sockhandle.unix_info.as_ref().unwrap(); let receivepipe = sockinfo.receivepipe.as_ref().unwrap(); if receivepipe.check_select_write() { + // set the mask mask |= POLLOUT; } } @@ -4736,6 +4592,9 @@ impl Cage { // add this after it is fixed } AF_INET | AF_INET6 => { + // for inet, we are not handling the poll in this loop + // we just gathering all the INET pollfd and ready to feed + // to kernel poll outside the loop if events & POLLOUT > 0 { // For AF_INET or AF_INET6 socket, currently we still rely on kernel // implementation, so here we simply @@ -4749,7 +4608,6 @@ impl Cage { .check_rawconnection() { newconnection = true; - // sockhandle.state = ConnState::CONNECTED; } } @@ -4757,11 +4615,13 @@ impl Cage { continue; } + // inet_info only need to be filled up on first iteration if first_iteration { // push to inet poll_fds let mut new_pollfd = interface::PollFd::new_with_fd(sockfdobj.rawfd); new_pollfd.set_event(events); inet_info.kernel_pollfd.push(new_pollfd); + // mantain the mapping from rawfd to lindfd inet_info.rawfd_lindfd_index_tuples.insert(sockfdobj.rawfd, index as i32); } } @@ -4784,14 +4644,15 @@ impl Cage { } } - // we always say streams are writable? Stream(_) => { + // in case of stream let mut mask = 0; if events & POLLIN > 0 { // doing nothing here, since we always // say streams are not readable } if events & POLLOUT > 0 { + // we always say stream socket are wrtiable mask |= POLLOUT; } // set the revents @@ -4802,6 +4663,7 @@ impl Cage { } Pipe(pipefdobj) => { + // in case of pipe let mut mask = 0; if events & POLLIN > 0 { if pipefdobj.pipe.check_select_read() { @@ -4845,17 +4707,17 @@ impl Cage { structpoll.revents = POLLNVAL; return_code += 1; } - // if return_code != 0 { - // return return_code; - // } } + // first iteration done, set to false first_iteration = false; + // if there are any INET pollfd, pass it down to kernel if inet_info.kernel_pollfd.len() != 0 { let kernel_ret = update_pollstruct_from_kernel_poll(fds, &mut inet_info); if kernel_ret < 0 { return kernel_ret; } + // update return code return_code += kernel_ret; }