From 3a1c15b03a77a97bd86c44848b6aa9060d34775a Mon Sep 17 00:00:00 2001
From: lind <lind@nyu.edu>
Date: Mon, 14 Oct 2024 22:00:44 +0000
Subject: [PATCH 1/2] rewrite poll syscall

---
 src/interface/comm.rs               |  89 +++++++
 src/safeposix/net.rs                |  35 +++
 src/safeposix/syscalls/net_calls.rs | 368 +++++++++++++++++++++++-----
 3 files changed, 437 insertions(+), 55 deletions(-)

diff --git a/src/interface/comm.rs b/src/interface/comm.rs
index 23cf655a0..85b1dc12a 100644
--- a/src/interface/comm.rs
+++ b/src/interface/comm.rs
@@ -639,3 +639,92 @@ pub fn kernel_select(
 
     return result;
 }
+
+
+// Implementations of select related FD_SET structure
+pub struct PollFd(pub libc::pollfd);
+
+impl PollFd {
+    pub fn new_with_fd(fd: i32) -> PollFd {
+        let raw_pollfd = libc::pollfd {
+            fd,
+            events: 0,
+            revents: 0,
+        };
+        PollFd(raw_pollfd)
+    }
+
+    // OR the given event bits into the pollfd's requested `events` mask
+    pub fn set_event(&mut self, event: i16) {
+        self.0.events |= event;
+    }
+
+    pub fn get_revent(&self) -> i16 {
+        self.0.revents
+    }
+
+    pub fn get_fd(&self) -> i32 {
+        self.0.fd
+    }
+}
+
+// for unwrapping in kernel_poll
+// fn to_pollfd_ptr(opt: &mut [PollFd]) -> *mut libc::pollfd {
+//     let length = opt.len();
+
+//     // Convert your PollFd structs into libc::pollfd pointers
+//     let mut poll_fds: Vec<libc::pollfd> = opt.iter_mut()
+//         .map(|poll_fd| poll_fd.0) // Directly use the value
+//         .collect();
+
+//     // println!("tmp:");
+//     // for pollfd in &poll_fds {
+//     //     println!("fd: {}, events: {}, revents: {}", pollfd.fd, pollfd.events, pollfd.revents);
+//     // }
+
+//     // Get a raw pointer to the first element of the vector
+//     let ret = poll_fds.as_mut_ptr();
+
+//     // Ensure the vector is not dropped
+//     std::mem::forget(poll_fds); // Prevent drop
+
+//     let slice = unsafe { std::slice::from_raw_parts(ret, length) };
+
+//     // println!("converted raw pollfd:");
+//     // for pollfd in slice {
+//     //     println!("fd: {}, events: {}, revents: {}", pollfd.fd, pollfd.events, pollfd.revents);
+//     // }
+
+//     ret
+// }
+
+
+pub fn kernel_poll(
+    fds: &mut [PollFd],
+    nfds: u64,
+) -> i32 {
+    let mut poll_fds: Vec<libc::pollfd> = fds.iter_mut()
+        .map(|poll_fd| poll_fd.0) // Directly use the value
+        .collect();
+
+    // Call libc::poll and store the result
+    let result = unsafe {
+        // Create a timeval struct with zero timeout
+        let kpoll_timeout: i32 = 0;
+
+        let ret = libc::poll(
+            poll_fds.as_mut_ptr(),
+            nfds as u64,
+            kpoll_timeout
+        );
+
+        for (index, pollfd) in poll_fds.iter().enumerate() {
+            let item = fds.get_mut(index).unwrap();
+            item.0.revents = pollfd.revents;
+        }
+
+        ret
+    };
+
+    return result;
+}
\ No newline at end of file
diff --git a/src/safeposix/net.rs b/src/safeposix/net.rs
index 9acd68bab..e7b6e7798 100644
--- a/src/safeposix/net.rs
+++ b/src/safeposix/net.rs
@@ -573,3 +573,38 @@ pub fn update_readfds_from_kernel_select(
     }
     return kernel_ret;
 }
+
+pub struct PollInetInfo {
+    pub rawfd_lindfd_index_tuples: interface::RustHashMap<i32, i32>,
+    pub kernel_pollfd: Vec<interface::PollFd>,
+}
+
+impl PollInetInfo {
+    pub fn new() -> Self {
+        PollInetInfo {
+            rawfd_lindfd_index_tuples: interface::RustHashMap::new(),
+            kernel_pollfd: Vec::new(),
+        }
+    }
+}
+
+pub fn update_pollstruct_from_kernel_poll(
+    pollfds: &mut [interface::PollStruct],
+    inet_info: &mut PollInetInfo,
+) -> i32 {
+    let kernel_ret;
+    // note that this poll call always have timeout = 0, so it doesn't block
+    let nfds = inet_info.kernel_pollfd.len() as u64;
+    kernel_ret = interface::kernel_poll(
+        inet_info.kernel_pollfd.as_mut_slice(),
+        nfds
+    );
+    if kernel_ret > 0 {
+        for pollfd in &inet_info.kernel_pollfd {
+            let index = (*inet_info.rawfd_lindfd_index_tuples.get(&pollfd.get_fd()).unwrap()) as usize;
+            let pollstruct = pollfds.get_mut(index).unwrap();
+            pollstruct.revents = pollfd.get_revent();
+        }
+    }
+    return kernel_ret;
+}
\ No newline at end of file
diff --git a/src/safeposix/syscalls/net_calls.rs b/src/safeposix/syscalls/net_calls.rs
index 90f738d53..b55b76234 100644
--- a/src/safeposix/syscalls/net_calls.rs
+++ b/src/safeposix/syscalls/net_calls.rs
@@ -4482,6 +4482,141 @@ impl Cage {
     ///
     /// ### Panics
     /// No panic is expected from this syscall
+    // pub fn poll_syscall(
+    //     &self,
+    //     fds: &mut [PollStruct],
+    //     timeout: Option<interface::RustDuration>,
+    // ) -> i32 {
+    //     // timeout is supposed to be in milliseconds
+
+    //     // current implementation of poll_syscall is based on select_syscall
+    //     // which gives several issues:
+    //     // 1. according to standards, select_syscall should only support file descriptor
+    //     //    that is smaller than 1024, while poll_syscall should not have such
+    //     //    limitation but our implementation of poll_syscall is actually calling
+    //     //    select_syscall directly which would mean poll_syscall would also have the
+    //     //    1024 maximum size limitation However, rustposix itself only support file
+    //     //    descriptor that is smaller than 1024 which solves this issue automatically
+    //     //    in an interesting way
+    //     // 2. current implementation of poll_syscall is very inefficient, that it passes
+    //     //    each of the file descriptor into select_syscall one by one. A better
+    //     //    solution might be transforming pollstruct into fdsets and pass into
+    //     //    select_syscall once (TODO). A even more efficienct way would be completely
+    //     //    rewriting poll_syscall so it does not depend on select_syscall anymore.
+    //     //    This is also how Linux does for poll_syscall since Linux claims that poll
+    //     //    have a better performance than select.
+    //     // 3. several revent value such as POLLERR (which should be set when pipe is
+    //     //    broken), or POLLHUP (when peer closed its channel) are not possible to
+    //     //    monitor. Since select_syscall does not have these features, so our
+    //     //    poll_syscall, which derived from select_syscall, would subsequently not be
+    //     //    able to support these features.
+
+    //     let mut return_code: i32 = 0;
+    //     let start_time = interface::starttimer();
+
+    //     let end_time = match timeout {
+    //         Some(time) => time,
+    //         None => interface::RustDuration::MAX,
+    //     };
+
+    //     // according to standard, we should clear all revents
+    //     for structpoll in &mut *fds {
+    //         structpoll.revents = 0;
+    //     }
+
+    //     // we loop until either timeout
+    //     // or any of the file descriptor is ready
+    //     loop {
+    //         // iterate through each file descriptor
+    //         for structpoll in &mut *fds {
+    //             // get the file descriptor
+    //             let fd = structpoll.fd;
+
+    //             // according to standard, we should ignore all file descriptor
+    //             // that is smaller than 0
+    //             if fd < 0 {
+    //                 continue;
+    //             }
+
+    //             // get the associated events to monitor
+    //             let events = structpoll.events;
+
+    //             // init FdSet structures
+    //             let reads = &mut interface::FdSet::new();
+    //             let writes = &mut interface::FdSet::new();
+    //             let errors = &mut interface::FdSet::new();
+
+    //             // POLLIN for readable fd
+    //             if events & POLLIN > 0 {
+    //                 reads.set(fd)
+    //             }
+    //             // POLLOUT for writable fd
+    //             if events & POLLOUT > 0 {
+    //                 writes.set(fd)
+    //             }
+    //             // POLLPRI for except fd
+    //             if events & POLLPRI > 0 {
+    //                 errors.set(fd)
+    //             }
+
+    //             // this mask is used for storing final revent result
+    //             let mut mask: i16 = 0;
+
+    //             // here we just call select_syscall with timeout of zero,
+    //             // which essentially just check each fd set once then return
+    //             // NOTE that the nfds argument is highest fd + 1
+    //             let selectret = Self::select_syscall(
+    //                 &self,
+    //                 fd + 1,
+    //                 Some(reads),
+    //                 Some(writes),
+    //                 Some(errors),
+    //                 Some(interface::RustDuration::ZERO),
+    //             );
+    //             // if there is any file descriptor ready
+    //             if selectret > 0 {
+    //                 // is the file descriptor ready to read?
+    //                 mask |= if !reads.is_empty() { POLLIN } else { 0 };
+    //                 // is the file descriptor ready to write?
+    //                 mask |= if !writes.is_empty() { POLLOUT } else { 0 };
+    //                 // is there any exception conditions on the file descriptor?
+    //                 mask |= if !errors.is_empty() { POLLPRI } else { 0 };
+    //                 // this file descriptor is ready for something,
+    //                 // increment the return value
+    //                 return_code += 1;
+    //             } else if selectret < 0 {
+    //                 // if there is any error, first check if the error
+    //                 // is EBADF, which refers to invalid file descriptor error
+    //                 // in this case, we should set POLLNVAL to revent
+    //                 if selectret == -(Errno::EBADF as i32) {
+    //                     mask |= POLLNVAL;
+    //                     // according to standard, return value is the number of fds
+    //                     // with non-zero revent, which may indicate an error as well
+    //                     return_code += 1;
+    //                 } else {
+    //                     return selectret;
+    //                 }
+    //             }
+    //             // set the revents
+    //             structpoll.revents = mask;
+    //         }
+
+    //         // we break if there is any file descriptor ready
+    //         // or timeout is reached
+    //         if return_code != 0 || interface::readtimer(start_time) > end_time {
+    //             break;
+    //         } else {
+    //             // otherwise, check for signal and loop again
+    //             if interface::sigcheck() {
+    //                 return syscall_error(Errno::EINTR, "poll", "interrupted function call");
+    //             }
+    //             // We yield to let other threads continue if we've found no ready descriptors
+    //             interface::lind_yield();
+    //         }
+    //     }
+    //     return return_code;
+    // }
+
     pub fn poll_syscall(
         &self,
         fds: &mut [PollStruct],
@@ -4524,11 +4659,15 @@ impl Cage {
             structpoll.revents = 0;
         }
 
+        // For INET sockets: set up the bookkeeping that kernel_poll will consume
+        let mut inet_info = PollInetInfo::new();
+        let mut first_iteration = true;
+
         // we loop until either timeout
         // or any of the file descriptor is ready
         loop {
             // iterate through each file descriptor
-            for structpoll in &mut *fds {
+            for (index, structpoll) in fds.iter_mut().enumerate() {
                 // get the file descriptor
                 let fd = structpoll.fd;
 
@@ -4541,64 +4680,183 @@ impl Cage {
                 // get the associated events to monitor
                 let events = structpoll.events;
 
-                // init FdSet structures
-                let reads = &mut interface::FdSet::new();
-                let writes = &mut interface::FdSet::new();
-                let errors = &mut interface::FdSet::new();
+                // look up the FileDescriptor object for this fd number;
+                // if the fd is open, handle it according to its descriptor type,
+                // otherwise flag the entry with POLLNVAL instead of failing the call
+                let checkedfd = self.get_filedescriptor(fd).unwrap();
+                let unlocked_fd = checkedfd.read();
+                if let Some(filedesc_enum) = &*unlocked_fd {
+                    match filedesc_enum {
+                        Socket(ref sockfdobj) => {
+                            let mut mask = 0;
 
-                // POLLIN for readable fd
-                if events & POLLIN > 0 {
-                    reads.set(fd)
-                }
-                // POLLOUT for writable fd
-                if events & POLLOUT > 0 {
-                    writes.set(fd)
-                }
-                // POLLPRI for except fd
-                if events & POLLPRI > 0 {
-                    errors.set(fd)
-                }
+                            // sockethandle lock with read access
+                            let sock_tmp = sockfdobj.handle.clone();
+                            let sockhandle = sock_tmp.read();
+                            let mut newconnection = false;
+                            match sockhandle.domain {
+                                AF_UNIX => {
+                                    if events & POLLIN > 0 {
+                                        if sockhandle.state == ConnState::LISTEN {
+                                            // if connection state is LISTEN
+                                            // then check if there are any pending connections
+
+                                            // get the path of the socket
+                                            let localpathbuf = normpath(
+                                                convpath(sockhandle.localaddr.unwrap().path()),
+                                                self,
+                                            );
+                                            // check whether any connection is pending on this path
+                                            let dsconnobj =
+                                                NET_METADATA.domsock_accept_table.get(&localpathbuf);
+                                            if dsconnobj.is_some() {
+                                                // we have a connecting domain socket, return as readable to
+                                                // be accepted
+                                                mask |= POLLIN;
+                                            }
+                                        } else if sockhandle.state == ConnState::CONNECTED || newconnection {
+                                            // otherwise, the connection is already established,
+                                            // so ask the receive pipe whether it is ready for reading
+                                            let sockinfo = &sockhandle.unix_info.as_ref().unwrap();
+                                            let receivepipe = sockinfo.receivepipe.as_ref().unwrap();
+                                            if receivepipe.check_select_read() {
+                                                mask |= POLLIN;
+                                            }
+                                        }
+                                    }
+                                    if events & POLLOUT > 0 {
+                                        let sockinfo = &sockhandle.unix_info.as_ref().unwrap();
+                                        let receivepipe = sockinfo.receivepipe.as_ref().unwrap();
+                                        if receivepipe.check_select_write() {
+                                            mask |= POLLOUT;
+                                        }
+                                    }
+                                    // we should handle INPROGRESS state as well, but nonblocking connect for
+                                    // domain socket is a half-broken feature right now, so we might want to
+                                    // add this after it is fixed
+                                }
+                                AF_INET | AF_INET6 => {
+                                    if events & POLLOUT > 0 {
+                                        // For AF_INET or AF_INET6 socket, currently we still rely on kernel
+                                        // implementation, so here we simply
+                                        // call check_rawconnection with innersocket if connection state
+                                        // is INPROGRESS (non-blocking AF_INET/AF_INET6 socket connection)
+                                        if sockhandle.state == ConnState::INPROGRESS
+                                            && sockhandle
+                                                .innersocket
+                                                .as_ref()
+                                                .unwrap()
+                                                .check_rawconnection()
+                                        {
+                                            newconnection = true;
+                                            // sockhandle.state = ConnState::CONNECTED;
+                                        }
+                                    }
 
-                // this mask is used for storing final revent result
-                let mut mask: i16 = 0;
-
-                // here we just call select_syscall with timeout of zero,
-                // which essentially just check each fd set once then return
-                // NOTE that the nfds argument is highest fd + 1
-                let selectret = Self::select_syscall(
-                    &self,
-                    fd + 1,
-                    Some(reads),
-                    Some(writes),
-                    Some(errors),
-                    Some(interface::RustDuration::ZERO),
-                );
-                // if there is any file descriptor ready
-                if selectret > 0 {
-                    // is the file descriptor ready to read?
-                    mask |= if !reads.is_empty() { POLLIN } else { 0 };
-                    // is the file descriptor ready to write?
-                    mask |= if !writes.is_empty() { POLLOUT } else { 0 };
-                    // is there any exception conditions on the file descriptor?
-                    mask |= if !errors.is_empty() { POLLPRI } else { 0 };
-                    // this file descriptor is ready for something,
-                    // increment the return value
-                    return_code += 1;
-                } else if selectret < 0 {
-                    // if there is any error, first check if the error
-                    // is EBADF, which refers to invalid file descriptor error
-                    // in this case, we should set POLLNVAL to revent
-                    if selectret == -(Errno::EBADF as i32) {
-                        mask |= POLLNVAL;
-                        // according to standard, return value is the number of fds
-                        // with non-zero revent, which may indicate an error as well
-                        return_code += 1;
-                    } else {
-                        return selectret;
+                                    if sockfdobj.rawfd < 0 {
+                                        continue;
+                                    }
+
+                                    if first_iteration {
+                                        // push to inet poll_fds
+                                        let mut new_pollfd = interface::PollFd::new_with_fd(sockfdobj.rawfd);
+                                        new_pollfd.set_event(events);
+                                        inet_info.kernel_pollfd.push(new_pollfd);
+                                        inet_info.rawfd_lindfd_index_tuples.insert(sockfdobj.rawfd, index as i32);
+                                    }
+                                }
+                                _ => {
+                                    return syscall_error(Errno::EINVAL, "poll", "Unsupported domain")
+                                }
+                            }
+
+                            // non-blocking AF_INET/AF_INET6 socket connection now established
+                            // change the state to connected
+                            if newconnection {
+                                let mut newconnhandle = sock_tmp.write();
+                                newconnhandle.state = ConnState::CONNECTED;
+                            }
+
+                            // set the revents
+                            if mask != 0 {
+                                structpoll.revents = mask;
+                                return_code += 1;
+                            }
+                        }
+
+                        // streams are always reported as writable and never as readable
+                        Stream(_) => {
+                            let mut mask = 0;
+                            if events & POLLIN > 0 {
+                                // doing nothing here, since we always
+                                // say streams are not readable
+                            }
+                            if events & POLLOUT > 0 {
+                                mask |= POLLOUT;
+                            }
+                            // set the revents
+                            if mask != 0 {
+                                structpoll.revents = mask;
+                                return_code += 1;
+                            }
+                        }
+
+                        Pipe(pipefdobj) => {
+                            let mut mask = 0;
+                            if events & POLLIN > 0 {
+                                if pipefdobj.pipe.check_select_read() {
+                                    // set the mask
+                                    mask |= POLLIN;
+                                }
+                            }
+                            if events & POLLOUT > 0 {
+                                if pipefdobj.pipe.check_select_write() {
+                                    // set the mask
+                                    mask |= POLLOUT;
+                                }
+                            }
+                            // set the revents
+                            if mask != 0 {
+                                structpoll.revents = mask;
+                                return_code += 1;
+                            }
+                        }
+
+                        // every other descriptor type never blocks: report each requested POLLIN/POLLOUT
+                        _ => {
+                            let mut mask = 0;
+                            if events & POLLIN > 0 {
+                                // set the mask
+                                mask |= POLLIN;
+                            }
+                            if events & POLLOUT > 0 {
+                                // set the mask
+                                mask |= POLLOUT;
+                            }
+                            // set the revents
+                            if mask != 0 {
+                                structpoll.revents = mask;
+                                return_code += 1;
+                            }
+                        }
                     }
+                } else {
+                    // handle invalid file descriptor
+                    structpoll.revents = POLLNVAL;
+                    return_code += 1;
+                }
+                // if return_code != 0 {
+                //     return return_code;
+                // }
+            }
+            first_iteration = false;
+
+            if inet_info.kernel_pollfd.len() != 0 {
+                let kernel_ret = update_pollstruct_from_kernel_poll(fds, &mut inet_info);
+                if kernel_ret < 0 {
+                    return kernel_ret;
                 }
-                // set the revents
-                structpoll.revents = mask;
+                return_code += kernel_ret;
             }
 
             // we break if there is any file descriptor ready

From 2492ab2e7a36d520b95a071ba9642c6f1cb29a14 Mon Sep 17 00:00:00 2001
From: lind <lind@nyu.edu>
Date: Mon, 14 Oct 2024 22:17:33 +0000
Subject: [PATCH 2/2] clean up

---
 src/interface/comm.rs               |  36 +-----
 src/safeposix/net.rs                |   3 +
 src/safeposix/syscalls/net_calls.rs | 184 ++++------------------------
 3 files changed, 30 insertions(+), 193 deletions(-)

diff --git a/src/interface/comm.rs b/src/interface/comm.rs
index 85b1dc12a..ad221ce79 100644
--- a/src/interface/comm.rs
+++ b/src/interface/comm.rs
@@ -641,7 +641,7 @@ pub fn kernel_select(
 }
 
 
-// Implementations of select related FD_SET structure
+// Implementations of poll related pollfd structure
 pub struct PollFd(pub libc::pollfd);
 
 impl PollFd {
@@ -668,41 +668,11 @@ impl PollFd {
     }
 }
 
-// for unwrapping in kernel_poll
-// fn to_pollfd_ptr(opt: &mut [PollFd]) -> *mut libc::pollfd {
-//     let length = opt.len();
-
-//     // Convert your PollFd structs into libc::pollfd pointers
-//     let mut poll_fds: Vec<libc::pollfd> = opt.iter_mut()
-//         .map(|poll_fd| poll_fd.0) // Directly use the value
-//         .collect();
-
-//     // println!("tmp:");
-//     // for pollfd in &poll_fds {
-//     //     println!("fd: {}, events: {}, revents: {}", pollfd.fd, pollfd.events, pollfd.revents);
-//     // }
-
-//     // Get a raw pointer to the first element of the vector
-//     let ret = poll_fds.as_mut_ptr();
-
-//     // Ensure the vector is not dropped
-//     std::mem::forget(poll_fds); // Prevent drop
-
-//     let slice = unsafe { std::slice::from_raw_parts(ret, length) };
-
-//     // println!("converted raw pollfd:");
-//     // for pollfd in slice {
-//     //     println!("fd: {}, events: {}, revents: {}", pollfd.fd, pollfd.events, pollfd.revents);
-//     // }
-
-//     ret
-// }
-
-
 pub fn kernel_poll(
     fds: &mut [PollFd],
     nfds: u64,
 ) -> i32 {
+    // convert [PollFd] to Vec<libc::pollfd>
     let mut poll_fds: Vec<libc::pollfd> = fds.iter_mut()
         .map(|poll_fd| poll_fd.0) // Directly use the value
         .collect();
@@ -712,12 +682,14 @@ pub fn kernel_poll(
         // Create a timeval struct with zero timeout
         let kpoll_timeout: i32 = 0;
 
+        // do the libc poll
         let ret = libc::poll(
             poll_fds.as_mut_ptr(),
             nfds as u64,
             kpoll_timeout
         );
 
+        // convert the result back to [PollFd]
         for (index, pollfd) in poll_fds.iter().enumerate() {
             let item = fds.get_mut(index).unwrap();
             item.0.revents = pollfd.revents;
diff --git a/src/safeposix/net.rs b/src/safeposix/net.rs
index e7b6e7798..390bcfa2c 100644
--- a/src/safeposix/net.rs
+++ b/src/safeposix/net.rs
@@ -595,14 +595,17 @@ pub fn update_pollstruct_from_kernel_poll(
     let kernel_ret;
     // note that this poll call always have timeout = 0, so it doesn't block
     let nfds = inet_info.kernel_pollfd.len() as u64;
+    // do the kernel poll
     kernel_ret = interface::kernel_poll(
         inet_info.kernel_pollfd.as_mut_slice(),
         nfds
     );
     if kernel_ret > 0 {
+        // fill the pollfds with kernel poll result
         for pollfd in &inet_info.kernel_pollfd {
             let index = (*inet_info.rawfd_lindfd_index_tuples.get(&pollfd.get_fd()).unwrap()) as usize;
             let pollstruct = pollfds.get_mut(index).unwrap();
+            // set revents
             pollstruct.revents = pollfd.get_revent();
         }
     }
diff --git a/src/safeposix/syscalls/net_calls.rs b/src/safeposix/syscalls/net_calls.rs
index b55b76234..75f583b7b 100644
--- a/src/safeposix/syscalls/net_calls.rs
+++ b/src/safeposix/syscalls/net_calls.rs
@@ -4482,141 +4482,6 @@ impl Cage {
     ///
     /// ### Panics
     /// No panic is expected from this syscall
-    // pub fn poll_syscall(
-    //     &self,
-    //     fds: &mut [PollStruct],
-    //     timeout: Option<interface::RustDuration>,
-    // ) -> i32 {
-    //     // timeout is supposed to be in milliseconds
-
-    //     // current implementation of poll_syscall is based on select_syscall
-    //     // which gives several issues:
-    //     // 1. according to standards, select_syscall should only support file descriptor
-    //     //    that is smaller than 1024, while poll_syscall should not have such
-    //     //    limitation but our implementation of poll_syscall is actually calling
-    //     //    select_syscall directly which would mean poll_syscall would also have the
-    //     //    1024 maximum size limitation However, rustposix itself only support file
-    //     //    descriptor that is smaller than 1024 which solves this issue automatically
-    //     //    in an interesting way
-    //     // 2. current implementation of poll_syscall is very inefficient, that it passes
-    //     //    each of the file descriptor into select_syscall one by one. A better
-    //     //    solution might be transforming pollstruct into fdsets and pass into
-    //     //    select_syscall once (TODO). A even more efficienct way would be completely
-    //     //    rewriting poll_syscall so it does not depend on select_syscall anymore.
-    //     //    This is also how Linux does for poll_syscall since Linux claims that poll
-    //     //    have a better performance than select.
-    //     // 3. several revent value such as POLLERR (which should be set when pipe is
-    //     //    broken), or POLLHUP (when peer closed its channel) are not possible to
-    //     //    monitor. Since select_syscall does not have these features, so our
-    //     //    poll_syscall, which derived from select_syscall, would subsequently not be
-    //     //    able to support these features.
-
-    //     let mut return_code: i32 = 0;
-    //     let start_time = interface::starttimer();
-
-    //     let end_time = match timeout {
-    //         Some(time) => time,
-    //         None => interface::RustDuration::MAX,
-    //     };
-
-    //     // according to standard, we should clear all revents
-    //     for structpoll in &mut *fds {
-    //         structpoll.revents = 0;
-    //     }
-
-    //     // we loop until either timeout
-    //     // or any of the file descriptor is ready
-    //     loop {
-    //         // iterate through each file descriptor
-    //         for structpoll in &mut *fds {
-    //             // get the file descriptor
-    //             let fd = structpoll.fd;
-
-    //             // according to standard, we should ignore all file descriptor
-    //             // that is smaller than 0
-    //             if fd < 0 {
-    //                 continue;
-    //             }
-
-    //             // get the associated events to monitor
-    //             let events = structpoll.events;
-
-    //             // init FdSet structures
-    //             let reads = &mut interface::FdSet::new();
-    //             let writes = &mut interface::FdSet::new();
-    //             let errors = &mut interface::FdSet::new();
-
-    //             // POLLIN for readable fd
-    //             if events & POLLIN > 0 {
-    //                 reads.set(fd)
-    //             }
-    //             // POLLOUT for writable fd
-    //             if events & POLLOUT > 0 {
-    //                 writes.set(fd)
-    //             }
-    //             // POLLPRI for except fd
-    //             if events & POLLPRI > 0 {
-    //                 errors.set(fd)
-    //             }
-
-    //             // this mask is used for storing final revent result
-    //             let mut mask: i16 = 0;
-
-    //             // here we just call select_syscall with timeout of zero,
-    //             // which essentially just check each fd set once then return
-    //             // NOTE that the nfds argument is highest fd + 1
-    //             let selectret = Self::select_syscall(
-    //                 &self,
-    //                 fd + 1,
-    //                 Some(reads),
-    //                 Some(writes),
-    //                 Some(errors),
-    //                 Some(interface::RustDuration::ZERO),
-    //             );
-    //             // if there is any file descriptor ready
-    //             if selectret > 0 {
-    //                 // is the file descriptor ready to read?
-    //                 mask |= if !reads.is_empty() { POLLIN } else { 0 };
-    //                 // is the file descriptor ready to write?
-    //                 mask |= if !writes.is_empty() { POLLOUT } else { 0 };
-    //                 // is there any exception conditions on the file descriptor?
-    //                 mask |= if !errors.is_empty() { POLLPRI } else { 0 };
-    //                 // this file descriptor is ready for something,
-    //                 // increment the return value
-    //                 return_code += 1;
-    //             } else if selectret < 0 {
-    //                 // if there is any error, first check if the error
-    //                 // is EBADF, which refers to invalid file descriptor error
-    //                 // in this case, we should set POLLNVAL to revent
-    //                 if selectret == -(Errno::EBADF as i32) {
-    //                     mask |= POLLNVAL;
-    //                     // according to standard, return value is the number of fds
-    //                     // with non-zero revent, which may indicate an error as well
-    //                     return_code += 1;
-    //                 } else {
-    //                     return selectret;
-    //                 }
-    //             }
-    //             // set the revents
-    //             structpoll.revents = mask;
-    //         }
-
-    //         // we break if there is any file descriptor ready
-    //         // or timeout is reached
-    //         if return_code != 0 || interface::readtimer(start_time) > end_time {
-    //             break;
-    //         } else {
-    //             // otherwise, check for signal and loop again
-    //             if interface::sigcheck() {
-    //                 return syscall_error(Errno::EINTR, "poll", "interrupted function call");
-    //             }
-    //             // We yield to let other threads continue if we've found no ready descriptors
-    //             interface::lind_yield();
-    //         }
-    //     }
-    //     return return_code;
-    // }
-
     pub fn poll_syscall(
         &self,
         fds: &mut [PollStruct],
@@ -4624,27 +4489,9 @@ impl Cage {
     ) -> i32 {
         // timeout is supposed to be in milliseconds
 
-        // current implementation of poll_syscall is based on select_syscall
-        // which gives several issues:
-        // 1. according to standards, select_syscall should only support file descriptor
-        //    that is smaller than 1024, while poll_syscall should not have such
-        //    limitation but our implementation of poll_syscall is actually calling
-        //    select_syscall directly which would mean poll_syscall would also have the
-        //    1024 maximum size limitation However, rustposix itself only support file
-        //    descriptor that is smaller than 1024 which solves this issue automatically
-        //    in an interesting way
-        // 2. current implementation of poll_syscall is very inefficient, that it passes
-        //    each of the file descriptor into select_syscall one by one. A better
-        //    solution might be transforming pollstruct into fdsets and pass into
-        //    select_syscall once (TODO). A even more efficienct way would be completely
-        //    rewriting poll_syscall so it does not depend on select_syscall anymore.
-        //    This is also how Linux does for poll_syscall since Linux claims that poll
-        //    have a better performance than select.
-        // 3. several revent value such as POLLERR (which should be set when pipe is
-        //    broken), or POLLHUP (when peer closed its channel) are not possible to
-        //    monitor. Since select_syscall does not have these features, so our
-        //    poll_syscall, which derived from select_syscall, would subsequently not be
-        //    able to support these features.
+        // TODO: several revents values, such as POLLERR (which should be set when a
+        // pipe is broken) or POLLHUP (when the peer has closed its channel), are
+        // currently not monitored.
 
         let mut return_code: i32 = 0;
         let start_time = interface::starttimer();
@@ -4661,6 +4508,11 @@ impl Cage {
 
         // For INET: prepare the data structures for the kernel_poll's use
         let mut inet_info = PollInetInfo::new();
+
+        // we record whether this is the first iteration,
+        // since inet_info only needs to be filled up once;
+        // from the second iteration onward, that step
+        // can be skipped as an optimization
         let mut first_iteration = true;
 
         // we loop until either timeout
@@ -4688,6 +4540,7 @@ impl Cage {
                 if let Some(filedesc_enum) = &*unlocked_fd {
                     match filedesc_enum {
                         Socket(ref sockfdobj) => {
+                            // in case of socket
                             let mut mask = 0;
 
                             // sockethandle lock with read access
@@ -4696,6 +4549,7 @@ impl Cage {
                             let mut newconnection = false;
                             match sockhandle.domain {
                                 AF_UNIX => {
+                                    // in case of domain socket
                                     if events & POLLIN > 0 {
                                         if sockhandle.state == ConnState::LISTEN {
                                             // if connection state is LISTEN
@@ -4720,6 +4574,7 @@ impl Cage {
                                             let sockinfo = &sockhandle.unix_info.as_ref().unwrap();
                                             let receivepipe = sockinfo.receivepipe.as_ref().unwrap();
                                             if receivepipe.check_select_read() {
+                                                // set the mask
                                                 mask |= POLLIN;
                                             }
                                         }
@@ -4728,6 +4583,7 @@ impl Cage {
                                         let sockinfo = &sockhandle.unix_info.as_ref().unwrap();
                                         let receivepipe = sockinfo.receivepipe.as_ref().unwrap();
                                         if receivepipe.check_select_write() {
+                                            // set the mask
                                             mask |= POLLOUT;
                                         }
                                     }
@@ -4736,6 +4592,9 @@ impl Cage {
                                     // add this after it is fixed
                                 }
                                 AF_INET | AF_INET6 => {
+                                    // for inet, we are not handling the poll in this loop;
+                                    // we just gather all the INET pollfds so they are ready
+                                    // to be fed to the kernel poll outside the loop
                                     if events & POLLOUT > 0 {
                                         // For AF_INET or AF_INET6 socket, currently we still rely on kernel
                                         // implementation, so here we simply
@@ -4749,7 +4608,6 @@ impl Cage {
                                                 .check_rawconnection()
                                         {
                                             newconnection = true;
-                                            // sockhandle.state = ConnState::CONNECTED;
                                         }
                                     }
 
@@ -4757,11 +4615,13 @@ impl Cage {
                                         continue;
                                     }
 
+                                    // inet_info only needs to be filled up on the first iteration
                                     if first_iteration {
                                         // push to inet poll_fds
                                         let mut new_pollfd = interface::PollFd::new_with_fd(sockfdobj.rawfd);
                                         new_pollfd.set_event(events);
                                         inet_info.kernel_pollfd.push(new_pollfd);
+                                        // maintain the mapping from rawfd to lindfd
                                         inet_info.rawfd_lindfd_index_tuples.insert(sockfdobj.rawfd, index as i32);
                                     }
                                 }
@@ -4784,14 +4644,15 @@ impl Cage {
                             }
                         }
 
-                        // we always say streams are writable?
                         Stream(_) => {
+                            // in case of stream
                             let mut mask = 0;
                             if events & POLLIN > 0 {
                                 // doing nothing here, since we always
                                 // say streams are not readable
                             }
                             if events & POLLOUT > 0 {
+                                // we always say streams are writable
                                 mask |= POLLOUT;
                             }
                             // set the revents
@@ -4802,6 +4663,7 @@ impl Cage {
                         }
 
                         Pipe(pipefdobj) => {
+                            // in case of pipe
                             let mut mask = 0;
                             if events & POLLIN > 0 {
                                 if pipefdobj.pipe.check_select_read() {
@@ -4845,17 +4707,17 @@ impl Cage {
                     structpoll.revents = POLLNVAL;
                     return_code += 1;
                 }
-                // if return_code != 0 {
-                //     return return_code;
-                // }
             }
+            // first iteration done, set to false
             first_iteration = false;
 
+            // if there are any INET pollfd, pass it down to kernel
             if inet_info.kernel_pollfd.len() != 0 {
                 let kernel_ret = update_pollstruct_from_kernel_poll(fds, &mut inet_info);
                 if kernel_ret < 0 {
                     return kernel_ret;
                 }
+                // update return code
                 return_code += kernel_ret;
             }