diff --git a/kernel/src/mm/mincore.rs b/kernel/src/mm/mincore.rs
new file mode 100644
index 000000000..398ec81f6
--- /dev/null
+++ b/kernel/src/mm/mincore.rs
@@ -0,0 +1,151 @@
+use crate::arch::MMArch;
+use crate::{
+    arch::mm::PageMapper,
+    mm::{ucontext::LockedVMA, MemoryManagementArch, VirtAddr},
+};
+use system_error::SystemError;
+
+impl LockedVMA {
+    /// Reports page residency for `[start_addr, end_addr)` inside this VMA.
+    ///
+    /// One byte per page is written to `vec`, starting at index `offset`:
+    /// `1` if the page is resident, `0` otherwise.
+    ///
+    /// Returns `EINVAL` if `vec` is too short to hold the result.
+    pub fn do_mincore(
+        &self,
+        mapper: &PageMapper,
+        vec: &mut [u8],
+        start_addr: VirtAddr,
+        end_addr: VirtAddr,
+        offset: usize,
+    ) -> Result<(), SystemError> {
+        let total_pages = (end_addr - start_addr) >> MMArch::PAGE_SHIFT;
+        if vec.len() < total_pages + offset {
+            return Err(SystemError::EINVAL);
+        }
+
+        if !self.can_do_mincore() {
+            vec[offset..offset + total_pages].fill(0);
+            return Ok(());
+        }
+        // Walk the multi-level page table; huge pages are reported per 4K page.
+        // NOTE(review): the top level is hard-coded to 3 (4-level paging) -- confirm
+        // this matches every supported architecture.
+        self.mincore_walk_page_range(mapper, start_addr, end_addr, 3, vec, offset);
+        Ok(())
+    }
+
+    /// Walks the page-table entries of `level` covering `[start_addr, end_addr)`
+    /// and writes one residency byte per page into `vec[vec_offset..]`.
+    ///
+    /// Returns the number of pages (bytes of `vec`) that were filled in.
+    fn mincore_walk_page_range(
+        &self,
+        mapper: &PageMapper,
+        start_addr: VirtAddr,
+        end_addr: VirtAddr,
+        level: usize,
+        vec: &mut [u8],
+        vec_offset: usize,
+    ) -> usize {
+        // Bytes covered by a single entry at this level (loop-invariant).
+        let entry_size = MMArch::PAGE_SIZE << (level * MMArch::PAGE_ENTRY_SHIFT);
+        let mut page_count = 0;
+        let mut start = start_addr;
+        while start < end_addr {
+            // Stop at the end of the entry that contains `start`, not at
+            // `start + entry_size`: an unaligned `start` would otherwise let one
+            // entry's state be applied to pages that belong to the next entry.
+            let entry_end = VirtAddr::new((start.data() & !(entry_size - 1)) + entry_size);
+            let next = core::cmp::min(end_addr, entry_end);
+            if let Some(entry) = mapper.get_entry(start, level) {
+                if level > 0 && entry.flags().has_flag(MMArch::ENTRY_FLAG_HUGE_PAGE) {
+                    // Huge page: every 4K sub-page shares the entry's state.
+                    let sub_pages = (next - start) >> MMArch::PAGE_SHIFT;
+                    let val = u8::from(entry.present());
+                    vec[vec_offset + page_count..vec_offset + page_count + sub_pages].fill(val);
+                    page_count += sub_pages;
+                } else if level > 0 {
+                    // Descend into the next lower level for this entry's range.
+                    page_count += self.mincore_walk_page_range(
+                        mapper,
+                        start,
+                        next,
+                        level - 1,
+                        vec,
+                        vec_offset + page_count,
+                    );
+                } else {
+                    vec[vec_offset + page_count] = u8::from(entry.present());
+                    page_count += 1;
+                }
+            } else {
+                // No entry at this level: fall back to the page cache (if any).
+                page_count +=
+                    self.mincore_unmapped_range(start, next, vec, vec_offset + page_count);
+            }
+            start = next;
+        }
+        page_count
+    }
+
+    /// Fills in residency bytes for a range that has no page-table entry.
+    ///
+    /// Anonymous mappings are reported as non-resident. File mappings are
+    /// reported according to whether the backing page is in the page cache.
+    ///
+    /// Returns the number of pages written to `vec[vec_offset..]`.
+    fn mincore_unmapped_range(
+        &self,
+        start_addr: VirtAddr,
+        end_addr: VirtAddr,
+        vec: &mut [u8],
+        vec_offset: usize,
+    ) -> usize {
+        let nr = (end_addr - start_addr) >> MMArch::PAGE_SHIFT;
+        let out = &mut vec[vec_offset..vec_offset + nr];
+        if self.is_anonymous() {
+            out.fill(0);
+            return nr;
+        }
+
+        let guard = self.lock_irqsave();
+        // Check for the backing file and its offset *before* using them, so a
+        // VMA lacking either is reported as non-resident instead of panicking.
+        let (file, file_pgoff) = match (guard.vm_file(), guard.file_page_offset()) {
+            (Some(file), Some(file_pgoff)) => (file, file_pgoff),
+            _ => {
+                out.fill(0);
+                return nr;
+            }
+        };
+        let pgoff = ((start_addr - guard.region().start()) >> MMArch::PAGE_SHIFT) + file_pgoff;
+        match file.inode().page_cache() {
+            Some(page_cache) => {
+                let cache_guard = page_cache.lock_irqsave();
+                for (i, slot) in out.iter_mut().enumerate() {
+                    *slot = u8::from(cache_guard.get_page(pgoff + i).is_some());
+                }
+            }
+            None => out.fill(0),
+        }
+        nr
+    }
+
+    /// Returns whether residency information may be reported for this VMA.
+    ///
+    /// Currently always `true`; the access checks below are not implemented yet.
+    pub fn can_do_mincore(&self) -> bool {
+        // TODO: vm_ops is not implemented, so only anonymous and file mappings can
+        // be recognised here. Device mappings and other special mappings (a VMA
+        // whose vm_ops is set but which is not a file mapping in Linux) should
+        // return false.
+        if self.is_accessible() {
+            return true;
+        }
+        // TODO: if the file is not owned by the current user, the user must have
+        // write permission on it; otherwise this should return false.
+        true
+    }
+}
diff --git a/kernel/src/mm/mod.rs b/kernel/src/mm/mod.rs
index 3944c995b..e7cd940a8 100644
--- a/kernel/src/mm/mod.rs
+++ b/kernel/src/mm/mod.rs
@@ -27,6 +27,7 @@ pub mod init;
 pub mod kernel_mapper;
 pub mod madvise;
 pub mod memblock;
+pub mod mincore;
 pub mod mmio_buddy;
 pub mod no_init;
 pub mod page;
diff --git a/kernel/src/mm/syscall/sys_mincore.rs b/kernel/src/mm/syscall/sys_mincore.rs
index 5db6fc2fb..a6f882d3b 100644
--- a/kernel/src/mm/syscall/sys_mincore.rs
+++ b/kernel/src/mm/syscall/sys_mincore.rs
@@ -1,8 +1,15 @@
 use crate::arch::interrupt::TrapFrame;
 use crate::arch::syscall::nr::SYS_MINCORE;
+use crate::arch::MMArch;
+use crate::libs::align::page_align_up;
+use crate::mm::allocator::page_frame::{PageFrameCount, VirtPageFrame};
+use crate::mm::ucontext::AddressSpace;
+use crate::mm::{verify_area, MemoryManagementArch};
 use crate::syscall::table::{FormattedSyscallParam, Syscall};
+use crate::syscall::user_access::UserBufferWriter;
 use system_error::SystemError;
 
+use crate::mm::VirtAddr;
 use alloc::vec::Vec;
 
 pub struct SysMincoreHandle;
@@ -14,9 +21,39 @@ impl Syscall for SysMincoreHandle {
 
     /// ## mincore系统调用
     ///
-    /// todo: 参考 https://code.dragonos.org.cn/xref/linux-6.6.21/mm/mincore.c#232 实现mincore
-    fn handle(&self, _args: &[usize], _frame: &mut TrapFrame) -> Result<usize, SystemError> {
-        return Err(SystemError::ENOSYS);
+    /// ## Parameters
+    ///
+    /// - `start_vaddr`: start address; must be page-aligned, otherwise `EINVAL` is returned
+    /// - `len`: length in bytes of the range to inspect; rounded up to whole pages
+    /// - `vec`: user-space pointer to the result vector, one byte per page
+    fn handle(&self, args: &[usize], _frame: &mut TrapFrame) -> Result<usize, SystemError> {
+        let start_vaddr = VirtAddr::new(Self::start_vaddr(args));
+        let len = Self::len(args);
+        let vec = Self::vec(args);
+        // An unaligned start address yields EINVAL instead of triggering a panic.
+        if !start_vaddr.check_aligned(MMArch::PAGE_SIZE) {
+            return Err(SystemError::EINVAL);
+        }
+
+        if verify_area(start_vaddr, len).is_err() {
+            return Err(SystemError::ENOMEM);
+        }
+        if len == 0 {
+            return Err(SystemError::EINVAL);
+        }
+        let len = page_align_up(len);
+        let current_address_space = AddressSpace::current()?;
+        let start_frame = VirtPageFrame::new(start_vaddr);
+        let page_count = len >> MMArch::PAGE_SHIFT;
+
+        // Strictly validate that `vec` is mapped and writable; failure yields EFAULT.
+        let mut writer = UserBufferWriter::new_checked(vec as *mut u8, page_count, true)?;
+        let buf: &mut [u8] = writer.buffer(0)?;
+        let page_count = PageFrameCount::new(page_count);
+        current_address_space
+            .read()
+            .mincore(start_frame, page_count, buf)?;
+        return Ok(0);
     }
 
     /// Formats the syscall arguments for display/debugging purposes.
diff --git a/kernel/src/mm/ucontext.rs b/kernel/src/mm/ucontext.rs
index f9d6ac6c9..400b3782b 100644
--- a/kernel/src/mm/ucontext.rs
+++ b/kernel/src/mm/ucontext.rs
@@ -398,7 +398,7 @@ impl InnerAddressSpace {
         drop(fd_table_guard);
 
         // offset需要4K对齐
-        if !offset & (MMArch::PAGE_SIZE - 1) == 0 {
+        if (offset & (MMArch::PAGE_SIZE - 1)) != 0 {
             return Err(SystemError::EINVAL);
         }
         let pgoff = offset >> MMArch::PAGE_SHIFT;
@@ -435,7 +435,10 @@ impl InnerAddressSpace {
         // todo!(impl mmap for other file)
         // https://github.com/DragonOS-Community/DragonOS/pull/912#discussion_r1765334272
         let file = file.unwrap();
-        let _ = file.inode().mmap(start_vaddr.data(), len, offset);
+        // Pass the start address that was actually mapped, not the user-supplied hint.
+        let _ = file
+            .inode()
+            .mmap(start_page.virt_address().data(), len, offset);
         return Ok(start_page);
     }
 
@@ -704,6 +707,53 @@ impl InnerAddressSpace {
         return Ok(());
     }
 
+    /// Reports, one byte per page, whether each page of the requested range is resident.
+    ///
+    /// `vec` receives `page_count` bytes for the range beginning at `start_page`.
+    /// Returns `ENOMEM` if any part of the range is not covered by a VMA.
+    pub fn mincore(
+        &self,
+        start_page: VirtPageFrame,
+        page_count: PageFrameCount,
+        vec: &mut [u8],
+    ) -> Result<(), SystemError> {
+        let mapper = &self.user_mapper.utable;
+
+        if self.mappings.contains(start_page.virt_address()).is_none() {
+            return Err(SystemError::ENOMEM);
+        }
+
+        let request = VirtRegion::new(start_page.virt_address(), page_count.bytes());
+        let mut vmas = self.mappings.conflicts(request).collect::<Vec<_>>();
+        // Visit the VMAs in ascending address order so that holes can be detected.
+        vmas.sort_by_key(|v| v.lock_irqsave().region().start().data());
+
+        let mut last_vaddr = request.start();
+        let mut offset = 0;
+        for v in vmas {
+            let vma_region = *v.lock_irqsave().region();
+            // A VMA that starts beyond the part covered so far leaves a hole.
+            if vma_region.start() > last_vaddr {
+                return Err(SystemError::ENOMEM);
+            }
+            let start_vaddr = last_vaddr;
+            // Clamp to the end of the request itself. Clamping to
+            // `start_vaddr + page_count.bytes()` would overshoot for every VMA
+            // after the first, because `start_vaddr` has already advanced.
+            let end_vaddr = core::cmp::min(vma_region.end(), request.end());
+            v.do_mincore(mapper, vec, start_vaddr, end_vaddr, offset)?;
+            offset += (end_vaddr - start_vaddr) >> MMArch::PAGE_SHIFT;
+            last_vaddr = end_vaddr;
+        }
+
+        // The walk must reach the end of the request; otherwise the tail is unmapped.
+        if last_vaddr != request.end() {
+            return Err(SystemError::ENOMEM);
+        }
+
+        return Ok(());
+    }
+
     pub fn madvise(
         &mut self,
         start_page: VirtPageFrame,
@@ -974,9 +1024,10 @@ impl UserMappings {
             if guard.region.contains(vaddr) {
                 return Some(v.clone());
             }
-            if guard.region.start >= vaddr
-                && if let Some(ref nearest) = nearest {
-                    guard.region.start < nearest.lock_irqsave().region.start
+            // Among the VMAs whose start is not above `vaddr`, pick the one with the largest start.
+            if guard.region.start <= vaddr
+                && if let Some(ref current) = nearest {
+                    guard.region.start > current.lock_irqsave().region.start
                 } else {
                     true
                 }
diff --git a/user/apps/c_unitest/test_mincore.c b/user/apps/c_unitest/test_mincore.c
new file mode 100644
index 000000000..645f6d103
--- /dev/null
+++ b/user/apps/c_unitest/test_mincore.c
@@ -0,0 +1,153 @@
+// Unified mincore test suite with reporting and cleanup
+
+// NOTE(review): header names reconstructed from the calls below -- confirm the original list.
+#include <errno.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/mman.h>
+#include <sys/types.h>
+#include <unistd.h>
+
+typedef int (*test_func_t)(void);
+
+static void report(const char *name, int ok, const char *msg) {
+    if (ok) {
+        printf("[PASS] %s\n", name);
+    } else {
+        if (msg) {
+            printf("[FAILED] %s: %s\n", name, msg);
+        } else {
+            printf("[FAILED] %s\n", name);
+        }
+    }
+}
+
+// Test 1: Anonymous mapping pages become resident after write
+static int test_anonymous_incore(void) {
+    size_t pagesize = (size_t)sysconf(_SC_PAGESIZE);
+    size_t npages = 4;
+    size_t length = pagesize * npages;
+    int rc = 1;
+    char *addr = mmap(NULL, length, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
+    if (addr == MAP_FAILED) return 0;
+    memset(addr, 0xaa, length);
+    unsigned char *vec = (unsigned char *)malloc(npages);
+    if (!vec) { munmap(addr, length); return 0; }
+    if (mincore(addr, length, vec) == -1) { free(vec); munmap(addr, length); return 0; }
+    for (size_t i = 0; i < npages; i++) {
+        if (!(vec[i] & 1)) { rc = 0; break; }
+    }
+    free(vec);
+    munmap(addr, length);
+    return rc;
+}
+
+// Test 2: Unaligned addr -> EINVAL
+static int test_unaligned_einval(void) {
+    size_t pagesize = (size_t)sysconf(_SC_PAGESIZE);
+    char *addr = mmap(NULL, pagesize, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
+    if (addr == MAP_FAILED) return 0;
+    unsigned char vec[1];
+    errno = 0;
+    int ret = mincore(addr + 1, pagesize, vec);
+    int ok = (ret == -1 && errno == EINVAL);
+    munmap(addr, pagesize);
+    return ok;
+}
+
+// Test 3: len == 0 -> EINVAL
+static int test_len0_einval(void) {
+    size_t pagesize = (size_t)sysconf(_SC_PAGESIZE);
+    char *addr = mmap(NULL, pagesize, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
+    if (addr == MAP_FAILED) return 0;
+    unsigned char vec[1];
+    errno = 0;
+    int ret = mincore(addr, 0, vec);
+    int ok = (ret == -1 && errno == EINVAL);
+    munmap(addr, pagesize);
+    return ok;
+}
+
+// Test 4: Range crosses a hole -> ENOMEM
+static int test_range_hole_enomem(void) {
+    size_t pagesize = (size_t)sysconf(_SC_PAGESIZE);
+    size_t length = pagesize * 2;
+    char *addr = mmap(NULL, length, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
+    if (addr == MAP_FAILED) return 0;
+    if (munmap(addr + pagesize, pagesize) != 0) { munmap(addr, length); return 0; }
+    unsigned char vec[2] = {0};
+    errno = 0;
+    int ret = mincore(addr, length, vec);
+    int ok = (ret == -1 && errno == ENOMEM);
+    munmap(addr, pagesize);
+    return ok;
+}
+
+// Test 5: vec not writable -> EFAULT
+static int test_vec_efault(void) {
+    size_t pagesize = (size_t)sysconf(_SC_PAGESIZE);
+    char *addr = mmap(NULL, pagesize, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
+    if (addr == MAP_FAILED) return 0;
+    char *ro = mmap(NULL, pagesize, PROT_READ, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
+    if (ro == MAP_FAILED) { munmap(addr, pagesize); return 0; }
+    errno = 0;
+    int ret = mincore(addr, pagesize, (unsigned char*)ro);
+    int ok = (ret == -1 && errno == EFAULT);
+    munmap(addr, pagesize);
+    munmap(ro, pagesize);
+    return ok;
+}
+
+// Test 6: file-backed mapping reflects page cache presence after read
+static int test_filemap_pagecache(void) {
+    size_t pagesize = (size_t)sysconf(_SC_PAGESIZE);
+    char tmpl[] = "mincore_test_file_XXXXXX";
+    int fd = mkstemp(tmpl);
+    if (fd < 0) return 0;
+    // write 2 pages
+    char *buf = (char*)malloc(pagesize * 2);
+    if (!buf) { close(fd); unlink(tmpl); return 0; }
+    memset(buf, 0xab, pagesize * 2);
+    ssize_t w = write(fd, buf, pagesize * 2);
+    free(buf);
+    if (w != (ssize_t)(pagesize * 2)) { close(fd); unlink(tmpl); return 0; }
+    char *addr = mmap(NULL, pagesize * 2, PROT_READ, MAP_PRIVATE, fd, 0);
+    if (addr == MAP_FAILED) { close(fd); unlink(tmpl); return 0; }
+    unsigned char vec_before[2] = {0};
+    if (mincore(addr, pagesize * 2, vec_before) != 0) {
+        munmap(addr, pagesize * 2); close(fd); unlink(tmpl); return 0;
+    }
+    volatile char c = addr[0]; (void)c; // fault-in first page
+    unsigned char vec_after[2] = {0};
+    if (mincore(addr, pagesize * 2, vec_after) != 0) {
+        munmap(addr, pagesize * 2); close(fd); unlink(tmpl); return 0;
+    }
+    int ok = ((vec_after[0] & 1) == 1);
+    munmap(addr, pagesize * 2);
+    close(fd);
+    unlink(tmpl);
+    return ok;
+}
+
+struct test_entry { const char *name; test_func_t fn; };
+
+int main(void) {
+    struct test_entry tests[] = {
+        {"anonymous_incore", test_anonymous_incore},
+        {"unaligned_einval", test_unaligned_einval},
+        {"len0_einval", test_len0_einval},
+        {"range_hole_enomem", test_range_hole_enomem},
+        {"vec_efault", test_vec_efault},
+        {"filemap_pagecache", test_filemap_pagecache},
+    };
+    int total = (int)(sizeof(tests)/sizeof(tests[0]));
+    int passed = 0;
+    for (int i = 0; i < total; i++) {
+        int ok = tests[i].fn();
+        report(tests[i].name, ok, NULL);
+        if (ok) passed++;
+    }
+    printf("Summary: %d/%d passed\n", passed, total);
+    return passed == total ? 0 : 1;
+}