Skip to content

Commit a91605c

Browse files
fix(mm/vfs): 修复 writev 无法正确处理部分 iov 为受保护内存空间的情况
- 引入 user_accessible_len(),用于计算从给定地址 `addr` 开始、能够被访问的最长连续字节长度(注:使用 VMA 进行校验)
- 使 IoVecs::gather 返回 Result,并仅聚合可以被读取的 `buf` 部分(注意:一旦碰到不可访问的 iov,其后的 iov 都会被丢弃)
- 在 writev/pwritev 中传播新的 Result,以支持 gVisor 测试下的部分写入
1 parent 7b90d1d commit a91605c

File tree

4 files changed

+131
-18
lines changed

4 files changed

+131
-18
lines changed

kernel/src/filesystem/vfs/iov.rs

Lines changed: 49 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,10 @@
11
use alloc::vec::Vec;
22
use system_error::SystemError;
33

4-
use crate::syscall::user_access::{UserBufferReader, UserBufferWriter};
4+
use crate::{
5+
mm::VirtAddr,
6+
syscall::user_access::{user_accessible_len, UserBufferReader, UserBufferWriter},
7+
};
58
#[repr(C)]
69
#[derive(Debug, Clone, Copy)]
710
pub struct IoVec {
@@ -73,24 +76,55 @@ impl IoVecs {
7376
/// This function reads data from each IoVec in sequence and combines them into
7477
/// a single contiguous buffer.
7578
///
76-
/// # Returns
79+
/// **Returns:**
7780
///
78-
/// Returns a [`Vec<u8>`] containing all the data from the IoVecs.
81+
/// Returns a [`Vec<u8>`] containing the data copied from the IoVecs.
7982
///
80-
/// # Examples
83+
/// **Note:**
8184
///
82-
/// ```rust
83-
/// let iovecs = IoVecs::from_user(/* ... */)?;
84-
/// let buffer = iovecs.gather();
85-
/// ```
86-
pub fn gather(&self) -> Vec<u8> {
87-
let mut buf = Vec::new();
88-
for slice in self.0.iter() {
89-
let buf_reader = UserBufferReader::new(slice.iov_base, slice.iov_len, true).unwrap();
90-
let slice = buf_reader.buffer::<u8>(0).unwrap();
91-
buf.extend_from_slice(slice);
85+
/// If a buffer is only partially accessible, data is copied up to **the first
86+
/// inaccessible byte** and the remaining iovecs are ignored. If no data can be
87+
/// read at all, `Err(SystemError::EFAULT)` is returned.
88+
pub fn gather(&self) -> Result<Vec<u8>, SystemError> {
89+
let mut buf = Vec::with_capacity(self.total_len());
90+
91+
for iov in self.0.iter() {
92+
// 检查从 iov_base 开始有多少连续可访问的空间
93+
let accessible =
94+
user_accessible_len(VirtAddr::new(iov.iov_base as usize), iov.iov_len, false);
95+
96+
// log::debug!(
97+
// "iov is {:?}. iov_len: {}; accessible len:{}",
98+
// iov,
99+
// iov.iov_len,
100+
// accessible
101+
// );
102+
103+
// 如果一个字节都不能访问
104+
if accessible == 0 {
105+
if buf.is_empty() {
106+
// log::error!(
107+
// "The first iov is empty, returning EFAULT. iov shape: {:?}",
108+
// iov
109+
// );
110+
return Err(SystemError::EFAULT);
111+
}
112+
return Ok(buf);
113+
}
114+
115+
// 复制可访问的部分
116+
unsafe {
117+
let src = core::slice::from_raw_parts(iov.iov_base as *const u8, accessible);
118+
buf.extend_from_slice(src);
119+
}
120+
121+
// 如果没有读取完整个 iov,说明遇到了不可访问的区域
122+
if accessible < iov.iov_len {
123+
return Ok(buf);
124+
}
92125
}
93-
return buf;
126+
127+
Ok(buf)
94128
}
95129

96130
/// Scatters the given data into the IoVecs.

kernel/src/filesystem/vfs/syscall/sys_pwritev.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -40,7 +40,7 @@ impl Syscall for SysPwriteVHandle {
4040

4141
// 将用户态传入的指向用户态应用的数据结构重新在内核栈上构造
4242
let iovecs = unsafe { IoVecs::from_user(iov, iov_count, false) }?;
43-
let data = iovecs.gather();
43+
let data = iovecs.gather()?;
4444

4545
do_pwritev(fd, &data, offset)
4646
}

kernel/src/filesystem/vfs/syscall/sys_writev.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -45,7 +45,7 @@ impl Syscall for SysWriteVHandle {
4545

4646
// IoVecs会进行用户态检验
4747
let iovecs = unsafe { IoVecs::from_user(iov, count, false) }?;
48-
let data = iovecs.gather();
48+
let data = iovecs.gather()?;
4949
do_write(fd, &data)
5050
}
5151

kernel/src/syscall/user_access.rs

Lines changed: 80 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
//! This file contains functions for kernel-space access to user-space data
22
33
use core::{
4+
cmp::min,
45
mem::size_of,
56
num::NonZero,
67
slice::{from_raw_parts, from_raw_parts_mut},
@@ -12,7 +13,7 @@ use defer::defer;
1213

1314
use crate::{
1415
arch::MMArch,
15-
mm::{verify_area, MemoryManagementArch, VirtAddr},
16+
mm::{verify_area, MemoryManagementArch, VirtAddr, VmFlags},
1617
process::ProcessManager,
1718
};
1819

@@ -704,3 +705,81 @@ fn check_user_access_by_page_table(addr: VirtAddr, size: usize, check_write: boo
704705

705706
return true;
706707
}
708+
709+
/// Compute the contiguous accessible length starting at `addr`.
710+
///
711+
/// Returns the number of bytes that can be accessed before hitting an unmapped
712+
/// page or a page that lacks the requested permissions.
713+
pub fn user_accessible_len(addr: VirtAddr, size: usize, check_write: bool) -> usize {
714+
// log::error!(
715+
// "user_accessible_len(addr: {:?}, size:{:?}, check_write:{:?}",
716+
// addr,
717+
// size,
718+
// check_write
719+
// );
720+
if size == 0 || addr.is_null() {
721+
return 0;
722+
}
723+
724+
// 获取当前进程的 VMA (可访问的地址空间)
725+
let vm = match ProcessManager::current_pcb().basic().user_vm() {
726+
Some(vm) => vm,
727+
None => return 0,
728+
};
729+
730+
let vma_read_guard = vm.read_irqsave();
731+
let mappings = &vma_read_guard.mappings;
732+
733+
let mut checked = 0usize;
734+
let mut current = addr;
735+
736+
while checked < size {
737+
// 判断当前地址是否落在一个有效 VMA 中
738+
let Some(vma) = mappings.contains(current) else {
739+
break;
740+
};
741+
742+
let (region_end, vm_flags) = {
743+
let guard = vma.lock_irqsave();
744+
let region_end = guard.region().end().data();
745+
let vm_flags = *guard.vm_flags();
746+
(region_end, vm_flags)
747+
};
748+
749+
// 根据 vm_flags 判断是否具备访问权限
750+
let has_permission = if check_write {
751+
vm_flags.contains(VmFlags::VM_WRITE)
752+
} else {
753+
vm_flags.contains(VmFlags::VM_READ)
754+
};
755+
756+
if !has_permission {
757+
break;
758+
}
759+
760+
let current_addr = current.data();
761+
let available = region_end.saturating_sub(current_addr);
762+
if available == 0 {
763+
break;
764+
}
765+
766+
// 这里的 `step` 要区分两种情况
767+
// - 第一种情况:`available`(当前 VMA 剩余长度)已经覆盖了 `size - checked`,说明
768+
// 本次检查的剩余数据全部落在这个 VMA 内,`step` 直接等于 `size - checked`。
769+
// - 第二种情况:`available` 比 `size - checked` 小,意味着我们会在这个 VMA 的末尾停下,
770+
// 需要等下一次循环再确认后续地址是否仍有 VMA 覆盖。
771+
// - 例如 (addr = 0x1, size = 10),若某个 VMA 只覆盖 [0x0, 0x5),则第一轮只能推进 4 个字节,
772+
// 后续是否继续完全取决于下一个 VMA 是否与 0x5 处相接且具有相同访问权限。
773+
// 若下一轮 VMA 覆盖 [0x5, 0xf),虽然这块 VMA 可访问空间 available == 10 ,但是我们需要检查的部分就只剩 10 - 4 = 6 bytes。
774+
// 所以 `step` 选择为 size - checked
775+
let step = min(available, size - checked);
776+
checked += step;
777+
778+
let Some(next) = current_addr.checked_add(step) else {
779+
break;
780+
};
781+
current = VirtAddr::new(next);
782+
}
783+
784+
checked
785+
}

0 commit comments

Comments
 (0)