Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions resources/seccomp/aarch64-unknown-linux-musl.json
Original file line number Diff line number Diff line change
Expand Up @@ -85,6 +85,10 @@
"syscall": "lseek",
"comment": "Used by the block device"
},
{
"syscall": "fallocate",
"comment": "Used by the block device for discard (hole punching)"
},
{
"syscall": "mremap",
"comment": "Used for re-allocating large memory regions, for example vectors"
Expand Down
4 changes: 4 additions & 0 deletions resources/seccomp/x86_64-unknown-linux-musl.json
Original file line number Diff line number Diff line change
Expand Up @@ -85,6 +85,10 @@
"syscall": "lseek",
"comment": "Used by the block device"
},
{
"syscall": "fallocate",
"comment": "Used by the block device for discard (hole punching)"
},
{
"syscall": "mremap",
"comment": "Used for re-allocating large memory regions, for example vectors"
Expand Down
6 changes: 4 additions & 2 deletions src/vmm/src/devices/virtio/block/virtio/device.rs
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ use crate::devices::virtio::block::CacheType;
use crate::devices::virtio::block::virtio::metrics::{BlockDeviceMetrics, BlockMetricsPerDevice};
use crate::devices::virtio::device::{ActiveState, DeviceState, VirtioDevice};
use crate::devices::virtio::generated::virtio_blk::{
VIRTIO_BLK_F_FLUSH, VIRTIO_BLK_F_RO, VIRTIO_BLK_ID_BYTES,
VIRTIO_BLK_F_DISCARD, VIRTIO_BLK_F_FLUSH, VIRTIO_BLK_F_RO, VIRTIO_BLK_ID_BYTES,
};
use crate::devices::virtio::generated::virtio_config::VIRTIO_F_VERSION_1;
use crate::devices::virtio::generated::virtio_ids::VIRTIO_ID_BLOCK;
Expand Down Expand Up @@ -298,7 +298,9 @@ impl VirtioBlock {
.map_err(VirtioBlockError::RateLimiter)?
.unwrap_or_default();

let mut avail_features = (1u64 << VIRTIO_F_VERSION_1) | (1u64 << VIRTIO_RING_F_EVENT_IDX);
let mut avail_features = (1u64 << VIRTIO_F_VERSION_1)
| (1u64 << VIRTIO_RING_F_EVENT_IDX)
| (1u64 << VIRTIO_BLK_F_DISCARD);

if config.cache_type == CacheType::Writeback {
avail_features |= 1u64 << VIRTIO_BLK_F_FLUSH;
Expand Down
22 changes: 22 additions & 0 deletions src/vmm/src/devices/virtio/block/virtio/io/async_io.rs
Original file line number Diff line number Diff line change
Expand Up @@ -230,6 +230,28 @@ impl AsyncFileEngine {
Ok(())
}

pub fn push_discard(
&mut self,
offset: u64,
len: u32,
req: PendingRequest,
) -> Result<(), RequestError<AsyncIoError>> {
let wrapped_user_data = WrappedRequest::new(req);

if let Err((io_uring_error, data)) =
self.ring
.push(Operation::fallocate(0, len, offset, wrapped_user_data))
{
error!(
"DISCARD fallocate failed (offset={}, len={}): {:?}",
offset, len, io_uring_error
);
return Ok(());
}

Ok(())
}

fn do_pop(&mut self) -> Result<Option<Cqe<WrappedRequest>>, AsyncIoError> {
self.ring.pop().map_err(AsyncIoError::IoUring)
}
Expand Down
28 changes: 28 additions & 0 deletions src/vmm/src/devices/virtio/block/virtio/io/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -6,9 +6,13 @@ pub mod sync_io;

use std::fmt::Debug;
use std::fs::File;
use std::os::unix::io::AsRawFd;

use libc::{FALLOC_FL_KEEP_SIZE, FALLOC_FL_PUNCH_HOLE, c_int, off64_t};

pub use self::async_io::{AsyncFileEngine, AsyncIoError};
pub use self::sync_io::{SyncFileEngine, SyncIoError};
use crate::device_manager::mmio::MMIO_LEN;
use crate::devices::virtio::block::virtio::PendingRequest;
use crate::devices::virtio::block::virtio::device::FileEngineType;
use crate::vstate::memory::{GuestAddress, GuestMemoryMmap};
Expand Down Expand Up @@ -172,6 +176,30 @@ impl FileEngine {
FileEngine::Sync(engine) => engine.flush().map_err(BlockIoError::Sync),
}
}

/// Dispatches a discard request of `count` bytes at `offset` to the backing
/// file engine.
///
/// Returns `FileEngineOk::Submitted` when the async engine queued the
/// operation, or `FileEngineOk::Executed` when the sync engine completed it
/// inline; any engine error is handed back together with the request.
pub fn discard(
    &mut self,
    offset: u64,
    count: u32,
    req: PendingRequest,
) -> Result<FileEngineOk, RequestError<BlockIoError>> {
    match self {
        // The async engine takes ownership of the request and reports back
        // through the completion queue; a push failure returns it to us.
        FileEngine::Async(engine) => engine
            .push_discard(offset, count, req)
            .map(|_| FileEngineOk::Submitted)
            .map_err(|err| RequestError {
                req: err.req,
                error: BlockIoError::Async(err.error),
            }),
        // The sync engine executes immediately; `req` stays with us either way.
        FileEngine::Sync(engine) => match engine.discard(offset, count) {
            Ok(discarded) => Ok(FileEngineOk::Executed(RequestOk {
                req,
                count: discarded,
            })),
            Err(err) => Err(RequestError {
                req,
                error: BlockIoError::Sync(err),
            }),
        },
    }
}
}

#[cfg(test)]
Expand Down
40 changes: 40 additions & 0 deletions src/vmm/src/devices/virtio/block/virtio/io/sync_io.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,9 @@

use std::fs::File;
use std::io::{Seek, SeekFrom, Write};
use std::os::unix::io::AsRawFd;

use libc::{FALLOC_FL_KEEP_SIZE, FALLOC_FL_PUNCH_HOLE, c_int, off64_t};
use vm_memory::{GuestMemoryError, ReadVolatile, WriteVolatile};

use crate::vstate::memory::{GuestAddress, GuestMemory, GuestMemoryMmap};
Expand All @@ -18,6 +20,8 @@ pub enum SyncIoError {
SyncAll(std::io::Error),
/// Transfer: {0}
Transfer(GuestMemoryError),
/// Discard: {0}
Discard(std::io::Error),
}

#[derive(Debug)]
Expand Down Expand Up @@ -81,4 +85,40 @@ impl SyncFileEngine {
// Sync data out to physical media on host.
self.file.sync_all().map_err(SyncIoError::SyncAll)
}

pub fn discard(&mut self, offset: u64, len: u32) -> Result<u32, SyncIoError> {
// Do checked conversion to avoid possible wrap/cast issues on 64-bit systems.
let off_i64 = i64::try_from(offset).map_err(|_| {
SyncIoError::Discard(std::io::Error::new(
std::io::ErrorKind::InvalidInput,
"offset overflow",
))
})?;

let len_i64: i64 = len.into();

// # Safety: calling libc::fallocate is safe here because:
// - `self.file.as_raw_fd()` is a valid file descriptor owned by this struct,
// - `off_i64` and `len_i64` are validated copies of the incoming unsigned values converted
// to the C `off64_t` type, and
// - the syscall is properly checked for an error return value.
unsafe {
let ret = libc::fallocate(
self.file.as_raw_fd(),
libc::FALLOC_FL_PUNCH_HOLE | libc::FALLOC_FL_KEEP_SIZE,
off_i64,
len_i64,
);
if ret != 0 {
error!(
"DISCARD fallocate failed (offset={}, len={}): {:?}",
offset,
len,
std::io::Error::last_os_error()
);
return Ok(len);
}
}
Ok(len)
}
}
3 changes: 3 additions & 0 deletions src/vmm/src/devices/virtio/block/virtio/metrics.rs
Original file line number Diff line number Diff line change
Expand Up @@ -157,6 +157,8 @@ pub struct BlockDeviceMetrics {
pub invalid_reqs_count: SharedIncMetric,
/// Number of flushes operation triggered on this block device.
pub flush_count: SharedIncMetric,
/// Number of discard operation triggered on this block device.
pub discard_count: SharedIncMetric,
/// Number of events triggered on the queue of this block device.
pub queue_event_count: SharedIncMetric,
/// Number of events ratelimiter-related.
Expand Down Expand Up @@ -210,6 +212,7 @@ impl BlockDeviceMetrics {
self.invalid_reqs_count
.add(other.invalid_reqs_count.fetch_diff());
self.flush_count.add(other.flush_count.fetch_diff());
self.discard_count.add(other.discard_count.fetch_diff());
self.queue_event_count
.add(other.queue_event_count.fetch_diff());
self.rate_limiter_event_count
Expand Down
8 changes: 8 additions & 0 deletions src/vmm/src/devices/virtio/block/virtio/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -63,4 +63,12 @@ pub enum VirtioBlockError {
RateLimiter(std::io::Error),
/// Persistence error: {0}
Persist(crate::devices::virtio::persist::PersistError),
/// Sector overflow in discard segment
SectorOverflow,
/// Discard segment exceeds disk size
BeyondDiskSize,
/// Invalid flags in discard segment
InvalidDiscardFlags,
/// Invalid discard request (e.g., empty segments)
InvalidDiscardRequest,
}
84 changes: 81 additions & 3 deletions src/vmm/src/devices/virtio/block/virtio/request.rs
Original file line number Diff line number Diff line change
Expand Up @@ -9,17 +9,19 @@ use std::convert::From;

use vm_memory::GuestMemoryError;

use super::io::{BlockIoError, SyncIoError};
use super::{SECTOR_SHIFT, SECTOR_SIZE, VirtioBlockError, io as block_io};
use crate::devices::virtio::block::virtio::device::DiskProperties;
use crate::devices::virtio::block::virtio::metrics::BlockDeviceMetrics;
pub use crate::devices::virtio::generated::virtio_blk::{
VIRTIO_BLK_ID_BYTES, VIRTIO_BLK_S_IOERR, VIRTIO_BLK_S_OK, VIRTIO_BLK_S_UNSUPP,
VIRTIO_BLK_T_FLUSH, VIRTIO_BLK_T_GET_ID, VIRTIO_BLK_T_IN, VIRTIO_BLK_T_OUT,
VIRTIO_BLK_T_DISCARD, VIRTIO_BLK_T_FLUSH, VIRTIO_BLK_T_GET_ID, VIRTIO_BLK_T_IN,
VIRTIO_BLK_T_OUT,
};
use crate::devices::virtio::queue::DescriptorChain;
use crate::logger::{IncMetric, error};
use crate::rate_limiter::{RateLimiter, TokenType};
use crate::vstate::memory::{ByteValued, Bytes, GuestAddress, GuestMemoryMmap};
use crate::vstate::memory::{Address, ByteValued, Bytes, GuestAddress, GuestMemoryMmap};

#[derive(Debug, derive_more::From)]
pub enum IoErr {
Expand All @@ -34,6 +36,7 @@ pub enum RequestType {
Out,
Flush,
GetDeviceID,
Discard,
Unsupported(u32),
}

Expand All @@ -44,6 +47,7 @@ impl From<u32> for RequestType {
VIRTIO_BLK_T_OUT => RequestType::Out,
VIRTIO_BLK_T_FLUSH => RequestType::Flush,
VIRTIO_BLK_T_GET_ID => RequestType::GetDeviceID,
VIRTIO_BLK_T_DISCARD => RequestType::Discard,
t => RequestType::Unsupported(t),
}
}
Expand Down Expand Up @@ -176,6 +180,12 @@ impl PendingRequest {
(Ok(transferred_data_len), RequestType::GetDeviceID) => {
Status::from_data(self.data_len, transferred_data_len, true)
}
(Ok(_), RequestType::Discard) => {
block_metrics.discard_count.inc();
Status::Ok {
num_bytes_to_mem: 0,
}
}
(_, RequestType::Unsupported(op)) => Status::Unsupported { op },
(Err(err), _) => Status::IoErr {
num_bytes_to_mem: 0,
Expand Down Expand Up @@ -231,13 +241,26 @@ impl RequestHeader {
}
}

// One discard range descriptor as the guest lays it out, mirroring
// `struct virtio_blk_discard_write_zeroes` from the VirtIO spec v1.2.
// `#[repr(C)]` pins the field order/layout so reading it byte-wise from guest
// memory (via `ByteValued`) matches the guest's view.
#[derive(Debug, Copy, Clone, PartialEq, Eq)]
#[repr(C)]
pub struct DiscardSegment {
    // First sector (512-byte units) of the range to discard.
    sector: u64,
    // Length of the range, in sectors.
    num_sectors: u32,
    // Flags word; per the spec only bit 0 ("unmap") is defined and it must be
    // zero for discard requests.
    flags: u32,
}
// SAFETY: Safe because `DiscardSegment` only contains plain data (POD) with no padding-dependent
// invariants.
unsafe impl ByteValued for DiscardSegment {}

// A virtio-block request parsed from the guest's descriptor chain.
#[derive(Debug, PartialEq, Eq)]
pub struct Request {
    pub r#type: RequestType,
    // Byte length of the data descriptor. NOTE(review): for discard requests
    // this is the size of the `DiscardSegment` array, not the number of bytes
    // to be discarded.
    pub data_len: u32,
    // Guest address where the one-byte request status must be written.
    pub status_addr: GuestAddress,
    // Starting sector taken from the request header.
    sector: u64,
    // Guest address of the data buffer descriptor.
    data_addr: GuestAddress,
    // Parsed discard segments; populated only for `RequestType::Discard`.
    discard_segments: Option<Vec<DiscardSegment>>,
}

impl Request {
Expand All @@ -258,10 +281,11 @@ impl Request {
data_addr: GuestAddress(0),
data_len: 0,
status_addr: GuestAddress(0),
discard_segments: None,
};

let data_desc;
let status_desc;
let mut status_desc;
let desc = avail_desc
.next_descriptor()
.ok_or(VirtioBlockError::DescriptorChainTooShort)?;
Expand All @@ -272,6 +296,7 @@ impl Request {
if req.r#type != RequestType::Flush {
return Err(VirtioBlockError::DescriptorChainTooShort);
}
data_desc = desc;
} else {
data_desc = desc;
status_desc = data_desc
Expand All @@ -287,6 +312,9 @@ impl Request {
if !data_desc.is_write_only() && req.r#type == RequestType::GetDeviceID {
return Err(VirtioBlockError::UnexpectedReadOnlyDescriptor);
}
if data_desc.is_write_only() && req.r#type == RequestType::Discard {
return Err(VirtioBlockError::UnexpectedWriteOnlyDescriptor);
}

req.data_addr = data_desc.addr;
req.data_len = data_desc.len;
Expand All @@ -313,6 +341,48 @@ impl Request {
return Err(VirtioBlockError::InvalidDataLength);
}
}
RequestType::Discard => {
// Validate data length
let segment_size = std::mem::size_of::<DiscardSegment>();
if (data_desc.len as usize) % segment_size != 0 {
return Err(VirtioBlockError::InvalidDataLength);
}

// Calculate number of segments
let num_segments = (data_desc.len as usize) / segment_size;
if num_segments == 0 {
return Err(VirtioBlockError::InvalidDiscardRequest);
}
let mut segments = Vec::with_capacity(num_segments);

// Populate DiscardSegments vector
for i in 0..num_segments {
let offset = i * segment_size;
let segment: DiscardSegment = mem
.read_obj(data_desc.addr.unchecked_add(offset as u64))
.map_err(VirtioBlockError::GuestMemory)?;
let end_sector = segment.sector + segment.num_sectors as u64;
if end_sector > num_disk_sectors {
error!(
"Discard request out of bounds: sector={} num_sectors={} \
end_sector={} disk_sectors={}",
segment.sector, segment.num_sectors, end_sector, num_disk_sectors
);
continue;
}
segments.push(segment);
}

// Assign to req.discard_segments
req.discard_segments = Some(segments);
req.data_len = data_desc.len;
status_desc = data_desc
.next_descriptor()
.ok_or(VirtioBlockError::DescriptorChainTooShort)?;
if !status_desc.is_write_only() {
return Err(VirtioBlockError::UnexpectedReadOnlyDescriptor);
}
}
_ => {}
}

Expand Down Expand Up @@ -390,6 +460,11 @@ impl Request {
.map_err(IoErr::GetId);
return ProcessingResult::Executed(pending.finish(mem, res, block_metrics));
}
RequestType::Discard => {
let _metric = block_metrics.write_agg.record_latency_metrics();
disk.file_engine
.discard(self.offset(), self.data_len, pending)
}
RequestType::Unsupported(_) => {
return ProcessingResult::Executed(pending.finish(mem, Ok(0), block_metrics));
}
Expand Down Expand Up @@ -731,6 +806,7 @@ mod tests {
RequestType::Out => VIRTIO_BLK_T_OUT,
RequestType::Flush => VIRTIO_BLK_T_FLUSH,
RequestType::GetDeviceID => VIRTIO_BLK_T_GET_ID,
RequestType::Discard => VIRTIO_BLK_T_DISCARD,
RequestType::Unsupported(id) => id,
}
}
Expand All @@ -743,6 +819,7 @@ mod tests {
RequestType::Out => VIRTQ_DESC_F_NEXT,
RequestType::Flush => VIRTQ_DESC_F_NEXT,
RequestType::GetDeviceID => VIRTQ_DESC_F_NEXT | VIRTQ_DESC_F_WRITE,
RequestType::Discard => VIRTQ_DESC_F_NEXT,
RequestType::Unsupported(_) => VIRTQ_DESC_F_NEXT,
}
}
Expand Down Expand Up @@ -833,6 +910,7 @@ mod tests {
data_len: valid_data_len,
status_addr,
sector: sector & (NUM_DISK_SECTORS - sectors_len),
discard_segments: None,
data_addr,
};
let mut request_header = RequestHeader::new(virtio_request_id, request.sector);
Expand Down
Loading