diff --git a/Cargo.lock b/Cargo.lock index eb4446f..97eb479 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -14,6 +14,12 @@ version = "1.0.79" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "50d30906286121d95be3d479533b458f87493b30a4b5f79a607db8f5d11aa91f" +[[package]] +name = "cfg-if" +version = "1.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd" + [[package]] name = "clap" version = "4.1.4" @@ -157,6 +163,15 @@ version = "0.1.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f051f77a7c8e6957c0696eac88f26b0117e54f52d3fc682ab19397a8812846a4" +[[package]] +name = "log" +version = "0.4.17" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "abb12e687cfb44aa40f41fc3978ef76448f9b6038cad6aef4259d3c095a2382e" +dependencies = [ + "cfg-if", +] + [[package]] name = "lumper" version = "0.1.0" @@ -259,6 +274,26 @@ dependencies = [ "winapi-util", ] +[[package]] +name = "thiserror" +version = "1.0.39" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a5ab016db510546d856297882807df8da66a16fb8c4101cb8b30054b0d5b2d9c" +dependencies = [ + "thiserror-impl", +] + +[[package]] +name = "thiserror-impl" +version = "1.0.39" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5420d42e90af0c38c3290abcca25b9b3bdf379fc9f55c528f53a269d9c9a267e" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + [[package]] name = "unicode-ident" version = "1.0.6" @@ -271,10 +306,55 @@ version = "0.9.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "49874b5167b65d7193b8aba1567f5c7d93d001cafc34600cee003eda787e483f" +[[package]] +name = "virtio-bindings" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0b9084faf91b9aa9676ae2cac8f1432df2839d9566e6f19f29dbc13a8b831dff" + +[[package]] +name = 
"virtio-bindings" +version = "0.2.0" +source = "git+https://github.com/rust-vmm/vm-virtio#467c8ec99375a5f4e08b85b18257cd7e0bac1dc0" + +[[package]] +name = "virtio-device" +version = "0.1.0" +source = "git+https://github.com/rust-vmm/vm-virtio#467c8ec99375a5f4e08b85b18257cd7e0bac1dc0" +dependencies = [ + "log", + "virtio-bindings 0.2.0 (git+https://github.com/rust-vmm/vm-virtio)", + "virtio-queue", + "vm-memory", +] + +[[package]] +name = "virtio-queue" +version = "0.7.1" +source = "git+https://github.com/rust-vmm/vm-virtio#467c8ec99375a5f4e08b85b18257cd7e0bac1dc0" +dependencies = [ + "log", + "virtio-bindings 0.2.0 (git+https://github.com/rust-vmm/vm-virtio)", + "vm-memory", + "vmm-sys-util", +] + +[[package]] +name = "vm-allocator" +version = "0.1.0" +source = "git+https://github.com/lucido-simon/vm-allocator?rev=dfb880a86763a0064fdd700fbe731f26fcbf6681#dfb880a86763a0064fdd700fbe731f26fcbf6681" +dependencies = [ + "libc", + "thiserror", +] + [[package]] name = "vm-device" version = "0.1.0" -source = "git+https://github.com/rust-vmm/vm-device?rev=5847f12#5847f1286492b7191f1400e6647fb220f8941f89" +source = "git+https://github.com/lucido-simon/vm-device?rev=63bf6ecea4ee851d500e283dc3809baf6f89555d#63bf6ecea4ee851d500e283dc3809baf6f89555d" +dependencies = [ + "vm-memory", +] [[package]] name = "vm-memory" @@ -301,6 +381,10 @@ dependencies = [ "kvm-ioctls", "libc", "linux-loader", + "virtio-bindings 0.2.0 (registry+https://github.com/rust-lang/crates.io-index)", + "virtio-device", + "virtio-queue", + "vm-allocator", "vm-device", "vm-memory", "vm-superio", diff --git a/src/main.rs b/src/main.rs index f9eeca0..c24482f 100644 --- a/src/main.rs +++ b/src/main.rs @@ -25,6 +25,10 @@ struct VMMOpts { /// Stdout console file path #[clap(long)] console: Option, + + /// Interface name + #[clap(long)] + net: Option, } #[derive(Debug)] @@ -47,7 +51,7 @@ fn main() -> Result<(), Error> { // * Memory size (in MB) // * Path to a Linux kernel // * Optional path to console file - 
vmm.configure(opts.cpus, opts.memory, &opts.kernel, opts.console) + vmm.configure(opts.cpus, opts.memory, &opts.kernel, opts.console, opts.net) .map_err(Error::VmmConfigure)?; // Run the VMM diff --git a/src/vmm/Cargo.lock b/src/vmm/Cargo.lock index 04178bf..46e54f8 100644 --- a/src/vmm/Cargo.lock +++ b/src/vmm/Cargo.lock @@ -8,6 +8,12 @@ version = "1.3.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "bef38d45163c2f1dde094a7dfd33ccf595c92905c8f8f4fdc18d06fb1037718a" +[[package]] +name = "cfg-if" +version = "1.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd" + [[package]] name = "epoll" version = "4.3.1" @@ -53,10 +59,119 @@ dependencies = [ "vm-memory", ] +[[package]] +name = "log" +version = "0.4.17" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "abb12e687cfb44aa40f41fc3978ef76448f9b6038cad6aef4259d3c095a2382e" +dependencies = [ + "cfg-if", +] + +[[package]] +name = "proc-macro2" +version = "1.0.53" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ba466839c78239c09faf015484e5cc04860f88242cff4d03eb038f04b4699b73" +dependencies = [ + "unicode-ident", +] + +[[package]] +name = "quote" +version = "1.0.26" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4424af4bf778aae2051a77b60283332f386554255d722233d09fbfc7e30da2fc" +dependencies = [ + "proc-macro2", +] + +[[package]] +name = "syn" +version = "2.0.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0da4a3c17e109f700685ec577c0f85efd9b19bcf15c913985f14dc1ac01775aa" +dependencies = [ + "proc-macro2", + "quote", + "unicode-ident", +] + +[[package]] +name = "thiserror" +version = "1.0.40" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "978c9a314bd8dc99be594bc3c175faaa9794be04a5a5e153caba6915336cebac" +dependencies = [ + "thiserror-impl", +] 
+ +[[package]] +name = "thiserror-impl" +version = "1.0.40" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f9456a42c5b0d803c8cd86e73dd7cc9edd429499f37a3550d286d5e86720569f" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "unicode-ident" +version = "1.0.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e5464a87b239f13a63a501f2701565754bae92d243d4bb7eb12f6d57d2269bf4" + +[[package]] +name = "virtio-bindings" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0b9084faf91b9aa9676ae2cac8f1432df2839d9566e6f19f29dbc13a8b831dff" + +[[package]] +name = "virtio-bindings" +version = "0.2.0" +source = "git+https://github.com/rust-vmm/vm-virtio#467c8ec99375a5f4e08b85b18257cd7e0bac1dc0" + +[[package]] +name = "virtio-device" +version = "0.1.0" +source = "git+https://github.com/rust-vmm/vm-virtio#467c8ec99375a5f4e08b85b18257cd7e0bac1dc0" +dependencies = [ + "log", + "virtio-bindings 0.2.0 (git+https://github.com/rust-vmm/vm-virtio)", + "virtio-queue", + "vm-memory", +] + +[[package]] +name = "virtio-queue" +version = "0.7.1" +source = "git+https://github.com/rust-vmm/vm-virtio#467c8ec99375a5f4e08b85b18257cd7e0bac1dc0" +dependencies = [ + "log", + "virtio-bindings 0.2.0 (git+https://github.com/rust-vmm/vm-virtio)", + "vm-memory", + "vmm-sys-util", +] + +[[package]] +name = "vm-allocator" +version = "0.1.0" +source = "git+https://github.com/lucido-simon/vm-allocator?rev=dfb880a86763a0064fdd700fbe731f26fcbf6681#dfb880a86763a0064fdd700fbe731f26fcbf6681" +dependencies = [ + "libc", + "thiserror", +] + [[package]] name = "vm-device" version = "0.1.0" -source = "git+https://github.com/rust-vmm/vm-device?rev=5847f12#5847f1286492b7191f1400e6647fb220f8941f89" +source = "git+https://github.com/lucido-simon/vm-device?rev=63bf6ecea4ee851d500e283dc3809baf6f89555d#63bf6ecea4ee851d500e283dc3809baf6f89555d" +dependencies = [ + "vm-memory", +] [[package]] 
name = "vm-memory" @@ -83,6 +198,10 @@ dependencies = [ "kvm-ioctls", "libc", "linux-loader", + "virtio-bindings 0.2.0 (registry+https://github.com/rust-lang/crates.io-index)", + "virtio-device", + "virtio-queue", + "vm-allocator", "vm-device", "vm-memory", "vm-superio", diff --git a/src/vmm/Cargo.toml b/src/vmm/Cargo.toml index 54c5c37..e70ae2b 100644 --- a/src/vmm/Cargo.toml +++ b/src/vmm/Cargo.toml @@ -11,10 +11,14 @@ libc = "0.2.91" linux-loader = { version = "0.8.1", features = ["bzimage", "elf"] } vm-memory = { version = "0.10.0", features = ["backend-mmap"] } vmm-sys-util = "0.11.1" +virtio-bindings = "0.2.0" # vm-device is not yet published on crates.io. # To make sure that breaking changes to vm-device are not breaking the # vm-vcpu build, we're using a fixed revision. -vm-device = { git = "https://github.com/rust-vmm/vm-device", rev = "5847f12" } +vm-device = { git = "https://github.com/lucido-simon/vm-device", rev = "63bf6ecea4ee851d500e283dc3809baf6f89555d" } +vm-allocator = { git = "https://github.com/lucido-simon/vm-allocator", rev = "dfb880a86763a0064fdd700fbe731f26fcbf6681"} +virtio-device = { git = "https://github.com/rust-vmm/vm-virtio" } +virtio-queue = { git = "https://github.com/rust-vmm/vm-virtio" } vm-superio = "0.7.0" diff --git a/src/vmm/src/cpu/mod.rs b/src/vmm/src/cpu/mod.rs index d81fdcf..02639c7 100644 --- a/src/vmm/src/cpu/mod.rs +++ b/src/vmm/src/cpu/mod.rs @@ -8,6 +8,7 @@ use std::{result, u64}; use kvm_bindings::{kvm_fpu, kvm_regs, CpuId}; use kvm_ioctls::{VcpuExit, VcpuFd, VmFd}; +use vm_device::device_manager::{IoManager, MmioManager}; use vm_memory::{Address, Bytes, GuestAddress, GuestMemoryError, GuestMemoryMmap}; use vmm_sys_util::terminal::Terminal; @@ -66,15 +67,22 @@ pub(crate) struct Vcpu { pub vcpu_fd: VcpuFd, serial: Arc>, + virtio_manager: Arc>, } impl Vcpu { /// Create a new vCPU. 
-    pub fn new(vm_fd: &VmFd, index: u64, serial: Arc>) -> Result {
+    pub fn new(
+        vm_fd: &VmFd,
+        index: u64,
+        serial: Arc>,
+        virtio_manager: Arc>,
+    ) -> Result {
         Ok(Vcpu {
             index,
             vcpu_fd: vm_fd.create_vcpu(index).map_err(Error::KvmIoctl)?,
             serial,
+            virtio_manager,
         })
     }
 
@@ -266,10 +274,36 @@ impl Vcpu {
                         println!("Unsupported device read at {:x?}", addr);
                     }
                 },
+
+                // This is a MMIO write, i.e. the guest is trying to write
+                // something to a memory-mapped I/O region.
+                VcpuExit::MmioWrite(addr, data) => {
+                    self.virtio_manager
+                        .lock()
+                        .unwrap()
+                        .mmio_write(GuestAddress(addr), data)
+                        .unwrap_or_else(|e| {
+                            eprintln!("Failed to write to MMIO at address {:#x}: {}", addr, e);
+                        });
+                }
+
+                // This is a MMIO read, i.e. the guest is trying to read
+                // from a memory-mapped I/O region.
+                VcpuExit::MmioRead(addr, data) => {
+                    self.virtio_manager
+                        .lock()
+                        .unwrap()
+                        .mmio_read(GuestAddress(addr), data)
+                        .unwrap_or_else(|e| {
+                            eprintln!("Failed to read from MMIO at address {:#x}: {}", addr, e);
+                        });
+                }
+
                 _ => {
                     eprintln!("Unhandled VM-Exit: {:?}", exit_reason);
                 }
             },
+
             Err(e) => eprintln!("Emulation error: {}", e),
         }
     }
diff --git a/src/vmm/src/devices/mod.rs b/src/vmm/src/devices/mod.rs
index 38db994..ea91bcb 100644
--- a/src/vmm/src/devices/mod.rs
+++ b/src/vmm/src/devices/mod.rs
@@ -1,3 +1,4 @@
 // SPDX-License-Identifier: Apache-2.0
 
+pub(crate) mod net;
 pub(crate) mod serial;
diff --git a/src/vmm/src/devices/net/bindings.rs b/src/vmm/src/devices/net/bindings.rs
new file mode 100644
index 0000000..a6e5ed6
--- /dev/null
+++ b/src/vmm/src/devices/net/bindings.rs
@@ -0,0 +1,271 @@
+// Copyright 2018 Amazon.com, Inc. or its affiliates. All Rights Reserved.
+// SPDX-License-Identifier: Apache-2.0
+//
+// Portions Copyright 2017 The Chromium OS Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the THIRD-PARTY file.
+
+// The following are manually copied from crosvm/firecracker.
In the latter, they can be found as +// part of the `net_gen` local crate. We should figure out how to proceed going forward (i.e. +// create some bindings of our own, put them in a common crate, etc). + +#![allow(clippy::all)] +#![allow(non_upper_case_globals)] +#![allow(non_camel_case_types)] +#![allow(non_snake_case)] + +use virtio_bindings::virtio_net::virtio_net_hdr_v1; + +pub const TUN_F_CSUM: ::std::os::raw::c_uint = 1; +pub const TUN_F_TSO4: ::std::os::raw::c_uint = 2; +pub const TUN_F_TSO6: ::std::os::raw::c_uint = 4; +pub const TUN_F_UFO: ::std::os::raw::c_uint = 16; + +pub const VIRTIO_F_VERSION_1: u64 = 32; +pub const VIRTIO_HDR_LEN: usize = ::core::mem::size_of::(); +pub const VIRTIO_NET_DEVICE_ID: u32 = 1; + +#[repr(C)] +pub struct __BindgenUnionField(::std::marker::PhantomData); +impl __BindgenUnionField { + #[inline] + pub fn new() -> Self { + __BindgenUnionField(::std::marker::PhantomData) + } + #[inline] + pub unsafe fn as_mut(&mut self) -> &mut T { + ::std::mem::transmute(self) + } +} +impl ::std::default::Default for __BindgenUnionField { + #[inline] + fn default() -> Self { + Self::new() + } +} +impl ::std::clone::Clone for __BindgenUnionField { + #[inline] + fn clone(&self) -> Self { + Self::new() + } +} +impl ::std::marker::Copy for __BindgenUnionField {} +impl ::std::fmt::Debug for __BindgenUnionField { + fn fmt(&self, fmt: &mut ::std::fmt::Formatter) -> ::std::fmt::Result { + fmt.write_str("__BindgenUnionField") + } +} + +#[repr(C)] +#[derive(Debug, Default, Copy)] +pub struct ifreq { + pub ifr_ifrn: ifreq__bindgen_ty_1, + pub ifr_ifru: ifreq__bindgen_ty_2, +} + +impl Clone for ifreq { + fn clone(&self) -> Self { + *self + } +} + +#[repr(C)] +#[derive(Debug, Default, Copy)] +pub struct ifreq__bindgen_ty_1 { + pub ifrn_name: __BindgenUnionField<[::std::os::raw::c_uchar; 16usize]>, + pub bindgen_union_field: [u8; 16usize], +} + +impl Clone for ifreq__bindgen_ty_1 { + fn clone(&self) -> Self { + *self + } +} + +#[repr(C)] +#[derive(Debug, 
Default, Copy)] +pub struct ifreq__bindgen_ty_2 { + pub ifru_addr: __BindgenUnionField, + pub ifru_dstaddr: __BindgenUnionField, + pub ifru_broadaddr: __BindgenUnionField, + pub ifru_netmask: __BindgenUnionField, + pub ifru_hwaddr: __BindgenUnionField, + pub ifru_flags: __BindgenUnionField<::std::os::raw::c_short>, + pub ifru_ivalue: __BindgenUnionField<::std::os::raw::c_int>, + pub ifru_mtu: __BindgenUnionField<::std::os::raw::c_int>, + pub ifru_map: __BindgenUnionField, + pub ifru_slave: __BindgenUnionField<[::std::os::raw::c_char; 16usize]>, + pub ifru_newname: __BindgenUnionField<[::std::os::raw::c_char; 16usize]>, + pub ifru_data: __BindgenUnionField<*mut ::std::os::raw::c_void>, + pub ifru_settings: __BindgenUnionField, + pub bindgen_union_field: [u64; 3usize], +} + +impl Clone for ifreq__bindgen_ty_2 { + fn clone(&self) -> Self { + *self + } +} + +pub type sa_family_t = ::std::os::raw::c_ushort; + +#[repr(C)] +#[derive(Debug, Default, Copy)] +pub struct sockaddr { + pub sa_family: sa_family_t, + pub sa_data: [::std::os::raw::c_char; 14usize], +} + +impl Clone for sockaddr { + fn clone(&self) -> Self { + *self + } +} + +#[repr(C)] +#[derive(Debug, Default, Copy)] +pub struct if_settings { + pub type_: ::std::os::raw::c_uint, + pub size: ::std::os::raw::c_uint, + pub ifs_ifsu: if_settings__bindgen_ty_1, +} + +impl Clone for if_settings { + fn clone(&self) -> Self { + *self + } +} + +#[repr(C)] +#[derive(Debug, Default, Copy)] +pub struct if_settings__bindgen_ty_1 { + pub raw_hdlc: __BindgenUnionField<*mut raw_hdlc_proto>, + pub cisco: __BindgenUnionField<*mut cisco_proto>, + pub fr: __BindgenUnionField<*mut fr_proto>, + pub fr_pvc: __BindgenUnionField<*mut fr_proto_pvc>, + pub fr_pvc_info: __BindgenUnionField<*mut fr_proto_pvc_info>, + pub sync: __BindgenUnionField<*mut sync_serial_settings>, + pub te1: __BindgenUnionField<*mut te1_settings>, + pub bindgen_union_field: u64, +} + +impl Clone for if_settings__bindgen_ty_1 { + fn clone(&self) -> Self { + *self + 
} +} + +#[repr(C)] +#[derive(Debug, Default, Copy)] +pub struct ifmap { + pub mem_start: ::std::os::raw::c_ulong, + pub mem_end: ::std::os::raw::c_ulong, + pub base_addr: ::std::os::raw::c_ushort, + pub irq: ::std::os::raw::c_uchar, + pub dma: ::std::os::raw::c_uchar, + pub port: ::std::os::raw::c_uchar, +} + +impl Clone for ifmap { + fn clone(&self) -> Self { + *self + } +} + +#[repr(C)] +#[derive(Debug, Default, Copy)] +pub struct raw_hdlc_proto { + pub encoding: ::std::os::raw::c_ushort, + pub parity: ::std::os::raw::c_ushort, +} + +impl Clone for raw_hdlc_proto { + fn clone(&self) -> Self { + *self + } +} + +#[repr(C)] +#[derive(Debug, Default, Copy)] +pub struct cisco_proto { + pub interval: ::std::os::raw::c_uint, + pub timeout: ::std::os::raw::c_uint, +} + +impl Clone for cisco_proto { + fn clone(&self) -> Self { + *self + } +} + +#[repr(C)] +#[derive(Debug, Default, Copy)] +pub struct fr_proto { + pub t391: ::std::os::raw::c_uint, + pub t392: ::std::os::raw::c_uint, + pub n391: ::std::os::raw::c_uint, + pub n392: ::std::os::raw::c_uint, + pub n393: ::std::os::raw::c_uint, + pub lmi: ::std::os::raw::c_ushort, + pub dce: ::std::os::raw::c_ushort, +} + +impl Clone for fr_proto { + fn clone(&self) -> Self { + *self + } +} + +#[repr(C)] +#[derive(Debug, Default, Copy)] +pub struct fr_proto_pvc { + pub dlci: ::std::os::raw::c_uint, +} + +impl Clone for fr_proto_pvc { + fn clone(&self) -> Self { + *self + } +} + +#[repr(C)] +#[derive(Debug, Default, Copy)] +pub struct fr_proto_pvc_info { + pub dlci: ::std::os::raw::c_uint, + pub master: [::std::os::raw::c_char; 16usize], +} + +impl Clone for fr_proto_pvc_info { + fn clone(&self) -> Self { + *self + } +} + +#[repr(C)] +#[derive(Debug, Default, Copy)] +pub struct sync_serial_settings { + pub clock_rate: ::std::os::raw::c_uint, + pub clock_type: ::std::os::raw::c_uint, + pub loopback: ::std::os::raw::c_ushort, +} + +impl Clone for sync_serial_settings { + fn clone(&self) -> Self { + *self + } +} + +#[repr(C)] 
+#[derive(Debug, Default, Copy)] +pub struct te1_settings { + pub clock_rate: ::std::os::raw::c_uint, + pub clock_type: ::std::os::raw::c_uint, + pub loopback: ::std::os::raw::c_ushort, + pub slot_map: ::std::os::raw::c_uint, +} + +impl Clone for te1_settings { + fn clone(&self) -> Self { + *self + } +} diff --git a/src/vmm/src/devices/net/interface.rs b/src/vmm/src/devices/net/interface.rs new file mode 100644 index 0000000..992767d --- /dev/null +++ b/src/vmm/src/devices/net/interface.rs @@ -0,0 +1,13 @@ +use std::{ + io::{Read, Write}, + os::fd::AsRawFd, +}; + +use super::Result; + +pub trait Interface: Read + Write + AsRawFd + Send + Sync { + fn activate(&self, virtio_flags: u64, virtio_header_size: usize) -> Result<()>; + fn open_named(if_name: &str) -> Result + where + Self: Sized; +} diff --git a/src/vmm/src/devices/net/mod.rs b/src/vmm/src/devices/net/mod.rs new file mode 100644 index 0000000..30116a7 --- /dev/null +++ b/src/vmm/src/devices/net/mod.rs @@ -0,0 +1,320 @@ +pub mod interface; + +pub(crate) mod bindings; +pub(crate) mod tap; + +use std::{ + borrow::{Borrow, BorrowMut}, + cmp, + error::Error, + fmt::{self, Debug, Display}, + os::fd::{AsRawFd, RawFd}, + sync::atomic::Ordering, +}; + +use virtio_device::{VirtioConfig, VirtioDeviceActions, VirtioDeviceType, VirtioMmioDevice}; + +use virtio_bindings::bindings::virtio_net::{ + self, VIRTIO_NET_F_CSUM, VIRTIO_NET_F_GUEST_CSUM, VIRTIO_NET_F_GUEST_TSO4, + VIRTIO_NET_F_GUEST_TSO6, VIRTIO_NET_F_GUEST_UFO, VIRTIO_NET_F_HOST_TSO4, + VIRTIO_NET_F_HOST_TSO6, VIRTIO_NET_F_HOST_UFO, +}; +use virtio_queue::{Queue, QueueOwnedT, QueueT}; +use vm_device::{MutVirtioMmioDevice, VirtioMmioOffset}; +use vm_memory::{Bytes, GuestAddress, GuestAddressSpace}; +use vmm_sys_util::eventfd::EventFd; + +use interface::Interface; + +// TODO: Make this configurable. 
+const VIRTIO_FEATURES: u64 = (1 << bindings::VIRTIO_F_VERSION_1) + | (1 << VIRTIO_NET_F_CSUM) + | (1 << VIRTIO_NET_F_GUEST_CSUM) + | (1 << VIRTIO_NET_F_HOST_TSO4) + | (1 << VIRTIO_NET_F_HOST_TSO6) + | (1 << VIRTIO_NET_F_HOST_UFO) + | (1 << VIRTIO_NET_F_GUEST_TSO4) + | (1 << VIRTIO_NET_F_GUEST_TSO6) + | (1 << VIRTIO_NET_F_GUEST_UFO); + +const MAX_BUFFER_SIZE: usize = 65565; + +#[derive(Debug)] + +pub enum VirtioNetError { + InvalidIfname, + VirtioQueueError(virtio_queue::Error), + IoCtlError(std::io::Error), + IoError(std::io::Error), + MemoryError(vm_memory::GuestMemoryError), + QueueError(virtio_queue::Error), +} +impl Error for VirtioNetError {} +impl Display for VirtioNetError { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + write!(f, "virtio net error") + } +} + +pub type Result = std::result::Result; + +pub struct VirtioNet { + pub device_config: VirtioConfig, + pub guest_irq_fd: EventFd, + pub address_space: M, + pub interface: I, +} + +impl VirtioNet { + pub fn new(memory: M, irq_fd: EventFd, if_name: &str) -> Result { + Ok(Self { + device_config: VirtioConfig::new( + VIRTIO_FEATURES, + vec![ + Queue::new(256).map_err(VirtioNetError::QueueError)?, + Queue::new(256).map_err(VirtioNetError::QueueError)?, + ], + // Not used in the current implementation. 
+                Self::config_vec(virtio_net::virtio_net_config {
+                    ..Default::default()
+                }),
+            ),
+            address_space: memory,
+            guest_irq_fd: irq_fd,
+            interface: I::open_named(if_name)?,
+        })
+    }
+
+    fn config_vec(config: virtio_net::virtio_net_config) -> Vec {
+        let mut config_vec = Vec::new();
+        config_vec.extend_from_slice(&config.mac);
+        config_vec.extend_from_slice(&config.status.to_le_bytes());
+        config_vec.extend_from_slice(&config.max_virtqueue_pairs.to_le_bytes());
+        config_vec.extend_from_slice(&config.mtu.to_le_bytes());
+        config_vec.extend_from_slice(&config.speed.to_le_bytes());
+        config_vec.extend_from_slice(&config.duplex.to_le_bytes());
+        config_vec
+    }
+
+    fn is_reading_register(&self, offset: &VirtioMmioOffset) -> bool {
+        if let VirtioMmioOffset::DeviceSpecific(offset) = offset {
+            !(*offset as usize) < self.device_config.config_space.len() * 8 // NOTE(review): `!` applies to the parenthesised cast (bitwise NOT), not to the `<` comparison, so this is false for any small offset — confirm the intended bounds check
+        } else {
+            true
+        }
+    }
+
+    fn write_frame_to_guest(
+        &mut self,
+        original_buffer: &mut [u8; MAX_BUFFER_SIZE],
+        size: usize,
+    ) -> Result {
+        let mem = self.address_space.memory();
+        let mut chain = match &mut self.device_config.queues[0]
+            .iter(&*mem)
+            .map_err(VirtioNetError::QueueError)?
+            .next()
+        {
+            Some(c) => c.to_owned(),
+            _ => return Ok(false),
+        };
+
+        let mut count = 0;
+        let buffer = &mut original_buffer[..size];
+
+        while let Some(desc) = chain.next() {
+            let left = buffer.len() - count;
+
+            if left == 0 {
+                break;
+            }
+
+            let len = cmp::min(left, desc.len() as usize);
+            chain
+                .memory()
+                .write_slice(&buffer[count..count + len], desc.addr())
+                .map_err(VirtioNetError::MemoryError)?;
+
+            count += len;
+        }
+
+        if count != buffer.len() {
+            // The frame was too large for the chain.
+ println!("rx frame too large"); + } + + self.device_config.queues[0] + .add_used(&*mem, chain.head_index(), count as u32) + .map_err(VirtioNetError::QueueError)?; + + Ok(true) + } + + pub fn process_tap(&mut self) -> Result<()> { + { + let buffer = &mut [0u8; MAX_BUFFER_SIZE]; + + loop { + let read_size = match self.interface.read(buffer) { + Ok(size) => size, + Err(_) => { + break; + } + }; + + let mem = self.address_space.memory().borrow_mut().clone(); + + if !self.write_frame_to_guest(buffer, read_size)? + && !self.device_config.queues[0] + .enable_notification(&*mem.clone()) + .map_err(VirtioNetError::QueueError)? + { + break; + } + } + } + + if self.device_config.queues[0] + .needs_notification(&*self.address_space.memory()) + .map_err(VirtioNetError::QueueError)? + { + // TODO: Figure out why we need to do that + self.device_config + .interrupt_status + .store(1, Ordering::SeqCst); + + // Error should be recoverable as is, so we just log it. + self.guest_irq_fd.write(1).unwrap_or_else(|e| { + println!("Failed to signal irq: {:?}", e); + }); + } + + Ok(()) + } +} + +impl AsRawFd for VirtioNet { + fn as_raw_fd(&self) -> RawFd { + self.interface.as_raw_fd() + } +} + +impl VirtioDeviceType for VirtioNet { + fn device_type(&self) -> u32 { + bindings::VIRTIO_NET_DEVICE_ID + } +} + +impl VirtioMmioDevice for VirtioNet { + // Please note that this method can be improved error handling wise. + // We are limited in how we can handle errors here, as we are not allowed to return a Result. + fn queue_notify(&mut self, val: u32) { + if val == 0 { + return; + } + + let mem = self.address_space.memory().clone(); + let irq = &mut self.guest_irq_fd; + let queue = &mut self.device_config.queues[1]; + + loop { + match queue.disable_notification(&*mem) { + Ok(_) => {} + Err(e) => { + println!("Failed to disable notification: {:?}", e); + break; + } + } + + // Consume entries from the available ring. + // Never fails since we know the memory is valid. 
+ while let Some(chain) = queue.iter(&*mem).unwrap().next() { + let mut data_buffer: Vec = Vec::new(); + chain.clone().for_each(|desc| { + let initial_buffer_len = data_buffer.len(); + + data_buffer.resize(data_buffer.len() + desc.len() as usize, 0); + + // Safe as we just allocated the buffer and mem is valid. + // If it actually fails, it is probably unrecoverable anyway. + mem.read_slice(&mut data_buffer[initial_buffer_len..], desc.addr()) + .unwrap(); + }); + + if (data_buffer.len() as usize) < bindings::VIRTIO_HDR_LEN { + println!("invalid net packet"); + return; + } + + match self.interface.write(&data_buffer) { + Ok(_) => { + queue + .add_used(&*mem, chain.head_index(), 0x100) + // Try continuing even if we failed to add the used buffer. + .unwrap_or_else(|e| { + println!("Failed to add used buffer: {:?}", e); + }); + + if queue.needs_notification(&*mem).unwrap_or_default() { + irq.write(1).unwrap_or_else(|e| { + println!("Failed to signal irq: {:?}", e); + }); + } + } + Err(e) => { + println!("Failed to write to tap: {:?}", e); + } + } + } + + if !queue.enable_notification(&*mem).unwrap_or_default() { + break; + } + } + } +} + +impl Borrow> + for VirtioNet +{ + fn borrow(&self) -> &VirtioConfig { + &self.device_config + } +} + +impl BorrowMut> + for VirtioNet +{ + fn borrow_mut(&mut self) -> &mut VirtioConfig { + &mut self.device_config + } +} + +impl VirtioDeviceActions for VirtioNet { + type E = VirtioNetError; + + fn activate(&mut self) -> Result<()> { + self.interface + .activate(VIRTIO_FEATURES, bindings::VIRTIO_HDR_LEN)?; + + Ok(()) + } + fn reset(&mut self) -> std::result::Result<(), Self::E> { + println!("virtio net reset"); + Ok(()) + } +} + +impl MutVirtioMmioDevice for VirtioNet { + fn virtio_mmio_read(&mut self, _base: GuestAddress, offset: VirtioMmioOffset, data: &mut [u8]) { + if self.is_reading_register(&offset) { + self.read(u64::from(offset), data); + } + } + + fn virtio_mmio_write(&mut self, _base: GuestAddress, offset: VirtioMmioOffset, 
data: &[u8]) { + if self.is_reading_register(&offset) { + self.write(u64::from(offset), data); + } + } +} diff --git a/src/vmm/src/devices/net/tap.rs b/src/vmm/src/devices/net/tap.rs new file mode 100644 index 0000000..f67db92 --- /dev/null +++ b/src/vmm/src/devices/net/tap.rs @@ -0,0 +1,188 @@ +// Copyright 2018 Amazon.com, Inc. or its affiliates. All Rights Reserved. +// SPDX-License-Identifier: Apache-2.0 +// +// Portions Copyright 2017 The Chromium OS Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the THIRD-PARTY file. + +// We should add a tap abstraction to rust-vmm as well. Using this one, which is copied from +// Firecracker until then. + +use std::fs::File; +use std::io::{Error as IoError, Read, Result as IoResult, Write}; +use std::os::raw::{c_char, c_uint, c_ulong}; +use std::os::unix::io::{AsRawFd, FromRawFd, RawFd}; + +use virtio_bindings::bindings::virtio_net::{VIRTIO_NET_F_CSUM, VIRTIO_NET_F_HOST_UFO}; +use vmm_sys_util::ioctl::{ioctl_with_mut_ref, ioctl_with_ref, ioctl_with_val}; +use vmm_sys_util::{ioctl_ioc_nr, ioctl_iow_nr}; + +use super::bindings::{ifreq, TUN_F_CSUM, TUN_F_TSO4, TUN_F_TSO6, TUN_F_UFO}; +use super::interface::Interface; +use super::VirtioNetError; + +// As defined in the Linux UAPI: +// https://elixir.bootlin.com/linux/v4.17/source/include/uapi/linux/if.h#L33 +const IFACE_NAME_MAX_LEN: usize = 16; + +// Taken from firecracker net_gen/if_tun.rs ... we should see what to do about the net related +// bindings overall for rust-vmm. +const IFF_TAP: ::std::os::raw::c_uint = 2; +const IFF_NO_PI: ::std::os::raw::c_uint = 4096; +const IFF_VNET_HDR: ::std::os::raw::c_uint = 16384; + +const TUNTAP: ::std::os::raw::c_uint = 84; +ioctl_iow_nr!(TUNSETIFF, TUNTAP, 202, ::std::os::raw::c_int); +ioctl_iow_nr!(TUNSETOFFLOAD, TUNTAP, 208, ::std::os::raw::c_uint); +ioctl_iow_nr!(TUNSETVNETHDRSZ, TUNTAP, 216, ::std::os::raw::c_int); + +/// Handle for a network tap interface. 
+///
+/// For now, this simply wraps the file descriptor for the tap device so methods
+/// can run ioctls on the interface. The tap interface fd will be closed when
+/// Tap goes out of scope, and the kernel will clean up the interface automatically.
+#[derive(Debug)]
+pub struct Tap {
+    tap_file: File,
+}
+
+impl Tap {
+    fn virtio_flags_to_tuntap_flags(virtio_flags: u64) -> c_uint {
+        // Translate each offload feature bit into its own TUN_F_* flag: CSUM, UFO, TSO4 and TSO6.
+        let mut flags = 0;
+        if virtio_flags & (1 << VIRTIO_NET_F_CSUM) != 0 {
+            flags |= TUN_F_CSUM;
+        }
+        if virtio_flags & (1 << VIRTIO_NET_F_HOST_UFO) != 0 {
+            flags |= TUN_F_UFO;
+        }
+        if virtio_flags & (1 << virtio_bindings::virtio_net::VIRTIO_NET_F_HOST_TSO4) != 0 {
+            flags |= TUN_F_TSO4;
+        }
+        if virtio_flags & (1 << virtio_bindings::virtio_net::VIRTIO_NET_F_HOST_TSO6) != 0 {
+            flags |= TUN_F_TSO6;
+        }
+
+        flags
+    }
+}
+
+impl Interface for Tap {
+    fn activate(&self, virtio_flags: u64, virtio_header_size: usize) -> super::Result<()> {
+        let flags = Tap::virtio_flags_to_tuntap_flags(virtio_flags);
+        // Safe because the tap fd is open, the argument is passed by value and we check the result.
+        let ret = unsafe { ioctl_with_val(self, TUNSETOFFLOAD(), flags as c_ulong) };
+        if ret < 0 {
+            return Err(std::io::Error::last_os_error()).map_err(VirtioNetError::IoCtlError);
+        }
+
+        // Safe because we know that our file is a valid tap device and we verify the result.
+        let ret = unsafe { ioctl_with_ref(self, TUNSETVNETHDRSZ(), &virtio_header_size) };
+        if ret < 0 {
+            return Err(std::io::Error::last_os_error()).map_err(VirtioNetError::IoCtlError);
+        }
+
+        Ok(())
+    }
+
+    fn open_named(if_name: &str) -> super::Result<Self> {
+        let terminated_if_name = build_terminated_if_name(if_name)?;
+
+        let fd = unsafe {
+            // Open calls are safe because we give a constant null-terminated
+            // string and verify the result.
+            libc::open(
+                b"/dev/net/tun\0".as_ptr() as *const c_char,
+                libc::O_RDWR | libc::O_NONBLOCK,
+            )
+        };
+        if fd < 0 {
+            return Err(IoError::last_os_error()).map_err(VirtioNetError::IoError);
+        }
+        // We just checked that the fd is valid.
+        let tuntap = unsafe { File::from_raw_fd(fd) };
+
+        // Report a failed TUNSETIFF to the caller instead of panicking.
+        IfReqBuilder::new()
+            .if_name(&terminated_if_name)
+            .flags((IFF_TAP | IFF_NO_PI | IFF_VNET_HDR) as i16)
+            .execute(&tuntap, TUNSETIFF())?;
+
+        // `tuntap` owns the fd, so it is closed when the returned `Tap` is dropped.
+        Ok(Tap { tap_file: tuntap })
+    }
+}
+
+// Returns a fixed-size byte array holding if_name as a null terminated C string, or
+// `InvalidIfname` when the name leaves no room for the terminator.
+fn build_terminated_if_name(if_name: &str) -> super::Result<[u8; IFACE_NAME_MAX_LEN]> {
+    // Convert the string slice to bytes, and shadow the variable,
+    // since we no longer need the &str version.
+    let if_name = if_name.as_bytes();
+
+    if if_name.len() >= IFACE_NAME_MAX_LEN {
+        return Err(VirtioNetError::InvalidIfname);
+    }
+
+    let mut terminated_if_name = [b'\0'; IFACE_NAME_MAX_LEN];
+    terminated_if_name[..if_name.len()].copy_from_slice(if_name);
+
+    Ok(terminated_if_name)
+}
+
+pub struct IfReqBuilder(ifreq);
+
+impl IfReqBuilder {
+    #[allow(clippy::new_without_default)]
+    pub fn new() -> Self {
+        Self(Default::default())
+    }
+
+    pub fn if_name(mut self, if_name: &[u8; IFACE_NAME_MAX_LEN]) -> Self {
+        // Since we don't call as_mut on the same union field more than once, this block is safe.
+        let ifrn_name = unsafe { self.0.ifr_ifrn.ifrn_name.as_mut() };
+        ifrn_name.copy_from_slice(if_name.as_ref());
+
+        self
+    }
+
+    pub(crate) fn flags(mut self, flags: i16) -> Self {
+        // Since we don't call as_mut on the same union field more than once, this block is safe.
+        let ifru_flags = unsafe { self.0.ifr_ifru.ifru_flags.as_mut() };
+        *ifru_flags = flags;
+
+        self
+    }
+
+    pub(crate) fn execute<F: AsRawFd>(mut self, socket: &F, ioctl: u64) -> super::Result<ifreq> {
+        // ioctl is safe. Called with a valid socket fd, and we check the return.
+ let ret = unsafe { ioctl_with_mut_ref(socket, ioctl, &mut self.0) }; + if ret < 0 { + return Err(VirtioNetError::IoCtlError(IoError::last_os_error())); + } + + Ok(self.0) + } +} + +impl Read for Tap { + fn read(&mut self, buf: &mut [u8]) -> IoResult { + self.tap_file.read(buf) + } +} + +impl Write for Tap { + fn write(&mut self, buf: &[u8]) -> IoResult { + self.tap_file.write(buf) + } + + fn flush(&mut self) -> IoResult<()> { + Ok(()) + } +} + +impl AsRawFd for Tap { + fn as_raw_fd(&self) -> RawFd { + self.tap_file.as_raw_fd() + } +} diff --git a/src/vmm/src/epoll_context.rs b/src/vmm/src/epoll_context.rs index 47d453b..445a715 100644 --- a/src/vmm/src/epoll_context.rs +++ b/src/vmm/src/epoll_context.rs @@ -26,6 +26,16 @@ impl EpollContext { epoll::Event::new(epoll::Events::EPOLLIN, libc::STDIN_FILENO as u64), )?; + Ok(()) + } + pub fn add_fd(&self, fd: RawFd) -> result::Result<(), io::Error> { + epoll::ctl( + self.raw_fd, + epoll::ControlOptions::EPOLL_CTL_ADD, + fd, + epoll::Event::new(epoll::Events::EPOLLIN, fd as u64), + )?; + Ok(()) } } diff --git a/src/vmm/src/kernel.rs b/src/vmm/src/kernel.rs index bf094d1..12ba123 100644 --- a/src/vmm/src/kernel.rs +++ b/src/vmm/src/kernel.rs @@ -10,7 +10,7 @@ use linux_loader::bootparam::boot_params; use linux_loader::cmdline::Cmdline; use linux_loader::configurator::{linux::LinuxBootConfigurator, BootConfigurator, BootParams}; use linux_loader::loader::{elf::Elf, load_cmdline, KernelLoader, KernelLoaderResult}; -use vm_memory::{Address, GuestAddress, GuestMemory, GuestMemoryMmap}; +use vm_memory::{GuestAddress, GuestMemoryMmap}; use crate::{Error, Result}; @@ -27,14 +27,11 @@ const KERNEL_LOADER_OTHER: u8 = 0xff; // Header field: `kernel_alignment`. Alignment unit required by a relocatable kernel. const KERNEL_MIN_ALIGNMENT_BYTES: u32 = 0x0100_0000; -// Start address for the EBDA (Extended Bios Data Area). Older computers (like the one this VMM -// emulates) typically use 1 KiB for the EBDA, starting at 0x9fc00. 
-// See https://wiki.osdev.org/Memory_Map_(x86) for more information.
-const EBDA_START: u64 = 0x0009_fc00;
 // RAM memory type.
 // TODO: this should be bindgen'ed and exported by linux-loader.
 // See https://github.com/rust-vmm/linux-loader/issues/51
 const E820_RAM: u32 = 1;
+const E820_RESERVED: u32 = 2;
 
 /// Address of the zeropage, where Linux kernel boot parameters are written.
 pub(crate) const ZEROPG_START: u64 = 0x7000;
@@ -44,7 +41,7 @@ const HIMEM_START: u64 = 0x0010_0000; // 1 MB
 /// Address where the kernel command line is written.
 const CMDLINE_START: u64 = 0x0002_0000;
 // Default command line
-const CMDLINE: &str = "console=ttyS0 i8042.nokbd reboot=k panic=1 pci=off";
+pub const DEFAULT_CMDLINE: &str = "console=ttyS0 i8042.nokbd reboot=k panic=1 pci=off";
 
 fn add_e820_entry(
     params: &mut boot_params,
@@ -73,8 +70,7 @@ fn add_e820_entry(
 /// * `mmio_gap_start` - address where the MMIO gap starts.
 /// * `mmio_gap_end` - address where the MMIO gap ends.
 pub fn build_bootparams(
-    guest_memory: &GuestMemoryMmap,
-    himem_start: GuestAddress,
+    allocator: &vm_allocator::AddressAllocator,
 ) -> std::result::Result<boot_params, Error> {
     let mut params = boot_params::default();
 
@@ -83,19 +79,16 @@ pub fn build_bootparams(
     params.hdr.kernel_alignment = KERNEL_MIN_ALIGNMENT_BYTES;
     params.hdr.type_of_loader = KERNEL_LOADER_OTHER;
 
-    // Add an entry for EBDA itself.
-    add_e820_entry(&mut params, 0, EBDA_START, E820_RAM)?;
-
-    // Add entries for the usable RAM regions.
-    let last_addr = guest_memory.last_addr();
-    add_e820_entry(
-        &mut params,
-        himem_start.raw_value() as u64,
-        last_addr
-            .checked_offset_from(himem_start)
-            .ok_or(Error::HimemStartPastMemEnd)?,
-        E820_RAM,
-    )?;
+    // Describe each RAM and reserved slot to the guest through the E820 map; a failure
+    // to add an entry is returned to the caller rather than panicking.
+    for slot in allocator.allocated_slots().iter() {
+        let e820_type = match slot.node_state() {
+            vm_allocator::NodeState::Ram => E820_RAM,
+            vm_allocator::NodeState::ReservedAllocated => E820_RESERVED,
+            _ => continue,
+        };
+        add_e820_entry(&mut params, slot.key().start(), slot.key().len(), e820_type)?;
+    }
 
     Ok(params)
 }
@@ -109,6 +102,8 @@ pub fn build_bootparams(
 pub fn kernel_setup(
     guest_memory: &GuestMemoryMmap,
     kernel_path: PathBuf,
+    cmdline: &Cmdline,
+    allocator: &vm_allocator::AddressAllocator,
 ) -> Result<KernelLoaderResult> {
     let mut kernel_image = File::open(kernel_path).map_err(Error::IO)?;
     let zero_page_addr = GuestAddress(ZEROPG_START);
@@ -123,21 +118,26 @@ pub fn kernel_setup(
     .map_err(Error::KernelLoad)?;
 
     // Generate boot parameters.
-    let mut bootparams = build_bootparams(guest_memory, GuestAddress(HIMEM_START))?;
+    let mut bootparams = build_bootparams(allocator)?;
+
+    let cmdline_str = cmdline
+        .as_cstring()
+        .map_err(Error::Cmdline)?
+        .into_string()
+        .map_err(Error::IntoStringError)?;
+
+    let cmdline_size = cmdline_str.len() as u32;
 
     // Add the kernel command line to the boot parameters.
     bootparams.hdr.cmd_line_ptr = CMDLINE_START as u32;
-    bootparams.hdr.cmdline_size = CMDLINE.len() as u32 + 1;
+    bootparams.hdr.cmdline_size = cmdline_size + 1;
 
     // Load the kernel command line into guest memory.
-    let mut cmdline = Cmdline::new(CMDLINE.len() + 1).map_err(Error::Cmdline)?;
-
-    cmdline.insert_str(CMDLINE).map_err(Error::Cmdline)?;
     load_cmdline(
         guest_memory,
         GuestAddress(CMDLINE_START),
         // Safe because the command line is valid.
-        &cmdline,
+        cmdline,
     )
     .map_err(Error::KernelLoad)?;
 
diff --git a/src/vmm/src/lib.rs b/src/vmm/src/lib.rs
index 160dd2f..076d3b7 100644
--- a/src/vmm/src/lib.rs
+++ b/src/vmm/src/lib.rs
@@ -8,18 +8,24 @@ extern crate linux_loader;
 extern crate vm_memory;
 extern crate vm_superio;
 
+use std::fs::File;
 use std::io::stdout;
 use std::os::unix::io::AsRawFd;
 use std::os::unix::prelude::RawFd;
 use std::sync::{Arc, Mutex};
 use std::thread;
 use std::{io, path::PathBuf};
-use std::fs::File;
 
+use devices::net::tap::Tap;
+use devices::net::VirtioNet;
 use kvm_bindings::{kvm_userspace_memory_region, KVM_MAX_CPUID_ENTRIES};
 use kvm_ioctls::{Kvm, VmFd};
 use linux_loader::loader::{self, KernelLoaderResult};
+use vm_allocator::{AddressAllocator, NodeState};
+use vm_device::device_manager::IoManager;
+use vm_device::resources::Resource;
 use vm_memory::{Address, GuestAddress, GuestMemory, GuestMemoryMmap, GuestMemoryRegion};
+use vmm_sys_util::eventfd::EventFd;
 use vmm_sys_util::terminal::Terminal;
 mod cpu;
 use cpu::{cpuid, mptable, Vcpu};
@@ -30,6 +36,15 @@ mod epoll_context;
 use epoll_context::{EpollContext, EPOLL_EVENTS_LEN};
 mod kernel;
 
+const CMDLINE_MAX_SIZE: usize = 4096;
+
+// Real mode address space constants: https://wiki.osdev.org/Memory_Map_(x86)#Overview
+const LOW_MEM_RAM_SIZE: u64 = 0x000a_0000;
+const LOW_MEM_RESERVED_SIZE: u64 = 0x0006_0000;
+
+const HIGH_MEM_HOLE_START: u64 = (1 << 32) - HIGH_MEM_HOLE_SIZE;
+const HIGH_MEM_HOLE_SIZE: u64 = 0x1400000;
+
 #[derive(Debug)]
 /// VMM errors.
@@ -66,6 +81,16 @@ pub enum Error {
     TerminalConfigure(kvm_ioctls::Error),
     /// Console configuration error
     ConsoleError(io::Error),
+    /// IntoString error
+    IntoStringError(std::ffi::IntoStringError),
+    /// Error writing to the guest memory.
+    GuestMemory(vm_memory::guest_memory::Error),
+    /// Error related to the virtio-net device.
+    VirtioNet(devices::net::VirtioNetError),
+    /// Error related to IOManager.
+    IoManager(vm_device::device_manager::Error),
+    /// Allocator error.
+    Allocator(vm_allocator::Error),
 }
 
 /// Dedicated [`Result`](https://doc.rust-lang.org/std/result/) type.
@@ -76,9 +101,15 @@ pub struct VMM {
     kvm: Kvm,
     guest_memory: GuestMemoryMmap,
     vcpus: Vec<Vcpu>,
+    memory_allocator: AddressAllocator,
 
     serial: Arc<Mutex<LumperSerial>>,
+    virtio_manager: Arc<Mutex<IoManager>>,
+    virtio_net: Option<Arc<Mutex<VirtioNet<Arc<GuestMemoryMmap>, Tap>>>>,
+
     epoll: EpollContext,
+
+    cmdline: linux_loader::cmdline::Cmdline,
 }
 
 impl VMM {
@@ -97,26 +128,122 @@ impl VMM {
         let vmm = VMM {
             vm_fd,
             kvm,
+            // x86_64 has a 48-bit physical address space.
+            memory_allocator: AddressAllocator::new(0, 1 << 47).map_err(Error::Allocator)?,
             guest_memory: GuestMemoryMmap::default(),
             vcpus: vec![],
             serial: Arc::new(Mutex::new(
                 LumperSerial::new(Box::new(stdout())).map_err(Error::SerialCreation)?,
             )),
+            virtio_net: None,
+            virtio_manager: Arc::new(Mutex::new(IoManager::new())),
             epoll,
+            cmdline: linux_loader::cmdline::Cmdline::new(CMDLINE_MAX_SIZE)
+                .map_err(Error::Cmdline)?,
         };
 
         Ok(vmm)
     }
 
-    pub fn configure_memory(&mut self, mem_size_mb: u32) -> Result<()> {
+    /// Reserves the fixed x86 regions (low RAM, the legacy area up to 1 MiB and the
+    /// hole below 4 GiB) in the address allocator, then allocates `mem_size_mb` MiB of
+    /// guest RAM, splitting it around the hole when it does not fit underneath.
+    pub fn configure_allocator(&mut self, mem_size_mb: u32) -> Result<()> {
         // Convert memory size from MBytes to bytes.
-        let mem_size = ((mem_size_mb as u64) << 20) as usize;
+        let mem_size = (mem_size_mb as u64) << 20;
+
+        // https://wiki.osdev.org/Memory_Map_(x86)
+        // Make sure that we allocated the first 1MB of memory for the low memory hole.
+        self.memory_allocator
+            .allocate(
+                LOW_MEM_RAM_SIZE,
+                8,
+                vm_allocator::AllocPolicy::ExactMatch(0),
+                vm_allocator::NodeState::Ram,
+            )
+            .map_err(Error::Allocator)?;
+        self.memory_allocator
+            .allocate(
+                LOW_MEM_RESERVED_SIZE,
+                8,
+                vm_allocator::AllocPolicy::ExactMatch(LOW_MEM_RAM_SIZE),
+                vm_allocator::NodeState::ReservedAllocated,
+            )
+            .map_err(Error::Allocator)?;
+
+        // Reserve the end of the 4GB address space for the high memory hole. (intel specific)
+        // https://resources.infosecinstitute.com/wp-content/uploads/010814_1515_SystemAddre14.png
+        self.memory_allocator
+            .allocate(
+                HIGH_MEM_HOLE_SIZE,
+                8,
+                vm_allocator::AllocPolicy::ExactMatch(HIGH_MEM_HOLE_START),
+                vm_allocator::NodeState::ReservedUnallocated,
+            )
+            .map_err(Error::Allocator)?;
+
+        // Allocate the rest of the memory for the guest.
+        // Fragment the memory if it doesn't fit in the low_mem -> high_mem, since the kernel is set to load in there.
+        let available_size_between_holes =
+            HIGH_MEM_HOLE_START - LOW_MEM_RESERVED_SIZE - LOW_MEM_RAM_SIZE;
+        if mem_size > available_size_between_holes {
+            self.memory_allocator
+                .allocate(
+                    available_size_between_holes,
+                    8,
+                    vm_allocator::AllocPolicy::FirstMatch,
+                    NodeState::Ram,
+                )
+                .map_err(Error::Allocator)?;
+
+            self.memory_allocator
+                .allocate(
+                    mem_size - available_size_between_holes,
+                    8,
+                    vm_allocator::AllocPolicy::FirstMatch,
+                    NodeState::Ram,
+                )
+                .map_err(Error::Allocator)?;
+        } else {
+            self.memory_allocator
+                .allocate(
+                    mem_size,
+                    8,
+                    vm_allocator::AllocPolicy::FirstMatch,
+                    NodeState::Ram,
+                )
+                .map_err(Error::Allocator)?;
+        }
 
-        // Create one single memory region, from zero to mem_size.
-        let mem_regions = vec![(GuestAddress(0), mem_size)];
+        Ok(())
+    }
+
+    /// Maps every `Ram` and `ReservedAllocated` allocator slot as guest memory and
+    /// stores the result in `self.guest_memory`.
+    fn register_memory(&mut self) -> Result<()> {
+        // Find all the regions that should be mapped as guest memory.
+        let mut mem_regions: Vec<_> = self
+            .memory_allocator
+            .allocated_slots()
+            .iter()
+            .filter_map(|slot| match slot.node_state() {
+                NodeState::ReservedAllocated | NodeState::Ram => {
+                    let slot_key = slot.key();
+                    let (start, size) = (
+                        slot_key.start(),
+                        slot_key.end() as usize - slot_key.start() as usize + 1,
+                    );
+                    Some((GuestAddress(start), size))
+                }
+                _ => None,
+            })
+            .collect();
+
+        mem_regions.sort_by(|a, b| a.0.cmp(&b.0));
 
         // Allocate the guest memory from the memory region.
-        let guest_memory = GuestMemoryMmap::from_ranges(&mem_regions).map_err(Error::Memory)?;
+        let guest_memory =
+            GuestMemoryMmap::from_ranges(mem_regions.as_slice()).map_err(Error::Memory)?;
 
         // For each memory region in guest_memory:
         // 1. Create a KVM memory region mapping the memory region guest physical address to the host virtual address.
@@ -137,6 +264,79 @@ impl VMM {
         }
 
         self.guest_memory = guest_memory;
+        Ok(())
+    }
+
+    /// Seeds the kernel command line with `kernel::DEFAULT_CMDLINE`.
+    pub fn load_default_cmdline(&mut self) -> Result<()> {
+        self.cmdline
+            .insert_str(kernel::DEFAULT_CMDLINE)
+            .map_err(Error::Cmdline)
+    }
+
+    /// Legacy IRQ line shared by the MMIO registration, the kernel command line and the
+    /// KVM irqfd of the virtio-net device; the three must always agree.
+    const VIRTIO_NET_IRQ: u32 = 5;
+
+    /// Creates the virtio-net device backed by the tap interface `interface`, registers
+    /// it on the MMIO bus and advertises it on the kernel command line.
+    ///
+    /// Does nothing when `interface` is `None`.
+    pub fn configure_net(&mut self, interface: Option<String>) -> Result<()> {
+        let if_name = match interface {
+            Some(if_name) => if_name,
+            None => return Ok(()),
+        };
+
+        let virtio_address = self
+            .memory_allocator
+            .allocate(
+                0x1000,
+                8,
+                vm_allocator::AllocPolicy::FirstMatch,
+                NodeState::Mmio,
+            )
+            .map_err(Error::Allocator)?;
+
+        let irq_fd = EventFd::new(libc::EFD_NONBLOCK).map_err(Error::IrqRegister)?;
+
+        let virtio_net = VirtioNet::new(
+            Arc::new(self.guest_memory.clone()),
+            irq_fd,
+            if_name.as_str(),
+        )
+        .map_err(Error::VirtioNet)?;
+
+        self.epoll
+            .add_fd(virtio_net.as_raw_fd())
+            .map_err(Error::EpollError)?;
+        let mut io_manager = self.virtio_manager.lock().unwrap();
+
+        self.virtio_net = Some(Arc::new(Mutex::new(virtio_net)));
+
+        io_manager
+            .register_mmio_resources(
+                // It's safe to unwrap because the virtio-net was just assigned
+                self.virtio_net.as_ref().unwrap().clone(),
+                &[
+                    Resource::GuestAddressRange {
+                        base: virtio_address.start(),
+                        size: virtio_address.len(),
+                    },
+                    Resource::LegacyIrq(Self::VIRTIO_NET_IRQ),
+                ],
+            )
+            .map_err(Error::IoManager)?;
+
+        // Add the virtio-net device to the cmdline.
+        self.cmdline
+            .add_virtio_mmio_device(
+                virtio_address.len(),
+                GuestAddress(virtio_address.start()),
+                Self::VIRTIO_NET_IRQ,
+                None,
+            )
+            .map_err(Error::Cmdline)?;
 
         Ok(())
     }
@@ -161,13 +361,15 @@ impl VMM {
             )
             .map_err(Error::KvmIoctl)?;
 
+        if let Some(virtio_net) = self.virtio_net.as_ref() {
+            self.vm_fd
+                .register_irqfd(&virtio_net.lock().unwrap().guest_irq_fd, Self::VIRTIO_NET_IRQ)
+                .map_err(Error::KvmIoctl)?;
+        }
         Ok(())
     }
 
-    pub fn configure_console(
-        &mut self,
-        console_path: Option<String>
-    ) -> Result<()> {
+    pub fn configure_console(&mut self, console_path: Option<String>) -> Result<()> {
         if let Some(console_path) = console_path {
             // We create the file if it does not exist, else we open
             let file = File::create(&console_path).map_err(Error::ConsoleError)?;
@@ -193,8 +395,13 @@ impl VMM {
             .map_err(Error::KvmIoctl)?;
 
         for index in 0..num_vcpus {
-            let vcpu = Vcpu::new(&self.vm_fd, index.into(), Arc::clone(&self.serial))
-                .map_err(Error::Vcpu)?;
+            let vcpu = Vcpu::new(
+                &self.vm_fd,
+                index.into(),
+                Arc::clone(&self.serial),
+                self.virtio_manager.clone(),
+            )
+            .map_err(Error::Vcpu)?;
 
             // Set CPUID.
             let mut vcpu_cpuid = base_cpuid.clone();
@@ -241,11 +448,22 @@ impl VMM {
             .map_err(Error::TerminalConfigure)?;
         let mut events = vec![epoll::Event::new(epoll::Events::empty(), 0); EPOLL_EVENTS_LEN];
         let epoll_fd = self.epoll.as_raw_fd();
-
-        // Let's start the STDIN polling thread.
+        let interface_fd = self
+            .virtio_net
+            .as_ref()
+            .map(|virtio_net| virtio_net.lock().unwrap().interface.as_raw_fd());
+        // Let's start the STDIN/Network interface polling thread.
         loop {
-            let num_events =
-                epoll::wait(epoll_fd, -1, &mut events[..]).map_err(Error::EpollError)?;
+            let num_events = match epoll::wait(epoll_fd, -1, &mut events[..]) {
+                Ok(num_events) => num_events,
+                Err(e) => {
+                    if e.kind() == io::ErrorKind::Interrupted {
+                        continue;
+                    } else {
+                        return Err(Error::EpollError(e));
+                    }
+                }
+            };
 
             for event in events.iter().take(num_events) {
                 let event_data = event.data as RawFd;
@@ -262,15 +480,46 @@ impl VMM {
                         .enqueue_raw_bytes(&out[..count])
                         .map_err(Error::StdinWrite)?;
                 }
+
+                if interface_fd == Some(event_data) {
+                    self.virtio_net
+                        .as_ref()
+                        // Safe because we checked that the virtio_net is Some before the loop.
+                        .unwrap()
+                        .lock()
+                        .unwrap()
+                        .process_tap()
+                        .map_err(Error::VirtioNet)?;
+                }
             }
         }
     }
 
-    pub fn configure(&mut self, num_vcpus: u8, mem_size_mb: u32, kernel_path: &str, console: Option<String>) -> Result<()> {
+    pub fn configure(
+        &mut self,
+        num_vcpus: u8,
+        mem_size_mb: u32,
+        kernel_path: &str,
+        console: Option<String>,
+        if_name: Option<String>,
+    ) -> Result<()> {
+        self.configure_allocator(mem_size_mb)?;
         self.configure_console(console)?;
-        self.configure_memory(mem_size_mb)?;
-        let kernel_load = kernel::kernel_setup(&self.guest_memory, PathBuf::from(kernel_path))?;
+
+        self.load_default_cmdline()?;
+
+        self.register_memory()?;
+
+        self.configure_net(if_name)?;
         self.configure_io()?;
+
+        let kernel_load = kernel::kernel_setup(
+            &self.guest_memory,
+            PathBuf::from(kernel_path),
+            &self.cmdline,
+            &self.memory_allocator,
+        )?;
+
         self.configure_vcpus(num_vcpus, kernel_load)?;
 
         Ok(())