diff --git a/Cargo.toml b/Cargo.toml
index c441880..2958f9a 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -1,40 +1,62 @@
 [package]
+edition = "2024"
 name = "axvm"
 version = "0.1.0"
-edition = "2024"
 
 [features]
 default = ["vmx"]
 vmx = []
-4-level-ept = ["arm_vcpu/4-level-ept"] # TODO: Realize 4-level-ept on x86_64 and riscv64.
+# Note: 4-level-ept support is now provided through dynamic page table selection in axaddrspace.
+# The feature gate is no longer needed.
 
 [dependencies]
-log = "0.4"
+anyhow = {version = "1.0", default-features = false}
 cfg-if = "1.0"
-spin = "0.9"
+lazyinit = "0.2"
+log = "0.4"
+ranges-ext.workspace = true
+spin = "0.10"
+thiserror = {version = "2", default-features = false}
+timer_list = "0.1"
 
 # System independent crates provided by ArceOS.
 axerrno = "0.1.0"
+bitmap-allocator = "0.2.1"
 cpumask = "0.1.0"
-# kspin = "0.1.0"
+kspin = "0.1"
 memory_addr = "0.4"
-page_table_entry = { version = "0.5", features = ["arm-el2"] }
+page_table_entry = {version = "0.5", features = ["arm-el2"]}
 page_table_multiarch = "0.5"
-percpu = { version = "0.2.0", features = ["arm-el2"] }
+percpu = {version = "0.2", features = ["arm-el2"]}
+vm-allocator.workspace = true
 
 # System dependent modules provided by ArceOS-Hypervisor.
-axvcpu = "0.1"
-axaddrspace = "0.1"
-axdevice = { git = "https://github.com/arceos-hypervisor/axdevice.git" }
-axdevice_base = "0.1"
-axvmconfig = { version = "0.1", default-features = false }
+axaddrspace = "0.2"
+# axdevice = {git = "https://github.com/arceos-hypervisor/axdevice.git"}
+# axdevice_base = "0.1"
+# axvcpu = "0.1"
+axhal.workspace = true
+axruntime.workspace = true
+axstd.workspace = true
+axvm-types.workspace = true
+axvmconfig = {version = "0.1", default-features = false}
+fdt-edit = "0.1"
 
 [target.'cfg(target_arch = "x86_64")'.dependencies]
+raw-cpuid = "11"
 x86_vcpu = "0.1"
+axplat-x86-qemu-q35.workspace = true
 
 [target.'cfg(target_arch = "riscv64")'.dependencies]
-riscv_vcpu = "0.1"
+# riscv_vcpu = "0.1"
 
 [target.'cfg(target_arch = "aarch64")'.dependencies]
+aarch64-cpu = "11.0"
+aarch64-cpu-ext = "0.1"
 arm_vcpu = "0.1"
-arm_vgic = { version = "0.1", features = ["vgicv3"] }
+# arm_vgic = {version = "0.1", features = ["vgicv3"]}
+
+[patch.crates-io]
+arm_vcpu = {git = "https://github.com/arceos-hypervisor/arm_vcpu", branch = "next"}
+axvcpu = {git = "https://github.com/arceos-hypervisor/axvcpu.git", branch = "next"}
+axvmconfig = {git = "https://github.com/arceos-hypervisor/axvmconfig.git", branch = "next"}
diff --git a/src/arch/aarch64/cpu.rs b/src/arch/aarch64/cpu.rs
new file mode 100644
index 0000000..26dcbc6
--- /dev/null
+++ b/src/arch/aarch64/cpu.rs
@@ -0,0 +1,209 @@
+use core::{
+    fmt::{self, Debug, Display},
+    ops::Deref,
+};
+
+use aarch64_cpu::registers::*;
+use arm_vcpu::{Aarch64PerCpu, Aarch64VCpuCreateConfig};
+use axvm_types::addr::*;
+
+use crate::{
+    RunError,
+    data::VmDataWeak,
+    vcpu::{VCpuCommon, VCpuOp},
+    vhal::{
+        ArchCpuData,
+        cpu::{CpuHardId, CpuId},
+    },
+};
+
+pub struct HCpu {
+    pub id: CpuId,
+    pub hard_id: CpuHardId,
+    vpercpu: Aarch64PerCpu,
+    max_guest_page_table_levels: usize,
+    pub pa_range: core::ops::Range<usize>,
+    pub pa_bits: usize,
+}
+
+impl HCpu {
+    pub fn new(id: CpuId) -> Self {
+        let mpidr = MPIDR_EL1.get() as usize;
+        let hard_id = mpidr & 0xff_ff_ff;
+
+        let vpercpu = Aarch64PerCpu::new();
+
+        HCpu {
+            id,
+            hard_id: CpuHardId::new(hard_id),
+            vpercpu,
+            max_guest_page_table_levels: 0,
+            pa_range: 0..0,
+            pa_bits: 0,
+        }
+    }
+
+    pub fn init(&mut self) -> anyhow::Result<()> {
+        self.vpercpu.hardware_enable();
+        self.max_guest_page_table_levels = self.vpercpu.max_guest_page_table_levels();
+        self.pa_range = self.vpercpu.pa_range();
+        self.pa_bits = self.vpercpu.pa_bits();
+        Ok(())
+    }
+
+    pub fn max_guest_page_table_levels(&self) -> usize {
+        self.max_guest_page_table_levels
+    }
+}
+
+impl ArchCpuData for HCpu {
+    fn hard_id(&self) -> CpuHardId {
+        self.hard_id
+    }
+}
+
+impl Display for HCpu {
+    fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
+        write!(
+            f,
+            "
+CPU {}:
+  Hard ID: {}
+  PT Levels: {}",
+            self.id, self.hard_id, self.max_guest_page_table_levels
+        )
+    }
+}
+
+pub(super) struct VCpuHal;
+
+impl arm_vcpu::CpuHal for VCpuHal {
+    fn irq_hanlder(&self) {
+        axhal::irq::irq_handler(0);
+    }
+
+    fn inject_interrupt(&self, irq: usize) {
+        todo!()
+    }
+}
+
+pub struct VCpu {
+    pub vcpu: arm_vcpu::Aarch64VCpu,
+    common: VCpuCommon,
+}
+
+impl VCpu {
+    pub fn new(
+        host_cpuid: Option<CpuId>,
+        dtb_addr: GuestPhysAddr,
+        vm: VmDataWeak,
+    ) -> anyhow::Result<Self> {
+        let common = VCpuCommon::new_exclusive(host_cpuid, vm)?;
+
+        let hard_id = common.hard_id();
+
+        let vcpu = arm_vcpu::Aarch64VCpu::new(Aarch64VCpuCreateConfig {
+            mpidr_el1: hard_id.raw() as u64,
+            dtb_addr: dtb_addr.as_usize(),
+        })
+        .unwrap();
+        Ok(VCpu { vcpu, common })
+    }
+
+    pub fn set_pt_level(&mut self, level: usize) {
+        self.vcpu.pt_level = level;
+    }
+
+    pub fn set_pa_bits(&mut self, pa_bits: usize) {
+        self.vcpu.pa_bits = pa_bits;
+    }
+}
+
+impl VCpuOp for VCpu {
+    fn bind_id(&self) -> CpuId {
+        self.common.bind_id()
+    }
+
+    fn hard_id(&self) -> CpuHardId {
+        self.common.hard_id()
+    }
+
+    fn run(&mut self) -> Result<(), RunError> {
+        info!("Starting vCPU {}", self.bind_id());
+
+        self.vcpu
+            .setup_current_cpu(self.vm_id().into())
+            .map_err(|e| anyhow!("{e}"))?;
+        while self.is_active() {
+            debug!("vCPU {} entering guest", self.bind_id());
+            let exit_reason = self.vcpu.run().map_err(|e| anyhow!("{e}"))?;
+            debug!(
+                "vCPU {} exited with reason: {:?}",
+                self.bind_id(),
+                exit_reason
+            );
+            match exit_reason {
+                arm_vcpu::AxVCpuExitReason::Hypercall { nr, args } => todo!(),
+                arm_vcpu::AxVCpuExitReason::MmioRead {
+                    addr,
+                    width,
+                    reg,
+                    reg_width,
+                    signed_ext,
+                } => todo!(),
+                arm_vcpu::AxVCpuExitReason::MmioWrite { addr, width, data } => todo!(),
+                arm_vcpu::AxVCpuExitReason::SysRegRead { addr, reg } => todo!(),
+                arm_vcpu::AxVCpuExitReason::SysRegWrite { addr, value } => todo!(),
+                arm_vcpu::AxVCpuExitReason::ExternalInterrupt => {
+                    axhal::irq::irq_handler(0);
+                }
+                arm_vcpu::AxVCpuExitReason::CpuUp {
+                    target_cpu,
+                    entry_point,
+                    arg,
+                } => {
+                    debug!("vCPU {} requested CPU {} up", self.bind_id(), target_cpu);
+                    self.vm()?.with_machine_running_mut(|running| {
+                        debug!("vCPU {} is bringing up CPU {}", self.bind_id(), target_cpu);
+                        running.cpu_up(CpuHardId::new(target_cpu as _), entry_point, arg)
+                    })??;
+                    self.vcpu.set_gpr(0, 0);
+                }
+                arm_vcpu::AxVCpuExitReason::CpuDown { _state } => todo!(),
+                arm_vcpu::AxVCpuExitReason::SystemDown => {
+                    info!("vCPU {} requested system shutdown", self.bind_id());
+                    self.vm()?.stop()?;
+                }
+                arm_vcpu::AxVCpuExitReason::Nothing => {}
+                arm_vcpu::AxVCpuExitReason::SendIPI {
+                    target_cpu,
+                    target_cpu_aux,
+                    send_to_all,
+                    send_to_self,
+                    vector,
+                } => todo!(),
+                _ => todo!(),
+            }
+        }
+
+        Ok(())
+    }
+}
+
+impl Deref for VCpu {
+    type Target = VCpuCommon;
+
+    fn deref(&self) -> &Self::Target {
+        &self.common
+    }
+}
+
+impl Debug for VCpu {
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        f.debug_struct("VCpu")
+            .field("bind_id", &self.bind_id())
.field("hard_id", &self.hard_id()) + .field("vcpu", &self.vcpu) + .finish() + } +} diff --git a/src/arch/aarch64/hal.rs b/src/arch/aarch64/hal.rs new file mode 100644 index 0000000..4c7c316 --- /dev/null +++ b/src/arch/aarch64/hal.rs @@ -0,0 +1,47 @@ +use alloc::vec::Vec; + +use aarch64_cpu::registers::*; +use aarch64_cpu_ext::cache::{CacheOp, dcache_range}; + +use crate::fdt; +use crate::vhal::{ + ArchHal, + cpu::{CpuHardId, CpuId}, +}; + +use super::cpu::{HCpu, VCpuHal}; + +pub struct Hal; + +impl ArchHal for Hal { + fn current_cpu_init(id: CpuId) -> anyhow::Result { + info!("Enabling virtualization on cpu {id}"); + let mut cpu = HCpu::new(id); + cpu.init()?; + info!("{cpu}"); + Ok(cpu) + } + + fn init() -> anyhow::Result<()> { + arm_vcpu::init_hal(&VCpuHal); + + Ok(()) + } + + fn cpu_list() -> Vec { + fdt::cpu_list() + .unwrap() + .into_iter() + .map(CpuHardId::new) + .collect() + } + + fn cpu_hard_id() -> CpuHardId { + let mpidr = MPIDR_EL1.get() as usize; + CpuHardId::new(mpidr) + } + + fn cache_flush(vaddr: arm_vcpu::HostVirtAddr, size: usize) { + dcache_range(CacheOp::CleanAndInvalidate, vaddr.as_usize(), size); + } +} diff --git a/src/arch/aarch64/mod.rs b/src/arch/aarch64/mod.rs new file mode 100644 index 0000000..a3c06f0 --- /dev/null +++ b/src/arch/aarch64/mod.rs @@ -0,0 +1,7 @@ +pub mod cpu; +mod hal; +mod vm; + +pub use cpu::HCpu; +pub use hal::Hal; +pub use vm::*; diff --git a/src/arch/aarch64/vm/inited.rs b/src/arch/aarch64/vm/inited.rs new file mode 100644 index 0000000..410b518 --- /dev/null +++ b/src/arch/aarch64/vm/inited.rs @@ -0,0 +1,43 @@ +use std::{string::String, vec::Vec}; + +use crate::{ + VmAddrSpace, VmMachineInitedOps, VmMachineRunningCommon, + arch::{VmMachineRunning, cpu::VCpu}, + data::VmDataWeak, + vm::VmId, +}; + +pub struct VmMachineInited { + pub id: VmId, + pub name: String, + pub vcpus: Vec, + pub vmspace: VmAddrSpace, +} + +impl VmMachineInited {} + +impl VmMachineInitedOps for VmMachineInited { + type Running = VmMachineRunning; + + fn id(&self) -> VmId { + self.id + } + + fn name(&self) -> &str { + &self.name + } + + fn start(self, vmdata: VmDataWeak) -> Result { + debug!("Starting VM {} ({})", self.id, self.name); + let mut running = VmMachineRunning { + common: VmMachineRunningCommon::new(self.vmspace, self.vcpus, vmdata), + }; + + let main = running.common.take_cpu()?; + + running.common.run_cpu(main)?; + + info!("VM {} ({}) main cpu started.", self.id, self.name,); + Ok(running) + } +} diff --git a/src/arch/aarch64/vm/mod.rs b/src/arch/aarch64/vm/mod.rs new file mode 100644 index 0000000..dff02d4 --- /dev/null +++ b/src/arch/aarch64/vm/mod.rs @@ -0,0 +1,22 @@ +use alloc::string::String; + +use crate::GuestPhysAddr; + +mod inited; +mod running; +mod unint; + +pub(crate) use inited::*; +pub(crate) use running::*; +pub(crate) use unint::*; + +/// Information about a device in the VM +#[derive(Debug, Clone)] +pub struct DeviceInfo {} + +#[derive(Debug, Clone)] +struct DevMapConfig { + gpa: GuestPhysAddr, + size: usize, + name: String, +} diff --git a/src/arch/aarch64/vm/running.rs b/src/arch/aarch64/vm/running.rs new file mode 100644 index 0000000..d6dbd45 --- /dev/null +++ b/src/arch/aarch64/vm/running.rs @@ -0,0 +1,49 @@ +use fdt_edit::NodeRef; + +use crate::{ + GuestPhysAddr, VmAddrSpace, VmMachineRunningCommon, VmMachineRunningOps, VmMachineStoppingOps, + arch::vm::DevMapConfig, vhal::cpu::CpuHardId, +}; + +/// Data needed when VM is running +pub struct VmMachineRunning { + pub(super) common: VmMachineRunningCommon, +} + +impl VmMachineRunning { 
+    fn handle_node_regs(dev_vec: &mut [DevMapConfig], node: &NodeRef<'_>) {}
+
+    pub fn cpu_up(
+        &mut self,
+        target_cpu: CpuHardId,
+        entry_point: GuestPhysAddr,
+        arg: u64,
+    ) -> anyhow::Result<()> {
+        let mut cpu = self
+            .common
+            .cpus
+            .remove(&target_cpu)
+            .ok_or(anyhow!("No cpu {target_cpu} found"))?;
+
+        cpu.vcpu.set_entry(entry_point.as_usize().into()).unwrap();
+        cpu.vcpu.set_gpr(0, arg as _);
+        self.common.run_cpu(cpu)?;
+        Ok(())
+    }
+}
+
+impl VmMachineRunningOps for VmMachineRunning {
+    type Stopping = VmStatusStopping;
+
+    fn stop(self) -> Self::Stopping {
+        Self::Stopping {
+            _vmspace: self.common.vmspace,
+        }
+    }
+}
+
+pub struct VmStatusStopping {
+    _vmspace: VmAddrSpace,
+}
+
+impl VmMachineStoppingOps for VmStatusStopping {}
diff --git a/src/arch/aarch64/vm/unint.rs b/src/arch/aarch64/vm/unint.rs
new file mode 100644
index 0000000..00c93dc
--- /dev/null
+++ b/src/arch/aarch64/vm/unint.rs
@@ -0,0 +1,161 @@
+use core::ops::Deref;
+
+use alloc::vec::Vec;
+use arm_vcpu::Aarch64VCpuSetupConfig;
+
+use crate::{
+    AxVMConfig, GuestPhysAddr, VmAddrSpace, VmMachineUninitOps,
+    arch::{VmMachineInited, cpu::VCpu},
+    config::CpuNumType,
+    data::VmDataWeak,
+    fdt::FdtBuilder,
+};
+
+pub struct VmMachineUninit {
+    config: AxVMConfig,
+    pt_levels: usize,
+    pa_max: usize,
+    pa_bits: usize,
+}
+
+impl VmMachineUninitOps for VmMachineUninit {
+    type Inited = VmMachineInited;
+
+    fn new(config: AxVMConfig) -> Self {
+        Self {
+            config,
+            pt_levels: 4,
+            pa_max: usize::MAX,
+            pa_bits: 48,
+        }
+    }
+
+    fn init(mut self, vmdata: VmDataWeak) -> Result<Self::Inited>
+    where
+        Self: Sized,
+    {
+        self.init_raw(vmdata)
+    }
+}
+
+impl VmMachineUninit {
+    fn new_vcpus(&mut self, vm: &VmDataWeak) -> anyhow::Result<Vec<VCpu>> {
+        // Create vCPUs
+        let mut vcpus = vec![];
+
+        let dtb_addr = GuestPhysAddr::from_usize(0);
+
+        match self.config.cpu_num {
+            CpuNumType::Alloc(num) => {
+                for _ in 0..num {
+                    let vcpu = VCpu::new(None, dtb_addr, vm.clone())?;
+                    debug!("Created vCPU with {:?}", vcpu.bind_id());
+                    vcpus.push(vcpu);
+                }
+            }
+            CpuNumType::Fixed(ref ids) => {
+                for id in ids {
+                    let vcpu = VCpu::new(Some(*id), dtb_addr, vm.clone())?;
+                    debug!("Created vCPU with {:?}", vcpu.bind_id());
+                    vcpus.push(vcpu);
+                }
+            }
+        }
+
+        let vcpu_count = vcpus.len();
+
+        for vcpu in &vcpus {
+            let (max_levels, max_pa, pa_bits) = vcpu.with_hcpu(|cpu| {
+                (
+                    cpu.max_guest_page_table_levels(),
+                    cpu.pa_range.end,
+                    cpu.pa_bits,
+                )
+            });
+            if max_levels < self.pt_levels {
+                self.pt_levels = max_levels;
+            }
+            if max_pa < self.pa_max {
+                self.pa_max = max_pa;
+            }
+
+            if pa_bits < self.pa_bits {
+                self.pa_bits = pa_bits;
+            }
+        }
+
+        if self.pt_levels == 3 {
+            self.pa_max = self.pa_max.min(0x8000000000);
+        }
+
+        debug!(
+            "VM {} ({}) vCPU count: {}, \n  Max Guest Page Table Levels: {}\n  Max PA: {:#x}\n  PA Bits: {}",
+            self.config.id, self.config.name, vcpu_count, self.pt_levels, self.pa_max, self.pa_bits
+        );
+        Ok(vcpus)
+    }
+
+    fn init_raw(&mut self, vmdata: VmDataWeak) -> anyhow::Result<VmMachineInited> {
+        debug!("Initializing VM {} ({})", self.config.id, self.config.name);
+        let mut cpus = self.new_vcpus(&vmdata)?;
+
+        let mut vmspace = VmAddrSpace::new(
+            self.pt_levels,
+            GuestPhysAddr::from_usize(0)..self.pa_max.into(),
+        )?;
+
+        debug!(
+            "Mapping memory regions for VM {} ({})",
+            self.config.id, self.config.name
+        );
+        for memory_cfg in &self.config.memory_regions {
+            vmspace.new_memory(memory_cfg)?;
+        }
+
+        vmspace.load_kernel_image(&self.config)?;
+        let mut fdt = FdtBuilder::new()?;
+        fdt.setup_cpus(cpus.iter().map(|c| c.deref()))?;
+        fdt.setup_memory(vmspace.memories().iter())?;
+        fdt.setup_chosen(None)?;
+
+        let dtb_data = fdt.build()?;
+
+        let dtb_addr = vmspace.load_dtb(&dtb_data)?;
+
+        vmspace.map_passthrough_regions()?;
+
+        let kernel_entry = vmspace.kernel_entry();
+        let gpt_root = vmspace.gpt_root();
+
+        // Setup vCPUs
+        for vcpu in &mut cpus {
+            vcpu.vcpu.set_entry(kernel_entry).unwrap();
+            vcpu.vcpu.set_dtb_addr(dtb_addr).unwrap();
+            vcpu.set_pt_level(self.pt_levels);
+            vcpu.set_pa_bits(self.pa_bits);
+
+            let setup_config = Aarch64VCpuSetupConfig {
+                passthrough_interrupt: self.config.interrupt_mode()
+                    == axvmconfig::VMInterruptMode::Passthrough,
+                passthrough_timer: self.config.interrupt_mode()
+                    == axvmconfig::VMInterruptMode::Passthrough,
+            };
+
+            vcpu.vcpu
+                .setup(setup_config)
+                .map_err(|e| anyhow::anyhow!("Failed to setup vCPU : {e:?}"))?;
+
+            // Set EPT root
+            vcpu.vcpu
+                .set_ept_root(gpt_root)
+                .map_err(|e| anyhow::anyhow!("Failed to set EPT root for vCPU : {e:?}"))?;
+        }
+
+        Ok(VmMachineInited {
+            id: self.config.id.into(),
+            name: self.config.name.clone(),
+            vmspace,
+            vcpus: cpus,
+        })
+    }
+}
diff --git a/src/arch/x86_64/cpu.rs b/src/arch/x86_64/cpu.rs
new file mode 100644
index 0000000..9aefd36
--- /dev/null
+++ b/src/arch/x86_64/cpu.rs
@@ -0,0 +1,304 @@
+use core::{
+    fmt::{self, Debug, Display},
+    ops::Deref,
+};
+use std::os::arceos::modules::axalloc;
+
+use axvm_types::addr::*;
+use memory_addr::{PAGE_SIZE_4K, PhysAddr, VirtAddr};
+
+use crate::{
+    RunError,
+    data::VmDataWeak,
+    vcpu::{VCpuCommon, VCpuOp},
+    vhal::{
+        ArchCpuData,
+        cpu::{CpuHardId, CpuId},
+    },
+};
+
+// ==================== x86 vCPU HAL implementation ====================
+// x86_vcpu now uses its own `Hal` trait; `AxVCpuHal` is no longer needed.
+
+/// HAL implementation for the x86 vCPU, implementing the `x86_vcpu::Hal` trait.
+pub(super) struct X86VcpuHal;
+
+impl x86_vcpu::Hal for X86VcpuHal {
+    fn alloc_frame() -> Option<usize> {
+        axalloc::global_allocator()
+            .alloc_pages(1, PAGE_SIZE_4K, axalloc::UsageKind::Global)
+            .ok()
+    }
+
+    fn dealloc_frame(paddr: usize) {
+        axalloc::global_allocator().dealloc_pages(paddr, 1, axalloc::UsageKind::Global);
+    }
+
+    fn phys_to_virt(paddr: usize) -> usize {
+        axhal::mem::phys_to_virt(PhysAddr::from(paddr)).into()
+    }
+
+    fn virt_to_phys(vaddr: usize) -> usize {
+        axhal::mem::virt_to_phys(VirtAddr::from(vaddr)).into()
+    }
+}
+
+// Concrete instantiations of the generic types.
+type VmxPerCpuState = x86_vcpu::VmxArchPerCpuState<X86VcpuHal>;
+type VmxVcpu = x86_vcpu::VmxArchVCpu<X86VcpuHal>;
+
+pub struct HCpu {
+    pub id: CpuId,
+    pub hard_id: CpuHardId,
+    vpercpu: VmxPerCpuState,
+    max_guest_page_table_levels: usize,
+    pub pa_range: core::ops::Range<usize>,
+    pub pa_bits: usize,
+}
+
+impl HCpu {
+    pub fn new(id: CpuId) -> Self {
+        // Query the x86 APIC ID via raw_cpuid.
+        let apic_id = raw_cpuid::CpuId::new()
+            .get_feature_info()
+            .map(|f| f.initial_local_apic_id() as usize)
+            .unwrap_or(0);
+        let hard_id = CpuHardId::new(apic_id);
+
+        // Create the x86 per-CPU state.
+        let vpercpu = VmxPerCpuState::new(id.raw()).expect("Failed to create VmxPerCpuState");
+
+        HCpu {
+            id,
+            hard_id,
+            vpercpu,
+            max_guest_page_table_levels: 0,
+            pa_range: 0..0,
+            pa_bits: 0,
+        }
+    }
+
+    pub fn init(&mut self) -> anyhow::Result<()> {
+        // Enable VMX hardware virtualization.
+        self.vpercpu.hardware_enable()?;
+
+        // Fixed configuration for the x86_64 platform.
+        self.max_guest_page_table_levels = 4; // 4-level page tables (PML4)
+        self.pa_bits = 48; // typical x86_64 physical address width
+        self.pa_range = 0..(1 << self.pa_bits);
+
+        Ok(())
+    }
+
+    pub fn max_guest_page_table_levels(&self) -> usize {
+        self.max_guest_page_table_levels
+    }
+}
+
+impl ArchCpuData for HCpu {
+    fn hard_id(&self) -> CpuHardId {
+        self.hard_id
+    }
+}
+
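+// Human-readable summary printed by the per-CPU init path in hal.rs
+// (via `info!("{}", cpu)`) after virtualization is enabled.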
+impl Display for HCpu {
+    fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
+        write!(
+            f,
+            "
+CPU {}:
+  Hard ID: {}
+  PT Levels: {}
+  PA Bits: {}",
+            self.id, self.hard_id, self.max_guest_page_table_levels, self.pa_bits
+        )
+    }
+}
+
+// x86-specific vCPU.
+pub struct VCpu {
+    pub vcpu: VmxVcpu,
+    common: VCpuCommon,
+}
+
+impl VCpu {
+    pub fn new(
+        host_cpuid: Option<CpuId>,
+        _dtb_addr: GuestPhysAddr, // kept for interface compatibility; x86 does not use a device tree
+        vm: VmDataWeak,
+    ) -> anyhow::Result<Self> {
+        let common = VCpuCommon::new_exclusive(host_cpuid, vm)?;
+
+        let hard_id = common.hard_id();
+        let vm_id = common.vm_id().into();
+        let vcpu_id = common.bind_id().raw();
+
+        // Create the vCPU through the new x86_vcpu API.
+        let vcpu = VmxVcpu::new(vm_id, vcpu_id)
+            .map_err(|e| anyhow::anyhow!("Failed to create VmxVcpu: {:?}", e))?;
+
+        info!(
+            "Created x86 VCPU: vm_id={}, vcpu_id={}, hard_id={}",
+            vm_id,
+            vcpu_id,
+            hard_id.raw()
+        );
+
+        Ok(VCpu { vcpu, common })
+    }
+
+    pub fn set_pt_level(&mut self, level: usize) {
+        // Configured via EPT on x86; this method is a stub kept for interface compatibility.
+        debug!("Setting page table level to {} (no-op on x86)", level);
+    }
+
+    pub fn set_pa_bits(&mut self, pa_bits: usize) {
+        // Configured via EPT on x86; this method is a stub kept for interface compatibility.
+        debug!("Setting PA bits to {} (no-op on x86)", pa_bits);
+    }
+}
+
+impl VCpuOp for VCpu {
+    fn bind_id(&self) -> CpuId {
+        self.common.bind_id()
+    }
+
+    fn hard_id(&self) -> CpuHardId {
+        self.common.hard_id()
+    }
+
+    fn run(&mut self) -> Result<(), RunError> {
+        info!("Starting x86 vCPU {}", self.bind_id());
+
+        // Bind to the current CPU through the new x86_vcpu API.
+        self.vcpu.bind().map_err(|e| {
+            RunError::ExitWithError(anyhow::anyhow!("Failed to bind VCPU: {:?}", e))
+        })?;
+
+        while self.is_active() {
+            debug!("x86 vCPU {} entering guest", self.bind_id());
+
+            // Run through x86_vcpu, which reports exits with its own `VmxExitReason`.
+            let exit_reason = self.vcpu.run().map_err(|e| {
+                RunError::ExitWithError(anyhow::anyhow!("VCPU run failed: {:?}", e))
+            })?;
+
+            debug!(
+                "x86 vCPU {} exited with reason: {:?}",
+                self.bind_id(),
+                exit_reason
+            );
+
+            // Handle exit reasons by priority, using x86_vcpu's `VmxExitReason`.
+            match exit_reason {
+                // High priority: external interrupts (must be handled).
+                x86_vcpu::VmxExitReason::ExternalInterrupt { vector } => {
+                    debug!("Handling external interrupt, vector={}", vector);
+                    axhal::irq::irq_handler(vector);
+                }
+
+                // High priority: system register (MSR) accesses.
+                x86_vcpu::VmxExitReason::SysRegRead { addr, reg } => {
+                    // TODO: implement MSR read handling.
+                    // x86_vcpu's VmxVcpu already handles x2APIC MSR accesses;
+                    // reads of other MSRs need to be handled here.
+                    todo!("MSR read: addr={:?}, reg={}", addr, reg);
+                }
+                x86_vcpu::VmxExitReason::SysRegWrite { addr, value } => {
+                    // TODO: implement MSR write handling.
+                    todo!("MSR write: addr={:?}, value={:#x}", addr, value);
+                }
+
+                // High priority: port I/O instructions.
+                x86_vcpu::VmxExitReason::IoRead { port, width } => {
+                    // TODO: implement port I/O reads;
+                    // this needs to be wired up to the device emulation layer.
+                    todo!("IO read: port={:?}, width={:?}", port, width);
+                }
+                x86_vcpu::VmxExitReason::IoWrite { port, width, data } => {
+                    // TODO: implement port I/O writes;
+                    // this needs to be wired up to the device emulation layer.
+                    todo!(
+                        "IO write: port={:?}, width={:?}, data={:#x}",
+                        port,
+                        width,
+                        data
+                    );
+                }
+
+                // Medium priority: hypercalls.
+                x86_vcpu::VmxExitReason::Hypercall { nr, args } => {
+                    // TODO: implement the hypercall interface.
+                    todo!("Hypercall: nr={:#x}, args={:?}", nr, args);
+                }
+
+                // Low priority: CPU bring-up.
+                x86_vcpu::VmxExitReason::CpuUp {
+                    target_cpu,
+                    entry_point,
+                    arg,
+                } => {
+                    debug!(
+                        "x86 vCPU {} requested CPU {} up",
+                        self.bind_id(),
+                        target_cpu
+                    );
+                    self.vm()?.with_machine_running_mut(|running| {
+                        debug!("vCPU {} is bringing up CPU {}", self.bind_id(), target_cpu);
+                        // Convert axaddrspace::GuestPhysAddr into axvm_types::addr::GuestPhysAddr:
+                        // go through usize first, then into the target type.
+                        let entry: GuestPhysAddr = entry_point.as_usize().into();
+                        running.cpu_up(CpuHardId::new(target_cpu), entry, arg)
+                    })??;
+                    // x86 starts APs with a SIPI (Startup IPI); the return value is placed in RAX.
+                    self.vcpu.set_gpr(0, 0);
+                }
+
+                x86_vcpu::VmxExitReason::CpuDown { state } => {
+                    // TODO: implement CPU shutdown.
+                    todo!("CPU down: state={:?}", state);
+                }
+
+                // System shutdown.
+                x86_vcpu::VmxExitReason::SystemDown => {
+                    info!("x86 vCPU {} requested system shutdown", self.bind_id());
+                    self.vm()?.stop()?;
+                    break;
+                }
+
+                x86_vcpu::VmxExitReason::Nothing => {
+                    // Nothing to do; keep running.
+                }
+
+                _ => {
+                    warn!("Unhandled x86 VCPU exit reason: {:?}", exit_reason);
+                }
+            }
+        }
+
+        // Unbind the vCPU through the new x86_vcpu API.
+        self.vcpu.unbind().map_err(|e| {
+            RunError::ExitWithError(anyhow::anyhow!("Failed to unbind VCPU: {:?}", e))
+        })?;
+
+        Ok(())
+    }
+}
+
+impl Deref for VCpu {
+    type Target = VCpuCommon;
+
+    fn deref(&self) -> &Self::Target {
+        &self.common
+    }
+}
+
+impl Debug for VCpu {
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        f.debug_struct("x86::VCpu")
+            .field("bind_id", &self.bind_id())
+            .field("hard_id", &self.hard_id())
+            .field("vcpu", &self.vcpu)
+            .finish()
+    }
+}
diff --git a/src/arch/x86_64/hal.rs b/src/arch/x86_64/hal.rs
new file mode 100644
index 0000000..ad837ff
--- /dev/null
+++ b/src/arch/x86_64/hal.rs
@@ -0,0 +1,57 @@
+use alloc::vec::Vec;
+
+use crate::vhal::{
+    ArchHal,
+    cpu::{CpuHardId, CpuId},
+};
+use memory_addr::VirtAddr;
+
+use super::cpu::HCpu;
+
+// Use the raw_cpuid crate pulled in alongside x86_vcpu.
+extern crate raw_cpuid;
+
+pub struct Hal;
+
+impl ArchHal for Hal {
+    fn init() -> anyhow::Result<()> {
+        // x86_vcpu needs no global initialization;
+        // each CPU initializes VMX individually in `current_cpu_init`.
+        info!("x86_64 HAL initialization complete (no global init required)");
+        Ok(())
+    }
+
+    fn current_cpu_init(id: CpuId) -> anyhow::Result<HCpu> {
+        info!("Enabling virtualization on x86_64 cpu {}", id);
+        let mut cpu = HCpu::new(id);
+        cpu.init()?;
+        info!("{}", cpu);
+        Ok(cpu)
+    }
+
+    fn cpu_list() -> Vec<CpuHardId> {
+        // Simple implementation: take the CPU count from axruntime
+        // and assume CPU IDs are contiguous (0, 1, 2, ...).
+        // TODO: a more accurate APIC ID mapping could later be taken from the ACPI/MP tables.
+        let count = axruntime::cpu_count();
+        debug!("x86_64 CPU list: {} CPUs (simple implementation)", count);
+        (0..count).map(|i| CpuHardId::new(i)).collect()
+    }
+
+    fn cpu_hard_id() -> CpuHardId {
+        // Query the current CPU's APIC ID via raw_cpuid.
+        let apic_id = raw_cpuid::CpuId::new()
+            .get_feature_info()
+            .map(|f| f.initial_local_apic_id() as usize)
+            .unwrap_or_else(|| {
+                warn!("Failed to get APIC ID from CPUID, using fallback");
+                0
+            });
+        CpuHardId::new(apic_id)
+    }
+
+    fn cache_flush(_vaddr: VirtAddr, _size: usize) {
+        // No explicit cache flush is needed on x86;
+        // the hardware keeps caches coherent (WBINVD is issued when required).
+    }
+}
diff --git a/src/arch/x86_64/mod.rs b/src/arch/x86_64/mod.rs
new file mode 100644
index 0000000..a3c06f0
--- /dev/null
+++ b/src/arch/x86_64/mod.rs
@@ -0,0 +1,7 @@
+pub mod cpu;
+mod hal;
+mod vm;
+
+pub use cpu::HCpu;
+pub use hal::Hal;
+pub use vm::*;
diff --git a/src/arch/x86_64/vm/inited.rs b/src/arch/x86_64/vm/inited.rs
new file mode 100644
index 0000000..410b518
--- /dev/null
+++ b/src/arch/x86_64/vm/inited.rs
@@ -0,0 +1,43 @@
+use std::{string::String, vec::Vec};
+
+use crate::{
+    VmAddrSpace, VmMachineInitedOps, VmMachineRunningCommon,
+    arch::{VmMachineRunning, cpu::VCpu},
+    data::VmDataWeak,
+    vm::VmId,
+};
+
+pub struct VmMachineInited {
+    pub id: VmId,
+    pub name: String,
+    pub vcpus: Vec<VCpu>,
+    pub vmspace: VmAddrSpace,
+}
+
+impl VmMachineInited {}
+
+impl VmMachineInitedOps for VmMachineInited {
+    type Running = VmMachineRunning;
+
+    fn id(&self) -> VmId {
+        self.id
+    }
+
+    fn name(&self) -> &str {
+        &self.name
+    }
+
+    fn start(self, vmdata: VmDataWeak) -> Result<Self::Running> {
+        debug!("Starting VM {} ({})", self.id, self.name);
+        let mut running = VmMachineRunning {
+            common: VmMachineRunningCommon::new(self.vmspace, self.vcpus, vmdata),
+        };
+
+        let main = running.common.take_cpu()?;
+
+        running.common.run_cpu(main)?;
+
+        info!("VM {} ({}) main cpu started.", self.id, self.name);
+        Ok(running)
+    }
+}
diff --git a/src/arch/x86_64/vm/mod.rs b/src/arch/x86_64/vm/mod.rs
new file mode 100644
index 0000000..0e1f0c2
--- /dev/null
+++ b/src/arch/x86_64/vm/mod.rs
@@ -0,0 +1,24 @@
+use alloc::string::String;
+
+use crate::GuestPhysAddr;
+
+mod inited;
+mod running;
+mod stopping;
+mod unint;
+
+pub(crate) use inited::*;
+pub(crate) use running::*;
+pub(crate) use stopping::*;
+pub(crate) use unint::*;
+
+/// Information about a device in the VM
+#[derive(Debug, Clone)]
+pub struct DeviceInfo {}
+
+#[derive(Debug, Clone)]
+struct DevMapConfig {
+    gpa: GuestPhysAddr,
+    size: usize,
+    name: String,
+}
diff --git a/src/arch/x86_64/vm/running.rs b/src/arch/x86_64/vm/running.rs
new file mode 100644
index 0000000..f293e7c
--- /dev/null
+++ b/src/arch/x86_64/vm/running.rs
@@ -0,0 +1,49 @@
+use core::ops::Deref;
+
+use crate::{
+    GuestPhysAddr, VmMachineRunningCommon, VmMachineRunningOps, VmMachineStoppingOps,
+    arch::cpu::VCpu, vhal::cpu::CpuHardId,
+};
+
+pub struct VmMachineRunning {
+    pub common: VmMachineRunningCommon,
+}
+
+impl VmMachineRunning {
+    pub fn cpu_up(
+        &mut self,
+        target_cpu: CpuHardId,
+        entry_point: GuestPhysAddr,
+        arg: u64,
+    ) -> anyhow::Result<()> {
+        let mut cpu = self
+            .common
+            .cpus
+            .remove(&target_cpu)
+            .ok_or(anyhow!("No cpu {target_cpu} found"))?;
+
+        // x86 starts APs with a SIPI (Startup IPI);
+        // set the entry point and the argument here.
+        cpu.vcpu.set_entry(entry_point.as_usize().into())?;
+        cpu.vcpu.set_gpr(0, arg as _);
+        self.common.run_cpu(cpu)?;
+        Ok(())
+    }
+}
+
+impl Deref for VmMachineRunning {
+    type Target = VmMachineRunningCommon;
+
+    fn deref(&self) -> &Self::Target {
+        &self.common
+    }
+}
+
+impl VmMachineRunningOps for VmMachineRunning {
+    type Stopping = super::stopping::VmStatusStopping;
+
+    fn stop(self) -> Self::Stopping {
+        debug!("Stopping x86_64 VM");
+        super::stopping::VmStatusStopping {}
+    }
+}
diff --git a/src/arch/x86_64/vm/stopping.rs b/src/arch/x86_64/vm/stopping.rs
new file mode 100644
index 0000000..1fb92eb
--- /dev/null
+++ b/src/arch/x86_64/vm/stopping.rs
@@ -0,0 +1,5 @@
+use crate::VmMachineStoppingOps;
+
+pub struct VmStatusStopping {}
+
+impl VmMachineStoppingOps for VmStatusStopping {}
diff --git a/src/arch/x86_64/vm/unint.rs b/src/arch/x86_64/vm/unint.rs
new file mode 100644
index 0000000..d52b769
--- /dev/null
+++ b/src/arch/x86_64/vm/unint.rs
@@ -0,0 +1,134 @@
+use core::ops::Deref;
+
+use alloc::vec::Vec;
+
+use crate::{
+    AxVMConfig, GuestPhysAddr, VmAddrSpace, VmMachineUninitOps,
+    arch::{VmMachineInited, cpu::VCpu},
+    config::CpuNumType,
+    data::VmDataWeak,
+};
+
+pub struct VmMachineUninit {
+    config: AxVMConfig,
+    pt_levels: usize,
+    pa_max: usize,
+    pa_bits: usize,
+}
+
+impl VmMachineUninitOps for VmMachineUninit {
+    type Inited = VmMachineInited;
+
+    fn new(config: AxVMConfig) -> Self {
+        Self {
+            config,
+            pt_levels: 4, // x86_64 uses 4-level page tables (PML4)
+            pa_max: usize::MAX,
+            pa_bits: 48, // typical x86_64 physical address width
+        }
+    }
+
+    fn init(mut self, vmdata: VmDataWeak) -> Result<Self::Inited>
+    where
+        Self: Sized,
+    {
+        self.init_raw(vmdata)
+    }
+}
+
+impl VmMachineUninit {
+    fn new_vcpus(&mut self, vm: &VmDataWeak) -> anyhow::Result<Vec<VCpu>> {
+        // Create vCPUs.
+        let mut vcpus = vec![];
+
+        // x86 does not use a device tree; pass 0 for dtb_addr.
+        let dtb_addr = GuestPhysAddr::from(0);
+
+        match self.config.cpu_num {
+            CpuNumType::Alloc(num) => {
+                for _ in 0..num {
+                    let vcpu = VCpu::new(None, dtb_addr, vm.clone())?;
+                    debug!("Created vCPU with {:?}", vcpu.bind_id());
+                    vcpus.push(vcpu);
+                }
+            }
+            CpuNumType::Fixed(ref ids) => {
+                for id in ids {
+                    let vcpu = VCpu::new(Some(*id), dtb_addr, vm.clone())?;
+                    debug!("Created vCPU with {:?}", vcpu.bind_id());
+                    vcpus.push(vcpu);
+                }
+            }
+        }
+
+        let vcpu_count = vcpus.len();
+
+        // Fixed configuration for the x86_64 platform.
+        // Page-table levels and address bits could be taken from HCpu if needed.
+        for vcpu in &vcpus {
+            // x86_64 always uses 4-level page tables;
+            // PA bits can be adjusted as needed.
+            debug!("vCPU bind_id: {:?}", vcpu.bind_id());
+        }
+
+        // If pt_levels == 3, pa_max must be capped.
+        if self.pt_levels == 3 {
+            self.pa_max = self.pa_max.min(0x8000000000);
+        }
+
+        debug!(
+            "VM {} ({}) vCPU count: {}, \n  Max Guest Page Table Levels: {}\n  Max PA: {:#x}\n  PA Bits: {}",
+            self.config.id, self.config.name, vcpu_count, self.pt_levels, self.pa_max, self.pa_bits
+        );
+        Ok(vcpus)
+    }
+
+    fn init_raw(&mut self, vmdata: VmDataWeak) -> anyhow::Result<VmMachineInited> {
+        debug!("Initializing VM {} ({})", self.config.id, self.config.name);
+        let mut cpus = self.new_vcpus(&vmdata)?;
+
+        let mut vmspace =
+            VmAddrSpace::new(self.pt_levels, GuestPhysAddr::from(0)..self.pa_max.into())?;
+
+        debug!(
+            "Mapping memory regions for VM {} ({})",
+            self.config.id, self.config.name
+        );
+        for memory_cfg in &self.config.memory_regions {
+            vmspace.new_memory(memory_cfg)?;
+        }
+
+        vmspace.load_kernel_image(&self.config)?;
+
+        // x86 uses ACPI tables instead of a device tree,
+        // so FDT creation is skipped and the kernel is loaded directly.
+        // ACPI support can be added later if needed.
+
+        vmspace.map_passthrough_regions()?;
+
+        let kernel_entry = vmspace.kernel_entry();
+        let gpt_root = vmspace.gpt_root();
+
+        // Set up vCPUs.
+        for vcpu in &mut cpus {
+            vcpu.vcpu
+                .set_entry(kernel_entry.as_usize().into())
+                .map_err(|e| anyhow::anyhow!("Failed to set entry: {:?}", e))?;
+
+            vcpu.vcpu
+                .set_ept_root(gpt_root)
+                .map_err(|e| anyhow::anyhow!("Failed to set EPT root: {:?}", e))?;
+
+            // x86-specific vCPU setup:
+            // x86_vcpu's VmxVcpu needs no extra setup() call here,
+            // since basic initialization is done at creation time.
+        }
+
+        Ok(VmMachineInited {
+            id: self.config.id.into(),
+            name: self.config.name.clone(),
+            vmspace,
+            vcpus: cpus,
+        })
+    }
+}
diff --git a/src/config.rs b/src/config.rs
index 841c8f5..b59beec 100644
--- a/src/config.rs
+++ b/src/config.rs
@@ -3,95 +3,77 @@
 use alloc::string::String;
 use alloc::vec::Vec;
-use core::ops::Range;
 
-use axaddrspace::GuestPhysAddr;
+use crate::{GuestPhysAddr, HostPhysAddr};
 pub use axvmconfig::{
-    AxVMCrateConfig, EmulatedDeviceConfig, PassThroughDeviceConfig, VMInterruptMode, VMType,
-    VmMemConfig, VmMemMappingType,
+    AxVMCrateConfig, EmulatedDeviceConfig, PassThroughAddressConfig, PassThroughDeviceConfig,
+    VMInterruptMode, VMType, VmMemConfig, VmMemMappingType,
 };
 
-/// A part of `AxVCpuConfig`, which represents an architecture-dependent `VCpu`.
-///
-/// The concrete type of configuration is defined in `AxArchVCpuImpl`.
-// #[derive(Clone, Copy, Debug, Default)]
-// pub struct AxArchVCpuConfig<H: AxVMHal> {
-//     pub create_config: <AxArchVCpuImpl<H> as AxArchVCpu>::CreateConfig,
-//     pub setup_config: <AxArchVCpuImpl<H> as AxArchVCpu>::SetupConfig,
-// }
+use crate::vhal::cpu::CpuId;
 
-/// A part of `AxVMConfig`, which represents a `VCpu`.
-#[derive(Clone, Copy, Debug, Default)]
-pub struct AxVCpuConfig {
-    // pub arch_config: AxArchVCpuConfig,
-    /// The entry address in GPA for the Bootstrap Processor (BSP).
-    pub bsp_entry: GuestPhysAddr,
-    /// The entry address in GPA for the Application Processor (AP).
-    pub ap_entry: GuestPhysAddr,
+#[derive(Debug, Default, Clone)]
+pub struct VMImageConfig {
+    pub gpa: Option<GuestPhysAddr>,
+    pub data: Vec<u8>,
 }
 
 /// A part of `AxVMConfig`, which stores configuration attributes related to the load address of VM images.
-#[derive(Debug, Default)]
-pub struct VMImageConfig {
+#[derive(Debug, Default, Clone)]
+pub struct VMImagesConfig {
     /// The load address in GPA for the kernel image.
-    pub kernel_load_gpa: GuestPhysAddr,
+    pub kernel: VMImageConfig,
     /// The load address in GPA for the BIOS image, `None` if not used.
-    pub bios_load_gpa: Option<GuestPhysAddr>,
+    pub bios: Option<VMImageConfig>,
     /// The load address in GPA for the device tree blob (DTB), `None` if not used.
-    pub dtb_load_gpa: Option<GuestPhysAddr>,
+    pub dtb: Option<VMImageConfig>,
    /// The load address in GPA for the ramdisk image, `None` if not used.
-    pub ramdisk_load_gpa: Option<GuestPhysAddr>,
+    pub ramdisk: Option<VMImageConfig>,
+}
+
+#[derive(Debug, Clone)]
+pub enum MemoryKind {
+    /// Use identical memory regions, i.e., HPA == GPA
+    Identical { size: usize },
+    /// Use host reserved memory regions, i.e., HPA == GPA
+    Reserved { hpa: HostPhysAddr, size: usize },
+    /// Use fixed address memory regions, i.e., HPA != GPA
+    Vmem { gpa: GuestPhysAddr, size: usize },
 }
 
 /// A part of `AxVMCrateConfig`, which represents a `VM`.
 #[derive(Debug, Default)]
 pub struct AxVMConfig {
-    id: usize,
-    name: String,
-    #[allow(dead_code)]
-    vm_type: VMType,
-    cpu_num: usize,
-    phys_cpu_ids: Option<Vec<usize>>,
-    phys_cpu_sets: Option<Vec<usize>>,
-    cpu_config: AxVCpuConfig,
-    image_config: VMImageConfig,
-    memory_regions: Vec<VmMemConfig>,
-    emu_devices: Vec<EmulatedDeviceConfig>,
-    pass_through_devices: Vec<PassThroughDeviceConfig>,
-    // TODO: improve interrupt passthrough
-    spi_list: Vec<u32>,
-    interrupt_mode: VMInterruptMode,
+    pub id: usize,
+    pub name: String,
+    pub cpu_num: CpuNumType,
+    pub image_config: VMImagesConfig,
+    pub memory_regions: Vec<MemoryKind>,
+    pub interrupt_mode: VMInterruptMode,
 }
 
-impl From<AxVMCrateConfig> for AxVMConfig {
-    fn from(cfg: AxVMCrateConfig) -> Self {
-        Self {
-            id: cfg.base.id,
-            name: cfg.base.name,
-            vm_type: VMType::from(cfg.base.vm_type),
-            cpu_num: cfg.base.cpu_num,
-            phys_cpu_ids: cfg.base.phys_cpu_ids,
-            phys_cpu_sets: cfg.base.phys_cpu_sets,
-            cpu_config: AxVCpuConfig {
-                bsp_entry: GuestPhysAddr::from(cfg.kernel.entry_point),
-                ap_entry: GuestPhysAddr::from(cfg.kernel.entry_point),
-            },
-            image_config: VMImageConfig {
-                kernel_load_gpa: GuestPhysAddr::from(cfg.kernel.kernel_load_addr),
-                bios_load_gpa: cfg.kernel.bios_load_addr.map(GuestPhysAddr::from),
-                dtb_load_gpa: cfg.kernel.dtb_load_addr.map(GuestPhysAddr::from),
-                ramdisk_load_gpa: cfg.kernel.ramdisk_load_addr.map(GuestPhysAddr::from),
-            },
-            memory_regions: cfg.kernel.memory_regions,
-            emu_devices: cfg.devices.emu_devices,
-            pass_through_devices: cfg.devices.passthrough_devices,
-            spi_list: Vec::new(),
-            interrupt_mode: cfg.devices.interrupt_mode,
+#[derive(Debug, Clone)]
+pub enum CpuNumType {
+    Alloc(usize),
+    Fixed(Vec<CpuId>),
+}
+
+impl CpuNumType {
+    pub fn num(&self) -> usize {
+        match self {
+            CpuNumType::Alloc(num) => *num,
+            CpuNumType::Fixed(ids) => ids.len(),
         }
     }
 }
 
+impl Default for CpuNumType {
+    fn default() -> Self {
+        CpuNumType::Alloc(1)
+    }
+}
+
 impl AxVMConfig {
     /// Returns VM id.
     pub fn id(&self) -> usize {
@@ -99,95 +81,15 @@ impl AxVMConfig {
     }
 
     /// Returns VM name.
-    pub fn name(&self) -> String {
-        self.name.clone()
-    }
-
-    /// Returns vCpu id list and its corresponding pCpu affinity list, as well as its physical id.
-    /// If the pCpu affinity is None, it means the vCpu will be allocated to any available pCpu randomly.
-    /// if the pCPU id is not provided, the vCpu's physical id will be set as vCpu id.
-    ///
-    /// Returns a vector of tuples, each tuple contains:
-    /// - The vCpu id.
-    /// - The pCpu affinity mask, `None` if not set.
-    /// - The physical id of the vCpu, equal to vCpu id if not provided.
-    pub fn get_vcpu_affinities_pcpu_ids(&self) -> Vec<(usize, Option<usize>, usize)> {
-        let mut vcpu_pcpu_tuples = Vec::new();
-        for vcpu_id in 0..self.cpu_num {
-            vcpu_pcpu_tuples.push((vcpu_id, None, vcpu_id));
-        }
-        if let Some(phys_cpu_sets) = &self.phys_cpu_sets {
-            for (vcpu_id, pcpu_mask_bitmap) in phys_cpu_sets.iter().enumerate() {
-                vcpu_pcpu_tuples[vcpu_id].1 = Some(*pcpu_mask_bitmap);
-            }
-        }
-        if let Some(phys_cpu_ids) = &self.phys_cpu_ids {
-            for (vcpu_id, phys_id) in phys_cpu_ids.iter().enumerate() {
-                vcpu_pcpu_tuples[vcpu_id].2 = *phys_id;
-            }
-        }
-        vcpu_pcpu_tuples
+    pub fn name(&self) -> &str {
+        &self.name
     }
 
     /// Returns configurations related to VM image load addresses.
-    pub fn image_config(&self) -> &VMImageConfig {
+    pub fn image_config(&self) -> &VMImagesConfig {
         &self.image_config
     }
 
-    /// Returns the entry address in GPA for the Bootstrap Processor (BSP).
-    pub fn bsp_entry(&self) -> GuestPhysAddr {
-        // Retrieves BSP entry from the CPU configuration.
-        self.cpu_config.bsp_entry
-    }
-
-    /// Returns the entry address in GPA for the Application Processor (AP).
-    pub fn ap_entry(&self) -> GuestPhysAddr {
-        // Retrieves AP entry from the CPU configuration.
-        self.cpu_config.ap_entry
-    }
-
-    /// Returns configurations related to VM memory regions.
-    pub fn memory_regions(&self) -> &Vec<VmMemConfig> {
-        &self.memory_regions
-    }
-
-    /// Adds a new memory region to the VM configuration.
-    pub fn add_memory_region(&mut self, region: VmMemConfig) {
-        self.memory_regions.push(region);
-    }
-
-    /// Checks if the VM memory regions contain a specific range.
-    pub fn contains_memory_range(&self, range: &Range<usize>) -> bool {
-        self.memory_regions
-            .iter()
-            .any(|region| region.gpa <= range.start && region.gpa + region.size >= range.end)
-    }
-
-    /// Returns configurations related to VM emulated devices.
-    pub fn emu_devices(&self) -> &Vec<EmulatedDeviceConfig> {
-        &self.emu_devices
-    }
-
-    /// Returns configurations related to VM passthrough devices.
-    pub fn pass_through_devices(&self) -> &Vec<PassThroughDeviceConfig> {
-        &self.pass_through_devices
-    }
-
-    /// Adds a new passthrough device to the VM configuration.
-    pub fn add_pass_through_device(&mut self, device: PassThroughDeviceConfig) {
-        self.pass_through_devices.push(device);
-    }
-
-    /// Adds a passthrough SPI to the VM configuration.
-    pub fn add_pass_through_spi(&mut self, spi: u32) {
-        self.spi_list.push(spi);
-    }
-
-    /// Returns the list of passthrough SPIs.
-    pub fn pass_through_spis(&self) -> &Vec<u32> {
-        &self.spi_list
-    }
-
     /// Returns the interrupt mode of the VM.
     pub fn interrupt_mode(&self) -> VMInterruptMode {
         self.interrupt_mode
diff --git a/src/fdt/mod.rs b/src/fdt/mod.rs
new file mode 100644
index 0000000..3216a9d
--- /dev/null
+++ b/src/fdt/mod.rs
@@ -0,0 +1,141 @@
+use alloc::vec::Vec;
+use fdt_edit::{Fdt, FdtData, Node, Property, RegInfo, Status};
+
+use crate::{GuestMemory, GuestPhysAddr, vcpu::VCpuCommon, vhal::cpu::CpuHardId};
+
+pub(crate) fn fdt_edit() -> Option<Fdt> {
+    let addr = axhal::dtb::get_bootarg();
+    if addr == 0 {
+        return None;
+    }
+    let fdt = unsafe { Fdt::from_ptr(addr as *mut u8).ok()? };
+    Some(fdt)
+}
+
+pub fn cpu_list() -> Option<Vec<usize>> {
+    let fdt = fdt_edit()?;
+
+    let cpus = fdt
+        .find_by_path("/cpus/cpu")
+        .filter(|node| node.name().contains("cpu@"))
+        .filter(|node| !matches!(node.status(), Some(Status::Disabled)))
+        .map(|node| {
+            let reg = node
+                .regs()
+                .unwrap_or_else(|| panic!("cpu {} reg not found", node.name()))[0];
+            reg.address as usize
+        })
+        .collect();
+    Some(cpus)
+}
+
+pub(crate) struct FdtBuilder {
+    fdt: Fdt,
+}
+
+impl FdtBuilder {
+    pub fn new() -> anyhow::Result<Self> {
+        let fdt = fdt_edit().ok_or_else(|| anyhow::anyhow!("No FDT found"))?;
+        Ok(Self { fdt })
+    }
+
+    pub fn build(self) -> anyhow::Result<FdtData> {
+        let dtb_data = self.fdt.encode();
+        Ok(dtb_data)
+    }
+
+    pub fn setup_cpus<'a>(
+        &mut self,
+        vcpus: impl Iterator<Item = &'a VCpuCommon>,
+    ) -> anyhow::Result<()> {
+        let mut rm_nodes = vec![];
+        let vcpu_hard_ls = vcpus.map(|v: &VCpuCommon| v.hard_id()).collect::<Vec<_>>();
+        for cpu in self.fdt.find_by_path("/cpus/cpu") {
+            if let Some(id) = cpu.regs() {
+                let id = CpuHardId::new(id[0].address as usize);
+                if vcpu_hard_ls.contains(&id) {
+                    continue;
+                }
+            }
+
+            rm_nodes.push(cpu.path());
+        }
+
+        for path in rm_nodes {
+            self.fdt.remove_node(&path).unwrap();
+        }
+
+        Ok(())
+    }
+
+    pub fn setup_memory<'a>(
+        &mut self,
+        memories: impl Iterator<Item = &'a GuestMemory>,
+    ) -> anyhow::Result<()> {
+        let nodes = self
+            .fdt
+            .find_by_path("/memory")
+            .into_iter()
+            .map(|o| o.path())
+            .collect::<Vec<_>>();
+        for path in nodes {
+            self.fdt.remove_node(&path).unwrap();
+        }
+
+        for (i, m) in memories.enumerate() {
+            let mut node = Node::new(&format!("memory@{i}"));
+            let mut prop = Property::new("device_type", vec![]);
+            prop.set_string("memory");
+            node.add_property(prop);
+            self.fdt.root_mut().add_child(node);
+            let mut node = self
+                .fdt
+                .get_by_path_mut(&format!("/memory@{i}"))
+                .expect("must has node");
+            node.set_regs(&[RegInfo {
+                address: m.gpa().as_usize() as u64,
+                size: Some(m.size() as u64),
+            }]);
+        }
+
+        Ok(())
+    }
+
+    pub fn setup_chosen(&mut self, initrd: Option<(GuestPhysAddr, usize)>) -> anyhow::Result<()> {
+        let mut node = self
+            .fdt
+            .get_by_path_mut("/chosen")
+            .ok_or_else(|| anyhow::anyhow!("No /chosen node found"))?;
+
+        if let Some(initrd) = initrd {
+            let cells = node.ctx.parent_address_cells();
+            let (initrd_start, initrd_end) = (initrd.0.as_usize(), initrd.0.as_usize() + initrd.1);
+
+            let mut prop_s = Property::new("linux,initrd-start", vec![]);
+            let mut prop_e = Property::new("linux,initrd-end", vec![]);
+
+            if cells == 2 {
+                prop_s.set_u64(initrd_start as _);
+                prop_e.set_u64(initrd_end as _);
+            } else {
+                prop_s.set_u32_ls(&[initrd_start as u32]);
+                prop_e.set_u32_ls(&[initrd_end as u32]);
+            }
+
+            node.node.add_property(prop_s);
+            node.node.add_property(prop_e);
+        } else {
+            node.node.remove_property("linux,initrd-start");
+            node.node.remove_property("linux,initrd-end");
+        };
+
+        if let Some(args) = node.node.get_property_mut("bootargs")
+            && let Some(s) = args.as_str()
+        {
+            let bootargs = s.replace(" ro ", " rw ");
+            args.set_string(&bootargs);
+        }
+
+        Ok(())
+    }
+}
diff --git a/src/hal.rs b/src/hal.rs
deleted file mode 100644
index 94b6c11..0000000
--- a/src/hal.rs
+++ /dev/null
@@ -1,44 +0,0 @@
-use axaddrspace::{HostPhysAddr, HostVirtAddr};
-use axerrno::AxResult;
-
-/// The interfaces which the underlying software (kernel or hypervisor) must implement.
-pub trait AxVMHal: Sized {
-    /// The low-level **OS-dependent** helpers that must be provided for physical address management.
-    type PagingHandler: page_table_multiarch::PagingHandler;
-
-    /// Allocates a memory region at the specified physical address.
-    ///
-    /// Returns `true` if the memory region is successfully allocated.
-    fn alloc_memory_region_at(base: HostPhysAddr, size: usize) -> bool;
-
-    /// Deallocates a memory region at the specified physical address.
-    fn dealloc_memory_region_at(base: HostPhysAddr, size: usize);
-
-    /// Converts a virtual address to the corresponding physical address.
-    fn virt_to_phys(vaddr: HostVirtAddr) -> HostPhysAddr;
-
-    /// Current time in nanoseconds.
-    fn current_time_nanos() -> u64;
-
-    /// Current VM ID.
-    fn current_vm_id() -> usize;
-
-    /// Current Virtual CPU ID.
-    fn current_vcpu_id() -> usize;
-
-    /// Current Physical CPU ID.
-    fn current_pcpu_id() -> usize;
-
-    /// Get the Physical CPU ID where the specified VCPU of the current VM resides.
-    ///
-    /// Returns an error if the VCPU is not found.
-    fn vcpu_resides_on(vm_id: usize, vcpu_id: usize) -> AxResult<usize>;
-
-    /// Inject an IRQ to the specified VCPU.
-    ///
-    /// This method should find the physical CPU where the specified VCPU resides and inject the IRQ
-    /// to it on that physical CPU with [`axvcpu::AxVCpu::inject_interrupt`].
-    ///
-    /// Returns an error if the VCPU is not found.
-    fn inject_irq_to_vcpu(vm_id: usize, vcpu_id: usize, irq: usize) -> AxResult;
-}
diff --git a/src/lib.rs b/src/lib.rs
index 5c1a622..180cc3e 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -1,33 +1,40 @@
 #![no_std]
 #![feature(new_range_api)]
-// #![feature(concat_idents)]
-// #![feature(naked_functions)]
-// #![feature(const_trait_impl)]
 
 //! This crate provides a minimal VM monitor (VMM) for running guest VMs.
 //!
 //! This crate contains:
 //! - [`AxVM`]: The main structure representing a VM.
 
+#[macro_use]
 extern crate alloc;
 #[macro_use]
 extern crate log;
+#[macro_use]
+extern crate anyhow;
+
+extern crate axstd as std;
 
-mod hal;
+const TASK_STACK_SIZE: usize = 0x40000; // 256 KB
+
+#[cfg_attr(target_arch = "aarch64", path = "arch/aarch64/mod.rs")]
+#[cfg_attr(target_arch = "x86_64", path = "arch/x86_64/mod.rs")]
+pub(crate) mod arch;
+
+mod fdt;
 mod vcpu;
 mod vm;
 
 pub mod config;
+pub mod vhal;
 
-pub use hal::AxVMHal;
-pub use vm::AxVCpuRef;
-pub use vm::AxVM;
-pub use vm::AxVMRef;
-
-/// The architecture-independent per-CPU type.
-pub type AxVMPerCpu<H> = axvcpu::AxPerCpu<AxVMArchPerCpuImpl<H>>;
+pub use axvm_types::addr::*;
+pub use config::AxVMConfig;
+pub use vhal::cpu::CpuId;
+pub use vm::*;
 
-/// Whether the hardware has virtualization support.
-pub fn has_hardware_support() -> bool {
-    vcpu::has_hardware_support()
+/// Enable hardware virtualization support.
+pub fn enable_virtualization() -> anyhow::Result<()> {
+    vhal::init()?;
+    Ok(())
 }
diff --git a/src/vcpu.rs b/src/vcpu.rs
deleted file mode 100644
index 3e5aec9..0000000
--- a/src/vcpu.rs
+++ /dev/null
@@ -1,30 +0,0 @@
-//! Architecture dependent vcpu implementations.
-
-cfg_if::cfg_if! {
-    if #[cfg(target_arch = "x86_64")] {
-        pub use x86_vcpu::VmxArchVCpu as AxArchVCpuImpl;
-        pub use x86_vcpu::VmxArchPerCpuState as AxVMArchPerCpuImpl;
-        pub use x86_vcpu::has_hardware_support;
-        pub type AxVCpuCreateConfig = ();
-
-        // Note:
-        // According to the requirements of `x86_vcpu`,
-        // users of the `x86_vcpu` crate need to implement the `PhysFrameIf` trait for it with the help of `crate_interface`.
-        //
-        // Since in our hypervisor architecture, `axvm` is not responsible for OS-related resource management,
-        // we leave the `PhysFrameIf` implementation to `vmm_app`.
-    } else if #[cfg(target_arch = "riscv64")] {
-        pub use riscv_vcpu::RISCVVCpu as AxArchVCpuImpl;
-        pub use riscv_vcpu::RISCVPerCpu as AxVMArchPerCpuImpl;
-        pub use riscv_vcpu::RISCVVCpuCreateConfig as AxVCpuCreateConfig;
-        pub use riscv_vcpu::has_hardware_support;
-    } else if #[cfg(target_arch = "aarch64")] {
-        pub use arm_vcpu::Aarch64VCpu as AxArchVCpuImpl;
-        pub use arm_vcpu::Aarch64PerCpu as AxVMArchPerCpuImpl;
-        pub use arm_vcpu::Aarch64VCpuCreateConfig as AxVCpuCreateConfig;
-        pub use arm_vcpu::Aarch64VCpuSetupConfig as AxVCpuSetupConfig;
-        pub use arm_vcpu::has_hardware_support;
-
-        pub use arm_vgic::vtimer::get_sysreg_device;
-    }
-}
diff --git a/src/vcpu/mod.rs b/src/vcpu/mod.rs
new file mode 100644
index 0000000..83ba201
--- /dev/null
+++ b/src/vcpu/mod.rs
@@ -0,0 +1,54 @@
+use crate::{
+    CpuId, RunError, VmId,
+    arch::HCpu,
+    data::{VmData, VmDataWeak},
+    vhal::cpu::{CpuHardId, HCpuExclusive},
+};
+
+pub trait VCpuOp: core::fmt::Debug + Send + 'static {
+    fn bind_id(&self) -> CpuId;
+    fn hard_id(&self) -> CpuHardId;
+    fn run(&mut self) -> Result<(), RunError>;
+}
+
+#[derive(Debug)]
+pub struct VCpuCommon {
+    pub(crate) hcpu: HCpuExclusive,
+    vm: VmDataWeak,
+}
+
+impl VCpuCommon {
+    pub fn vm_id(&self) -> VmId {
+        self.vm.id()
+    }
+
+    pub fn new_exclusive(bind: Option<CpuId>, vm: VmDataWeak) -> anyhow::Result<Self> {
+        let hcpu = HCpuExclusive::try_new(bind)
+            .ok_or_else(|| anyhow!("Failed to allocate cpu with id `{bind:?}`"))?;
+        Ok(VCpuCommon { hcpu, vm })
+    }
+
+    pub fn bind_id(&self) -> CpuId {
+        self.hcpu.id()
+    }
+
+    pub fn hard_id(&self) -> CpuHardId {
+        self.hcpu.hard_id()
+    }
+
+    #[inline]
+    pub fn is_active(&self) -> bool {
+        self.vm.is_active()
+    }
+
+    pub fn with_hcpu<F, R>(&self, f: F) -> R
+    where
+        F: FnOnce(&HCpu) -> R,
+    {
+        self.hcpu.with_cpu(f)
+    }
+
+    pub fn vm(&self) -> anyhow::Result<VmData> {
+        self.vm.try_upgrade()
+    }
+}
diff --git a/src/vhal/cpu.rs b/src/vhal/cpu.rs
new file mode 100644
index 0000000..1285a75
--- /dev/null
+++ b/src/vhal/cpu.rs
@@ -0,0 +1,100 @@
+use core::fmt::Display;
+
+use bitmap_allocator::{BitAlloc, BitAlloc4K};
+use spin::Mutex;
+
+use crate::{
+    arch::HCpu,
+    vhal::{ArchCpuData, precpu::PreCpuSet},
+};
+
+pub(super) static PRE_CPU: PreCpuSet<HCpu> = PreCpuSet::new();
+pub(super) static HCPU_ALLOC: Mutex<BitAlloc4K> = Mutex::new(BitAlloc4K::DEFAULT);
+
+#[derive(Debug)]
+pub struct HCpuExclusive(CpuId);
+
+impl HCpuExclusive {
+    pub fn id(&self) -> CpuId {
+        self.0
+    }
+
+    pub fn try_new(id: Option<CpuId>) -> Option<Self> {
+        let mut a = HCPU_ALLOC.lock();
+        match id {
+            Some(id) => {
+                // Try to allocate the specific ID
+                let raw = a.alloc_contiguous(Some(id.raw()), 1, 0)?;
+                Some(HCpuExclusive(CpuId::new(raw)))
+            }
+            None => {
+                // Auto-allocate any available ID
+                let raw_id = a.alloc()?;
+                Some(HCpuExclusive(CpuId::new(raw_id)))
+            }
+        }
+    }
+
+    pub fn with_cpu<F, R>(&self, f: F) -> R
+    where
+        F: FnOnce(&HCpu) -> R,
+    {
+        for (_id, cpu) in PRE_CPU.iter() {
+            if cpu.id == self.0 {
+                return f(cpu);
+            }
+        }
+        panic!("CPU data not found for CPU ID {}", self.0);
+    }
+
+    pub fn hard_id(&self) -> CpuHardId {
+        self.with_cpu(|cpu| cpu.hard_id())
+    }
+}
+
+impl Drop for HCpuExclusive {
+    fn drop(&mut self) {
+        let mut allocator = HCPU_ALLOC.lock();
+        allocator.dealloc(self.0.raw());
+    }
+}
+
+#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
+#[repr(transparent)]
+pub struct CpuHardId(usize);
+
+impl CpuHardId {
+    pub fn new(id: usize) -> Self {
+        CpuHardId(id)
+    }
+
+    pub fn raw(&self) -> usize {
+        self.0
+    }
+}
+
+impl Display for CpuHardId {
+    fn fmt(&self, f: &mut
core::fmt::Formatter<'_>) -> core::fmt::Result { + write!(f, "CPU Hard({:#x})", self.0) + } +} + +#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)] +#[repr(transparent)] +pub struct CpuId(usize); + +impl CpuId { + pub fn new(id: usize) -> Self { + CpuId(id) + } + + pub fn raw(&self) -> usize { + self.0 + } +} + +impl Display for CpuId { + fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { + write!(f, "CPU({})", self.0) + } +} diff --git a/src/vhal/mod.rs b/src/vhal/mod.rs new file mode 100644 index 0000000..a2be9a7 --- /dev/null +++ b/src/vhal/mod.rs @@ -0,0 +1,91 @@ +use alloc::vec::Vec; +use axstd::{ + os::arceos::{api::task::AxCpuMask, modules::axtask::set_current_affinity}, + thread::yield_now, +}; +use bitmap_allocator::BitAlloc; +use core::sync::atomic::{AtomicUsize, Ordering}; + +use crate::{ + HostPhysAddr, HostVirtAddr, TASK_STACK_SIZE, + arch::{HCpu, Hal}, + vhal::cpu::{CpuHardId, CpuId}, +}; + +pub(crate) mod cpu; +pub(crate) mod precpu; +mod timer; + +pub fn init() -> anyhow::Result<()> { + Hal::init()?; + + static CORES: AtomicUsize = AtomicUsize::new(0); + + let cpu_count = cpu_count(); + + info!("Initializing VHal for {cpu_count} CPUs..."); + cpu::PRE_CPU.init(); + + for cpu_id in 0..cpu_count { + let id = CpuId::new(cpu_id); + axstd::thread::Builder::new() + .name(format!("init-cpu-{}", cpu_id)) + .stack_size(TASK_STACK_SIZE) + .spawn(move || { + info!("Core {cpu_id} is initializing hardware virtualization support..."); + // Initialize cpu affinity here. + assert!( + set_current_affinity(AxCpuMask::one_shot(cpu_id)), + "Initialize CPU affinity failed!" + ); + info!("Enabling hardware virtualization support on core {id}"); + timer::init_percpu(); + + let cpu_data = Hal::current_cpu_init(id).expect("Enable virtualization failed!"); + unsafe { cpu::PRE_CPU.set(cpu_data.hard_id(), cpu_data) }; + let _ = CORES.fetch_add(1, Ordering::Release); + }) + .map_err(|e| anyhow!("{e:?}"))?; + } + info!("Waiting for all cores to enable hardware virtualization..."); + + // Wait for all cores to enable virtualization. + while CORES.load(Ordering::Acquire) != cpu_count { + // Use `yield_now` instead of `core::hint::spin_loop` to avoid deadlock. 
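+        // (with a cooperative scheduler, spinning here would never let the
+        // per-CPU init threads run on this core)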
+            yield_now();
+    }
+
+    cpu::HCPU_ALLOC.lock().insert(0..cpu_count);
+
+    info!("All cores have enabled hardware virtualization support.");
+
+    Ok(())
+}
+
+pub fn cpu_count() -> usize {
+    axruntime::cpu_count()
+}
+
+pub(crate) trait ArchHal {
+    fn init() -> anyhow::Result<()>;
+    fn cache_flush(vaddr: HostVirtAddr, size: usize);
+    fn cpu_hard_id() -> CpuHardId;
+    fn cpu_list() -> Vec<CpuHardId>;
+    fn current_cpu_init(id: CpuId) -> anyhow::Result<HCpu>;
+}
+
+pub(crate) trait ArchCpuData {
+    fn hard_id(&self) -> CpuHardId;
+}
+
+pub fn phys_to_virt(paddr: HostPhysAddr) -> HostVirtAddr {
+    axhal::mem::phys_to_virt(paddr.as_usize().into())
+        .as_usize()
+        .into()
+}
+
+pub fn virt_to_phys(vaddr: HostVirtAddr) -> HostPhysAddr {
+    axhal::mem::virt_to_phys(vaddr.as_usize().into())
+        .as_usize()
+        .into()
+}
diff --git a/src/vhal/precpu.rs b/src/vhal/precpu.rs
new file mode 100644
index 0000000..3d1b270
--- /dev/null
+++ b/src/vhal/precpu.rs
@@ -0,0 +1,52 @@
+use alloc::collections::BTreeMap;
+use core::{cell::UnsafeCell, ops::Deref};
+
+use crate::{
+    arch::Hal,
+    vhal::{ArchHal, cpu::CpuHardId},
+};
+
+pub(crate) struct PreCpuSet<T>(UnsafeCell<BTreeMap<CpuHardId, Option<T>>>);
+
+unsafe impl<T> Sync for PreCpuSet<T> {}
+unsafe impl<T> Send for PreCpuSet<T> {}
+
+impl<T> PreCpuSet<T> {
+    pub const fn new() -> Self {
+        PreCpuSet(UnsafeCell::new(BTreeMap::new()))
+    }
+
+    pub unsafe fn set(&self, cpu_id: CpuHardId, val: T) {
+        let pre_cpu_map = unsafe { &mut *self.0.get() };
+        pre_cpu_map.insert(cpu_id, Some(val));
+    }
+
+    pub fn init(&self) {
+        let cpu_list = Hal::cpu_list();
+        debug!("Initializing PreCpuSet for CPUs: {:?}", cpu_list);
+        for cpu_id in cpu_list {
+            let v = unsafe { &mut *self.0.get() };
+            v.insert(cpu_id, None);
+        }
+    }
+
+    pub fn iter(&self) -> impl Iterator<Item = (CpuHardId, &T)> {
+        let set = unsafe { &*self.0.get() };
+        set.iter()
+            .map(|(k, v)| (*k, v.as_ref().expect("CPU data not initialized!")))
+    }
+}
+
+impl<T> Deref for PreCpuSet<T> {
+    type Target = T;
+
+    fn deref(&self) -> &Self::Target {
+        let set = unsafe { &*self.0.get() };
+        let cpu_id = Hal::cpu_hard_id();
+        let cpu_data = set
+            .get(&cpu_id)
+            .and_then(|data| data.as_ref())
+            .expect("CPU data not initialized!");
+        cpu_data
+    }
+}
diff --git a/src/vhal/timer.rs b/src/vhal/timer.rs
new file mode 100644
index 0000000..d8a6bfe
--- /dev/null
+++ b/src/vhal/timer.rs
@@ -0,0 +1,113 @@
+use core::sync::atomic::AtomicUsize;
+use core::sync::atomic::Ordering;
+
+use axhal;
+
+use alloc::boxed::Box;
+use kspin::SpinNoIrq;
+use lazyinit::LazyInit;
+use timer_list::{TimeValue, TimerEvent, TimerList};
+
+static TOKEN: AtomicUsize = AtomicUsize::new(0);
+// const PERIODIC_INTERVAL_NANOS: u64 = axhal::time::NANOS_PER_SEC / axconfig::TICKS_PER_SEC as u64;
+
+/// Represents a timer event in the virtual machine monitor (VMM).
+///
+/// This struct holds a unique token for the timer and a callback function
+/// that will be executed when the timer expires.
+pub struct VmmTimerEvent {
+    // Unique identifier for the timer event
+    token: usize,
+    // Callback function to be executed when the timer expires
+    timer_callback: Box<dyn FnOnce(TimeValue) + Send + 'static>,
+}
+
+impl VmmTimerEvent {
+    fn new<F>(token: usize, f: F) -> Self
+    where
+        F: FnOnce(TimeValue) + Send + 'static,
+    {
+        Self {
+            token,
+            timer_callback: Box::new(f),
+        }
+    }
+}
+
+impl TimerEvent for VmmTimerEvent {
+    fn callback(self, now: TimeValue) {
+        (self.timer_callback)(now)
+    }
+}
+
+#[percpu::def_percpu]
+static TIMER_LIST: LazyInit<SpinNoIrq<TimerList<VmmTimerEvent>>> = LazyInit::new();
+
+/// Registers a new timer that will execute at the specified deadline
+///
+/// # Arguments
+/// - `deadline`: The absolute time in nanoseconds when the timer should trigger
+/// - `handler`: The callback function to execute when the timer expires
+///
+/// # Returns
+/// A unique token that can be used to cancel this timer later
+pub fn register_timer<F>(deadline: u64, handler: F) -> usize
+where
+    F: FnOnce(TimeValue) + Send + 'static,
+{
+    trace!("Registering timer...");
+    trace!(
+        "deadline is {:#?} = {:#?}",
+        deadline,
+        TimeValue::from_nanos(deadline)
+    );
+    let timer_list = unsafe { TIMER_LIST.current_ref_mut_raw() };
+    let mut timers = timer_list.lock();
+    let token = TOKEN.fetch_add(1, Ordering::Release);
+    let event = VmmTimerEvent::new(token, handler);
+    timers.set(TimeValue::from_nanos(deadline), event);
+    token
+}
+
+/// Cancels a timer with the specified token.
+///
+/// # Parameters
+/// - `token`: The unique token of the timer to cancel.
+pub fn cancel_timer(token: usize) {
+    let timer_list = unsafe { TIMER_LIST.current_ref_mut_raw() };
+    let mut timers = timer_list.lock();
+    timers.cancel(|event| event.token == token);
+}
+
+/// Check and process any pending timer events
+pub fn check_events() {
+    // info!("Checking timer events...");
+    // info!("now is {:#?}", axhal::time::wall_time());
+    let timer_list = unsafe { TIMER_LIST.current_ref_mut_raw() };
+    loop {
+        let now = axhal::time::wall_time();
+        let event = timer_list.lock().expire_one(now);
+        if let Some((_deadline, event)) = event {
+            trace!("pick one {_deadline:#?} to handle!!!");
+            event.callback(now);
+        } else {
+            break;
+        }
+    }
+}
+
+// /// Schedule the next timer event based on the periodic interval
+// pub fn scheduler_next_event() {
+//     trace!("Scheduling next event...");
+//     let now_ns = axhal::time::monotonic_time_nanos();
+//     let deadline = now_ns + PERIODIC_INTERVAL_NANOS;
+//     debug!("PHY deadline {} !!!", deadline);
+//     axhal::time::set_oneshot_timer(deadline);
+// }
+
+/// Initialize the hypervisor timer system
+pub fn init_percpu() {
+    info!("Initializing HV Timer...");
+    let timer_list = unsafe { TIMER_LIST.current_ref_mut_raw() };
+    timer_list.init_once(SpinNoIrq::new(TimerList::new()));
+}
diff --git a/src/vm.rs b/src/vm.rs
deleted file mode 100644
index 9f776b4..0000000
--- a/src/vm.rs
+++ /dev/null
@@ -1,627 +0,0 @@
-use alloc::boxed::Box;
-use alloc::format;
-use alloc::sync::Arc;
-use alloc::vec::Vec;
-use axvmconfig::VMInterruptMode;
-use core::sync::atomic::{AtomicBool, Ordering};
-use memory_addr::{align_down_4k, align_up_4k};
-
-use axerrno::{AxResult, ax_err, ax_err_type};
-use spin::Mutex;
-
-use axaddrspace::{AddrSpace, GuestPhysAddr, HostPhysAddr, MappingFlags, device::AccessWidth};
-use axdevice::{AxVmDeviceConfig, AxVmDevices};
-use axvcpu::{AxArchVCpu, AxVCpu, AxVCpuExitReason, AxVCpuHal};
-use cpumask::CpuMask;
-
-use crate::config::{AxVMConfig, VmMemMappingType};
-use crate::vcpu::{AxArchVCpuImpl, AxVCpuCreateConfig};
-use crate::{AxVMHal, has_hardware_support};
diff --git a/src/vm.rs b/src/vm.rs
deleted file mode 100644
index 9f776b4..0000000
--- a/src/vm.rs
+++ /dev/null
@@ -1,627 +0,0 @@
-use alloc::boxed::Box;
-use alloc::format;
-use alloc::sync::Arc;
-use alloc::vec::Vec;
-use axvmconfig::VMInterruptMode;
-use core::sync::atomic::{AtomicBool, Ordering};
-use memory_addr::{align_down_4k, align_up_4k};
-
-use axerrno::{AxResult, ax_err, ax_err_type};
-use spin::Mutex;
-
-use axaddrspace::{AddrSpace, GuestPhysAddr, HostPhysAddr, MappingFlags, device::AccessWidth};
-use axdevice::{AxVmDeviceConfig, AxVmDevices};
-use axvcpu::{AxArchVCpu, AxVCpu, AxVCpuExitReason, AxVCpuHal};
-use cpumask::CpuMask;
-
-use crate::config::{AxVMConfig, VmMemMappingType};
-use crate::vcpu::{AxArchVCpuImpl, AxVCpuCreateConfig};
-use crate::{AxVMHal, has_hardware_support};
-
-#[cfg(target_arch = "aarch64")]
-use crate::vcpu::get_sysreg_device;
-
-const VM_ASPACE_BASE: usize = 0x0;
-const VM_ASPACE_SIZE: usize = 0x7fff_ffff_f000;
-
-/// A vCPU with architecture-independent interface.
-#[allow(type_alias_bounds)]
-type VCpu<U: AxVCpuHal> = AxVCpu<AxArchVCpuImpl<U>>;
-/// A reference to a vCPU.
-#[allow(type_alias_bounds)]
-pub type AxVCpuRef<U: AxVCpuHal> = Arc<VCpu<U>>;
-/// A reference to a VM.
-#[allow(type_alias_bounds)]
-pub type AxVMRef<H: AxVMHal, U: AxVCpuHal> = Arc<AxVM<H, U>>; // we know the bound is not enforced here, we keep it for clarity
-
-struct AxVMInnerConst<U: AxVCpuHal> {
-    id: usize,
-    config: AxVMConfig,
-    vcpu_list: Box<[AxVCpuRef<U>]>,
-    devices: AxVmDevices,
-}
-
-unsafe impl<U: AxVCpuHal> Send for AxVMInnerConst<U> {}
-unsafe impl<U: AxVCpuHal> Sync for AxVMInnerConst<U> {}
-
-struct AxVMInnerMut<H: AxVMHal> {
-    // Todo: use more efficient lock.
-    address_space: Mutex<AddrSpace<H::PagingHandler>>,
-    _marker: core::marker::PhantomData<H>,
-}
-
-const TEMP_MAX_VCPU_NUM: usize = 64;
-
-/// A Virtual Machine.
-pub struct AxVM<H: AxVMHal, U: AxVCpuHal> {
-    running: AtomicBool,
-    shutting_down: AtomicBool,
-    inner_const: AxVMInnerConst<U>,
-    inner_mut: AxVMInnerMut<H>,
-}
-
-impl<H: AxVMHal, U: AxVCpuHal> AxVM<H, U> {
-    /// Creates a new VM with the given configuration.
-    /// Returns an error if the configuration is invalid.
-    /// The VM is not started until `boot` is called.
-    pub fn new(config: AxVMConfig) -> AxResult<AxVMRef<H, U>> {
-        let vcpu_id_pcpu_sets = config.get_vcpu_affinities_pcpu_ids();
-
-        debug!(
-            "id: {}, VCpuIdPCpuSets: {:#x?}",
-            config.id(),
-            vcpu_id_pcpu_sets
-        );
-
-        let mut vcpu_list = Vec::with_capacity(vcpu_id_pcpu_sets.len());
-        for (vcpu_id, phys_cpu_set, _pcpu_id) in vcpu_id_pcpu_sets {
-            #[cfg(target_arch = "aarch64")]
-            let arch_config = AxVCpuCreateConfig {
-                mpidr_el1: _pcpu_id as _,
-                dtb_addr: config
-                    .image_config()
-                    .dtb_load_gpa
-                    .unwrap_or_default()
-                    .as_usize(),
-            };
-            #[cfg(target_arch = "riscv64")]
-            let arch_config = AxVCpuCreateConfig {
-                hart_id: vcpu_id as _,
-                dtb_addr: config
-                    .image_config()
-                    .dtb_load_gpa
-                    .unwrap_or(GuestPhysAddr::from_usize(0x9000_0000)),
-            };
-            #[cfg(target_arch = "x86_64")]
-            let arch_config = AxVCpuCreateConfig::default();
-
-            vcpu_list.push(Arc::new(VCpu::new(
-                config.id(),
-                vcpu_id,
-                0, // Currently not used.
-                phys_cpu_set,
-                arch_config,
-            )?));
-        }
-        let mut address_space =
-            AddrSpace::new_empty(GuestPhysAddr::from(VM_ASPACE_BASE), VM_ASPACE_SIZE)?;
-
-        for mem_region in config.memory_regions() {
-            let mapping_flags = MappingFlags::from_bits(mem_region.flags).ok_or_else(|| {
-                ax_err_type!(
-                    InvalidInput,
-                    format!("Illegal flags {:?}", mem_region.flags)
-                )
-            })?;
-
-            // Check mapping flags.
-            if mapping_flags.contains(MappingFlags::DEVICE) {
-                warn!(
-                    "Do not include the DEVICE flag in memory region flags; it should be configured in pass_through_devices"
-                );
-                continue;
-            }
-
-            info!(
-                "Setting up memory region: [{:#x}~{:#x}] {:?}",
-                mem_region.gpa,
-                mem_region.gpa + mem_region.size,
-                mapping_flags
-            );
-
-            // Handle ram region.
-            match mem_region.map_type {
-                VmMemMappingType::MapIdentical => {
-                    if H::alloc_memory_region_at(
-                        HostPhysAddr::from(mem_region.gpa),
-                        mem_region.size,
-                    ) {
-                    } else {
-                        address_space.map_linear(
-                            GuestPhysAddr::from(mem_region.gpa),
-                            HostPhysAddr::from(mem_region.gpa),
-                            mem_region.size,
-                            mapping_flags,
-                        )?;
-                        warn!(
-                            "Failed to allocate memory region at {:#x} for VM [{}]",
-                            mem_region.gpa,
-                            config.id()
-                        );
-                    }
-
-                    address_space.map_linear(
-                        GuestPhysAddr::from(mem_region.gpa),
-                        HostPhysAddr::from(mem_region.gpa),
-                        mem_region.size,
-                        mapping_flags,
-                    )?;
-                }
-                VmMemMappingType::MapAlloc => {
-                    // Note: currently we use `map_alloc`,
-                    // which allocates real physical memory in units of physical page frames,
-                    // which may not be contiguous!!!
-                    address_space.map_alloc(
-                        GuestPhysAddr::from(mem_region.gpa),
-                        mem_region.size,
-                        mapping_flags,
-                        true,
-                    )?;
-                }
-            }
-        }
-
-        let mut pt_dev_region = Vec::new();
-        for pt_device in config.pass_through_devices() {
-            trace!(
-                "PT dev {:?} region: [{:#x}~{:#x}] -> [{:#x}~{:#x}]",
-                pt_device.name,
-                pt_device.base_gpa,
-                pt_device.base_gpa + pt_device.length,
-                pt_device.base_hpa,
-                pt_device.base_hpa + pt_device.length
-            );
-            // Align the base address and length to 4K boundaries.
-            pt_dev_region.push((
-                align_down_4k(pt_device.base_gpa),
-                align_up_4k(pt_device.length),
-            ));
-        }
-
-        pt_dev_region.sort_by_key(|(gpa, _)| *gpa);
-
-        // Merge overlapping regions.
-        let pt_dev_region =
-            pt_dev_region
-                .into_iter()
-                .fold(Vec::<(usize, usize)>::new(), |mut acc, (gpa, len)| {
-                    if let Some(last) = acc.last_mut() {
-                        if last.0 + last.1 >= gpa {
-                            // Merge with the last region.
-                            last.1 = (last.0 + last.1).max(gpa + len) - last.0;
-                        } else {
-                            acc.push((gpa, len));
-                        }
-                    } else {
-                        acc.push((gpa, len));
-                    }
-                    acc
-                });
-
-        for (gpa, len) in &pt_dev_region {
-            address_space.map_linear(
-                GuestPhysAddr::from(*gpa),
-                HostPhysAddr::from(*gpa),
-                *len,
-                MappingFlags::DEVICE
-                    | MappingFlags::READ
-                    | MappingFlags::WRITE
-                    | MappingFlags::USER,
-            )?;
-        }
-
-        let mut devices = axdevice::AxVmDevices::new(AxVmDeviceConfig {
-            emu_configs: config.emu_devices().to_vec(),
-        });
-
-        let passthrough = config.interrupt_mode() == VMInterruptMode::Passthrough;
-
-        #[cfg(target_arch = "aarch64")]
-        {
-            if passthrough {
-                let spis = config.pass_through_spis();
-                let cpu_id = config.id() - 1; // FIXME: get the real CPU id.
-                let mut gicd_found = false;
-
-                for device in devices.iter_mmio_dev() {
-                    if let Some(result) = axdevice_base::map_device_of_type(
-                        device,
-                        |gicd: &arm_vgic::v3::vgicd::VGicD| {
-                            debug!("VGicD found, assigning SPIs...");
-
-                            for spi in spis {
-                                gicd.assign_irq(*spi + 32, cpu_id, (0, 0, 0, cpu_id as _))
-                            }
-
-                            Ok(())
-                        },
-                    ) {
-                        result?;
-                        gicd_found = true;
-                        break;
-                    }
-                }
-
-                if !gicd_found {
-                    warn!("Failed to assign SPIs: No VGicD found in device list");
-                }
-            } else {
-                // non-passthrough mode, we need to set up the virtual timer.
-                //
-                // FIXME: maybe let `axdevice` handle this automatically?
-                // how to let `axdevice` know whether the VM is in passthrough mode or not?
-                for dev in get_sysreg_device() {
-                    devices.add_sys_reg_dev(dev);
-                }
-            }
-        }
-
-        let result = Arc::new(Self {
-            running: AtomicBool::new(false),
-            shutting_down: AtomicBool::new(false),
-            inner_const: AxVMInnerConst {
-                id: config.id(),
-                config,
-                vcpu_list: vcpu_list.into_boxed_slice(),
-                devices,
-            },
-            inner_mut: AxVMInnerMut {
-                address_space: Mutex::new(address_space),
-                _marker: core::marker::PhantomData,
-            },
-        });
-
-        info!("VM created: id={}", result.id());
-
-        // Setup VCpus.
-        for vcpu in result.vcpu_list() {
-            let setup_config = {
-                #[cfg(target_arch = "aarch64")]
-                {
-                    crate::vcpu::AxVCpuSetupConfig {
-                        passthrough_interrupt: passthrough,
-                        passthrough_timer: passthrough,
-                    }
-                }
-                #[cfg(not(target_arch = "aarch64"))]
-                {
-                    <AxArchVCpuImpl<U> as AxArchVCpu>::SetupConfig::default()
-                }
-            };
-
-            let entry = if vcpu.id() == 0 {
-                result.inner_const.config.bsp_entry()
-            } else {
-                result.inner_const.config.ap_entry()
-            };
-            vcpu.setup(entry, result.ept_root(), setup_config)?;
-        }
-        info!("VM setup: id={}", result.id());
-
-        Ok(result)
-    }
-
-    /// Returns the VM id.
-    #[inline]
-    pub const fn id(&self) -> usize {
-        self.inner_const.id
-    }
-
-    /// Retrieves the vCPU corresponding to the given vcpu_id for the VM.
-    /// Returns `None` if the vCPU does not exist.
-    #[inline]
-    pub fn vcpu(&self, vcpu_id: usize) -> Option<AxVCpuRef<U>> {
-        self.vcpu_list().get(vcpu_id).cloned()
-    }
-
-    /// Returns the number of vCPUs corresponding to the VM.
-    #[inline]
-    pub const fn vcpu_num(&self) -> usize {
-        self.inner_const.vcpu_list.len()
-    }
-
-    /// Returns a reference to the list of vCPUs corresponding to the VM.
-    #[inline]
-    pub fn vcpu_list(&self) -> &[AxVCpuRef<U>] {
-        &self.inner_const.vcpu_list
-    }
-
-    /// Returns the base address of the two-stage address translation page table for the VM.
-    pub fn ept_root(&self) -> HostPhysAddr {
-        self.inner_mut.address_space.lock().page_table_root()
-    }
-
-    /// Returns the guest VM image load region as `Vec<&'static mut [u8]>`,
-    /// according to the given `image_load_gpa` and `image_size`.
-    /// `Vec<&'static mut [u8]>` is a series of (HVA) address segments,
-    /// which may correspond to non-contiguous physical addresses.
-    ///
-    /// FIXME:
-    /// Find a more elegant way to manage potentially non-contiguous physical memory
-    /// instead of `Vec<&'static mut [u8]>`.
-    pub fn get_image_load_region(
-        &self,
-        image_load_gpa: GuestPhysAddr,
-        image_size: usize,
-    ) -> AxResult<Vec<&'static mut [u8]>> {
-        let addr_space = self.inner_mut.address_space.lock();
-        let image_load_hva = addr_space
-            .translated_byte_buffer(image_load_gpa, image_size)
-            .expect("Failed to translate kernel image load address");
-        Ok(image_load_hva)
-    }
-
-    /// Returns whether the VM is running.
-    pub fn running(&self) -> bool {
-        self.running.load(Ordering::Relaxed)
-    }
-
-    /// Boots the VM by setting the running flag to true.
-    pub fn boot(&self) -> AxResult {
-        if !has_hardware_support() {
-            ax_err!(Unsupported, "Hardware does not support virtualization")
-        } else if self.running() {
-            ax_err!(BadState, format!("VM[{}] is already running", self.id()))
-        } else {
-            info!("Booting VM[{}]", self.id());
-            self.running.store(true, Ordering::Relaxed);
-            Ok(())
-        }
-    }
-
-    /// Returns whether the VM is shutting down.
-    pub fn shutting_down(&self) -> bool {
-        self.shutting_down.load(Ordering::Relaxed)
-    }
-
-    /// Shuts down the VM by setting the shutting_down flag to true.
-    ///
-    /// Currently, the "re-init" process of the VM is not implemented. Therefore, a VM can only
-    /// be booted once; after it is shut down, it cannot be booted again.
-    pub fn shutdown(&self) -> AxResult {
-        if self.shutting_down() {
-            ax_err!(
-                BadState,
-                format!("VM[{}] is already shutting down", self.id())
-            )
-        } else {
-            info!("Shutting down VM[{}]", self.id());
-            self.shutting_down.store(true, Ordering::Relaxed);
-            Ok(())
-        }
-    }
-
-    // TODO: implement suspend/resume.
-    // TODO: implement re-init.
-
-    /// Returns this VM's emulated devices.
-    pub fn get_devices(&self) -> &AxVmDevices {
-        &self.inner_const.devices
-    }
-
-    /// Runs a vCPU according to the given vcpu_id.
-    ///
-    /// ## Arguments
-    /// * `vcpu_id` - the id of the vCPU to run.
-    ///
-    /// ## Returns
-    /// * `AxVCpuExitReason` - the exit reason of the vCPU, wrapped in an `AxResult`.
-    pub fn run_vcpu(&self, vcpu_id: usize) -> AxResult<AxVCpuExitReason> {
-        let vcpu = self
-            .vcpu(vcpu_id)
-            .ok_or_else(|| ax_err_type!(InvalidInput, "Invalid vcpu_id"))?;
-
-        vcpu.bind()?;
-
-        let exit_reason = loop {
-            let exit_reason = vcpu.run()?;
-            trace!("{exit_reason:#x?}");
-            let handled = match &exit_reason {
-                AxVCpuExitReason::MmioRead {
-                    addr,
-                    width,
-                    reg,
-                    reg_width: _,
-                    signed_ext: _,
-                } => {
-                    let val = self
-                        .get_devices()
-                        .handle_mmio_read(*addr, (*width).into())?;
-                    vcpu.set_gpr(*reg, val);
-                    true
-                }
-                AxVCpuExitReason::MmioWrite { addr, width, data } => {
-                    self.get_devices()
-                        .handle_mmio_write(*addr, (*width).into(), *data as usize)?;
-                    true
-                }
-                AxVCpuExitReason::IoRead { port, width } => {
-                    let val = self.get_devices().handle_port_read(*port, *width)?;
-                    // The target is always eax/ax/al; todo: handle access_width correctly.
-                    vcpu.set_gpr(0, val);
-                    true
-                }
-                AxVCpuExitReason::IoWrite { port, width, data } => {
-                    self.get_devices()
-                        .handle_port_write(*port, *width, *data as usize)?;
-                    true
-                }
-                AxVCpuExitReason::SysRegRead { addr, reg } => {
-                    let val = self.get_devices().handle_sys_reg_read(
-                        *addr,
-                        // Generally speaking, the width of a system register is fixed and
-                        // need not be specified. `AccessWidth::Qword` here is just a
-                        // placeholder and may change in the future.
-                        AccessWidth::Qword,
-                    )?;
-                    vcpu.set_gpr(*reg, val);
-                    true
-                }
-                AxVCpuExitReason::SysRegWrite { addr, value } => {
-                    self.get_devices().handle_sys_reg_write(
-                        *addr,
-                        AccessWidth::Qword,
-                        *value as usize,
-                    )?;
-                    true
-                }
-                AxVCpuExitReason::NestedPageFault { addr, access_flags } => self
-                    .inner_mut
-                    .address_space
-                    .lock()
-                    .handle_page_fault(*addr, *access_flags),
-                _ => false,
-            };
-            if !handled {
-                break exit_reason;
-            }
-        };
-
-        vcpu.unbind()?;
-        Ok(exit_reason)
-    }
-
-    /// Injects an interrupt into the vCPU.
-    pub fn inject_interrupt_to_vcpu(
-        &self,
-        targets: CpuMask<TEMP_MAX_VCPU_NUM>,
-        irq: usize,
-    ) -> AxResult {
-        let vm_id = self.id();
-        // Check that the currently running VM is self.
-        //
-        // Injecting an interrupt into a vCPU of another VM is not supported yet.
-        //
-        // It may be supported in the future, as an essential feature for cross-VM communication.
-        if H::current_vm_id() != self.id() {
-            panic!("Injecting interrupt to a vcpu in another VM is not supported");
-        }
-
-        for target_vcpu in &targets {
-            H::inject_irq_to_vcpu(vm_id, target_vcpu, irq)?;
-        }
-
-        Ok(())
-    }
-
-    /// Returns a reference to the VM's configuration.
-    pub fn config(&self) -> &AxVMConfig {
-        &self.inner_const.config
-    }
-
-    /// Maps a region of host physical memory to guest physical memory.
-    pub fn map_region(
-        &self,
-        gpa: GuestPhysAddr,
-        hpa: HostPhysAddr,
-        size: usize,
-        flags: MappingFlags,
-    ) -> AxResult<()> {
-        self.inner_mut
-            .address_space
-            .lock()
-            .map_linear(gpa, hpa, size, flags)
-    }
-
-    /// Unmaps a region of guest physical memory.
-    pub fn unmap_region(&self, gpa: GuestPhysAddr, size: usize) -> AxResult<()> {
-        self.inner_mut.address_space.lock().unmap(gpa, size)
-    }
-
-    /// Reads an object of type `T` from the guest physical address.
-    pub fn read_from_guest_of<T: Sized>(&self, gpa_ptr: GuestPhysAddr) -> AxResult<T> {
-        let size = core::mem::size_of::<T>();
-
-        // Ensure the address is properly aligned for the type.
-        if gpa_ptr.as_usize() % core::mem::align_of::<T>() != 0 {
-            return ax_err!(InvalidInput, "Unaligned guest physical address");
-        }
-
-        let addr_space = self.inner_mut.address_space.lock();
-        match addr_space.translated_byte_buffer(gpa_ptr, size) {
-            Some(buffers) => {
-                let mut data_bytes = Vec::with_capacity(size);
-                for chunk in buffers {
-                    let remaining = size - data_bytes.len();
-                    let chunk_size = remaining.min(chunk.len());
-                    data_bytes.extend_from_slice(&chunk[..chunk_size]);
-                    if data_bytes.len() >= size {
-                        break;
-                    }
-                }
-                if data_bytes.len() < size {
-                    return ax_err!(
-                        InvalidInput,
-                        "Insufficient data in guest memory to read the requested object"
-                    );
-                }
-                // Use `ptr::read_unaligned` for safety in case of unaligned memory.
-                let data: T =
-                    unsafe { core::ptr::read_unaligned(data_bytes.as_ptr() as *const T) };
-                Ok(data)
-            }
-            None => ax_err!(
-                InvalidInput,
-                "Failed to translate guest physical address or insufficient buffer size"
-            ),
-        }
-    }
-
-    /// Writes an object of type `T` to the guest physical address.
-    pub fn write_to_guest_of<T: Sized>(&self, gpa_ptr: GuestPhysAddr, data: &T) -> AxResult {
-        let addr_space = self.inner_mut.address_space.lock();
-
-        match addr_space.translated_byte_buffer(gpa_ptr, core::mem::size_of::<T>()) {
-            Some(mut buffer) => {
-                let bytes = unsafe {
-                    core::slice::from_raw_parts(
-                        data as *const T as *const u8,
-                        core::mem::size_of::<T>(),
-                    )
-                };
-                let mut copied_bytes = 0;
-                for chunk in buffer.iter_mut() {
-                    let end = copied_bytes + chunk.len();
-                    chunk.copy_from_slice(&bytes[copied_bytes..end]);
-                    copied_bytes += chunk.len();
-                }
-                Ok(())
-            }
-            None => ax_err!(InvalidInput, "Failed to translate guest physical address"),
-        }
-    }
-
-    /// Allocates an IVC channel for an inter-VM communication region.
-    ///
-    /// ## Arguments
-    /// * `expected_size` - The expected size of the IVC channel in bytes.
-    /// ## Returns
-    /// * `AxResult<(GuestPhysAddr, usize)>` - A tuple containing the guest physical address of the allocated IVC channel and its actual size.
-    pub fn alloc_ivc_channel(&self, expected_size: usize) -> AxResult<(GuestPhysAddr, usize)> {
-        // Ensure the expected size is aligned to 4K.
-        let size = align_up_4k(expected_size);
-        let gpa = self.inner_const.devices.alloc_ivc_channel(size)?;
-        Ok((gpa, size))
-    }
-
-    /// Releases an IVC channel for an inter-VM communication region.
-    /// ## Arguments
-    /// * `gpa` - The guest physical address of the IVC channel to release.
-    /// * `size` - The size of the IVC channel in bytes.
-    /// ## Returns
-    /// * `AxResult<()>` - An empty result indicating success or failure.
-    pub fn release_ivc_channel(&self, gpa: GuestPhysAddr, size: usize) -> AxResult {
-        self.inner_const.devices.release_ivc_channel(gpa, size)
-    }
-}
diff --git a/src/vm/addrspace.rs b/src/vm/addrspace.rs
new file mode 100644
index 0000000..1e4119e
--- /dev/null
+++ b/src/vm/addrspace.rs
@@ -0,0 +1,389 @@
+use alloc::vec::Vec;
+use axaddrspace::MappingFlags;
+use core::{
+    alloc::Layout,
+    ops::{Deref, DerefMut, Range},
+};
+use memory_addr::MemoryAddr;
+use std::sync::{Arc, Mutex};
+
+use ranges_ext::RangeInfo;
+
+use crate::{
+    AxVMConfig, GuestPhysAddr, HostPhysAddr, HostVirtAddr,
+    config::MemoryKind,
+    vhal::{ArchHal, phys_to_virt, virt_to_phys},
+};
+
+const ALIGN: usize = 1024 * 1024 * 2;
+
+type AddrSpaceRaw = axaddrspace::AddrSpace;
+type AddrSpaceSync = Arc<Mutex<AddrSpaceRaw>>;
+
+pub(crate) type VmRegionMap = ranges_ext::RangeSetAlloc<VmRegion>;
+
+pub struct VmAddrSpace {
+    pub aspace: AddrSpaceSync,
+    pub region_map: VmRegionMap,
+    kernel_entry: GuestPhysAddr,
+    kernel_memory_index: usize,
+    memories: Vec<GuestMemory>,
+}
+
+impl VmAddrSpace {
+    pub fn new(gpt_levels: usize, vm_addr_space: Range<GuestPhysAddr>) -> anyhow::Result<Self> {
+        let mut region_map = VmRegionMap::new(Vec::new());
+        let vm_space_size = vm_addr_space.end.as_usize() - vm_addr_space.start.as_usize();
+        // The whole guest space starts out as a single passthrough region;
+        // memory regions added later carve it up.
+        region_map.add(VmRegion {
+            gpa: vm_addr_space.start,
+            size: vm_space_size,
+            kind: VmRegionKind::Passthrough,
+        })?;
+        // Create the address space for the VM.
+        let address_space = AddrSpaceRaw::new_empty(
+            gpt_levels,
+            vm_addr_space.start.as_usize().into(),
+            vm_space_size,
+        )
+        .map_err(|e| anyhow!("Failed to create address space: {e:?}"))?;
+
+        Ok(Self {
+            aspace: Arc::new(Mutex::new(address_space)),
+            region_map,
+            kernel_entry: GuestPhysAddr::from_usize(0),
+            kernel_memory_index: 0,
+            memories: vec![],
+        })
+    }
+
+    pub fn gpt_root(&self) -> HostPhysAddr {
+        let g = self.aspace.lock();
+        g.page_table_root().as_usize().into()
+    }
+
+    pub fn kernel_entry(&self) -> GuestPhysAddr {
+        self.kernel_entry
+    }
+
+    pub fn new_memory(&mut self, kind: &MemoryKind) -> anyhow::Result<()> {
+        let _gpa;
+        let _size;
+        let _align = 0x1000;
+        let mut hva = HostVirtAddr::from(0);
+        let _payload;
+        let flags =
+            MappingFlags::READ | MappingFlags::WRITE | MappingFlags::EXECUTE | MappingFlags::USER;
+
+        match kind {
+            MemoryKind::Identical { size } => {
+                // Identity mapping backed by a fresh, zeroed host allocation.
+                let array = Array::new(*size, ALIGN);
+
+                hva = HostVirtAddr::from(array.as_mut_ptr() as usize);
+                _gpa = GuestPhysAddr::from_usize(virt_to_phys(hva).as_usize());
+                _size = *size;
+                _payload = Some(array);
+                let mut g = self.aspace.lock();
+                g.map_linear(
+                    _gpa.as_usize().into(),
+                    hva.as_usize().into(),
+                    _size.align_up_4k(),
+                    flags,
+                )
+                .unwrap();
+            }
+            MemoryKind::Reserved { hpa, size } => {
+                // A host-reserved physical range handed to the guest as-is.
+                hva = phys_to_virt(*hpa);
+                _gpa = GuestPhysAddr::from_usize(hva.as_usize());
+                _size = *size;
+                _payload = None;
+                let mut g = self.aspace.lock();
+                g.map_linear(
+                    _gpa.as_usize().into(),
+                    hva.as_usize().into(),
+                    _size.align_up_4k(),
+                    flags,
+                )
+                .unwrap();
+            }
+            MemoryKind::Vmem { gpa, size } => {
+                // Guest memory backed by possibly non-contiguous host frames.
+                _gpa = *gpa;
+                _size = *size;
+                _payload = None;
+                let mut g = self.aspace.lock();
+                g.map_alloc(_gpa.as_usize().into(), _size.align_up_4k(), flags, true)
+                    .unwrap();
+            }
+        }
+
+        self.memories.push(GuestMemory {
+            gpa: _gpa,
+            hva,
+            layout: Layout::from_size_align(_size, _align).unwrap(),
+            _payload,
+            aspace: self.aspace.clone(),
+        });
+
+        self.region_map.add(VmRegion {
+            gpa: _gpa,
+            size: _size,
+            kind: VmRegionKind::Memory,
+        })?;
+
+        Ok(())
+    }
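+
+    // Illustrative call sequence (comments only, hypothetical values; `space`
+    // is a `VmAddrSpace`): back the guest with one freshly allocated identity
+    // region, one reserved host range, and one guest-virtual allocation.
+    //
+    // space.new_memory(&MemoryKind::Identical { size: 0x800_0000 })?;
+    // space.new_memory(&MemoryKind::Reserved { hpa: HostPhysAddr::from(0x8000_0000), size: 0x100_0000 })?;
+    // space.new_memory(&MemoryKind::Vmem { gpa: GuestPhysAddr::from_usize(0x4000_0000), size: 0x100_0000 })?;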
+
+    pub fn load_kernel_image(&mut self, config: &AxVMConfig) -> anyhow::Result<()> {
+        let mut idx = 0;
+        let image_cfg = config.image_config();
+        let gpa = if let Some(gpa) = image_cfg.kernel.gpa {
+            let mut found = false;
+            for (i, region) in self.memories.iter().enumerate() {
+                if (region.gpa..region.gpa + region.size()).contains(&gpa) {
+                    idx = i;
+                    found = true;
+                    break;
+                }
+            }
+            if !found {
+                return Err(anyhow!(
+                    "Kernel load GPA {:#x} not within any memory region",
+                    gpa.as_usize()
+                ));
+            }
+            gpa
+        } else {
+            let mut gpa = None;
+            for (i, region) in self.memories.iter().enumerate() {
+                if region.size() >= image_cfg.kernel.data.len() {
+                    // Offset the image 2 MiB into the region.
+                    gpa = Some(region.gpa + 2 * 1024 * 1024);
+                    idx = i;
+                    break;
+                }
+            }
+            gpa.ok_or(anyhow!("No suitable memory region found for kernel image"))?
+        };
+
+        debug!(
+            "Loading kernel image into GPA @{:#x} for VM {} ({})",
+            gpa.as_usize(),
+            config.id(),
+            config.name()
+        );
+        let offset = gpa.as_usize() - self.memories[idx].gpa().as_usize();
+        self.memories[idx].copy_from_slice(offset, &image_cfg.kernel.data);
+        self.kernel_memory_index = idx;
+        self.kernel_entry = gpa;
+        Ok(())
+    }
+
+    pub fn memories(&self) -> &[GuestMemory] {
+        &self.memories
+    }
+
+    pub fn load_dtb(&mut self, data: &[u8]) -> anyhow::Result<GuestPhysAddr> {
+        let guest_mem = self.memories().first().unwrap();
+        let mut dtb_start =
+            (guest_mem.gpa().as_usize() + guest_mem.size().min(512 * 1024 * 1024)) - data.len();
+        dtb_start = dtb_start.align_down_4k();
+
+        let gpa = GuestPhysAddr::from(dtb_start);
+        debug!("Loading generated DTB into GPA @{:#x}", dtb_start);
+        self.copy_to_guest(gpa, data);
+        Ok(gpa)
+    }
+
+    pub fn map_passthrough_regions(&self) -> anyhow::Result<()> {
+        let mut g = self.aspace.lock();
+        for region in self
+            .region_map
+            .iter()
+            .filter(|m| m.kind == VmRegionKind::Passthrough)
+        {
+            g.map_linear(
+                region.gpa.as_usize().into(),
+                region.gpa.as_usize().into(),
+                region.size.align_up_4k(),
+                MappingFlags::READ
+                    | MappingFlags::WRITE
+                    | MappingFlags::EXECUTE
+                    | MappingFlags::DEVICE
+                    | MappingFlags::USER,
+            )
+            .map_err(|e| {
+                anyhow!(
+                    "Failed to map passthrough region: [{:?}, {:?})\n  {e:?}",
+                    region.gpa,
+                    region.gpa + region.size
+                )
+            })?;
+        }
+
+        Ok(())
+    }
+
+    fn copy_to_guest(&mut self, gpa: GuestPhysAddr, data: &[u8]) {
+        let parts = self
+            .aspace
+            .lock()
+            .translated_byte_buffer(gpa.as_usize().into(), data.len())
+            .unwrap();
+        let mut offset = 0;
+        for part in parts {
+            let len = part.len().min(data.len() - offset);
+            // Copy into a prefix of the chunk: the final chunk may be larger than
+            // the remaining data, and `copy_from_slice` requires equal lengths.
+            part[..len].copy_from_slice(&data[offset..offset + len]);
+            offset += len;
+        }
+    }
+}
+
+#[derive(Debug, Clone)]
+pub struct VmRegion {
+    pub gpa: GuestPhysAddr,
+    pub size: usize,
+    pub kind: VmRegionKind,
+}
+
+#[derive(Debug, Clone, Copy, PartialEq, Eq)]
+pub enum VmRegionKind {
+    Passthrough,
+    Memory,
+}
+
+impl RangeInfo for VmRegion {
+    type Kind = VmRegionKind;
+
+    type Type = GuestPhysAddr;
+
+    fn range(&self) -> core::ops::Range<Self::Type> {
+        self.gpa..GuestPhysAddr::from_usize(self.gpa.as_usize() + self.size)
+    }
+
+    fn kind(&self) -> &Self::Kind {
+        &self.kind
+    }
+
+    fn overwritable(&self) -> bool {
+        matches!(self.kind, VmRegionKind::Passthrough)
+    }
+
+    fn clone_with_range(&self, range: core::ops::Range<Self::Type>) -> Self {
+        VmRegion {
+            gpa: range.start,
+            size: range.end.as_usize() - range.start.as_usize(),
+            kind: self.kind,
+        }
+    }
+}
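+
+// Illustrative semantics (comments only, assuming `RangeSetAlloc` splits
+// overwritable entries): adding a `Memory` region on top of the initial
+// whole-space `Passthrough` entry carves it into three pieces.
+//
+// let mut map = VmRegionMap::new(Vec::new());
+// map.add(VmRegion { gpa: GuestPhysAddr::from_usize(0), size: 0x1_0000_0000, kind: VmRegionKind::Passthrough })?;
+// map.add(VmRegion { gpa: GuestPhysAddr::from_usize(0x8000_0000), size: 0x1000_0000, kind: VmRegionKind::Memory })?;
+// // Result: [0, 0x8000_0000) Passthrough, [0x8000_0000, 0x9000_0000) Memory,
+// //         [0x9000_0000, 0x1_0000_0000) Passthrough.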
+
+pub struct Array {
+    ptr: *mut u8,
+    layout: Layout,
+}
+
+unsafe impl Send for Array {}
+unsafe impl Sync for Array {}
+
+impl Array {
+    pub fn new(size: usize, align: usize) -> Self {
+        let layout = Layout::from_size_align(size, align).unwrap();
+        // Zeroed allocation so fresh guest memory never leaks host data.
+        let ptr = unsafe { alloc::alloc::alloc_zeroed(layout) };
+        Array { ptr, layout }
+    }
+
+    pub fn as_mut_ptr(&self) -> *mut u8 {
+        self.ptr
+    }
+}
+
+impl Deref for Array {
+    type Target = [u8];
+
+    fn deref(&self) -> &Self::Target {
+        unsafe { core::slice::from_raw_parts(self.ptr, self.layout.size()) }
+    }
+}
+
+impl DerefMut for Array {
+    fn deref_mut(&mut self) -> &mut Self::Target {
+        unsafe { core::slice::from_raw_parts_mut(self.ptr, self.layout.size()) }
+    }
+}
+
+impl Drop for Array {
+    fn drop(&mut self) {
+        unsafe {
+            alloc::alloc::dealloc(self.ptr, self.layout);
+        }
+    }
+}
+
+pub struct GuestMemory {
+    gpa: GuestPhysAddr,
+    hva: HostVirtAddr,
+    layout: Layout,
+    aspace: AddrSpaceSync,
+    _payload: Option<Array>,
+}
+
+impl GuestMemory {
+    pub fn copy_from_slice(&mut self, offset: usize, data: &[u8]) {
+        assert!(data.len() <= self.size() - offset);
+
+        let g = self.aspace.lock();
+        let hva = g
+            .translated_byte_buffer(self.gpa.as_usize().into(), self.size())
+            .expect("Failed to translate kernel image load address");
+        let mut remain = data;
+        let mut skip = offset;
+
+        for buff in hva {
+            // Skip whole chunks (and the head of one chunk) until `offset` is consumed.
+            if skip >= buff.len() {
+                skip -= buff.len();
+                continue;
+            }
+            let buff = &mut buff[skip..];
+            skip = 0;
+
+            let copy_size = core::cmp::min(remain.len(), buff.len());
+            buff[..copy_size].copy_from_slice(&remain[..copy_size]);
+            // Flush so the guest observes the data even before it enables its caches.
+            crate::arch::Hal::cache_flush(HostVirtAddr::from(buff.as_ptr() as usize), copy_size);
+            remain = &remain[copy_size..];
+            if remain.is_empty() {
+                break;
+            }
+        }
+    }
+
+    pub fn gpa(&self) -> GuestPhysAddr {
+        self.gpa
+    }
+
+    pub fn size(&self) -> usize {
+        self.layout.size()
+    }
+
+    // pub fn to_vec(&self) -> Vec<u8> {
+    //     let mut result = vec![];
+    //     let g = self.aspace.lock();
+    //     let hva = g
+    //         .translated_byte_buffer(self.gpa.as_usize().into(), self.size())
+    //         .expect("Failed to translate memory region");
+    //     for buff in hva {
+    //         result.extend_from_slice(buff);
+    //     }
+    //     result.resize(self.size(), 0);
+    //     result
+    // }
+}
+
+impl Drop for GuestMemory {
+    fn drop(&mut self) {
+        let start = self.gpa.as_usize().align_down(self.layout.align());
+        let size = self.size().align_up(self.layout.align());
+
+        let mut g = self.aspace.lock();
+        g.unmap(start.into(), size).unwrap();
+    }
+}
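+
+// Illustrative end-to-end sketch (comments only, hypothetical values): build a
+// guest space, back it with memory, then place the kernel and DTB.
+//
+// let mut space = VmAddrSpace::new(
+//     3,
+//     GuestPhysAddr::from_usize(0)..GuestPhysAddr::from_usize(0x1_0000_0000),
+// )?;
+// space.new_memory(&MemoryKind::Identical { size: 0x800_0000 })?;
+// space.load_kernel_image(&config)?;
+// let dtb_gpa = space.load_dtb(&dtb_bytes)?;
+// space.map_passthrough_regions()?;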
diff --git a/src/vm/data.rs b/src/vm/data.rs
new file mode 100644
index 0000000..07f7964
--- /dev/null
+++ b/src/vm/data.rs
@@ -0,0 +1,313 @@
+use core::{
+    fmt::{self, Debug},
+    ops::Deref,
+};
+use std::{
+    string::String,
+    sync::{Arc, Weak},
+};
+
+use spin::RwLock;
+
+use crate::{
+    AxVMConfig, RunError, VmId, VmMachineInitedOps, VmMachineRunningOps, VmMachineUninitOps,
+    arch::{VmMachineRunning, VmMachineUninit},
+    vm::machine::{AtomicState, VMStatus, VmMachineState},
+};
+
+pub(crate) struct VmDataInner {
+    pub id: VmId,
+    pub name: String,
+    pub machine: RwLock<VmMachineState>,
+    pub status: AtomicState,
+    error: RwLock<Option<RunError>>,
+}
+
+impl VmDataInner {
+    pub fn new(config: AxVMConfig) -> Self {
+        Self {
+            id: config.id.into(),
+            name: config.name.clone(),
+            machine: RwLock::new(VmMachineState::Uninit(VmMachineUninit::new(config))),
+            status: AtomicState::new(VMStatus::Uninit),
+            error: RwLock::new(None),
+        }
+    }
+
+    pub fn stop(&self) -> anyhow::Result<()> {
+        let mut status_guard = self.machine.write();
+        match core::mem::replace(&mut *status_guard, VmMachineState::Switching) {
+            VmMachineState::Running(running) => {
+                let stopping = running.stop();
+                *status_guard = VmMachineState::Stopping(stopping);
+                self.status.store(VMStatus::Stopping);
+                Ok(())
+            }
+            other => {
+                // Not running: restore the state we took out and report the error.
+                *status_guard = other;
+                Err(anyhow::anyhow!("VM is not in Running state"))
+            }
+        }
+    }
+
+    pub fn wait(&self) -> anyhow::Result<()> {
+        while !matches!(self.status(), VMStatus::Stopped) {
+            // TODO: arceos bug, sleep never wakes up
+            // std::thread::sleep(std::time::Duration::from_millis(50));
+            std::thread::yield_now();
+        }
+        info!("VM {} ({}) has stopped.", self.id, self.name);
+        self.run_result()
+    }
+
+    #[inline]
+    pub fn status(&self) -> VMStatus {
+        self.status.load()
+    }
+
+    #[inline]
+    pub fn is_active(&self) -> bool {
+        let status = self.status();
+        status < VMStatus::Stopping
+    }
+
+    pub(crate) fn set_err(&self, err: RunError) {
+        let mut guard = self.error.write();
+        *guard = Some(err);
+    }
+
+    pub(crate) fn run_result(&self) -> anyhow::Result<()> {
+        let guard = self.error.read();
+        let res = guard.clone();
+        match res {
+            Some(err) => match err {
+                RunError::Exit => Ok(()),
+                RunError::ExitWithError(e) => Err(e),
+            },
+            None => Ok(()),
+        }
+    }
+}
+
+pub(crate) struct VmData {
+    inner: Arc<VmDataInner>,
+}
+
+impl VmData {
+    pub fn new(config: AxVMConfig) -> anyhow::Result<Self> {
+        Ok(Self {
+            inner: Arc::new(VmDataInner::new(config)),
+        })
+    }
+
+    pub fn id(&self) -> VmId {
+        self.inner.id
+    }
+
+    pub fn name(&self) -> &str {
+        &self.inner.name
+    }
+
+    pub fn init(&self) -> anyhow::Result<()> {
+        let next;
+        let res;
+        let next_state;
+
+        match self.replace_status(VmMachineState::Switching) {
+            VmMachineState::Uninit(uninit) => {
+                match uninit.init(self.downgrade()) {
+                    Ok(inited) => {
+                        next_state = Some(VMStatus::Inited);
+                        res = Ok(());
+                        next = VmMachineState::Inited(inited);
+                    }
+                    Err(e) => {
+                        self.set_err(RunError::ExitWithError(anyhow!("{e}")));
+                        next_state = Some(VMStatus::Stopped);
+                        next = VmMachineState::Stopped;
+                        res = Err(e);
+                    }
+                };
+            }
+            other => {
+                next = other;
+                next_state = None;
+                res = Err(anyhow::anyhow!("VM is not in Uninit state"));
+            }
+        }
+        self.replace_status(next);
+        if let Some(status) = next_state {
+            self.status.store(status);
+        }
+        res
+    }
+
+    fn replace_status(&self, new_status: VmMachineState) -> VmMachineState {
+        let mut status_guard = self.machine.write();
+        core::mem::replace(&mut *status_guard, new_status)
+    }
+
+    pub fn start(&self) -> anyhow::Result<()> {
+        let data = self.downgrade();
+        let next_state;
+        let res;
+        let next = match self.replace_status(VmMachineState::Switching) {
+            VmMachineState::Inited(init) => match init.start(data) {
+                Ok(running) => {
+                    next_state = Some(VMStatus::Running);
+                    res = Ok(());
+                    VmMachineState::Running(running)
+                }
+                Err(e) => {
+                    self.set_err(RunError::ExitWithError(anyhow!("{e}")));
+                    next_state = Some(VMStatus::Stopped);
+                    res = Err(e);
+                    VmMachineState::Stopped
+                }
+            },
+            other => {
+                next_state = None;
+                res = Err(anyhow::anyhow!("VM is not in Inited state"));
+                other
+            }
+        };
+        self.replace_status(next);
+        if let Some(status) = next_state {
+            self.status.store(status);
+        }
+        res
+    }
+
+    pub fn downgrade(&self) -> VmDataWeak {
+        VmDataWeak {
+            id: self.id(),
+            inner: Arc::downgrade(&self.inner),
+        }
+    }
+
+    pub(crate) fn with_machine_running<F, R>(&self, f: F) -> Result<R, RunError>
+    where
+        F: FnOnce(&VmMachineRunning) -> R,
+    {
+        loop {
+            let status = self.machine.read();
+            let running = match &*status {
+                VmMachineState::Running(running) => running,
+                VmMachineState::Switching => {
+                    // A transition is in flight; release the lock and retry.
+                    drop(status);
+                    std::thread::yield_now();
+                    continue;
+                }
+                _ => {
+                    return Err(RunError::ExitWithError(anyhow!(
+                        "VM is not in Running state"
+                    )));
+                }
+            };
+            return Ok(f(running));
+        }
+    }
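+
+    // State flow sketch (derived from the transitions above):
+    //
+    //   Uninit --init()--> Inited --start()--> Running --stop()--> Stopping --> Stopped
+    //
+    // `Switching` is a transient placeholder installed while a state value is
+    // consumed by `replace_status`, which is why the accessors spin on it.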
+
+    pub(crate) fn with_machine_running_mut<F, R>(&self, f: F) -> Result<R, RunError>
+    where
+        F: FnOnce(&mut VmMachineRunning) -> R,
+    {
+        loop {
+            let mut status = self.machine.write();
+            let running = match &mut *status {
+                VmMachineState::Running(running) => running,
+                VmMachineState::Switching => {
+                    drop(status);
+                    std::thread::yield_now();
+                    continue;
+                }
+                _ => {
+                    return Err(RunError::ExitWithError(anyhow!(
+                        "VM is not in Running state"
+                    )));
+                }
+            };
+            return Ok(f(running));
+        }
+    }
+}
+
+impl From<Arc<VmDataInner>> for VmData {
+    fn from(inner: Arc<VmDataInner>) -> Self {
+        Self { inner }
+    }
+}
+
+impl Deref for VmData {
+    type Target = VmDataInner;
+
+    fn deref(&self) -> &Self::Target {
+        &self.inner
+    }
+}
+
+#[derive(Clone)]
+pub struct VmDataWeak {
+    id: VmId,
+    inner: Weak<VmDataInner>,
+}
+
+impl VmDataWeak {
+    pub fn id(&self) -> VmId {
+        self.id
+    }
+
+    pub fn upgrade(&self) -> Option<VmData> {
+        Some(self.inner.upgrade()?.into())
+    }
+
+    pub fn try_upgrade(&self) -> anyhow::Result<VmData> {
+        let res = self
+            .upgrade()
+            .ok_or_else(|| anyhow::anyhow!("VM data has been dropped"))?;
+        Ok(res)
+    }
+
+    #[inline]
+    pub fn is_active(&self) -> bool {
+        if let Some(inner) = self.upgrade() {
+            inner.is_active()
+        } else {
+            false
+        }
+    }
+
+    pub(crate) fn set_stopped(&self) {
+        if let Some(inner) = self.upgrade() {
+            let mut status_guard = inner.machine.write();
+            *status_guard = VmMachineState::Stopped;
+            inner.status.store(VMStatus::Stopped);
+        }
+    }
+
+    pub(crate) fn wait_for_running(&self) {
+        while let Some(inner) = self.upgrade() {
+            let status = inner.status.load();
+            if status >= VMStatus::Running {
+                break;
+            }
+            std::thread::yield_now();
+        }
+    }
+}
+
+impl Debug for VmDataWeak {
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        match self.upgrade() {
+            Some(data) => write!(
+                f,
+                "VmDataWeak {{ id: {}, name: {} }}",
+                data.id(),
+                data.name()
+            ),
+            None => write!(f, "VmDataWeak {{ dropped }}"),
+        }
+    }
+}
diff --git a/src/vm/define.rs b/src/vm/define.rs
new file mode 100644
index 0000000..00e878f
--- /dev/null
+++ b/src/vm/define.rs
@@ -0,0 +1,69 @@
+use core::fmt;
+
+#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
+pub struct VmId(usize);
+
+impl VmId {
+    pub fn new_fixed(id: usize) -> Self {
+        VmId(id)
+    }
+
+    pub fn new() -> Self {
+        use core::sync::atomic::{AtomicUsize, Ordering};
+        static VM_ID_COUNTER: AtomicUsize = AtomicUsize::new(1);
+        let id = VM_ID_COUNTER.fetch_add(1, Ordering::Relaxed);
+        VmId(id)
+    }
+}
+
+impl Default for VmId {
+    fn default() -> Self {
+        VmId::new()
+    }
+}
+
+impl fmt::Display for VmId {
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        write!(f, "{}", self.0)
+    }
+}
+
+impl From<usize> for VmId {
+    fn from(value: usize) -> Self {
+        VmId(value)
+    }
+}
+
+impl From<VmId> for usize {
+    fn from(value: VmId) -> Self {
+        value.0
+    }
+}
+
+#[derive(Debug, Clone, Copy, PartialEq, Eq)]
+pub enum Status {
+    Idle,
+    Running,
+    ShuttingDown,
+    PoweredOff,
+}
+
+#[derive(thiserror::Error, Debug)]
+pub enum RunError {
+    #[error("VM exited normally")]
+    Exit,
+    #[error("VM exited with error: {0}")]
+    ExitWithError(#[from] anyhow::Error),
+}
+
+impl Clone for RunError {
+    fn clone(&self) -> Self {
+        match self {
+            RunError::Exit => RunError::Exit,
+            RunError::ExitWithError(err) => {
+                RunError::ExitWithError(anyhow::anyhow!(format!("{err}")))
+            }
+        }
+    }
+}
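+
+// Note: `anyhow::Error` does not implement `Clone`, so the manual impl above
+// re-creates the error from its `Display` output; the original error chain and
+// any backtrace are not preserved across a clone.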
diff --git a/src/vm/machine/mod.rs b/src/vm/machine/mod.rs
new file mode 100644
index 0000000..5c83822
--- /dev/null
+++ b/src/vm/machine/mod.rs
@@ -0,0 +1,102 @@
+use core::sync::atomic::{AtomicU8, Ordering};
+
+use crate::{
+    AxVMConfig, Status, VmId,
+    arch::{VmMachineInited, VmMachineRunning, VmMachineUninit, VmStatusStopping},
+    data::VmDataWeak,
+};
+
+mod running;
+
+pub(crate) use running::*;
+
+#[allow(unused)]
+pub trait VmMachineUninitOps {
+    type Inited: VmMachineInitedOps;
+    fn new(config: AxVMConfig) -> Self;
+    fn init(self, vmdata: VmDataWeak) -> Result<Self::Inited, anyhow::Error>
+    where
+        Self: Sized;
+}
+
+#[allow(unused)]
+pub trait VmMachineInitedOps {
+    type Running: VmMachineRunningOps;
+    fn id(&self) -> VmId;
+    fn name(&self) -> &str;
+    fn start(self, vmdata: VmDataWeak) -> Result<Self::Running, anyhow::Error>
+    where
+        Self: Sized;
+}
+
+#[allow(unused)]
+pub trait VmMachineRunningOps {
+    type Stopping: VmMachineStoppingOps;
+    fn stop(self) -> Self::Stopping;
+}
+
+pub trait VmMachineStoppingOps {}
+
+pub enum VmMachineState {
+    Uninit(VmMachineUninit),
+    Inited(VmMachineInited),
+    Running(VmMachineRunning),
+    Switching,
+    #[allow(unused)]
+    Stopping(VmStatusStopping),
+    Stopped,
+}
+
+/// Auxiliary wrapper that stores the current machine status in an atomically
+/// readable form so management threads can query it without synchronisation
+/// overhead.
+pub(crate) struct AtomicState(AtomicU8);
+
+impl AtomicState {
+    pub const fn new(state: VMStatus) -> Self {
+        Self(AtomicU8::new(state as u8))
+    }
+
+    #[inline]
+    pub fn load(&self) -> VMStatus {
+        VMStatus::from_u8(self.0.load(Ordering::Acquire))
+    }
+
+    pub fn store(&self, new_state: VMStatus) {
+        self.0.store(new_state as u8, Ordering::Release);
+    }
+}
+
+/// High-level VM lifecycle that is visible to callers of the [`Vm`] API.
+/// This is intentionally richer than the low-level `Status` that is returned
+/// by the architecture-specific implementation so that the shell and
+/// management layers can express user-friendly states.
+#[derive(Default, Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord)]
+#[repr(u8)]
+pub enum VMStatus {
+    #[default]
+    Uninit,
+    Switching,
+    Inited,
+    Running,
+    Suspended,
+    Stopping,
+    Stopped,
+}
+
+impl VMStatus {
+    fn from_u8(raw: u8) -> Self {
+        // SAFETY: `raw` only ever comes from `AtomicState`, which stores valid
+        // `VMStatus` discriminants.
+        unsafe { core::mem::transmute(raw) }
+    }
+}
+
+impl From<Status> for VMStatus {
+    fn from(status: Status) -> Self {
+        match status {
+            Status::Idle => VMStatus::Inited,
+            Status::Running => VMStatus::Running,
+            Status::ShuttingDown => VMStatus::Stopping,
+            Status::PoweredOff => VMStatus::Stopped,
+        }
+    }
+}
diff --git a/src/vm/machine/running.rs b/src/vm/machine/running.rs
new file mode 100644
index 0000000..0ab9a17
--- /dev/null
+++ b/src/vm/machine/running.rs
@@ -0,0 +1,106 @@
+use core::sync::atomic::{AtomicBool, AtomicUsize, Ordering};
+use std::{
+    collections::btree_map::BTreeMap,
+    os::arceos::{api::task::AxCpuMask, modules::axtask::set_current_affinity},
+    sync::Arc,
+};
+
+use alloc::vec::Vec;
+
+use crate::{
+    TASK_STACK_SIZE, VmAddrSpace, arch::cpu::VCpu, data::VmDataWeak, vcpu::VCpuOp,
+    vhal::cpu::CpuHardId,
+};
+
+pub struct VmMachineRunningCommon<C: VCpuOp> {
+    pub cpus: BTreeMap<CpuHardId, C>,
+    pub vmspace: VmAddrSpace,
+    pub vm: VmDataWeak,
+    running_cpu_count: Arc<AtomicUsize>,
+}
+
+impl<C: VCpuOp + Send + 'static> VmMachineRunningCommon<C> {
+    pub fn new(vmspace: VmAddrSpace, vcpu: Vec<C>, vm: VmDataWeak) -> Self {
+        let mut cpus = BTreeMap::new();
+        for cpu in vcpu.into_iter() {
+            cpus.insert(cpu.hard_id(), cpu);
+        }
+
+        VmMachineRunningCommon {
+            vmspace,
+            cpus,
+            vm,
+            running_cpu_count: Arc::new(AtomicUsize::new(0)),
+        }
+    }
+
+    pub fn take_cpu(&mut self) -> anyhow::Result<C> {
+        let next = self
+            .cpus
+            .keys()
+            .next()
+            .cloned()
+            .ok_or_else(|| anyhow!("No CPUs available"))?;
+        let cpu = self.cpus.remove(&next).unwrap();
+        Ok(cpu)
+    }
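+
+    // Illustrative boot-loop sketch (comments only; `common` is a
+    // `VmMachineRunningCommon`): the owner drains vCPUs from the map and hands
+    // each one to its own host thread via `run_cpu` below.
+    //
+    // while let Ok(cpu) = common.take_cpu() {
+    //     common.run_cpu(cpu)?;
+    // }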
+
+    pub fn run_cpu(&mut self, mut cpu: C) -> anyhow::Result<()> {
+        let waiter = self.new_waiter();
+        let thread_ok = Arc::new(AtomicBool::new(false));
+        let thread_ok_clone = thread_ok.clone();
+        let bind_id = cpu.bind_id();
+        std::thread::Builder::new()
+            .name(format!("init-cpu-{}", bind_id))
+            .stack_size(TASK_STACK_SIZE)
+            .spawn(move || {
+                // Pin this thread to its host CPU before entering the guest.
+                assert!(
+                    set_current_affinity(AxCpuMask::one_shot(bind_id.raw())),
+                    "Initialize CPU affinity failed!"
+                );
+                thread_ok_clone.store(true, Ordering::SeqCst);
+
+                info!(
+                    "vCPU {} on {} ready, waiting for running...",
+                    cpu.bind_id(),
+                    bind_id
+                );
+                waiter.vm.wait_for_running();
+                info!("vCPU {} on {} running", cpu.hard_id(), bind_id);
+                // debug!("\n{:#x?}", cpu);
+                let res = cpu.run();
+                if let Err(e) = res {
+                    info!("vCPU {} exited with error: {e}", bind_id);
+                    if let Some(vm) = waiter.vm.upgrade() {
+                        vm.set_err(e);
+                    }
+                }
+                waiter.running_cpu_count.fetch_sub(1, Ordering::SeqCst);
+                if waiter.running_cpu_count.load(Ordering::SeqCst) == 0 {
+                    info!("All vCPUs have exited, VM set stopped.");
+                    waiter.vm.set_stopped();
+                }
+            })
+            .map_err(|e| anyhow!("{e:?}"))?;
+        debug!("Waiting for CPU {} thread", bind_id);
+        while !thread_ok.load(Ordering::SeqCst) {
+            std::thread::yield_now();
+        }
+        Ok(())
+    }
+
+    fn new_waiter(&self) -> Waiter {
+        let running_cpu_count = self.running_cpu_count.clone();
+        running_cpu_count.fetch_add(1, Ordering::SeqCst);
+        Waiter {
+            running_cpu_count,
+            vm: self.vm.clone(),
+        }
+    }
+}
+
+struct Waiter {
+    running_cpu_count: Arc<AtomicUsize>,
+    vm: VmDataWeak,
+}
diff --git a/src/vm/mod.rs b/src/vm/mod.rs
new file mode 100644
index 0000000..3281d27
--- /dev/null
+++ b/src/vm/mod.rs
@@ -0,0 +1,47 @@
+use crate::{AxVMConfig, data::VmData};
+
+mod addrspace;
+pub(crate) mod data;
+mod define;
+mod machine;
+
+pub(crate) use addrspace::*;
+pub use define::*;
+pub(crate) use machine::*;
+
+pub struct Vm {
+    data: VmData,
+}
+
+impl Vm {
+    pub fn new(config: AxVMConfig) -> anyhow::Result<Self> {
+        let data = VmData::new(config)?;
+        data.init()?;
+        Ok(Self { data })
+    }
+
+    pub fn id(&self) -> VmId {
+        self.data.id()
+    }
+
+    pub fn name(&self) -> &str {
+        self.data.name()
+    }
+
+    pub fn boot(&self) -> anyhow::Result<()> {
+        self.data.start()
+    }
+
+    pub fn shutdown(&self) -> anyhow::Result<()> {
+        self.data.stop()
+    }
+
+    #[inline]
+    pub fn status(&self) -> VMStatus {
+        self.data.status()
+    }
+
+    pub fn wait(&self) -> anyhow::Result<()> {
+        self.data.wait()
+    }
+}
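+
+// Illustrative caller-side sketch (comments only, assumes a parsed `AxVMConfig`):
+//
+// let vm = Vm::new(config)?;   // creates and initializes the machine
+// vm.boot()?;                  // spawns the vCPU threads
+// vm.wait()?;                  // blocks until all vCPUs exit, returns the run result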