diff --git a/.cargo/config.toml b/.cargo/config.toml
new file mode 100644
index 0000000..62d33ee
--- /dev/null
+++ b/.cargo/config.toml
@@ -0,0 +1,2 @@
+[build]
+target = "aarch64-unknown-none-softfloat"
\ No newline at end of file
diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index c91435c..8764187 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -44,16 +44,16 @@ jobs:
       - uses: actions/checkout@v4
       - uses: dtolnay/rust-toolchain@nightly
         with:
-          toolchain: nightly-2024-12-25
+          toolchain: nightly-2025-05-20
       - name: Build docs
         continue-on-error: ${{ github.ref != env.default-branch && github.event_name != 'pull_request' }}
         run: |
           cargo doc --no-deps --all-features
-          printf '' $(cargo tree | head -1 | cut -d' ' -f1) > target/doc/index.html
+          printf '' $(cargo tree | head -1 | cut -d' ' -f1) > target/aarch64-unknown-none-softfloat/doc/index.html
       - name: Deploy to Github Pages
         if: ${{ github.ref == env.default-branch }}
         uses: JamesIves/github-pages-deploy-action@v4
         with:
           single-commit: true
           branch: gh-pages
-          folder: target/doc
+          folder: target/aarch64-unknown-none-softfloat/doc
diff --git a/Cargo.toml b/Cargo.toml
index 655fb6d..123d914 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -1,35 +1,26 @@
 [package]
-edition = "2024"
-name = "arm_vcpu"
-version = "0.1.1"
 authors = [
-    "KeYang Hu ",
-    "Mingxian Su ",
-    "ShiMei Tang ",
-    "DeBin Luo ",
-    "周睿 "
+    "KeYang Hu ",
+    "Mingxian Su ",
+    "ShiMei Tang ",
+    "DeBin Luo ",
+    "周睿 ",
 ]
+categories = ["embedded", "no-std"]
 description = "Aarch64 VCPU implementation for Arceos Hypervisor"
+edition = "2024"
+keywords = ["hypervisor", "aarch64", "vcpu"]
 license = "MIT OR Apache-2.0"
+name = "arm_vcpu"
 repository = "https://github.com/arceos-hypervisor/arm_vcpu"
-categories = ["embedded", "no-std"]
-keywords = ["hypervisor", "aarch64", "vcpu"]
-
-[features]
-4-level-ept = []
+version = "0.1.1"

 [dependencies]
+aarch64-cpu = "11.0"
 log = "0.4"
-spin = "0.10"
-
-aarch64-cpu = "10.0"
 numeric-enum-macro = "0.2"
-tock-registers = "0.9"
+spin = "0.10"

 axerrno = "0.1.0"
+axvm-types.workspace = true
 percpu = {version = "0.2.0", features = ["arm-el2"]}
-
-axaddrspace = "0.1"
-axdevice_base = "0.1.0"
-axvcpu = "0.1.0"
-axvisor_api = "0.1.0"
diff --git a/src/exception.rs b/src/exception.rs
index c7357da..1cd68ad 100644
--- a/src/exception.rs
+++ b/src/exception.rs
@@ -1,4 +1,3 @@
-use crate::TrapFrame;
 use crate::exception_utils::{
     exception_class, exception_class_value, exception_data_abort_access_is_write,
     exception_data_abort_access_reg, exception_data_abort_access_reg_width,
@@ -7,15 +6,17 @@ use crate::exception_utils::{
     exception_esr, exception_fault_addr, exception_next_instruction_step, exception_sysreg_addr,
     exception_sysreg_direction_write, exception_sysreg_gpr,
 };
+use crate::{TrapFrame, handle_irq};

-use aarch64_cpu::registers::{ESR_EL2, HCR_EL2, Readable, SCTLR_EL1, VTCR_EL2, VTTBR_EL2};
-use axaddrspace::{
-    GuestPhysAddr,
-    device::{AccessWidth, SysRegAddr},
+use crate::exit::AxVCpuExitReason;
+use aarch64_cpu::registers::{
+    ESR_EL2, FAR_EL2, HCR_EL2, HPFAR_EL2, Readable, SCTLR_EL1, SPSR_EL2, VTCR_EL2, VTTBR_EL2,
 };
 use axerrno::{AxError, AxResult};
-use axvcpu::AxVCpuExitReason;
-use log::error;
+use axvm_types::{
+    addr::GuestPhysAddr,
+    device::{AccessWidth, SysRegAddr},
+};

 numeric_enum_macro::numeric_enum! {
     #[repr(u8)]
@@ -281,9 +282,7 @@ fn handle_smc64_exception(ctx: &mut TrapFrame) -> AxResult {
 /// which is registered at [`crate::pcpu::IRQ_HANDLER`] during `Aarch64PerCpu::new()`.
 #[unsafe(no_mangle)]
 fn current_el_irq_handler(_tf: &mut TrapFrame) {
-    unsafe { crate::pcpu::IRQ_HANDLER.current_ref_raw() }
-        .get()
-        .unwrap()()
+    handle_irq();
 }

 /// Handles synchronous exceptions that occur from the current exception level.
@@ -292,10 +291,16 @@ fn current_el_sync_handler(tf: &mut TrapFrame) {
     let esr = ESR_EL2.extract();
     let ec = ESR_EL2.read(ESR_EL2::EC);
     let iss = ESR_EL2.read(ESR_EL2::ISS);
+    let far = FAR_EL2.get();
+    let hpfar = HPFAR_EL2.get();
+    let spsr_el2 = SPSR_EL2.get();

     error!("ESR_EL2: {:#x}", esr.get());
     error!("Exception Class: {ec:#x}");
     error!("Instruction Specific Syndrome: {iss:#x}");
+    error!("FAR_EL2: {far:#x}");
+    error!("HPFAR_EL2: {hpfar:#x}");
+    error!("SPSR_EL2: {spsr_el2:#x}");

     panic!(
         "Unhandled synchronous exception from current EL: {:#x?}",
diff --git a/src/exception_utils.rs b/src/exception_utils.rs
index 0ae5e37..78ef106 100644
--- a/src/exception_utils.rs
+++ b/src/exception_utils.rs
@@ -1,7 +1,6 @@
-use aarch64_cpu::registers::{ESR_EL2, FAR_EL2, PAR_EL1};
-use axaddrspace::GuestPhysAddr;
+use aarch64_cpu::registers::*;
 use axerrno::{AxResult, ax_err};
-use tock_registers::interfaces::*;
+use axvm_types::addr::GuestPhysAddr;

 /// Retrieves the Exception Syndrome Register (ESR) value from EL2.
 ///
diff --git a/src/exit.rs b/src/exit.rs
new file mode 100644
index 0000000..469c718
--- /dev/null
+++ b/src/exit.rs
@@ -0,0 +1,191 @@
+use axvm_types::{
+    addr::GuestPhysAddr,
+    device::{AccessWidth, SysRegAddr},
+};
+
+/// Reasons for VM-Exits returned by [`crate::Aarch64VCpu::run`].
+///
+/// When a guest virtual CPU executes, various conditions can cause control to be
+/// transferred back to the hypervisor. This enum represents all possible exit reasons
+/// that can occur during VCpu execution.
+///
+/// # VM Exit Categories
+///
+/// - **I/O Operations**: MMIO reads/writes, port I/O, system register access
+/// - **System Events**: Hypercalls, interrupts, nested page faults
+/// - **Power Management**: CPU power state changes, system shutdown
+/// - **Multiprocessing**: IPI sending, secondary CPU bring-up
+/// - **Error Conditions**: Entry failures, invalid states
+///
+/// # Compatibility Note
+///
+/// This enum draws inspiration from [kvm-ioctls](https://github.com/rust-vmm/kvm-ioctls/blob/main/src/ioctls/vcpu.rs)
+/// for consistency with existing virtualization frameworks.
+#[non_exhaustive]
+#[derive(Debug)]
+pub enum AxVCpuExitReason {
+    /// A guest instruction triggered a hypercall to the hypervisor.
+    ///
+    /// Hypercalls are a mechanism for the guest OS to request services from
+    /// the hypervisor, similar to system calls in a traditional OS.
+    Hypercall {
+        /// The hypercall number identifying the requested service
+        nr: u64,
+        /// Arguments passed to the hypercall (up to 6 parameters)
+        args: [u64; 6],
+    },
+
+    /// The guest performed a Memory-Mapped I/O (MMIO) read operation.
+    ///
+    /// MMIO reads occur when the guest accesses device registers or other
+    /// hardware-mapped memory regions that require hypervisor emulation.
+    MmioRead {
+        /// Guest physical address being read from
+        addr: GuestPhysAddr,
+        /// Width/size of the memory access (8, 16, 32, or 64 bits)
+        width: AccessWidth,
+        /// Index of the guest register that will receive the read value
+        reg: usize,
+        /// Width of the destination register
+        reg_width: AccessWidth,
+        /// Whether to sign-extend the read value to fill the register
+        signed_ext: bool,
+    },
+
+    /// The guest performed a Memory-Mapped I/O (MMIO) write operation.
+    ///
+    /// MMIO writes occur when the guest writes to device registers or other
+    /// hardware-mapped memory regions that require hypervisor emulation.
+    MmioWrite {
+        /// Guest physical address being written to
+        addr: GuestPhysAddr,
+        /// Width/size of the memory access (8, 16, 32, or 64 bits)
+        width: AccessWidth,
+        /// Data being written to the memory location
+        data: u64,
+    },
+
+    /// The guest performed a system register read operation.
+    ///
+    /// System registers are architecture-specific control and status registers:
+    /// - **x86_64**: Model-Specific Registers (MSRs)
+    /// - **RISC-V**: Control and Status Registers (CSRs)
+    /// - **AArch64**: System registers accessible via MRS instruction
+    SysRegRead {
+        /// Address/identifier of the system register being read
+        ///
+        /// - **x86_64/RISC-V**: Direct register address
+        /// - **AArch64**: ESR_EL2.ISS format (`<op0><op2><op1><CRn>00000<CRm>0`),
+        ///   compatible with the `aarch64_sysreg` crate numbering scheme
+        addr: SysRegAddr,
+        /// Index of the guest register that will receive the read value
+        ///
+        /// **Note**: Unused on x86_64 where the result is always stored in `[edx:eax]`
+        reg: usize,
+    },
+
+    /// The guest performed a system register write operation.
+    ///
+    /// System registers are architecture-specific control and status registers:
+    /// - **x86_64**: Model-Specific Registers (MSRs)
+    /// - **RISC-V**: Control and Status Registers (CSRs)
+    /// - **AArch64**: System registers accessible via MSR instruction
+    SysRegWrite {
+        /// Address/identifier of the system register being written
+        ///
+        /// - **x86_64/RISC-V**: Direct register address
+        /// - **AArch64**: ESR_EL2.ISS format (`<op0><op2><op1><CRn>00000<CRm>0`),
+        ///   compatible with the `aarch64_sysreg` crate numbering scheme
+        addr: SysRegAddr,
+        /// Data being written to the system register
+        value: u64,
+    },
+
+    /// An external interrupt was delivered to the VCpu.
+    ///
+    /// This represents hardware interrupts from external devices that need
+    /// to be processed by the guest or hypervisor.
+    ///
+    /// **Note**: This variant may be extended with additional fields in the future.
+    /// Use `..` in pattern matching to ensure forward compatibility.
+    ExternalInterrupt,
+
+    /// Request to bring up a secondary CPU core.
+    ///
+    /// This exit reason is used during the multi-core VM boot process when
+    /// the primary CPU requests that a secondary CPU be started. The specific
+    /// mechanism varies by architecture:
+    ///
+    /// - **ARM**: PSCI (Power State Coordination Interface) calls
+    /// - **x86**: SIPI (Startup Inter-Processor Interrupt)
+    /// - **RISC-V**: SBI (Supervisor Binary Interface) calls
+    CpuUp {
+        /// Target CPU identifier to be started
+        ///
+        /// Format varies by architecture:
+        /// - **AArch64**: MPIDR register affinity fields
+        /// - **x86_64**: APIC ID of the target CPU
+        /// - **RISC-V**: Hart ID of the target CPU
+        target_cpu: u64,
+        /// Guest physical address where the secondary CPU should begin execution
+        entry_point: GuestPhysAddr,
+        /// Argument to pass to the secondary CPU
+        ///
+        /// - **AArch64**: Value to set in `x0` register at startup
+        /// - **RISC-V**: Value to set in `a1` register (`a0` gets the hartid)
+        /// - **x86_64**: Currently unused
+        arg: u64,
+    },
+
+    /// The guest VCpu has been powered down.
+    ///
+    /// This indicates the VCpu has executed a power-down instruction or
+    /// hypercall and should be suspended. The VCpu may be resumed later.
+    CpuDown {
+        /// Power state information (currently unused)
+        ///
+        /// Reserved for future use with PSCI_POWER_STATE or similar mechanisms
+        _state: u64,
+    },
+
+    /// The guest has requested system-wide shutdown.
+    ///
+    /// This indicates the entire virtual machine should be powered off,
+    /// not just the current VCpu.
+    SystemDown,
+
+    /// No special handling required - the VCpu handled the exit internally.
+    ///
+    /// This provides an opportunity for the hypervisor to:
+    /// - Check virtual device states
+    /// - Process pending interrupts
+    /// - Handle background tasks
+    /// - Perform scheduling decisions
+    ///
+    /// The VCpu can typically be resumed immediately after these checks.
+    Nothing,
+
+    /// The guest is attempting to send an Inter-Processor Interrupt (IPI).
+    ///
+    /// IPIs are used for inter-CPU communication in multi-core systems.
+    /// This does **not** include Startup IPIs (SIPI), which are handled
+    /// by the [`AxVCpuExitReason::CpuUp`] variant.
+    SendIPI {
+        /// Target CPU identifier to receive the IPI
+        ///
+        /// This field is invalid if `send_to_all` or `send_to_self` is true.
+        target_cpu: u64,
+        /// Auxiliary field for complex target CPU specifications
+        ///
+        /// Currently used only on AArch64 where:
+        /// - `target_cpu` contains `Aff3.Aff2.Aff1.0`
+        /// - `target_cpu_aux` contains a bitmask for `Aff0` values
+        target_cpu_aux: u64,
+        /// Whether to broadcast the IPI to all CPUs except the sender
+        send_to_all: bool,
+        /// Whether to send the IPI to the current CPU (self-IPI)
+        send_to_self: bool,
+        /// IPI vector/interrupt number to deliver
+        vector: u64,
+    },
+}
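
Usage sketch (not part of the patch): how a host VMM loop might consume the new `AxVCpuExitReason` values. The `vcpu_loop` function and its placeholder handling policy are assumptions for illustration, not an API defined by this crate.

```rust
use arm_vcpu::{Aarch64VCpu, AxVCpuExitReason};
use axerrno::AxResult;

// Hypothetical host-side dispatch loop; a real VMM would forward MMIO accesses
// and hypercalls to its own device models instead of the placeholders here.
fn vcpu_loop(vcpu: &mut Aarch64VCpu) -> AxResult<()> {
    loop {
        match vcpu.run()? {
            AxVCpuExitReason::MmioRead { reg, .. } => {
                // A real VMM would ask its device model for the value to load.
                vcpu.set_gpr(reg, 0);
            }
            AxVCpuExitReason::MmioWrite { addr, width, data } => {
                // Forward the store to the emulated device at `addr`.
                let _ = (addr, width, data);
            }
            AxVCpuExitReason::Hypercall { nr, args } => {
                let _ = (nr, args);
                vcpu.set_return_value(0); // result is returned to the guest in x0
            }
            AxVCpuExitReason::ExternalInterrupt => {
                // The physical IRQ was already dispatched to the host via `CpuHal`.
            }
            AxVCpuExitReason::SystemDown => return Ok(()),
            // The enum is `#[non_exhaustive]`, so keep a catch-all arm.
            _ => {}
        }
    }
}
```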
diff --git a/src/lib.rs b/src/lib.rs
index 7328d2a..6eb66d5 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -1,4 +1,5 @@
 #![no_std]
+#![cfg(target_arch = "aarch64")]
 #![feature(doc_cfg)]
 #![doc = include_str!("../README.md")]

@@ -9,12 +10,18 @@ mod context_frame;
 #[macro_use]
 mod exception_utils;
 mod exception;
+mod exit;
 mod pcpu;
 mod smc;
 mod vcpu;

+use core::sync::atomic::{AtomicBool, Ordering};
+
 pub use self::pcpu::Aarch64PerCpu;
 pub use self::vcpu::{Aarch64VCpu, Aarch64VCpuCreateConfig, Aarch64VCpuSetupConfig};
+pub use axvm_types::addr::*;
+pub use axvm_types::device::*;
+pub use exit::*;

 /// context frame for aarch64
 pub type TrapFrame = context_frame::Aarch64ContextFrame;
@@ -29,3 +36,47 @@ pub fn has_hardware_support() -> bool {
     // Current just return true by default.
     true
 }
+
+pub trait CpuHal {
+    fn irq_handler(&self);
+    fn inject_interrupt(&self, irq: usize);
+}
+
+struct NopHal;
+
+impl CpuHal for NopHal {
+    fn irq_handler(&self) {
+        unimplemented!()
+    }
+    fn inject_interrupt(&self, _irq: usize) {
+        unimplemented!()
+    }
+}
+
+static mut HAL: &dyn CpuHal = &NopHal;
+static INIT: AtomicBool = AtomicBool::new(false);
+
+fn hal() -> &'static dyn CpuHal {
+    unsafe { HAL }
+}
+
+fn handle_irq() {
+    hal().irq_handler();
+}
+
+fn inject_interrupt(irq: usize) {
+    hal().inject_interrupt(irq);
+}
+
+pub fn init_hal(hal: &'static dyn CpuHal) {
+    if INIT
+        .compare_exchange(false, true, Ordering::SeqCst, Ordering::SeqCst)
+        .is_ok()
+    {
+        unsafe {
+            HAL = hal;
+        }
+    } else {
+        panic!("arm_vcpu hal has been initialized");
+    }
+}
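
Usage sketch (not part of the patch): registering the new `CpuHal` via `init_hal` during host bring-up. `HostHal` and the behaviour described in its comments are host-side assumptions; the crate itself only defines the trait and the registration function.

```rust
use arm_vcpu::{CpuHal, init_hal};

// Hypothetical host HAL: the bodies would call into the host's own IRQ
// dispatch and virtual-interrupt-injection code, which is not part of this crate.
struct HostHal;

impl CpuHal for HostHal {
    fn irq_handler(&self) {
        // Dispatch the pending physical IRQ to the host OS here.
    }

    fn inject_interrupt(&self, irq: usize) {
        // Program the virtual interrupt controller with `irq` here.
        let _ = irq;
    }
}

static HOST_HAL: HostHal = HostHal;

fn host_early_init() {
    // Must run exactly once before any vCPU is created; a second call panics.
    init_hal(&HOST_HAL);
}
```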
diff --git a/src/pcpu.rs b/src/pcpu.rs
index 296cbbb..ae02837 100644
--- a/src/pcpu.rs
+++ b/src/pcpu.rs
@@ -1,66 +1,35 @@
-use core::{cell::OnceCell, marker::PhantomData};
-
 use aarch64_cpu::registers::*;
 use axerrno::AxResult;
-use axvcpu::{AxArchPerCpu, AxVCpuHal};
-use tock_registers::interfaces::ReadWriteable;

 /// Per-CPU data. A pointer to this struct is loaded into TP when a CPU starts. This structure
 #[repr(C)]
 #[repr(align(4096))]
-pub struct Aarch64PerCpu<H: AxVCpuHal> {
-    /// per cpu id
-    pub cpu_id: usize,
-    _phantom: PhantomData<H>,
+pub struct Aarch64PerCpu {
+    ori_vbar: u64,
 }

-#[percpu::def_percpu]
-static ORI_EXCEPTION_VECTOR_BASE: usize = 0;
-
-/// IRQ handler registered by underlying host OS during per-cpu initialization,
-/// for dispatching IRQs to the host OS.
-///
-/// Set `IRQ_HANDLER` as per-cpu variable to avoid the need of `OnceLock`.
-#[percpu::def_percpu]
-pub static IRQ_HANDLER: OnceCell<&(dyn Fn() + Send + Sync)> = OnceCell::new();
-
 unsafe extern "C" {
     fn exception_vector_base_vcpu();
 }

-impl<H: AxVCpuHal> AxArchPerCpu for Aarch64PerCpu<H> {
-    fn new(cpu_id: usize) -> AxResult<Self> {
-        // Register IRQ handler for this CPU.
-        let _ = unsafe { IRQ_HANDLER.current_ref_mut_raw() }
-            .set(&|| H::irq_hanlder())
-            .map(|_| {});
-
-        Ok(Self {
-            cpu_id,
-            _phantom: PhantomData,
-        })
+impl Aarch64PerCpu {
+    pub fn new() -> Self {
+        Self {
+            ori_vbar: VBAR_EL2.get(),
+        }
     }

-    fn is_enabled(&self) -> bool {
+    pub fn is_enabled(&self) -> bool {
         HCR_EL2.is_set(HCR_EL2::VM)
     }

-    fn hardware_enable(&mut self) -> AxResult {
-        // First we save origin `exception_vector_base`.
-        // Safety:
-        // Todo: take care of `preemption`
-        unsafe { ORI_EXCEPTION_VECTOR_BASE.write_current_raw(VBAR_EL2.get() as usize) }
-
+    pub fn hardware_enable(&mut self) {
         // Set current `VBAR_EL2` to `exception_vector_base_vcpu`
         // defined in this crate.
-        VBAR_EL2.set(exception_vector_base_vcpu as usize as _);
+        VBAR_EL2.set(exception_vector_base_vcpu as *const () as usize as _);

         HCR_EL2.modify(
-            HCR_EL2::VM::Enable
-                + HCR_EL2::RW::EL1IsAarch64
-                + HCR_EL2::IMO::EnableVirtualIRQ
-                + HCR_EL2::FMO::EnableVirtualFIQ
-                + HCR_EL2::TSC::EnableTrapEl1SmcToEl2,
+            HCR_EL2::VM::Enable + HCR_EL2::RW::EL1IsAarch64 + HCR_EL2::TSC::EnableTrapEl1SmcToEl2,
         );

         // Note that `ICH_HCR_EL2` is not the same as `HCR_EL2`.
@@ -75,17 +44,28 @@ impl AxArchPerCpu for Aarch64PerCpu {
         //         value = in(reg) 0,
         //     }
         // }
-
-        Ok(())
     }

-    fn hardware_disable(&mut self) -> AxResult {
+    pub fn hardware_disable(&mut self) -> AxResult {
         // Reset `VBAR_EL2` into previous value.
         // Safety:
         // Todo: take care of `preemption`
-        VBAR_EL2.set(unsafe { ORI_EXCEPTION_VECTOR_BASE.read_current_raw() } as _);
+        VBAR_EL2.set(self.ori_vbar);

         HCR_EL2.set(HCR_EL2::VM::Disable.into());

         Ok(())
     }
+
+    pub fn max_guest_page_table_levels(&self) -> usize {
+        crate::vcpu::max_gpt_level(crate::vcpu::pa_bits())
+    }
+
+    pub fn pa_bits(&self) -> usize {
+        crate::vcpu::pa_bits()
+    }
+
+    pub fn pa_range(&self) -> core::ops::Range<usize> {
+        let pa_bits = crate::vcpu::pa_bits();
+        0..(1 << pa_bits)
+    }
 }
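
Usage sketch (not part of the patch): the per-CPU enable/disable flow with the de-generified `Aarch64PerCpu`. The function names are illustrative; error handling and preemption control are left to the host.

```rust
use arm_vcpu::Aarch64PerCpu;

fn enable_virtualization_on_this_cpu() -> Aarch64PerCpu {
    // `new()` snapshots the current VBAR_EL2 so it can be restored on disable.
    let mut percpu = Aarch64PerCpu::new();
    if !percpu.is_enabled() {
        // Installs this crate's EL2 vector table and sets HCR_EL2.VM.
        percpu.hardware_enable();
    }
    percpu
}

fn disable_virtualization(percpu: &mut Aarch64PerCpu) {
    // Restores the saved VBAR_EL2 and clears HCR_EL2.VM.
    percpu
        .hardware_disable()
        .expect("failed to disable EL2 virtualization");
}
```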
diff --git a/src/vcpu.rs b/src/vcpu.rs
index 724463c..3ef7550 100644
--- a/src/vcpu.rs
+++ b/src/vcpu.rs
@@ -1,14 +1,17 @@
-use core::marker::PhantomData;
+use core::fmt::Arguments;

 use aarch64_cpu::registers::*;
-use axaddrspace::{GuestPhysAddr, HostPhysAddr, device::SysRegAddr};
 use axerrno::AxResult;
-use axvcpu::{AxArchVCpu, AxVCpuExitReason, AxVCpuHal};
+use axvm_types::{
+    addr::{GuestPhysAddr, HostPhysAddr},
+    device::SysRegAddr,
+};

-use crate::TrapFrame;
 use crate::context_frame::GuestSystemRegisters;
 use crate::exception::{TrapKind, handle_exception_sync};
 use crate::exception_utils::exception_class_value;
+use crate::exit::AxVCpuExitReason;
+use crate::{TrapFrame, inject_interrupt};

 #[percpu::def_percpu]
 static HOST_SP_EL0: u64 = 0;
@@ -37,7 +40,7 @@ pub struct VmCpuRegisters {
 /// A virtual CPU within a guest
 #[repr(C)]
 #[derive(Debug)]
-pub struct Aarch64VCpu<H: AxVCpuHal> {
+pub struct Aarch64VCpu {
     // DO NOT modify `guest_regs` and `host_stack_top` and their order unless you do know what you are doing!
     // DO NOT add anything before or between them unless you do know what you are doing!
     ctx: TrapFrame,
@@ -45,7 +48,8 @@ pub struct Aarch64VCpu {
     guest_system_regs: GuestSystemRegisters,
     /// The MPIDR_EL1 value for the vCPU.
     mpidr: u64,
-    _phantom: PhantomData<H>,
+    pub pt_level: usize,
+    pub pa_bits: usize,
 }

 /// Configuration for creating a new `Aarch64VCpu`
@@ -69,42 +73,98 @@ pub struct Aarch64VCpuSetupConfig {
     pub passthrough_timer: bool,
 }

-impl<H: AxVCpuHal> axvcpu::AxArchVCpu for Aarch64VCpu<H> {
-    type CreateConfig = Aarch64VCpuCreateConfig;
-
-    type SetupConfig = Aarch64VCpuSetupConfig;
-
-    fn new(_vm_id: usize, _vcpu_id: usize, config: Self::CreateConfig) -> AxResult<Self> {
+impl Aarch64VCpu {
+    pub fn new(config: Aarch64VCpuCreateConfig) -> AxResult<Self> {
         let mut ctx = TrapFrame::default();
         ctx.set_argument(config.dtb_addr);
+        let pa_bits = pa_bits();
+        let pt_level = max_gpt_level(pa_bits);

         Ok(Self {
             ctx,
             host_stack_top: 0,
             guest_system_regs: GuestSystemRegisters::default(),
             mpidr: config.mpidr_el1,
-            _phantom: PhantomData,
+            pt_level,
+            pa_bits,
         })
     }

-    fn setup(&mut self, config: Self::SetupConfig) -> AxResult {
+    pub fn setup(&mut self, config: Aarch64VCpuSetupConfig) -> AxResult {
         self.init_hv(config);
         Ok(())
     }

-    fn set_entry(&mut self, entry: GuestPhysAddr) -> AxResult {
-        debug!("set vcpu entry:{entry:?}");
+    pub fn set_dtb_addr(&mut self, dtb_addr: GuestPhysAddr) -> AxResult {
+        debug!("vCPU{} set vcpu dtb addr:{dtb_addr:?}", self.mpidr);
+        self.ctx.set_argument(dtb_addr.as_usize());
+        Ok(())
+    }
+
+    pub fn set_entry(&mut self, entry: GuestPhysAddr) -> AxResult {
+        debug!("vCPU{} set vcpu entry:{entry:?}", self.mpidr);
         self.set_elr(entry.as_usize());
         Ok(())
     }

-    fn set_ept_root(&mut self, ept_root: HostPhysAddr) -> AxResult {
-        debug!("set vcpu ept root:{ept_root:#x}");
+    pub fn set_ept_root(&mut self, ept_root: HostPhysAddr) -> AxResult {
+        debug!("vCPU{} set vcpu ept root:{ept_root:#x}", self.mpidr);
         self.guest_system_regs.vttbr_el2 = ept_root.as_usize() as u64;
         Ok(())
     }

-    fn run(&mut self) -> AxResult<AxVCpuExitReason> {
+    pub fn setup_current_cpu(&mut self, vmid: usize) -> AxResult {
+        // Set VMID then invalidate stage-2 TLB for this VMID to avoid stale translations.
+        let vmid_mask: u64 = 0xffff << 48;
+        let mut val = match self.pt_level {
+            4 => VTCR_EL2::SL0::Granule4KBLevel0 + VTCR_EL2::T0SZ.val(64 - 48),
+            _ => VTCR_EL2::SL0::Granule4KBLevel1 + VTCR_EL2::T0SZ.val(64 - 39),
+        };
+
+        val = val
+            + match self.pa_bits {
+                52..=64 => VTCR_EL2::PS::PA_52B_4PB,
+                48..=51 => VTCR_EL2::PS::PA_48B_256TB,
+                44..=47 => VTCR_EL2::PS::PA_44B_16TB,
+                42..=43 => VTCR_EL2::PS::PA_42B_4TB,
+                40..=41 => VTCR_EL2::PS::PA_40B_1TB,
+                36..=39 => VTCR_EL2::PS::PA_36B_64GB,
+                _ => VTCR_EL2::PS::PA_32B_4GB,
+            };
+
+        val = val
+            + VTCR_EL2::TG0::Granule4KB
+            + VTCR_EL2::SH0::Inner
+            + VTCR_EL2::ORGN0::NormalWBRAWA
+            + VTCR_EL2::IRGN0::NormalWBRAWA;
+
+        self.guest_system_regs.vtcr_el2 = val.value;
+        VTCR_EL2.set(self.guest_system_regs.vtcr_el2);
+        debug!(
+            "vCPU {:#x} set pt level: {}, pt bits: {}",
+            self.mpidr, self.pt_level, self.pa_bits
+        );
+
+        let mut vttbr = self.guest_system_regs.vttbr_el2;
+        vttbr = (vttbr & !vmid_mask) | ((vmid as u64 & 0xffff) << 48);
+        self.guest_system_regs.vttbr_el2 = vttbr;
+        VTTBR_EL2.set(vttbr);
+
+        unsafe {
+            core::arch::asm!(
+                "dsb ishst",         // ensure VTTBR write visible before TLB invalidation
+                "tlbi vmalls12e1is", // invalidate stage-2 by VMID (inner-shareable)
+                "dsb ish",           // ensure completion of invalidation
+                "isb",               // sync context
+                options(nostack, preserves_flags)
+            );
+        }
+
+        Ok(())
+    }
+
+    pub fn run(&mut self) -> AxResult<AxVCpuExitReason> {
         // Run guest.
         let exit_reson = unsafe {
             // Save host SP_EL0 to the ctx becase it's used as current task ptr.
@@ -118,31 +178,23 @@ impl Aarch64VCpu {
         self.vmexit_handler(trap_kind)
     }

-    fn bind(&mut self) -> AxResult {
-        Ok(())
-    }
-
-    fn unbind(&mut self) -> AxResult {
-        Ok(())
-    }
-
-    fn set_gpr(&mut self, idx: usize, val: usize) {
+    pub fn set_gpr(&mut self, idx: usize, val: usize) {
         self.ctx.set_gpr(idx, val);
     }

-    fn inject_interrupt(&mut self, vector: usize) -> AxResult {
-        axvisor_api::arch::hardware_inject_virtual_interrupt(vector as u8);
+    pub fn inject_interrupt(&mut self, vector: usize) -> AxResult {
+        inject_interrupt(vector);
         Ok(())
     }

-    fn set_return_value(&mut self, val: usize) {
+    pub fn set_return_value(&mut self, val: usize) {
         // Return value is stored in x0.
         self.ctx.set_argument(val);
     }
 }

 // Private function
-impl<H: AxVCpuHal> Aarch64VCpu<H> {
+impl Aarch64VCpu {
     fn init_hv(&mut self, config: Aarch64VCpuSetupConfig) {
         self.ctx.spsr = (SPSR_EL1::M::EL1h
             + SPSR_EL1::I::Masked
@@ -167,60 +219,15 @@ impl Aarch64VCpu {
         self.guest_system_regs.sctlr_el1 = 0x30C50830;
         self.guest_system_regs.pmcr_el0 = 0;

-        // use 3 level ept paging
-        // - 4KiB granule (TG0)
-        // - 39-bit address space (T0_SZ)
-        // - start at level 1 (SL0)
-        #[cfg(not(feature = "4-level-ept"))]
-        {
-            self.guest_system_regs.vtcr_el2 = (VTCR_EL2::PS::PA_40B_1TB
-                + VTCR_EL2::TG0::Granule4KB
-                + VTCR_EL2::SH0::Inner
-                + VTCR_EL2::ORGN0::NormalWBRAWA
-                + VTCR_EL2::IRGN0::NormalWBRAWA
-                + VTCR_EL2::SL0.val(0b01)
-                + VTCR_EL2::T0SZ.val(64 - 39))
-            .into();
-        }
+        // self.guest_system_regs.vtcr_el2 = probe_vtcr_support()
+        //     + (VTCR_EL2::TG0::Granule4KB
+        //         + VTCR_EL2::SH0::Inner
+        //         + VTCR_EL2::ORGN0::NormalWBRAWA
+        //         + VTCR_EL2::IRGN0::NormalWBRAWA)
+        //     .value;

-        // use 4 level ept paging
-        // - 4KiB granule (TG0)
-        // - 48-bit address space (T0_SZ)
-        // - start at level 0 (SL0)
-        #[cfg(feature = "4-level-ept")]
-        {
-            // read PARange (bits 3:0)
-            let parange = (ID_AA64MMFR0_EL1.get() & 0xF) as u8;
-            // ARM Definition: 0x5 indicates 48 bits PA, 0x4 indicates 44 bits PA, and so on.
-            if parange <= 0x4 {
-                panic!(
-                    "CPU only supports {}-bit PA (< 44), \
-                    cannot enable 4-level EPT paging!",
-                    match parange {
-                        0x0 => 32,
-                        0x1 => 36,
-                        0x2 => 40,
-                        0x3 => 42,
-                        0x4 => 44,
-                        _ => 48,
-                    }
-                );
-            }
-            self.guest_system_regs.vtcr_el2 = (VTCR_EL2::PS::PA_48B_256TB
-                + VTCR_EL2::TG0::Granule4KB
-                + VTCR_EL2::SH0::Inner
-                + VTCR_EL2::ORGN0::NormalWBRAWA
-                + VTCR_EL2::IRGN0::NormalWBRAWA
-                + VTCR_EL2::SL0.val(0b10) // 0b10 means start at level 0
-                + VTCR_EL2::T0SZ.val(64 - 48))
-            .into();
-        }
-
-        let mut hcr_el2 = HCR_EL2::VM::Enable
-            + HCR_EL2::RW::EL1IsAarch64
-            + HCR_EL2::FMO::EnableVirtualFIQ
-            + HCR_EL2::TSC::EnableTrapEl1SmcToEl2
-            + HCR_EL2::RW::EL1IsAarch64;
+        let mut hcr_el2 =
+            HCR_EL2::VM::Enable + HCR_EL2::TSC::EnableTrapEl1SmcToEl2 + HCR_EL2::RW::EL1IsAarch64;

         if !config.passthrough_interrupt {
             // Set HCR_EL2.IMO will trap IRQs to EL2 while enabling virtual IRQs.
@@ -228,7 +235,7 @@ impl Aarch64VCpu {
             // We must choose one of the two:
             // - Enable virtual IRQs and trap physical IRQs to EL2.
             // - Disable virtual IRQs and pass through physical IRQs to EL1.
-            hcr_el2 += HCR_EL2::IMO::EnableVirtualIRQ;
+            hcr_el2 += HCR_EL2::IMO::EnableVirtualIRQ + HCR_EL2::FMO::EnableVirtualFIQ;
         }

         self.guest_system_regs.hcr_el2 = hcr_el2.into();
@@ -254,7 +261,7 @@ impl Aarch64VCpu {
 }

 /// Private functions related to vcpu runtime control flow.
-impl<H: AxVCpuHal> Aarch64VCpu<H> {
+impl Aarch64VCpu {
     /// Save host context and run guest.
     ///
     /// When a VM-Exit happens when guest's vCpu is running,
@@ -348,9 +355,7 @@ impl Aarch64VCpu {
         let result = match exit_reason {
             TrapKind::Synchronous => handle_exception_sync(&mut self.ctx),
-            TrapKind::Irq => Ok(AxVCpuExitReason::ExternalInterrupt {
-                vector: H::irq_fetch() as _,
-            }),
+            TrapKind::Irq => Ok(AxVCpuExitReason::ExternalInterrupt),
             _ => panic!("Unhandled exception {:?}", exit_reason),
         };

@@ -441,3 +446,54 @@ impl Aarch64VCpu {
         }
     }
 }
+
+pub(crate) fn pa_bits() -> usize {
+    match ID_AA64MMFR0_EL1.read_as_enum(ID_AA64MMFR0_EL1::PARange) {
+        Some(ID_AA64MMFR0_EL1::PARange::Value::Bits_32) => 32,
+        Some(ID_AA64MMFR0_EL1::PARange::Value::Bits_36) => 36,
+        Some(ID_AA64MMFR0_EL1::PARange::Value::Bits_40) => 40,
+        Some(ID_AA64MMFR0_EL1::PARange::Value::Bits_42) => 42,
+        Some(ID_AA64MMFR0_EL1::PARange::Value::Bits_44) => 44,
+        Some(ID_AA64MMFR0_EL1::PARange::Value::Bits_48) => 48,
+        Some(ID_AA64MMFR0_EL1::PARange::Value::Bits_52) => 52,
+        _ => 32,
+    }
+}
+
+#[allow(dead_code)]
+pub(crate) fn current_gpt_level() -> usize {
+    let t0sz = VTCR_EL2.read(VTCR_EL2::T0SZ) as usize;
+    match t0sz {
+        16..=25 => 4,
+        26..=35 => 3,
+        _ => 2,
+    }
+}
+
+pub(crate) fn max_gpt_level(pa_bits: usize) -> usize {
+    match pa_bits {
+        44.. => 4,
+        _ => 3,
+    }
+}
+
+fn probe_vtcr_support() -> u64 {
+    let pa_bits = pa_bits();
+
+    let mut val = match max_gpt_level(pa_bits) {
+        4 => VTCR_EL2::SL0::Granule4KBLevel0 + VTCR_EL2::T0SZ.val(64 - 48),
+        _ => VTCR_EL2::SL0::Granule4KBLevel1 + VTCR_EL2::T0SZ.val(64 - 39),
+    };
+
+    match pa_bits {
+        52..=64 => val += VTCR_EL2::PS::PA_52B_4PB,
+        48..=51 => val += VTCR_EL2::PS::PA_48B_256TB,
+        44..=47 => val += VTCR_EL2::PS::PA_44B_16TB,
+        42..=43 => val += VTCR_EL2::PS::PA_42B_4TB,
+        40..=41 => val += VTCR_EL2::PS::PA_40B_1TB,
+        36..=39 => val += VTCR_EL2::PS::PA_36B_64GB,
+        _ => val += VTCR_EL2::PS::PA_32B_4GB,
+    }
+
+    val.value
+}
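
For reference, a sketch of the stage-2 geometry implied by `pa_bits`/`max_gpt_level` above, under the same assumptions as `setup_current_cpu` (4 KiB granule, IPA space capped at 48 bits for 4-level tables and 39 bits for 3-level tables): a 48-bit `PARange` gives `T0SZ = 16` and a level-0 start, while a 40-bit `PARange` gives `T0SZ = 25` and a level-1 start. The helper below is illustrative only and does not read ID_AA64MMFR0_EL1 or write VTCR_EL2.

```rust
/// Illustrative only: mirrors the level/T0SZ policy used by `max_gpt_level`
/// and `setup_current_cpu`, without touching any system register.
fn stage2_geometry(pa_bits: usize) -> (usize, u64, &'static str) {
    if pa_bits >= 44 {
        // 4-level walk: IPA capped at 48 bits, so T0SZ = 64 - 48 = 16.
        (4, 64 - 48, "SL0 = Granule4KBLevel0")
    } else {
        // 3-level walk: IPA capped at 39 bits, so T0SZ = 64 - 39 = 25.
        (3, 64 - 39, "SL0 = Granule4KBLevel1")
    }
}

fn main() {
    assert_eq!(stage2_geometry(48), (4, 16, "SL0 = Granule4KBLevel0"));
    assert_eq!(stage2_geometry(40), (3, 25, "SL0 = Granule4KBLevel1"));
}
```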