Skip to content

tdx: init APs with the ACPI mailbox protocol #970

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 20 commits into from
Jun 19, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions flowey/flowey_lib_hvlite/src/_jobs/cfg_versions.rs
Original file line number Diff line number Diff line change
Expand Up @@ -29,8 +29,8 @@ pub const NODEJS: &str = "18.x";
// N.B. Kernel version numbers for dev and stable branches are not directly
// comparable. They originate from separate branches, and the fourth digit
// increases with each release from the respective branch.
pub const OPENHCL_KERNEL_DEV_VERSION: &str = "6.12.9.5";
pub const OPENHCL_KERNEL_STABLE_VERSION: &str = "6.12.9.7";
pub const OPENHCL_KERNEL_DEV_VERSION: &str = "6.12.9.6";
pub const OPENHCL_KERNEL_STABLE_VERSION: &str = "6.12.9.10";
pub const OPENVMM_DEPS: &str = "0.1.0-20250403.3";
pub const PROTOC: &str = "27.1";

Expand Down
10 changes: 6 additions & 4 deletions openhcl/openhcl_boot/src/arch/aarch64/hypercall.rs
Original file line number Diff line number Diff line change
@@ -1,8 +1,10 @@
// Copyright (c) Microsoft Corporation.
// Licensed under the MIT License.

use hvdef::hypercall::HvGuestOsMicrosoft;

/// Writes a synthetic register to tell the hypervisor the OS ID for the boot shim.
fn report_os_id(guest_os_id: u64) {
fn report_os_id(guest_os_id: HvGuestOsMicrosoft) {
// On ARM64, to be able to make hypercalls, one needs first to set the Guest OS ID
// synthetic register using a hypercall. Can't use `Hvcall::set_register` as that will
// lead to infinite recursion, since that function will first try initializing hypercalls
Expand All @@ -12,16 +14,16 @@ fn report_os_id(guest_os_id: u64) {
// (this is TLFS section 17.4.4.1.1 and 5.3), and that must be the fast hypercall.
let _ = minimal_rt::arch::hypercall::set_register_fast(
hvdef::HvArm64RegisterName::GuestOsId.into(),
guest_os_id.into(),
u64::from(guest_os_id).into(),
);
}

pub(crate) fn initialize(guest_os_id: u64) {
pub(crate) fn initialize(guest_os_id: HvGuestOsMicrosoft) {
// We are assuming we are running under a Microsoft hypervisor.
report_os_id(guest_os_id);
}

/// Call before jumping to kernel.
pub(crate) fn uninitialize() {
report_os_id(0);
report_os_id(0.into());
}
1 change: 1 addition & 0 deletions openhcl/openhcl_boot/src/arch/aarch64/memory.rs
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ use aarch64defs::IntermPhysAddrSize;
pub fn setup_vtl2_memory(_shim_params: &ShimParams, _partition_info: &PartitionInfo) {
// TODO: memory acceptance isn't currently supported in the boot shim for aarch64.
let _ = _shim_params.bounce_buffer;
let _ = _shim_params.page_tables;

// Enable VTL protection so that vtl 2 protections can be applied. All other config
// should be set by the user mode
Expand Down
84 changes: 84 additions & 0 deletions openhcl/openhcl_boot/src/arch/x86_64/address_space.rs
Original file line number Diff line number Diff line change
Expand Up @@ -17,8 +17,10 @@ use core::marker::PhantomData;
use core::sync::atomic::AtomicU64;
use core::sync::atomic::Ordering;
use core::sync::atomic::compiler_fence;
use hvdef::HV_PAGE_SIZE;
use memory_range::MemoryRange;
use x86defs::X64_LARGE_PAGE_SIZE;
use x86defs::tdx::TDX_SHARED_GPA_BOUNDARY_ADDRESS_BIT;
use zerocopy::FromBytes;
use zerocopy::Immutable;
use zerocopy::IntoBytes;
Expand Down Expand Up @@ -101,6 +103,26 @@ impl PageTableEntry {
pub fn clear(&mut self) {
self.write_pte(0);
}

/// Reports whether the TDX shared bit is set in this page table entry.
///
/// Returns `true` only when every bit of `TDX_SHARED_GPA_BOUNDARY_ADDRESS_BIT`
/// is present in the raw entry value.
pub fn tdx_is_shared(&mut self) -> bool {
    let shared_mask = TDX_SHARED_GPA_BOUNDARY_ADDRESS_BIT;
    (self.read_pte() & shared_mask) == shared_mask
}

/// Marks this page table entry as shared by setting the TDX shared bit.
///
/// The current entry value is read, OR-ed with
/// `TDX_SHARED_GPA_BOUNDARY_ADDRESS_BIT`, and written back, so all other bits
/// of the entry are preserved.
pub fn tdx_set_shared(&mut self) {
    let updated = self.read_pte() | TDX_SHARED_GPA_BOUNDARY_ADDRESS_BIT;
    self.write_pte(updated);
}

/// Marks this page table entry as private by clearing the TDX shared bit.
///
/// The current entry value is read, masked with the complement of
/// `TDX_SHARED_GPA_BOUNDARY_ADDRESS_BIT`, and written back, so all other bits
/// of the entry are preserved.
pub fn tdx_set_private(&mut self) {
    let updated = self.read_pte() & !TDX_SHARED_GPA_BOUNDARY_ADDRESS_BIT;
    self.write_pte(updated);
}
}

#[repr(C)]
Expand Down Expand Up @@ -205,6 +227,7 @@ unsafe fn page_table_at_address(address: u64) -> &'static mut PageTable {
/// Returns a reference to the PDE corresponding to a virtual address.
///
/// # Safety
///
/// This routine requires the caller to ensure that the VA is a valid one for which the paging
/// hierarchy was configured by the file loader (the page directory must exist). If this is not
/// true this routine will panic rather than corrupt the address space.
Expand Down Expand Up @@ -253,3 +276,64 @@ pub fn init_local_map(va: u64) -> LocalMap<'static> {
unmap_page_helper(&local_map);
local_map
}

/// A page used for TDX hypercalls.
///
/// This wrapper assures that the page is a large page, present in the
/// paging hierarchy, aligned to 2MB, and shared with the hypervisor. These
/// properties are checked once, in [`TdxHypercallPage::new`].
pub struct TdxHypercallPage(u64);

impl TdxHypercallPage {
    /// Validate that a virtual address is present in the paging hierarchy,
    /// that it is a large page aligned to `X64_LARGE_PAGE_SIZE`, and that its
    /// PDE already has the TDX shared bit set.
    ///
    /// # Safety
    /// The caller ensures that the input is a virtual address with a valid page table
    ///
    /// # Panics
    /// Panics if the PDE for `va` is not present, is not a large page, or is
    /// not marked shared, or if `va` is not aligned to `X64_LARGE_PAGE_SIZE`.
    pub unsafe fn new(va: u64) -> Self {
        // SAFETY: Caller has guaranteed the va is a valid pagetable mapping
        let entry = unsafe { get_pde_for_va(va) };

        // Each property is asserted separately so a failure names the exact
        // condition that did not hold.
        assert!(entry.is_present());
        assert!(entry.is_large_page());
        assert!(va % X64_LARGE_PAGE_SIZE == 0);
        assert!(entry.tdx_is_shared());

        TdxHypercallPage(va)
    }

    /// Returns the VA of the large page containing the I/O buffers
    pub fn base(&self) -> u64 {
        self.0
    }

    /// Returns the VA of the hypercall input buffer, which sits at the start
    /// of the large page.
    pub fn input(&self) -> u64 {
        self.0
    }

    /// Returns the VA of the hypercall output buffer, which sits
    /// `HV_PAGE_SIZE` bytes after the start of the large page.
    pub fn output(&self) -> u64 {
        self.0 + HV_PAGE_SIZE
    }
}

/// Set the shared bit in the PDE of a large page in the local map for a given VA.
///
/// # Safety
/// `va` is a plain address, so nothing about the mapping is guaranteed by its
/// type. The caller must ensure that:
/// - `va` is a valid VA for which the paging hierarchy was configured (the
///   requirement of `get_pde_for_va`),
/// - `va` is mapped by a present large page, and
/// - the memory behind that page is safe to share with the hypervisor.
pub unsafe fn tdx_share_large_page(va: u64) {
    // SAFETY: The caller upholds the requirements listed in the `# Safety` section.
    let entry = unsafe { get_pde_for_va(va) };
    entry.tdx_set_shared();
}

/// Make a hypercall page private again by clearing the shared bit in its PDE
/// in the local map.
///
/// Takes the [`TdxHypercallPage`] by value, so the wrapper cannot be used
/// after the page has stopped being shared.
pub fn tdx_unshare_large_page(va: TdxHypercallPage) {
    let page_va = va.base();
    // SAFETY: The va passed in is guaranteed by the type to be a present large page,
    // which is shared with the hypervisor
    let pde = unsafe { get_pde_for_va(page_va) };
    pde.tdx_set_private();
}
12 changes: 6 additions & 6 deletions openhcl/openhcl_boot/src/arch/x86_64/hypercall.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,15 +3,15 @@

use core::ptr::addr_of;
use hvdef::HV_PAGE_SIZE;
use hvdef::hypercall::HvGuestOsMicrosoft;
use minimal_rt::arch::hypercall::HYPERCALL_PAGE;
use minimal_rt::arch::msr::read_msr;
use minimal_rt::arch::msr::write_msr;

/// Writes an MSR to tell the hypervisor the OS ID for the boot shim.
fn report_os_id(guest_os_id: u64) {
fn report_os_id(guest_os_id: HvGuestOsMicrosoft) {
// SAFETY: Using the contract established in the Hyper-V TLFS.
unsafe {
write_msr(hvdef::HV_X64_MSR_GUEST_OS_ID, guest_os_id);
write_msr(hvdef::HV_X64_MSR_GUEST_OS_ID, guest_os_id.into());
};
}

Expand Down Expand Up @@ -40,8 +40,8 @@ fn write_hypercall_msr(enable: bool) {
}

/// Has to be called before using hypercalls.
pub(crate) fn initialize(guest_os_id: u64) {
// We are assuming we are running under a Microsoft hypervisor, so there is
pub(crate) fn initialize(guest_os_id: HvGuestOsMicrosoft) {
// TODO: For now, we are assuming we are running under a Microsoft hypervisor, so there is
// no need to check any cpuid leaves.
report_os_id(guest_os_id);
write_hypercall_msr(true);
Expand All @@ -50,5 +50,5 @@ pub(crate) fn initialize(guest_os_id: u64) {
/// Call before jumping to kernel.
pub(crate) fn uninitialize() {
write_hypercall_msr(false);
report_os_id(0);
report_os_id(0.into());
}
17 changes: 17 additions & 0 deletions openhcl/openhcl_boot/src/arch/x86_64/memory.rs
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,8 @@
use super::address_space::LocalMap;
use super::address_space::init_local_map;
use crate::ShimParams;
use crate::arch::TdxHypercallPage;
use crate::arch::x86_64::address_space::tdx_share_large_page;
use crate::host_params::PartitionInfo;
use crate::host_params::shim_params::IsolationType;
use crate::hypercall::hvcall;
Expand Down Expand Up @@ -118,6 +120,21 @@ pub fn setup_vtl2_memory(shim_params: &ShimParams, partition_info: &PartitionInf
accept_pending_vtl2_memory(shim_params, &mut local_map, ram_buffer, imported_range);
}
}

// For TDVMCALL based hypercalls, take the first 2 MB region from ram_buffer for
// hypercall IO pages. ram_buffer must not be used again beyond this point
// TODO: find an approach that does not require re-using the ram_buffer
if shim_params.isolation_type == IsolationType::Tdx {
let free_buffer = ram_buffer.as_mut_ptr() as u64;
assert!(free_buffer % X64_LARGE_PAGE_SIZE == 0);
// SAFETY: The bottom 2MB region of the ram_buffer is unused by the shim
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This safety comment isn't strong enough - see the discussion in the TdxHypercallPage type

// The region is aligned to 2MB, and mapped as a large page
let tdx_io_page = unsafe {
tdx_share_large_page(free_buffer);
TdxHypercallPage::new(free_buffer)
};
hvcall().initialize_tdx(tdx_io_page);
}
}

/// Accepts VTL2 memory in the specified gpa range.
Expand Down
1 change: 1 addition & 0 deletions openhcl/openhcl_boot/src/arch/x86_64/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ mod vp;
mod vsm;

use crate::host_params::shim_params::IsolationType;
pub use address_space::TdxHypercallPage;
pub use memory::setup_vtl2_memory;
pub use memory::verify_imported_regions_hash;
use safe_intrinsics::cpuid;
Expand Down
100 changes: 100 additions & 0 deletions openhcl/openhcl_boot/src/arch/x86_64/tdx.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,16 +3,68 @@

//! TDX support.

use crate::arch::x86_64::address_space::TdxHypercallPage;
use crate::arch::x86_64::address_space::tdx_unshare_large_page;
use crate::host_params::PartitionInfo;
use crate::hvcall;
use crate::single_threaded::SingleThreaded;
use core::arch::asm;
use core::cell::Cell;
use loader_defs::shim::TdxTrampolineContext;
use memory_range::MemoryRange;
use safe_intrinsics::cpuid;
use tdcall::AcceptPagesError;
use tdcall::Tdcall;
use tdcall::TdcallInput;
use tdcall::TdcallOutput;
use tdcall::tdcall_hypercall;
use tdcall::tdcall_map_gpa;
use tdcall::tdcall_wrmsr;
use x86defs::X64_LARGE_PAGE_SIZE;
use x86defs::tdx::RESET_VECTOR_PAGE;
use x86defs::tdx::TdVmCallR10Result;

/// Writes the guest OS ID MSR (`HV_X64_MSR_GUEST_OS_ID`) through a TDCALL-based
/// MSR write to tell the hypervisor the OS ID for the boot shim.
///
/// # Panics
/// Panics if the TDCALL MSR write reports an error.
fn report_os_id(guest_os_id: u64) {
    tdcall_wrmsr(
        &mut TdcallInstruction,
        hvdef::HV_X64_MSR_GUEST_OS_ID,
        guest_os_id,
    )
    .expect("writing the guest OS ID MSR via tdcall should not fail");
}

/// Set up hypercalls for a TDX L1: report the guest OS ID, then make the
/// large page holding the hypercall I/O buffers visible to the host.
pub fn initialize_hypercalls(guest_os_id: u64, io: &TdxHypercallPage) {
    // TODO: We are assuming we are running under a Microsoft hypervisor, so there is
    // no need to check any cpuid leaves.
    report_os_id(guest_os_id);

    // Enable host visibility for the whole large page backing the hypercall buffers
    let page_start = io.base();
    let page_end = page_start + X64_LARGE_PAGE_SIZE;
    change_page_visibility(MemoryRange::new(page_start..page_end), true);
}

/// Uninitialize hypercalls for a TDX L1, stop sharing the hypercall I/O pages with the HV
///
/// Takes the `TdxHypercallPage` by value: it is handed to `tdx_unshare_large_page`,
/// so the page wrapper cannot be reused once sharing has been torn down.
pub fn uninitialize_hypercalls(io: TdxHypercallPage) {
// Clear the guest OS ID that was reported during initialization
report_os_id(0);

// The range must be computed before `io` is moved into `tdx_unshare_large_page`
let hypercall_page_range = MemoryRange::new(io.base()..io.base() + X64_LARGE_PAGE_SIZE);
// Clear the shared bit in the page's PDE
tdx_unshare_large_page(io);

// Disable host visibility for hypercall page
change_page_visibility(hypercall_page_range, false);
// Accept the range again now that it is no longer host visible
accept_pages(hypercall_page_range).expect("pages previously accepted by the bootshim should be reaccepted without failure when sharing permissions are changed");

// Reload CR3 with its current value so the modified PDE takes effect
// SAFETY: Flushing the TLB has no pre or post conditions required by the caller, and thus is safe
unsafe {
asm! {
"mov rax, cr3",
"mov cr3, rax",
out("rax") _,
}
}
}

/// Perform a tdcall instruction with the specified inputs.
fn tdcall(input: TdcallInput) -> TdcallOutput {
Expand Down Expand Up @@ -98,6 +150,21 @@ impl minimal_rt::arch::IoAccess for TdxIoAccess {
}
}

/// Issues a hypercall through a TDCALL, using the buffers of the given hypercall page.
///
/// `io.input()` and `io.output()` are passed as the input and output buffer
/// addresses. A successful call yields a zero output value; on failure the
/// code carried by `TdVmCallR10Result` is converted into the returned output.
pub fn invoke_tdcall_hypercall(
    control: hvdef::hypercall::Control,
    io: &TdxHypercallPage,
) -> hvdef::hypercall::HypercallOutput {
    match tdcall_hypercall(&mut TdcallInstruction, control, io.input(), io.output()) {
        Ok(()) => 0.into(),
        Err(TdVmCallR10Result(return_code)) => return_code.into(),
    }
}

/// Global variable to store tsc frequency.
static TSC_FREQUENCY: SingleThreaded<Cell<u64>> = SingleThreaded(Cell::new(0));

Expand All @@ -119,3 +186,36 @@ pub fn get_tdx_tsc_reftime() -> Option<u64> {
}
None
}

/// Prepare the `TdxTrampolineContext` for AP startup.
///
/// Sets the control register bits needed by the APs and zeroes the descriptor
/// table limits and selectors so that LGDT is skipped; as a result the GDT page
/// does not need to be added to the e820 entries.
pub fn tdx_prepare_ap_trampoline() {
    let trampoline = RESET_VECTOR_PAGE as *mut TdxTrampolineContext;
    // SAFETY: The TdxTrampolineContext is known to be stored at the architectural reset vector address
    let ctx = unsafe { trampoline.as_mut() }.unwrap();

    // Control registers required for AP startup
    ctx.cr0 |= x86defs::X64_CR0_PG | x86defs::X64_CR0_PE | x86defs::X64_CR0_NE;
    ctx.cr4 |= x86defs::X64_CR4_PAE | x86defs::X64_CR4_MCE;

    // Zeroed limits and selectors
    ctx.gdtr_limit = 0;
    ctx.idtr_limit = 0;
    ctx.code_selector = 0;
    ctx.task_selector = 0;
}

/// Bring up the VTL2 APs on a TDX-isolated VM.
///
/// Every VP other than VP 0 is first enabled for VTL2, then every one of them
/// is started, and finally the trampoline context is prepared for AP startup.
///
/// # Panics
/// Panics if enabling or starting any VP fails.
pub fn setup_vtl2_vp(partition_info: &PartitionInfo) {
    let vp_count = partition_info.cpus.len();

    // Enable all APs before any of them is started
    (1..vp_count).for_each(|cpu| {
        hvcall()
            .tdx_enable_vp_vtl2(cpu as u32)
            .expect("enabling vp should not fail");
    });

    // Start VPs on Tdx-isolated VMs by sending TDVMCALL-based hypercall HvCallStartVirtualProcessor
    (1..vp_count).for_each(|cpu| {
        hvcall()
            .tdx_start_vp(cpu as u32)
            .expect("start vp should not fail");
    });

    // Update the TDX Trampoline Context for AP Startup
    tdx_prepare_ap_trampoline();
}
9 changes: 6 additions & 3 deletions openhcl/openhcl_boot/src/arch/x86_64/vp.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,9 +3,12 @@

//! Setting up VTL2 VPs

use crate::IsolationType;
use crate::host_params::PartitionInfo;

pub fn setup_vtl2_vp(_partition_info: &PartitionInfo) {
// X64 doesn't require any special VTL2 VP setup in the boot loader at the
// moment.
pub fn setup_vtl2_vp(partition_info: &PartitionInfo) {
// Only TDX requires VP initialization in the shim on x86
if partition_info.isolation == IsolationType::Tdx {
crate::arch::tdx::setup_vtl2_vp(partition_info);
};
}
Loading