Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
95 changes: 0 additions & 95 deletions src/hotspot/os_cpu/windows_aarch64/atomic_windows_aarch64.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -108,101 +108,6 @@ DEFINE_INTRINSIC_CMPXCHG(InterlockedCompareExchange64, __int64)

#undef DEFINE_INTRINSIC_CMPXCHG

// Override PlatformLoad and PlatformStore to use LDAR/STLR on Windows AArch64.
//
// The generic PlatformLoad and PlatformStore use plain volatile dereferences.
// With /volatile:ms (set in flags-cflags.m4 for AArch64), MSVC already compiles
// those to LDAR/STLR, so these overrides produce identical codegen. They are
// retained as defense-in-depth: they guarantee acquire/release semantics for
// Atomic::load()/Atomic::store() regardless of the compiler flag setting,
// ensuring correct cross-core visibility for HotSpot's lock-free algorithms
// (ObjectMonitor Dekker protocols, ParkEvent signaling, etc.) even if
// /volatile:ms were ever removed or overridden.

// 1-byte atomic load with acquire semantics (MSVC __ldar8 -> LDARB).
template<>
struct Atomic::PlatformLoad<1> {
  template<typename T>
  T operator()(T const volatile* dest) const {
    STATIC_ASSERT(sizeof(T) == 1);
    // The intrinsic takes a non-const pointer, so shed constness first.
    T volatile* src = const_cast<T volatile*>(dest);
    unsigned __int8 raw = __ldar8(reinterpret_cast<unsigned __int8 volatile*>(src));
    return PrimitiveConversions::cast<T>(raw);
  }
};

// 2-byte atomic load with acquire semantics (MSVC __ldar16 -> LDARH).
template<>
struct Atomic::PlatformLoad<2> {
  template<typename T>
  T operator()(T const volatile* dest) const {
    STATIC_ASSERT(sizeof(T) == 2);
    // The intrinsic takes a non-const pointer, so shed constness first.
    T volatile* src = const_cast<T volatile*>(dest);
    unsigned __int16 raw = __ldar16(reinterpret_cast<unsigned __int16 volatile*>(src));
    return PrimitiveConversions::cast<T>(raw);
  }
};

// 4-byte atomic load with acquire semantics (MSVC __ldar32 -> LDAR Wt).
template<>
struct Atomic::PlatformLoad<4> {
  template<typename T>
  T operator()(T const volatile* dest) const {
    STATIC_ASSERT(sizeof(T) == 4);
    // The intrinsic takes a non-const pointer, so shed constness first.
    T volatile* src = const_cast<T volatile*>(dest);
    unsigned __int32 raw = __ldar32(reinterpret_cast<unsigned __int32 volatile*>(src));
    return PrimitiveConversions::cast<T>(raw);
  }
};

// 8-byte atomic load with acquire semantics (MSVC __ldar64 -> LDAR Xt).
template<>
struct Atomic::PlatformLoad<8> {
  template<typename T>
  T operator()(T const volatile* dest) const {
    STATIC_ASSERT(sizeof(T) == 8);
    // The intrinsic takes a non-const pointer, so shed constness first.
    T volatile* src = const_cast<T volatile*>(dest);
    unsigned __int64 raw = __ldar64(reinterpret_cast<unsigned __int64 volatile*>(src));
    return PrimitiveConversions::cast<T>(raw);
  }
};

// 1-byte atomic store with release semantics (MSVC __stlr8 -> STLRB).
template<>
struct Atomic::PlatformStore<1> {
  template<typename T>
  void operator()(T volatile* dest, T new_value) const {
    STATIC_ASSERT(sizeof(T) == 1);
    // Reinterpret the value as the raw unsigned type the intrinsic expects.
    unsigned __int8 bits = PrimitiveConversions::cast<unsigned __int8>(new_value);
    __stlr8(reinterpret_cast<unsigned __int8 volatile*>(dest), bits);
  }
};

// 2-byte atomic store with release semantics (MSVC __stlr16 -> STLRH).
template<>
struct Atomic::PlatformStore<2> {
  template<typename T>
  void operator()(T volatile* dest, T new_value) const {
    STATIC_ASSERT(sizeof(T) == 2);
    // Reinterpret the value as the raw unsigned type the intrinsic expects.
    unsigned __int16 bits = PrimitiveConversions::cast<unsigned __int16>(new_value);
    __stlr16(reinterpret_cast<unsigned __int16 volatile*>(dest), bits);
  }
};

// 4-byte atomic store with release semantics (MSVC __stlr32 -> STLR Wt).
template<>
struct Atomic::PlatformStore<4> {
  template<typename T>
  void operator()(T volatile* dest, T new_value) const {
    STATIC_ASSERT(sizeof(T) == 4);
    // Reinterpret the value as the raw unsigned type the intrinsic expects.
    unsigned __int32 bits = PrimitiveConversions::cast<unsigned __int32>(new_value);
    __stlr32(reinterpret_cast<unsigned __int32 volatile*>(dest), bits);
  }
};

// 8-byte atomic store with release semantics (MSVC __stlr64 -> STLR Xt).
template<>
struct Atomic::PlatformStore<8> {
  template<typename T>
  void operator()(T volatile* dest, T new_value) const {
    STATIC_ASSERT(sizeof(T) == 8);
    // Reinterpret the value as the raw unsigned type the intrinsic expects.
    unsigned __int64 bits = PrimitiveConversions::cast<unsigned __int64>(new_value);
    __stlr64(reinterpret_cast<unsigned __int64 volatile*>(dest), bits);
  }
};

// Specialize PlatformOrderedLoad and PlatformOrderedStore to use MSVC's
// __ldar/__stlr intrinsics, matching the Linux AArch64 implementation which
// uses __atomic_load/__atomic_store with __ATOMIC_ACQUIRE/__ATOMIC_RELEASE.
Expand Down
17 changes: 5 additions & 12 deletions src/hotspot/os_cpu/windows_aarch64/orderAccess_windows_aarch64.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -26,29 +26,22 @@
#define OS_CPU_WINDOWS_AARCH64_ORDERACCESS_WINDOWS_AARCH64_HPP

// Included in orderAccess.hpp header file.
#include <atomic>
using std::atomic_thread_fence;
#include <arm64intr.h>
#include "vm_version_aarch64.hpp"
#include "runtime/vm_version.hpp"

// Implementation of class OrderAccess.
//
// Use the MSVC __dmb() intrinsic directly rather than C++ std::atomic_thread_fence().
// Microsoft documents that __dmb() "inserts compiler blocks to prevent instruction
// reordering" in addition to emitting the hardware DMB instruction. This is critical
// because HotSpot uses volatile (non-std::atomic) fields throughout the runtime, and
// std::atomic_thread_fence() is only defined by the C++ standard to order std::atomic
// operations — it may not act as a compiler barrier for volatile/non-atomic accesses
// on ARM64 with /volatile:iso. Using __dmb() ensures correct ordering for the Dekker
// protocol in ObjectMonitor::exit() and similar patterns throughout HotSpot.

// Map the four two-way barriers onto the three primitive barriers:
// load-load and load-store ordering are provided by acquire(),
// store-store ordering by release(), and store-load ordering — the
// only one acquire/release cannot provide — by a full fence().
inline void OrderAccess::loadload() { acquire(); }
inline void OrderAccess::storestore() { release(); }
inline void OrderAccess::loadstore() { acquire(); }
inline void OrderAccess::storeload() { fence(); }

#define READ_MEM_BARRIER __dmb(_ARM64_BARRIER_ISHLD)
#define WRITE_MEM_BARRIER __dmb(_ARM64_BARRIER_ISH)
#define FULL_MEM_BARRIER __dmb(_ARM64_BARRIER_ISH)
#define READ_MEM_BARRIER atomic_thread_fence(std::memory_order_acquire);
#define WRITE_MEM_BARRIER atomic_thread_fence(std::memory_order_release);
#define FULL_MEM_BARRIER atomic_thread_fence(std::memory_order_seq_cst);

inline void OrderAccess::acquire() {
READ_MEM_BARRIER;
Expand Down
Loading