diff --git a/src/java.base/share/classes/java/lang/VirtualThread.java b/src/java.base/share/classes/java/lang/VirtualThread.java index f058f967b9135..f650436a4478a 100644 --- a/src/java.base/share/classes/java/lang/VirtualThread.java +++ b/src/java.base/share/classes/java/lang/VirtualThread.java @@ -579,6 +579,13 @@ private void afterYield() { setState(newState = TIMED_PARKED); } + // Full fence (StoreLoad) to ensure the PARKED/TIMED_PARKED state + // is visible before reading parkPermit (Dekker pattern with + // unpark which writes parkPermit then reads state). + // Note: storeFence is insufficient — it only guarantees + // LoadStore+StoreStore ordering, not the StoreLoad ordering needed here. + U.fullFence(); + // may have been unparked while parking if (parkPermit && compareAndSetState(newState, UNPARKED)) { // lazy submit if local queue is empty @@ -604,6 +611,10 @@ private void afterYield() { if (s == BLOCKING) { setState(BLOCKED); + // Full fence (StoreLoad) for Dekker pattern with unblock + // which writes blockPermit then reads state. + U.fullFence(); + // may have been unblocked while blocking if (blockPermit && compareAndSetState(BLOCKED, UNBLOCKED)) { // lazy submit if local queue is empty @@ -619,6 +630,9 @@ private void afterYield() { boolean interruptible = interruptibleWait; if (s == WAITING) { setState(newState = WAIT); + // Full fence (StoreLoad) for Dekker pattern with notify + // which writes notified then reads state. + U.fullFence(); // may have been notified while in transition blocked = notified && compareAndSetState(WAIT, BLOCKED); } else { @@ -635,6 +649,9 @@ private void afterYield() { byte seqNo = ++timedWaitSeqNo; timeoutTask = schedule(() -> waitTimeoutExpired(seqNo), timeout, MILLISECONDS); setState(newState = TIMED_WAIT); + // Full fence (StoreLoad) for Dekker pattern with notify + // which writes notified then reads state. + U.fullFence(); // May have been notified while in transition. 
This must be done while // holding the monitor to avoid changing the state of a new timed wait call. blocked = notified && compareAndSetState(TIMED_WAIT, BLOCKED); @@ -675,6 +692,15 @@ private void afterDone(boolean notifyContainer) { assert carrierThread == null; setState(TERMINATED); + // Full fence (StoreLoad) to ensure the TERMINATED state is + // visible before reading notifyAllAfterTerminate (Dekker pattern + // with beforeJoin which writes notifyAllAfterTerminate then + // reads state). Intended to prevent a missed wakeup on ARM64 in + // which both sides miss each other's store. NOTE(review): if both + // accesses are volatile, the Java memory model (JLS 17.4) already + // orders them; confirm this fence is needed. + U.fullFence(); + // notifyAll to wakeup any threads waiting for this thread to terminate if (notifyAllAfterTerminate) { synchronized (this) { @@ -870,6 +896,10 @@ private void parkOnCarrierThread(boolean timed, long nanos) { */ private void unpark(boolean lazySubmit) { if (!getAndSetParkPermit(true) && currentThread() != this) { + // Full fence (StoreLoad) to ensure parkPermit=true is visible + // before reading state (Dekker pattern with afterYield PARKING + // path which writes state then reads parkPermit). 
+ U.fullFence(); int s = state(); // unparked while parked @@ -912,6 +942,7 @@ void unpark() { private void unblock() { assert !Thread.currentThread().isVirtual(); blockPermit = true; + U.fullFence(); // Full fence (StoreLoad) for Dekker with afterYield BLOCKING path if (state() == BLOCKED && compareAndSetState(BLOCKED, UNBLOCKED)) { submitRunContinuation(); } diff --git a/src/java.base/share/classes/java/util/concurrent/LinkedTransferQueue.java b/src/java.base/share/classes/java/util/concurrent/LinkedTransferQueue.java index 787d8d5fecd87..558c5ab5c2bc8 100644 --- a/src/java.base/share/classes/java/util/concurrent/LinkedTransferQueue.java +++ b/src/java.base/share/classes/java/util/concurrent/LinkedTransferQueue.java @@ -590,6 +590,11 @@ final Object xfer(Object e, long ns) { q = p.next; if (p.isData != haveData && haveData != (m != null)) { if (p.cmpExItem(m, e) == m) { + // Full fence (StoreLoad) for Dekker with await() which + // writes waiter then reads item. On ARM64, CAS + // (ldaxr/stlxr) + plain load to a different field does + // NOT provide StoreLoad ordering. + VarHandle.fullFence(); Thread w = p.waiter; // matched complementary node if (p != h && h == cmpExHead(h, (q == null) ? p : q)) h.next = h; // advance head; self-link old diff --git a/src/java.base/share/classes/java/util/concurrent/SynchronousQueue.java b/src/java.base/share/classes/java/util/concurrent/SynchronousQueue.java index 49efe5d5c2c0d..c74a2483aa4b4 100644 --- a/src/java.base/share/classes/java/util/concurrent/SynchronousQueue.java +++ b/src/java.base/share/classes/java/util/concurrent/SynchronousQueue.java @@ -177,6 +177,11 @@ final Object xferLifo(Object e, long ns) { else if (p.cmpExItem(m, e) != m) p = head; // missed; restart else { // matched complementary node + // Full fence (StoreLoad) for Dekker with await() which + // writes waiter then reads item. On ARM64, CAS + // (ldaxr/stlxr) + plain load to a different field does + // NOT provide StoreLoad ordering. 
+ VarHandle.fullFence(); Thread w = p.waiter; cmpExHead(p, p.next); LockSupport.unpark(w); diff --git a/src/java.base/share/classes/java/util/concurrent/locks/AbstractQueuedSynchronizer.java b/src/java.base/share/classes/java/util/concurrent/locks/AbstractQueuedSynchronizer.java index c077954508341..dfdde38642fd8 100644 --- a/src/java.base/share/classes/java/util/concurrent/locks/AbstractQueuedSynchronizer.java +++ b/src/java.base/share/classes/java/util/concurrent/locks/AbstractQueuedSynchronizer.java @@ -782,6 +782,13 @@ final int acquire(Node node, int arg, boolean shared, Thread.onSpinWait(); } else if (node.status == 0) { node.status = WAITING; // enable signal and recheck + // Full fence (StoreLoad) to ensure WAITING status is visible + // before re-reading state in tryAcquire/tryAcquireShared + // (Dekker pattern with releaseShared/release which writes + // state then reads node.status in signalNext). + // NOTE(review): on ARM64 a volatile read (ldar) is not reordered + // before an earlier volatile write (stlr); confirm this is needed. + U.fullFence(); } else { spins = postSpins = (byte)((postSpins << 1) | 1); try { @@ -1097,6 +1104,13 @@ public final boolean tryAcquireNanos(int arg, long nanosTimeout) */ public final boolean release(int arg) { if (tryRelease(arg)) { + // Full fence (StoreLoad) to ensure the state update from + // tryRelease is visible before reading node.status in signalNext + // (Dekker pattern: release writes state then reads status, + // acquire writes status then reads state). + // NOTE(review): on ARM64 a store-release (stlxr/stlr) followed by + // ldar stays in program order; confirm this fence is needed. + U.fullFence(); signalNext(head); return true; } @@ -1184,6 +1198,8 @@ public final boolean tryAcquireSharedNanos(int arg, long nanosTimeout) */ public final boolean releaseShared(int arg) { if (tryReleaseShared(arg)) { + // Full fence (StoreLoad) — see comment in release() + U.fullFence(); signalNext(head); return true; }