78 changes: 78 additions & 0 deletions akka-actor/src/main/scala/akka/dispatch/SuspendDetector.scala
@@ -0,0 +1,78 @@
/*
* Copyright (C) 2021 Lightbend Inc. <https://www.lightbend.com>
*/

package akka.dispatch

import scala.concurrent.duration._

import akka.actor.ActorSystem
import akka.actor.ClassicActorSystemProvider
import akka.actor.ExtendedActorSystem
import akka.actor.Extension
import akka.actor.ExtensionId
import akka.actor.ExtensionIdProvider
import akka.event.Logging

object SuspendDetector extends ExtensionId[SuspendDetector] with ExtensionIdProvider {
override def get(system: ActorSystem): SuspendDetector = super.get(system)

override def get(system: ClassicActorSystemProvider): SuspendDetector = super.get(system)

override def lookup = SuspendDetector

override def createExtension(system: ExtendedActorSystem): SuspendDetector = new SuspendDetector(system)

/**
* Published to the ActorSystem's eventStream when process suspension has been detected.
* Note that this message could be stale by the time it is received, so an additional check with
* [[SuspendDetected#wasSuspended]] is recommended to verify that the suspension occurred recently.
*/
final class SuspendDetected(suspendDetectedNanoTime: Long) {
def wasSuspended(since: FiniteDuration): Boolean =
(System.nanoTime() - suspendDetectedNanoTime <= since.toNanos)
}

}

class SuspendDetector(val system: ExtendedActorSystem) extends Extension {
import SuspendDetector.SuspendDetected

// FIXME config
private val tickInterval = 100.millis
Contributor Author

This might not have to run this frequently, because I think the scheduled task will trigger immediately when the process is woken up.

private val tickDeadlineNanos = 5.seconds.toNanos // FIXME default should be > 30 seconds

private val log = Logging(system, classOf[SuspendDetector])

@volatile private var aliveTime = System.nanoTime()
@volatile private var suspendDetectedTime = aliveTime - 1.day.toNanos

system.scheduler.scheduleWithFixedDelay(tickInterval, tickInterval) { () =>
Contributor

Wouldn't this approach (scheduling checkTime on the scheduler and dispatcher) yield a false positive in cases of thread starvation within the application? Thread starvation like that is definitely https://www.youtube.com/watch?v=MpUWrl3-mc8, but the response of pre-emptively tearing things down might be out of proportion; so we have a very loose deadline.

It seems like the intent is to capture scenarios where nothing in the JVM process gets scheduled (suspended process/VM, but also sufficiently-high levels of host load or priority inversions etc.), so maybe having the check run in a thread that is nearly always sleeping would be more effective and could have a faster reaction time? My understanding is that the kernel schedulers tend to be eager to give CPU to processes that haven't been runnable in a long time (e.g. because they're sleeping).

checkTime()
}(system.dispatcher)
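
A rough sketch of the reviewer's suggestion above, not part of this patch: run the check on a dedicated, nearly-always-sleeping daemon thread so that it does not depend on the scheduler or the default dispatcher being responsive. The thread name is made up; tickInterval, tickDeadlineNanos and log are the fields introduced in this file.

  // Not part of the patch: hypothetical alternative where the check runs on a dedicated
  // daemon thread that is nearly always sleeping, instead of on the scheduler/dispatcher.
  private val suspendCheckThread = new Thread("akka-suspend-detector") {
    override def run(): Unit = {
      var lastTick = System.nanoTime()
      while (!Thread.currentThread().isInterrupted) {
        try Thread.sleep(tickInterval.toMillis) // nearly always sleeping
        catch { case _: InterruptedException => return }
        val now = System.nanoTime()
        if (now - lastTick >= tickDeadlineNanos)
          log.warning("Process was suspended for [{} seconds]", (now - lastTick).nanos.toSeconds)
        lastTick = now
      }
    }
  }
  suspendCheckThread.setDaemon(true)
  suspendCheckThread.start()

Publishing SuspendDetected and updating suspendDetectedTime would still have to happen as in checkTime() below.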

private def checkTime(): Boolean = synchronized {
val now = System.nanoTime()
val suspendDetected =
if (now - aliveTime >= tickDeadlineNanos) {
suspendDetectedTime = now
true
} else {
false
}

if (suspendDetected) {
log.warning("Process was suspended for [{} seconds]", (now - aliveTime).nanos.toSeconds)
system.eventStream.publish(new SuspendDetected(now))
}

aliveTime = now

suspendDetected
}

def wasSuspended(since: FiniteDuration): Boolean = {
checkTime() || (System.nanoTime() - suspendDetectedTime <= since.toNanos)
}

}
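
For orientation, a minimal usage sketch against the API introduced in this file; the SuspendListener actor and the 10-second staleness window are hypothetical:

  import scala.concurrent.duration._

  import akka.actor.Actor
  import akka.actor.ActorLogging
  import akka.dispatch.SuspendDetector

  // Hypothetical subscriber that reacts to suspension notifications from the detector.
  class SuspendListener extends Actor with ActorLogging {
    // looking up the extension also makes sure it is started
    SuspendDetector(context.system)
    context.system.eventStream.subscribe(self, classOf[SuspendDetector.SuspendDetected])

    def receive: Receive = {
      case s: SuspendDetector.SuspendDetected =>
        // the notification could be stale by the time it is processed,
        // so confirm that the suspension happened recently
        if (s.wasSuspended(10.seconds))
          log.warning("Recent process suspension detected")
    }
  }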
10 changes: 6 additions & 4 deletions akka-cluster/src/main/scala/akka/cluster/ClusterDaemon.scala
@@ -9,8 +9,8 @@ import scala.concurrent.Future
import scala.concurrent.Promise
import scala.concurrent.duration._
import scala.util.control.NonFatal

import scala.annotation.nowarn

import com.typesafe.config.Config

import akka.Done
@@ -19,11 +19,12 @@ import akka.actor.SupervisorStrategy.Stop
import akka.annotation.InternalApi
import akka.cluster.ClusterEvent._
import akka.cluster.MemberStatus._
import akka.dispatch.{ RequiresMessageQueue, UnboundedMessageQueueSemantics }
import akka.dispatch.SuspendDetector
import akka.dispatch.{RequiresMessageQueue, UnboundedMessageQueueSemantics}
import akka.event.ActorWithLogClass
import akka.event.Logging
import akka.pattern.ask
import akka.remote.{ QuarantinedEvent => ClassicQuarantinedEvent }
import akka.remote.{QuarantinedEvent => ClassicQuarantinedEvent}
import akka.remote.artery.QuarantinedEvent
import akka.util.Timeout
import akka.util.Version
@@ -1492,7 +1493,8 @@ private[cluster] class ClusterCoreDaemon(publisher: ActorRef, joinConfigCompatCh
* Reaps the unreachable members according to the failure detector's verdict.
*/
def reapUnreachableMembers(): Unit = {
if (!isSingletonCluster) {
// FIXME how long should the suspend timeout be?
if (!isSingletonCluster && !SuspendDetector(context.system).wasSuspended(3.seconds)) {
// only scrutinize if we are a non-singleton cluster

val localGossip = latestGossip
@@ -51,6 +51,9 @@ import akka.coordination.lease.scaladsl.Lease
case object DownSelfQuarantinedByRemote extends Decision {
override def isIndirectlyConnected: Boolean = false
}
case object DownSelfSuspended extends Decision {
override def isIndirectlyConnected: Boolean = false
}
}

/**
@@ -278,6 +281,9 @@ import akka.coordination.lease.scaladsl.Lease
case DownSelfQuarantinedByRemote =>
if (downable.contains(selfUniqueAddress)) Set(selfUniqueAddress)
else Set.empty
case DownSelfSuspended =>
if (downable.contains(selfUniqueAddress)) Set(selfUniqueAddress)
else Set.empty
}
}

106 changes: 63 additions & 43 deletions akka-cluster/src/main/scala/akka/cluster/sbr/SplitBrainResolver.scala
@@ -27,6 +27,7 @@ import akka.cluster.Member
import akka.cluster.Reachability
import akka.cluster.UniqueAddress
import akka.cluster.sbr.DowningStrategy.Decision
import akka.dispatch.SuspendDetector
import akka.event.DiagnosticMarkerBusLoggingAdapter
import akka.event.Logging
import akka.pattern.pipe
@@ -146,6 +147,10 @@ import akka.remote.artery.ThisActorSystemQuarantinedEvent

val log: DiagnosticMarkerBusLoggingAdapter = Logging.withMarker(this)

private val suspendTimeout = 1.minute
private val suspendDetector = SuspendDetector(context.system) // make sure it's started
context.system.eventStream.subscribe(self, classOf[SuspendDetector.SuspendDetected])

@InternalStableApi
def strategy: DowningStrategy = _strategy

@@ -287,6 +292,7 @@ import akka.remote.artery.ThisActorSystemQuarantinedEvent
case Tick => tick()
case ThisActorSystemQuarantinedEvent(_, remote) => thisActorSystemWasQuarantined(remote)
case _: ClusterDomainEvent => // not interested in other events
case s: SuspendDetector.SuspendDetected => if (s.wasSuspended(suspendTimeout)) suspendDetected()
}

private def leaderChanged(leaderOption: Option[Address]): Unit = {
@@ -296,53 +302,58 @@
}

private def tick(): Unit = {
// note the DownAll due to instability is running on all nodes to make that decision as quickly and
// aggressively as possible if time is out
if (reachabilityChangedStats.changeCount > 0) {
val now = System.nanoTime()
val durationSinceLatestChange = (now - reachabilityChangedStats.latestChangeTimestamp).nanos
val durationSinceFirstChange = (now - reachabilityChangedStats.firstChangeTimestamp).nanos

val downAllWhenUnstableEnabled = downAllWhenUnstable > Duration.Zero
if (downAllWhenUnstableEnabled && durationSinceFirstChange > (stableAfter + downAllWhenUnstable)) {
log.warning(
ClusterLogMarker.sbrInstability,
"SBR detected instability and will down all nodes: {}",
reachabilityChangedStats)
actOnDecision(DownAll)
} else if (!downAllWhenUnstableEnabled && durationSinceLatestChange > (stableAfter * 2)) {
// downAllWhenUnstable is disabled but reset for meaningful logging
log.debug("SBR no reachability changes within {} ms, resetting stats", (stableAfter * 2).toMillis)
resetReachabilityChangedStats()
if (suspendDetector.wasSuspended(suspendTimeout)) {
// note that suspend detection is running on all nodes
suspendDetected()
} else {
Contributor Author

The changes here are mostly indentation; the if-suspended case was added above.

// note the DownAll due to instability is running on all nodes to make that decision as quickly and
// aggressively as possible if time is out
if (reachabilityChangedStats.changeCount > 0) {
val now = System.nanoTime()
val durationSinceLatestChange = (now - reachabilityChangedStats.latestChangeTimestamp).nanos
val durationSinceFirstChange = (now - reachabilityChangedStats.firstChangeTimestamp).nanos

val downAllWhenUnstableEnabled = downAllWhenUnstable > Duration.Zero
if (downAllWhenUnstableEnabled && durationSinceFirstChange > (stableAfter + downAllWhenUnstable)) {
log.warning(
ClusterLogMarker.sbrInstability,
"SBR detected instability and will down all nodes: {}",
reachabilityChangedStats)
actOnDecision(DownAll)
} else if (!downAllWhenUnstableEnabled && durationSinceLatestChange > (stableAfter * 2)) {
// downAllWhenUnstable is disabled but reset for meaningful logging
log.debug("SBR no reachability changes within {} ms, resetting stats", (stableAfter * 2).toMillis)
resetReachabilityChangedStats()
}
}
}

if (isResponsible && strategy.unreachable.nonEmpty && stableDeadline.isOverdue()) {
strategy.decide() match {
case decision: AcquireLeaseDecision =>
strategy.lease match {
case Some(lease) =>
if (lease.checkLease()) {
log.info(
ClusterLogMarker.sbrLeaseAcquired(decision),
"SBR has acquired lease for decision [{}]",
decision)
actOnDecision(decision)
} else {
if (decision.acquireDelay == Duration.Zero)
acquireLease() // reply message is AcquireLeaseResult
else {
log.debug("SBR delayed attempt to acquire lease for [{} ms]", decision.acquireDelay.toMillis)
timers.startSingleTimer(AcquireLease, AcquireLease, decision.acquireDelay)
if (isResponsible && strategy.unreachable.nonEmpty && stableDeadline.isOverdue()) {
strategy.decide() match {
case decision: AcquireLeaseDecision =>
strategy.lease match {
case Some(lease) =>
if (lease.checkLease()) {
log.info(
ClusterLogMarker.sbrLeaseAcquired(decision),
"SBR has acquired lease for decision [{}]",
decision)
actOnDecision(decision)
} else {
if (decision.acquireDelay == Duration.Zero)
acquireLease() // reply message is AcquireLeaseResult
else {
log.debug("SBR delayed attempt to acquire lease for [{} ms]", decision.acquireDelay.toMillis)
timers.startSingleTimer(AcquireLease, AcquireLease, decision.acquireDelay)
}
context.become(waitingForLease(decision))
}
context.become(waitingForLease(decision))
}
case None =>
throw new IllegalStateException("Unexpected lease decision although lease is not configured")
}
case None =>
throw new IllegalStateException("Unexpected lease decision although lease is not configured")
}

case decision =>
actOnDecision(decision)
case decision =>
actOnDecision(decision)
}
}
}

@@ -364,6 +375,15 @@ import akka.remote.artery.ThisActorSystemQuarantinedEvent
}
}

private def suspendDetected(): Unit = {
if (strategy.allMembersInDC.size > 1) {
log.warning(
ClusterLogMarker.sbrInstability,
"SBR detected that the process was suspended too long and will down itself.")
actOnDecision(DowningStrategy.DownSelfSuspended)
Contributor Author

This is how the logs from a kill -STOP followed by kill -CONT look:

[2021-06-21 12:17:18,570] [WARN] [akka.dispatch.SuspendDetector] [akka://[email protected]:2551] [] [ShoppingCartService-akka.actor.default-dispatcher-32] - Process was suspended for [21 seconds]
[2021-06-21 12:17:18,575] [WARN] [akka.cluster.sbr.SplitBrainResolver] [akka://[email protected]:2551] [akkaSbrInstability] [ShoppingCartService-akka.actor.default-dispatcher-32] - SBR detected that the process was suspended too long and will down itself.
[2021-06-21 12:17:18,575] [WARN] [akka.cluster.ClusterHeartbeat] [akka://[email protected]:2551] [akkaHeartbeatStarvation] [ShoppingCartService-akka.actor.default-dispatcher-32] - Cluster Node [akka://[email protected]:2551] - Scheduled sending of heartbeat was delayed. Previous heartbeat was sent [21634] ms ago, expected interval is [1000] ms. This may cause failure detection to mark members as unreachable. The reason can be thread starvation, CPU overload, or GC.
[2021-06-21 12:17:18,584] [WARN] [akka.cluster.sbr.SplitBrainResolver] [akka://[email protected]:2551] [akkaSbrDowning] [ShoppingCartService-akka.actor.default-dispatcher-32] - SBR took decision DownSelfSuspended and is downing [akka://[email protected]:2551] including myself,, [0] unreachable of [2] members, all members in DC [Member(akka://[email protected]:2551, Up), Member(akka://[email protected]:2552, Up)], full reachability status: []
[2021-06-21 12:17:18,585] [INFO] [akka.cluster.sbr.SplitBrainResolver] [akka://[email protected]:2551] [akkaSbrDowningNode] [ShoppingCartService-akka.actor.default-dispatcher-32] - SBR is downing [UniqueAddress(akka://[email protected]:2551,-2763929823204594617)]
[2021-06-21 12:17:18,585] [WARN] [akka.cluster.sbr.SplitBrainResolver] [akka://[email protected]:2551] [akkaSbrInstability] [ShoppingCartService-akka.actor.default-dispatcher-32] - SBR detected that the process was suspended too long and will down itself.
[2021-06-21 12:17:18,585] [WARN] [akka.cluster.sbr.SplitBrainResolver] [akka://[email protected]:2551] [akkaSbrDowning] [ShoppingCartService-akka.actor.default-dispatcher-32] - SBR took decision DownSelfSuspended and is downing [akka://[email protected]:2551] including myself,, [0] unreachable of [2] members, all members in DC [Member(akka://[email protected]:2551, Up), Member(akka://[email protected]:2552, Up)], full reachability status: []
[2021-06-21 12:17:18,585] [INFO] [akka.cluster.Cluster] [akka://[email protected]:2551] [akkaMemberChanged] [ShoppingCartService-akka.actor.default-dispatcher-32] - Cluster Node [akka://[email protected]:2551] - Marking node [akka://[email protected]:2551] as [Down]
[2021-06-21 12:17:18,586] [INFO] [akka.cluster.sbr.SplitBrainResolver] [akka://[email protected]:2551] [akkaSbrDowningNode] [ShoppingCartService-akka.actor.default-dispatcher-32] - SBR is downing [UniqueAddress(akka://[email protected]:2551,-2763929823204594617)]
[2021-06-21 12:17:18,586] [INFO] [akka.cluster.sharding.ShardRegion] [akka://[email protected]:2551] [] [ShoppingCartService-akka.actor.default-dispatcher-33] - ShoppingCart: Self downed, stopping ShardRegion [akka://ShoppingCartService/system/sharding/ShoppingCart]
[2021-06-21 12:17:18,586] [INFO] [akka.cluster.singleton.ClusterSingletonManager] [akka://[email protected]:2551] [] [ShoppingCartService-akka.actor.default-dispatcher-33] - Self downed, stopping
[2021-06-21 12:17:18,586] [INFO] [akka.cluster.Cluster] [akka://[email protected]:2551] [] [ShoppingCartService-akka.actor.default-dispatcher-33] - Cluster Node [akka://[email protected]:2551] - event MemberDowned(Member(akka://[email protected]:2551, Down))
[2021-06-21 12:17:18,586] [INFO] [akka.cluster.singleton.ClusterSingletonManager] [akka://[email protected]:2551] [] [ShoppingCartService-akka.actor.default-dispatcher-33] - Singleton manager stopping singleton actor [akka://ShoppingCartService/system/sharding/ShoppingCartCoordinator/singleton]
[2021-06-21 12:17:18,587] [INFO] [akka.cluster.Cluster] [akka://[email protected]:2551] [] [ShoppingCartService-akka.actor.default-dispatcher-31] - Cluster Node [akka://[email protected]:2551] - event LeaderChanged(Some(akka://[email protected]:2552))
[2021-06-21 12:17:18,587] [INFO] [akka.cluster.sbr.SplitBrainResolver] [akka://[email protected]:2551] [] [ShoppingCartService-akka.actor.default-dispatcher-31] - This node is not the leader any more and not responsible for taking SBR decisions.
[2021-06-21 12:17:18,587] [INFO] [akka.cluster.Cluster] [akka://[email protected]:2551] [] [ShoppingCartService-akka.actor.default-dispatcher-31] - Cluster Node [akka://[email protected]:2551] - event RoleLeaderChanged(dc-default,Some(akka://[email protected]:2552))
[2021-06-21 12:17:18,587] [INFO] [akka.cluster.singleton.ClusterSingletonManager] [akka://[email protected]:2551] [] [ShoppingCartService-akka.actor.default-dispatcher-33] - ClusterSingletonManager state change [Oldest -> Stopping]
[2021-06-21 12:17:18,588] [DEBUG] [shopping.cart.ShoppingCart$] [akka://[email protected]:2551] [] [ShoppingCartService-akka.actor.default-dispatcher-31] - PostStop cart [cart2]
[2021-06-21 12:17:18,589] [DEBUG] [akka.cluster.sharding.Shard] [akka://[email protected]:2551] [] [ShoppingCartService-akka.actor.default-dispatcher-3] - ShoppingCart: Shard [506] shutting down
[2021-06-21 12:17:18,591] [DEBUG] [akka.cluster.sharding.DDataShardCoordinator] [akka://[email protected]:2551] [] [ShoppingCartService-akka.actor.default-dispatcher-3] - ShoppingCart: Received termination message.
[2021-06-21 12:17:18,592] [DEBUG] [akka.cluster.Cluster] [akka://[email protected]:2551] [] [ShoppingCartService-akka.actor.default-dispatcher-3] - Unwatching: [Actor[akka://ShoppingCartService/system/sharding/ShoppingCartCoordinator/singleton/coordinator#1190468820] -> Actor[akka://[email protected]:2552/system/sharding/ShoppingCart#279229579]]
[2021-06-21 12:17:18,592] [DEBUG] [akka.cluster.Cluster] [akka://[email protected]:2551] [] [ShoppingCartService-akka.actor.default-dispatcher-3] - Cleanup self watch of [akka://[email protected]:2552/system/sharding/ShoppingCart]
[2021-06-21 12:17:18,593] [INFO] [akka.cluster.singleton.ClusterSingletonManager] [akka://[email protected]:2551] [akkaClusterSingletonTerminated] [ShoppingCartService-akka.actor.default-dispatcher-32] - Singleton actor [akka://ShoppingCartService/system/sharding/ShoppingCartCoordinator/singleton] was terminated
[2021-06-21 12:17:19,575] [WARN] [akka.cluster.sbr.SplitBrainResolver] [akka://[email protected]:2551] [akkaSbrInstability] [ShoppingCartService-akka.actor.default-dispatcher-35] - SBR detected that the process was suspended too long and will down itself.
[2021-06-21 12:17:19,575] [INFO] [akka.cluster.Cluster] [akka://[email protected]:2551] [] [ShoppingCartService-akka.actor.default-dispatcher-35] - Cluster Node [akka://[email protected]:2551] - is no longer leader
[2021-06-21 12:17:19,575] [WARN] [akka.cluster.sbr.SplitBrainResolver] [akka://[email protected]:2551] [akkaSbrDowning] [ShoppingCartService-akka.actor.default-dispatcher-35] - SBR took decision DownSelfSuspended and is downing [akka://[email protected]:2551] including myself,, [0] unreachable of [2] members, all members in DC [Member(akka://[email protected]:2551, Up), Member(akka://[email protected]:2552, Up)], full reachability status: []
[2021-06-21 12:17:19,575] [INFO] [akka.cluster.sbr.SplitBrainResolver] [akka://[email protected]:2551] [akkaSbrDowningNode] [ShoppingCartService-akka.actor.default-dispatcher-35] - SBR is downing [UniqueAddress(akka://[email protected]:2551,-2763929823204594617)]
[2021-06-21 12:17:19,576] [INFO] [akka.cluster.Cluster] [akka://[email protected]:2551] [] [ShoppingCartService-akka.actor.default-dispatcher-35] - Cluster Node [akka://[email protected]:2551] - Node has been marked as DOWN. Shutting down myself
[2021-06-21 12:17:19,577] [INFO] [akka.cluster.Cluster] [akka://[email protected]:2551] [] [ShoppingCartService-akka.actor.default-dispatcher-35] - Cluster Node [akka://[email protected]:2551] - Shutting down...
[2021-06-21 12:17:19,577] [DEBUG] [akka.cluster.typed.internal.receptionist.ClusterReceptionist] [akka://[email protected]:2551] [] [ShoppingCartService-akka.actor.internal-dispatcher-14] - ClusterReceptionist [akka://[email protected]:2551] - terminated/removed
[2021-06-21 12:17:19,578] [INFO] [akka.cluster.Cluster] [akka://[email protected]:2551] [] [ShoppingCartService-akka.actor.default-dispatcher-35] - Cluster Node [akka://[email protected]:2551] - Successfully shut down
[2021-06-21 12:17:19,579] [DEBUG] [akka.cluster.Cluster] [akka://[email protected]:2551] [] [ShoppingCartService-akka.actor.default-dispatcher-21] - Publish AddressTerminated [akka://[email protected]:2552]
[2021-06-21 12:17:19,580] [DEBUG] [akka.cluster.typed.internal.receptionist.ClusterReceptionist] [akka://[email protected]:2551] [] [ShoppingCartService-akka.actor.internal-dispatcher-22] - Cancel all timers
[2021-06-21 12:17:19,583] [INFO] [akka.actor.LocalActorRef] [akka://[email protected]:2551] [akkaDeadLetter] [ShoppingCartService-akka.actor.default-dispatcher-35] - Message [akka.cluster.ClusterUserAction$Down] to Actor[akka://ShoppingCartService/system/cluster/core/daemon#572391440] was not delivered. [1] dead letters encountered. If this is not an expected behavior then Actor[akka://ShoppingCartService/system/cluster/core/daemon#572391440] may have terminated unexpectedly. This logging can be turned off or adjusted with configuration settings 'akka.log-dead-letters' and 'akka.log-dead-letters-during-shutdown'.
[2021-06-21 12:17:19,583] [DEBUG] [akka.actor.CoordinatedShutdown] [akka://[email protected]:2551] [] [ShoppingCartService-akka.actor.default-dispatcher-35] - Running CoordinatedShutdown with reason [ClusterDowningReason]
[2021-06-21 12:17:19,583] [DEBUG] [akka.actor.CoordinatedShutdown] [akka://[email protected]:2551] [] [ShoppingCartService-akka.actor.default-dispatcher-35] - Performing phase [before-service-unbind] with [0] tasks
[2021-06-21 12:17:19,584] [DEBUG] [akka.actor.CoordinatedShutdown] [akka://[email protected]:2551] [] [ShoppingCartService-akka.actor.default-dispatcher-35] - Performing phase [service-unbind] with [1] tasks.
[2021-06-21 12:17:19,586] [DEBUG] [akka.actor.CoordinatedShutdown] [akka://[email protected]:2551] [] [ShoppingCartService-akka.actor.default-dispatcher-35] - Performing task [http-unbind-/127.0.0.1:8101] in CoordinatedShutdown phase [service-unbind]
[2021-06-21 12:17:19,588] [DEBUG] [akka.actor.CoordinatedShutdown] [akka://[email protected]:2551] [] [ShoppingCartService-akka.actor.default-dispatcher-3] - Performing phase [service-requests-done] with [1] tasks.
[2021-06-21 12:17:19,588] [DEBUG] [akka.actor.CoordinatedShutdown] [akka://[email protected]:2551] [] [ShoppingCartService-akka.actor.default-dispatcher-3] - Performing task [http-terminate-/127.0.0.1:8101] in CoordinatedShutdown phase [service-requests-done]
[2021-06-21 12:17:19,589] [INFO] [akka.actor.LocalActorRef] [akka://[email protected]:2551] [akkaDeadLetter] [ShoppingCartService-akka.actor.default-dispatcher-35] - Message [akka.cluster.GossipEnvelope] from Actor[akka://[email protected]:2552/system/cluster/core/daemon#1146845275] to Actor[akka://ShoppingCartService/system/cluster/core/daemon#572391440] was not delivered. [2] dead letters encountered. If this is not an expected behavior then Actor[akka://ShoppingCartService/system/cluster/core/daemon#572391440] may have terminated unexpectedly. This logging can be turned off or adjusted with configuration settings 'akka.log-dead-letters' and 'akka.log-dead-letters-during-shutdown'.
[2021-06-21 12:17:19,590] [INFO] [akka.actor.LocalActorRef] [akka://[email protected]:2551] [akkaDeadLetter] [ShoppingCartService-akka.actor.default-dispatcher-35] - Message [akka.cluster.GossipEnvelope] from Actor[akka://[email protected]:2552/system/cluster/core/daemon#1146845275] to Actor[akka://ShoppingCartService/system/cluster/core/daemon#572391440] was not delivered. [3] dead letters encountered. If this is not an expected behavior then Actor[akka://ShoppingCartService/system/cluster/core/daemon#572391440] may have terminated unexpectedly. This logging can be turned off or adjusted with configuration settings 'akka.log-dead-letters' and 'akka.log-dead-letters-during-shutdown'.
[2021-06-21 12:17:19,591] [DEBUG] [akka.actor.CoordinatedShutdown] [akka://[email protected]:2551] [] [ShoppingCartService-akka.actor.default-dispatcher-35] - Performing phase [service-stop] with [0] tasks
[2021-06-21 12:17:19,591] [DEBUG] [akka.actor.CoordinatedShutdown] [akka://[email protected]:2551] [] [ShoppingCartService-akka.actor.default-dispatcher-35] - Performing phase [before-cluster-shutdown] with [0] tasks
[2021-06-21 12:17:19,591] [DEBUG] [akka.actor.CoordinatedShutdown] [akka://[email protected]:2551] [] [ShoppingCartService-akka.actor.default-dispatcher-35] - Performing phase [cluster-sharding-shutdown-region] with [1] tasks.
[2021-06-21 12:17:19,591] [DEBUG] [akka.actor.CoordinatedShutdown] [akka://[email protected]:2551] [] [ShoppingCartService-akka.actor.default-dispatcher-35] - Performing task [region-shutdown] in CoordinatedShutdown phase [cluster-sharding-shutdown-region]
[2021-06-21 12:17:19,591] [DEBUG] [akka.actor.CoordinatedShutdown] [akka://[email protected]:2551] [] [ShoppingCartService-akka.actor.default-dispatcher-35] - Performing phase [cluster-leave] with [1] tasks.
[2021-06-21 12:17:19,591] [DEBUG] [akka.actor.CoordinatedShutdown] [akka://[email protected]:2551] [] [ShoppingCartService-akka.actor.default-dispatcher-35] - Performing task [leave] in CoordinatedShutdown phase [cluster-leave]
[2021-06-21 12:17:19,591] [DEBUG] [akka.actor.CoordinatedShutdown] [akka://[email protected]:2551] [] [ShoppingCartService-akka.actor.default-dispatcher-35] - Performing phase [cluster-exiting] with [4] tasks.
[2021-06-21 12:17:19,591] [DEBUG] [akka.actor.CoordinatedShutdown] [akka://[email protected]:2551] [] [ShoppingCartService-akka.actor.default-dispatcher-35] - Performing task [wait-exiting] in CoordinatedShutdown phase [cluster-exiting]
[2021-06-21 12:17:19,591] [DEBUG] [akka.actor.CoordinatedShutdown] [akka://[email protected]:2551] [] [ShoppingCartService-akka.actor.default-dispatcher-35] - Performing task [wait-singleton-exiting] in CoordinatedShutdown phase [cluster-exiting]
[2021-06-21 12:17:19,591] [DEBUG] [akka.actor.CoordinatedShutdown] [akka://[email protected]:2551] [] [ShoppingCartService-akka.actor.default-dispatcher-35] - Performing task [singleton-exiting-2] in CoordinatedShutdown phase [cluster-exiting]
[2021-06-21 12:17:19,591] [DEBUG] [akka.actor.CoordinatedShutdown] [akka://[email protected]:2551] [] [ShoppingCartService-akka.actor.default-dispatcher-35] - Performing task [singleton-exiting-1] in CoordinatedShutdown phase [cluster-exiting]
[2021-06-21 12:17:19,591] [DEBUG] [akka.actor.CoordinatedShutdown] [akka://[email protected]:2551] [] [ShoppingCartService-akka.actor.default-dispatcher-35] - Performing phase [cluster-exiting-done] with [1] tasks.
[2021-06-21 12:17:19,592] [DEBUG] [akka.actor.CoordinatedShutdown] [akka://[email protected]:2551] [] [ShoppingCartService-akka.actor.default-dispatcher-35] - Performing task [exiting-completed] in CoordinatedShutdown phase [cluster-exiting-done]
[2021-06-21 12:17:19,592] [DEBUG] [akka.actor.CoordinatedShutdown] [akka://[email protected]:2551] [] [ShoppingCartService-akka.actor.default-dispatcher-35] - Performing phase [cluster-shutdown] with [1] tasks.
[2021-06-21 12:17:19,592] [DEBUG] [akka.actor.CoordinatedShutdown] [akka://[email protected]:2551] [] [ShoppingCartService-akka.actor.default-dispatcher-35] - Performing task [wait-shutdown] in CoordinatedShutdown phase [cluster-shutdown]
[2021-06-21 12:17:19,592] [DEBUG] [akka.actor.CoordinatedShutdown] [akka://[email protected]:2551] [] [ShoppingCartService-akka.actor.default-dispatcher-35] - Performing phase [before-actor-system-terminate] with [0] tasks
[2021-06-21 12:17:19,592] [DEBUG] [akka.actor.CoordinatedShutdown] [akka://[email protected]:2551] [] [ShoppingCartService-akka.actor.default-dispatcher-35] - Performing phase [actor-system-terminate] with [1] tasks.
[2021-06-21 12:17:19,592] [DEBUG] [akka.actor.CoordinatedShutdown] [akka://[email protected]:2551] [] [ShoppingCartService-akka.actor.default-dispatcher-35] - Performing task [terminate-system] in CoordinatedShutdown phase [actor-system-terminate]
[2021-06-21 12:17:19,594] [INFO] [akka.remote.RemoteActorRefProvider$RemotingTerminator] [akka://[email protected]:2551] [] [ShoppingCartService-akka.actor.default-dispatcher-3] - Shutting down remote daemon.
[2021-06-21 12:17:19,595] [INFO] [akka.remote.RemoteActorRefProvider$RemotingTerminator] [akka://[email protected]:2551] [] [ShoppingCartService-akka.actor.default-dispatcher-3] - Remote daemon shut down; proceeding with flushing remote transports.
[2021-06-21 12:17:19,631] [WARN] [akka.stream.Materializer] [akka://[email protected]:2551] [] [ShoppingCartService-akka.actor.default-dispatcher-3] - [outbound connection to [akka://[email protected]:2552], control stream] Upstream failed, cause: StreamTcpException: The connection has been aborted
[2021-06-21 12:17:19,631] [WARN] [akka.stream.Materializer] [akka://[email protected]:2551] [] [ShoppingCartService-akka.actor.default-dispatcher-3] - [outbound connection to [akka://[email protected]:2552], message stream] Upstream failed, cause: StreamTcpException: The connection has been aborted
[2021-06-21 12:17:19,639] [INFO] [akka.remote.RemoteActorRefProvider$RemotingTerminator] [akka://[email protected]:2551] [] [ShoppingCartService-akka.actor.default-dispatcher-3] - Remoting shut down.
[2021-06-21 12:17:19,646] [INFO] [com.zaxxer.hikari.HikariDataSource] [] [] [ShoppingCartService-akka.actor.default-dispatcher-35] - db - Shutdown initiated...
[2021-06-21 12:17:19,650] [INFO] [com.zaxxer.hikari.HikariDataSource] [] [] [ShoppingCartService-akka.actor.default-dispatcher-35] - db - Shutdown completed.

Contributor Author

One thing to note is that sharding already acts quickly on the downing, see

ShoppingCart: Self downed, stopping ShardRegion
PostStop cart [cart2]

}
}

private def acquireLease(): Unit = {
log.debug("SBR trying to acquire lease")
implicit val ec: ExecutionContext = internalDispatcher