2013-04-15 09:26:51 +02:00
|
|
|
/**
 * Copyright (C) 2009-2018 Lightbend Inc. <https://www.lightbend.com>
 */
|
|
|
|
|
package akka.cluster
|
|
|
|
|
|
|
|
|
|
import scala.concurrent.duration.FiniteDuration
|
2015-02-19 15:49:02 +01:00
|
|
|
import akka.actor._
|
2013-04-15 09:26:51 +02:00
|
|
|
import akka.cluster.ClusterEvent.CurrentClusterState
|
|
|
|
|
import akka.cluster.ClusterEvent.MemberEvent
|
|
|
|
|
import akka.cluster.ClusterEvent.MemberUp
|
|
|
|
|
import akka.cluster.ClusterEvent.MemberRemoved
|
2015-08-25 17:20:05 -05:00
|
|
|
import akka.cluster.ClusterEvent.MemberWeaklyUp
|
2013-04-15 09:26:51 +02:00
|
|
|
import akka.remote.FailureDetectorRegistry
|
|
|
|
|
import akka.remote.RemoteWatcher
|
2016-09-19 11:17:41 +02:00
|
|
|
import akka.remote.RARP
|
2013-04-15 09:26:51 +02:00
|
|
|
|
2013-04-17 21:03:16 +02:00
|
|
|
/**
 * INTERNAL API
 */
private[cluster] object ClusterRemoteWatcher {

  /**
   * Builds the [[akka.actor.Props]] used to create a `ClusterRemoteWatcher`.
   *
   * The four arguments are handed over unchanged, in declaration order, as
   * constructor arguments of `ClusterRemoteWatcher`, and the resulting `Props`
   * is configured with `Deploy.local`.
   */
  def props(
    failureDetector:                FailureDetectorRegistry[Address],
    heartbeatInterval:              FiniteDuration,
    unreachableReaperInterval:      FiniteDuration,
    heartbeatExpectedResponseAfter: FiniteDuration): Props = {
    val watcherProps = Props(
      classOf[ClusterRemoteWatcher],
      failureDetector,
      heartbeatInterval,
      unreachableReaperInterval,
      heartbeatExpectedResponseAfter)
    watcherProps.withDeploy(Deploy.local)
  }
}
|
|
|
|
|
|
|
|
|
|
/**
 * INTERNAL API
 *
 * Cluster-aware specialization of [[akka.remote.RemoteWatcher]]. It keeps track
 * of the addresses of the other cluster member nodes and is responsible for
 * watchees located on those nodes.
 * [[akka.actor.AddressTerminated]] is published when a node is removed from the cluster.
 *
 * Nodes that are not cluster members are handled by `RemoteWatcher`. When a watch
 * was added before the node was a member of the cluster and the node later becomes
 * a cluster member, `ClusterRemoteWatcher` takes over the responsibility from
 * `RemoteWatcher`.
 */
private[cluster] class ClusterRemoteWatcher(
  failureDetector:                FailureDetectorRegistry[Address],
  heartbeatInterval:              FiniteDuration,
  unreachableReaperInterval:      FiniteDuration,
  heartbeatExpectedResponseAfter: FiniteDuration)
  extends RemoteWatcher(
    failureDetector,
    heartbeatInterval,
    unreachableReaperInterval,
    heartbeatExpectedResponseAfter) {

  // Read once at construction time from the remote settings of this actor system.
  private val arteryEnabled = RARP(context.system).provider.remoteSettings.Artery.Enabled

  val cluster = Cluster(context.system)
  import cluster.selfAddress

  /**
   * Message this actor schedules to itself in `memberRemoved` so that the
   * quarantine of a removed member is carried out later, in `delayedQuarantine`.
   */
  private final case class DelayedQuarantine(m: Member, previousStatus: MemberStatus) extends NoSerializationVerificationNeeded

  /** Addresses of the known cluster members, never containing `selfAddress`. */
  var clusterNodes: Set[Address] = Set.empty

  /** Runs the inherited start logic first, then subscribes to cluster `MemberEvent`s. */
  override def preStart(): Unit = {
    super.preStart()
    cluster.subscribe(self, classOf[MemberEvent])
  }

  /** Runs the inherited stop logic first, then removes the cluster subscription. */
  override def postStop(): Unit = {
    super.postStop()
    cluster.unsubscribe(self)
  }

  // Cluster events are tried first; everything else falls through to RemoteWatcher.
  override def receive = receiveClusterEvent.orElse(super.receive)

  /**
   * Handles the cluster membership messages and the self-scheduled
   * `DelayedQuarantine` message.
   */
  def receiveClusterEvent: Actor.Receive = {
    case state: CurrentClusterState =>
      // Snapshot of the membership: remember every member except this node,
      // take over nodes that are already watched and drop them from `unreachable`.
      clusterNodes = state.members.collect { case member if member.address != selfAddress => member.address }
      clusterNodes.foreach(takeOverResponsibility)
      unreachable = unreachable.diff(clusterNodes)
    case MemberUp(member)                           => memberUp(member)
    case MemberWeaklyUp(member)                     => memberUp(member)
    case MemberRemoved(member, statusBeforeRemoval) => memberRemoved(member, statusBeforeRemoval)
    case _: MemberEvent                             => // not interesting
    case DelayedQuarantine(member, statusBeforeRemoval) =>
      delayedQuarantine(member, statusBeforeRemoval)
  }

  /**
   * Registers another node as cluster member: its address is added to
   * `clusterNodes`, watchees already handled for it are taken over and the
   * address is removed from `unreachable`. Events about this node itself are ignored.
   */
  def memberUp(m: Member): Unit = {
    val address = m.address
    if (address != selfAddress) {
      clusterNodes += address
      takeOverResponsibility(address)
      unreachable -= address
    }
  }

  /**
   * Handles the removal of another node from the cluster. Events about this
   * node itself are ignored.
   *
   * The address is removed from `clusterNodes`. A member whose previous status
   * was `Down` is quarantined immediately. Otherwise, and only when Artery is
   * enabled, a `DelayedQuarantine` message is scheduled to this actor after
   * `cluster.settings.QuarantineRemovedNodeAfter`. In every case
   * `publishAddressTerminated` is invoked for the address afterwards.
   */
  def memberRemoved(m: Member, previousStatus: MemberStatus): Unit = {
    val address = m.address
    if (address != selfAddress) {
      clusterNodes -= address

      previousStatus match {
        case MemberStatus.Down =>
          quarantine(address, Some(m.uniqueAddress.longUid), s"Cluster member removed, previous status [$previousStatus]")
        case _ if arteryEnabled =>
          // Gracefully removed (leaving) members are not quarantined right away:
          // Cluster Singleton gets some time to exchange TakeOver/HandOver messages.
          import context.dispatcher
          context.system.scheduler.scheduleOnce(cluster.settings.QuarantineRemovedNodeAfter, self, DelayedQuarantine(m, previousStatus))
        case _ =>
        // Artery is disabled and the member was not Down: no quarantine is triggered here.
      }

      publishAddressTerminated(address)
    }
  }

  /**
   * Performs the quarantine that `memberRemoved` postponed, using the member's
   * address and unique id and the status it had before removal in the reason text.
   */
  def delayedQuarantine(m: Member, previousStatus: MemberStatus): Unit =
    quarantine(m.address, Some(m.uniqueAddress.longUid), s"Cluster member removed, previous status [$previousStatus]")

  // Watchees on known cluster nodes are not passed on to RemoteWatcher's node watching.
  override def watchNode(watchee: InternalActorRef) =
    if (!clusterNodes.contains(watchee.path.address)) super.watchNode(watchee)

  /**
   * When a cluster node is added, this class takes over the responsibility for
   * watchees on that node that were already handled by the super `RemoteWatcher`:
   * if the address is among `watchingNodes`, it is unwatched there.
   */
  def takeOverResponsibility(address: Address): Unit =
    if (watchingNodes(address)) {
      log.debug("Cluster is taking over responsibility of node: [{}]", address)
      unwatchNode(address)
    }

}
|