/**
 * Copyright (C) 2009-2018 Lightbend Inc.
 */
package akka.cluster

import scala.concurrent.duration.FiniteDuration

import akka.actor._
import akka.cluster.ClusterEvent.CurrentClusterState
import akka.cluster.ClusterEvent.MemberEvent
import akka.cluster.ClusterEvent.MemberJoined
import akka.cluster.ClusterEvent.MemberUp
import akka.cluster.ClusterEvent.MemberRemoved
import akka.cluster.ClusterEvent.MemberWeaklyUp
import akka.remote.FailureDetectorRegistry
import akka.remote.RemoteWatcher
import akka.remote.RARP

/**
 * INTERNAL API
 */
private[cluster] object ClusterRemoteWatcher {

  /**
   * Builds the [[akka.actor.Props]] used to create a `ClusterRemoteWatcher`.
   *
   * The four arguments are passed on, unchanged and in this order, as the constructor
   * arguments of the actor. The resulting `Props` carry `Deploy.local`.
   */
  def props(
    failureDetector:                FailureDetectorRegistry[Address],
    heartbeatInterval:              FiniteDuration,
    unreachableReaperInterval:      FiniteDuration,
    heartbeatExpectedResponseAfter: FiniteDuration): Props = {
    val watcherProps = Props(
      classOf[ClusterRemoteWatcher],
      failureDetector,
      heartbeatInterval,
      unreachableReaperInterval,
      heartbeatExpectedResponseAfter)
    watcherProps.withDeploy(Deploy.local)
  }
}

/**
 * INTERNAL API
 *
 * Specialization of [[akka.remote.RemoteWatcher]] that keeps
 * track of cluster member nodes and is responsible for watchees on cluster nodes.
 * [[akka.actor.AddressTerminated]] is published when a node is removed from the cluster.
 *
 * `RemoteWatcher` handles non-cluster nodes. `ClusterRemoteWatcher` will take
 * over responsibility from `RemoteWatcher` if a watch is added before a node is a member
 * of the cluster and the node later becomes a cluster member.
 */
private[cluster] class ClusterRemoteWatcher(
  failureDetector:                FailureDetectorRegistry[Address],
  heartbeatInterval:              FiniteDuration,
  unreachableReaperInterval:      FiniteDuration,
  heartbeatExpectedResponseAfter: FiniteDuration)
  extends RemoteWatcher(
    failureDetector,
    heartbeatInterval,
    unreachableReaperInterval,
    heartbeatExpectedResponseAfter) {

  // Read once at construction; decides whether gracefully removed members get a delayed quarantine.
  private val arteryEnabled = RARP(context.system).provider.remoteSettings.Artery.Enabled

  val cluster = Cluster(context.system)
  import cluster.selfAddress

  /**
   * Message this actor schedules to itself from `memberRemoved`; handled by `delayedQuarantine`.
   */
  private final case class DelayedQuarantine(m: Member, previousStatus: MemberStatus)
    extends NoSerializationVerificationNeeded

  // Removed members whose quarantine has been scheduled but not yet carried out.
  private var pendingDelayedQuarantine: Set[UniqueAddress] = Set.empty

  // Addresses of the other cluster members known to this watcher; the own address is never added.
  var clusterNodes: Set[Address] = Set.empty

  /** Runs the inherited start-up logic, then subscribes this actor to cluster `MemberEvent`s. */
  override def preStart(): Unit = {
    super.preStart()
    cluster.subscribe(self, classOf[MemberEvent])
  }

  /** Runs the inherited shutdown logic, then removes this actor's cluster subscription. */
  override def postStop(): Unit = {
    super.postStop()
    cluster.unsubscribe(self)
  }

  // Cluster events are tried first; everything else falls through to the inherited behavior.
  override def receive = receiveClusterEvent.orElse(super.receive)

  /**
   * Handles the cluster membership messages and the self-scheduled `DelayedQuarantine`.
   *
   * A `CurrentClusterState` replaces `clusterNodes` with every member address except the
   * own one, takes over responsibility for each of them and drops them from `unreachable`.
   */
  def receiveClusterEvent: Actor.Receive = {
    case snapshot: CurrentClusterState ⇒
      clusterNodes = snapshot.members.collect {
        case member if member.address != selfAddress ⇒ member.address
      }
      clusterNodes.foreach(takeOverResponsibility)
      unreachable = unreachable.diff(clusterNodes)

    case MemberJoined(member) ⇒
      memberJoined(member)

    case MemberUp(member) ⇒
      memberUp(member)

    case MemberWeaklyUp(member) ⇒
      memberUp(member)

    case MemberRemoved(member, statusBeforeRemoval) ⇒
      memberRemoved(member, statusBeforeRemoval)

    case _: MemberEvent ⇒
    // not interesting

    case DelayedQuarantine(member, statusBeforeRemoval) ⇒
      delayedQuarantine(member, statusBeforeRemoval)
  }

  /** For a joining member other than this node: quarantine a pending old incarnation, if any. */
  private def memberJoined(m: Member): Unit = {
    val joinedElsewhere = m.address != selfAddress
    if (joinedElsewhere) quarantineOldIncarnation(m)
  }

  /**
   * For a member other than this node that became up (or weakly up): quarantine a pending old
   * incarnation, record the address in `clusterNodes`, take over its watchees from the
   * superclass and remove it from `unreachable`.
   */
  def memberUp(m: Member): Unit = {
    val node = m.address
    if (node != selfAddress) {
      quarantineOldIncarnation(m)
      clusterNodes += node
      takeOverResponsibility(node)
      unreachable -= node
    }
  }

  /**
   * For a removed member other than this node: forget its address, quarantine it immediately
   * (previous status `Down`) or after a delay (Artery enabled), and finally publish
   * `AddressTerminated` for it via `publishAddressTerminated`.
   */
  def memberRemoved(m: Member, previousStatus: MemberStatus): Unit = {
    val node = m.address
    if (node != selfAddress) {
      clusterNodes -= node

      previousStatus match {
        case MemberStatus.Down ⇒
          quarantine(
            node,
            Some(m.uniqueAddress.longUid),
            s"Cluster member removed, previous status [$previousStatus]",
            harmless = false)

        case _ if arteryEnabled ⇒
          // Gracefully removed (leaving) members are not quarantined right away, so that
          // Cluster Singleton gets some time to exchange TakeOver/HandOver messages.
          // Seeing a new incarnation of the same host:port triggers the quarantine of the
          // previous incarnation earlier.
          pendingDelayedQuarantine += m.uniqueAddress
          import context.dispatcher
          context.system.scheduler.scheduleOnce(
            cluster.settings.QuarantineRemovedNodeAfter,
            self,
            DelayedQuarantine(m, previousStatus))

        case _ ⇒
        // not downed and Artery disabled: no quarantine is issued from here
      }

      publishAddressTerminated(node)
    }
  }

  /**
   * If a delayed quarantine is pending for the same host:port as `newIncarnation`,
   * drop it from the pending set and quarantine that previous incarnation right away.
   */
  def quarantineOldIncarnation(newIncarnation: Member): Unit =
    if (pendingDelayedQuarantine.nonEmpty) {
      val reusedAddress = newIncarnation.address
      pendingDelayedQuarantine.find(_.address == reusedAddress) match {
        case Some(previous) ⇒
          pendingDelayedQuarantine -= previous
          quarantine(
            previous.address,
            Some(previous.longUid),
            s"Cluster member removed, new incarnation joined",
            harmless = true)

        case None ⇒
        // nothing pending for this host:port
      }
    }

  /**
   * Carries out a scheduled quarantine, unless it is no longer pending because
   * `quarantineOldIncarnation` already handled that member.
   */
  def delayedQuarantine(m: Member, previousStatus: MemberStatus): Unit = {
    val removedNode = m.uniqueAddress
    if (pendingDelayedQuarantine.contains(removedNode)) {
      pendingDelayedQuarantine -= removedNode
      quarantine(
        m.address,
        Some(removedNode.longUid),
        s"Cluster member removed, previous status [$previousStatus]",
        harmless = true)
    }
  }

  /** Delegates to the superclass only when the watchee does not live on a known cluster node. */
  override def watchNode(watchee: InternalActorRef): Unit = {
    val onClusterNode = clusterNodes.contains(watchee.path.address)
    if (!onClusterNode) super.watchNode(watchee)
  }

  /**
   * When a cluster node is added this class takes over the
   * responsibility for watchees on that node already handled
   * by super RemoteWatcher.
   */
  def takeOverResponsibility(address: Address): Unit = {
    val watchedBySuper = watchingNodes.contains(address)
    if (watchedBySuper) {
      log.debug("Cluster is taking over responsibility of node: [{}]", address)
      unwatchNode(address)
    }
  }
}