Move heartbeat sending out from ClusterCoreDaemon, see #2284
This commit is contained in:
parent
5b0a2ec7ee
commit
cecde67226
2 changed files with 86 additions and 46 deletions
|
|
@ -155,8 +155,8 @@ private[cluster] final class ClusterDaemon(settings: ClusterSettings) extends Ac
|
|||
withDispatcher(context.props.dispatcher), name = "publisher")
|
||||
val core = context.actorOf(Props(new ClusterCoreDaemon(publisher)).
|
||||
withDispatcher(context.props.dispatcher), name = "core")
|
||||
context.actorOf(Props[ClusterHeartbeatDaemon].
|
||||
withDispatcher(context.props.dispatcher), name = "heartbeat")
|
||||
context.actorOf(Props[ClusterHeartbeatReceiver].
|
||||
withDispatcher(context.props.dispatcher), name = "heartbeatReceiver")
|
||||
if (settings.MetricsEnabled) context.actorOf(Props(new ClusterMetricsCollector(publisher)).
|
||||
withDispatcher(context.props.dispatcher), name = "metrics")
|
||||
|
||||
|
|
@ -172,26 +172,24 @@ private[cluster] final class ClusterDaemon(settings: ClusterSettings) extends Ac
|
|||
private[cluster] final class ClusterCoreDaemon(publisher: ActorRef) extends Actor with ActorLogging {
|
||||
import ClusterLeaderAction._
|
||||
import InternalClusterAction._
|
||||
import ClusterHeartbeatSender._
|
||||
import ClusterHeartbeatSender.JoinInProgress
|
||||
|
||||
val cluster = Cluster(context.system)
|
||||
import cluster.{ selfAddress, scheduler, failureDetector }
|
||||
import cluster.settings._
|
||||
|
||||
val vclockNode = VectorClock.Node(selfAddress.toString)
|
||||
val selfHeartbeat = Heartbeat(selfAddress)
|
||||
|
||||
// note that self is not initially a member,
|
||||
// and the Gossip is not versioned for this 'Node' yet
|
||||
var latestGossip: Gossip = Gossip()
|
||||
var joinInProgress: Map[Address, Deadline] = Map.empty
|
||||
|
||||
var stats = ClusterStats()
|
||||
|
||||
val heartbeatSender = context.actorOf(Props[ClusterHeartbeatSender].
|
||||
withDispatcher(UseDispatcher), name = "heartbeatSender")
|
||||
val coreSender = context.actorOf(Props[ClusterCoreSender].
|
||||
withDispatcher(UseDispatcher), name = "coreSender")
|
||||
val heartbeatSender = context.actorOf(Props[ClusterHeartbeatSender].
|
||||
withDispatcher(UseDispatcher), name = "heartbeatSender")
|
||||
|
||||
import context.dispatcher
|
||||
|
||||
|
|
@ -201,12 +199,6 @@ private[cluster] final class ClusterCoreDaemon(publisher: ActorRef) extends Acto
|
|||
self ! GossipTick
|
||||
}
|
||||
|
||||
// start periodic heartbeat to all nodes in cluster
|
||||
val heartbeatTask =
|
||||
FixedRateTask(scheduler, PeriodicTasksInitialDelay.max(HeartbeatInterval).asInstanceOf[FiniteDuration], HeartbeatInterval) {
|
||||
self ! HeartbeatTick
|
||||
}
|
||||
|
||||
// start periodic cluster failure detector reaping (moving nodes condemned by the failure detector to unreachable list)
|
||||
val failureDetectorReaperTask =
|
||||
FixedRateTask(scheduler, PeriodicTasksInitialDelay.max(UnreachableNodesReaperInterval).asInstanceOf[FiniteDuration], UnreachableNodesReaperInterval) {
|
||||
|
|
@ -232,7 +224,6 @@ private[cluster] final class ClusterCoreDaemon(publisher: ActorRef) extends Acto
|
|||
|
||||
override def postStop(): Unit = {
|
||||
gossipTask.cancel()
|
||||
heartbeatTask.cancel()
|
||||
failureDetectorReaperTask.cancel()
|
||||
leaderActionsTask.cancel()
|
||||
publishStatsTask foreach { _.cancel() }
|
||||
|
|
@ -250,7 +241,6 @@ private[cluster] final class ClusterCoreDaemon(publisher: ActorRef) extends Acto
|
|||
case msg: GossipEnvelope ⇒ receiveGossip(msg)
|
||||
case msg: GossipMergeConflict ⇒ receiveGossipMerge(msg)
|
||||
case GossipTick ⇒ gossip()
|
||||
case HeartbeatTick ⇒ heartbeat()
|
||||
case ReapUnreachableTick ⇒ reapUnreachableMembers()
|
||||
case LeaderActionsTick ⇒ leaderActions()
|
||||
case PublishStatsTick ⇒ publishInternalStats()
|
||||
|
|
@ -293,11 +283,11 @@ private[cluster] final class ClusterCoreDaemon(publisher: ActorRef) extends Acto
|
|||
val localGossip = latestGossip
|
||||
// wipe our state since a node that joins a cluster must be empty
|
||||
latestGossip = Gossip()
|
||||
joinInProgress = Map(address -> (Deadline.now + JoinTimeout))
|
||||
|
||||
// wipe the failure detector since we are starting fresh and shouldn't care about the past
|
||||
failureDetector.reset()
|
||||
|
||||
heartbeatSender ! JoinInProgress(address, Deadline.now + JoinTimeout)
|
||||
publish(localGossip)
|
||||
|
||||
context.become(initialized)
|
||||
|
|
@ -517,12 +507,7 @@ private[cluster] final class ClusterCoreDaemon(publisher: ActorRef) extends Acto
|
|||
else if (remoteGossip.version < localGossip.version) localGossip // local gossip is newer
|
||||
else remoteGossip // remote gossip is newer
|
||||
|
||||
val newJoinInProgress =
|
||||
if (joinInProgress.isEmpty) joinInProgress
|
||||
else joinInProgress -- winningGossip.members.map(_.address) -- winningGossip.overview.unreachable.map(_.address)
|
||||
|
||||
latestGossip = winningGossip seen selfAddress
|
||||
joinInProgress = newJoinInProgress
|
||||
|
||||
// for all new joining nodes we remove them from the failure detector
|
||||
(latestGossip.members -- localGossip.members).foreach {
|
||||
|
|
@ -744,27 +729,10 @@ private[cluster] final class ClusterCoreDaemon(publisher: ActorRef) extends Acto
|
|||
}
|
||||
}
|
||||
|
||||
/**
 * Sends this node's heartbeat (selfHeartbeat) to every member in latestGossip and to
 * every address that currently has a join in progress, skipping selfAddress.
 * Overdue joinInProgress entries are pruned first, so they are no longer beaten to.
 */
def heartbeat(): Unit = {
|
||||
removeOverdueJoinInProgress()
|
||||
|
||||
// targets: addresses of all current members, followed by the joinInProgress addresses
val beatTo = latestGossip.members.toSeq.map(_.address) ++ joinInProgress.keys
|
||||
|
||||
// one shared deadline, one HeartbeatInterval from now, is attached to every SendHeartbeat
// NOTE(review): presumably lets the sender discard beats that are already stale; confirm in ClusterHeartbeatSender
val deadline = Deadline.now + HeartbeatInterval
|
||||
// the actual send is delegated to the heartbeatSender child actor; never beat to ourselves
beatTo.foreach { address ⇒ if (address != selfAddress) heartbeatSender ! SendHeartbeat(selfHeartbeat, address, deadline) }
|
||||
}
|
||||
|
||||
/**
|
||||
* Removes every entry from joinInProgress whose deadline is overdue.
|
||||
*/
|
||||
def removeOverdueJoinInProgress(): Unit = {
|
||||
// collect the addresses whose deadline has passed, then drop those keys from the map
joinInProgress --= joinInProgress collect { case (address, deadline) if deadline.isOverdue ⇒ address }
|
||||
}
|
||||
|
||||
/**
|
||||
* Reaps the unreachable members (moves them to the 'unreachable' list in the cluster overview) according to the failure detector's verdict.
|
||||
*/
|
||||
def reapUnreachableMembers(): Unit = {
|
||||
|
||||
if (!isSingletonCluster && isAvailable) {
|
||||
// only scrutinize if we are a non-singleton cluster and available
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue