Move heartbeat sending out from ClusterCoreDaemon, see #2284

2012-10-01 10:02:48 +02:00 · 2012-10-01 10:02:48 +02:00 · cecde67226
commit cecde67226
parent 5b0a2ec7ee
2 changed files with 86 additions and 46 deletions
--- a/akka-cluster/src/main/scala/akka/cluster/ClusterDaemon.scala
+++ b/akka-cluster/src/main/scala/akka/cluster/ClusterDaemon.scala
@@ -155,8 +155,8 @@ private[cluster] final class ClusterDaemon(settings: ClusterSettings) extends Ac
    withDispatcher(context.props.dispatcher), name = "publisher")
  val core = context.actorOf(Props(new ClusterCoreDaemon(publisher)).
    withDispatcher(context.props.dispatcher), name = "core")
-  context.actorOf(Props[ClusterHeartbeatDaemon].
-    withDispatcher(context.props.dispatcher), name = "heartbeat")
+  context.actorOf(Props[ClusterHeartbeatReceiver].
+    withDispatcher(context.props.dispatcher), name = "heartbeatReceiver")
  if (settings.MetricsEnabled) context.actorOf(Props(new ClusterMetricsCollector(publisher)).
    withDispatcher(context.props.dispatcher), name = "metrics")

@@ -172,26 +172,24 @@ private[cluster] final class ClusterDaemon(settings: ClusterSettings) extends Ac
 private[cluster] final class ClusterCoreDaemon(publisher: ActorRef) extends Actor with ActorLogging {
  import ClusterLeaderAction._
  import InternalClusterAction._
-  import ClusterHeartbeatSender._
+  import ClusterHeartbeatSender.JoinInProgress

  val cluster = Cluster(context.system)
  import cluster.{ selfAddress, scheduler, failureDetector }
  import cluster.settings._

  val vclockNode = VectorClock.Node(selfAddress.toString)
-  val selfHeartbeat = Heartbeat(selfAddress)

  // note that self is not initially member,
  // and the Gossip is not versioned for this 'Node' yet
  var latestGossip: Gossip = Gossip()
-  var joinInProgress: Map[Address, Deadline] = Map.empty

  var stats = ClusterStats()

-  val heartbeatSender = context.actorOf(Props[ClusterHeartbeatSender].
-    withDispatcher(UseDispatcher), name = "heartbeatSender")
  val coreSender = context.actorOf(Props[ClusterCoreSender].
    withDispatcher(UseDispatcher), name = "coreSender")
+  val heartbeatSender = context.actorOf(Props[ClusterHeartbeatSender].
+    withDispatcher(UseDispatcher), name = "heartbeatSender")

  import context.dispatcher

@@ -201,12 +199,6 @@ private[cluster] final class ClusterCoreDaemon(publisher: ActorRef) extends Acto
      self ! GossipTick
    }

-  // start periodic heartbeat to all nodes in cluster
-  val heartbeatTask =
-    FixedRateTask(scheduler, PeriodicTasksInitialDelay.max(HeartbeatInterval).asInstanceOf[FiniteDuration], HeartbeatInterval) {
-      self ! HeartbeatTick
-    }
-
  // start periodic cluster failure detector reaping (moving nodes condemned by the failure detector to unreachable list)
  val failureDetectorReaperTask =
    FixedRateTask(scheduler, PeriodicTasksInitialDelay.max(UnreachableNodesReaperInterval).asInstanceOf[FiniteDuration], UnreachableNodesReaperInterval) {
@@ -232,7 +224,6 @@ private[cluster] final class ClusterCoreDaemon(publisher: ActorRef) extends Acto

  override def postStop(): Unit = {
    gossipTask.cancel()
-    heartbeatTask.cancel()
    failureDetectorReaperTask.cancel()
    leaderActionsTask.cancel()
    publishStatsTask foreach { _.cancel() }
@@ -250,7 +241,6 @@ private[cluster] final class ClusterCoreDaemon(publisher: ActorRef) extends Acto
    case msg: GossipEnvelope              ⇒ receiveGossip(msg)
    case msg: GossipMergeConflict         ⇒ receiveGossipMerge(msg)
    case GossipTick                       ⇒ gossip()
-    case HeartbeatTick                    ⇒ heartbeat()
    case ReapUnreachableTick              ⇒ reapUnreachableMembers()
    case LeaderActionsTick                ⇒ leaderActions()
    case PublishStatsTick                 ⇒ publishInternalStats()
@@ -293,11 +283,11 @@ private[cluster] final class ClusterCoreDaemon(publisher: ActorRef) extends Acto
      val localGossip = latestGossip
      // wipe our state since a node that joins a cluster must be empty
      latestGossip = Gossip()
-      joinInProgress = Map(address -> (Deadline.now + JoinTimeout))

      // wipe the failure detector since we are starting fresh and shouldn't care about the past
      failureDetector.reset()

+      heartbeatSender ! JoinInProgress(address, Deadline.now + JoinTimeout)
      publish(localGossip)

      context.become(initialized)
@@ -517,12 +507,7 @@ private[cluster] final class ClusterCoreDaemon(publisher: ActorRef) extends Acto
          else if (remoteGossip.version < localGossip.version) localGossip // local gossip is newer
          else remoteGossip // remote gossip is newer

-        val newJoinInProgress =
-          if (joinInProgress.isEmpty) joinInProgress
-          else joinInProgress -- winningGossip.members.map(_.address) -- winningGossip.overview.unreachable.map(_.address)
-
        latestGossip = winningGossip seen selfAddress
-        joinInProgress = newJoinInProgress

        // for all new joining nodes we remove them from the failure detector
        (latestGossip.members -- localGossip.members).foreach {
@@ -744,27 +729,10 @@ private[cluster] final class ClusterCoreDaemon(publisher: ActorRef) extends Acto
    }
  }

-  def heartbeat(): Unit = {
-    removeOverdueJoinInProgress()
-
-    val beatTo = latestGossip.members.toSeq.map(_.address) ++ joinInProgress.keys
-
-    val deadline = Deadline.now + HeartbeatInterval
-    beatTo.foreach { address ⇒ if (address != selfAddress) heartbeatSender ! SendHeartbeat(selfHeartbeat, address, deadline) }
-  }
-
-  /**
-   * Removes overdue joinInProgress from State.
-   */
-  def removeOverdueJoinInProgress(): Unit = {
-    joinInProgress --= joinInProgress collect { case (address, deadline) if deadline.isOverdue ⇒ address }
-  }
-
  /**
   * Reaps the unreachable members (moves them to the 'unreachable' list in the cluster overview) according to the failure detector's verdict.
   */
  def reapUnreachableMembers(): Unit = {
-
    if (!isSingletonCluster && isAvailable) {
      // only scrutinize if we are a non-singleton cluster and available