=clu Improve cluster downing

* avoid using Down and Exiting member from being used for joining * delay shut down of Down member until the information is spread to all reachable members, e.g. downing several nodes via one node * akka.cluster.down-removal-margin setting Margin until shards or singletons that belonged to a downed/removed partition are created in surviving partition. Used by singleton and sharding. * remove the retry count parameters/settings for singleton in favor of deriving those from the removal-margin
2015-05-30 16:12:22 +02:00 · 2015-05-30 16:12:22 +02:00 · 2a88f4fb29
commit 2a88f4fb29
parent bf28260cd0
12 changed files with 227 additions and 101 deletions
--- a/akka-cluster/src/main/scala/akka/cluster/ClusterDaemon.scala
+++ b/akka-cluster/src/main/scala/akka/cluster/ClusterDaemon.scala
@ -228,6 +228,7 @@ private[cluster] class ClusterCoreDaemon(publisher: ActorRef) extends Actor with
  protected def selfUniqueAddress = cluster.selfUniqueAddress

  val NumberOfGossipsBeforeShutdownWhenLeaderExits = 3
+  val MaxGossipsBeforeShuttingDownMyself = 5

  def vclockName(node: UniqueAddress): String = node.address + "-" + node.uid
  val vclockNode = VectorClock.Node(vclockName(selfUniqueAddress))
@ -371,7 +372,14 @@ private[cluster] class ClusterCoreDaemon(publisher: ActorRef) extends Actor with
    case other             ⇒ super.unhandled(other)
  }

-  def initJoin(): Unit = sender() ! InitJoinAck(selfAddress)
+  def initJoin(): Unit = {
+    val selfStatus = latestGossip.member(selfUniqueAddress).status
+    if (Gossip.removeUnreachableWithMemberStatus.contains(selfStatus))
+      // prevents a Down and Exiting node from being used for joining
+      sender() ! InitJoinNack(selfAddress)
+    else
+      sender() ! InitJoinAck(selfAddress)
+  }

  def joinSeedNodes(newSeedNodes: immutable.IndexedSeq[Address]): Unit = {
    if (newSeedNodes.nonEmpty) {
@ -444,12 +452,15 @@ private[cluster] class ClusterCoreDaemon(publisher: ActorRef) extends Actor with
   * current gossip state, including the new joining member.
   */
  def joining(node: UniqueAddress, roles: Set[String]): Unit = {
+    val selfStatus = latestGossip.member(selfUniqueAddress).status
    if (node.address.protocol != selfAddress.protocol)
      log.warning("Member with wrong protocol tried to join, but was ignored, expected [{}] but was [{}]",
        selfAddress.protocol, node.address.protocol)
    else if (node.address.system != selfAddress.system)
      log.warning("Member with wrong ActorSystem name tried to join, but was ignored, expected [{}] but was [{}]",
        selfAddress.system, node.address.system)
+    else if (Gossip.removeUnreachableWithMemberStatus.contains(selfStatus))
+      logInfo("Trying to join [{}] to [{}] member, ignoring. Use a member that is Up instead.", node, selfStatus)
    else {
      val localMembers = latestGossip.members

@ -544,15 +555,15 @@ private[cluster] class ClusterCoreDaemon(publisher: ActorRef) extends Actor with
    val localReachability = localOverview.reachability

    // check if the node to DOWN is in the `members` set
-    localMembers.collectFirst { case m if m.address == address ⇒ m.copy(status = Down) } match {
-      case Some(m) ⇒
+    localMembers.find(_.address == address) match {
+      case Some(m) if (m.status != Down) ⇒
        if (localReachability.isReachable(m.uniqueAddress))
          logInfo("Marking node [{}] as [{}]", m.address, Down)
        else
          logInfo("Marking unreachable node [{}] as [{}]", m.address, Down)

        // replace member (changed status)
-        val newMembers = localMembers - m + m
+        val newMembers = localMembers - m + m.copy(status = Down)
        // remove nodes marked as DOWN from the `seen` table
        val newSeen = localSeen - m.uniqueAddress

@ -562,6 +573,7 @@ private[cluster] class ClusterCoreDaemon(publisher: ActorRef) extends Actor with
        updateLatestGossip(newGossip)

        publish(latestGossip)
+      case Some(_) ⇒ // already down
      case None ⇒
        logInfo("Ignoring down of unknown node [{}] as [{}]", address)
    }
@ -676,7 +688,7 @@ private[cluster] class ClusterCoreDaemon(publisher: ActorRef) extends Actor with
      publish(latestGossip)

      val selfStatus = latestGossip.member(selfUniqueAddress).status
-      if (selfStatus == Exiting || selfStatus == Down)
+      if (selfStatus == Exiting)
        shutdown()
      else if (talkback) {
        // send back gossip to sender() when sender() had different view, i.e. merge, or sender() had
@ -766,7 +778,7 @@ private[cluster] class ClusterCoreDaemon(publisher: ActorRef) extends Actor with
  /**
   * Runs periodic leader actions, such as member status transitions, assigning partitions etc.
   */
-  def leaderActions(): Unit =
+  def leaderActions(): Unit = {
    if (latestGossip.isLeader(selfUniqueAddress, selfUniqueAddress)) {
      // only run the leader actions if we are the LEADER
      val firstNotice = 20
@ -785,6 +797,27 @@ private[cluster] class ClusterCoreDaemon(publisher: ActorRef) extends Actor with
              s"${m.address} ${m.status} seen=${latestGossip.seenByNode(m.uniqueAddress)}").mkString(", "))
      }
    }
+    shutdownSelfWhenDown()
+  }
+
+  def shutdownSelfWhenDown(): Unit = {
+    if (latestGossip.member(selfUniqueAddress).status == Down) {
+      // When all reachable have seen the state this member will shutdown itself when it has
+      // status Down. The down commands should spread before we shutdown.
+      val unreachable = latestGossip.overview.reachability.allUnreachableOrTerminated
+      val downed = latestGossip.members.collect { case m if m.status == Down ⇒ m.uniqueAddress }
+      if (downed.forall(node ⇒ unreachable(node) || latestGossip.seenByNode(node))) {
+        // the reason for not shutting down immediately is to give the gossip a chance to spread
+        // the downing information to other downed nodes, so that they can shutdown themselves
+        logInfo("Shutting down myself")
+        // not crucial to send gossip, but may speedup removal since fallback to failure detection is not needed
+        // if other downed know that this node has seen the version
+        downed.filterNot(n ⇒ unreachable(n) || n == selfUniqueAddress).take(MaxGossipsBeforeShuttingDownMyself)
+          .foreach(gossipTo)
+        shutdown()
+      }
+    }
+  }

  /**
   * Leader actions are as follows: