Handle CoordinatedShutdown exiting-completed when not joined, #26832

* Fixes "assertion failed: Nodes not part of cluster have marked the Gossip as seen".
* The failure was caused by trying to mark the Gossip as seen before the node has joined, which may happen if CoordinatedShutdown is running before the node has joined.
2019-04-30 14:47:41 +02:00 · 2019-04-30 14:47:41 +02:00 · a77db34f8f
commit a77db34f8f
parent 7b59c0c785
1 changed files with 27 additions and 24 deletions
--- a/akka-cluster/src/main/scala/akka/cluster/ClusterDaemon.scala
+++ b/akka-cluster/src/main/scala/akka/cluster/ClusterDaemon.scala
@@ -826,33 +826,36 @@ private[cluster] class ClusterCoreDaemon(publisher: ActorRef, joinConfigCompatCh
    logInfo("Exiting completed")
    // ExitingCompleted sent via CoordinatedShutdown to continue the leaving process.
    exitingTasksInProgress = false
-    // mark as seen
-    membershipState = membershipState.seen()
-    assertLatestGossip()
-    publishMembershipState()
+    // status Removed also before joining
+    if (membershipState.selfMember.status != MemberStatus.Removed) {
+      // mark as seen
+      membershipState = membershipState.seen()
+      assertLatestGossip()
+      publishMembershipState()

-    // Let others know (best effort) before shutdown. Otherwise they will not see
-    // convergence of the Exiting state until they have detected this node as
-    // unreachable and the required downing has finished. They will still need to detect
-    // unreachable, but Exiting unreachable will be removed without downing, i.e.
-    // normally the leaving of a leader will be graceful without the need
-    // for downing. However, if those final gossip messages never arrive it is
-    // alright to require the downing, because that is probably caused by a
-    // network failure anyway.
-    gossipRandomN(NumberOfGossipsBeforeShutdownWhenLeaderExits)
+      // Let others know (best effort) before shutdown. Otherwise they will not see
+      // convergence of the Exiting state until they have detected this node as
+      // unreachable and the required downing has finished. They will still need to detect
+      // unreachable, but Exiting unreachable will be removed without downing, i.e.
+      // normally the leaving of a leader will be graceful without the need
+      // for downing. However, if those final gossip messages never arrive it is
+      // alright to require the downing, because that is probably caused by a
+      // network failure anyway.
+      gossipRandomN(NumberOfGossipsBeforeShutdownWhenLeaderExits)

-    // send ExitingConfirmed to two potential leaders
-    val membersExceptSelf = latestGossip.members.filter(_.uniqueAddress != selfUniqueAddress)
+      // send ExitingConfirmed to two potential leaders
+      val membersExceptSelf = latestGossip.members.filter(_.uniqueAddress != selfUniqueAddress)

-    membershipState.leaderOf(membersExceptSelf) match {
-      case Some(node1) =>
-        clusterCore(node1.address) ! ExitingConfirmed(selfUniqueAddress)
-        membershipState.leaderOf(membersExceptSelf.filterNot(_.uniqueAddress == node1)) match {
-          case Some(node2) =>
-            clusterCore(node2.address) ! ExitingConfirmed(selfUniqueAddress)
-          case None => // no more potential leader
-        }
-      case None => // no leader
+      membershipState.leaderOf(membersExceptSelf) match {
+        case Some(node1) =>
+          clusterCore(node1.address) ! ExitingConfirmed(selfUniqueAddress)
+          membershipState.leaderOf(membersExceptSelf.filterNot(_.uniqueAddress == node1)) match {
+            case Some(node2) =>
+              clusterCore(node2.address) ! ExitingConfirmed(selfUniqueAddress)
+            case None => // no more potential leader
+          }
+        case None => // no leader
+      }
    }

    shutdown()