Cluster singleton manager: don't send member events to FSM during shutdown (#24236)

There exists a race where a cluster node that is being downed sees itself as the oldest node (as it has had the other nodes removed) and takes over the singleton manager, sending the real oldest node into the End state, meaning that cluster singletons never work again. This fix simply prevents Member events from being given to the cluster singleton manager FSM during a shutdown, instead relying on SelfExiting. This also hardens the test by not downing the node that the current sharding coordinator is running on, as well as fixing a bug in the probes.
2018-01-05 08:47:43 +00:00 · 2018-01-05 08:47:43 +00:00 · 0380cc517a
commit 0380cc517a
parent 1d14c387a0
4 changed files with 26 additions and 22 deletions
--- a/akka-cluster-tools/src/main/scala/akka/cluster/singleton/ClusterSingletonManager.scala
+++ b/akka-cluster-tools/src/main/scala/akka/cluster/singleton/ClusterSingletonManager.scala
@ -303,9 +303,12 @@ object ClusterSingletonManager {
      }

      def sendFirstChange(): Unit = {
-        val event = changes.head
-        changes = changes.tail
-        context.parent ! event
+        // don't send cluster change events if this node is shutting its self down, just wait for SelfExiting
+        if (!cluster.isTerminated) {
+          val event = changes.head
+          changes = changes.tail
+          context.parent ! event
+        }
      }

      def receive = {
@ -331,7 +334,7 @@ object ClusterSingletonManager {
          context.unbecome()
        case MemberUp(m) ⇒
          add(m)
-          deliverChanges
+          deliverChanges()
        case MemberRemoved(m, _) ⇒
          remove(m)
          deliverChanges()
@ -357,9 +360,7 @@ object ClusterSingletonManager {
          case _              ⇒ super.unhandled(msg)
        }
      }
-
    }
-
  }
 }

@ -763,7 +764,7 @@ class ClusterSingletonManager(
    case (Event(Terminated(ref), HandingOverData(singleton, handOverTo))) if ref == singleton ⇒
      handOverDone(handOverTo)

-    case Event(HandOverToMe, d @ HandingOverData(singleton, handOverTo)) if handOverTo == Some(sender()) ⇒
+    case Event(HandOverToMe, HandingOverData(singleton, handOverTo)) if handOverTo == Some(sender()) ⇒
      // retry
      sender() ! HandOverInProgress
      stay