Make cluster fault handling more robust, see #3030

* ClusterCoreDaemon and ClusterDomainEventPublisher can't be restarted
  because the state would be obsolete.
* Add an extra supervisor level for ClusterCoreDaemon and
  ClusterDomainEventPublisher, which will shut down the member
  if either of these children fails.
* Publish the final removed state in postStop of
  ClusterDomainEventPublisher. This also simplifies the removal
  process.
This commit is contained in:
Patrik Nordwall 2013-02-11 10:40:01 +01:00
parent b002bda23f
commit cab78e5174
6 changed files with 86 additions and 32 deletions

View file

@ -341,6 +341,15 @@ private[cluster] final class ClusterDomainEventPublisher extends Actor with Acto
// Mutable actor state, initialised to the empty gossip.
// NOTE(review): presumably holds the most recent gossip that had reached
// convergence; confirm against publishChanges.
var latestConvergedGossip: Gossip = Gossip.empty
// Immutable sequence of cluster domain events, initially empty.
// NOTE(review): presumably events held back until they may be published;
// confirm where this buffer is filled and flushed.
var bufferedEvents: immutable.IndexedSeq[ClusterDomainEvent] = Vector.empty
/**
 * Restart hook, intentionally overridden with an empty body.
 *
 * postStop of this actor publishes the final removed state, which must not
 * happen on a mere restart; there are also no children that would need to
 * be stopped here.
 *
 * @param reason  the failure that triggered the restart (unused)
 * @param message the message being processed when the failure occurred, if any (unused)
 */
override def preRestart(reason: Throwable, message: Option[Any]): Unit = {
  // don't postStop when restarted, no children to stop
}
/**
 * Stop hook: publishes an empty gossip as the last change, so that the
 * final removed state goes out before this actor shuts down.
 */
override def postStop(): Unit =
  publishChanges(Gossip.empty)
def receive = {
case PublishChanges(newGossip) => publishChanges(newGossip)
case currentStats: CurrentInternalStats => publishInternalStats(currentStats)
@ -349,7 +358,6 @@ private[cluster] final class ClusterDomainEventPublisher extends Actor with Acto
case Unsubscribe(subscriber, to) => unsubscribe(subscriber, to)
case PublishEvent(event) => publish(event)
case PublishStart => publishStart()
case PublishDone => publishDone(sender)
}
/** The event stream of the actor system this actor is running in. */
def eventStream: EventStream = {
  val actorSystem = context.system
  actorSystem.eventStream
}
@ -435,11 +443,6 @@ private[cluster] final class ClusterDomainEventPublisher extends Actor with Acto
publishCurrentClusterState(None)
}
/**
 * Clears the publisher's state and then acknowledges to `receiver` by
 * sending it `PublishDoneFinished`.
 *
 * @param receiver the actor that is notified once the state has been cleared
 */
def publishDone(receiver: ActorRef): Unit = {
  // Reset first, so the acknowledgement is only sent after the state is cleared.
  clearState()
  receiver.tell(PublishDoneFinished, self)
}
def clearState(): Unit = {
latestGossip = Gossip.empty
latestConvergedGossip = Gossip.empty