!clu #2307 Allow transition from unreachable to reachable

* Replace unreachable Set with Reachability table
* Unreachable members stay in member Set
* Downing a live member was moved it to the unreachable Set,
  and then removed from there by the leader. That will not
  work when flipping back to reachable, so a Down member must
  be detected as unreachable before beeing removed. Similar
  to Exiting. Member shuts down itself if it sees itself as
  Down.
* Flip back to reachable when failure detector monitors it as
  available again
* ReachableMember event
* Can't ignore gossip from aggregated unreachable (see SurviveNetworkInstabilitySpec)
* Make use of ReachableMember event in cluster router
* End heartbeat when acknowledged, EndHeartbeatAck
* Remove nr-of-end-heartbeats from conf
* Full reachability info in JMX cluster status
* Don't use interval after unreachable for AccrualFailureDetector history
* Add QuarantinedEvent to remoting, used for Reachability.Terminated
* Prune reachability table when all reachable
* Update documentation
* Performance testing and optimizations
This commit is contained in:
Patrik Nordwall 2013-08-27 15:14:53 +02:00
parent beba5d9f76
commit dc9fe4f19c
43 changed files with 2425 additions and 1169 deletions

View file

@ -38,12 +38,13 @@ object MultiNodeClusterSpec {
jmx.enabled = off
gossip-interval = 200 ms
leader-actions-interval = 200 ms
unreachable-nodes-reaper-interval = 200 ms
unreachable-nodes-reaper-interval = 500 ms
periodic-tasks-initial-delay = 300 ms
publish-stats-interval = 0 s # always, when it happens
failure-detector.heartbeat-interval = 400 ms
failure-detector.heartbeat-interval = 500 ms
}
akka.loglevel = INFO
akka.log-dead-letters = off
akka.log-dead-letters-during-shutdown = off
akka.remote.log-remote-lifecycle-events = off
akka.loggers = ["akka.testkit.TestEventListener"]
@ -114,8 +115,10 @@ trait MultiNodeClusterSpec extends Suite with STMultiNodeSpec with WatchedByCoro
classOf[InternalClusterAction.Tick],
classOf[akka.actor.PoisonPill],
classOf[akka.dispatch.sysmsg.DeathWatchNotification],
akka.remote.transport.AssociationHandle.Disassociated.getClass,
akka.remote.transport.ActorTransportAdapter.DisassociateUnderlying.getClass,
classOf[akka.remote.transport.AssociationHandle.Disassociated],
// akka.remote.transport.AssociationHandle.Disassociated.getClass,
classOf[akka.remote.transport.ActorTransportAdapter.DisassociateUnderlying],
// akka.remote.transport.ActorTransportAdapter.DisassociateUnderlying.getClass,
classOf[akka.remote.transport.AssociationHandle.InboundPayload])(sys)
}
@ -125,6 +128,10 @@ trait MultiNodeClusterSpec extends Suite with STMultiNodeSpec with WatchedByCoro
if (!sys.log.isDebugEnabled)
sys.eventStream.publish(Mute(EventFilter.error(pattern = ".*Marking.* as UNREACHABLE.*")))
def muteMarkingAsReachable(sys: ActorSystem = system): Unit =
if (!sys.log.isDebugEnabled)
sys.eventStream.publish(Mute(EventFilter.info(pattern = ".*Marking.* as REACHABLE.*")))
override def afterAll(): Unit = {
if (!log.isDebugEnabled) {
muteDeadLetters()()
@ -292,7 +299,8 @@ trait MultiNodeClusterSpec extends Suite with STMultiNodeSpec with WatchedByCoro
}
}
def awaitAllReachable(): Unit = awaitAssert(clusterView.unreachableMembers.isEmpty)
def awaitAllReachable(): Unit =
awaitAssert(clusterView.unreachableMembers must be(Set.empty))
/**
* Wait until the specified nodes have seen the same gossip overview.