!clu #2307 Allow transition from unreachable to reachable

* Replace unreachable Set with Reachability table
* Unreachable members stay in member Set
* Downing a live member was moved it to the unreachable Set,
  and then removed from there by the leader. That will not
  work when flipping back to reachable, so a Down member must
  be detected as unreachable before beeing removed. Similar
  to Exiting. Member shuts down itself if it sees itself as
  Down.
* Flip back to reachable when failure detector monitors it as
  available again
* ReachableMember event
* Can't ignore gossip from aggregated unreachable (see SurviveNetworkInstabilitySpec)
* Make use of ReachableMember event in cluster router
* End heartbeat when acknowledged, EndHeartbeatAck
* Remove nr-of-end-heartbeats from conf
* Full reachability info in JMX cluster status
* Don't use interval after unreachable for AccrualFailureDetector history
* Add QuarantinedEvent to remoting, used for Reachability.Terminated
* Prune reachability table when all reachable
* Update documentation
* Performance testing and optimizations
This commit is contained in:
Patrik Nordwall 2013-08-27 15:14:53 +02:00
parent beba5d9f76
commit dc9fe4f19c
43 changed files with 2425 additions and 1169 deletions

View file

@ -21,6 +21,7 @@ import akka.routing.RoundRobinRouter
import akka.routing.RoutedActorRef
import akka.routing.RouterRoutees
import akka.testkit._
import akka.remote.transport.ThrottlerTransportAdapter.Direction
object ClusterRoundRobinRoutedActorMultiJvmSpec extends MultiNodeConfig {
@ -86,6 +87,8 @@ object ClusterRoundRobinRoutedActorMultiJvmSpec extends MultiNodeConfig {
nodeConfig(first, second)(ConfigFactory.parseString("""akka.cluster.roles =["a", "c"]"""))
nodeConfig(third)(ConfigFactory.parseString("""akka.cluster.roles =["b", "c"]"""))
testTransport(on = true)
}
class ClusterRoundRobinRoutedActorMultiJvmNode1 extends ClusterRoundRobinRoutedActorSpec
@ -300,6 +303,31 @@ abstract class ClusterRoundRobinRoutedActorSpec extends MultiNodeSpec(ClusterRou
enterBarrier("after-8")
}
"remove routees for unreachable nodes, and add when reachable again" taggedAs LongRunningTest in within(30.seconds) {
// myservice is already running
def routees = currentRoutees(router4)
def routeeAddresses = (routees map fullAddress).toSet
runOn(first) {
// 4 nodes, 1 routee on each node
awaitAssert(currentRoutees(router4).size must be(4))
testConductor.blackhole(first, second, Direction.Both).await
awaitAssert(routees.size must be(3))
routeeAddresses must not contain (address(second))
testConductor.passThrough(first, second, Direction.Both).await
awaitAssert(routees.size must be(4))
routeeAddresses must contain(address(second))
}
enterBarrier("after-9")
}
"deploy programatically defined routees to other node when a node becomes down" taggedAs LongRunningTest in {
muteMarkingAsUnreachable()
@ -313,7 +341,7 @@ abstract class ClusterRoundRobinRoutedActorSpec extends MultiNodeSpec(ClusterRou
val downRoutee = routees.find(_.path.address == downAddress).get
cluster.down(downAddress)
expectMsgType[Terminated].actor must be(downRoutee)
expectMsgType[Terminated](15.seconds).actor must be(downRoutee)
awaitAssert {
routeeAddresses must contain(notUsedAddress)
routeeAddresses must not contain (downAddress)
@ -330,7 +358,7 @@ abstract class ClusterRoundRobinRoutedActorSpec extends MultiNodeSpec(ClusterRou
replies.values.sum must be(iterationCount)
}
enterBarrier("after-9")
enterBarrier("after-10")
}
}