Use consistent hash to heartbeat to a few nodes instead of all, see #2284

* Previously heartbeat messages were sent to all other members, i.e.
  each member was monitored by all other members in the cluster.
* This was the number one known scalability bottleneck, due to the
  number of interconnections.
* Limit sending of heartbeats to a few (5) members. Select and
  re-balance with a consistent hashing algorithm when members
  are added or removed.
* Send a few EndHeartbeat messages when sending of Heartbeat messages to a member ends.
This commit is contained in:
Patrik Nordwall 2012-10-01 14:12:20 +02:00
parent 7557433491
commit 3f73705abc
8 changed files with 172 additions and 60 deletions

View file

@ -42,7 +42,7 @@ object LargeClusterMultiJvmSpec extends MultiNodeConfig {
gossip-interval = 500 ms
auto-join = off
auto-down = on
failure-detector.acceptable-heartbeat-pause = 10s
failure-detector.acceptable-heartbeat-pause = 5s
publish-stats-interval = 0 s # always, when it happens
}
akka.event-handlers = ["akka.testkit.TestEventListener"]
@ -57,7 +57,9 @@ object LargeClusterMultiJvmSpec extends MultiNodeConfig {
akka.scheduler.tick-duration = 33 ms
akka.remote.log-remote-lifecycle-events = off
akka.remote.netty.execution-pool-size = 4
#akka.remote.netty.reconnection-time-window = 1s
#akka.remote.netty.reconnection-time-window = 10s
akka.remote.netty.read-timeout = 5s
akka.remote.netty.write-timeout = 5s
akka.remote.netty.backoff-timeout = 500ms
akka.remote.netty.connection-timeout = 500ms