Use consistent hash to heartbeat to a few nodes instead of all, see #2284

* Previously heartbeat messages were sent to all other members, i.e. each member was monitored by all other members in the cluster. * This was the number one known scalability bottleneck, due to the number of interconnections. * Limit sending of heartbeats to a few (5) members. Select and re-balance with a consistent hashing algorithm when new members are added or removed. * Send a few EndHeartbeat messages when ending the sending of Heartbeat messages.
2012-10-01 14:12:20 +02:00 · 2012-10-01 14:12:20 +02:00 · 3f73705abc
commit 3f73705abc
parent 7557433491
8 changed files with 172 additions and 60 deletions
--- a/akka-cluster/src/multi-jvm/scala/akka/cluster/LargeClusterSpec.scala
+++ b/akka-cluster/src/multi-jvm/scala/akka/cluster/LargeClusterSpec.scala
@@ -42,7 +42,7 @@ object LargeClusterMultiJvmSpec extends MultiNodeConfig {
      gossip-interval = 500 ms
      auto-join = off
      auto-down = on
-      failure-detector.acceptable-heartbeat-pause = 10s
+      failure-detector.acceptable-heartbeat-pause = 5s
      publish-stats-interval = 0 s # always, when it happens
    }
    akka.event-handlers = ["akka.testkit.TestEventListener"]
@@ -57,7 +57,9 @@ object LargeClusterMultiJvmSpec extends MultiNodeConfig {
    akka.scheduler.tick-duration = 33 ms
    akka.remote.log-remote-lifecycle-events = off
    akka.remote.netty.execution-pool-size = 4
-    #akka.remote.netty.reconnection-time-window = 1s
+    #akka.remote.netty.reconnection-time-window = 10s
+    akka.remote.netty.read-timeout = 5s
+    akka.remote.netty.write-timeout = 5s
    akka.remote.netty.backoff-timeout = 500ms
    akka.remote.netty.connection-timeout = 500ms