Use consistent hash to heartbeat to a few nodes instead of all, see #2284
* Previously heartbeat messages were sent to all other members, i.e. each member was monitored by all other members in the cluster. * This was the number one known scalability bottleneck, due to the number of interconnections. * Limit sending of heartbeats to a few (5) members. Select and re-balance with a consistent hashing algorithm when members are added or removed. * Send a few EndHeartbeat messages when ending the sending of Heartbeat messages.
This commit is contained in:
parent
7557433491
commit
3f73705abc
8 changed files with 172 additions and 60 deletions
|
|
@ -42,7 +42,7 @@ object LargeClusterMultiJvmSpec extends MultiNodeConfig {
|
|||
gossip-interval = 500 ms
|
||||
auto-join = off
|
||||
auto-down = on
|
||||
failure-detector.acceptable-heartbeat-pause = 10s
|
||||
failure-detector.acceptable-heartbeat-pause = 5s
|
||||
publish-stats-interval = 0 s # always, when it happens
|
||||
}
|
||||
akka.event-handlers = ["akka.testkit.TestEventListener"]
|
||||
|
|
@ -57,7 +57,9 @@ object LargeClusterMultiJvmSpec extends MultiNodeConfig {
|
|||
akka.scheduler.tick-duration = 33 ms
|
||||
akka.remote.log-remote-lifecycle-events = off
|
||||
akka.remote.netty.execution-pool-size = 4
|
||||
#akka.remote.netty.reconnection-time-window = 1s
|
||||
#akka.remote.netty.reconnection-time-window = 10s
|
||||
akka.remote.netty.read-timeout = 5s
|
||||
akka.remote.netty.write-timeout = 5s
|
||||
akka.remote.netty.backoff-timeout = 500ms
|
||||
akka.remote.netty.connection-timeout = 500ms
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue