* moved AutoDowning to cluster tests, in new package akka.cluster.testkit
* changed config in tests
* migration guide
* documentation clarifications for Downing and Leaving
* updated warnings in Singleton and Sharding
This commit is contained in:
parent 064f06f5a6
commit a217d5566e

61 changed files with 414 additions and 309 deletions
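The recurring change across the test sources is mechanical. A sketch of the before/after pattern, using the config keys from this commit (the object name is illustrative only):

    import com.typesafe.config.ConfigFactory

    object AutoDownMigrationSketch {
      // before: deprecated setting on the production Cluster extension (removed in this commit)
      val before = ConfigFactory.parseString(
        "akka.cluster.auto-down-unreachable-after = 0s")

      // after: opt in to the test-only provider from the new akka.cluster.testkit package
      val after = ConfigFactory.parseString("""
        akka.cluster.downing-provider-class = akka.cluster.testkit.AutoDowning
        akka.cluster.testkit.auto-down-unreachable-after = 0s
        """)
    }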
@@ -63,7 +63,8 @@ abstract class ClusterShardingFailureSpecConfig(val mode: String) extends MultiN
       akka.loglevel = INFO
       akka.actor.provider = "cluster"
       akka.remote.classic.log-remote-lifecycle-events = off
-      akka.cluster.auto-down-unreachable-after = 0s
+      akka.cluster.downing-provider-class = akka.cluster.testkit.AutoDowning
+      akka.cluster.testkit.auto-down-unreachable-after = 0s
       akka.cluster.roles = ["backend"]
       akka.persistence.journal.plugin = "akka.persistence.journal.leveldb-shared"
       akka.persistence.journal.leveldb-shared {
@@ -50,7 +50,8 @@ object ClusterShardingGetStateSpecConfig extends MultiNodeConfig {
       akka.loglevel = INFO
       akka.actor.provider = "cluster"
       akka.remote.log-remote-lifecycle-events = off
-      akka.cluster.auto-down-unreachable-after = 0s
+      akka.cluster.downing-provider-class = akka.cluster.testkit.AutoDowning
+      akka.cluster.testkit.auto-down-unreachable-after = 0s
       akka.cluster.sharding {
         coordinator-failure-backoff = 3s
         shard-failure-backoff = 3s
@@ -56,7 +56,8 @@ object ClusterShardingGetStatsSpecConfig extends MultiNodeConfig {
       akka.actor.provider = "cluster"
       akka.remote.classic.log-remote-lifecycle-events = off
       akka.log-dead-letters-during-shutdown = off
-      akka.cluster.auto-down-unreachable-after = 0s
+      akka.cluster.downing-provider-class = akka.cluster.testkit.AutoDowning
+      akka.cluster.testkit.auto-down-unreachable-after = 0s
       akka.cluster.sharding {
         state-store-mode = "ddata"
         updating-state-timeout = 2s
@@ -67,7 +67,8 @@ abstract class ClusterShardingLeavingSpecConfig(val mode: String) extends MultiN
       akka.loglevel = INFO
       akka.actor.provider = "cluster"
       akka.remote.classic.log-remote-lifecycle-events = off
-      akka.cluster.auto-down-unreachable-after = 0s
+      akka.cluster.downing-provider-class = akka.cluster.testkit.AutoDowning
+      akka.cluster.testkit.auto-down-unreachable-after = 0s
       akka.persistence.journal.plugin = "akka.persistence.journal.leveldb-shared"
       akka.persistence.journal.leveldb-shared {
         timeout = 5s
@@ -55,7 +55,8 @@ object ClusterShardingQueriesSpecConfig extends MultiNodeConfig {
       akka.actor.provider = "cluster"
       akka.remote.classic.log-remote-lifecycle-events = off
       akka.log-dead-letters-during-shutdown = off
-      akka.cluster.auto-down-unreachable-after = 0s
+      akka.cluster.downing-provider-class = akka.cluster.testkit.AutoDowning
+      akka.cluster.testkit.auto-down-unreachable-after = 0s
       akka.cluster.sharding {
         state-store-mode = "ddata"
         shard-region-query-timeout = 0ms
@@ -61,7 +61,8 @@ abstract class ClusterShardingRememberEntitiesNewExtractorSpecConfig(val mode: S
     ConfigFactory
       .parseString(s"""
       akka.actor.provider = "cluster"
-      akka.cluster.auto-down-unreachable-after = 0s
+      akka.cluster.downing-provider-class = akka.cluster.testkit.AutoDowning
+      akka.cluster.testkit.auto-down-unreachable-after = 0s
       akka.remote.classic.log-remote-lifecycle-events = off
       akka.persistence.journal.plugin = "akka.persistence.journal.leveldb-shared"
       akka.persistence.journal.leveldb-shared {
@@ -56,7 +56,8 @@ object ClusterShardingRememberEntitiesPerfSpecConfig extends MultiNodeConfig {
   commonConfig(ConfigFactory.parseString(s"""
     akka.loglevel = INFO
     akka.actor.provider = "cluster"
-    akka.cluster.auto-down-unreachable-after = 0s
+    akka.cluster.downing-provider-class = akka.cluster.testkit.AutoDowning
+    akka.cluster.testkit.auto-down-unreachable-after = 0s
     akka.remote.log-remote-lifecycle-events = off
     akka.testconductor.barrier-timeout = 3 minutes
     akka.remote.artery.advanced.outbound-message-queue-size = 10000
@@ -69,7 +69,8 @@ abstract class ClusterShardingRememberEntitiesSpecConfig(val mode: String, val r
     modeConfig
       .withFallback(ConfigFactory.parseString(s"""
       akka.actor.provider = "cluster"
-      akka.cluster.auto-down-unreachable-after = 0s
+      akka.cluster.downing-provider-class = akka.cluster.testkit.AutoDowning
+      akka.cluster.testkit.auto-down-unreachable-after = 0s
       akka.remote.log-remote-lifecycle-events = off
       akka.cluster.sharding.state-store-mode = "$mode"
       akka.cluster.sharding.distributed-data.durable.lmdb {
@@ -135,7 +135,8 @@ abstract class ClusterShardingSpecConfig(val mode: String, val entityRecoveryStr
       akka.loglevel = INFO
       akka.actor.provider = "cluster"
       akka.remote.log-remote-lifecycle-events = off
-      akka.cluster.auto-down-unreachable-after = 0s
+      akka.cluster.downing-provider-class = akka.cluster.testkit.AutoDowning
+      akka.cluster.testkit.auto-down-unreachable-after = 0s
       akka.cluster.roles = ["backend"]
       akka.cluster.distributed-data.gossip-interval = 1s
       akka.persistence.journal.plugin = "akka.persistence.journal.leveldb-shared"
@@ -61,7 +61,8 @@ object MultiDcClusterShardingSpecConfig extends MultiNodeConfig {
       akka.cluster {
         debug.verbose-heartbeat-logging = on
         debug.verbose-gossip-logging = on
-        auto-down-unreachable-after = 0s
+        downing-provider-class = akka.cluster.testkit.AutoDowning
+        testkit.auto-down-unreachable-after = 0s
         sharding {
           retry-interval = 200ms
         }
@@ -44,7 +44,8 @@ abstract class MultiNodeClusterShardingConfig(
       .withFallback(ConfigFactory.parseString(s"""
       akka.loglevel = $loglevel
       akka.actor.provider = "cluster"
-      akka.cluster.auto-down-unreachable-after = 0s
+      akka.cluster.downing-provider-class = akka.cluster.testkit.AutoDowning
+      akka.cluster.testkit.auto-down-unreachable-after = 0s
       akka.remote.log-remote-lifecycle-events = off
       akka.cluster.sharding.state-store-mode = "$mode"
       akka.cluster.sharding.distributed-data.durable.lmdb {
@@ -36,7 +36,6 @@ import akka.pattern.ask
 import akka.pattern.pipe
 import akka.util.JavaDurationConverters._
 import akka.util.Timeout
-import com.github.ghik.silencer.silent
 import com.typesafe.config.Config

 object ClusterSingletonManagerSettings {
@@ -45,7 +44,6 @@ object ClusterSingletonManagerSettings {
   * Create settings from the default configuration
   * `akka.cluster.singleton`.
   */
-  @silent("deprecated") // DownRemovalMargin
  def apply(system: ActorSystem): ClusterSingletonManagerSettings =
    apply(system.settings.config.getConfig("akka.cluster.singleton"))
  // note that this setting has some additional logic inside the ClusterSingletonManager
@@ -45,7 +45,8 @@ object ClusterClientSpec extends MultiNodeConfig {
     akka.loglevel = INFO
     akka.actor.provider = "cluster"
     akka.remote.log-remote-lifecycle-events = off
-    akka.cluster.auto-down-unreachable-after = 0s
+    akka.cluster.downing-provider-class = akka.cluster.testkit.AutoDowning
+    akka.cluster.testkit.auto-down-unreachable-after = 0s
     akka.cluster.client.heartbeat-interval = 1s
     akka.cluster.client.acceptable-heartbeat-pause = 3s
     akka.cluster.client.refresh-contacts-interval = 1s
@@ -30,7 +30,8 @@ object DistributedPubSubMediatorSpec extends MultiNodeConfig {
     akka.loglevel = INFO
     akka.actor.provider = "cluster"
     akka.remote.log-remote-lifecycle-events = off
-    akka.cluster.auto-down-unreachable-after = 0s
+    akka.cluster.downing-provider-class = akka.cluster.testkit.AutoDowning
+    akka.cluster.testkit.auto-down-unreachable-after = 0s
     akka.cluster.pub-sub.max-delta-elements = 500
     """))

@@ -33,7 +33,8 @@ object DistributedPubSubRestartSpec extends MultiNodeConfig {
     akka.cluster.pub-sub.gossip-interval = 500ms
     akka.actor.provider = cluster
     akka.remote.log-remote-lifecycle-events = off
-    akka.cluster.auto-down-unreachable-after = off
+    akka.cluster.downing-provider-class = akka.cluster.testkit.AutoDowning
+    akka.cluster.testkit.auto-down-unreachable-after = off
     """))

   testTransport(on = true)
@@ -35,7 +35,8 @@ object ClusterSingletonManagerChaosSpec extends MultiNodeConfig {
     akka.loglevel = INFO
     akka.actor.provider = "cluster"
     akka.remote.log-remote-lifecycle-events = off
-    akka.cluster.auto-down-unreachable-after = 0s
+    akka.cluster.downing-provider-class = akka.cluster.testkit.AutoDowning
+    akka.cluster.testkit.auto-down-unreachable-after = 0s
     """))

   case object EchoStarted
@@ -28,7 +28,8 @@ object ClusterSingletonManagerLeaseSpec extends MultiNodeConfig {
     akka.loglevel = INFO
     akka.actor.provider = "cluster"
     akka.remote.log-remote-lifecycle-events = off
-    akka.cluster.auto-down-unreachable-after = 0s
+    akka.cluster.downing-provider-class = akka.cluster.testkit.AutoDowning
+    akka.cluster.testkit.auto-down-unreachable-after = 0s
     test-lease {
       lease-class = akka.cluster.TestLeaseActorClient
       heartbeat-interval = 1s
@@ -33,7 +33,8 @@ object ClusterSingletonManagerLeave2Spec extends MultiNodeConfig {
     akka.loglevel = INFO
     akka.actor.provider = "cluster"
     akka.remote.log-remote-lifecycle-events = off
-    akka.cluster.auto-down-unreachable-after = off
+    akka.cluster.downing-provider-class = akka.cluster.testkit.AutoDowning
+    akka.cluster.testkit.auto-down-unreachable-after = off
     """))

   case object EchoStarted
@@ -26,7 +26,8 @@ object ClusterSingletonManagerLeaveSpec extends MultiNodeConfig {
     akka.loglevel = INFO
     akka.actor.provider = "cluster"
     akka.remote.log-remote-lifecycle-events = off
-    akka.cluster.auto-down-unreachable-after = off
+    akka.cluster.downing-provider-class = akka.cluster.testkit.AutoDowning
+    akka.cluster.testkit.auto-down-unreachable-after = off
     """))

   case object EchoStarted
@@ -40,7 +40,8 @@ object ClusterSingletonManagerSpec extends MultiNodeConfig {
     akka.loglevel = INFO
     akka.actor.provider = "cluster"
     akka.remote.log-remote-lifecycle-events = off
-    akka.cluster.auto-down-unreachable-after = 0s
+    akka.cluster.downing-provider-class = akka.cluster.testkit.AutoDowning
+    akka.cluster.testkit.auto-down-unreachable-after = 0s
     """))

   nodeConfig(first, second, third, fourth, fifth, sixth)(ConfigFactory.parseString("akka.cluster.roles =[worker]"))
@@ -27,7 +27,8 @@ object ClusterSingletonManagerStartupSpec extends MultiNodeConfig {
     akka.loglevel = INFO
     akka.actor.provider = "cluster"
     akka.remote.log-remote-lifecycle-events = off
-    akka.cluster.auto-down-unreachable-after = 0s
+    akka.cluster.downing-provider-class = akka.cluster.testkit.AutoDowning
+    akka.cluster.testkit.auto-down-unreachable-after = 0s
     """))

   case object EchoStarted
@@ -44,7 +44,8 @@ class ClusterSingletonLeavingSpeedSpec
     """
      akka.loglevel = DEBUG
      akka.actor.provider = akka.cluster.ClusterActorRefProvider
-     akka.cluster.auto-down-unreachable-after = 2s
+     akka.cluster.downing-provider-class = akka.cluster.testkit.AutoDowning
+     akka.cluster.testkit.auto-down-unreachable-after = 2s

      # With 10 systems and setting min-number-of-hand-over-retries to 5 and gossip-interval to 2s it's possible to
      # reproduce the ClusterSingletonManagerIsStuck and slow hand over in issue #25639
@@ -31,7 +31,8 @@ class ClusterSingletonRestart2Spec
      akka.loglevel = INFO
      akka.cluster.roles = [singleton]
      akka.actor.provider = akka.cluster.ClusterActorRefProvider
-     akka.cluster.auto-down-unreachable-after = 2s
+     akka.cluster.downing-provider-class = akka.cluster.testkit.AutoDowning
+     akka.cluster.testkit.auto-down-unreachable-after = 2s
      akka.cluster.singleton.min-number-of-hand-over-retries = 5
      akka.remote {
        classic.netty.tcp {
@@ -14,10 +14,12 @@ import akka.testkit.TestActors
 import akka.testkit.TestProbe
 import com.typesafe.config.ConfigFactory

-class ClusterSingletonRestartSpec extends AkkaSpec("""
+class ClusterSingletonRestartSpec
+    extends AkkaSpec("""
   akka.loglevel = INFO
   akka.actor.provider = akka.cluster.ClusterActorRefProvider
-  akka.cluster.auto-down-unreachable-after = 2s
+  akka.cluster.downing-provider-class = akka.cluster.testkit.AutoDowning
+  akka.cluster.testkit.auto-down-unreachable-after = 2s
   akka.remote {
     classic.netty.tcp {
       hostname = "127.0.0.1"
@@ -52,7 +52,6 @@ object ClusterReceptionistSpec {
     }

     akka.cluster {
-      #auto-down-unreachable-after = 0s
       jmx.multi-mbeans-in-same-jvm = on
       failure-detector.acceptable-heartbeat-pause = 3s
     }
@@ -0,0 +1,8 @@
+# #27788 Remove AutoDowning
+ProblemFilters.exclude[MissingClassProblem]("akka.cluster.AutoDown$UnreachableTimeout")
+ProblemFilters.exclude[MissingClassProblem]("akka.cluster.AutoDown$UnreachableTimeout$")
+ProblemFilters.exclude[DirectMissingMethodProblem]("akka.cluster.ClusterSettings.AutoDownUnreachableAfter")
+ProblemFilters.exclude[MissingClassProblem]("akka.cluster.AutoDownBase")
+ProblemFilters.exclude[MissingClassProblem]("akka.cluster.AutoDown$")
+ProblemFilters.exclude[MissingClassProblem]("akka.cluster.AutoDown")
+ProblemFilters.exclude[MissingClassProblem]("akka.cluster.AutoDowning")
@@ -35,33 +35,17 @@ akka {
     # attempts.
     shutdown-after-unsuccessful-join-seed-nodes = off

-    # Should the 'leader' in the cluster be allowed to automatically mark
-    # unreachable nodes as DOWN after a configured time of unreachability?
-    # Using auto-down implies that two separate clusters will automatically be
-    # formed in case of network partition.
-    #
-    # Don't enable this in production, see 'Auto-downing (DO NOT USE)' section
-    # of Akka Cluster documentation.
-    #
-    # Disable with "off" or specify a duration to enable auto-down.
-    # If a downing-provider-class is configured this setting is ignored.
-    auto-down-unreachable-after = off
-
     # Time margin after which shards or singletons that belonged to a downed/removed
     # partition are created in surviving partition. The purpose of this margin is that
     # in case of a network partition the persistent actors in the non-surviving partitions
     # must be stopped before corresponding persistent actors are started somewhere else.
     # This is useful if you implement downing strategies that handle network partitions,
     # e.g. by keeping the larger side of the partition and shutting down the smaller side.
-    # It will not add any extra safety for auto-down-unreachable-after, since that is not
-    # handling network partitions.
     # Disable with "off" or specify a duration to enable.
     down-removal-margin = off

     # Pluggable support for downing of nodes in the cluster.
-    # If this setting is left empty behavior will depend on 'auto-down-unreachable' in the following ways:
-    # * if it is 'off' the `NoDowning` provider is used and no automatic downing will be performed
-    # * if it is set to a duration the `AutoDowning` provider is with the configured downing duration
+    # If this setting is left empty the `NoDowning` provider is used and no automatic downing will be performed.
     #
     # If specified the value must be the fully qualified class name of a subclass of
     # `akka.cluster.DowningProvider` having a public one argument constructor accepting an `ActorSystem`
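As a sketch of how the remaining settings combine for a custom strategy (the provider class name `com.example.MyDowningProvider` is hypothetical, not part of this commit):

    import com.typesafe.config.ConfigFactory

    val downingConfig = ConfigFactory.parseString("""
      # must be a subclass of akka.cluster.DowningProvider with a public
      # one-argument ActorSystem constructor
      akka.cluster.downing-provider-class = "com.example.MyDowningProvider"
      # give shards/singletons of a downed partition time to stop before take-over
      akka.cluster.down-removal-margin = 20s
      """)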
@@ -125,8 +125,19 @@ class Cluster(val system: ExtendedActorSystem) extends Extension {
   }

   // needs to be lazy to allow downing provider impls to access Cluster (if not we get deadlock)
-  lazy val downingProvider: DowningProvider =
+  lazy val downingProvider: DowningProvider = {
+    checkAutoDownUsage()
     DowningProvider.load(settings.DowningProviderClassName, system)
+  }
+
+  private def checkAutoDownUsage(): Unit = {
+    if (settings.DowningProviderClassName == "akka.cluster.AutoDowning" ||
+        (settings.config.hasPath("auto-down-unreachable-after") && settings.config.getString(
+          "auto-down-unreachable-after") != "off"))
+      logWarning(
+        "auto-down has been removed in Akka 2.6.0. See " +
+        "https://doc.akka.io/docs/akka/2.6/typed/cluster.html#downing for alternatives.")
+  }

   // ========================================================
   // ===================== WORK DAEMONS =====================
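For illustration, a legacy config that would now trigger this warning on first access of the downing provider (a sketch; the remoting settings are assumptions to make the system start, values illustrative):

    import akka.actor.ActorSystem
    import akka.cluster.Cluster
    import com.typesafe.config.ConfigFactory

    val system = ActorSystem(
      "legacy",
      ConfigFactory.parseString("""
        akka.actor.provider = cluster
        akka.remote.artery.canonical.port = 0
        akka.cluster.auto-down-unreachable-after = 10s  # removed setting, now only warns
        """))
    // first access runs checkAutoDownUsage() and logs:
    //   "auto-down has been removed in Akka 2.6.0. See ... for alternatives."
    Cluster(system).downingProvider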
@@ -406,12 +406,17 @@ private[cluster] class ClusterCoreDaemon(publisher: ActorRef, joinConfigCompatCh
   override def preStart(): Unit = {
     subscribeQuarantinedEvent()

-    cluster.downingProvider.downingActorProps.foreach { props =>
-      val propsWithDispatcher =
-        if (props.dispatcher == Deploy.NoDispatcherGiven) props.withDispatcher(context.props.dispatcher)
-        else props
+    cluster.downingProvider.downingActorProps match {
+      case Some(props) =>
+        val propsWithDispatcher =
+          if (props.dispatcher == Deploy.NoDispatcherGiven) props.withDispatcher(context.props.dispatcher)
+          else props

         context.actorOf(propsWithDispatcher, name = "downingProvider")
+      case None =>
+        logInfo(
+          "No downing-provider-class configured, manual cluster downing required, see " +
+          "https://doc.akka.io/docs/akka/current/typed/cluster.html#downing")
     }

     if (seedNodes.isEmpty) {
@@ -420,7 +425,7 @@ private[cluster] class ClusterCoreDaemon(publisher: ActorRef, joinConfigCompatCh
       else
         logInfo(
           "No seed-nodes configured, manual cluster join required, see " +
-          "https://doc.akka.io/docs/akka/current/cluster-usage.html#joining-to-seed-nodes")
+          "https://doc.akka.io/docs/akka/current/typed/cluster.html#joining")
     } else {
       self ! JoinSeedNodes(seedNodes)
     }
@@ -116,21 +116,6 @@ final class ClusterSettings(val config: Config, val systemName: String) {
     cc.getMillisDuration(key).requiring(_ >= Duration.Zero, key + " >= 0s")
   }

-  // specific to the [[akka.cluster.DefaultDowningProvider]]
-  val AutoDownUnreachableAfter: Duration = {
-    val key = "auto-down-unreachable-after"
-    toRootLowerCase(cc.getString(key)) match {
-      case "off" => Duration.Undefined
-      case _     => cc.getMillisDuration(key).requiring(_ >= Duration.Zero, key + " >= 0s, or off")
-    }
-  }
-
-  /**
-   * @deprecated Specific to [[akka.cluster.AutoDown]] should not be used anywhere else, instead
-   * ``Cluster.downingProvider.downRemovalMargin`` should be used as it allows the downing provider to decide removal
-   * margins
-   */
-  @deprecated("Use Cluster.downingProvider.downRemovalMargin", since = "2.4.5")
   val DownRemovalMargin: FiniteDuration = {
     val key = "down-removal-margin"
     toRootLowerCase(cc.getString(key)) match {
@@ -142,7 +127,6 @@ final class ClusterSettings(val config: Config, val systemName: String) {
   val DowningProviderClassName: String = {
     val name = cc.getString("downing-provider-class")
     if (name.nonEmpty) name
-    else if (AutoDownUnreachableAfter.isFinite) classOf[AutoDowning].getName
     else classOf[NoDowning].getName
   }

@@ -6,7 +6,6 @@ package akka.cluster

 import akka.ConfigurationException
 import akka.actor.{ ActorSystem, ExtendedActorSystem, Props }
-import com.github.ghik.silencer.silent

 import scala.concurrent.duration.FiniteDuration

@@ -35,6 +34,15 @@ private[cluster] object DowningProvider {
 /**
  * API for plugins that will handle downing of cluster nodes. Concrete plugins must subclass and
  * have a public one argument constructor accepting an [[akka.actor.ActorSystem]].
+ *
+ * A custom `DowningProvider` can be configured with `akka.cluster.downing-provider-class`
+ *
+ * When implementing a downing provider you should make sure that it will not split the cluster into
+ * several separate clusters in case of network problems or system overload (long GC pauses). This
+ * is much more difficult than it might be perceived at first, so carefully read the concerns and scenarios
+ * described in
+ * https://doc.akka.io/docs/akka/current/typed/cluster.html#downing and
+ * https://doc.akka.io/docs/akka-enhancements/current/split-brain-resolver.html
  */
 abstract class DowningProvider {

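For reference, a minimal provider satisfying this contract could look like the following sketch (`com.example.MyDowningProvider` is a hypothetical name, not part of this commit):

    import scala.concurrent.duration._
    import akka.actor.{ ActorSystem, Props }
    import akka.cluster.DowningProvider

    // A do-nothing provider: downing is left to manual intervention.
    final class MyDowningProvider(system: ActorSystem) extends DowningProvider {
      // margin before shards/singletons of a removed member are taken over elsewhere
      override def downRemovalMargin: FiniteDuration = 20.seconds
      // no downing actor is started
      override val downingActorProps: Option[Props] = None
    }

Enabled with `akka.cluster.downing-provider-class = "com.example.MyDowningProvider"`.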
@@ -61,11 +69,9 @@ abstract class DowningProvider {
 }

 /**
- * Default downing provider used when no provider is configured and 'auto-down-unreachable-after'
- * is not enabled.
+ * Default downing provider used when no provider is configured.
  */
 final class NoDowning(system: ActorSystem) extends DowningProvider {
-  @silent("deprecated")
   override def downRemovalMargin: FiniteDuration = Cluster(system).settings.DownRemovalMargin
   override val downingActorProps: Option[Props] = None
 }
@@ -22,7 +22,8 @@ object LeaderDowningAllOtherNodesMultiJvmSpec extends MultiNodeConfig {
     debugConfig(on = false)
       .withFallback(ConfigFactory.parseString("""
        akka.cluster.failure-detector.monitored-by-nr-of-members = 2
-       akka.cluster.auto-down-unreachable-after = 1s
+       akka.cluster.downing-provider-class = akka.cluster.testkit.AutoDowning
+       akka.cluster.testkit.auto-down-unreachable-after = 1s
        """))
       .withFallback(MultiNodeClusterSpec.clusterConfig))
 }
@@ -21,7 +21,9 @@ final case class LeaderDowningNodeThatIsUnreachableMultiNodeConfig(failureDetect

   commonConfig(
     debugConfig(on = false)
-      .withFallback(ConfigFactory.parseString("akka.cluster.auto-down-unreachable-after = 2s"))
+      .withFallback(ConfigFactory.parseString("""
+        akka.cluster.downing-provider-class = akka.cluster.testkit.AutoDowning
+        akka.cluster.testkit.auto-down-unreachable-after = 2s"""))
       .withFallback(MultiNodeClusterSpec.clusterConfig(failureDetectorPuppet)))
 }
@@ -21,7 +21,9 @@ object LeaderLeavingMultiJvmSpec extends MultiNodeConfig {

   commonConfig(
     debugConfig(on = false)
-      .withFallback(ConfigFactory.parseString("akka.cluster.auto-down-unreachable-after = 0s"))
+      .withFallback(ConfigFactory.parseString("""
+        akka.cluster.downing-provider-class = akka.cluster.testkit.AutoDowning
+        akka.cluster.testkit.auto-down-unreachable-after = 0s"""))
       .withFallback(MultiNodeClusterSpec.clusterConfigWithFailureDetectorPuppet))
 }
@@ -39,7 +39,8 @@ object MultiDcSplitBrainMultiJvmSpec extends MultiNodeConfig {
       akka.cluster {
         gossip-interval = 500ms
         leader-actions-interval = 1s
-        auto-down-unreachable-after = 1s
+        downing-provider-class = akka.cluster.testkit.AutoDowning
+        testkit.auto-down-unreachable-after = 1s
       }
       """)
       .withFallback(MultiNodeClusterSpec.clusterConfig))
@@ -21,8 +21,10 @@ object NodeChurnMultiJvmSpec extends MultiNodeConfig {
   val third = role("third")

   commonConfig(
-    debugConfig(on = false).withFallback(ConfigFactory.parseString("""
-      akka.cluster.auto-down-unreachable-after = 1s
+    debugConfig(on = false)
+      .withFallback(ConfigFactory.parseString("""
+      akka.cluster.downing-provider-class = akka.cluster.testkit.AutoDowning
+      akka.cluster.testkit.auto-down-unreachable-after = 1s
       akka.cluster.prune-gossip-tombstones-after = 1s
       akka.remote.classic.log-frame-size-exceeding = 1200b
       akka.remote.artery.advanced.aeron {
@@ -30,7 +32,8 @@ object NodeChurnMultiJvmSpec extends MultiNodeConfig {
         embedded-media-driver = off
         aeron-dir = "target/aeron-NodeChurnSpec"
       }
-      """)).withFallback(MultiNodeClusterSpec.clusterConfig))
+      """))
+      .withFallback(MultiNodeClusterSpec.clusterConfig))

   class LogListener(testActor: ActorRef) extends Actor {
     def receive = {
@@ -18,7 +18,7 @@ object NodeDowningAndBeingRemovedMultiJvmSpec extends MultiNodeConfig {
   commonConfig(
     debugConfig(on = false).withFallback(
       ConfigFactory
-        .parseString("akka.cluster.auto-down-unreachable-after = off")
+        .parseString("akka.cluster.testkit.auto-down-unreachable-after = off")
         .withFallback(MultiNodeClusterSpec.clusterConfig)))
 }
@@ -21,10 +21,12 @@ object QuickRestartMultiJvmSpec extends MultiNodeConfig {
   val third = role("third")

   commonConfig(
-    debugConfig(on = false).withFallback(ConfigFactory.parseString("""
-      akka.cluster.auto-down-unreachable-after = off
+    debugConfig(on = false)
+      .withFallback(ConfigFactory.parseString("""
+      akka.cluster.testkit.auto-down-unreachable-after = off
       akka.cluster.allow-weakly-up-members = off
-      """)).withFallback(MultiNodeClusterSpec.clusterConfig))
+      """))
+      .withFallback(MultiNodeClusterSpec.clusterConfig))

 }
@@ -28,11 +28,13 @@ object RestartFirstSeedNodeMultiJvmSpec extends MultiNodeConfig {
   val seed3 = role("seed3")

   commonConfig(
-    debugConfig(on = false).withFallback(ConfigFactory.parseString("""
-      akka.cluster.auto-down-unreachable-after = off
+    debugConfig(on = false)
+      .withFallback(ConfigFactory.parseString("""
+      akka.cluster.testkit.auto-down-unreachable-after = off
       akka.cluster.retry-unsuccessful-join-after = 3s
       akka.cluster.allow-weakly-up-members = off
-      """)).withFallback(MultiNodeClusterSpec.clusterConfig))
+      """))
+      .withFallback(MultiNodeClusterSpec.clusterConfig))
 }

 class RestartFirstSeedNodeMultiJvmNode1 extends RestartFirstSeedNodeSpec
@@ -28,7 +28,8 @@ object RestartNode2SpecMultiJvmSpec extends MultiNodeConfig {
   commonConfig(
     debugConfig(on = false)
       .withFallback(ConfigFactory.parseString("""
-      akka.cluster.auto-down-unreachable-after = 2s
+      akka.cluster.downing-provider-class = akka.cluster.testkit.AutoDowning
+      akka.cluster.testkit.auto-down-unreachable-after = 2s
       akka.cluster.retry-unsuccessful-join-after = 3s
       akka.cluster.allow-weakly-up-members = off
       akka.remote.retry-gate-closed-for = 45s
@@ -29,7 +29,7 @@ object RestartNode3MultiJvmSpec extends MultiNodeConfig {
   commonConfig(
     debugConfig(on = false)
       .withFallback(ConfigFactory.parseString("""
-      akka.cluster.auto-down-unreachable-after = off
+      akka.cluster.testkit.auto-down-unreachable-after = off
       akka.cluster.allow-weakly-up-members = off
       # test is using Java serialization and not priority to rewrite
       akka.actor.allow-java-serialization = on
@@ -34,7 +34,8 @@ object RestartNodeMultiJvmSpec extends MultiNodeConfig {
   commonConfig(
     debugConfig(on = false)
       .withFallback(ConfigFactory.parseString("""
-      akka.cluster.auto-down-unreachable-after = 5s
+      akka.cluster.downing-provider-class = akka.cluster.testkit.AutoDowning
+      akka.cluster.testkit.auto-down-unreachable-after = 5s
       akka.cluster.allow-weakly-up-members = off
       #akka.remote.use-passive-connections = off
       # test is using Java serialization and not priority to rewrite
@@ -16,12 +16,16 @@ final case class SingletonClusterMultiNodeConfig(failureDetectorPuppet: Boolean)
   val first = role("first")
   val second = role("second")

-  commonConfig(debugConfig(on = false).withFallback(ConfigFactory.parseString("""
+  commonConfig(
+    debugConfig(on = false)
+      .withFallback(ConfigFactory.parseString("""
       akka.cluster {
-        auto-down-unreachable-after = 0s
+        downing-provider-class = akka.cluster.testkit.AutoDowning
+        testkit.auto-down-unreachable-after = 0s
         failure-detector.threshold = 4
       }
-      """)).withFallback(MultiNodeClusterSpec.clusterConfig(failureDetectorPuppet)))
+      """))
+      .withFallback(MultiNodeClusterSpec.clusterConfig(failureDetectorPuppet)))
 }
@@ -21,12 +21,16 @@ final case class SplitBrainMultiNodeConfig(failureDetectorPuppet: Boolean) exten
   val fourth = role("fourth")
   val fifth = role("fifth")

-  commonConfig(debugConfig(on = false).withFallback(ConfigFactory.parseString("""
+  commonConfig(
+    debugConfig(on = false)
+      .withFallback(ConfigFactory.parseString("""
       akka.remote.retry-gate-closed-for = 3 s
       akka.cluster {
-        auto-down-unreachable-after = 1s
+        downing-provider-class = akka.cluster.testkit.AutoDowning
+        testkit.auto-down-unreachable-after = 1s
         failure-detector.threshold = 4
-      }""")).withFallback(MultiNodeClusterSpec.clusterConfig(failureDetectorPuppet)))
+      }"""))
+      .withFallback(MultiNodeClusterSpec.clusterConfig(failureDetectorPuppet)))

   testTransport(on = true)
 }
@@ -34,10 +34,14 @@ object StreamRefSpec extends MultiNodeConfig {
   val second = role("second")
   val third = role("third")

-  commonConfig(debugConfig(on = false).withFallback(ConfigFactory.parseString("""
+  commonConfig(
+    debugConfig(on = false)
+      .withFallback(ConfigFactory.parseString("""
       akka.cluster {
-        auto-down-unreachable-after = 1s
-      }""")).withFallback(MultiNodeClusterSpec.clusterConfig))
+        downing-provider-class = akka.cluster.testkit.AutoDowning
+        testkit.auto-down-unreachable-after = 1s
+      }"""))
+      .withFallback(MultiNodeClusterSpec.clusterConfig))

   testTransport(on = true)
@@ -119,7 +119,8 @@ private[cluster] object StressMultiJvmSpec extends MultiNodeConfig {
     akka.actor.provider = cluster
     akka.cluster {
       failure-detector.acceptable-heartbeat-pause = 10s
-      auto-down-unreachable-after = 1s
+      downing-provider-class = akka.cluster.testkit.AutoDowning
+      testkit.auto-down-unreachable-after = 1s
       publish-stats-interval = 1s
     }
     akka.loggers = ["akka.testkit.TestEventListener"]
@@ -42,7 +42,6 @@ class ClusterConfigSpec extends AkkaSpec {
       LeaderActionsInterval should ===(1 second)
       UnreachableNodesReaperInterval should ===(1 second)
       PublishStatsInterval should ===(Duration.Undefined)
-      AutoDownUnreachableAfter should ===(Duration.Undefined)
       DownRemovalMargin should ===(Duration.Zero)
       MinNrOfMembers should ===(1)
       MinNrOfMembersOfRole should ===(Map.empty[String, Int])
@@ -11,7 +11,8 @@ import com.typesafe.config.{ Config, ConfigFactory }
 object ClusterLogSpec {
   val config = """
     akka.cluster {
-      auto-down-unreachable-after = 0s
+      downing-provider-class = akka.cluster.testkit.AutoDowning
+      testkit.auto-down-unreachable-after = 0s
       publish-stats-interval = 0 s # always, when it happens
       failure-detector.implementation-class = akka.cluster.FailureDetectorPuppet
     }
@@ -30,7 +30,8 @@ import scala.concurrent.duration._
 object ClusterSpec {
   val config = """
     akka.cluster {
-      auto-down-unreachable-after = 0s
+      downing-provider-class = akka.cluster.testkit.AutoDowning
+      testkit.auto-down-unreachable-after = 0s
       periodic-tasks-initial-delay = 120 seconds // turn off scheduled tasks
       publish-stats-interval = 0 s # always, when it happens
       failure-detector.implementation-class = akka.cluster.FailureDetectorPuppet
@@ -6,14 +6,17 @@ package akka.cluster
 import java.util.concurrent.atomic.AtomicBoolean

+import scala.concurrent.duration._
+
 import akka.ConfigurationException
-import akka.actor.{ ActorSystem, Props }
-import akka.testkit.TestKit.{ awaitCond, shutdownActorSystem }
+import akka.actor.ActorSystem
+import akka.actor.Props
+import akka.testkit.TestKit.awaitCond
+import akka.testkit.TestKit.shutdownActorSystem
 import akka.util.unused
 import com.typesafe.config.ConfigFactory
-import org.scalatest.{ Matchers, WordSpec }
-import scala.concurrent.duration._
+import org.scalatest.Matchers
+import org.scalatest.WordSpec

 class FailingDowningProvider(@unused system: ActorSystem) extends DowningProvider {
   override val downRemovalMargin: FiniteDuration = 20.seconds
@@ -39,6 +42,10 @@ class DowningProviderSpec extends WordSpec with Matchers {
     loglevel = WARNING
     actor.provider = "cluster"
     remote {
+      artery.canonical {
+        hostname = 127.0.0.1
+        port = 0
+      }
       classic.netty.tcp {
         hostname = "127.0.0.1"
         port = 0
@@ -55,16 +62,6 @@ class DowningProviderSpec extends WordSpec with Matchers {
       shutdownActorSystem(system)
     }

-    "use akka.cluster.AutoDowning if 'auto-down-unreachable-after' is configured" in {
-      val system = ActorSystem(
-        "auto-downing",
-        ConfigFactory.parseString("""
-          akka.cluster.auto-down-unreachable-after = 18d
-        """).withFallback(baseConf))
-      Cluster(system).downingProvider shouldBe an[AutoDowning]
-      shutdownActorSystem(system)
-    }
-
     "use the specified downing provider" in {
       val system = ActorSystem(
         "auto-downing",
@@ -259,7 +259,8 @@ class JoinConfigCompatCheckerSpec extends AkkaSpec with ClusterTestKit {
     akka.cluster {

       # using explicit downing provider class
-      downing-provider-class = "akka.cluster.AutoDowning"
+      downing-provider-class = "akka.cluster.testkit.AutoDowning"
+      testkit.auto-down-unreachable-after = 0s

       configuration-compatibility-check {
         enforce-on-join = on
@@ -2,17 +2,82 @@
 * Copyright (C) 2009-2019 Lightbend Inc. <https://www.lightbend.com>
 */

-package akka.cluster
+package akka.cluster.testkit

-import akka.ConfigurationException
-import akka.actor.{ Actor, ActorSystem, Address, Cancellable, Props, Scheduler }
-
-import scala.concurrent.duration.FiniteDuration
-import akka.cluster.ClusterEvent._
-
 import scala.concurrent.duration.Duration
+import scala.concurrent.duration.FiniteDuration
+
+import akka.actor.Actor
 import akka.actor.ActorLogging
-import com.github.ghik.silencer.silent
+import akka.actor.ActorSystem
+import akka.actor.Address
+import akka.actor.Cancellable
+import akka.actor.Props
+import akka.actor.Scheduler
+import akka.cluster.Cluster
+import akka.cluster.ClusterEvent._
+import akka.cluster.DowningProvider
+import akka.cluster.Member
+import akka.cluster.MembershipState
+import akka.cluster.UniqueAddress
+import akka.util.Helpers.ConfigOps
+import akka.util.Helpers.Requiring
+import akka.util.Helpers.toRootLowerCase
+
+/**
+ * Downing provider used for testing.
+ *
+ * Auto-downing is a naïve approach to remove unreachable nodes from the cluster membership.
+ * In a production environment it will eventually break down the cluster.
+ * When a network partition occurs, both sides of the partition will see the other side as unreachable
+ * and remove it from the cluster. This results in the formation of two separate, disconnected, clusters
+ * (known as *Split Brain*).
+ *
+ * This behavior is not limited to network partitions. It can also occur if a node in the cluster is
+ * overloaded, or experiences a long GC pause.
+ *
+ * When using Cluster Singleton or Cluster Sharding it can break the contract provided by those features.
+ * Both provide a guarantee that an actor will be unique in a cluster.
+ * With the auto-down feature enabled, it is possible for multiple independent clusters to form (*Split Brain*).
+ * When this happens the guaranteed uniqueness will no longer be true resulting in undesirable behavior
+ * in the system.
+ *
+ * This is even more severe when Akka Persistence is used in conjunction with Cluster Sharding.
+ * In this case, the lack of unique actors can cause multiple actors to write to the same journal.
+ * Akka Persistence operates on a single writer principle. Having multiple writers will corrupt
+ * the journal and make it unusable.
+ *
+ * Finally, even if you don't use features such as Persistence, Sharding, or Singletons, auto-downing can lead the
+ * system to form multiple small clusters. These small clusters will be independent from each other. They will be
+ * unable to communicate and as a result you may experience performance degradation. Once this condition occurs,
+ * it will require manual intervention in order to reform the cluster.
+ *
+ * Because of these issues, auto-downing should never be used in a production environment.
+ */
+final class AutoDowning(system: ActorSystem) extends DowningProvider {
+
+  private def clusterSettings = Cluster(system).settings
+
+  private val AutoDownUnreachableAfter: Duration = {
+    val key = "akka.cluster.testkit.auto-down-unreachable-after"
+    // it's not in reference.conf, since only used in tests
+    if (clusterSettings.config.hasPath(key)) {
+      toRootLowerCase(clusterSettings.config.getString(key)) match {
+        case "off" => Duration.Undefined
+        case _     => clusterSettings.config.getMillisDuration(key).requiring(_ >= Duration.Zero, key + " >= 0s, or off")
+      }
+    } else
+      Duration.Undefined
+  }
+
+  override def downRemovalMargin: FiniteDuration = clusterSettings.DownRemovalMargin
+
+  override def downingActorProps: Option[Props] =
+    AutoDownUnreachableAfter match {
+      case d: FiniteDuration => Some(AutoDown.props(d))
+      case _                 => None // auto-down-unreachable-after = off
+    }
+}

 /**
  * INTERNAL API
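Following the pattern applied throughout this commit, a test enables the provider like this (a sketch; any multi-node spec config would do):

    import com.typesafe.config.ConfigFactory

    val testDowningConfig = ConfigFactory.parseString("""
      akka.actor.provider = "cluster"
      akka.cluster.downing-provider-class = akka.cluster.testkit.AutoDowning
      # leader downs unreachable members immediately; test-only key, not in reference.conf
      akka.cluster.testkit.auto-down-unreachable-after = 0s
      """)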
@@ -25,26 +90,6 @@ private[cluster] object AutoDown {
   final case class UnreachableTimeout(node: UniqueAddress)
 }

-/**
- * Used when no custom provider is configured and 'auto-down-unreachable-after' is enabled.
- */
-final class AutoDowning(system: ActorSystem) extends DowningProvider {
-
-  private def clusterSettings = Cluster(system).settings
-
-  @silent("deprecated")
-  override def downRemovalMargin: FiniteDuration = clusterSettings.DownRemovalMargin
-
-  override def downingActorProps: Option[Props] =
-    clusterSettings.AutoDownUnreachableAfter match {
-      case d: FiniteDuration => Some(AutoDown.props(d))
-      case _ =>
-        // I don't think this can actually happen
-        throw new ConfigurationException(
-          "AutoDowning downing provider selected but 'akka.cluster.auto-down-unreachable-after' not set")
-    }
-}
-
 /**
  * INTERNAL API
  *
@@ -68,9 +113,7 @@ private[cluster] class AutoDown(autoDownUnreachableAfter: FiniteDuration)

   // re-subscribe when restart
   override def preStart(): Unit = {
-    log.warning(
-      "Don't use auto-down feature of Akka Cluster in production. " +
-      "See 'Auto-downing (DO NOT USE)' section of Akka Cluster documentation.")
+    log.debug("Auto-down is enabled in test.")
     cluster.subscribe(self, classOf[ClusterDomainEvent])
     super.preStart()
   }
@@ -81,11 +124,7 @@ private[cluster] class AutoDown(autoDownUnreachableAfter: FiniteDuration)

   override def down(node: Address): Unit = {
     require(leader)
-    logInfo(
-      "Leader is auto-downing unreachable node [{}]. " +
-      "Don't use auto-down feature of Akka Cluster in production. " +
-      "See 'Auto-downing (DO NOT USE)' section of Akka Cluster documentation.",
-      node)
+    logInfo("Leader is auto-downing unreachable node [{}].", node)
     cluster.down(node)
   }
@@ -2,15 +2,18 @@
 * Copyright (C) 2009-2019 Lightbend Inc. <https://www.lightbend.com>
 */

-package akka.cluster
+package akka.cluster.testkit

 import scala.concurrent.duration._
-import akka.actor.Address
-import akka.actor.Scheduler
+
 import akka.actor.ActorRef
+import akka.actor.Address
 import akka.actor.Props
-import akka.cluster.MemberStatus._
+import akka.actor.Scheduler
 import akka.cluster.ClusterEvent._
+import akka.cluster.Member
+import akka.cluster.MemberStatus._
+import akka.cluster.TestMember
 import akka.remote.RARP
 import akka.testkit.AkkaSpec
 import akka.testkit.TimingTest
@@ -32,7 +32,8 @@ object LotsOfDataBot {
     // Override the configuration of the port
     val config = ConfigFactory
       .parseString("akka.remote.classic.netty.tcp.port=" + port)
-      .withFallback(ConfigFactory.load(ConfigFactory.parseString("""
+      .withFallback(
+        ConfigFactory.load(ConfigFactory.parseString("""
         passive = off
         max-entries = 100000
         akka.actor.provider = "cluster"
@@ -48,7 +49,8 @@ object LotsOfDataBot {
         "akka://ClusterSystem@127.0.0.1:2551",
         "akka://ClusterSystem@127.0.0.1:2552"]
 
-      auto-down-unreachable-after = 10s
+      downing-provider-class = akka.cluster.testkit.AutoDowning
+      testkit.auto-down-unreachable-after = 10s
     }
     """)))
@@ -227,14 +227,6 @@ graceful leaving process of a cluster member.
 
 See @ref:[removal of Internal Cluster Sharding Data](typed/cluster-sharding.md#removal-of-internal-cluster-sharding-data) in the documentation of the new APIs.
 
-## Configuration
-
-`ClusterShardingSettings` is a parameter to the `start` method of
-the `ClusterSharding` extension, i.e. each each entity type can be configured with different settings
-if needed.
-
-See @ref:[configuration](typed/cluster-sharding.md#configuration) for more information.
-
 ## Inspecting cluster sharding state
 
 Two requests to inspect the cluster state are available:
@@ -256,20 +248,13 @@ directly sending messages to the individual entities.
 
 ## Lease
 
-A @ref[lease](coordination.md) can be used as an additional safety measure to ensure a shard
-does not run on two nodes.
-
-Reasons for how this can happen:
-
-* Network partitions without an appropriate downing provider
-* Mistakes in the deployment process leading to two separate Akka Clusters
-* Timing issues between removing members from the Cluster on one side of a network partition and shutting them down on the other side
-
-A lease can be a final backup that means that each shard won't create child entity actors unless it has the lease.
-
-To use a lease for sharding set `akka.cluster.sharding.use-lease` to the configuration location
-of the lease to use. Each shard will try and acquire a lease with with the name `<actor system name>-shard-<type name>-<shard id>` and
-the owner is set to the `Cluster(system).selfAddress.hostPort`.
-
-If a shard can't acquire a lease it will remain uninitialized so messages for entities it owns will
-be buffered in the `ShardRegion`. If the lease is lost after initialization the Shard will be terminated.
+A lease can be used as an additional safety measure to ensure a shard does not run on two nodes.
+See @ref:[Lease](typed/cluster-sharding.md#lease) in the documentation of the new APIs.
+
+## Configuration
+
+`ClusterShardingSettings` is a parameter to the `start` method of
+the `ClusterSharding` extension, i.e. each entity type can be configured with different settings
+if needed.
+
+See @ref:[configuration](typed/cluster-sharding.md#configuration) for more information.
@@ -104,6 +104,14 @@ Scala
 Java
 : @@snip [SimpleClusterListener2.java](/akka-docs/src/test/java/jdocs/cluster/SimpleClusterListener2.java) { #join }
 
+## Leaving
+
+See @ref:[Leaving](typed/cluster.md#leaving) in the documentation of the new APIs.
+
+## Downing
+
+See @ref:[Downing](typed/cluster.md#downing) in the documentation of the new APIs.
+
 <a id="cluster-subscriber"></a>
 ## Subscribe to Cluster Events
@@ -3,7 +3,7 @@
 ## Commercial Support
 
 Commercial support is provided by [Lightbend](http://www.lightbend.com).
-Akka is part of the [Lightbend Reactive Platform](http://www.lightbend.com/platform).
+Akka is part of the [Lightbend Platform](http://www.lightbend.com/platform).
 
 ## Sponsors
@@ -11,6 +11,40 @@ is [no longer available as a static method](https://github.com/scala/bug/issues/
 If you are still using Scala 2.11 then you must upgrade to 2.12 or 2.13
 
+## Auto-downing removed
+
+Auto-downing of unreachable Cluster members has been removed after warnings and recommendations against using it
+for many years. It was disabled by default, but could be enabled with the configuration
+`akka.cluster.auto-down-unreachable-after`.
+
+For alternatives see the @ref:[documentation about Downing](../typed/cluster.md#downing).
+
+Auto-downing was a naïve approach to remove unreachable nodes from the cluster membership.
+In a production environment it will eventually break down the cluster.
+When a network partition occurs, both sides of the partition will see the other side as unreachable
+and remove it from the cluster. This results in the formation of two separate, disconnected, clusters
+(known as *Split Brain*).
+
+This behavior is not limited to network partitions. It can also occur if a node in the cluster is
+overloaded, or experiences a long GC pause.
+
+When using @ref:[Cluster Singleton](../typed/cluster-singleton.md) or @ref:[Cluster Sharding](../typed/cluster-sharding.md)
+it can break the contract provided by those features. Both provide a guarantee that an actor will be unique in a cluster.
+With the auto-down feature enabled, it is possible for multiple independent clusters to form (*Split Brain*).
+When this happens the guaranteed uniqueness will no longer be true, resulting in undesirable behavior in the system.
+
+This is even more severe when @ref:[Akka Persistence](../typed/persistence.md) is used in conjunction with
+Cluster Sharding. In this case, the lack of unique actors can cause multiple actors to write to the same journal.
+Akka Persistence operates on a single writer principle. Having multiple writers will corrupt the journal
+and make it unusable.
+
+Finally, even if you don't use features such as Persistence, Sharding, or Singletons, auto-downing can lead the
+system to form multiple small clusters. These small clusters will be independent from each other. They will be
+unable to communicate and as a result you may experience performance degradation. Once this condition occurs,
+it will require manual intervention in order to reform the cluster.
+
+Because of these issues, auto-downing should **never** be used in a production environment.
+
 ## Removed features that were deprecated
 
 After being deprecated since 2.5.0, the following have been removed in Akka 2.6.
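For tests that relied on the removed setting, the commit's replacement can be sketched as follows (the 120s value mirrors the old documentation example and is illustrative):

```scala
import com.typesafe.config.ConfigFactory

// Before (removed in 2.6):
//   akka.cluster.auto-down-unreachable-after = 120s
// After, for tests only:
val testDowning = ConfigFactory.parseString("""
  akka.cluster.downing-provider-class = "akka.cluster.testkit.AutoDowning"
  akka.cluster.testkit.auto-down-unreachable-after = 120s
""")
```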
@@ -94,13 +128,25 @@ to make remote interactions look like local method calls.
 Warnings about `TypedActor` have been [mentioned in documentation](https://doc.akka.io/docs/akka/2.5/typed-actors.html#when-to-use-typed-actors)
 for many years.
 
+### akka-protobuf
+
+`akka-protobuf` was never intended to be used by end users but perhaps this was not well-documented.
+Applications should use standard Protobuf dependency instead of `akka-protobuf`. The artifact is still
+published, but the transitive dependency to `akka-protobuf` has been removed.
+
+Akka is now using Protobuf version 3.9.0 for serialization of messages defined by Akka.
+
+### Cluster Client
+
+Cluster client has been deprecated as of 2.6 in favor of [Akka gRPC](https://doc.akka.io/docs/akka-grpc/current/index.html).
+It is not advised to build new applications with Cluster client, and existing users @ref[should migrate to Akka gRPC](../cluster-client.md#migration-to-akka-grpc).
+
 ### akka.Main
 
 `akka.Main` is deprecated in favour of starting the `ActorSystem` from a custom main class instead. `akka.Main` was not
 adding much value and typically a custom main class is needed anyway.
 
 ## Remoting
 
 ### Default remoting is now Artery TCP
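For the `akka-protobuf` change above, a build sketch assuming sbt; the coordinates are the standard Protobuf artifact, with the version matching the 3.9.0 mentioned:

```scala
// Depend on standard Protobuf directly instead of akka-protobuf.
libraryDependencies += "com.google.protobuf" % "protobuf-java" % "3.9.0"
```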
@@ -184,20 +230,7 @@ For TCP:
 
 Classic remoting is deprecated but can be used in `2.6.` Explicitly disable Artery by setting property `akka.remote.artery.enabled` to `false`. Further, any configuration under `akka.remote` that is
 specific to classic remoting needs to be moved to `akka.remote.classic`. To see which configuration options
-are specific to classic search for them in: [`akka-remote/reference.conf`](/akka-remote/src/main/resources/reference.conf)
+are specific to classic search for them in: @ref:[`akka-remote/reference.conf`](../general/configuration.md#config-akka-remote).
 
-### akka-protobuf
-
-`akka-protobuf` was never intended to be used by end users but perhaps this was not well-documented.
-Applications should use standard Protobuf dependency instead of `akka-protobuf`. The artifact is still
-published, but the transitive dependency to `akka-protobuf` has been removed.
-
-Akka is now using Protobuf version 3.9.0 for serialization of messages defined by Akka.
-
-### Cluster Client
-
-Cluster client has been deprecated as of 2.6 in favor of [Akka gRPC](https://doc.akka.io/docs/akka-grpc/current/index.html).
-It is not advised to build new applications with Cluster client, and existing users @ref[should migrate to Akka gRPC](../cluster-client.md#migration-to-akka-grpc).
-
 ## Java Serialization
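Keeping classic remoting as described above can be sketched like this (the port value is illustrative):

```scala
import com.typesafe.config.ConfigFactory

// Disable Artery and keep classic-specific settings under akka.remote.classic.
val classicRemoting = ConfigFactory.parseString("""
  akka.remote.artery.enabled = false
  akka.remote.classic.netty.tcp.port = 2552
""")
```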
@@ -235,14 +268,12 @@ handling that type and it was previously "accidentally" serialized with Java ser
 The following documents configuration changes and behavior changes where no action is required. In some cases the old
 behavior can be restored via configuration.
 
-### Remoting
-
-#### Remoting dependencies have been made optional
+### Remoting dependencies have been made optional
 
 Classic remoting depends on Netty and Artery UDP depends on Aeron. These are now both optional dependencies that need
 to be explicitly added. See @ref[classic remoting](../remoting.md) or @ref[artery remoting](../remoting-artery.md) for instructions.
 
-#### Remote watch and deployment have been disabled without Cluster use
+### Remote watch and deployment have been disabled without Cluster use
 
 By default, these remoting features are disabled when not using Akka Cluster:
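A build sketch for the now-optional transports, assuming sbt; the exact artifacts and versions are assumptions and should be taken from the remoting pages linked above:

```scala
// Classic remoting transport (Netty):
libraryDependencies += "io.netty" % "netty" % "3.10.6.Final"
// Artery UDP transport (Aeron):
libraryDependencies ++= Seq(
  "io.aeron" % "aeron-driver" % "1.15.3",
  "io.aeron" % "aeron-client" % "1.15.3")
```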
@@ -43,10 +43,10 @@ if that feature is enabled.
 
 @@@ warning
 
-**Don't use Cluster Sharding together with Automatic Downing**,
-since it allows the cluster to split up into two separate clusters, which in turn will result
-in *multiple shards and entities* being started, one in each separate cluster!
-See @ref:[Downing](cluster.md#automatic-vs-manual-downing).
+Make sure to not use a Cluster downing strategy that may split the cluster into several separate clusters in
+case of network problems or system overload (long GC pauses), since that will result in *multiple shards and entities*
+being started, one in each separate cluster!
+See @ref:[Downing](cluster.md#downing).
 
 @@@
@@ -304,6 +304,26 @@ rebalanced to other nodes.
 See @ref:[How To Startup when Cluster Size Reached](cluster.md#how-to-startup-when-a-cluster-size-is-reached)
 for more information about `min-nr-of-members`.
 
+## Lease
+
+A @ref[lease](../coordination.md) can be used as an additional safety measure to ensure a shard
+does not run on two nodes.
+
+Reasons for how this can happen:
+
+* Network partitions without an appropriate downing provider
+* Mistakes in the deployment process leading to two separate Akka Clusters
+* Timing issues between removing members from the Cluster on one side of a network partition and shutting them down on the other side
+
+A lease can be a final backup that means that each shard won't create child entity actors unless it has the lease.
+
+To use a lease for sharding set `akka.cluster.sharding.use-lease` to the configuration location
+of the lease to use. Each shard will try to acquire a lease with the name `<actor system name>-shard-<type name>-<shard id>` and
+the owner is set to the `Cluster(system).selfAddress.hostPort`.
+
+If a shard can't acquire a lease it will remain uninitialized so messages for entities it owns will
+be buffered in the `ShardRegion`. If the lease is lost after initialization the Shard will be terminated.
+
 ## Removal of internal Cluster Sharding data
 
 Removal of internal Cluster Sharding data is only relevant for "Persistent Mode".
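The lease setting in the added section can be sketched as configuration like this; the lease implementation path is a placeholder for whichever lease module is actually deployed:

```scala
import com.typesafe.config.ConfigFactory

// "akka.coordination.lease.kubernetes" is an illustrative lease config location.
val shardingLease = ConfigFactory.parseString("""
  akka.cluster.sharding.use-lease = "akka.coordination.lease.kubernetes"
""")
```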
@@ -326,15 +346,6 @@ cannot startup because of corrupt data, which may happen if accidentally
 two clusters were running at the same time, e.g. caused by using auto-down
 and there was a network partition.
 
-@@@ warning
-
-**Don't use Cluster Sharding together with Automatic Downing**,
-since it allows the cluster to split up into two separate clusters, which in turn will result
-in *multiple shards and entities* being started, one in each separate cluster!
-See @ref:[Downing](cluster.md#automatic-vs-manual-downing).
-
-@@@
-
 Use this program as a standalone Java main program:
 
 ```
@@ -347,7 +358,7 @@ The program is included in the `akka-cluster-sharding` jar file. It
 is easiest to run it with same classpath and configuration as your ordinary
 application. It can be run from sbt or Maven in similar way.
 
-Specify the entity type names (same as you use in the `start` method
+Specify the entity type names (same as you use in the `init` method
 of `ClusterSharding`) as program arguments.
 
 If you specify `-2.3` as the first program argument it will also try
@@ -32,6 +32,15 @@ such as single-point of bottleneck. Single-point of failure is also a relevant c
 but for some cases this feature takes care of that by making sure that another singleton
 instance will eventually be started.
 
+@@@ warning
+
+Make sure to not use a Cluster downing strategy that may split the cluster into several separate clusters in
+case of network problems or system overload (long GC pauses), since that will result in *multiple Singletons*
+being started, one in each separate cluster!
+See @ref:[Downing](cluster.md#downing).
+
+@@@
+
 ### Singleton manager
 
 The cluster singleton pattern manages one singleton actor instance among all cluster nodes or a group of nodes tagged with
@@ -80,23 +89,20 @@ The singleton instance will not run on members with status @ref:[WeaklyUp](clust
 
 This pattern may seem to be very tempting to use at first, but it has several drawbacks, some of them are listed below:
 
-* the cluster singleton may quickly become a *performance bottleneck*,
-* you can not rely on the cluster singleton to be *non-stop* available — e.g. when the node on which the singleton has
-  been running dies, it will take a few seconds for this to be noticed and the singleton be migrated to another node,
-* in the case of a *network partition* appearing in a Cluster that is using Automatic Downing (see docs for
-  @ref:[Auto Downing](cluster.md#auto-downing-do-not-use),
-  it may happen that the isolated clusters each decide to spin up their own singleton, meaning that there might be multiple
-  singletons running in the system, yet the Clusters have no way of finding out about them (because of the partition).
-
-Especially the last point is something you should be aware of — in general when using the Cluster Singleton pattern
-you should take care of downing nodes yourself and not rely on the timing based auto-down feature.
+* The cluster singleton may quickly become a *performance bottleneck*.
+* You can not rely on the cluster singleton to be *non-stop* available — e.g. when the node on which the singleton
+  has been running dies, it will take a few seconds for this to be noticed and the singleton be migrated to another node.
+* If many singletons are used, be aware that all will run on the oldest node (or oldest with configured role).
+  @ref:[Cluster Sharding](cluster-sharding.md) combined with keeping the "singleton" entities alive can be a better
+  alternative.
 
 @@@ warning
 
-**Don't use Cluster Singleton together with Automatic Downing**,
-since it allows the cluster to split up into two separate clusters, which in turn will result
-in *multiple Singletons* being started, one in each separate cluster!
+Make sure to not use a Cluster downing strategy that may split the cluster into several separate clusters in
+case of network problems or system overload (long GC pauses), since that will result in *multiple Singletons*
+being started, one in each separate cluster!
+See @ref:[Downing](cluster.md#downing).
 
 @@@
 
 ## Example
@@ -255,95 +255,69 @@ after the restart, when it come up as new incarnation of existing member in the
 trying to join in, then the existing one will be removed from the cluster and then it will
 be allowed to join.
 
-<a id="automatic-vs-manual-downing"></a>
-### Downing
-
-When a member is considered by the failure detector to be `unreachable` the
-leader is not allowed to perform its duties, such as changing status of
-new joining members to 'Up'. The node must first become `reachable` again, or the
-status of the unreachable member must be changed to 'Down'. Changing status to 'Down'
-can be performed automatically or manually. By default it must be done manually, using
-@ref:[JMX](../additional/operations.md#jmx) or @ref:[HTTP](../additional/operations.md#http).
-
-It can also be performed programmatically with @scala[`Cluster(system).down(address)`]@java[`Cluster.get(system).down(address)`].
-
-If a node is still running and sees its self as Down it will shutdown. @ref:[Coordinated Shutdown](../actors.md#coordinated-shutdown) will automatically
-run if `run-coordinated-shutdown-when-down` is set to `on` (the default) however the node will not try
-and leave the cluster gracefully so sharding and singleton migration will not occur.
-
-A production solution for the downing problem is provided by
-[Split Brain Resolver](http://developer.lightbend.com/docs/akka-commercial-addons/current/split-brain-resolver.html),
-which is part of the [Lightbend Reactive Platform](http://www.lightbend.com/platform).
-If you don’t use RP, you should anyway carefully read the [documentation](http://developer.lightbend.com/docs/akka-commercial-addons/current/split-brain-resolver.html)
-of the Split Brain Resolver and make sure that the solution you are using handles the concerns
-described there.
-
-### Auto-downing - DO NOT USE
-
-There is an automatic downing feature that you should not use in production. For testing you can enable it with configuration:
-
-```
-akka.cluster.auto-down-unreachable-after = 120s
-```
-
-This means that the cluster leader member will change the `unreachable` node
-status to `down` automatically after the configured time of unreachability.
-
-This is a naïve approach to remove unreachable nodes from the cluster membership.
-It can be useful during development but in a production environment it will eventually breakdown the cluster.
-When a network partition occurs, both sides of the partition will see the other side as unreachable and remove it from the cluster.
-This results in the formation of two separate, disconnected, clusters (known as *Split Brain*).
-
-This behaviour is not limited to network partitions. It can also occur if a node
-in the cluster is overloaded, or experiences a long GC pause.
-
-@@@ warning
-
-We recommend against using the auto-down feature of Akka Cluster in production. It
-has multiple undesirable consequences for production systems.
-
-If you are using @ref:[Cluster Singleton](cluster-singleton.md) or @ref:[Cluster Sharding](cluster-sharding.md) it can break the contract provided by
-those features. Both provide a guarantee that an actor will be unique in a cluster.
-With the auto-down feature enabled, it is possible for multiple independent clusters
-to form (*Split Brain*). When this happens the guaranteed uniqueness will no
-longer be true resulting in undesirable behaviour in the system.
-
-This is even more severe when @ref:[Akka Persistence](persistence.md) is used in
-conjunction with Cluster Sharding. In this case, the lack of unique actors can
-cause multiple actors to write to the same journal. Akka Persistence operates on a
-single writer principle. Having multiple writers will corrupt the journal
-and make it unusable.
-
-Finally, even if you don't use features such as Persistence, Sharding, or Singletons,
-auto-downing can lead the system to form multiple small clusters. These small
-clusters will be independent from each other. They will be unable to communicate
-and as a result you may experience performance degradation. Once this condition
-occurs, it will require manual intervention in order to reform the cluster.
-
-Because of these issues, auto-downing should **never** be used in a production environment.
-
-@@@
-
 ### Leaving
 
-There are two ways to remove a member from the cluster.
+There are a few ways to remove a member from the cluster.
 
 1. The recommended way to leave a cluster is a graceful exit, informing the cluster that a node shall leave.
-   This can be performed using @ref:[JMX](../additional/operations.md#jmx) or @ref:[HTTP](../additional/operations.md#http).
-   This method will offer faster hand off to peer nodes during node shutdown.
-1. When a graceful exit is not possible, you can stop the actor system (or the JVM process, for example a SIGTERM sent from the environment). It will be detected
-   as unreachable and removed after the automatic or manual downing.
+   This is performed by @ref:[Coordinated Shutdown](../actors.md#coordinated-shutdown) when the `ActorSystem`
+   is terminated and also when a SIGTERM is sent from the environment to stop the JVM process.
+1. Graceful exit can also be performed using @ref:[HTTP](../additional/operations.md#http) or @ref:[JMX](../additional/operations.md#jmx).
+1. When a graceful exit is not possible, for example in case of abrupt termination of the JVM process, the node
+   will be detected as unreachable by other nodes and removed after @ref:[Downing](#downing).
 
-The @ref:[Coordinated Shutdown](../actors.md#coordinated-shutdown) will automatically run when the cluster node sees itself as
+Graceful leaving will offer faster hand off to peer nodes during node shutdown than abrupt termination and downing.
+
+The @ref:[Coordinated Shutdown](../actors.md#coordinated-shutdown) will also run when the cluster node sees itself as
 `Exiting`, i.e. leaving from another node will trigger the shutdown process on the leaving node.
 Tasks for graceful leaving of cluster including graceful shutdown of Cluster Singletons and
 Cluster Sharding are added automatically when Akka Cluster is used, i.e. running the shutdown
 process will also trigger the graceful leaving if it's not already in progress.
 
 Normally this is handled automatically, but in case of network failures during this process it might still
-be necessary to set the node’s status to `Down` in order to complete the removal. For handling network failures
-see [Split Brain Resolver](http://developer.lightbend.com/docs/akka-commercial-addons/current/split-brain-resolver.html),
-part of the [Lightbend Reactive Platform](http://www.lightbend.com/platform).
+be necessary to set the node’s status to `Down` in order to complete the removal, see @ref:[Downing](#downing).
+
+### Downing
+
+In many cases a member can gracefully exit from the cluster as described in @ref:[Leaving](#leaving), but
+there are scenarios when an explicit downing decision is needed before it can be removed. For example in case
+of abrupt termination of the JVM process, system overload that doesn't recover, or network partitions
+that don't heal. In such cases the node(s) will be detected as unreachable by other nodes, but they must also
+be marked as `Down` before they are removed.
+
+When a member is considered by the failure detector to be `unreachable` the
+leader is not allowed to perform its duties, such as changing status of
+new joining members to 'Up'. The node must first become `reachable` again, or the
+status of the unreachable member must be changed to `Down`. Changing status to `Down`
+can be performed automatically or manually.
+
+By default, downing must be performed manually using @ref:[HTTP](../additional/operations.md#http) or @ref:[JMX](../additional/operations.md#jmx).
+
+Note that @ref:[Cluster Singleton](cluster-singleton.md) or @ref:[Cluster Sharding entities](cluster-sharding.md) that
+are running on a crashed (unreachable) node will not be started on another node until the previous node has
+been removed from the Cluster. Removal of crashed (unreachable) nodes is performed after a downing decision.
+
+A production solution for downing is provided by
+[Split Brain Resolver](https://doc.akka.io/docs/akka-enhancements/current/split-brain-resolver.html),
+which is part of the [Lightbend Platform](http://www.lightbend.com/platform).
+If you don’t have a Lightbend Platform Subscription, you should still carefully read the
+[documentation](https://doc.akka.io/docs/akka-enhancements/current/split-brain-resolver.html)
+of the Split Brain Resolver and make sure that the solution you are using handles the concerns and scenarios
+described there.
+
+A custom downing strategy can be implemented with a @apidoc[akka.cluster.DowningProvider] and enabled with
+configuration `akka.cluster.downing-provider-class`.
+
+Downing can also be performed programmatically with @scala[`Cluster(system).manager ! Down(address)`]@java[`Cluster.get(system).manager().tell(Down(address))`],
+but that is mostly useful from tests and when implementing a `DowningProvider`.
+
+If a crashed node is restarted with the same hostname and port and joins the cluster again, the previous incarnation
+of that member will be downed and removed. The new join attempt with the same hostname and port is used as evidence
+that the previous is not alive any more.
+
+If a node is still running and sees itself as `Down` it will shut down. @ref:[Coordinated Shutdown](../actors.md#coordinated-shutdown) will automatically
+run if `run-coordinated-shutdown-when-down` is set to `on` (the default) however the node will not try
+to leave the cluster gracefully.
 
 ## Node Roles
 
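The custom downing strategy mentioned in the new Downing section can be sketched as follows. This is a deliberately naive illustration, not a production strategy (a real one must handle split brain; see the Split Brain Resolver documentation), and the class name is invented:

```scala
import akka.actor.{ Actor, ActorSystem, Props }
import akka.cluster.{ Cluster, DowningProvider }
import akka.cluster.ClusterEvent.{ ClusterDomainEvent, UnreachableMember }
import scala.concurrent.duration._

// Enabled with: akka.cluster.downing-provider-class = "example.NaiveDowningProvider"
final class NaiveDowningProvider(system: ActorSystem) extends DowningProvider {

  // Margin before shards/singletons of a removed node are started elsewhere.
  override def downRemovalMargin: FiniteDuration = 10.seconds

  // Downs a member as soon as it is observed unreachable -- naive on purpose.
  override def downingActorProps: Option[Props] = Some(Props(new Actor {
    private val cluster = Cluster(context.system)
    override def preStart(): Unit = cluster.subscribe(self, classOf[ClusterDomainEvent])
    override def postStop(): Unit = cluster.unsubscribe(self)
    def receive = {
      case UnreachableMember(member) => cluster.down(member.address)
      case _                         => // ignore other cluster events
    }
  }))
}
```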