From a217d5566ea2555ff7cd2cdfb9b5ecf4f9a8bca5 Mon Sep 17 00:00:00 2001
From: Patrik Nordwall
Date: Thu, 3 Oct 2019 14:08:43 +0200
Subject: [PATCH] Remove auto-downing, #27788 (#27855)

* moved to cluster tests, in new package akka.cluster.testkit
* changed config in tests
* migration guide
* documentation clarifications for Downing and Leaving
* update warnings in Singleton and Sharding
---
 .../sharding/ClusterShardingFailureSpec.scala | 3 +-
 .../ClusterShardingGetStateSpec.scala | 3 +-
 .../ClusterShardingGetStatsSpec.scala | 3 +-
 .../sharding/ClusterShardingLeavingSpec.scala | 3 +-
 .../sharding/ClusterShardingQueriesSpec.scala | 3 +-
 ...dingRememberEntitiesNewExtractorSpec.scala | 3 +-
 ...sterShardingRememberEntitiesPerfSpec.scala | 3 +-
 .../ClusterShardingRememberEntitiesSpec.scala | 3 +-
 .../sharding/ClusterShardingSpec.scala | 3 +-
 .../sharding/MultiDcClusterShardingSpec.scala | 3 +-
 .../MultiNodeClusterShardingConfig.scala | 3 +-
 .../singleton/ClusterSingletonManager.scala | 2 -
 .../cluster/client/ClusterClientSpec.scala | 3 +-
 .../DistributedPubSubMediatorSpec.scala | 3 +-
 .../pubsub/DistributedPubSubRestartSpec.scala | 3 +-
 .../ClusterSingletonManagerChaosSpec.scala | 3 +-
 .../ClusterSingletonManagerLeaseSpec.scala | 3 +-
 .../ClusterSingletonManagerLeave2Spec.scala | 3 +-
 .../ClusterSingletonManagerLeaveSpec.scala | 3 +-
 .../ClusterSingletonManagerSpec.scala | 3 +-
 .../ClusterSingletonManagerStartupSpec.scala | 3 +-
 .../ClusterSingletonLeavingSpeedSpec.scala | 3 +-
 .../ClusterSingletonRestart2Spec.scala | 3 +-
 .../ClusterSingletonRestartSpec.scala | 6 +-
 .../ClusterReceptionistSpec.scala | 1 -
 .../issue-27788-remove-auto-down.excludes | 8 ++
 .../src/main/resources/reference.conf | 18 +--
 .../src/main/scala/akka/cluster/Cluster.scala | 13 +-
 .../scala/akka/cluster/ClusterDaemon.scala | 17 ++-
 .../scala/akka/cluster/ClusterSettings.scala | 16 ---
 .../scala/akka/cluster/DowningProvider.scala | 14 +-
 .../LeaderDowningAllOtherNodesSpec.scala | 3 +-
 ...aderDowningNodeThatIsUnreachableSpec.scala | 4 +-
 .../akka/cluster/LeaderLeavingSpec.scala | 4 +-
 .../akka/cluster/MultiDcSplitBrainSpec.scala | 3 +-
 .../scala/akka/cluster/NodeChurnSpec.scala | 9 +-
 .../NodeDowningAndBeingRemovedSpec.scala | 2 +-
 .../scala/akka/cluster/QuickRestartSpec.scala | 8 +-
 .../cluster/RestartFirstSeedNodeSpec.scala | 8 +-
 .../scala/akka/cluster/RestartNode2Spec.scala | 3 +-
 .../scala/akka/cluster/RestartNode3Spec.scala | 2 +-
 .../scala/akka/cluster/RestartNodeSpec.scala | 3 +-
 .../akka/cluster/SingletonClusterSpec.scala | 10 +-
 .../scala/akka/cluster/SplitBrainSpec.scala | 10 +-
 .../scala/akka/cluster/StreamRefSpec.scala | 10 +-
 .../scala/akka/cluster/StressSpec.scala | 3 +-
 .../akka/cluster/ClusterConfigSpec.scala | 1 -
 .../scala/akka/cluster/ClusterLogSpec.scala | 3 +-
 .../test/scala/akka/cluster/ClusterSpec.scala | 3 +-
 .../akka/cluster/DowningProviderSpec.scala | 27 ++--
 .../cluster/JoinConfigCompatCheckerSpec.scala | 3 +-
 .../akka/cluster/testkit}/AutoDown.scala | 111 ++++++++++-----
 .../cluster/{ => testkit}/AutoDownSpec.scala | 11 +-
 .../akka/cluster/ddata/LotsOfDataBot.scala | 6 +-
 .../src/main/paradox/cluster-sharding.md | 29 +---
 akka-docs/src/main/paradox/cluster-usage.md | 8 ++
 akka-docs/src/main/paradox/project/links.md | 2 +-
 .../project/migration-guide-2.5.x-2.6.x.md | 69 ++++++---
 .../main/paradox/typed/cluster-sharding.md | 39 ++++--
 .../main/paradox/typed/cluster-singleton.md | 36 +++--
 akka-docs/src/main/paradox/typed/cluster.md | 132 +++++++-----------
 61 files changed, 414 insertions(+), 309
deletions(-) create mode 100644 akka-cluster/src/main/mima-filters/2.5.x.backwards.excludes/issue-27788-remove-auto-down.excludes rename akka-cluster/src/{main/scala/akka/cluster => test/scala/akka/cluster/testkit}/AutoDown.scala (58%) rename akka-cluster/src/test/scala/akka/cluster/{ => testkit}/AutoDownSpec.scala (97%) diff --git a/akka-cluster-sharding/src/multi-jvm/scala/akka/cluster/sharding/ClusterShardingFailureSpec.scala b/akka-cluster-sharding/src/multi-jvm/scala/akka/cluster/sharding/ClusterShardingFailureSpec.scala index f092fefc59..b088ca1909 100644 --- a/akka-cluster-sharding/src/multi-jvm/scala/akka/cluster/sharding/ClusterShardingFailureSpec.scala +++ b/akka-cluster-sharding/src/multi-jvm/scala/akka/cluster/sharding/ClusterShardingFailureSpec.scala @@ -63,7 +63,8 @@ abstract class ClusterShardingFailureSpecConfig(val mode: String) extends MultiN akka.loglevel = INFO akka.actor.provider = "cluster" akka.remote.classic.log-remote-lifecycle-events = off - akka.cluster.auto-down-unreachable-after = 0s + akka.cluster.downing-provider-class = akka.cluster.testkit.AutoDowning + akka.cluster.testkit.auto-down-unreachable-after = 0s akka.cluster.roles = ["backend"] akka.persistence.journal.plugin = "akka.persistence.journal.leveldb-shared" akka.persistence.journal.leveldb-shared { diff --git a/akka-cluster-sharding/src/multi-jvm/scala/akka/cluster/sharding/ClusterShardingGetStateSpec.scala b/akka-cluster-sharding/src/multi-jvm/scala/akka/cluster/sharding/ClusterShardingGetStateSpec.scala index db06e71672..40fa71a694 100644 --- a/akka-cluster-sharding/src/multi-jvm/scala/akka/cluster/sharding/ClusterShardingGetStateSpec.scala +++ b/akka-cluster-sharding/src/multi-jvm/scala/akka/cluster/sharding/ClusterShardingGetStateSpec.scala @@ -50,7 +50,8 @@ object ClusterShardingGetStateSpecConfig extends MultiNodeConfig { akka.loglevel = INFO akka.actor.provider = "cluster" akka.remote.log-remote-lifecycle-events = off - akka.cluster.auto-down-unreachable-after = 0s + akka.cluster.downing-provider-class = akka.cluster.testkit.AutoDowning + akka.cluster.testkit.auto-down-unreachable-after = 0s akka.cluster.sharding { coordinator-failure-backoff = 3s shard-failure-backoff = 3s diff --git a/akka-cluster-sharding/src/multi-jvm/scala/akka/cluster/sharding/ClusterShardingGetStatsSpec.scala b/akka-cluster-sharding/src/multi-jvm/scala/akka/cluster/sharding/ClusterShardingGetStatsSpec.scala index 3eb245ca1a..af03558e2c 100644 --- a/akka-cluster-sharding/src/multi-jvm/scala/akka/cluster/sharding/ClusterShardingGetStatsSpec.scala +++ b/akka-cluster-sharding/src/multi-jvm/scala/akka/cluster/sharding/ClusterShardingGetStatsSpec.scala @@ -56,7 +56,8 @@ object ClusterShardingGetStatsSpecConfig extends MultiNodeConfig { akka.actor.provider = "cluster" akka.remote.classic.log-remote-lifecycle-events = off akka.log-dead-letters-during-shutdown = off - akka.cluster.auto-down-unreachable-after = 0s + akka.cluster.downing-provider-class = akka.cluster.testkit.AutoDowning + akka.cluster.testkit.auto-down-unreachable-after = 0s akka.cluster.sharding { state-store-mode = "ddata" updating-state-timeout = 2s diff --git a/akka-cluster-sharding/src/multi-jvm/scala/akka/cluster/sharding/ClusterShardingLeavingSpec.scala b/akka-cluster-sharding/src/multi-jvm/scala/akka/cluster/sharding/ClusterShardingLeavingSpec.scala index 5552574e3b..d9300fc497 100644 --- a/akka-cluster-sharding/src/multi-jvm/scala/akka/cluster/sharding/ClusterShardingLeavingSpec.scala +++ 
b/akka-cluster-sharding/src/multi-jvm/scala/akka/cluster/sharding/ClusterShardingLeavingSpec.scala @@ -67,7 +67,8 @@ abstract class ClusterShardingLeavingSpecConfig(val mode: String) extends MultiN akka.loglevel = INFO akka.actor.provider = "cluster" akka.remote.classic.log-remote-lifecycle-events = off - akka.cluster.auto-down-unreachable-after = 0s + akka.cluster.downing-provider-class = akka.cluster.testkit.AutoDowning + akka.cluster.testkit.auto-down-unreachable-after = 0s akka.persistence.journal.plugin = "akka.persistence.journal.leveldb-shared" akka.persistence.journal.leveldb-shared { timeout = 5s diff --git a/akka-cluster-sharding/src/multi-jvm/scala/akka/cluster/sharding/ClusterShardingQueriesSpec.scala b/akka-cluster-sharding/src/multi-jvm/scala/akka/cluster/sharding/ClusterShardingQueriesSpec.scala index 79d7c9dc24..3486673e84 100644 --- a/akka-cluster-sharding/src/multi-jvm/scala/akka/cluster/sharding/ClusterShardingQueriesSpec.scala +++ b/akka-cluster-sharding/src/multi-jvm/scala/akka/cluster/sharding/ClusterShardingQueriesSpec.scala @@ -55,7 +55,8 @@ object ClusterShardingQueriesSpecConfig extends MultiNodeConfig { akka.actor.provider = "cluster" akka.remote.classic.log-remote-lifecycle-events = off akka.log-dead-letters-during-shutdown = off - akka.cluster.auto-down-unreachable-after = 0s + akka.cluster.downing-provider-class = akka.cluster.testkit.AutoDowning + akka.cluster.testkit.auto-down-unreachable-after = 0s akka.cluster.sharding { state-store-mode = "ddata" shard-region-query-timeout = 0ms diff --git a/akka-cluster-sharding/src/multi-jvm/scala/akka/cluster/sharding/ClusterShardingRememberEntitiesNewExtractorSpec.scala b/akka-cluster-sharding/src/multi-jvm/scala/akka/cluster/sharding/ClusterShardingRememberEntitiesNewExtractorSpec.scala index 2c8863c9e6..b96f772168 100644 --- a/akka-cluster-sharding/src/multi-jvm/scala/akka/cluster/sharding/ClusterShardingRememberEntitiesNewExtractorSpec.scala +++ b/akka-cluster-sharding/src/multi-jvm/scala/akka/cluster/sharding/ClusterShardingRememberEntitiesNewExtractorSpec.scala @@ -61,7 +61,8 @@ abstract class ClusterShardingRememberEntitiesNewExtractorSpecConfig(val mode: S ConfigFactory .parseString(s""" akka.actor.provider = "cluster" - akka.cluster.auto-down-unreachable-after = 0s + akka.cluster.downing-provider-class = akka.cluster.testkit.AutoDowning + akka.cluster.testkit.auto-down-unreachable-after = 0s akka.remote.classic.log-remote-lifecycle-events = off akka.persistence.journal.plugin = "akka.persistence.journal.leveldb-shared" akka.persistence.journal.leveldb-shared { diff --git a/akka-cluster-sharding/src/multi-jvm/scala/akka/cluster/sharding/ClusterShardingRememberEntitiesPerfSpec.scala b/akka-cluster-sharding/src/multi-jvm/scala/akka/cluster/sharding/ClusterShardingRememberEntitiesPerfSpec.scala index 3e51cfdcfe..8633516bd0 100644 --- a/akka-cluster-sharding/src/multi-jvm/scala/akka/cluster/sharding/ClusterShardingRememberEntitiesPerfSpec.scala +++ b/akka-cluster-sharding/src/multi-jvm/scala/akka/cluster/sharding/ClusterShardingRememberEntitiesPerfSpec.scala @@ -56,7 +56,8 @@ object ClusterShardingRememberEntitiesPerfSpecConfig extends MultiNodeConfig { commonConfig(ConfigFactory.parseString(s""" akka.loglevel = INFO akka.actor.provider = "cluster" - akka.cluster.auto-down-unreachable-after = 0s + akka.cluster.downing-provider-class = akka.cluster.testkit.AutoDowning + akka.cluster.testkit.auto-down-unreachable-after = 0s akka.remote.log-remote-lifecycle-events = off akka.testconductor.barrier-timeout = 3 minutes 
akka.remote.artery.advanced.outbound-message-queue-size = 10000 diff --git a/akka-cluster-sharding/src/multi-jvm/scala/akka/cluster/sharding/ClusterShardingRememberEntitiesSpec.scala b/akka-cluster-sharding/src/multi-jvm/scala/akka/cluster/sharding/ClusterShardingRememberEntitiesSpec.scala index 56ba11ebc8..5c35fb24f3 100644 --- a/akka-cluster-sharding/src/multi-jvm/scala/akka/cluster/sharding/ClusterShardingRememberEntitiesSpec.scala +++ b/akka-cluster-sharding/src/multi-jvm/scala/akka/cluster/sharding/ClusterShardingRememberEntitiesSpec.scala @@ -69,7 +69,8 @@ abstract class ClusterShardingRememberEntitiesSpecConfig(val mode: String, val r modeConfig .withFallback(ConfigFactory.parseString(s""" akka.actor.provider = "cluster" - akka.cluster.auto-down-unreachable-after = 0s + akka.cluster.downing-provider-class = akka.cluster.testkit.AutoDowning + akka.cluster.testkit.auto-down-unreachable-after = 0s akka.remote.log-remote-lifecycle-events = off akka.cluster.sharding.state-store-mode = "$mode" akka.cluster.sharding.distributed-data.durable.lmdb { diff --git a/akka-cluster-sharding/src/multi-jvm/scala/akka/cluster/sharding/ClusterShardingSpec.scala b/akka-cluster-sharding/src/multi-jvm/scala/akka/cluster/sharding/ClusterShardingSpec.scala index 61cc9872aa..e1b4f74c0f 100644 --- a/akka-cluster-sharding/src/multi-jvm/scala/akka/cluster/sharding/ClusterShardingSpec.scala +++ b/akka-cluster-sharding/src/multi-jvm/scala/akka/cluster/sharding/ClusterShardingSpec.scala @@ -135,7 +135,8 @@ abstract class ClusterShardingSpecConfig(val mode: String, val entityRecoveryStr akka.loglevel = INFO akka.actor.provider = "cluster" akka.remote.log-remote-lifecycle-events = off - akka.cluster.auto-down-unreachable-after = 0s + akka.cluster.downing-provider-class = akka.cluster.testkit.AutoDowning + akka.cluster.testkit.auto-down-unreachable-after = 0s akka.cluster.roles = ["backend"] akka.cluster.distributed-data.gossip-interval = 1s akka.persistence.journal.plugin = "akka.persistence.journal.leveldb-shared" diff --git a/akka-cluster-sharding/src/multi-jvm/scala/akka/cluster/sharding/MultiDcClusterShardingSpec.scala b/akka-cluster-sharding/src/multi-jvm/scala/akka/cluster/sharding/MultiDcClusterShardingSpec.scala index f0debccf94..330d9ec043 100644 --- a/akka-cluster-sharding/src/multi-jvm/scala/akka/cluster/sharding/MultiDcClusterShardingSpec.scala +++ b/akka-cluster-sharding/src/multi-jvm/scala/akka/cluster/sharding/MultiDcClusterShardingSpec.scala @@ -61,7 +61,8 @@ object MultiDcClusterShardingSpecConfig extends MultiNodeConfig { akka.cluster { debug.verbose-heartbeat-logging = on debug.verbose-gossip-logging = on - auto-down-unreachable-after = 0s + downing-provider-class = akka.cluster.testkit.AutoDowning + testkit.auto-down-unreachable-after = 0s sharding { retry-interval = 200ms } diff --git a/akka-cluster-sharding/src/multi-jvm/scala/akka/cluster/sharding/MultiNodeClusterShardingConfig.scala b/akka-cluster-sharding/src/multi-jvm/scala/akka/cluster/sharding/MultiNodeClusterShardingConfig.scala index 4f2aa7e9c2..57b07ea365 100644 --- a/akka-cluster-sharding/src/multi-jvm/scala/akka/cluster/sharding/MultiNodeClusterShardingConfig.scala +++ b/akka-cluster-sharding/src/multi-jvm/scala/akka/cluster/sharding/MultiNodeClusterShardingConfig.scala @@ -44,7 +44,8 @@ abstract class MultiNodeClusterShardingConfig( .withFallback(ConfigFactory.parseString(s""" akka.loglevel = $loglevel akka.actor.provider = "cluster" - akka.cluster.auto-down-unreachable-after = 0s + akka.cluster.downing-provider-class = 
akka.cluster.testkit.AutoDowning + akka.cluster.testkit.auto-down-unreachable-after = 0s akka.remote.log-remote-lifecycle-events = off akka.cluster.sharding.state-store-mode = "$mode" akka.cluster.sharding.distributed-data.durable.lmdb { diff --git a/akka-cluster-tools/src/main/scala/akka/cluster/singleton/ClusterSingletonManager.scala b/akka-cluster-tools/src/main/scala/akka/cluster/singleton/ClusterSingletonManager.scala index 57f25679ef..d7c29f302a 100644 --- a/akka-cluster-tools/src/main/scala/akka/cluster/singleton/ClusterSingletonManager.scala +++ b/akka-cluster-tools/src/main/scala/akka/cluster/singleton/ClusterSingletonManager.scala @@ -36,7 +36,6 @@ import akka.pattern.ask import akka.pattern.pipe import akka.util.JavaDurationConverters._ import akka.util.Timeout -import com.github.ghik.silencer.silent import com.typesafe.config.Config object ClusterSingletonManagerSettings { @@ -45,7 +44,6 @@ object ClusterSingletonManagerSettings { * Create settings from the default configuration * `akka.cluster.singleton`. */ - @silent("deprecated") // DownRemovalMargin def apply(system: ActorSystem): ClusterSingletonManagerSettings = apply(system.settings.config.getConfig("akka.cluster.singleton")) // note that this setting has some additional logic inside the ClusterSingletonManager diff --git a/akka-cluster-tools/src/multi-jvm/scala/akka/cluster/client/ClusterClientSpec.scala b/akka-cluster-tools/src/multi-jvm/scala/akka/cluster/client/ClusterClientSpec.scala index 1f04b2a43e..c1b6543b18 100644 --- a/akka-cluster-tools/src/multi-jvm/scala/akka/cluster/client/ClusterClientSpec.scala +++ b/akka-cluster-tools/src/multi-jvm/scala/akka/cluster/client/ClusterClientSpec.scala @@ -45,7 +45,8 @@ object ClusterClientSpec extends MultiNodeConfig { akka.loglevel = INFO akka.actor.provider = "cluster" akka.remote.log-remote-lifecycle-events = off - akka.cluster.auto-down-unreachable-after = 0s + akka.cluster.downing-provider-class = akka.cluster.testkit.AutoDowning + akka.cluster.testkit.auto-down-unreachable-after = 0s akka.cluster.client.heartbeat-interval = 1s akka.cluster.client.acceptable-heartbeat-pause = 3s akka.cluster.client.refresh-contacts-interval = 1s diff --git a/akka-cluster-tools/src/multi-jvm/scala/akka/cluster/pubsub/DistributedPubSubMediatorSpec.scala b/akka-cluster-tools/src/multi-jvm/scala/akka/cluster/pubsub/DistributedPubSubMediatorSpec.scala index 920376ec45..1607aa87b9 100644 --- a/akka-cluster-tools/src/multi-jvm/scala/akka/cluster/pubsub/DistributedPubSubMediatorSpec.scala +++ b/akka-cluster-tools/src/multi-jvm/scala/akka/cluster/pubsub/DistributedPubSubMediatorSpec.scala @@ -30,7 +30,8 @@ object DistributedPubSubMediatorSpec extends MultiNodeConfig { akka.loglevel = INFO akka.actor.provider = "cluster" akka.remote.log-remote-lifecycle-events = off - akka.cluster.auto-down-unreachable-after = 0s + akka.cluster.downing-provider-class = akka.cluster.testkit.AutoDowning + akka.cluster.testkit.auto-down-unreachable-after = 0s akka.cluster.pub-sub.max-delta-elements = 500 """)) diff --git a/akka-cluster-tools/src/multi-jvm/scala/akka/cluster/pubsub/DistributedPubSubRestartSpec.scala b/akka-cluster-tools/src/multi-jvm/scala/akka/cluster/pubsub/DistributedPubSubRestartSpec.scala index 7b86153c5a..34bac1b264 100644 --- a/akka-cluster-tools/src/multi-jvm/scala/akka/cluster/pubsub/DistributedPubSubRestartSpec.scala +++ b/akka-cluster-tools/src/multi-jvm/scala/akka/cluster/pubsub/DistributedPubSubRestartSpec.scala @@ -33,7 +33,8 @@ object DistributedPubSubRestartSpec extends 
MultiNodeConfig { akka.cluster.pub-sub.gossip-interval = 500ms akka.actor.provider = cluster akka.remote.log-remote-lifecycle-events = off - akka.cluster.auto-down-unreachable-after = off + akka.cluster.downing-provider-class = akka.cluster.testkit.AutoDowning + akka.cluster.testkit.auto-down-unreachable-after = off """)) testTransport(on = true) diff --git a/akka-cluster-tools/src/multi-jvm/scala/akka/cluster/singleton/ClusterSingletonManagerChaosSpec.scala b/akka-cluster-tools/src/multi-jvm/scala/akka/cluster/singleton/ClusterSingletonManagerChaosSpec.scala index b389a80868..cf711a6d53 100644 --- a/akka-cluster-tools/src/multi-jvm/scala/akka/cluster/singleton/ClusterSingletonManagerChaosSpec.scala +++ b/akka-cluster-tools/src/multi-jvm/scala/akka/cluster/singleton/ClusterSingletonManagerChaosSpec.scala @@ -35,7 +35,8 @@ object ClusterSingletonManagerChaosSpec extends MultiNodeConfig { akka.loglevel = INFO akka.actor.provider = "cluster" akka.remote.log-remote-lifecycle-events = off - akka.cluster.auto-down-unreachable-after = 0s + akka.cluster.downing-provider-class = akka.cluster.testkit.AutoDowning + akka.cluster.testkit.auto-down-unreachable-after = 0s """)) case object EchoStarted diff --git a/akka-cluster-tools/src/multi-jvm/scala/akka/cluster/singleton/ClusterSingletonManagerLeaseSpec.scala b/akka-cluster-tools/src/multi-jvm/scala/akka/cluster/singleton/ClusterSingletonManagerLeaseSpec.scala index 8862921e94..d4cfeda085 100644 --- a/akka-cluster-tools/src/multi-jvm/scala/akka/cluster/singleton/ClusterSingletonManagerLeaseSpec.scala +++ b/akka-cluster-tools/src/multi-jvm/scala/akka/cluster/singleton/ClusterSingletonManagerLeaseSpec.scala @@ -28,7 +28,8 @@ object ClusterSingletonManagerLeaseSpec extends MultiNodeConfig { akka.loglevel = INFO akka.actor.provider = "cluster" akka.remote.log-remote-lifecycle-events = off - akka.cluster.auto-down-unreachable-after = 0s + akka.cluster.downing-provider-class = akka.cluster.testkit.AutoDowning + akka.cluster.testkit.auto-down-unreachable-after = 0s test-lease { lease-class = akka.cluster.TestLeaseActorClient heartbeat-interval = 1s diff --git a/akka-cluster-tools/src/multi-jvm/scala/akka/cluster/singleton/ClusterSingletonManagerLeave2Spec.scala b/akka-cluster-tools/src/multi-jvm/scala/akka/cluster/singleton/ClusterSingletonManagerLeave2Spec.scala index 3c446994c9..b03e3feae5 100644 --- a/akka-cluster-tools/src/multi-jvm/scala/akka/cluster/singleton/ClusterSingletonManagerLeave2Spec.scala +++ b/akka-cluster-tools/src/multi-jvm/scala/akka/cluster/singleton/ClusterSingletonManagerLeave2Spec.scala @@ -33,7 +33,8 @@ object ClusterSingletonManagerLeave2Spec extends MultiNodeConfig { akka.loglevel = INFO akka.actor.provider = "cluster" akka.remote.log-remote-lifecycle-events = off - akka.cluster.auto-down-unreachable-after = off + akka.cluster.downing-provider-class = akka.cluster.testkit.AutoDowning + akka.cluster.testkit.auto-down-unreachable-after = off """)) case object EchoStarted diff --git a/akka-cluster-tools/src/multi-jvm/scala/akka/cluster/singleton/ClusterSingletonManagerLeaveSpec.scala b/akka-cluster-tools/src/multi-jvm/scala/akka/cluster/singleton/ClusterSingletonManagerLeaveSpec.scala index 356569026d..4eb231e409 100644 --- a/akka-cluster-tools/src/multi-jvm/scala/akka/cluster/singleton/ClusterSingletonManagerLeaveSpec.scala +++ b/akka-cluster-tools/src/multi-jvm/scala/akka/cluster/singleton/ClusterSingletonManagerLeaveSpec.scala @@ -26,7 +26,8 @@ object ClusterSingletonManagerLeaveSpec extends MultiNodeConfig { akka.loglevel = INFO 
akka.actor.provider = "cluster" akka.remote.log-remote-lifecycle-events = off - akka.cluster.auto-down-unreachable-after = off + akka.cluster.downing-provider-class = akka.cluster.testkit.AutoDowning + akka.cluster.testkit.auto-down-unreachable-after = off """)) case object EchoStarted diff --git a/akka-cluster-tools/src/multi-jvm/scala/akka/cluster/singleton/ClusterSingletonManagerSpec.scala b/akka-cluster-tools/src/multi-jvm/scala/akka/cluster/singleton/ClusterSingletonManagerSpec.scala index 8c7b371aa7..b2bbf3cf2d 100644 --- a/akka-cluster-tools/src/multi-jvm/scala/akka/cluster/singleton/ClusterSingletonManagerSpec.scala +++ b/akka-cluster-tools/src/multi-jvm/scala/akka/cluster/singleton/ClusterSingletonManagerSpec.scala @@ -40,7 +40,8 @@ object ClusterSingletonManagerSpec extends MultiNodeConfig { akka.loglevel = INFO akka.actor.provider = "cluster" akka.remote.log-remote-lifecycle-events = off - akka.cluster.auto-down-unreachable-after = 0s + akka.cluster.downing-provider-class = akka.cluster.testkit.AutoDowning + akka.cluster.testkit.auto-down-unreachable-after = 0s """)) nodeConfig(first, second, third, fourth, fifth, sixth)(ConfigFactory.parseString("akka.cluster.roles =[worker]")) diff --git a/akka-cluster-tools/src/multi-jvm/scala/akka/cluster/singleton/ClusterSingletonManagerStartupSpec.scala b/akka-cluster-tools/src/multi-jvm/scala/akka/cluster/singleton/ClusterSingletonManagerStartupSpec.scala index 90d6be22c4..9135b91f70 100644 --- a/akka-cluster-tools/src/multi-jvm/scala/akka/cluster/singleton/ClusterSingletonManagerStartupSpec.scala +++ b/akka-cluster-tools/src/multi-jvm/scala/akka/cluster/singleton/ClusterSingletonManagerStartupSpec.scala @@ -27,7 +27,8 @@ object ClusterSingletonManagerStartupSpec extends MultiNodeConfig { akka.loglevel = INFO akka.actor.provider = "cluster" akka.remote.log-remote-lifecycle-events = off - akka.cluster.auto-down-unreachable-after = 0s + akka.cluster.downing-provider-class = akka.cluster.testkit.AutoDowning + akka.cluster.testkit.auto-down-unreachable-after = 0s """)) case object EchoStarted diff --git a/akka-cluster-tools/src/test/scala/akka/cluster/singleton/ClusterSingletonLeavingSpeedSpec.scala b/akka-cluster-tools/src/test/scala/akka/cluster/singleton/ClusterSingletonLeavingSpeedSpec.scala index 0d0234308e..be681af35f 100644 --- a/akka-cluster-tools/src/test/scala/akka/cluster/singleton/ClusterSingletonLeavingSpeedSpec.scala +++ b/akka-cluster-tools/src/test/scala/akka/cluster/singleton/ClusterSingletonLeavingSpeedSpec.scala @@ -44,7 +44,8 @@ class ClusterSingletonLeavingSpeedSpec """ akka.loglevel = DEBUG akka.actor.provider = akka.cluster.ClusterActorRefProvider - akka.cluster.auto-down-unreachable-after = 2s + akka.cluster.downing-provider-class = akka.cluster.testkit.AutoDowning + akka.cluster.testkit.auto-down-unreachable-after = 2s # With 10 systems and setting min-number-of-hand-over-retries to 5 and gossip-interval to 2s it's possible to # reproduce the ClusterSingletonManagerIsStuck and slow hand over in issue #25639 diff --git a/akka-cluster-tools/src/test/scala/akka/cluster/singleton/ClusterSingletonRestart2Spec.scala b/akka-cluster-tools/src/test/scala/akka/cluster/singleton/ClusterSingletonRestart2Spec.scala index fc3e8fa2d9..1b3a6e7e2c 100644 --- a/akka-cluster-tools/src/test/scala/akka/cluster/singleton/ClusterSingletonRestart2Spec.scala +++ b/akka-cluster-tools/src/test/scala/akka/cluster/singleton/ClusterSingletonRestart2Spec.scala @@ -31,7 +31,8 @@ class ClusterSingletonRestart2Spec akka.loglevel = INFO 
akka.cluster.roles = [singleton] akka.actor.provider = akka.cluster.ClusterActorRefProvider - akka.cluster.auto-down-unreachable-after = 2s + akka.cluster.downing-provider-class = akka.cluster.testkit.AutoDowning + akka.cluster.testkit.auto-down-unreachable-after = 2s akka.cluster.singleton.min-number-of-hand-over-retries = 5 akka.remote { classic.netty.tcp { diff --git a/akka-cluster-tools/src/test/scala/akka/cluster/singleton/ClusterSingletonRestartSpec.scala b/akka-cluster-tools/src/test/scala/akka/cluster/singleton/ClusterSingletonRestartSpec.scala index d9c1c7a3e7..5fcce1e362 100644 --- a/akka-cluster-tools/src/test/scala/akka/cluster/singleton/ClusterSingletonRestartSpec.scala +++ b/akka-cluster-tools/src/test/scala/akka/cluster/singleton/ClusterSingletonRestartSpec.scala @@ -14,10 +14,12 @@ import akka.testkit.TestActors import akka.testkit.TestProbe import com.typesafe.config.ConfigFactory -class ClusterSingletonRestartSpec extends AkkaSpec(""" +class ClusterSingletonRestartSpec + extends AkkaSpec(""" akka.loglevel = INFO akka.actor.provider = akka.cluster.ClusterActorRefProvider - akka.cluster.auto-down-unreachable-after = 2s + akka.cluster.downing-provider-class = akka.cluster.testkit.AutoDowning + akka.cluster.testkit.auto-down-unreachable-after = 2s akka.remote { classic.netty.tcp { hostname = "127.0.0.1" diff --git a/akka-cluster-typed/src/test/scala/akka/cluster/typed/internal/receptionist/ClusterReceptionistSpec.scala b/akka-cluster-typed/src/test/scala/akka/cluster/typed/internal/receptionist/ClusterReceptionistSpec.scala index 7645a0a970..9372e1ed76 100644 --- a/akka-cluster-typed/src/test/scala/akka/cluster/typed/internal/receptionist/ClusterReceptionistSpec.scala +++ b/akka-cluster-typed/src/test/scala/akka/cluster/typed/internal/receptionist/ClusterReceptionistSpec.scala @@ -52,7 +52,6 @@ object ClusterReceptionistSpec { } akka.cluster { - #auto-down-unreachable-after = 0s jmx.multi-mbeans-in-same-jvm = on failure-detector.acceptable-heartbeat-pause = 3s } diff --git a/akka-cluster/src/main/mima-filters/2.5.x.backwards.excludes/issue-27788-remove-auto-down.excludes b/akka-cluster/src/main/mima-filters/2.5.x.backwards.excludes/issue-27788-remove-auto-down.excludes new file mode 100644 index 0000000000..6384b1a4da --- /dev/null +++ b/akka-cluster/src/main/mima-filters/2.5.x.backwards.excludes/issue-27788-remove-auto-down.excludes @@ -0,0 +1,8 @@ +# #27788 Remove AutoDowning +ProblemFilters.exclude[MissingClassProblem]("akka.cluster.AutoDown$UnreachableTimeout") +ProblemFilters.exclude[MissingClassProblem]("akka.cluster.AutoDown$UnreachableTimeout$") +ProblemFilters.exclude[DirectMissingMethodProblem]("akka.cluster.ClusterSettings.AutoDownUnreachableAfter") +ProblemFilters.exclude[MissingClassProblem]("akka.cluster.AutoDownBase") +ProblemFilters.exclude[MissingClassProblem]("akka.cluster.AutoDown$") +ProblemFilters.exclude[MissingClassProblem]("akka.cluster.AutoDown") +ProblemFilters.exclude[MissingClassProblem]("akka.cluster.AutoDowning") diff --git a/akka-cluster/src/main/resources/reference.conf b/akka-cluster/src/main/resources/reference.conf index f28eb27658..a9b27a3f2a 100644 --- a/akka-cluster/src/main/resources/reference.conf +++ b/akka-cluster/src/main/resources/reference.conf @@ -35,33 +35,17 @@ akka { # attempts. shutdown-after-unsuccessful-join-seed-nodes = off - # Should the 'leader' in the cluster be allowed to automatically mark - # unreachable nodes as DOWN after a configured time of unreachability? 
- # Using auto-down implies that two separate clusters will automatically be - # formed in case of network partition. - # - # Don't enable this in production, see 'Auto-downing (DO NOT USE)' section - # of Akka Cluster documentation. - # - # Disable with "off" or specify a duration to enable auto-down. - # If a downing-provider-class is configured this setting is ignored. - auto-down-unreachable-after = off - # Time margin after which shards or singletons that belonged to a downed/removed # partition are created in surviving partition. The purpose of this margin is that # in case of a network partition the persistent actors in the non-surviving partitions # must be stopped before corresponding persistent actors are started somewhere else. # This is useful if you implement downing strategies that handle network partitions, # e.g. by keeping the larger side of the partition and shutting down the smaller side. - # It will not add any extra safety for auto-down-unreachable-after, since that is not - # handling network partitions. # Disable with "off" or specify a duration to enable. down-removal-margin = off # Pluggable support for downing of nodes in the cluster. - # If this setting is left empty behavior will depend on 'auto-down-unreachable' in the following ways: - # * if it is 'off' the `NoDowning` provider is used and no automatic downing will be performed - # * if it is set to a duration the `AutoDowning` provider is with the configured downing duration + # If this setting is left empty the `NoDowning` provider is used and no automatic downing will be performed. # # If specified the value must be the fully qualified class name of a subclass of # `akka.cluster.DowningProvider` having a public one argument constructor accepting an `ActorSystem` diff --git a/akka-cluster/src/main/scala/akka/cluster/Cluster.scala b/akka-cluster/src/main/scala/akka/cluster/Cluster.scala index 74aded1d7c..89b5d73365 100644 --- a/akka-cluster/src/main/scala/akka/cluster/Cluster.scala +++ b/akka-cluster/src/main/scala/akka/cluster/Cluster.scala @@ -125,8 +125,19 @@ class Cluster(val system: ExtendedActorSystem) extends Extension { } // needs to be lazy to allow downing provider impls to access Cluster (if not we get deadlock) - lazy val downingProvider: DowningProvider = + lazy val downingProvider: DowningProvider = { + checkAutoDownUsage() DowningProvider.load(settings.DowningProviderClassName, system) + } + + private def checkAutoDownUsage(): Unit = { + if (settings.DowningProviderClassName == "akka.cluster.AutoDowning" || + (settings.config.hasPath("auto-down-unreachable-after") && settings.config.getString( + "auto-down-unreachable-after") != "off")) + logWarning( + "auto-down has been removed in Akka 2.6.0. 
See " + + "https://doc.akka.io/docs/akka/2.6/typed/cluster.html#downing for alternatives.") + } // ======================================================== // ===================== WORK DAEMONS ===================== diff --git a/akka-cluster/src/main/scala/akka/cluster/ClusterDaemon.scala b/akka-cluster/src/main/scala/akka/cluster/ClusterDaemon.scala index e5bcb40bcf..c5f056a96e 100644 --- a/akka-cluster/src/main/scala/akka/cluster/ClusterDaemon.scala +++ b/akka-cluster/src/main/scala/akka/cluster/ClusterDaemon.scala @@ -406,12 +406,17 @@ private[cluster] class ClusterCoreDaemon(publisher: ActorRef, joinConfigCompatCh override def preStart(): Unit = { subscribeQuarantinedEvent() - cluster.downingProvider.downingActorProps.foreach { props => - val propsWithDispatcher = - if (props.dispatcher == Deploy.NoDispatcherGiven) props.withDispatcher(context.props.dispatcher) - else props + cluster.downingProvider.downingActorProps match { + case Some(props) => + val propsWithDispatcher = + if (props.dispatcher == Deploy.NoDispatcherGiven) props.withDispatcher(context.props.dispatcher) + else props - context.actorOf(propsWithDispatcher, name = "downingProvider") + context.actorOf(propsWithDispatcher, name = "downingProvider") + case None => + logInfo( + "No downing-provider-class configured, manual cluster downing required, see " + + "https://doc.akka.io/docs/akka/current/typed/cluster.html#downing") } if (seedNodes.isEmpty) { @@ -420,7 +425,7 @@ private[cluster] class ClusterCoreDaemon(publisher: ActorRef, joinConfigCompatCh else logInfo( "No seed-nodes configured, manual cluster join required, see " + - "https://doc.akka.io/docs/akka/current/cluster-usage.html#joining-to-seed-nodes") + "https://doc.akka.io/docs/akka/current/typed/cluster.html#joining") } else { self ! 
JoinSeedNodes(seedNodes) } diff --git a/akka-cluster/src/main/scala/akka/cluster/ClusterSettings.scala b/akka-cluster/src/main/scala/akka/cluster/ClusterSettings.scala index b7c0f5de68..d18e4277ed 100644 --- a/akka-cluster/src/main/scala/akka/cluster/ClusterSettings.scala +++ b/akka-cluster/src/main/scala/akka/cluster/ClusterSettings.scala @@ -116,21 +116,6 @@ final class ClusterSettings(val config: Config, val systemName: String) { cc.getMillisDuration(key).requiring(_ >= Duration.Zero, key + " >= 0s") } - // specific to the [[akka.cluster.DefaultDowningProvider]] - val AutoDownUnreachableAfter: Duration = { - val key = "auto-down-unreachable-after" - toRootLowerCase(cc.getString(key)) match { - case "off" => Duration.Undefined - case _ => cc.getMillisDuration(key).requiring(_ >= Duration.Zero, key + " >= 0s, or off") - } - } - - /** - * @deprecated Specific to [[akka.cluster.AutoDown]] should not be used anywhere else, instead - * ``Cluster.downingProvider.downRemovalMargin`` should be used as it allows the downing provider to decide removal - * margins - */ - @deprecated("Use Cluster.downingProvider.downRemovalMargin", since = "2.4.5") val DownRemovalMargin: FiniteDuration = { val key = "down-removal-margin" toRootLowerCase(cc.getString(key)) match { @@ -142,7 +127,6 @@ final class ClusterSettings(val config: Config, val systemName: String) { val DowningProviderClassName: String = { val name = cc.getString("downing-provider-class") if (name.nonEmpty) name - else if (AutoDownUnreachableAfter.isFinite) classOf[AutoDowning].getName else classOf[NoDowning].getName } diff --git a/akka-cluster/src/main/scala/akka/cluster/DowningProvider.scala b/akka-cluster/src/main/scala/akka/cluster/DowningProvider.scala index 422eea0385..d25479c2ca 100644 --- a/akka-cluster/src/main/scala/akka/cluster/DowningProvider.scala +++ b/akka-cluster/src/main/scala/akka/cluster/DowningProvider.scala @@ -6,7 +6,6 @@ package akka.cluster import akka.ConfigurationException import akka.actor.{ ActorSystem, ExtendedActorSystem, Props } -import com.github.ghik.silencer.silent import scala.concurrent.duration.FiniteDuration @@ -35,6 +34,15 @@ private[cluster] object DowningProvider { /** * API for plugins that will handle downing of cluster nodes. Concrete plugins must subclass and * have a public one argument constructor accepting an [[akka.actor.ActorSystem]]. + * + * A custom `DowningProvider` can be configured with `akka.cluster.downing-provider-class` + * + * When implementing a downing provider you should make sure that it will not split the cluster into + * several separate clusters in case of network problems or system overload (long GC pauses). This + * is much more difficult than it might be perceived at first, so carefully read the concerns and scenarios + * described in + * https://doc.akka.io/docs/akka/current/typed/cluster.html#downing and + * https://doc.akka.io/docs/akka-enhancements/current/split-brain-resolver.html */ abstract class DowningProvider { @@ -61,11 +69,9 @@ abstract class DowningProvider { } /** - * Default downing provider used when no provider is configured and 'auto-down-unreachable-after' - * is not enabled. + * Default downing provider used when no provider is configured. 
*/ final class NoDowning(system: ActorSystem) extends DowningProvider { - @silent("deprecated") override def downRemovalMargin: FiniteDuration = Cluster(system).settings.DownRemovalMargin override val downingActorProps: Option[Props] = None } diff --git a/akka-cluster/src/multi-jvm/scala/akka/cluster/LeaderDowningAllOtherNodesSpec.scala b/akka-cluster/src/multi-jvm/scala/akka/cluster/LeaderDowningAllOtherNodesSpec.scala index 151fe95038..cda610e108 100644 --- a/akka-cluster/src/multi-jvm/scala/akka/cluster/LeaderDowningAllOtherNodesSpec.scala +++ b/akka-cluster/src/multi-jvm/scala/akka/cluster/LeaderDowningAllOtherNodesSpec.scala @@ -22,7 +22,8 @@ object LeaderDowningAllOtherNodesMultiJvmSpec extends MultiNodeConfig { debugConfig(on = false) .withFallback(ConfigFactory.parseString(""" akka.cluster.failure-detector.monitored-by-nr-of-members = 2 - akka.cluster.auto-down-unreachable-after = 1s + akka.cluster.downing-provider-class = akka.cluster.testkit.AutoDowning + akka.cluster.testkit.auto-down-unreachable-after = 1s """)) .withFallback(MultiNodeClusterSpec.clusterConfig)) } diff --git a/akka-cluster/src/multi-jvm/scala/akka/cluster/LeaderDowningNodeThatIsUnreachableSpec.scala b/akka-cluster/src/multi-jvm/scala/akka/cluster/LeaderDowningNodeThatIsUnreachableSpec.scala index 764a8b336e..e05e32093c 100644 --- a/akka-cluster/src/multi-jvm/scala/akka/cluster/LeaderDowningNodeThatIsUnreachableSpec.scala +++ b/akka-cluster/src/multi-jvm/scala/akka/cluster/LeaderDowningNodeThatIsUnreachableSpec.scala @@ -21,7 +21,9 @@ final case class LeaderDowningNodeThatIsUnreachableMultiNodeConfig(failureDetect commonConfig( debugConfig(on = false) - .withFallback(ConfigFactory.parseString("akka.cluster.auto-down-unreachable-after = 2s")) + .withFallback(ConfigFactory.parseString(""" + akka.cluster.downing-provider-class = akka.cluster.testkit.AutoDowning + akka.cluster.testkit.auto-down-unreachable-after = 2s""")) .withFallback(MultiNodeClusterSpec.clusterConfig(failureDetectorPuppet))) } diff --git a/akka-cluster/src/multi-jvm/scala/akka/cluster/LeaderLeavingSpec.scala b/akka-cluster/src/multi-jvm/scala/akka/cluster/LeaderLeavingSpec.scala index c33d86ffb1..f72cbb7b0e 100644 --- a/akka-cluster/src/multi-jvm/scala/akka/cluster/LeaderLeavingSpec.scala +++ b/akka-cluster/src/multi-jvm/scala/akka/cluster/LeaderLeavingSpec.scala @@ -21,7 +21,9 @@ object LeaderLeavingMultiJvmSpec extends MultiNodeConfig { commonConfig( debugConfig(on = false) - .withFallback(ConfigFactory.parseString("akka.cluster.auto-down-unreachable-after = 0s")) + .withFallback(ConfigFactory.parseString(""" + akka.cluster.downing-provider-class = akka.cluster.testkit.AutoDowning + akka.cluster.testkit.auto-down-unreachable-after = 0s""")) .withFallback(MultiNodeClusterSpec.clusterConfigWithFailureDetectorPuppet)) } diff --git a/akka-cluster/src/multi-jvm/scala/akka/cluster/MultiDcSplitBrainSpec.scala b/akka-cluster/src/multi-jvm/scala/akka/cluster/MultiDcSplitBrainSpec.scala index 292169699d..279893897d 100644 --- a/akka-cluster/src/multi-jvm/scala/akka/cluster/MultiDcSplitBrainSpec.scala +++ b/akka-cluster/src/multi-jvm/scala/akka/cluster/MultiDcSplitBrainSpec.scala @@ -39,7 +39,8 @@ object MultiDcSplitBrainMultiJvmSpec extends MultiNodeConfig { akka.cluster { gossip-interval = 500ms leader-actions-interval = 1s - auto-down-unreachable-after = 1s + downing-provider-class = akka.cluster.testkit.AutoDowning + testkit.auto-down-unreachable-after = 1s } """) .withFallback(MultiNodeClusterSpec.clusterConfig)) diff --git 
a/akka-cluster/src/multi-jvm/scala/akka/cluster/NodeChurnSpec.scala b/akka-cluster/src/multi-jvm/scala/akka/cluster/NodeChurnSpec.scala index 4ecb414532..20a379239f 100644 --- a/akka-cluster/src/multi-jvm/scala/akka/cluster/NodeChurnSpec.scala +++ b/akka-cluster/src/multi-jvm/scala/akka/cluster/NodeChurnSpec.scala @@ -21,8 +21,10 @@ object NodeChurnMultiJvmSpec extends MultiNodeConfig { val third = role("third") commonConfig( - debugConfig(on = false).withFallback(ConfigFactory.parseString(""" - akka.cluster.auto-down-unreachable-after = 1s + debugConfig(on = false) + .withFallback(ConfigFactory.parseString(""" + akka.cluster.downing-provider-class = akka.cluster.testkit.AutoDowning + akka.cluster.testkit.auto-down-unreachable-after = 1s akka.cluster.prune-gossip-tombstones-after = 1s akka.remote.classic.log-frame-size-exceeding = 1200b akka.remote.artery.advanced.aeron { @@ -30,7 +32,8 @@ object NodeChurnMultiJvmSpec extends MultiNodeConfig { embedded-media-driver = off aeron-dir = "target/aeron-NodeChurnSpec" } - """)).withFallback(MultiNodeClusterSpec.clusterConfig)) + """)) + .withFallback(MultiNodeClusterSpec.clusterConfig)) class LogListener(testActor: ActorRef) extends Actor { def receive = { diff --git a/akka-cluster/src/multi-jvm/scala/akka/cluster/NodeDowningAndBeingRemovedSpec.scala b/akka-cluster/src/multi-jvm/scala/akka/cluster/NodeDowningAndBeingRemovedSpec.scala index a7dd89432b..279039d3a2 100644 --- a/akka-cluster/src/multi-jvm/scala/akka/cluster/NodeDowningAndBeingRemovedSpec.scala +++ b/akka-cluster/src/multi-jvm/scala/akka/cluster/NodeDowningAndBeingRemovedSpec.scala @@ -18,7 +18,7 @@ object NodeDowningAndBeingRemovedMultiJvmSpec extends MultiNodeConfig { commonConfig( debugConfig(on = false).withFallback( ConfigFactory - .parseString("akka.cluster.auto-down-unreachable-after = off") + .parseString("akka.cluster.testkit.auto-down-unreachable-after = off") .withFallback(MultiNodeClusterSpec.clusterConfig))) } diff --git a/akka-cluster/src/multi-jvm/scala/akka/cluster/QuickRestartSpec.scala b/akka-cluster/src/multi-jvm/scala/akka/cluster/QuickRestartSpec.scala index 2ceb9f3adb..40a1e5679a 100644 --- a/akka-cluster/src/multi-jvm/scala/akka/cluster/QuickRestartSpec.scala +++ b/akka-cluster/src/multi-jvm/scala/akka/cluster/QuickRestartSpec.scala @@ -21,10 +21,12 @@ object QuickRestartMultiJvmSpec extends MultiNodeConfig { val third = role("third") commonConfig( - debugConfig(on = false).withFallback(ConfigFactory.parseString(""" - akka.cluster.auto-down-unreachable-after = off + debugConfig(on = false) + .withFallback(ConfigFactory.parseString(""" + akka.cluster.testkit.auto-down-unreachable-after = off akka.cluster.allow-weakly-up-members = off - """)).withFallback(MultiNodeClusterSpec.clusterConfig)) + """)) + .withFallback(MultiNodeClusterSpec.clusterConfig)) } diff --git a/akka-cluster/src/multi-jvm/scala/akka/cluster/RestartFirstSeedNodeSpec.scala b/akka-cluster/src/multi-jvm/scala/akka/cluster/RestartFirstSeedNodeSpec.scala index 8b1b2d7c64..b8ec62311a 100644 --- a/akka-cluster/src/multi-jvm/scala/akka/cluster/RestartFirstSeedNodeSpec.scala +++ b/akka-cluster/src/multi-jvm/scala/akka/cluster/RestartFirstSeedNodeSpec.scala @@ -28,11 +28,13 @@ object RestartFirstSeedNodeMultiJvmSpec extends MultiNodeConfig { val seed3 = role("seed3") commonConfig( - debugConfig(on = false).withFallback(ConfigFactory.parseString(""" - akka.cluster.auto-down-unreachable-after = off + debugConfig(on = false) + .withFallback(ConfigFactory.parseString(""" + 
akka.cluster.testkit.auto-down-unreachable-after = off akka.cluster.retry-unsuccessful-join-after = 3s akka.cluster.allow-weakly-up-members = off - """)).withFallback(MultiNodeClusterSpec.clusterConfig)) + """)) + .withFallback(MultiNodeClusterSpec.clusterConfig)) } class RestartFirstSeedNodeMultiJvmNode1 extends RestartFirstSeedNodeSpec diff --git a/akka-cluster/src/multi-jvm/scala/akka/cluster/RestartNode2Spec.scala b/akka-cluster/src/multi-jvm/scala/akka/cluster/RestartNode2Spec.scala index c56b926660..b59951adf8 100644 --- a/akka-cluster/src/multi-jvm/scala/akka/cluster/RestartNode2Spec.scala +++ b/akka-cluster/src/multi-jvm/scala/akka/cluster/RestartNode2Spec.scala @@ -28,7 +28,8 @@ object RestartNode2SpecMultiJvmSpec extends MultiNodeConfig { commonConfig( debugConfig(on = false) .withFallback(ConfigFactory.parseString(""" - akka.cluster.auto-down-unreachable-after = 2s + akka.cluster.downing-provider-class = akka.cluster.testkit.AutoDowning + akka.cluster.testkit.auto-down-unreachable-after = 2s akka.cluster.retry-unsuccessful-join-after = 3s akka.cluster.allow-weakly-up-members = off akka.remote.retry-gate-closed-for = 45s diff --git a/akka-cluster/src/multi-jvm/scala/akka/cluster/RestartNode3Spec.scala b/akka-cluster/src/multi-jvm/scala/akka/cluster/RestartNode3Spec.scala index f0ba61733a..77753a83d7 100644 --- a/akka-cluster/src/multi-jvm/scala/akka/cluster/RestartNode3Spec.scala +++ b/akka-cluster/src/multi-jvm/scala/akka/cluster/RestartNode3Spec.scala @@ -29,7 +29,7 @@ object RestartNode3MultiJvmSpec extends MultiNodeConfig { commonConfig( debugConfig(on = false) .withFallback(ConfigFactory.parseString(""" - akka.cluster.auto-down-unreachable-after = off + akka.cluster.testkit.auto-down-unreachable-after = off akka.cluster.allow-weakly-up-members = off # test is using Java serialization and not priority to rewrite akka.actor.allow-java-serialization = on diff --git a/akka-cluster/src/multi-jvm/scala/akka/cluster/RestartNodeSpec.scala b/akka-cluster/src/multi-jvm/scala/akka/cluster/RestartNodeSpec.scala index fbd236e66d..8c92178032 100644 --- a/akka-cluster/src/multi-jvm/scala/akka/cluster/RestartNodeSpec.scala +++ b/akka-cluster/src/multi-jvm/scala/akka/cluster/RestartNodeSpec.scala @@ -34,7 +34,8 @@ object RestartNodeMultiJvmSpec extends MultiNodeConfig { commonConfig( debugConfig(on = false) .withFallback(ConfigFactory.parseString(""" - akka.cluster.auto-down-unreachable-after = 5s + akka.cluster.downing-provider-class = akka.cluster.testkit.AutoDowning + akka.cluster.testkit.auto-down-unreachable-after = 5s akka.cluster.allow-weakly-up-members = off #akka.remote.use-passive-connections = off # test is using Java serialization and not priority to rewrite diff --git a/akka-cluster/src/multi-jvm/scala/akka/cluster/SingletonClusterSpec.scala b/akka-cluster/src/multi-jvm/scala/akka/cluster/SingletonClusterSpec.scala index 01a63eb140..a32e189d04 100644 --- a/akka-cluster/src/multi-jvm/scala/akka/cluster/SingletonClusterSpec.scala +++ b/akka-cluster/src/multi-jvm/scala/akka/cluster/SingletonClusterSpec.scala @@ -16,12 +16,16 @@ final case class SingletonClusterMultiNodeConfig(failureDetectorPuppet: Boolean) val first = role("first") val second = role("second") - commonConfig(debugConfig(on = false).withFallback(ConfigFactory.parseString(""" + commonConfig( + debugConfig(on = false) + .withFallback(ConfigFactory.parseString(""" akka.cluster { - auto-down-unreachable-after = 0s + downing-provider-class = akka.cluster.testkit.AutoDowning + testkit.auto-down-unreachable-after = 0s 
failure-detector.threshold = 4 } - """)).withFallback(MultiNodeClusterSpec.clusterConfig(failureDetectorPuppet))) + """)) + .withFallback(MultiNodeClusterSpec.clusterConfig(failureDetectorPuppet))) } diff --git a/akka-cluster/src/multi-jvm/scala/akka/cluster/SplitBrainSpec.scala b/akka-cluster/src/multi-jvm/scala/akka/cluster/SplitBrainSpec.scala index 354be99d93..d9a5bd3ec6 100644 --- a/akka-cluster/src/multi-jvm/scala/akka/cluster/SplitBrainSpec.scala +++ b/akka-cluster/src/multi-jvm/scala/akka/cluster/SplitBrainSpec.scala @@ -21,12 +21,16 @@ final case class SplitBrainMultiNodeConfig(failureDetectorPuppet: Boolean) exten val fourth = role("fourth") val fifth = role("fifth") - commonConfig(debugConfig(on = false).withFallback(ConfigFactory.parseString(""" + commonConfig( + debugConfig(on = false) + .withFallback(ConfigFactory.parseString(""" akka.remote.retry-gate-closed-for = 3 s akka.cluster { - auto-down-unreachable-after = 1s + downing-provider-class = akka.cluster.testkit.AutoDowning + testkit.auto-down-unreachable-after = 1s failure-detector.threshold = 4 - }""")).withFallback(MultiNodeClusterSpec.clusterConfig(failureDetectorPuppet))) + }""")) + .withFallback(MultiNodeClusterSpec.clusterConfig(failureDetectorPuppet))) testTransport(on = true) } diff --git a/akka-cluster/src/multi-jvm/scala/akka/cluster/StreamRefSpec.scala b/akka-cluster/src/multi-jvm/scala/akka/cluster/StreamRefSpec.scala index 97389bd079..7a7129bb54 100644 --- a/akka-cluster/src/multi-jvm/scala/akka/cluster/StreamRefSpec.scala +++ b/akka-cluster/src/multi-jvm/scala/akka/cluster/StreamRefSpec.scala @@ -34,10 +34,14 @@ object StreamRefSpec extends MultiNodeConfig { val second = role("second") val third = role("third") - commonConfig(debugConfig(on = false).withFallback(ConfigFactory.parseString(""" + commonConfig( + debugConfig(on = false) + .withFallback(ConfigFactory.parseString(""" akka.cluster { - auto-down-unreachable-after = 1s - }""")).withFallback(MultiNodeClusterSpec.clusterConfig)) + downing-provider-class = akka.cluster.testkit.AutoDowning + testkit.auto-down-unreachable-after = 1s + }""")) + .withFallback(MultiNodeClusterSpec.clusterConfig)) testTransport(on = true) diff --git a/akka-cluster/src/multi-jvm/scala/akka/cluster/StressSpec.scala b/akka-cluster/src/multi-jvm/scala/akka/cluster/StressSpec.scala index 77d2e6e83b..a124228c08 100644 --- a/akka-cluster/src/multi-jvm/scala/akka/cluster/StressSpec.scala +++ b/akka-cluster/src/multi-jvm/scala/akka/cluster/StressSpec.scala @@ -119,7 +119,8 @@ private[cluster] object StressMultiJvmSpec extends MultiNodeConfig { akka.actor.provider = cluster akka.cluster { failure-detector.acceptable-heartbeat-pause = 10s - auto-down-unreachable-after = 1s + downing-provider-class = akka.cluster.testkit.AutoDowning + testkit.auto-down-unreachable-after = 1s publish-stats-interval = 1s } akka.loggers = ["akka.testkit.TestEventListener"] diff --git a/akka-cluster/src/test/scala/akka/cluster/ClusterConfigSpec.scala b/akka-cluster/src/test/scala/akka/cluster/ClusterConfigSpec.scala index c579f5f229..00f6ec5253 100644 --- a/akka-cluster/src/test/scala/akka/cluster/ClusterConfigSpec.scala +++ b/akka-cluster/src/test/scala/akka/cluster/ClusterConfigSpec.scala @@ -42,7 +42,6 @@ class ClusterConfigSpec extends AkkaSpec { LeaderActionsInterval should ===(1 second) UnreachableNodesReaperInterval should ===(1 second) PublishStatsInterval should ===(Duration.Undefined) - AutoDownUnreachableAfter should ===(Duration.Undefined) DownRemovalMargin should ===(Duration.Zero) 
MinNrOfMembers should ===(1) MinNrOfMembersOfRole should ===(Map.empty[String, Int]) diff --git a/akka-cluster/src/test/scala/akka/cluster/ClusterLogSpec.scala b/akka-cluster/src/test/scala/akka/cluster/ClusterLogSpec.scala index 76ad00d18f..1efc5fc639 100644 --- a/akka-cluster/src/test/scala/akka/cluster/ClusterLogSpec.scala +++ b/akka-cluster/src/test/scala/akka/cluster/ClusterLogSpec.scala @@ -11,7 +11,8 @@ import com.typesafe.config.{ Config, ConfigFactory } object ClusterLogSpec { val config = """ akka.cluster { - auto-down-unreachable-after = 0s + downing-provider-class = akka.cluster.testkit.AutoDowning + testkit.auto-down-unreachable-after = 0s publish-stats-interval = 0 s # always, when it happens failure-detector.implementation-class = akka.cluster.FailureDetectorPuppet } diff --git a/akka-cluster/src/test/scala/akka/cluster/ClusterSpec.scala b/akka-cluster/src/test/scala/akka/cluster/ClusterSpec.scala index d09e73bbb4..6d967922ce 100644 --- a/akka-cluster/src/test/scala/akka/cluster/ClusterSpec.scala +++ b/akka-cluster/src/test/scala/akka/cluster/ClusterSpec.scala @@ -30,7 +30,8 @@ import scala.concurrent.duration._ object ClusterSpec { val config = """ akka.cluster { - auto-down-unreachable-after = 0s + downing-provider-class = akka.cluster.testkit.AutoDowning + testkit.auto-down-unreachable-after = 0s periodic-tasks-initial-delay = 120 seconds // turn off scheduled tasks publish-stats-interval = 0 s # always, when it happens failure-detector.implementation-class = akka.cluster.FailureDetectorPuppet diff --git a/akka-cluster/src/test/scala/akka/cluster/DowningProviderSpec.scala b/akka-cluster/src/test/scala/akka/cluster/DowningProviderSpec.scala index bcf3b6c79e..98bbff6b3b 100644 --- a/akka-cluster/src/test/scala/akka/cluster/DowningProviderSpec.scala +++ b/akka-cluster/src/test/scala/akka/cluster/DowningProviderSpec.scala @@ -6,14 +6,17 @@ package akka.cluster import java.util.concurrent.atomic.AtomicBoolean +import scala.concurrent.duration._ + import akka.ConfigurationException -import akka.actor.{ ActorSystem, Props } -import akka.testkit.TestKit.{ awaitCond, shutdownActorSystem } +import akka.actor.ActorSystem +import akka.actor.Props +import akka.testkit.TestKit.awaitCond +import akka.testkit.TestKit.shutdownActorSystem import akka.util.unused import com.typesafe.config.ConfigFactory -import org.scalatest.{ Matchers, WordSpec } - -import scala.concurrent.duration._ +import org.scalatest.Matchers +import org.scalatest.WordSpec class FailingDowningProvider(@unused system: ActorSystem) extends DowningProvider { override val downRemovalMargin: FiniteDuration = 20.seconds @@ -39,6 +42,10 @@ class DowningProviderSpec extends WordSpec with Matchers { loglevel = WARNING actor.provider = "cluster" remote { + artery.canonical { + hostname = 127.0.0.1 + port = 0 + } classic.netty.tcp { hostname = "127.0.0.1" port = 0 @@ -55,16 +62,6 @@ class DowningProviderSpec extends WordSpec with Matchers { shutdownActorSystem(system) } - "use akka.cluster.AutoDowning if 'auto-down-unreachable-after' is configured" in { - val system = ActorSystem( - "auto-downing", - ConfigFactory.parseString(""" - akka.cluster.auto-down-unreachable-after = 18d - """).withFallback(baseConf)) - Cluster(system).downingProvider shouldBe an[AutoDowning] - shutdownActorSystem(system) - } - "use the specified downing provider" in { val system = ActorSystem( "auto-downing", diff --git a/akka-cluster/src/test/scala/akka/cluster/JoinConfigCompatCheckerSpec.scala 
b/akka-cluster/src/test/scala/akka/cluster/JoinConfigCompatCheckerSpec.scala index 8b3ec9d1cf..08bd2381d0 100644 --- a/akka-cluster/src/test/scala/akka/cluster/JoinConfigCompatCheckerSpec.scala +++ b/akka-cluster/src/test/scala/akka/cluster/JoinConfigCompatCheckerSpec.scala @@ -259,7 +259,8 @@ class JoinConfigCompatCheckerSpec extends AkkaSpec with ClusterTestKit { akka.cluster { # using explicit downing provider class - downing-provider-class = "akka.cluster.AutoDowning" + downing-provider-class = "akka.cluster.testkit.AutoDowning" + testkit.auto-down-unreachable-after = 0s configuration-compatibility-check { enforce-on-join = on diff --git a/akka-cluster/src/main/scala/akka/cluster/AutoDown.scala b/akka-cluster/src/test/scala/akka/cluster/testkit/AutoDown.scala similarity index 58% rename from akka-cluster/src/main/scala/akka/cluster/AutoDown.scala rename to akka-cluster/src/test/scala/akka/cluster/testkit/AutoDown.scala index 068b7cdd84..8e76669470 100644 --- a/akka-cluster/src/main/scala/akka/cluster/AutoDown.scala +++ b/akka-cluster/src/test/scala/akka/cluster/testkit/AutoDown.scala @@ -2,17 +2,82 @@ * Copyright (C) 2009-2019 Lightbend Inc. */ -package akka.cluster - -import akka.ConfigurationException -import akka.actor.{ Actor, ActorSystem, Address, Cancellable, Props, Scheduler } - -import scala.concurrent.duration.FiniteDuration -import akka.cluster.ClusterEvent._ +package akka.cluster.testkit import scala.concurrent.duration.Duration +import scala.concurrent.duration.FiniteDuration + +import akka.actor.Actor import akka.actor.ActorLogging -import com.github.ghik.silencer.silent +import akka.actor.ActorSystem +import akka.actor.Address +import akka.actor.Cancellable +import akka.actor.Props +import akka.actor.Scheduler +import akka.cluster.Cluster +import akka.cluster.ClusterEvent._ +import akka.cluster.DowningProvider +import akka.cluster.Member +import akka.cluster.MembershipState +import akka.cluster.UniqueAddress +import akka.util.Helpers.ConfigOps +import akka.util.Helpers.Requiring +import akka.util.Helpers.toRootLowerCase + +/** + * Downing provider used for testing. + * + * Auto-downing is a naïve approach to remove unreachable nodes from the cluster membership. + * In a production environment it will eventually break down the cluster. + * When a network partition occurs, both sides of the partition will see the other side as unreachable + * and remove it from the cluster. This results in the formation of two separate, disconnected, clusters + * (known as *Split Brain*). + * + * This behavior is not limited to network partitions. It can also occur if a node in the cluster is + * overloaded, or experiences a long GC pause. + * + * When using Cluster Singleton or Cluster Sharding it can break the contract provided by those features. + * Both provide a guarantee that an actor will be unique in a cluster. + * With the auto-down feature enabled, it is possible for multiple independent clusters to form (*Split Brain*). + * When this happens the guaranteed uniqueness will no longer be true resulting in undesirable behavior + * in the system. + * + * This is even more severe when Akka Persistence is used in conjunction with Cluster Sharding. + * In this case, the lack of unique actors can cause multiple actors to write to the same journal. + * Akka Persistence operates on a single writer principle. Having multiple writers will corrupt + * the journal and make it unusable. 
+ * + * Finally, even if you don't use features such as Persistence, Sharding, or Singletons, auto-downing can lead the + * system to form multiple small clusters. These small clusters will be independent from each other. They will be + * unable to communicate and as a result you may experience performance degradation. Once this condition occurs, + * it will require manual intervention in order to reform the cluster. + * + * Because of these issues, auto-downing should never be used in a production environment. + */ +final class AutoDowning(system: ActorSystem) extends DowningProvider { + + private def clusterSettings = Cluster(system).settings + + private val AutoDownUnreachableAfter: Duration = { + val key = "akka.cluster.testkit.auto-down-unreachable-after" + // it's not in reference.conf, since only used in tests + if (clusterSettings.config.hasPath(key)) { + toRootLowerCase(clusterSettings.config.getString(key)) match { + case "off" => Duration.Undefined + case _ => clusterSettings.config.getMillisDuration(key).requiring(_ >= Duration.Zero, key + " >= 0s, or off") + } + } else + Duration.Undefined + } + + override def downRemovalMargin: FiniteDuration = clusterSettings.DownRemovalMargin + + override def downingActorProps: Option[Props] = + AutoDownUnreachableAfter match { + case d: FiniteDuration => Some(AutoDown.props(d)) + case _ => None // auto-down-unreachable-after = off + } +} /** * INTERNAL API @@ -25,26 +90,6 @@ private[cluster] object AutoDown { final case class UnreachableTimeout(node: UniqueAddress) } -/** - * Used when no custom provider is configured and 'auto-down-unreachable-after' is enabled. - */ -final class AutoDowning(system: ActorSystem) extends DowningProvider { - - private def clusterSettings = Cluster(system).settings - - @silent("deprecated") - override def downRemovalMargin: FiniteDuration = clusterSettings.DownRemovalMargin - - override def downingActorProps: Option[Props] = - clusterSettings.AutoDownUnreachableAfter match { - case d: FiniteDuration => Some(AutoDown.props(d)) - case _ => - // I don't think this can actually happen - throw new ConfigurationException( - "AutoDowning downing provider selected but 'akka.cluster.auto-down-unreachable-after' not set") - } -} - /** * INTERNAL API * @@ -68,9 +113,7 @@ private[cluster] class AutoDown(autoDownUnreachableAfter: FiniteDuration) // re-subscribe when restart override def preStart(): Unit = { - log.warning( - "Don't use auto-down feature of Akka Cluster in production. " + - "See 'Auto-downing (DO NOT USE)' section of Akka Cluster documentation.") + log.debug("Auto-down is enabled in test.") cluster.subscribe(self, classOf[ClusterDomainEvent]) super.preStart() } @@ -81,11 +124,7 @@ private[cluster] class AutoDown(autoDownUnreachableAfter: FiniteDuration) override def down(node: Address): Unit = { require(leader) - logInfo( - "Leader is auto-downing unreachable node [{}]. " + - "Don't use auto-down feature of Akka Cluster in production. 
" + - "See 'Auto-downing (DO NOT USE)' section of Akka Cluster documentation.", - node) + logInfo("Leader is auto-downing unreachable node [{}].", node) cluster.down(node) } diff --git a/akka-cluster/src/test/scala/akka/cluster/AutoDownSpec.scala b/akka-cluster/src/test/scala/akka/cluster/testkit/AutoDownSpec.scala similarity index 97% rename from akka-cluster/src/test/scala/akka/cluster/AutoDownSpec.scala rename to akka-cluster/src/test/scala/akka/cluster/testkit/AutoDownSpec.scala index 2e71e73794..b961a4c607 100644 --- a/akka-cluster/src/test/scala/akka/cluster/AutoDownSpec.scala +++ b/akka-cluster/src/test/scala/akka/cluster/testkit/AutoDownSpec.scala @@ -2,15 +2,18 @@ * Copyright (C) 2009-2019 Lightbend Inc. */ -package akka.cluster +package akka.cluster.testkit import scala.concurrent.duration._ -import akka.actor.Address -import akka.actor.Scheduler + import akka.actor.ActorRef +import akka.actor.Address import akka.actor.Props -import akka.cluster.MemberStatus._ +import akka.actor.Scheduler import akka.cluster.ClusterEvent._ +import akka.cluster.Member +import akka.cluster.MemberStatus._ +import akka.cluster.TestMember import akka.remote.RARP import akka.testkit.AkkaSpec import akka.testkit.TimingTest diff --git a/akka-distributed-data/src/test/scala/akka/cluster/ddata/LotsOfDataBot.scala b/akka-distributed-data/src/test/scala/akka/cluster/ddata/LotsOfDataBot.scala index e3ca634ddd..a4b4b87b7d 100644 --- a/akka-distributed-data/src/test/scala/akka/cluster/ddata/LotsOfDataBot.scala +++ b/akka-distributed-data/src/test/scala/akka/cluster/ddata/LotsOfDataBot.scala @@ -32,7 +32,8 @@ object LotsOfDataBot { // Override the configuration of the port val config = ConfigFactory .parseString("akka.remote.classic.netty.tcp.port=" + port) - .withFallback(ConfigFactory.load(ConfigFactory.parseString(""" + .withFallback( + ConfigFactory.load(ConfigFactory.parseString(""" passive = off max-entries = 100000 akka.actor.provider = "cluster" @@ -48,7 +49,8 @@ object LotsOfDataBot { "akka://ClusterSystem@127.0.0.1:2551", "akka://ClusterSystem@127.0.0.1:2552"] - auto-down-unreachable-after = 10s + downing-provider-class = akka.cluster.testkit.AutoDowning + testkit.auto-down-unreachable-after = 10s } """))) diff --git a/akka-docs/src/main/paradox/cluster-sharding.md b/akka-docs/src/main/paradox/cluster-sharding.md index 747c08a16c..ed9fe5d280 100644 --- a/akka-docs/src/main/paradox/cluster-sharding.md +++ b/akka-docs/src/main/paradox/cluster-sharding.md @@ -227,14 +227,6 @@ graceful leaving process of a cluster member. See @ref:[removal of Internal Cluster Sharding Data](typed/cluster-sharding.md#removal-of-internal-cluster-sharding-data) in the documentation of the new APIs. -## Configuration - -`ClusterShardingSettings` is a parameter to the `start` method of -the `ClusterSharding` extension, i.e. each each entity type can be configured with different settings -if needed. - -See @ref:[configuration](typed/cluster-sharding.md#configuration) for more information. - ## Inspecting cluster sharding state Two requests to inspect the cluster state are available: @@ -256,20 +248,13 @@ directly sending messages to the individual entities. ## Lease -A @ref[lease](coordination.md) can be used as an additional safety measure to ensure a shard -does not run on two nodes. +A lease can be used as an additional safety measure to ensure a shard does not run on two nodes. +See @ref:[Lease](typed/cluster-sharding.md#lease) in the documentation of the new APIs. 
-Reasons for how this can happen: +## Configuration -* Network partitions without an appropriate downing provider -* Mistakes in the deployment process leading to two separate Akka Clusters -* Timing issues between removing members from the Cluster on one side of a network partition and shutting them down on the other side +`ClusterShardingSettings` is a parameter to the `start` method of +the `ClusterSharding` extension, i.e. each entity type can be configured with different settings +if needed. -A lease can be a final backup that means that each shard won't create child entity actors unless it has the lease. - -To use a lease for sharding set `akka.cluster.sharding.use-lease` to the configuration location -of the lease to use. Each shard will try and acquire a lease with with the name `-shard--` and -the owner is set to the `Cluster(system).selfAddress.hostPort`. - -If a shard can't acquire a lease it will remain uninitialized so messages for entities it owns will -be buffered in the `ShardRegion`. If the lease is lost after initialization the Shard will be terminated. +See @ref:[configuration](typed/cluster-sharding.md#configuration) for more information. diff --git a/akka-docs/src/main/paradox/cluster-usage.md b/akka-docs/src/main/paradox/cluster-usage.md index ecb97faf1c..bca10d24be 100644 --- a/akka-docs/src/main/paradox/cluster-usage.md +++ b/akka-docs/src/main/paradox/cluster-usage.md @@ -104,6 +104,14 @@ Scala Java : @@snip [SimpleClusterListener2.java](/akka-docs/src/test/java/jdocs/cluster/SimpleClusterListener2.java) { #join } +## Leaving + +See @ref:[Leaving](typed/cluster.md#leaving) in the documentation of the new APIs. + +## Downing + +See @ref:[Downing](typed/cluster.md#downing) in the documentation of the new APIs. + ## Subscribe to Cluster Events diff --git a/akka-docs/src/main/paradox/project/links.md b/akka-docs/src/main/paradox/project/links.md index 4ea21ced95..cfeb7bda35 100644 --- a/akka-docs/src/main/paradox/project/links.md +++ b/akka-docs/src/main/paradox/project/links.md @@ -3,7 +3,7 @@ ## Commercial Support Commercial support is provided by [Lightbend](http://www.lightbend.com). -Akka is part of the [Lightbend Reactive Platform](http://www.lightbend.com/platform). +Akka is part of the [Lightbend Platform](http://www.lightbend.com/platform). ## Sponsors diff --git a/akka-docs/src/main/paradox/project/migration-guide-2.5.x-2.6.x.md b/akka-docs/src/main/paradox/project/migration-guide-2.5.x-2.6.x.md index c56d94dc29..0135882e8f 100644 --- a/akka-docs/src/main/paradox/project/migration-guide-2.5.x-2.6.x.md +++ b/akka-docs/src/main/paradox/project/migration-guide-2.5.x-2.6.x.md @@ -11,6 +11,40 @@ is [no longer available as a static method](https://github.com/scala/bug/issues/ If you are still using Scala 2.11 then you must upgrade to 2.12 or 2.13 +## Auto-downing removed + +Auto-downing of unreachable Cluster members has been removed after warnings and recommendations against using it +for many years. It was by default disabled, but could be enabled with configuration +`akka.cluster.auto-down-unreachable-after`. + +For alternatives see the @ref:[documentation about Downing](../typed/cluster.md#downing). + +Auto-downing was a naïve approach to remove unreachable nodes from the cluster membership. +In a production environment it will eventually break down the cluster. +When a network partition occurs, both sides of the partition will see the other side as unreachable +and remove it from the cluster.
This results in the formation of two separate, disconnected, clusters +(known as *Split Brain*). + +This behavior is not limited to network partitions. It can also occur if a node in the cluster is +overloaded, or experiences a long GC pause. + +When using @ref:[Cluster Singleton](../typed/cluster-singleton.md) or @ref:[Cluster Sharding](../typed/cluster-sharding.md) +it can break the contract provided by those features. Both provide a guarantee that an actor will be unique in a cluster. +With the auto-down feature enabled, it is possible for multiple independent clusters to form (*Split Brain*). +When this happens the guaranteed uniqueness will no longer be true resulting in undesirable behavior in the system. + +This is even more severe when @ref:[Akka Persistence](../typed/persistence.md) is used in conjunction with +Cluster Sharding. In this case, the lack of unique actors can cause multiple actors to write to the same journal. +Akka Persistence operates on a single writer principle. Having multiple writers will corrupt the journal +and make it unusable. + +Finally, even if you don't use features such as Persistence, Sharding, or Singletons, auto-downing can lead the +system to form multiple small clusters. These small clusters will be independent from each other. They will be +unable to communicate and as a result you may experience performance degradation. Once this condition occurs, +it will require manual intervention in order to reform the cluster. + +Because of these issues, auto-downing should **never** be used in a production environment. + ## Removed features that were deprecated After being deprecated since 2.5.0, the following have been removed in Akka 2.6. @@ -94,13 +128,25 @@ to make remote interactions look like local method calls. Warnings about `TypedActor` have been [mentioned in documentation](https://doc.akka.io/docs/akka/2.5/typed-actors.html#when-to-use-typed-actors) for many years. +### akka-protobuf + +`akka-protobuf` was never intended to be used by end users but perhaps this was not well-documented. +Applications should use standard Protobuf dependency instead of `akka-protobuf`. The artifact is still +published, but the transitive dependency to `akka-protobuf` has been removed. + +Akka is now using Protobuf version 3.9.0 for serialization of messages defined by Akka. + +### Cluster Client + +Cluster client has been deprecated as of 2.6 in favor of [Akka gRPC](https://doc.akka.io/docs/akka-grpc/current/index.html). +It is not advised to build new applications with Cluster client, and existing users @ref[should migrate to Akka gRPC](../cluster-client.md#migration-to-akka-grpc). ### akka.Main `akka.Main` is deprecated in favour of starting the `ActorSystem` from a custom main class instead. `akka.Main` was not adding much value and typically a custom main class is needed anyway. -@@ Remoting +## Remoting ### Default remoting is now Artery TCP @@ -184,20 +230,7 @@ For TCP: Classic remoting is deprecated but can be used in `2.6.` Explicitly disable Artery by setting property `akka.remote.artery.enabled` to `false`. Further, any configuration under `akka.remote` that is specific to classic remoting needs to be moved to `akka.remote.classic`. To see which configuration options -are specific to classic search for them in: [`akka-remote/reference.conf`](/akka-remote/src/main/resources/reference.conf) - -### akka-protobuf - -`akka-protobuf` was never intended to be used by end users but perhaps this was not well-documented. 
-Applications should use standard Protobuf dependency instead of `akka-protobuf`. The artifact is still -published, but the transitive dependency to `akka-protobuf` has been removed. - -Akka is now using Protobuf version 3.9.0 for serialization of messages defined by Akka. - -### Cluster Client - -Cluster client has been deprecated as of 2.6 in favor of [Akka gRPC](https://doc.akka.io/docs/akka-grpc/current/index.html). -It is not advised to build new applications with Cluster client, and existing users @ref[should migrate to Akka gRPC](../cluster-client.md#migration-to-akka-grpc). +are specific to classic search for them in: @ref:[`akka-remote/reference.conf`](../general/configuration.md#config-akka-remote). ## Java Serialization @@ -235,14 +268,12 @@ handling that type and it was previously "accidentally" serialized with Java ser The following documents configuration changes and behavior changes where no action is required. In some cases the old behavior can be restored via configuration. -### Remoting - -#### Remoting dependencies have been made optional +### Remoting dependencies have been made optional Classic remoting depends on Netty and Artery UDP depends on Aeron. These are now both optional dependencies that need to be explicitly added. See @ref[classic remoting](../remoting.md) or @ref[artery remoting](../remoting-artery.md) for instructions. -#### Remote watch and deployment have been disabled without Cluster use +### Remote watch and deployment have been disabled without Cluster use By default, these remoting features are disabled when not using Akka Cluster: diff --git a/akka-docs/src/main/paradox/typed/cluster-sharding.md b/akka-docs/src/main/paradox/typed/cluster-sharding.md index 1e75ccebd4..f1a5d8ad95 100644 --- a/akka-docs/src/main/paradox/typed/cluster-sharding.md +++ b/akka-docs/src/main/paradox/typed/cluster-sharding.md @@ -43,10 +43,10 @@ if that feature is enabled. @@@ warning -**Don't use Cluster Sharding together with Automatic Downing**, -since it allows the cluster to split up into two separate clusters, which in turn will result -in *multiple shards and entities* being started, one in each separate cluster! -See @ref:[Downing](cluster.md#automatic-vs-manual-downing). +Make sure to not use a Cluster downing strategy that may split the cluster into several separate clusters in +case of network problems or system overload (long GC pauses), since that will result in *multiple shards and entities* +being started, one in each separate cluster! +See @ref:[Downing](cluster.md#downing). @@@ @@ -304,6 +304,26 @@ rebalanced to other nodes. See @ref:[How To Startup when Cluster Size Reached](cluster.md#how-to-startup-when-a-cluster-size-is-reached) for more information about `min-nr-of-members`. +## Lease + +A @ref[lease](../coordination.md) can be used as an additional safety measure to ensure a shard +does not run on two nodes. + +Reasons for how this can happen: + +* Network partitions without an appropriate downing provider +* Mistakes in the deployment process leading to two separate Akka Clusters +* Timing issues between removing members from the Cluster on one side of a network partition and shutting them down on the other side + +A lease can be a final backup that means that each shard won't create child entity actors unless it has the lease. + +To use a lease for sharding set `akka.cluster.sharding.use-lease` to the configuration location +of the lease to use. 
Each shard will try and acquire a lease with the name `-shard--` and +the owner is set to the `Cluster(system).selfAddress.hostPort`. + +If a shard can't acquire a lease it will remain uninitialized so messages for entities it owns will +be buffered in the `ShardRegion`. If the lease is lost after initialization the Shard will be terminated. + ## Removal of internal Cluster Sharding data Removal of internal Cluster Sharding data is only relevant for "Persistent Mode". @@ -326,15 +346,6 @@ cannot startup because of corrupt data, which may happen if accidentally two clusters were running at the same time, e.g. caused by using auto-down and there was a network partition. -@@@ warning - -**Don't use Cluster Sharding together with Automatic Downing**, -since it allows the cluster to split up into two separate clusters, which in turn will result -in *multiple shards and entities* being started, one in each separate cluster! -See @ref:[Downing](cluster.md#automatic-vs-manual-downing). - -@@@ - Use this program as a standalone Java main program: ``` @@ -347,7 +358,7 @@ The program is included in the `akka-cluster-sharding` jar file. It is easiest to run it with same classpath and configuration as your ordinary application. It can be run from sbt or Maven in similar way. -Specify the entity type names (same as you use in the `start` method +Specify the entity type names (same as you use in the `init` method of `ClusterSharding`) as program arguments. If you specify `-2.3` as the first program argument it will also try diff --git a/akka-docs/src/main/paradox/typed/cluster-singleton.md b/akka-docs/src/main/paradox/typed/cluster-singleton.md index 67a5b0259b..b1c19fbf70 100644 --- a/akka-docs/src/main/paradox/typed/cluster-singleton.md +++ b/akka-docs/src/main/paradox/typed/cluster-singleton.md @@ -32,6 +32,15 @@ such as single-point of bottleneck. Single-point of failure is also a relevant c but for some cases this feature takes care of that by making sure that another singleton instance will eventually be started. +@@@ warning + +Make sure to not use a Cluster downing strategy that may split the cluster into several separate clusters in +case of network problems or system overload (long GC pauses), since that will result in *multiple Singletons* +being started, one in each separate cluster! +See @ref:[Downing](cluster.md#downing). + +@@@ + ### Singleton manager The cluster singleton pattern manages one singleton actor instance among all cluster nodes or a group of nodes tagged with @@ -80,23 +89,20 @@ The singleton instance will not run on members with status @ref:[WeaklyUp](clust This pattern may seem to be very tempting to use at first, but it has several drawbacks, some of them are listed below: - * the cluster singleton may quickly become a *performance bottleneck*, - * you can not rely on the cluster singleton to be *non-stop* available — e.g. when the node on which the singleton has -been running dies, it will take a few seconds for this to be noticed and the singleton be migrated to another node, - * in the case of a *network partition* appearing in a Cluster that is using Automatic Downing (see docs for -@ref:[Auto Downing](cluster.md#auto-downing-do-not-use), -it may happen that the isolated clusters each decide to spin up their own singleton, meaning that there might be multiple -singletons running in the system, yet the Clusters have no way of finding out about them (because of the partition).
- -Especially the last point is something you should be aware of — in general when using the Cluster Singleton pattern -you should take care of downing nodes yourself and not rely on the timing based auto-down feature. + * The cluster singleton may quickly become a *performance bottleneck*. + * You cannot rely on the cluster singleton to be *non-stop* available — e.g. when the node on which the singleton + has been running dies, it will take a few seconds for this to be noticed and the singleton be migrated to another node. + * If many singletons are used, be aware that all of them will run on the oldest node (or oldest with configured role). + @ref:[Cluster Sharding](cluster-sharding.md) combined with keeping the "singleton" entities alive can be a better + alternative. @@@ warning - -**Don't use Cluster Singleton together with Automatic Downing**, -since it allows the cluster to split up into two separate clusters, which in turn will result -in *multiple Singletons* being started, one in each separate cluster! - + +Make sure to not use a Cluster downing strategy that may split the cluster into several separate clusters in +case of network problems or system overload (long GC pauses), since that will result in *multiple Singletons* +being started, one in each separate cluster! +See @ref:[Downing](cluster.md#downing). + @@@ ## Example diff --git a/akka-docs/src/main/paradox/typed/cluster.md b/akka-docs/src/main/paradox/typed/cluster.md index fa5df66e67..53e3b98710 100644 --- a/akka-docs/src/main/paradox/typed/cluster.md +++ b/akka-docs/src/main/paradox/typed/cluster.md @@ -255,95 +255,69 @@ after the restart, when it come up as new incarnation of existing member in the cluster, trying to join in, then the existing one will be removed from the cluster and then it will be allowed to join. - -### Downing - -When a member is considered by the failure detector to be `unreachable` the -leader is not allowed to perform its duties, such as changing status of -new joining members to 'Up'. The node must first become `reachable` again, or the -status of the unreachable member must be changed to 'Down'. Changing status to 'Down' -can be performed automatically or manually. By default it must be done manually, using -@ref:[JMX](../additional/operations.md#jmx) or @ref:[HTTP](../additional/operations.md#http). - -It can also be performed programmatically with @scala[`Cluster(system).down(address)`]@java[`Cluster.get(system).down(address)`]. - -If a node is still running and sees its self as Down it will shutdown. @ref:[Coordinated Shutdown](../actors.md#coordinated-shutdown) will automatically -run if `run-coordinated-shutdown-when-down` is set to `on` (the default) however the node will not try -and leave the cluster gracefully so sharding and singleton migration will not occur. - -A production solution for the downing problem is provided by -[Split Brain Resolver](http://developer.lightbend.com/docs/akka-commercial-addons/current/split-brain-resolver.html), -which is part of the [Lightbend Reactive Platform](http://www.lightbend.com/platform). -If you don’t use RP, you should anyway carefully read the [documentation](http://developer.lightbend.com/docs/akka-commercial-addons/current/split-brain-resolver.html) -of the Split Brain Resolver and make sure that the solution you are using handles the concerns -described there. - -### Auto-downing - DO NOT USE - -There is an automatic downing feature that you should not use in production.
For testing you can enable it with configuration: - -``` -akka.cluster.auto-down-unreachable-after = 120s -``` - -This means that the cluster leader member will change the `unreachable` node -status to `down` automatically after the configured time of unreachability. - -This is a naïve approach to remove unreachable nodes from the cluster membership. -It can be useful during development but in a production environment it will eventually breakdown the cluster. -When a network partition occurs, both sides of the partition will see the other side as unreachable and remove it from the cluster. -This results in the formation of two separate, disconnected, clusters (known as *Split Brain*). - -This behaviour is not limited to network partitions. It can also occur if a node -in the cluster is overloaded, or experiences a long GC pause. - -@@@ warning - -We recommend against using the auto-down feature of Akka Cluster in production. It -has multiple undesirable consequences for production systems. - -If you are using @ref:[Cluster Singleton](cluster-singleton.md) or @ref:[Cluster Sharding](cluster-sharding.md) it can break the contract provided by -those features. Both provide a guarantee that an actor will be unique in a cluster. -With the auto-down feature enabled, it is possible for multiple independent clusters -to form (*Split Brain*). When this happens the guaranteed uniqueness will no -longer be true resulting in undesirable behaviour in the system. - -This is even more severe when @ref:[Akka Persistence](persistence.md) is used in -conjunction with Cluster Sharding. In this case, the lack of unique actors can -cause multiple actors to write to the same journal. Akka Persistence operates on a -single writer principle. Having multiple writers will corrupt the journal -and make it unusable. - -Finally, even if you don't use features such as Persistence, Sharding, or Singletons, -auto-downing can lead the system to form multiple small clusters. These small -clusters will be independent from each other. They will be unable to communicate -and as a result you may experience performance degradation. Once this condition -occurs, it will require manual intervention in order to reform the cluster. - -Because of these issues, auto-downing should **never** be used in a production environment. - -@@@ - ### Leaving -There are two ways to remove a member from the cluster. +There are a few ways to remove a member from the cluster. -1. The recommended way to leave a cluster is a graceful exit, informing the cluster that a node shall leave. -This can be performed using @ref:[JMX](../additional/operations.md#jmx) or @ref:[HTTP](../additional/operations.md#http). -This method will offer faster hand off to peer nodes during node shutdown. -1. When a graceful exit is not possible, you can stop the actor system (or the JVM process, for example a SIGTERM sent from the environment). It will be detected -as unreachable and removed after the automatic or manual downing. +1. The recommended way to leave a cluster is a graceful exit, informing the cluster that a node shall leave. + This is performed by @ref:[Coordinated Shutdown](../actors.md#coordinated-shutdown) when the `ActorSystem` + is terminated and also when a SIGTERM is sent from the environment to stop the JVM process. +1. Graceful exit can also be performed using @ref:[HTTP](../additional/operations.md#http) or @ref:[JMX](../additional/operations.md#jmx). +1. 
When a graceful exit is not possible, for example in case of abrupt termination of the JVM process, the node + will be detected as unreachable by other nodes and removed after @ref:[Downing](#downing). -The @ref:[Coordinated Shutdown](../actors.md#coordinated-shutdown) will automatically run when the cluster node sees itself as +Graceful leaving will offer faster hand off to peer nodes during node shutdown than abrupt termination and downing. + +The @ref:[Coordinated Shutdown](../actors.md#coordinated-shutdown) will also run when the cluster node sees itself as `Exiting`, i.e. leaving from another node will trigger the shutdown process on the leaving node. Tasks for graceful leaving of cluster including graceful shutdown of Cluster Singletons and Cluster Sharding are added automatically when Akka Cluster is used, i.e. running the shutdown process will also trigger the graceful leaving if it's not already in progress. Normally this is handled automatically, but in case of network failures during this process it might still -be necessary to set the node’s status to `Down` in order to complete the removal. For handling network failures -see [Split Brain Resolver](http://developer.lightbend.com/docs/akka-commercial-addons/current/split-brain-resolver.html), -part of the [Lightbend Reactive Platform](http://www.lightbend.com/platform). +be necessary to set the node’s status to `Down` in order to complete the removal, see @ref:[Downing](#downing). + +### Downing + +In many cases a member can gracefully exit from the cluster as described in @ref:[Leaving](#leaving), but +there are scenarios when an explicit downing decision is needed before it can be removed. For example in case +of abrupt termination of the JVM process, system overload that doesn't recover, or network partitions +that don't heal. In such cases the node(s) will be detected as unreachable by other nodes, but they must also +be marked as `Down` before they are removed. + +When a member is considered by the failure detector to be `unreachable` the +leader is not allowed to perform its duties, such as changing status of +new joining members to 'Up'. The node must first become `reachable` again, or the +status of the unreachable member must be changed to `Down`. Changing status to `Down` +can be performed automatically or manually. + +By default, downing must be performed manually using @ref:[HTTP](../additional/operations.md#http) or @ref:[JMX](../additional/operations.md#jmx). + +Note that @ref:[Cluster Singleton](cluster-singleton.md) or @ref:[Cluster Sharding entities](cluster-sharding.md) that +are running on a crashed (unreachable) node will not be started on another node until the previous node has +been removed from the Cluster. Removal of crashed (unreachable) nodes is performed after a downing decision. + +A production solution for downing is provided by +[Split Brain Resolver](https://doc.akka.io/docs/akka-enhancements/current/split-brain-resolver.html), +which is part of the [Lightbend Platform](http://www.lightbend.com/platform). +If you don’t have a Lightbend Platform Subscription, you should still carefully read the +[documentation](https://doc.akka.io/docs/akka-enhancements/current/split-brain-resolver.html) +of the Split Brain Resolver and make sure that the solution you are using handles the concerns and scenarios +described there. + +A custom downing strategy can be implemented with a @apidoc[akka.cluster.DowningProvider] and enabled with +configuration `akka.cluster.downing-provider-class`.
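+
+The following is a minimal, hypothetical sketch of such a provider, not a recommended strategy. The names
+`com.example.MyDowningProvider`, `MyDowningActor` and the `10.seconds` margin are made up for illustration;
+only the `DowningProvider` methods (`downRemovalMargin`, `downingActorProps`) and the cluster event
+subscription follow the same API as the testkit `AutoDowning` shown earlier in this patch.
+
+```scala
+import scala.concurrent.duration._
+
+import akka.actor.{ Actor, ActorLogging, ActorSystem, Props }
+import akka.cluster.{ Cluster, DowningProvider }
+import akka.cluster.ClusterEvent.{ ClusterDomainEvent, UnreachableMember }
+
+// Enabled with: akka.cluster.downing-provider-class = "com.example.MyDowningProvider"
+final class MyDowningProvider(system: ActorSystem) extends DowningProvider {
+
+  // How long Cluster Singleton and Cluster Sharding wait after a member has been
+  // marked Down before taking over its responsibilities.
+  override def downRemovalMargin: FiniteDuration = 10.seconds
+
+  // The actor that carries out the downing decisions, started by the cluster extension.
+  override def downingActorProps: Option[Props] = Some(Props(new MyDowningActor))
+}
+
+class MyDowningActor extends Actor with ActorLogging {
+  private val cluster = Cluster(context.system)
+
+  override def preStart(): Unit = cluster.subscribe(self, classOf[ClusterDomainEvent])
+  override def postStop(): Unit = cluster.unsubscribe(self)
+
+  def receive: Receive = {
+    case UnreachableMember(member) =>
+      // A real strategy must resolve the partition safely, for example by keeping the
+      // majority side; downing every unreachable member recreates the auto-down problem.
+      log.warning("Member [{}] is unreachable, custom downing strategy decides here", member.address)
+      // cluster.down(member.address) would mark the member as Down once the strategy decides
+    case _ => // ignore other cluster domain events
+  }
+}
+```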
+ +Downing can also be performed programmatically with @scala[`Cluster(system).manager ! Down(address)`]@java[`Cluster.get(system).manager().tell(Down(address))`], +but that is mostly useful from tests and when implementing a `DowningProvider`. + +If a crashed node is restarted with the same hostname and port and joins the cluster again, the previous incarnation +of that member will be downed and removed. The new join attempt with the same hostname and port is used as evidence +that the previous incarnation is not alive any more. + +If a node is still running and sees itself as `Down` it will shut down. @ref:[Coordinated Shutdown](../actors.md#coordinated-shutdown) will automatically +run if `run-coordinated-shutdown-when-down` is set to `on` (the default), however the node will not try +to leave the cluster gracefully. ## Node Roles