From 2caa560aab97f2efbeffdd39870a15e30fcc8cd5 Mon Sep 17 00:00:00 2001 From: Patrik Nordwall Date: Wed, 30 Sep 2020 09:54:31 +0200 Subject: [PATCH] Config for when to move to WeaklyUp (#29665) * Config for when to move to WeaklyUp * noticed when I was testing with the StressSpec that it's often moving nodes to WeaklyUp in normal joining scenarios (also seen in Kubernetes testing) * better to wait some longer since the WeaklyUp will require a new convergence round and making the full joining -> up take longer time * changed existing config property to be a duration * default 7s, previously it was 3s * on => 7s --- akka-cluster/src/main/resources/reference.conf | 5 ++--- .../main/scala/akka/cluster/ClusterDaemon.scala | 3 ++- .../main/scala/akka/cluster/ClusterSettings.scala | 14 +++++++++++--- .../scala/akka/cluster/MemberWeaklyUpSpec.scala | 2 +- .../akka/cluster/MinMembersBeforeUpSpec.scala | 2 +- 5 files changed, 17 insertions(+), 9 deletions(-) diff --git a/akka-cluster/src/main/resources/reference.conf b/akka-cluster/src/main/resources/reference.conf index 69e1a9a2ac..723f91effc 100644 --- a/akka-cluster/src/main/resources/reference.conf +++ b/akka-cluster/src/main/resources/reference.conf @@ -63,10 +63,9 @@ akka { # If this is set to "off", the leader will not move 'Joining' members to 'Up' during a network # split. This feature allows the leader to accept 'Joining' members to be 'WeaklyUp' # so they become part of the cluster even during a network split. The leader will - # move `Joining` members to 'WeaklyUp' after 3 rounds of 'leader-actions-interval' - # without convergence. + # move `Joining` members to 'WeaklyUp' after this configured duration without convergence. # The leader will move 'WeaklyUp' members to 'Up' status once convergence has been reached. - allow-weakly-up-members = on + allow-weakly-up-members = 7s # The roles of this member. List of strings, e.g. roles = ["A", "B"]. # The roles are part of the membership information and can be used by diff --git a/akka-cluster/src/main/scala/akka/cluster/ClusterDaemon.scala b/akka-cluster/src/main/scala/akka/cluster/ClusterDaemon.scala index 0d535bd2cb..25a0359612 100644 --- a/akka-cluster/src/main/scala/akka/cluster/ClusterDaemon.scala +++ b/akka-cluster/src/main/scala/akka/cluster/ClusterDaemon.scala @@ -1195,7 +1195,8 @@ private[cluster] class ClusterCoreDaemon(publisher: ActorRef, joinConfigCompatCh leaderActionsOnConvergence() } else { leaderActionCounter += 1 - if (cluster.settings.AllowWeaklyUpMembers && leaderActionCounter >= 3) + import cluster.settings.{ AllowWeaklyUpMembers, LeaderActionsInterval, WeaklyUpAfter } + if (AllowWeaklyUpMembers && LeaderActionsInterval * leaderActionCounter >= WeaklyUpAfter) moveJoiningToWeaklyUp() if (leaderActionCounter == firstNotice || leaderActionCounter % periodicNotice == 0) diff --git a/akka-cluster/src/main/scala/akka/cluster/ClusterSettings.scala b/akka-cluster/src/main/scala/akka/cluster/ClusterSettings.scala index 32812e68d2..41302345dc 100644 --- a/akka-cluster/src/main/scala/akka/cluster/ClusterSettings.scala +++ b/akka-cluster/src/main/scala/akka/cluster/ClusterSettings.scala @@ -5,8 +5,7 @@ package akka.cluster import scala.collection.immutable -import scala.concurrent.duration.Duration -import scala.concurrent.duration.FiniteDuration +import scala.concurrent.duration._ import com.typesafe.config.Config import com.typesafe.config.ConfigObject @@ -135,7 +134,16 @@ final class ClusterSettings(val config: Config, val systemName: String) { cc.getMillisDuration("quarantine-removed-node-after") .requiring(_ > Duration.Zero, "quarantine-removed-node-after must be > 0") - val AllowWeaklyUpMembers: Boolean = cc.getBoolean("allow-weakly-up-members") + val WeaklyUpAfter: FiniteDuration = { + val key = "allow-weakly-up-members" + toRootLowerCase(cc.getString(key)) match { + case "off" => Duration.Zero + case "on" => 7.seconds // for backwards compatibility when it wasn't a duration + case _ => cc.getMillisDuration(key).requiring(_ > Duration.Zero, key + " > 0s, or off") + } + } + + val AllowWeaklyUpMembers: Boolean = WeaklyUpAfter != Duration.Zero val SelfDataCenter: DataCenter = cc.getString("multi-data-center.self-data-center") diff --git a/akka-cluster/src/multi-jvm/scala/akka/cluster/MemberWeaklyUpSpec.scala b/akka-cluster/src/multi-jvm/scala/akka/cluster/MemberWeaklyUpSpec.scala index eb3b1e2079..78f9280892 100644 --- a/akka-cluster/src/multi-jvm/scala/akka/cluster/MemberWeaklyUpSpec.scala +++ b/akka-cluster/src/multi-jvm/scala/akka/cluster/MemberWeaklyUpSpec.scala @@ -24,7 +24,7 @@ object MemberWeaklyUpSpec extends MultiNodeConfig { commonConfig(debugConfig(on = false).withFallback(ConfigFactory.parseString(""" akka.remote.retry-gate-closed-for = 3 s - akka.cluster.allow-weakly-up-members = on + akka.cluster.allow-weakly-up-members = 3 s """)).withFallback(MultiNodeClusterSpec.clusterConfig)) testTransport(on = true) diff --git a/akka-cluster/src/multi-jvm/scala/akka/cluster/MinMembersBeforeUpSpec.scala b/akka-cluster/src/multi-jvm/scala/akka/cluster/MinMembersBeforeUpSpec.scala index 414f0a4c63..10c97f712e 100644 --- a/akka-cluster/src/multi-jvm/scala/akka/cluster/MinMembersBeforeUpSpec.scala +++ b/akka-cluster/src/multi-jvm/scala/akka/cluster/MinMembersBeforeUpSpec.scala @@ -34,7 +34,7 @@ object MinMembersBeforeUpWithWeaklyUpMultiJvmSpec extends MultiNodeConfig { debugConfig(on = false) .withFallback(ConfigFactory.parseString(""" akka.cluster.min-nr-of-members = 3 - akka.cluster.allow-weakly-up-members = on""")) + akka.cluster.allow-weakly-up-members = 3 s""")) .withFallback(MultiNodeClusterSpec.clusterConfigWithFailureDetectorPuppet)) }