Config for when to move to WeaklyUp (#29665)

* Config for when to move to WeaklyUp

* noticed when I was testing with the StressSpec that it's often moving nodes to WeaklyUp
  in normal joining scenarios (also seen in Kubernetes testing)
* better to wait somewhat longer, since moving to WeaklyUp will require a new convergence
  round and makes the full joining -> up transition take longer
* changed existing config property to be a duration
* default 7s, previously it was 3s

* on => 7s
This commit is contained in:
Patrik Nordwall 2020-09-30 09:54:31 +02:00 committed by GitHub
parent 94d62f34c1
commit 2caa560aab
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
5 changed files with 17 additions and 9 deletions

View file

@@ -63,10 +63,9 @@ akka {
# If this is set to "off", the leader will not move 'Joining' members to 'Up' during a network
# split. This feature allows the leader to accept 'Joining' members to be 'WeaklyUp'
# so they become part of the cluster even during a network split. The leader will
# move `Joining` members to 'WeaklyUp' after 3 rounds of 'leader-actions-interval'
# without convergence.
# move `Joining` members to 'WeaklyUp' after this configured duration without convergence.
# The leader will move 'WeaklyUp' members to 'Up' status once convergence has been reached.
allow-weakly-up-members = on
allow-weakly-up-members = 7s
# The roles of this member. List of strings, e.g. roles = ["A", "B"].
# The roles are part of the membership information and can be used by

View file

@@ -1195,7 +1195,8 @@ private[cluster] class ClusterCoreDaemon(publisher: ActorRef, joinConfigCompatCh
leaderActionsOnConvergence()
} else {
leaderActionCounter += 1
if (cluster.settings.AllowWeaklyUpMembers && leaderActionCounter >= 3)
import cluster.settings.{ AllowWeaklyUpMembers, LeaderActionsInterval, WeaklyUpAfter }
if (AllowWeaklyUpMembers && LeaderActionsInterval * leaderActionCounter >= WeaklyUpAfter)
moveJoiningToWeaklyUp()
if (leaderActionCounter == firstNotice || leaderActionCounter % periodicNotice == 0)

View file

@@ -5,8 +5,7 @@
package akka.cluster
import scala.collection.immutable
import scala.concurrent.duration.Duration
import scala.concurrent.duration.FiniteDuration
import scala.concurrent.duration._
import com.typesafe.config.Config
import com.typesafe.config.ConfigObject
@@ -135,7 +134,16 @@ final class ClusterSettings(val config: Config, val systemName: String) {
cc.getMillisDuration("quarantine-removed-node-after")
.requiring(_ > Duration.Zero, "quarantine-removed-node-after must be > 0")
val AllowWeaklyUpMembers: Boolean = cc.getBoolean("allow-weakly-up-members")
val WeaklyUpAfter: FiniteDuration = {
val key = "allow-weakly-up-members"
toRootLowerCase(cc.getString(key)) match {
case "off" => Duration.Zero
case "on" => 7.seconds // for backwards compatibility when it wasn't a duration
case _ => cc.getMillisDuration(key).requiring(_ > Duration.Zero, key + " > 0s, or off")
}
}
val AllowWeaklyUpMembers: Boolean = WeaklyUpAfter != Duration.Zero
val SelfDataCenter: DataCenter = cc.getString("multi-data-center.self-data-center")

View file

@@ -24,7 +24,7 @@ object MemberWeaklyUpSpec extends MultiNodeConfig {
commonConfig(debugConfig(on = false).withFallback(ConfigFactory.parseString("""
akka.remote.retry-gate-closed-for = 3 s
akka.cluster.allow-weakly-up-members = on
akka.cluster.allow-weakly-up-members = 3 s
""")).withFallback(MultiNodeClusterSpec.clusterConfig))
testTransport(on = true)

View file

@@ -34,7 +34,7 @@ object MinMembersBeforeUpWithWeaklyUpMultiJvmSpec extends MultiNodeConfig {
debugConfig(on = false)
.withFallback(ConfigFactory.parseString("""
akka.cluster.min-nr-of-members = 3
akka.cluster.allow-weakly-up-members = on"""))
akka.cluster.allow-weakly-up-members = 3 s"""))
.withFallback(MultiNodeClusterSpec.clusterConfigWithFailureDetectorPuppet))
}