113 lines
3.5 KiB
Scala
113 lines
3.5 KiB
Scala
/*
|
|
* Copyright (C) 2009-2021 Lightbend Inc. <https://www.lightbend.com>
|
|
*/
|
|
|
|
package akka.cluster
|
|
|
|
import scala.concurrent.duration._
|
|
|
|
import com.typesafe.config.ConfigFactory
|
|
|
|
import akka.remote.testkit.MultiNodeConfig
|
|
import akka.remote.testkit.MultiNodeSpec
|
|
import akka.remote.transport.ThrottlerTransportAdapter.Direction
|
|
import akka.testkit._
|
|
|
|
object ClusterAccrualFailureDetectorMultiJvmSpec extends MultiNodeConfig {
|
|
val first = role("first")
|
|
val second = role("second")
|
|
val third = role("third")
|
|
|
|
commonConfig(
|
|
debugConfig(on = false)
|
|
.withFallback(ConfigFactory.parseString("akka.cluster.failure-detector.threshold = 4"))
|
|
.withFallback(MultiNodeClusterSpec.clusterConfig))
|
|
|
|
testTransport(on = true)
|
|
}
|
|
|
|
class ClusterAccrualFailureDetectorMultiJvmNode1 extends ClusterAccrualFailureDetectorSpec
|
|
class ClusterAccrualFailureDetectorMultiJvmNode2 extends ClusterAccrualFailureDetectorSpec
|
|
class ClusterAccrualFailureDetectorMultiJvmNode3 extends ClusterAccrualFailureDetectorSpec
|
|
|
|
abstract class ClusterAccrualFailureDetectorSpec
|
|
extends MultiNodeSpec(ClusterAccrualFailureDetectorMultiJvmSpec)
|
|
with MultiNodeClusterSpec {
|
|
|
|
import ClusterAccrualFailureDetectorMultiJvmSpec._
|
|
|
|
muteMarkingAsUnreachable()
|
|
|
|
"A heartbeat driven Failure Detector" must {
|
|
|
|
"receive heartbeats so that all member nodes in the cluster are marked 'available'" taggedAs LongRunningTest in {
|
|
awaitClusterUp(first, second, third)
|
|
|
|
Thread.sleep(5.seconds.dilated.toMillis) // let them heartbeat
|
|
cluster.failureDetector.isAvailable(first) should ===(true)
|
|
cluster.failureDetector.isAvailable(second) should ===(true)
|
|
cluster.failureDetector.isAvailable(third) should ===(true)
|
|
|
|
enterBarrier("after-1")
|
|
}
|
|
|
|
"mark node as 'unavailable' when network partition and then back to 'available' when partition is healed" taggedAs
|
|
LongRunningTest in {
|
|
runOn(first) {
|
|
testConductor.blackhole(first, second, Direction.Both).await
|
|
}
|
|
|
|
enterBarrier("broken")
|
|
|
|
runOn(first) {
|
|
// detect failure...
|
|
awaitCond(!cluster.failureDetector.isAvailable(second), 15.seconds)
|
|
// other connections still ok
|
|
cluster.failureDetector.isAvailable(third) should ===(true)
|
|
}
|
|
|
|
runOn(second) {
|
|
// detect failure...
|
|
awaitCond(!cluster.failureDetector.isAvailable(first), 15.seconds)
|
|
// other connections still ok
|
|
cluster.failureDetector.isAvailable(third) should ===(true)
|
|
}
|
|
|
|
enterBarrier("partitioned")
|
|
|
|
runOn(first) {
|
|
testConductor.passThrough(first, second, Direction.Both).await
|
|
}
|
|
|
|
enterBarrier("repaired")
|
|
|
|
runOn(first, third) {
|
|
awaitCond(cluster.failureDetector.isAvailable(second), 15.seconds)
|
|
}
|
|
|
|
runOn(second) {
|
|
awaitCond(cluster.failureDetector.isAvailable(first), 15.seconds)
|
|
}
|
|
|
|
enterBarrier("after-2")
|
|
}
|
|
|
|
"mark node as 'unavailable' if a node in the cluster is shut down (and its heartbeats stops)" taggedAs LongRunningTest in {
|
|
runOn(first) {
|
|
testConductor.exit(third, 0).await
|
|
}
|
|
|
|
enterBarrier("third-shutdown")
|
|
|
|
runOn(first, second) {
|
|
// remaining nodes should detect failure...
|
|
awaitCond(!cluster.failureDetector.isAvailable(third), 15.seconds)
|
|
// other connections still ok
|
|
cluster.failureDetector.isAvailable(first) should ===(true)
|
|
cluster.failureDetector.isAvailable(second) should ===(true)
|
|
}
|
|
|
|
enterBarrier("after-3")
|
|
}
|
|
}
|
|
}
|