2018-10-29 17:19:37 +08:00
|
|
|
/*
 * Copyright (C) 2009-2021 Lightbend Inc. <https://www.lightbend.com>
 */
|
2018-04-24 16:03:55 +01:00
|
|
|
|
2012-02-18 17:48:07 +01:00
|
|
|
package akka.cluster
|
2012-01-28 15:34:46 +01:00
|
|
|
|
2020-04-27 20:32:18 +08:00
|
|
|
import scala.concurrent.duration._
|
|
|
|
|
|
2012-05-28 13:48:58 +02:00
|
|
|
import com.typesafe.config.ConfigFactory
|
2020-04-27 20:32:18 +08:00
|
|
|
|
2012-05-28 13:48:58 +02:00
|
|
|
import akka.remote.testkit.MultiNodeConfig
|
|
|
|
|
import akka.remote.testkit.MultiNodeSpec
|
2013-08-27 15:14:53 +02:00
|
|
|
import akka.remote.transport.ThrottlerTransportAdapter.Direction
|
2012-05-28 13:48:58 +02:00
|
|
|
import akka.testkit._
|
|
|
|
|
|
2012-06-20 11:06:47 +02:00
|
|
|
/**
 * Multi-node configuration shared by every JVM taking part in the
 * cluster failure detector spec.
 *
 * Declares the three roles (`first`, `second`, `third`), installs the common
 * config and turns on the test transport.
 */
object ClusterAccrualFailureDetectorMultiJvmSpec extends MultiNodeConfig {

  // Roles are registered in this order; the spec refers to them by these names.
  val first = role("first")
  val second = role("second")
  val third = role("third")

  // Override of the cluster failure detector threshold used by this spec.
  private val failureDetectorOverride =
    ConfigFactory.parseString("akka.cluster.failure-detector.threshold = 4")

  // Precedence (highest first): debug settings, the threshold override,
  // then the generic cluster test config.
  commonConfig(
    debugConfig(on = false)
      .withFallback(failureDetectorOverride)
      .withFallback(MultiNodeClusterSpec.clusterConfig))

  // NOTE(review): presumably required so that testConductor.blackhole / passThrough
  // in the spec can manipulate the links between nodes -- confirm against the
  // multi-node testkit documentation.
  testTransport(on = true)
}
|
2012-01-28 15:34:46 +01:00
|
|
|
|
2012-09-06 21:48:40 +02:00
|
|
|
// Concrete, body-less subclass of ClusterAccrualFailureDetectorSpec.
// NOTE(review): presumably the entry point run on the first JVM, selected by the
// "MultiJvmNode1" name suffix -- confirm against the multi-jvm runner naming rules.
class ClusterAccrualFailureDetectorMultiJvmNode1 extends ClusterAccrualFailureDetectorSpec
|
|
|
|
|
// Concrete, body-less subclass of ClusterAccrualFailureDetectorSpec.
// NOTE(review): presumably the entry point run on the second JVM, selected by the
// "MultiJvmNode2" name suffix -- confirm against the multi-jvm runner naming rules.
class ClusterAccrualFailureDetectorMultiJvmNode2 extends ClusterAccrualFailureDetectorSpec
|
|
|
|
|
// Concrete, body-less subclass of ClusterAccrualFailureDetectorSpec.
// NOTE(review): presumably the entry point run on the third JVM, selected by the
// "MultiJvmNode3" name suffix -- confirm against the multi-jvm runner naming rules.
class ClusterAccrualFailureDetectorMultiJvmNode3 extends ClusterAccrualFailureDetectorSpec
|
2012-01-28 15:34:46 +01:00
|
|
|
|
2012-06-20 11:06:47 +02:00
|
|
|
/**
 * Three-node scenario exercising the cluster failure detector.
 *
 * The steps run in order and are synchronised across nodes with barriers:
 *  1. all three nodes join and report each other as available;
 *  2. the link between `first` and `second` is blackholed, both sides see the
 *     other as unavailable, then the link is restored and availability returns;
 *  3. `third` is shut down and the remaining nodes see it as unavailable.
 */
abstract class ClusterAccrualFailureDetectorSpec
    extends MultiNodeSpec(ClusterAccrualFailureDetectorMultiJvmSpec)
    with MultiNodeClusterSpec {

  import ClusterAccrualFailureDetectorMultiJvmSpec._

  muteMarkingAsUnreachable()

  "A heartbeat driven Failure Detector" must {

    "receive heartbeats so that all member nodes in the cluster are marked 'available'" taggedAs LongRunningTest in {
      // Looked up on every use, exactly like spelling out cluster.failureDetector each time.
      def detector = cluster.failureDetector

      awaitClusterUp(first, second, third)

      // let them heartbeat
      val heartbeatPause = 5.seconds.dilated
      Thread.sleep(heartbeatPause.toMillis)

      // Every member, including this node itself, must be considered available.
      List(first, second, third).foreach { member =>
        detector.isAvailable(member) should ===(true)
      }

      enterBarrier("after-1")
    }

    "mark node as 'unavailable' when network partition and then back to 'available' when partition is healed" taggedAs
    LongRunningTest in {
      def detector = cluster.failureDetector
      val detectionTimeout = 15.seconds

      // Only `first` talks to the conductor; it cuts the first <-> second link in both directions.
      runOn(first) {
        testConductor.blackhole(first, second, Direction.Both).await
      }

      enterBarrier("broken")

      // Each side of the broken link must notice the other one is gone,
      // while its connection to `third` stays healthy.
      runOn(second) {
        // detect failure...
        awaitCond(!detector.isAvailable(first), detectionTimeout)
        // other connections still ok
        detector.isAvailable(third) should ===(true)
      }

      runOn(first) {
        // detect failure...
        awaitCond(!detector.isAvailable(second), detectionTimeout)
        // other connections still ok
        detector.isAvailable(third) should ===(true)
      }

      enterBarrier("partitioned")

      // Heal the partition.
      runOn(first) {
        testConductor.passThrough(first, second, Direction.Both).await
      }

      enterBarrier("repaired")

      // After the repair the previously unreachable peers become available again.
      runOn(second) {
        awaitCond(detector.isAvailable(first), detectionTimeout)
      }

      runOn(first, third) {
        awaitCond(detector.isAvailable(second), detectionTimeout)
      }

      enterBarrier("after-2")
    }

    "mark node as 'unavailable' if a node in the cluster is shut down (and its heartbeats stops)" taggedAs LongRunningTest in {
      def detector = cluster.failureDetector
      val detectionTimeout = 15.seconds

      // `first` asks the conductor to terminate `third` with exit code 0.
      runOn(first) {
        testConductor.exit(third, 0).await
      }

      enterBarrier("third-shutdown")

      runOn(first, second) {
        // remaining nodes should detect failure...
        awaitCond(!detector.isAvailable(third), detectionTimeout)
        // other connections still ok
        List(first, second).foreach { survivor =>
          detector.isAvailable(survivor) should ===(true)
        }
      }

      enterBarrier("after-3")
    }
  }
}
|