Connect the dots for cross-dc reachability, #23377
* the crossDcFailureDetector was not connected to the reachability table
* additional test by listening for {Reachable/Unreachable}DataCenter events in split spec
* missing Java API for getUnreachableDataCenters in CurrentClusterState
This commit is contained in:
parent
eefe6474c3
commit
e3aada5016
3 changed files with 91 additions and 27 deletions
|
|
@ -3,9 +3,11 @@
|
|||
*/
|
||||
package akka.cluster
|
||||
|
||||
import akka.cluster.ClusterEvent.{ CurrentClusterState, DataCenterReachabilityEvent, ReachableDataCenter, UnreachableDataCenter }
|
||||
import akka.remote.testconductor.RoleName
|
||||
import akka.remote.testkit.{ MultiNodeConfig, MultiNodeSpec }
|
||||
import akka.remote.transport.ThrottlerTransportAdapter.Direction
|
||||
import akka.testkit.TestProbe
|
||||
import com.typesafe.config.ConfigFactory
|
||||
|
||||
import scala.concurrent.duration._
|
||||
|
|
@ -19,7 +21,12 @@ object MultiDcSplitBrainMultiJvmSpec extends MultiNodeConfig {
|
|||
commonConfig(ConfigFactory.parseString(
|
||||
"""
|
||||
akka.loglevel = INFO
|
||||
akka.cluster.run-coordinated-shutdown-when-down = off
|
||||
akka.cluster.multi-data-center {
|
||||
failure-detector {
|
||||
acceptable-heartbeat-pause = 4s
|
||||
heartbeat-interval = 1s
|
||||
}
|
||||
}
|
||||
""").withFallback(MultiNodeClusterSpec.clusterConfig))
|
||||
|
||||
nodeConfig(first, second)(ConfigFactory.parseString(
|
||||
|
|
@ -48,28 +55,75 @@ abstract class MultiDcSplitBrainSpec
|
|||
|
||||
val dc1 = List(first, second)
|
||||
val dc2 = List(third, fourth)
|
||||
var barrierCounter = 0
|
||||
|
||||
def splitDataCenters(notMembers: Set[RoleName]): Unit = {
|
||||
val memberNodes = (dc1 ++ dc2).filterNot(notMembers)
|
||||
val probe = TestProbe()
|
||||
runOn(memberNodes: _*) {
|
||||
cluster.subscribe(probe.ref, classOf[DataCenterReachabilityEvent])
|
||||
probe.expectMsgType[CurrentClusterState]
|
||||
}
|
||||
enterBarrier(s"split-$barrierCounter")
|
||||
barrierCounter += 1
|
||||
|
||||
def splitDataCenters(dc1: Seq[RoleName], dc2: Seq[RoleName]): Unit = {
|
||||
runOn(first) {
|
||||
for {
|
||||
dc1Node ← dc1
|
||||
dc2Node ← dc2
|
||||
} {
|
||||
for (dc1Node ← dc1; dc2Node ← dc2) {
|
||||
testConductor.blackhole(dc1Node, dc2Node, Direction.Both).await
|
||||
}
|
||||
}
|
||||
|
||||
enterBarrier(s"after-split-$barrierCounter")
|
||||
barrierCounter += 1
|
||||
|
||||
runOn(memberNodes: _*) {
|
||||
probe.expectMsgType[UnreachableDataCenter](15.seconds)
|
||||
cluster.unsubscribe(probe.ref)
|
||||
runOn(dc1: _*) {
|
||||
awaitAssert {
|
||||
cluster.state.unreachableDataCenters should ===(Set("dc2"))
|
||||
}
|
||||
}
|
||||
runOn(dc2: _*) {
|
||||
awaitAssert {
|
||||
cluster.state.unreachableDataCenters should ===(Set("dc1"))
|
||||
}
|
||||
}
|
||||
cluster.state.unreachable should ===(Set.empty)
|
||||
}
|
||||
enterBarrier(s"after-split-verified-$barrierCounter")
|
||||
barrierCounter += 1
|
||||
}
|
||||
|
||||
def unsplitDataCenters(dc1: Seq[RoleName], dc2: Seq[RoleName]): Unit = {
|
||||
def unsplitDataCenters(notMembers: Set[RoleName]): Unit = {
|
||||
val memberNodes = (dc1 ++ dc2).filterNot(notMembers)
|
||||
val probe = TestProbe()
|
||||
runOn(memberNodes: _*) {
|
||||
cluster.subscribe(probe.ref, classOf[DataCenterReachabilityEvent])
|
||||
probe.expectMsgType[CurrentClusterState]
|
||||
}
|
||||
enterBarrier(s"unsplit-$barrierCounter")
|
||||
barrierCounter += 1
|
||||
|
||||
runOn(first) {
|
||||
for {
|
||||
dc1Node ← dc1
|
||||
dc2Node ← dc2
|
||||
} {
|
||||
for (dc1Node ← dc1; dc2Node ← dc2) {
|
||||
testConductor.passThrough(dc1Node, dc2Node, Direction.Both).await
|
||||
}
|
||||
}
|
||||
|
||||
enterBarrier(s"after-unsplit-$barrierCounter")
|
||||
barrierCounter += 1
|
||||
|
||||
runOn(memberNodes: _*) {
|
||||
probe.expectMsgType[ReachableDataCenter](15.seconds)
|
||||
cluster.unsubscribe(probe.ref)
|
||||
awaitAssert {
|
||||
cluster.state.unreachableDataCenters should ===(Set.empty)
|
||||
}
|
||||
}
|
||||
enterBarrier(s"after-unsplit-verified-$barrierCounter")
|
||||
barrierCounter += 1
|
||||
|
||||
}
|
||||
|
||||
"A cluster with multiple data centers" must {
|
||||
|
|
@ -79,8 +133,7 @@ abstract class MultiDcSplitBrainSpec
|
|||
|
||||
"be able to have a data center member join while there is inter data center split" in within(20.seconds) {
|
||||
// introduce a split between data centers
|
||||
splitDataCenters(dc1 = List(first, second), dc2 = List(third))
|
||||
enterBarrier("data-center-split-1")
|
||||
splitDataCenters(notMembers = Set(fourth))
|
||||
|
||||
runOn(fourth) {
|
||||
cluster.join(third)
|
||||
|
|
@ -96,8 +149,7 @@ abstract class MultiDcSplitBrainSpec
|
|||
}
|
||||
enterBarrier("dc2-join-completed")
|
||||
|
||||
unsplitDataCenters(dc1 = List(first, second), dc2 = List(third))
|
||||
enterBarrier("data-center-unsplit-1")
|
||||
unsplitDataCenters(notMembers = Set.empty)
|
||||
|
||||
runOn(dc1: _*) {
|
||||
awaitAssert(clusterView.members.collect {
|
||||
|
|
@ -109,8 +161,7 @@ abstract class MultiDcSplitBrainSpec
|
|||
}
|
||||
|
||||
"be able to have data center member leave while there is inter data center split" in within(20.seconds) {
|
||||
splitDataCenters(dc1, dc2)
|
||||
enterBarrier("data-center-split-2")
|
||||
splitDataCenters(notMembers = Set.empty)
|
||||
|
||||
runOn(fourth) {
|
||||
cluster.leave(fourth)
|
||||
|
|
@ -121,8 +172,7 @@ abstract class MultiDcSplitBrainSpec
|
|||
}
|
||||
enterBarrier("node-4-left")
|
||||
|
||||
unsplitDataCenters(dc1, List(third))
|
||||
enterBarrier("data-center-unsplit-2")
|
||||
unsplitDataCenters(notMembers = Set(fourth))
|
||||
|
||||
runOn(first, second) {
|
||||
awaitAssert(clusterView.members.filter(_.address == address(fourth)) should ===(Set.empty))
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue