MultiDcSplitBrainSpec: Turn on gossip loggig; Increase gossip frequency (#24024)
The last time this failed there was no gossip to or from a node that didn't see fifth coming back. Also note that this test doesn't quite test what it says as the split brain is repaired before starting the second actor system but without extensions to the multi jvm test kit this can't be improved. Refs #23306
This commit is contained in:
parent
e8ff22e035
commit
3bd05ce67e
2 changed files with 26 additions and 12 deletions
|
|
@ -25,6 +25,7 @@ object MultiDcSplitBrainMultiJvmSpec extends MultiNodeConfig {
|
|||
"""
|
||||
akka.loglevel = DEBUG
|
||||
akka.cluster.debug.verbose-heartbeat-logging = on
|
||||
akka.cluster.debug.verbose-gossip-logging = on
|
||||
akka.remote.netty.tcp.connection-timeout = 5 s # speedup in case of connection issue
|
||||
akka.remote.retry-gate-closed-for = 1 s
|
||||
akka.cluster.multi-data-center {
|
||||
|
|
@ -34,7 +35,7 @@ object MultiDcSplitBrainMultiJvmSpec extends MultiNodeConfig {
|
|||
}
|
||||
}
|
||||
akka.cluster {
|
||||
gossip-interval = 1s
|
||||
gossip-interval = 500ms
|
||||
leader-actions-interval = 1s
|
||||
auto-down-unreachable-after = 1s
|
||||
}
|
||||
|
|
@ -69,8 +70,8 @@ abstract class MultiDcSplitBrainSpec
|
|||
val dc2 = List(third, fourth, fifth)
|
||||
var barrierCounter = 0
|
||||
|
||||
def splitDataCenters(notMembers: Set[RoleName]): Unit = {
|
||||
val memberNodes = (dc1 ++ dc2).filterNot(notMembers)
|
||||
def splitDataCenters(doNotVerify: Set[RoleName]): Unit = {
|
||||
val memberNodes = (dc1 ++ dc2).filterNot(doNotVerify)
|
||||
val probe = TestProbe()
|
||||
runOn(memberNodes: _*) {
|
||||
cluster.subscribe(probe.ref, classOf[DataCenterReachabilityEvent])
|
||||
|
|
@ -145,7 +146,7 @@ abstract class MultiDcSplitBrainSpec
|
|||
|
||||
"be able to have a data center member join while there is inter data center split" in within(20.seconds) {
|
||||
// introduce a split between data centers
|
||||
splitDataCenters(notMembers = Set(fourth, fifth))
|
||||
splitDataCenters(doNotVerify = Set(fourth, fifth))
|
||||
|
||||
runOn(fourth) {
|
||||
cluster.join(third)
|
||||
|
|
@ -172,8 +173,10 @@ abstract class MultiDcSplitBrainSpec
|
|||
enterBarrier("inter-data-center-split-1-done")
|
||||
}
|
||||
|
||||
// fifth is still not a member of the cluster
|
||||
|
||||
"be able to have data center member leave while there is inter data center split" in within(20.seconds) {
|
||||
splitDataCenters(notMembers = Set(fifth))
|
||||
splitDataCenters(doNotVerify = Set(fifth))
|
||||
|
||||
runOn(fourth) {
|
||||
cluster.leave(fourth)
|
||||
|
|
@ -192,6 +195,8 @@ abstract class MultiDcSplitBrainSpec
|
|||
enterBarrier("inter-data-center-split-2-done")
|
||||
}
|
||||
|
||||
// forth has left the cluster, fifth is still not a member
|
||||
|
||||
"be able to have data center member restart (same host:port) while there is inter data center split" in within(60.seconds) {
|
||||
val subscribeProbe = TestProbe()
|
||||
runOn(first, second, third, fifth) {
|
||||
|
|
@ -199,6 +204,7 @@ abstract class MultiDcSplitBrainSpec
|
|||
subscribeProbe.expectMsgType[CurrentClusterState]
|
||||
}
|
||||
enterBarrier("subscribed")
|
||||
|
||||
runOn(fifth) {
|
||||
Cluster(system).join(third)
|
||||
}
|
||||
|
|
@ -211,16 +217,18 @@ abstract class MultiDcSplitBrainSpec
|
|||
}
|
||||
enterBarrier("fifth-joined")
|
||||
|
||||
splitDataCenters(notMembers = Set(fourth))
|
||||
splitDataCenters(doNotVerify = Set(fourth))
|
||||
|
||||
runOn(fifth) {
|
||||
Cluster(system).shutdown()
|
||||
}
|
||||
|
||||
runOn(third) {
|
||||
awaitAssert(clusterView.members.collect {
|
||||
case m if m.dataCenter == "dc2" ⇒ m.address
|
||||
} should ===(Set(address(third))))
|
||||
}
|
||||
|
||||
enterBarrier("fifth-removed")
|
||||
|
||||
runOn(fifth) {
|
||||
|
|
@ -230,11 +238,13 @@ abstract class MultiDcSplitBrainSpec
|
|||
enterBarrier("fifth-waiting-for-termination")
|
||||
Await.ready(system.whenTerminated, remaining)
|
||||
|
||||
val port = Cluster(system).selfAddress.port.get
|
||||
val restartedSystem = ActorSystem(
|
||||
system.name,
|
||||
ConfigFactory.parseString(s"""
|
||||
akka.remote.netty.tcp.port = ${Cluster(system).selfAddress.port.get}
|
||||
akka.remote.artery.canonical.port = ${Cluster(system).selfAddress.port.get}
|
||||
ConfigFactory.parseString(
|
||||
s"""
|
||||
akka.remote.netty.tcp.port = $port
|
||||
akka.remote.artery.canonical.port = $port
|
||||
akka.coordinated-shutdown.terminate-actor-system = on
|
||||
""").withFallback(system.settings.config))
|
||||
Cluster(restartedSystem).join(thirdAddress)
|
||||
|
|
@ -254,6 +264,7 @@ abstract class MultiDcSplitBrainSpec
|
|||
}
|
||||
testConductor.shutdown(fifth)
|
||||
}
|
||||
|
||||
runOn(remainingRoles: _*) {
|
||||
enterBarrier("fifth-restarted")
|
||||
}
|
||||
|
|
@ -267,13 +278,16 @@ abstract class MultiDcSplitBrainSpec
|
|||
subscribeProbe.expectMsgType[MemberRemoved].member.uniqueAddress should ===(fifthOriginalUniqueAddress.get)
|
||||
subscribeProbe.expectMsgType[MemberUp].member.address should ===(fifthOriginalUniqueAddress.get.address)
|
||||
}
|
||||
|
||||
runOn(remainingRoles: _*) {
|
||||
enterBarrier("fifth-re-joined")
|
||||
}
|
||||
|
||||
runOn(first) {
|
||||
// to shutdown the restartedSystem on fifth
|
||||
Cluster(system).leave(fifthOriginalUniqueAddress.get.address)
|
||||
}
|
||||
|
||||
runOn(first, second, third) {
|
||||
awaitAssert({
|
||||
clusterView.members.map(_.address) should ===(Set(address(first), address(second), address(third)))
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue