Merge pull request #25678 from akka/wip-25632-down-Terminated-patriknw

Don't automatically down quarantined node, #25632
This commit is contained in:
Patrik Nordwall 2018-09-26 13:30:23 +02:00 committed by GitHub
commit d65a90b688
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
2 changed files with 26 additions and 5 deletions

View file

@ -855,10 +855,10 @@ private[cluster] class ClusterCoreDaemon(publisher: ActorRef, joinConfigCompatCh
val newGossip = localGossip copy (overview = newOverview)
updateLatestGossip(newGossip)
log.warning(
"Cluster Node [{}] - Marking node as TERMINATED [{}], due to quarantine. Node roles [{}]",
"Cluster Node [{}] - Marking node as TERMINATED [{}], due to quarantine. Node roles [{}]. " +
"It must still be marked as down before it's removed.",
selfAddress, node.address, selfRoles.mkString(","))
publishMembershipState()
downing(node.address)
}
}

View file

@ -256,7 +256,7 @@ abstract class SurviveNetworkInstabilitySpec
assertCanTalk((joining ++ others :+ first): _*)
}
"down and remove quarantined node" taggedAs LongRunningTest in within(60.seconds) {
"mark quarantined node with reachability status Terminated" taggedAs LongRunningTest in within(60.seconds) {
val others = Vector(first, third, fourth, fifth, sixth, seventh)
runOn(third) {
@ -295,9 +295,30 @@ abstract class SurviveNetworkInstabilitySpec
enterBarrier("quarantined")
runOn(others: _*) {
// second should be removed because of quarantine
awaitAssert(clusterView.members.map(_.address) should not contain (address(second)))
// second should not be downed automatically, see issue #25632
Thread.sleep(2000)
val secondUniqueAddress = cluster.state.members.find(_.address == address(second)) match {
case None => fail("Unexpected removal of quarantined node")
case Some(m) =>
m.status should ===(MemberStatus.Up) // not Down
m.uniqueAddress
}
// second should be marked with reachability status Terminated because of quarantine
awaitAssert(clusterView.reachability.status(secondUniqueAddress) should ===(Reachability.Terminated))
}
enterBarrier("reachability-terminated")
runOn(fourth) {
cluster.down(address(second))
}
runOn(others: _*) {
// second should now be removed because it was explicitly downed after the quarantine
awaitAssert(clusterView.members.map(_.address) should not contain address(second))
// and also removed from reachability table
awaitAssert(clusterView.reachability.allUnreachableOrTerminated should ===(Set.empty))
}
enterBarrier("removed-after-down")
enterBarrier("after-6")
assertCanTalk(others: _*)