Make joining to the same node multiple times work, and reenable blackhole test. See #2930

This commit is contained in:
Björn Antonsson 2013-03-20 10:32:18 +01:00
parent f18575e251
commit 5827a27b94
5 changed files with 57 additions and 27 deletions

View file

@ -182,6 +182,27 @@ trait MultiNodeClusterSpec extends Suite with STMultiNodeSpec with WatchedByCoro
enterBarrier(roles.map(_.name).mkString("-") + "-joined")
}
/**
* Join the specific node within the given period by sending repeated join
* requests at periodic intervals until we succeed.
*/
def joinWithin(joinNode: RoleName, max: Duration = remaining, interval: Duration = 1.second): Unit = {
def memberInState(member: Address, status: Seq[MemberStatus]): Boolean =
clusterView.members.exists { m (m.address == member) && status.contains(m.status) }
cluster join joinNode
awaitCond({
clusterView.refreshCurrentState()
if (memberInState(joinNode, List(MemberStatus.up)) &&
memberInState(myself, List(MemberStatus.Joining, MemberStatus.Up)))
true
else {
cluster join joinNode
false
}
}, max, interval)
}
/**
* Assert that the member addresses match the expected addresses in the
* sort order used by the cluster.

View file

@ -25,10 +25,14 @@ case class UnreachableNodeRejoinsClusterMultiNodeConfig(failureDetectorPuppet: B
commonConfig(ConfigFactory.parseString(
"""
# this setting is here to limit the number of retries and failures while the
# node is being blackholed
akka.remote.failure-detector.retry-gate-closed-for = 500 ms
akka.remote.log-remote-lifecycle-events = off
akka.cluster.publish-stats-interval = 0s
akka.loglevel = INFO
""").withFallback(debugConfig(on = false).withFallback(MultiNodeClusterSpec.clusterConfig)))
""").withFallback(debugConfig(on = false).withFallback(MultiNodeClusterSpec.clusterConfig(failureDetectorPuppet))))
testTransport(on = true)
}
@ -74,8 +78,7 @@ abstract class UnreachableNodeRejoinsClusterSpec(multiNodeConfig: UnreachableNod
endBarrier
}
// FIXME ignored due to ticket #2930 - timeout changing throttler mode
"mark a node as UNREACHABLE when we pull the network" taggedAs LongRunningTest ignore {
"mark a node as UNREACHABLE when we pull the network" taggedAs LongRunningTest in {
// let them send at least one heartbeat to each other after the gossip convergence
// because for new joining nodes we remove them from the failure detector when
// receive gossip
@ -125,8 +128,7 @@ abstract class UnreachableNodeRejoinsClusterSpec(multiNodeConfig: UnreachableNod
endBarrier
}
// FIXME ignored due to ticket #2930 - timeout changing throttler mode
"mark the node as DOWN" taggedAs LongRunningTest ignore {
"mark the node as DOWN" taggedAs LongRunningTest in {
runOn(master) {
cluster down victim
}
@ -135,13 +137,12 @@ abstract class UnreachableNodeRejoinsClusterSpec(multiNodeConfig: UnreachableNod
awaitMembersUp(roles.size - 1, Set(victim))
// eventually removed
awaitCond(clusterView.unreachableMembers.isEmpty, 15 seconds)
}
}
endBarrier
}
// FIXME ignored due to ticket #2930 - timeout changing throttler mode
"allow node to REJOIN when the network is plugged back in" taggedAs LongRunningTest ignore {
"allow node to REJOIN when the network is plugged back in" taggedAs LongRunningTest in {
runOn(first) {
// put the network back in
allBut(victim).foreach { roleName
@ -152,7 +153,7 @@ abstract class UnreachableNodeRejoinsClusterSpec(multiNodeConfig: UnreachableNod
enterBarrier("plug_in_victim")
runOn(victim) {
cluster join master
joinWithin(master, 10.seconds)
}
awaitMembersUp(roles.size)