2012-06-05 15:46:26 +02:00
|
|
|
/**
|
|
|
|
|
* Copyright (C) 2009-2012 Typesafe Inc. <http://www.typesafe.com>
|
|
|
|
|
*/
|
|
|
|
|
package akka.cluster
|
|
|
|
|
|
2012-07-26 14:47:21 +02:00
|
|
|
import language.postfixOps
|
|
|
|
|
|
2012-06-05 15:46:26 +02:00
|
|
|
import org.scalatest.BeforeAndAfter
|
|
|
|
|
import akka.remote.testkit.MultiNodeConfig
|
|
|
|
|
import akka.remote.testkit.MultiNodeSpec
|
|
|
|
|
import akka.testkit._
|
|
|
|
|
import com.typesafe.config.ConfigFactory
|
|
|
|
|
import akka.actor.Address
|
2012-07-04 11:37:56 +02:00
|
|
|
import akka.remote.testconductor.{ RoleName, Direction }
|
2012-07-22 21:40:09 +02:00
|
|
|
import scala.concurrent.util.duration._
|
2012-06-05 15:46:26 +02:00
|
|
|
|
|
|
|
|
/**
 * Multi-node configuration for the "unreachable node rejoins cluster" spec.
 *
 * Declares the four roles taking part in the test and installs the common
 * configuration used by every node.
 */
object UnreachableNodeRejoinsClusterMultiJvmSpec extends MultiNodeConfig {
  // The four participating roles, registered in this order.
  val first = role("first")
  val second = role("second")
  val third = role("third")
  val fourth = role("fourth")

  // Debug settings are switched off and take precedence; anything they do not
  // define falls back to the shared cluster test configuration.
  commonConfig {
    val debugSettings = debugConfig(on = false)
    debugSettings.withFallback(MultiNodeClusterSpec.clusterConfig)
  }
}
|
|
|
|
|
|
2012-06-27 13:54:43 +02:00
|
|
|
/**
 * Concrete `UnreachableNodeRejoinsClusterSpec` with `FailureDetectorPuppetStrategy`
 * mixed in; declares no members of its own.
 * NOTE(review): the `MultiJvmNode1` suffix presumably binds this class to the first
 * JVM of the multi-JVM run — confirm against the build's naming convention.
 */
class UnreachableNodeRejoinsClusterWithFailureDetectorPuppetMultiJvmNode1 extends UnreachableNodeRejoinsClusterSpec with FailureDetectorPuppetStrategy
|
|
|
|
|
/**
 * Concrete `UnreachableNodeRejoinsClusterSpec` with `FailureDetectorPuppetStrategy`
 * mixed in; declares no members of its own.
 * NOTE(review): the `MultiJvmNode2` suffix presumably binds this class to the second
 * JVM of the multi-JVM run — confirm against the build's naming convention.
 */
class UnreachableNodeRejoinsClusterWithFailureDetectorPuppetMultiJvmNode2 extends UnreachableNodeRejoinsClusterSpec with FailureDetectorPuppetStrategy
|
|
|
|
|
/**
 * Concrete `UnreachableNodeRejoinsClusterSpec` with `FailureDetectorPuppetStrategy`
 * mixed in; declares no members of its own.
 * NOTE(review): the `MultiJvmNode3` suffix presumably binds this class to the third
 * JVM of the multi-JVM run — confirm against the build's naming convention.
 */
class UnreachableNodeRejoinsClusterWithFailureDetectorPuppetMultiJvmNode3 extends UnreachableNodeRejoinsClusterSpec with FailureDetectorPuppetStrategy
|
|
|
|
|
/**
 * Concrete `UnreachableNodeRejoinsClusterSpec` with `FailureDetectorPuppetStrategy`
 * mixed in; declares no members of its own.
 * NOTE(review): the `MultiJvmNode4` suffix presumably binds this class to the fourth
 * JVM of the multi-JVM run — confirm against the build's naming convention.
 */
class UnreachableNodeRejoinsClusterWithFailureDetectorPuppetMultiJvmNode4 extends UnreachableNodeRejoinsClusterSpec with FailureDetectorPuppetStrategy
|
2012-06-05 15:46:26 +02:00
|
|
|
|
2012-06-27 13:54:43 +02:00
|
|
|
/**
 * Concrete `UnreachableNodeRejoinsClusterSpec` with `AccrualFailureDetectorStrategy`
 * mixed in; declares no members of its own.
 * NOTE(review): the `MultiJvmNode1` suffix presumably binds this class to the first
 * JVM of the multi-JVM run — confirm against the build's naming convention.
 */
class UnreachableNodeRejoinsClusterWithAccrualFailureDetectorMultiJvmNode1 extends UnreachableNodeRejoinsClusterSpec with AccrualFailureDetectorStrategy
|
|
|
|
|
/**
 * Concrete `UnreachableNodeRejoinsClusterSpec` with `AccrualFailureDetectorStrategy`
 * mixed in; declares no members of its own.
 * NOTE(review): the `MultiJvmNode2` suffix presumably binds this class to the second
 * JVM of the multi-JVM run — confirm against the build's naming convention.
 */
class UnreachableNodeRejoinsClusterWithAccrualFailureDetectorMultiJvmNode2 extends UnreachableNodeRejoinsClusterSpec with AccrualFailureDetectorStrategy
|
|
|
|
|
/**
 * Concrete `UnreachableNodeRejoinsClusterSpec` with `AccrualFailureDetectorStrategy`
 * mixed in; declares no members of its own.
 * NOTE(review): the `MultiJvmNode3` suffix presumably binds this class to the third
 * JVM of the multi-JVM run — confirm against the build's naming convention.
 */
class UnreachableNodeRejoinsClusterWithAccrualFailureDetectorMultiJvmNode3 extends UnreachableNodeRejoinsClusterSpec with AccrualFailureDetectorStrategy
|
|
|
|
|
/**
 * Concrete `UnreachableNodeRejoinsClusterSpec` with `AccrualFailureDetectorStrategy`
 * mixed in; declares no members of its own.
 * NOTE(review): the `MultiJvmNode4` suffix presumably binds this class to the fourth
 * JVM of the multi-JVM run — confirm against the build's naming convention.
 */
class UnreachableNodeRejoinsClusterWithAccrualFailureDetectorMultiJvmNode4 extends UnreachableNodeRejoinsClusterSpec with AccrualFailureDetectorStrategy
|
|
|
|
|
|
|
|
|
|
/**
 * Scenario shared by the concrete `...MultiJvmNodeN` spec classes.
 *
 * The tests run in order and each one ends on a numbered barrier:
 *  1. all roles form a cluster,
 *  2. the network between the victim and every other role is blackholed and
 *     both sides are expected to see the other side as unreachable,
 *  3. the master downs the victim and the remaining roles converge,
 *  4. the network is restored, the victim joins via the master and all roles
 *     converge again.
 */
abstract class UnreachableNodeRejoinsClusterSpec
  extends MultiNodeSpec(UnreachableNodeRejoinsClusterMultiJvmSpec)
  with MultiNodeClusterSpec {

  import UnreachableNodeRejoinsClusterMultiJvmSpec._

  /**
   * Returns the given roles with `role` removed.
   *
   * @param role  the role to leave out
   * @param roles the roles to filter; defaults to this spec's `roles` member
   * @return `roles` without any element equal to `role`, in the original order
   */
  def allBut(role: RoleName, roles: Seq[RoleName] = roles): Seq[RoleName] =
    roles.filterNot(_ == role)

  /** This spec's roles in sorted order; `master` and `victim` are picked from it by position. */
  lazy val sortedRoles = roles.sorted

  /** First role in `sortedRoles`; it downs the victim and is the node the victim re-joins through. */
  lazy val master = sortedRoles(0)

  /** Second role in `sortedRoles`; the node that gets cut off from the others. */
  lazy val victim = sortedRoles(1)

  /** Number of end-of-test barriers entered so far; used to build the next barrier name. */
  var endBarrierNumber = 0

  /**
   * Enters the next end-of-test barrier, named `after_<n>` where `n` is the
   * incremented `endBarrierNumber`.
   *
   * Declared with `()` because it mutates state and enters a barrier.
   */
  def endBarrier(): Unit = {
    endBarrierNumber += 1
    enterBarrier(s"after_$endBarrierNumber")
  }

  s"A cluster of ${roles.size} members" must {

    "reach initial convergence" taggedAs LongRunningTest in {
      awaitClusterUp(roles: _*)
      endBarrier()
    }

    "mark a node as UNREACHABLE when we pull the network" taggedAs LongRunningTest in {
      // let them send at least one heartbeat to each other after the gossip convergence
      // because for new joining nodes we remove them from the failure detector when
      // we receive gossip
      Thread.sleep(2.seconds.dilated.toMillis)

      runOn(first) {
        // pull network for victim node from all nodes
        allBut(victim).foreach { roleName ⇒
          testConductor.blackhole(victim, roleName, Direction.Both).await
        }
      }

      enterBarrier("unplug_victim")

      val allButVictim = allBut(victim, sortedRoles)

      runOn(victim) {
        allButVictim.foreach(markNodeAsUnavailable(_))
        within(30 seconds) {
          // victim becomes all alone
          awaitCond({
            val members = cluster.members
            cluster.unreachableMembers.size == (roles.size - 1) &&
              members.size == 1 &&
              members.forall(_.status == MemberStatus.Up)
          })
          cluster.unreachableMembers.map(_.address) must be((allButVictim map address).toSet)
          cluster.convergence must be(false)
        }
      }

      runOn(allButVictim: _*) {
        markNodeAsUnavailable(victim)
        within(30 seconds) {
          // victim becomes unreachable
          awaitCond({
            val members = cluster.members
            cluster.unreachableMembers.size == 1 &&
              members.size == (roles.size - 1) &&
              members.forall(_.status == MemberStatus.Up)
          })
          awaitSeenSameState(allButVictim map address: _*)
          // still one unreachable
          cluster.unreachableMembers.size must be(1)
          cluster.unreachableMembers.head.address must be(node(victim).address)
          // and therefore no convergence
          cluster.convergence must be(false)
        }
      }

      endBarrier()
    }

    "mark the node as DOWN" taggedAs LongRunningTest in {
      runOn(master) {
        cluster down victim
      }

      // every role except the victim waits for convergence among the remaining members
      runOn(allBut(victim): _*) {
        awaitUpConvergence(roles.size - 1, Seq(victim))
      }

      endBarrier()
    }

    "allow node to REJOIN when the network is plugged back in" taggedAs LongRunningTest in {
      runOn(first) {
        // put the network back in
        allBut(victim).foreach { roleName ⇒
          testConductor.passThrough(victim, roleName, Direction.Both).await
        }
      }

      enterBarrier("plug_in_victim")

      runOn(victim) {
        cluster join master
      }

      // all roles, the victim included, wait for the full member count again
      awaitUpConvergence(roles.size)

      endBarrier()
    }
  }
}
|