Refactor cluster startup join in tests and fix barrier race

* Refactored common code to MultiNodeClusterSpec.awaitClusterUp
* Fixed some race conditions of barriers
This commit is contained in:
Patrik Nordwall 2012-06-05 14:13:44 +02:00
parent 5ccfb2cfee
commit f02793ebd6
18 changed files with 85 additions and 208 deletions

View file

@ -34,13 +34,10 @@ class ClientDowningNodeThatIsUnreachableSpec
"Client of a 4 node cluster" must {
"be able to DOWN a node that is UNREACHABLE (killed)" taggedAs LongRunningTest in {
val thirdAddress = node(third).address
awaitClusterUp(first, second, third, fourth)
runOn(first) {
startClusterNode()
awaitUpConvergence(numberOfMembers = 4)
val thirdAddress = node(third).address
testConductor.enter("all-up")
// kill 'third' node
testConductor.shutdown(third, 0)
@ -50,28 +47,19 @@ class ClientDowningNodeThatIsUnreachableSpec
awaitUpConvergence(numberOfMembers = 3, canNotBePartOfMemberRing = Seq(thirdAddress))
cluster.latestGossip.members.exists(_.address == thirdAddress) must be(false)
testConductor.enter("await-completion")
}
runOn(third) {
cluster.join(node(first).address)
awaitUpConvergence(numberOfMembers = 4)
testConductor.enter("all-up")
testConductor.enter("down-third-node")
}
runOn(second, fourth) {
cluster.join(node(first).address)
awaitUpConvergence(numberOfMembers = 4)
val thirdAddress = node(third).address
testConductor.enter("all-up")
testConductor.enter("down-third-node")
awaitUpConvergence(numberOfMembers = 3, canNotBePartOfMemberRing = Seq(thirdAddress))
testConductor.enter("await-completion")
}
testConductor.enter("await-completion")
}
}
}

View file

@ -34,42 +34,29 @@ class ClientDowningNodeThatIsUpSpec
"Client of a 4 node cluster" must {
"be able to DOWN a node that is UP (healthy and available)" taggedAs LongRunningTest in {
val thirdAddress = node(third).address
awaitClusterUp(first, second, third, fourth)
runOn(first) {
startClusterNode()
awaitUpConvergence(numberOfMembers = 4)
val thirdAddress = node(third).address
testConductor.enter("all-up")
// mark 'third' node as DOWN
cluster.down(thirdAddress)
testConductor.enter("down-third-node")
awaitUpConvergence(numberOfMembers = 3, canNotBePartOfMemberRing = Seq(thirdAddress))
cluster.latestGossip.members.exists(_.address == thirdAddress) must be(false)
testConductor.enter("await-completion")
}
runOn(third) {
cluster.join(node(first).address)
awaitUpConvergence(numberOfMembers = 4)
testConductor.enter("all-up")
testConductor.enter("down-third-node")
testConductor.enter("await-completion")
}
runOn(second, fourth) {
cluster.join(node(first).address)
awaitUpConvergence(numberOfMembers = 4)
val thirdAddress = node(third).address
testConductor.enter("all-up")
testConductor.enter("down-third-node")
awaitUpConvergence(numberOfMembers = 3, canNotBePartOfMemberRing = Seq(thirdAddress))
testConductor.enter("await-completion")
}
testConductor.enter("await-completion")
}
}
}

View file

@ -40,15 +40,7 @@ abstract class ConvergenceSpec
"A cluster of 3 members" must {
"reach initial convergence" taggedAs LongRunningTest in {
runOn(first) {
cluster.self
awaitUpConvergence(numberOfMembers = 3)
}
runOn(second, third) {
cluster.join(node(first).address)
awaitUpConvergence(numberOfMembers = 3)
}
awaitClusterUp(first, second, third)
runOn(fourth) {
// doesn't join immediately
@ -70,7 +62,7 @@ abstract class ConvergenceSpec
val firstAddress = node(first).address
val secondAddress = node(second).address
within(25 seconds) {
within(28 seconds) {
// third becomes unreachable
awaitCond(cluster.latestGossip.overview.unreachable.size == 1)
awaitCond(cluster.latestGossip.members.size == 2)

View file

@ -36,13 +36,7 @@ abstract class GossipingAccrualFailureDetectorSpec extends MultiNodeSpec(Gossipi
"A Gossip-driven Failure Detector" must {
"receive gossip heartbeats so that all member nodes in the cluster are marked 'available'" taggedAs LongRunningTest in {
// make sure that the node-to-join is started before other join
runOn(first) {
startClusterNode()
}
testConductor.enter("first-started")
cluster.join(firstAddress)
awaitClusterUp(first, second, third)
5.seconds.dilated.sleep // let them gossip
cluster.failureDetector.isAvailable(firstAddress) must be(true)

View file

@ -42,13 +42,10 @@ class LeaderDowningNodeThatIsUnreachableSpec
"The Leader in a 4 node cluster" must {
"be able to DOWN a 'last' node that is UNREACHABLE" taggedAs LongRunningTest in {
val fourthAddress = node(fourth).address
awaitClusterUp(first, second, third, fourth)
runOn(first) {
startClusterNode()
awaitUpConvergence(numberOfMembers = 4)
val fourthAddress = node(fourth).address
testConductor.enter("all-up")
// kill 'fourth' node
testConductor.shutdown(fourth, 0)
testConductor.enter("down-fourth-node")
@ -56,38 +53,26 @@ class LeaderDowningNodeThatIsUnreachableSpec
// --- HERE THE LEADER SHOULD DETECT FAILURE AND AUTO-DOWN THE UNREACHABLE NODE ---
awaitUpConvergence(numberOfMembers = 3, canNotBePartOfMemberRing = Seq(fourthAddress), 30.seconds)
testConductor.enter("await-completion")
}
runOn(fourth) {
cluster.join(node(first).address)
awaitUpConvergence(numberOfMembers = 4)
testConductor.enter("all-up")
testConductor.enter("down-fourth-node")
}
runOn(second, third) {
cluster.join(node(first).address)
awaitUpConvergence(numberOfMembers = 4)
val fourthAddress = node(fourth).address
testConductor.enter("all-up")
testConductor.enter("down-fourth-node")
awaitUpConvergence(numberOfMembers = 3, canNotBePartOfMemberRing = Seq(fourthAddress), 30.seconds)
testConductor.enter("await-completion")
}
testConductor.enter("await-completion-1")
}
"be able to DOWN a 'middle' node that is UNREACHABLE" taggedAs LongRunningTest in {
val secondAddress = node(second).address
testConductor.enter("before-down-second-node")
runOn(first) {
cluster.self
awaitUpConvergence(numberOfMembers = 3)
val secondAddress = node(second).address
testConductor.enter("all-up")
// kill 'second' node
testConductor.shutdown(second, 0)
testConductor.enter("down-second-node")
@ -95,28 +80,19 @@ class LeaderDowningNodeThatIsUnreachableSpec
// --- HERE THE LEADER SHOULD DETECT FAILURE AND AUTO-DOWN THE UNREACHABLE NODE ---
awaitUpConvergence(numberOfMembers = 2, canNotBePartOfMemberRing = Seq(secondAddress), 30.seconds)
testConductor.enter("await-completion")
}
runOn(second) {
cluster.join(node(first).address)
awaitUpConvergence(numberOfMembers = 3)
testConductor.enter("all-up")
testConductor.enter("down-second-node")
}
runOn(third) {
cluster.join(node(first).address)
awaitUpConvergence(numberOfMembers = 3)
val secondAddress = node(second).address
testConductor.enter("all-up")
testConductor.enter("down-second-node")
awaitUpConvergence(numberOfMembers = 2, canNotBePartOfMemberRing = Seq(secondAddress), 30 seconds)
testConductor.enter("await-completion")
}
testConductor.enter("await-completion-2")
}
}
}

View file

@ -33,26 +33,19 @@ abstract class LeaderElectionSpec
override def initialParticipants = 5
lazy val firstAddress = node(first).address
// sorted in the order used by the cluster
lazy val roles = Seq(first, second, third, fourth).sorted
"A cluster of four nodes" must {
"be able to 'elect' a single leader" taggedAs LongRunningTest in {
// make sure that the node-to-join is started before other join
runOn(first) {
startClusterNode()
}
testConductor.enter("first-started")
awaitClusterUp(first, second, third, fourth)
if (myself != controller) {
cluster.join(firstAddress)
awaitUpConvergence(numberOfMembers = roles.size)
cluster.isLeader must be(myself == roles.head)
assertLeaderIn(roles)
}
testConductor.enter("after")
}
@ -71,7 +64,7 @@ abstract class LeaderElectionSpec
testConductor.enter("after-shutdown", "after-down", "completed")
case `leader`
testConductor.enter("before-shutdown")
testConductor.enter("before-shutdown", "after-shutdown")
// this node will be shutdown by the controller and doesn't participate in more barriers
case `aUser`

View file

@ -18,13 +18,13 @@ object MembershipChangeListenerExitingMultiJvmSpec extends MultiNodeConfig {
commonConfig(
debugConfig(on = false)
.withFallback(ConfigFactory.parseString("""
.withFallback(ConfigFactory.parseString("""
akka.cluster {
leader-actions-interval = 5 s # increase the leader action task interval
unreachable-nodes-reaper-interval = 30 s # turn "off" reaping to unreachable node set
}
""")
.withFallback(MultiNodeClusterSpec.clusterConfig)))
.withFallback(MultiNodeClusterSpec.clusterConfig)))
}
class MembershipChangeListenerExitingMultiJvmNode1 extends MembershipChangeListenerExitingSpec
@ -46,16 +46,7 @@ abstract class MembershipChangeListenerExitingSpec
"A registered MembershipChangeListener" must {
"be notified when new node is EXITING" taggedAs LongRunningTest in {
runOn(first) {
startClusterNode()
}
testConductor.enter("first-started")
runOn(second, third) {
cluster.join(firstAddress)
}
awaitUpConvergence(numberOfMembers = 3)
testConductor.enter("rest-started")
awaitClusterUp(first, second, third)
runOn(first) {
testConductor.enter("registered-listener")
@ -70,7 +61,7 @@ abstract class MembershipChangeListenerExitingSpec
val exitingLatch = TestLatch()
cluster.registerListener(new MembershipChangeListener {
def notify(members: SortedSet[Member]) {
if (members.size == 3 && members.exists( m => m.address == secondAddress && m.status == MemberStatus.Exiting))
if (members.size == 3 && members.exists(m m.address == secondAddress && m.status == MemberStatus.Exiting))
exitingLatch.countDown()
}
})

View file

@ -17,12 +17,12 @@ object MembershipChangeListenerJoinMultiJvmSpec extends MultiNodeConfig {
commonConfig(
debugConfig(on = false)
.withFallback(ConfigFactory.parseString("""
.withFallback(ConfigFactory.parseString("""
akka.cluster {
leader-actions-interval = 5 s # increase the leader action task interval to allow time checking for JOIN before leader moves it to UP
}
""")
.withFallback(MultiNodeClusterSpec.clusterConfig)))
.withFallback(MultiNodeClusterSpec.clusterConfig)))
}
class MembershipChangeListenerJoinMultiJvmNode1 extends MembershipChangeListenerJoinSpec
@ -42,15 +42,6 @@ abstract class MembershipChangeListenerJoinSpec
"A registered MembershipChangeListener" must {
"be notified when new node is JOINING" taggedAs LongRunningTest in {
runOn(first) {
startClusterNode()
}
runOn(second) {
testConductor.enter("registered-listener")
cluster.join(firstAddress)
}
runOn(first) {
val joinLatch = TestLatch()
cluster.registerListener(new MembershipChangeListener {
@ -65,6 +56,11 @@ abstract class MembershipChangeListenerJoinSpec
cluster.convergence.isDefined must be(true)
}
runOn(second) {
testConductor.enter("registered-listener")
cluster.join(firstAddress)
}
testConductor.enter("after")
}
}

View file

@ -17,11 +17,11 @@ object MembershipChangeListenerLeavingMultiJvmSpec extends MultiNodeConfig {
commonConfig(
debugConfig(on = false)
.withFallback(ConfigFactory.parseString("""
.withFallback(ConfigFactory.parseString("""
akka.cluster.leader-actions-interval = 5 s
akka.cluster.unreachable-nodes-reaper-interval = 30 s
"""))
.withFallback(MultiNodeClusterSpec.clusterConfig))
.withFallback(MultiNodeClusterSpec.clusterConfig))
}
class MembershipChangeListenerLeavingMultiJvmNode1 extends MembershipChangeListenerLeavingSpec
@ -43,16 +43,7 @@ abstract class MembershipChangeListenerLeavingSpec
"A registered MembershipChangeListener" must {
"be notified when new node is LEAVING" taggedAs LongRunningTest in {
runOn(first) {
startClusterNode()
}
testConductor.enter("first-started")
runOn(second, third) {
cluster.join(firstAddress)
}
awaitUpConvergence(numberOfMembers = 3)
testConductor.enter("rest-started")
awaitClusterUp(first, second, third)
runOn(first) {
testConductor.enter("registered-listener")
@ -67,7 +58,7 @@ abstract class MembershipChangeListenerLeavingSpec
val latch = TestLatch()
cluster.registerListener(new MembershipChangeListener {
def notify(members: SortedSet[Member]) {
if (members.size == 3 && members.exists( m => m.address == secondAddress && m.status == MemberStatus.Leaving))
if (members.size == 3 && members.exists(m m.address == secondAddress && m.status == MemberStatus.Leaving))
latch.countDown()
}
})

View file

@ -32,13 +32,9 @@ abstract class MembershipChangeListenerSpec extends MultiNodeSpec(MembershipChan
"A set of connected cluster systems" must {
"(when two systems) after cluster convergence updates the membership table then all MembershipChangeListeners should be triggered" taggedAs LongRunningTest in {
"(when two nodes) after cluster convergence updates the membership table then all MembershipChangeListeners should be triggered" taggedAs LongRunningTest in {
// make sure that the node-to-join is started before other join
runOn(first) {
cluster.self
}
testConductor.enter("first-started")
awaitClusterUp(first)
runOn(first, second) {
cluster.join(firstAddress)
@ -56,7 +52,7 @@ abstract class MembershipChangeListenerSpec extends MultiNodeSpec(MembershipChan
testConductor.enter("after-1")
}
"(when three systems) after cluster convergence updates the membership table then all MembershipChangeListeners should be triggered" taggedAs LongRunningTest in {
"(when three nodes) after cluster convergence updates the membership table then all MembershipChangeListeners should be triggered" taggedAs LongRunningTest in {
runOn(third) {
cluster.join(firstAddress)

View file

@ -35,15 +35,6 @@ abstract class MembershipChangeListenerUpSpec
"A registered MembershipChangeListener" must {
"be notified when new node is marked as UP by the leader" taggedAs LongRunningTest in {
runOn(first) {
startClusterNode()
}
runOn(second) {
testConductor.enter("registered-listener")
cluster.join(firstAddress)
}
runOn(first) {
val upLatch = TestLatch()
cluster.registerListener(new MembershipChangeListener {
@ -58,6 +49,11 @@ abstract class MembershipChangeListenerUpSpec
awaitUpConvergence(numberOfMembers = 2)
}
runOn(second) {
testConductor.enter("registered-listener")
cluster.join(firstAddress)
}
testConductor.enter("after")
}
}

View file

@ -30,15 +30,39 @@ object MultiNodeClusterSpec {
trait MultiNodeClusterSpec { self: MultiNodeSpec
/**
* Create a cluster node using 'Cluster(system)'.
* Get or create a cluster node using 'Cluster(system)' extension.
*/
def cluster: Cluster = Cluster(system)
/**
* Use this method instead of 'cluster.self'.
* Use this method instead of 'cluster.self'
* for the initial startup of the cluster node.
*/
def startClusterNode(): Unit = cluster.self
def startCluster(roles: RoleName*): Unit = {
awaitStartCluster(false, roles.toSeq)
}
def awaitClusterUp(roles: RoleName*): Unit = {
awaitStartCluster(true, roles.toSeq)
}
private def awaitStartCluster(upConvergence: Boolean = true, roles: Seq[RoleName]): Unit = {
runOn(roles.head) {
// make sure that the node-to-join is started before other join
startClusterNode()
}
testConductor.enter(roles.head.name + "-started")
if (roles.tail.contains(myself)) {
cluster.join(node(roles.head).address)
}
if (upConvergence && roles.contains(myself)) {
awaitUpConvergence(numberOfMembers = roles.length)
}
testConductor.enter(roles.map(_.name).mkString("-") + "-joined")
}
/**
* Assert that the member addresses match the expected addresses in the
* sort order used by the cluster.

View file

@ -16,12 +16,12 @@ object NodeJoinMultiJvmSpec extends MultiNodeConfig {
commonConfig(
debugConfig(on = false)
.withFallback(ConfigFactory.parseString("""
.withFallback(ConfigFactory.parseString("""
akka.cluster {
leader-actions-interval = 5 s # increase the leader action task interval
}
""")
.withFallback(MultiNodeClusterSpec.clusterConfig)))
.withFallback(MultiNodeClusterSpec.clusterConfig)))
}
class NodeJoinMultiJvmNode1 extends NodeJoinSpec

View file

@ -40,16 +40,7 @@ abstract class NodeLeavingAndExitingAndBeingRemovedSpec
"be moved to EXITING and then to REMOVED by the reaper" taggedAs LongRunningTest in {
runOn(first) {
startClusterNode()
}
testConductor.enter("first-started")
runOn(second, third) {
cluster.join(firstAddress)
}
awaitUpConvergence(numberOfMembers = 3)
testConductor.enter("rest-started")
awaitClusterUp(first, second, third)
runOn(first) {
cluster.leave(secondAddress)

View file

@ -46,16 +46,7 @@ abstract class NodeLeavingAndExitingSpec
"be moved to EXITING by the leader" taggedAs LongRunningTest in {
runOn(first) {
startClusterNode()
}
testConductor.enter("first-started")
runOn(second, third) {
cluster.join(firstAddress)
}
awaitUpConvergence(numberOfMembers = 3)
testConductor.enter("rest-started")
awaitClusterUp(first, second, third)
runOn(first) {
cluster.leave(secondAddress)

View file

@ -40,16 +40,7 @@ abstract class NodeLeavingSpec extends MultiNodeSpec(NodeLeavingMultiJvmSpec)
"be marked as LEAVING in the converged membership table" taggedAs LongRunningTest in {
runOn(first) {
startClusterNode()
}
testConductor.enter("first-started")
runOn(second, third) {
cluster.join(firstAddress)
}
awaitUpConvergence(numberOfMembers = 3)
testConductor.enter("rest-started")
awaitClusterUp(first, second, third)
runOn(first) {
cluster.leave(secondAddress)

View file

@ -35,16 +35,7 @@ abstract class NodeShutdownSpec extends MultiNodeSpec(NodeShutdownMultiJvmSpec)
"A cluster of 2 nodes" must {
"not be singleton cluster when joined" taggedAs LongRunningTest in {
// make sure that the node-to-join is started before other join
runOn(first) {
startClusterNode()
}
testConductor.enter("first-started")
runOn(second) {
cluster.join(node(first).address)
}
awaitUpConvergence(numberOfMembers = 2)
awaitClusterUp(first, second)
cluster.isSingletonCluster must be(false)
assertLeader(first, second)

View file

@ -28,21 +28,10 @@ abstract class NodeUpSpec
override def initialParticipants = 2
lazy val firstAddress = node(first).address
lazy val secondAddress = node(second).address
"A cluster node that is joining another cluster" must {
"be moved to UP by the leader after a convergence" taggedAs LongRunningTest in {
runOn(first) {
startClusterNode()
}
runOn(second) {
cluster.join(firstAddress)
}
awaitUpConvergence(numberOfMembers = 2)
awaitClusterUp(first, second)
testConductor.enter("after")
}