Merging in master, huge work trying to get things to compile, tests not green at this stage

Viktor Klang 2012-07-06 17:04:04 +02:00
commit ac5b5de90a
68 changed files with 3759 additions and 2144 deletions


@@ -36,6 +36,10 @@ abstract class JoinSeedNodeSpec
"A cluster with configured seed nodes" must {
"start the seed nodes sequentially" taggedAs LongRunningTest in {
// without looking up the addresses first there might be
// [akka://JoinSeedNodeSpec/user/TestConductorClient] cannot write GetAddress(RoleName(seed2)) while waiting for seed1
roles foreach address
runOn(seed1) {
startClusterNode()
}
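
A note on the warm-up above: `address(role)` both resolves and caches a node's
Address in these specs, and the quoted log line shows the TestConductor client
cannot write a fresh GetAddress request while it is still waiting for seed1.
Touching every role once up front sidesteps that. A minimal, self-contained
sketch of the idiom, with a hypothetical cache standing in for the spec's real
`address` helper:

object WarmupSketch extends App {
  // toy stand-in: the real lookup asks the TestConductor for the node's Address
  val cache = collection.mutable.Map.empty[String, String]
  def address(role: String): String =
    cache.getOrElseUpdate(role, { Thread.sleep(10); "akka://" + role + "@127.0.0.1:2552" })

  val roles = List("seed1", "seed2", "seed3")
  roles foreach address // resolve every role once, before the nodes race ahead
  println(cache.keys.toList.sorted)
}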


@@ -36,12 +36,22 @@ object LargeClusterMultiJvmSpec extends MultiNodeConfig {
akka.cluster {
gossip-interval = 500 ms
auto-join = off
failure-detector.threshold = 4
auto-down = on
failure-detector.acceptable-heartbeat-pause = 10s
publish-state-interval = 0 s # always, when it happens
}
akka.loglevel = INFO
akka.actor.default-dispatcher.fork-join-executor.parallelism-max = 2
akka.actor.default-dispatcher.fork-join-executor {
# when using nodes-per-datacenter=10 we need some extra
# threads to keep up with netty connect blocking
parallelism-min = 13
parallelism-max = 13
}
akka.scheduler.tick-duration = 33 ms
akka.remote.netty.execution-pool-size = 0
akka.remote.netty.execution-pool-size = 4
#akka.remote.netty.reconnection-time-window = 1s
akka.remote.netty.backoff-timeout = 500ms
akka.remote.netty.connection-timeout = 500ms
# don't use testconductor transport in this test, especially not
# when using use-dispatcher-for-io
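
Pinning parallelism-min and parallelism-max to the same value gives the test a
fixed-size fork-join pool, which the comment justifies by netty's blocking
connects at nodes-per-datacenter=10. A quick standalone check that such a block
parses to the intended values (com.typesafe.config is already on these specs'
classpath); the snippet is illustrative only:

import com.typesafe.config.ConfigFactory

object DispatcherConfigSketch extends App {
  val cfg = ConfigFactory.parseString("""
    akka.actor.default-dispatcher.fork-join-executor {
      parallelism-min = 13
      parallelism-max = 13
    }
    akka.scheduler.tick-duration = 33 ms
  """)
  println(cfg.getInt("akka.actor.default-dispatcher.fork-join-executor.parallelism-min")) // 13
  println(cfg.getString("akka.scheduler.tick-duration")) // 33 ms
}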
@@ -124,8 +134,10 @@ abstract class LargeClusterSpec
val clusterNodes = ifNode(from)(joiningClusterNodes)(systems.map(Cluster(_)).toSet)
val startGossipCounts = Map.empty[Cluster, Long] ++
clusterNodes.map(c ⇒ (c -> c.receivedGossipCount))
def gossipCount(c: Cluster): Long = c.receivedGossipCount - startGossipCounts(c)
clusterNodes.map(c ⇒ (c -> c.latestStats.receivedGossipCount))
def gossipCount(c: Cluster): Long = {
c.latestStats.receivedGossipCount - startGossipCounts(c)
}
val startTime = System.nanoTime
def tookMillis: String = TimeUnit.NANOSECONDS.toMillis(System.nanoTime - startTime) + " ms"
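
The switch from `c.receivedGossipCount` to `c.latestStats.receivedGossipCount`
moves the counter behind a stats snapshot, but the measurement idiom is
unchanged: take a per-node baseline once, then report only the delta for the
phase under test. A self-contained sketch with hypothetical stand-in types
(Stats/Node here are not Akka's):

object GossipDeltaSketch extends App {
  final case class Stats(receivedGossipCount: Long)
  final class Node(val name: String) { var latestStats = Stats(0L) }

  val nodes = List(new Node("a"), new Node("b"))
  nodes.foreach(_.latestStats = Stats(10L)) // counts accumulated before the phase
  // baseline snapshot, taken once when the measured phase starts
  val startGossipCounts: Map[Node, Long] =
    nodes.map(n => n -> n.latestStats.receivedGossipCount).toMap
  def gossipCount(n: Node): Long = n.latestStats.receivedGossipCount - startGossipCounts(n)

  nodes.foreach(_.latestStats = Stats(17L)) // traffic during the phase
  nodes.foreach(n => println(n.name + " received " + gossipCount(n) + " gossip messages")) // 7 each
}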
@@ -244,15 +256,16 @@ abstract class LargeClusterSpec
}
}
// FIXME sometimes this fails, FD marks nodes from other than second-datacenter as unavailable
"detect failure and auto-down crashed nodes in second-datacenter" taggedAs LongRunningTest ignore {
"detect failure and auto-down crashed nodes in second-datacenter" taggedAs LongRunningTest in {
val unreachableNodes = nodesPerDatacenter
val liveNodes = nodesPerDatacenter * 4
within(20.seconds + expectedMaxDuration(liveNodes)) {
within(30.seconds + (3.seconds * liveNodes)) {
val startGossipCounts = Map.empty[Cluster, Long] ++
systems.map(sys ⇒ (Cluster(sys) -> Cluster(sys).receivedGossipCount))
def gossipCount(c: Cluster): Long = c.receivedGossipCount - startGossipCounts(c)
systems.map(sys ⇒ (Cluster(sys) -> Cluster(sys).latestStats.receivedGossipCount))
def gossipCount(c: Cluster): Long = {
c.latestStats.receivedGossipCount - startGossipCounts(c)
}
val startTime = System.nanoTime
def tookMillis: String = TimeUnit.NANOSECONDS.toMillis(System.nanoTime - startTime) + " ms"
@@ -278,10 +291,11 @@ abstract class LargeClusterSpec
runOn(firstDatacenter, thirdDatacenter, fourthDatacenter, fifthDatacenter) {
Await.ready(latch, remaining)
awaitCond(systems.forall(Cluster(_).convergence.isDefined))
val mergeCount = systems.map(sys ⇒ Cluster(sys).latestStats.mergeCount).sum
val counts = systems.map(sys ⇒ gossipCount(Cluster(sys)))
val formattedStats = "mean=%s min=%s max=%s".format(counts.sum / nodesPerDatacenter, counts.min, counts.max)
log.info("Convergence of [{}] nodes reached after failure, it took [{}], received [{}] gossip messages per node",
liveNodes, tookMillis, formattedStats)
log.info("Convergence of [{}] nodes reached after failure, it took [{}], received [{}] gossip messages per node, merged [{}] times",
liveNodes, tookMillis, formattedStats, mergeCount)
}
enterBarrier("after-6")


@@ -27,6 +27,7 @@ object MultiNodeClusterSpec {
leader-actions-interval = 200 ms
unreachable-nodes-reaper-interval = 200 ms
periodic-tasks-initial-delay = 300 ms
publish-state-interval = 0 s # always, when it happens
}
akka.test {
single-expect-default = 5 s


@ -1,55 +0,0 @@
/**
* Copyright (C) 2009-2012 Typesafe Inc. <http://www.typesafe.com>
*/
package akka.cluster
import scala.collection.immutable.SortedSet
import com.typesafe.config.ConfigFactory
import akka.remote.testkit.MultiNodeConfig
import akka.remote.testkit.MultiNodeSpec
import akka.testkit._
object NodeLeavingMultiJvmSpec extends MultiNodeConfig {
val first = role("first")
val second = role("second")
val third = role("third")
commonConfig(
debugConfig(on = false)
.withFallback(ConfigFactory.parseString("akka.cluster.unreachable-nodes-reaper-frequency = 30 s"))
.withFallback(MultiNodeClusterSpec.clusterConfig))
}
class NodeLeavingMultiJvmNode1 extends NodeLeavingSpec with FailureDetectorPuppetStrategy
class NodeLeavingMultiJvmNode2 extends NodeLeavingSpec with FailureDetectorPuppetStrategy
class NodeLeavingMultiJvmNode3 extends NodeLeavingSpec with FailureDetectorPuppetStrategy
abstract class NodeLeavingSpec
extends MultiNodeSpec(NodeLeavingMultiJvmSpec)
with MultiNodeClusterSpec {
import NodeLeavingMultiJvmSpec._
"A node that is LEAVING a non-singleton cluster" must {
"be marked as LEAVING in the converged membership table" taggedAs LongRunningTest in {
awaitClusterUp(first, second, third)
runOn(first) {
cluster.leave(second)
}
enterBarrier("second-left")
runOn(first, third) {
awaitCond(cluster.latestGossip.members.exists(_.status == MemberStatus.Leaving))
val hasLeft = cluster.latestGossip.members.find(_.status == MemberStatus.Leaving)
hasLeft must be('defined)
hasLeft.get.address must be(address(second))
}
enterBarrier("finished")
}
}
}


@@ -38,7 +38,7 @@ abstract class NodeUpSpec
"be unaffected when joining again" taggedAs LongRunningTest in {
val unexpected = new AtomicReference[SortedSet[Member]]
val unexpected = new AtomicReference[SortedSet[Member]](SortedSet.empty)
cluster.registerListener(new MembershipChangeListener {
def notify(members: SortedSet[Member]) {
if (members.size != 2 || members.exists(_.status != MemberStatus.Up))
@@ -55,7 +55,7 @@ abstract class NodeUpSpec
// let it run for a while to make sure that nothing bad happens
for (n ← 1 to 20) {
100.millis.dilated.sleep()
unexpected.get must be(null)
unexpected.get must be(SortedSet.empty)
cluster.latestGossip.members.forall(_.status == MemberStatus.Up) must be(true)
}
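
The NodeUpSpec change replaces a null sentinel with an empty-set initial value:
an AtomicReference constructed without an argument starts out holding null, so
the old assertion had to compare against null; seeding it with SortedSet.empty
lets the assertion state the intent directly. Minimal illustration:

import java.util.concurrent.atomic.AtomicReference
import scala.collection.immutable.SortedSet

object AtomicRefSketch extends App {
  val before = new AtomicReference[SortedSet[Int]]()                // starts as null
  val after  = new AtomicReference[SortedSet[Int]](SortedSet.empty) // starts empty
  println(before.get) // null
  println(after.get)  // TreeSet()
}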


@@ -9,8 +9,10 @@ import akka.remote.testkit.MultiNodeConfig
import akka.remote.testkit.MultiNodeSpec
import akka.testkit._
import akka.actor.Address
import akka.pattern.ask
import akka.remote.testconductor.RoleName
import MemberStatus._
import InternalClusterAction._
object TransitionMultiJvmSpec extends MultiNodeConfig {
val first = role("first")
@@ -28,7 +30,8 @@ class TransitionMultiJvmNode3 extends TransitionSpec with FailureDetectorPuppetS
abstract class TransitionSpec
extends MultiNodeSpec(TransitionMultiJvmSpec)
with MultiNodeClusterSpec {
with MultiNodeClusterSpec
with ImplicitSender {
import TransitionMultiJvmSpec._
@@ -67,6 +70,22 @@ abstract class TransitionSpec
memberStatus(address) == status
}
def leaderActions(): Unit = {
cluster.clusterCore ! LeaderActionsTick
awaitPing()
}
def reapUnreachable(): Unit = {
cluster.clusterCore ! ReapUnreachableTick
awaitPing()
}
def awaitPing(): Unit = {
val ping = Ping()
cluster.clusterCore ! ping
expectMsgPF() { case pong @ Pong(`ping`, _) ⇒ pong }
}
// DSL sugar for `role1 gossipTo role2`
implicit def roleExtras(role: RoleName): RoleWrapper = new RoleWrapper(role)
var gossipBarrierCounter = 0
@@ -83,7 +102,8 @@ abstract class TransitionSpec
}
runOn(fromRole) {
enterBarrier("before-gossip-" + gossipBarrierCounter)
cluster.gossipTo(toRole) // send gossip
// send gossip
cluster.clusterCore ! InternalClusterAction.SendGossipTo(toRole)
// gossip chat will synchronize the views
awaitCond((Set(fromRole, toRole) -- seenLatestGossip).isEmpty)
enterBarrier("after-gossip-" + gossipBarrierCounter)
@@ -104,7 +124,7 @@ abstract class TransitionSpec
cluster.isSingletonCluster must be(true)
cluster.status must be(Joining)
cluster.convergence.isDefined must be(true)
cluster.leaderActions()
leaderActions()
cluster.status must be(Up)
}
@@ -127,7 +147,7 @@ abstract class TransitionSpec
enterBarrier("convergence-joining-2")
runOn(leader(first, second)) {
cluster.leaderActions()
leaderActions()
memberStatus(first) must be(Up)
memberStatus(second) must be(Up)
}
@@ -182,7 +202,7 @@ abstract class TransitionSpec
enterBarrier("convergence-joining-3")
runOn(leader(first, second, third)) {
cluster.leaderActions()
leaderActions()
memberStatus(first) must be(Up)
memberStatus(second) must be(Up)
memberStatus(third) must be(Up)
@@ -200,7 +220,8 @@ abstract class TransitionSpec
// first non-leader gossipTo the other non-leader
nonLeader(first, second, third).head gossipTo nonLeader(first, second, third).tail.head
runOn(nonLeader(first, second, third).head) {
cluster.gossipTo(nonLeader(first, second, third).tail.head)
// send gossip
cluster.clusterCore ! InternalClusterAction.SendGossipTo(nonLeader(first, second, third).tail.head)
}
runOn(nonLeader(first, second, third).tail.head) {
memberStatus(third) must be(Up)
@@ -224,7 +245,7 @@ abstract class TransitionSpec
"perform correct transitions when second becomes unavailable" taggedAs LongRunningTest in {
runOn(third) {
markNodeAsUnavailable(second)
cluster.reapUnreachableMembers()
reapUnreachable()
cluster.latestGossip.overview.unreachable must contain(Member(second, Up))
seenLatestGossip must be(Set(third))
}
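
The new leaderActions()/reapUnreachable()/awaitPing() helpers replace direct
method calls on Cluster with messages to the core daemon, and use a correlated
Ping/Pong as a processing barrier: an actor's mailbox is processed in order, so
the matching Pong can only arrive after the preceding tick has been handled.
A self-contained sketch of that barrier, written against a current Akka classic
API (imports and message shapes differ from this 2012 tree; Core is a toy
stand-in for the cluster core daemon):

import akka.actor._
import akka.pattern.ask
import akka.util.Timeout
import scala.concurrent.Await
import scala.concurrent.duration._

object TickBarrierSketch extends App {
  case object Tick                 // stands in for LeaderActionsTick / ReapUnreachableTick
  final case class Ping(id: Long)
  final case class Pong(id: Long)

  class Core extends Actor {
    def receive = {
      case Tick     => ()                  // the side effect runs here, in mailbox order
      case Ping(id) => sender() ! Pong(id) // reached only after earlier Ticks are done
    }
  }

  implicit val timeout: Timeout = Timeout(3.seconds)
  val system = ActorSystem("sketch")
  val core = system.actorOf(Props(new Core), "core")

  core ! Tick                                      // fire-and-forget, like the spec's ticks
  println(Await.result(core ? Ping(1), 3.seconds)) // Pong(1): proof the Tick was processed
  system.terminate()
}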


@@ -9,7 +9,7 @@ import akka.remote.testkit.MultiNodeSpec
import akka.testkit._
import com.typesafe.config.ConfigFactory
import akka.actor.Address
import akka.remote.testconductor.{RoleName, Direction}
import akka.remote.testconductor.{ RoleName, Direction }
import akka.util.duration._
object UnreachableNodeRejoinsClusterMultiJvmSpec extends MultiNodeConfig {
@@ -26,7 +26,6 @@ class UnreachableNodeRejoinsClusterWithFailureDetectorPuppetMultiJvmNode2 extend
class UnreachableNodeRejoinsClusterWithFailureDetectorPuppetMultiJvmNode3 extends UnreachableNodeRejoinsClusterSpec with FailureDetectorPuppetStrategy
class UnreachableNodeRejoinsClusterWithFailureDetectorPuppetMultiJvmNode4 extends UnreachableNodeRejoinsClusterSpec with FailureDetectorPuppetStrategy
class UnreachableNodeRejoinsClusterWithAccrualFailureDetectorMultiJvmNode1 extends UnreachableNodeRejoinsClusterSpec with AccrualFailureDetectorStrategy
class UnreachableNodeRejoinsClusterWithAccrualFailureDetectorMultiJvmNode2 extends UnreachableNodeRejoinsClusterSpec with AccrualFailureDetectorStrategy
class UnreachableNodeRejoinsClusterWithAccrualFailureDetectorMultiJvmNode3 extends UnreachableNodeRejoinsClusterSpec with AccrualFailureDetectorStrategy
@@ -41,7 +40,6 @@ abstract class UnreachableNodeRejoinsClusterSpec
roles.filterNot(_ == role)
}
lazy val sortedRoles = roles.sorted
lazy val master = sortedRoles(0)
lazy val victim = sortedRoles(1)
@@ -55,14 +53,19 @@ abstract class UnreachableNodeRejoinsClusterSpec
"A cluster of " + roles.size + " members" must {
"reach initial convergence" taggedAs LongRunningTest in {
awaitClusterUp(roles:_*)
awaitClusterUp(roles: _*)
endBarrier
}
"mark a node as UNREACHABLE when we pull the network" taggedAs LongRunningTest in {
// let them send at least one heartbeat to each other after the gossip convergence
// because for new joining nodes we remove them from the failure detector when
// receive gossip
2.seconds.dilated.sleep
runOn(first) {
// pull network for victim node from all nodes
allBut(victim).foreach { roleName =>
allBut(victim).foreach { roleName ⇒
testConductor.blackhole(victim, roleName, Direction.Both).await
}
}
@@ -74,24 +77,28 @@ abstract class UnreachableNodeRejoinsClusterSpec
allButVictim.foreach(markNodeAsUnavailable(_))
within(30 seconds) {
// victim becomes all alone
awaitCond({ val gossip = cluster.latestGossip
awaitCond({
val gossip = cluster.latestGossip
gossip.overview.unreachable.size == (roles.size - 1) &&
gossip.members.size == 1 &&
gossip.members.forall(_.status == MemberStatus.Up) })
gossip.members.forall(_.status == MemberStatus.Up)
})
cluster.latestGossip.overview.unreachable.map(_.address) must be((allButVictim map address).toSet)
cluster.convergence.isDefined must be(false)
}
}
runOn(allButVictim:_*) {
runOn(allButVictim: _*) {
markNodeAsUnavailable(victim)
within(30 seconds) {
// victim becomes unreachable
awaitCond({ val gossip = cluster.latestGossip
awaitCond({
val gossip = cluster.latestGossip
gossip.overview.unreachable.size == 1 &&
gossip.members.size == (roles.size - 1) &&
gossip.members.forall(_.status == MemberStatus.Up) })
awaitSeenSameState(allButVictim map address:_*)
gossip.members.forall(_.status == MemberStatus.Up)
})
awaitSeenSameState(allButVictim map address: _*)
// still one unreachable
cluster.latestGossip.overview.unreachable.size must be(1)
cluster.latestGossip.overview.unreachable.head.address must be(node(victim).address)
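
Both branches above wrap a multi-clause gossip predicate in awaitCond, which is
worth keeping in mind: awaitCond (from akka.testkit) is plain deadline-bounded
polling, so the whole composite predicate is re-evaluated on every pass. A
hand-rolled equivalent, for illustration only:

object AwaitCondSketch extends App {
  // simplified version of akka.testkit's awaitCond: poll until the predicate
  // holds or the deadline passes
  def awaitCond(p: => Boolean, maxMillis: Long = 5000, intervalMillis: Long = 100): Unit = {
    val deadline = System.nanoTime + maxMillis * 1000000L
    while (!p) {
      if (System.nanoTime > deadline) throw new AssertionError("timeout waiting for condition")
      Thread.sleep(intervalMillis)
    }
  }

  var polls = 0
  awaitCond({ polls += 1; polls > 3 }) // succeeds on the fourth evaluation
  println("condition met after " + polls + " polls")
}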
@@ -108,7 +115,7 @@ abstract class UnreachableNodeRejoinsClusterSpec
cluster down victim
}
runOn(allBut(victim):_*) {
runOn(allBut(victim): _*) {
awaitUpConvergence(roles.size - 1, Seq(victim))
}
@@ -118,7 +125,7 @@ abstract class UnreachableNodeRejoinsClusterSpec
"allow node to REJOIN when the network is plugged back in" taggedAs LongRunningTest in {
runOn(first) {
// put the network back in
allBut(victim).foreach { roleName =>
allBut(victim).foreach { roleName ⇒
testConductor.passThrough(victim, roleName, Direction.Both).await
}
}