Merging in master, huge work trying to get things to compile; tests not green at this stage
commit ac5b5de90a
68 changed files with 3759 additions and 2144 deletions
@@ -36,6 +36,10 @@ abstract class JoinSeedNodeSpec
   "A cluster with configured seed nodes" must {
     "start the seed nodes sequentially" taggedAs LongRunningTest in {
+      // without looking up the addresses first there might be
+      // [akka://JoinSeedNodeSpec/user/TestConductorClient] cannot write GetAddress(RoleName(seed2)) while waiting for seed1
+      roles foreach address
+
       runOn(seed1) {
         startClusterNode()
       }
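The hunk above pre-resolves every role's address before any node enters its blocking seed-node startup, so the TestConductor client never has to serve a GetAddress request while a node is already waiting for seed1. A minimal plain-Scala sketch of the same eager-resolution idea; `lookup` and `blockingJoin` are hypothetical stand-ins for the spec's `address(...)` helper and the blocking startup:

    object PreResolveSketch extends App {
      val roles = List("seed1", "seed2", "ordinary1")

      // Stand-in for the spec's address(role) helper; in the real test this
      // round-trips through the TestConductor client.
      def lookup(role: String): String = {
        println(s"resolving $role")
        s"akka://JoinSeedNodeSpec@$role:2552"
      }

      // Resolve and cache everything eagerly, mirroring `roles foreach address`.
      val addresses: Map[String, String] = roles.map(r => r -> lookup(r)).toMap

      // Stand-in for the blocking seed-node startup: it only reads the cache,
      // and never triggers a fresh lookup while blocked.
      def blockingJoin(seed: String): Unit =
        println(s"joining via ${addresses(seed)}")

      blockingJoin("seed1")
    }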

@@ -36,12 +36,22 @@ object LargeClusterMultiJvmSpec extends MultiNodeConfig {
     akka.cluster {
       gossip-interval = 500 ms
       auto-join = off
-      failure-detector.threshold = 4
+      auto-down = on
+      failure-detector.acceptable-heartbeat-pause = 10s
+      publish-state-interval = 0 s # always, when it happens
     }
     akka.loglevel = INFO
-    akka.actor.default-dispatcher.fork-join-executor.parallelism-max = 2
+    akka.actor.default-dispatcher.fork-join-executor {
+      # when using nodes-per-datacenter=10 we need some extra
+      # threads to keep up with netty connect blocking
+      parallelism-min = 13
+      parallelism-max = 13
+    }
     akka.scheduler.tick-duration = 33 ms
-    akka.remote.netty.execution-pool-size = 0
+    akka.remote.netty.execution-pool-size = 4
     #akka.remote.netty.reconnection-time-window = 1s
+    akka.remote.netty.backoff-timeout = 500ms
+    akka.remote.netty.connection-timeout = 500ms
+
     # don't use testconductor transport in this test, especially not
     # when using use-dispatcher-for-io
@@ -124,8 +134,10 @@ abstract class LargeClusterSpec
     val clusterNodes = ifNode(from)(joiningClusterNodes)(systems.map(Cluster(_)).toSet)
     val startGossipCounts = Map.empty[Cluster, Long] ++
-      clusterNodes.map(c ⇒ (c -> c.receivedGossipCount))
-    def gossipCount(c: Cluster): Long = c.receivedGossipCount - startGossipCounts(c)
+      clusterNodes.map(c ⇒ (c -> c.latestStats.receivedGossipCount))
+    def gossipCount(c: Cluster): Long = {
+      c.latestStats.receivedGossipCount - startGossipCounts(c)
+    }
     val startTime = System.nanoTime
     def tookMillis: String = TimeUnit.NANOSECONDS.toMillis(System.nanoTime - startTime) + " ms"

@@ -244,15 +256,16 @@ abstract class LargeClusterSpec
       }
     }

-    // FIXME sometimes this fails, FD marks nodes from other than second-datacenter as unavailable
-    "detect failure and auto-down crashed nodes in second-datacenter" taggedAs LongRunningTest ignore {
+    "detect failure and auto-down crashed nodes in second-datacenter" taggedAs LongRunningTest in {
       val unreachableNodes = nodesPerDatacenter
       val liveNodes = nodesPerDatacenter * 4

-      within(20.seconds + expectedMaxDuration(liveNodes)) {
+      within(30.seconds + (3.seconds * liveNodes)) {
         val startGossipCounts = Map.empty[Cluster, Long] ++
-          systems.map(sys ⇒ (Cluster(sys) -> Cluster(sys).receivedGossipCount))
-        def gossipCount(c: Cluster): Long = c.receivedGossipCount - startGossipCounts(c)
+          systems.map(sys ⇒ (Cluster(sys) -> Cluster(sys).latestStats.receivedGossipCount))
+        def gossipCount(c: Cluster): Long = {
+          c.latestStats.receivedGossipCount - startGossipCounts(c)
+        }
         val startTime = System.nanoTime
         def tookMillis: String = TimeUnit.NANOSECONDS.toMillis(System.nanoTime - startTime) + " ms"

@@ -278,10 +291,11 @@ abstract class LargeClusterSpec
       runOn(firstDatacenter, thirdDatacenter, fourthDatacenter, fifthDatacenter) {
         Await.ready(latch, remaining)
         awaitCond(systems.forall(Cluster(_).convergence.isDefined))
+        val mergeCount = systems.map(sys ⇒ Cluster(sys).latestStats.mergeCount).sum
         val counts = systems.map(sys ⇒ gossipCount(Cluster(sys)))
         val formattedStats = "mean=%s min=%s max=%s".format(counts.sum / nodesPerDatacenter, counts.min, counts.max)
-        log.info("Convergence of [{}] nodes reached after failure, it took [{}], received [{}] gossip messages per node",
-          liveNodes, tookMillis, formattedStats)
+        log.info("Convergence of [{}] nodes reached after failure, it took [{}], received [{}] gossip messages per node, merged [{}] times",
+          liveNodes, tookMillis, formattedStats, mergeCount)
       }

       enterBarrier("after-6")
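Both LargeClusterSpec hunks above use the same measurement pattern: snapshot a monotonically increasing per-node counter when the measured phase starts, then report per-node deltas plus elapsed time once convergence is reached. A self-contained sketch of that pattern in plain Scala; `readCounter` is a hypothetical stand-in for `Cluster#latestStats.receivedGossipCount`:

    import java.util.concurrent.TimeUnit

    object GossipDeltaSketch extends App {
      // Hypothetical counter source standing in for latestStats.receivedGossipCount.
      val counters = scala.collection.mutable.Map("a" -> 0L, "b" -> 0L, "c" -> 0L)
      def readCounter(node: String): Long = counters(node)

      // Snapshot at the start of the measured phase, like startGossipCounts.
      val start: Map[String, Long] = counters.keys.map(n => n -> readCounter(n)).toMap
      def delta(node: String): Long = readCounter(node) - start(node)

      val startTime = System.nanoTime
      def tookMillis: String = TimeUnit.NANOSECONDS.toMillis(System.nanoTime - startTime) + " ms"

      // The measured phase would run here; simulate some gossip traffic.
      counters.keys.toSeq.foreach(n => counters(n) += 1 + scala.util.Random.nextInt(10))

      val deltas = counters.keys.toSeq.map(delta)
      println("gossip per node: mean=%s min=%s max=%s in %s"
        .format(deltas.sum / deltas.size, deltas.min, deltas.max, tookMillis))
    }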

@@ -27,6 +27,7 @@ object MultiNodeClusterSpec {
       leader-actions-interval = 200 ms
       unreachable-nodes-reaper-interval = 200 ms
       periodic-tasks-initial-delay = 300 ms
+      publish-state-interval = 0 s # always, when it happens
     }
     akka.test {
       single-expect-default = 5 s

@@ -1,55 +0,0 @@
-/**
- * Copyright (C) 2009-2012 Typesafe Inc. <http://www.typesafe.com>
- */
-package akka.cluster
-
-import scala.collection.immutable.SortedSet
-import com.typesafe.config.ConfigFactory
-import akka.remote.testkit.MultiNodeConfig
-import akka.remote.testkit.MultiNodeSpec
-import akka.testkit._
-
-object NodeLeavingMultiJvmSpec extends MultiNodeConfig {
-  val first = role("first")
-  val second = role("second")
-  val third = role("third")
-
-  commonConfig(
-    debugConfig(on = false)
-      .withFallback(ConfigFactory.parseString("akka.cluster.unreachable-nodes-reaper-frequency = 30 s"))
-      .withFallback(MultiNodeClusterSpec.clusterConfig))
-}
-
-class NodeLeavingMultiJvmNode1 extends NodeLeavingSpec with FailureDetectorPuppetStrategy
-class NodeLeavingMultiJvmNode2 extends NodeLeavingSpec with FailureDetectorPuppetStrategy
-class NodeLeavingMultiJvmNode3 extends NodeLeavingSpec with FailureDetectorPuppetStrategy
-
-abstract class NodeLeavingSpec
-  extends MultiNodeSpec(NodeLeavingMultiJvmSpec)
-  with MultiNodeClusterSpec {
-
-  import NodeLeavingMultiJvmSpec._
-
-  "A node that is LEAVING a non-singleton cluster" must {
-
-    "be marked as LEAVING in the converged membership table" taggedAs LongRunningTest in {
-
-      awaitClusterUp(first, second, third)
-
-      runOn(first) {
-        cluster.leave(second)
-      }
-      enterBarrier("second-left")
-
-      runOn(first, third) {
-        awaitCond(cluster.latestGossip.members.exists(_.status == MemberStatus.Leaving))
-
-        val hasLeft = cluster.latestGossip.members.find(_.status == MemberStatus.Leaving)
-        hasLeft must be('defined)
-        hasLeft.get.address must be(address(second))
-      }
-
-      enterBarrier("finished")
-    }
-  }
-}

@@ -38,7 +38,7 @@ abstract class NodeUpSpec

     "be unaffected when joining again" taggedAs LongRunningTest in {

-      val unexpected = new AtomicReference[SortedSet[Member]]
+      val unexpected = new AtomicReference[SortedSet[Member]](SortedSet.empty)
       cluster.registerListener(new MembershipChangeListener {
         def notify(members: SortedSet[Member]) {
           if (members.size != 2 || members.exists(_.status != MemberStatus.Up))
@@ -55,7 +55,7 @@ abstract class NodeUpSpec
       // let it run for a while to make sure that nothing bad happens
       for (n ← 1 to 20) {
         100.millis.dilated.sleep()
-        unexpected.get must be(null)
+        unexpected.get must be(SortedSet.empty)
         cluster.latestGossip.members.forall(_.status == MemberStatus.Up) must be(true)
       }
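The NodeUpSpec change above swaps a null-initialized AtomicReference for one seeded with an empty set, so the assertion compares against `SortedSet.empty` instead of `null`. A tiny self-contained sketch of that null-free sentinel pattern in plain Scala:

    import java.util.concurrent.atomic.AtomicReference
    import scala.collection.immutable.SortedSet

    object EmptySentinelSketch extends App {
      // Seeding with an empty set means readers never see null and can use
      // ordinary collection operations right away.
      val unexpected = new AtomicReference[SortedSet[Int]](SortedSet.empty)

      // A listener would record surprising elements like this:
      def record(n: Int): Unit = unexpected.set(unexpected.get + n)

      // The assertion reads naturally, with no null check needed:
      assert(unexpected.get == SortedSet.empty[Int], "nothing unexpected so far")

      record(42)
      assert(unexpected.get == SortedSet(42))
      println(s"recorded: ${unexpected.get}")
    }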

@@ -9,8 +9,10 @@ import akka.remote.testkit.MultiNodeConfig
 import akka.remote.testkit.MultiNodeSpec
 import akka.testkit._
 import akka.actor.Address
+import akka.pattern.ask
 import akka.remote.testconductor.RoleName
 import MemberStatus._
+import InternalClusterAction._

 object TransitionMultiJvmSpec extends MultiNodeConfig {
   val first = role("first")
@@ -28,7 +30,8 @@ class TransitionMultiJvmNode3 extends TransitionSpec with FailureDetectorPuppetStrategy

 abstract class TransitionSpec
   extends MultiNodeSpec(TransitionMultiJvmSpec)
-  with MultiNodeClusterSpec {
+  with MultiNodeClusterSpec
+  with ImplicitSender {

   import TransitionMultiJvmSpec._

@@ -67,6 +70,22 @@ abstract class TransitionSpec
     memberStatus(address) == status
   }

+  def leaderActions(): Unit = {
+    cluster.clusterCore ! LeaderActionsTick
+    awaitPing()
+  }
+
+  def reapUnreachable(): Unit = {
+    cluster.clusterCore ! ReapUnreachableTick
+    awaitPing()
+  }
+
+  def awaitPing(): Unit = {
+    val ping = Ping()
+    cluster.clusterCore ! ping
+    expectMsgPF() { case pong @ Pong(`ping`, _) ⇒ pong }
+  }
+
   // DSL sugar for `role1 gossipTo role2`
   implicit def roleExtras(role: RoleName): RoleWrapper = new RoleWrapper(role)
   var gossipBarrierCounter = 0
@@ -83,7 +102,8 @@ abstract class TransitionSpec
       }
       runOn(fromRole) {
         enterBarrier("before-gossip-" + gossipBarrierCounter)
-        cluster.gossipTo(toRole) // send gossip
+        // send gossip
+        cluster.clusterCore ! InternalClusterAction.SendGossipTo(toRole)
         // gossip chat will synchronize the views
         awaitCond((Set(fromRole, toRole) -- seenLatestGossip).isEmpty)
         enterBarrier("after-gossip-" + gossipBarrierCounter)
@@ -104,7 +124,7 @@ abstract class TransitionSpec
       cluster.isSingletonCluster must be(true)
       cluster.status must be(Joining)
       cluster.convergence.isDefined must be(true)
-      cluster.leaderActions()
+      leaderActions()
       cluster.status must be(Up)
     }

@@ -127,7 +147,7 @@ abstract class TransitionSpec
       enterBarrier("convergence-joining-2")

       runOn(leader(first, second)) {
-        cluster.leaderActions()
+        leaderActions()
         memberStatus(first) must be(Up)
         memberStatus(second) must be(Up)
       }
@@ -182,7 +202,7 @@ abstract class TransitionSpec
       enterBarrier("convergence-joining-3")

       runOn(leader(first, second, third)) {
-        cluster.leaderActions()
+        leaderActions()
         memberStatus(first) must be(Up)
         memberStatus(second) must be(Up)
         memberStatus(third) must be(Up)
@@ -200,7 +220,8 @@ abstract class TransitionSpec
       // first non-leader gossipTo the other non-leader
       nonLeader(first, second, third).head gossipTo nonLeader(first, second, third).tail.head
       runOn(nonLeader(first, second, third).head) {
-        cluster.gossipTo(nonLeader(first, second, third).tail.head)
+        // send gossip
+        cluster.clusterCore ! InternalClusterAction.SendGossipTo(nonLeader(first, second, third).tail.head)
       }
       runOn(nonLeader(first, second, third).tail.head) {
         memberStatus(third) must be(Up)
@@ -224,7 +245,7 @@ abstract class TransitionSpec
     "perform correct transitions when second becomes unavailble" taggedAs LongRunningTest in {
       runOn(third) {
         markNodeAsUnavailable(second)
-        cluster.reapUnreachableMembers()
+        reapUnreachable()
         cluster.latestGossip.overview.unreachable must contain(Member(second, Up))
         seenLatestGossip must be(Set(third))
       }
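The new `leaderActions` / `reapUnreachable` / `awaitPing` helpers above lean on one actor property: messages from the same sender are processed in FIFO order, so a `Ping` sent right after `LeaderActionsTick` is answered only after the tick has been handled, which turns the `Pong` into an acknowledgement. A stripped-down sketch of that fire-tick-then-await-ack pattern in plain Scala, with a queue standing in for the clusterCore actor's mailbox:

    object TickThenAckSketch extends App {
      // Stand-ins for the spec's internal messages.
      sealed trait Msg
      case object LeaderActionsTick extends Msg
      final case class Ping(id: Long) extends Msg

      // FIFO queue standing in for the clusterCore mailbox: same-sender
      // messages are handled strictly in enqueue order.
      val mailbox = scala.collection.mutable.Queue.empty[Msg]

      def process(): Unit = while (mailbox.nonEmpty) mailbox.dequeue() match {
        case LeaderActionsTick => println("leader actions performed")
        case Ping(id)          => println(s"pong($id) sent back to the test") // the ack
      }

      // Equivalent of leaderActions(): enqueue the tick, then a Ping; the
      // answer to the Ping can only come after the tick has been processed.
      mailbox.enqueue(LeaderActionsTick)
      mailbox.enqueue(Ping(1))
      process()
    }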

@@ -9,7 +9,7 @@ import akka.remote.testkit.MultiNodeSpec
 import akka.testkit._
 import com.typesafe.config.ConfigFactory
 import akka.actor.Address
-import akka.remote.testconductor.{RoleName, Direction}
+import akka.remote.testconductor.{ RoleName, Direction }
 import akka.util.duration._

 object UnreachableNodeRejoinsClusterMultiJvmSpec extends MultiNodeConfig {
@@ -26,7 +26,6 @@ class UnreachableNodeRejoinsClusterWithFailureDetectorPuppetMultiJvmNode2 extends UnreachableNodeRejoinsClusterSpec with FailureDetectorPuppetStrategy
 class UnreachableNodeRejoinsClusterWithFailureDetectorPuppetMultiJvmNode3 extends UnreachableNodeRejoinsClusterSpec with FailureDetectorPuppetStrategy
 class UnreachableNodeRejoinsClusterWithFailureDetectorPuppetMultiJvmNode4 extends UnreachableNodeRejoinsClusterSpec with FailureDetectorPuppetStrategy
-

 class UnreachableNodeRejoinsClusterWithAccrualFailureDetectorMultiJvmNode1 extends UnreachableNodeRejoinsClusterSpec with AccrualFailureDetectorStrategy
 class UnreachableNodeRejoinsClusterWithAccrualFailureDetectorMultiJvmNode2 extends UnreachableNodeRejoinsClusterSpec with AccrualFailureDetectorStrategy
 class UnreachableNodeRejoinsClusterWithAccrualFailureDetectorMultiJvmNode3 extends UnreachableNodeRejoinsClusterSpec with AccrualFailureDetectorStrategy
@@ -41,7 +40,6 @@ abstract class UnreachableNodeRejoinsClusterSpec
     roles.filterNot(_ == role)
   }
-

   lazy val sortedRoles = roles.sorted
   lazy val master = sortedRoles(0)
   lazy val victim = sortedRoles(1)
@@ -55,14 +53,19 @@ abstract class UnreachableNodeRejoinsClusterSpec
   "A cluster of " + roles.size + " members" must {

     "reach initial convergence" taggedAs LongRunningTest in {
-      awaitClusterUp(roles:_*)
+      awaitClusterUp(roles: _*)
       endBarrier
     }

     "mark a node as UNREACHABLE when we pull the network" taggedAs LongRunningTest in {
+      // let them send at least one heartbeat to each other after the gossip convergence
+      // because for new joining nodes we remove them from the failure detector when
+      // receive gossip
+      2.seconds.dilated.sleep
+
       runOn(first) {
         // pull network for victim node from all nodes
-        allBut(victim).foreach { roleName =>
+        allBut(victim).foreach { roleName ⇒
           testConductor.blackhole(victim, roleName, Direction.Both).await
         }
       }
@@ -74,24 +77,28 @@ abstract class UnreachableNodeRejoinsClusterSpec
       allButVictim.foreach(markNodeAsUnavailable(_))
       within(30 seconds) {
         // victim becomes all alone
-        awaitCond({ val gossip = cluster.latestGossip
+        awaitCond({
+          val gossip = cluster.latestGossip
           gossip.overview.unreachable.size == (roles.size - 1) &&
             gossip.members.size == 1 &&
-            gossip.members.forall(_.status == MemberStatus.Up) })
+            gossip.members.forall(_.status == MemberStatus.Up)
+        })
         cluster.latestGossip.overview.unreachable.map(_.address) must be((allButVictim map address).toSet)
         cluster.convergence.isDefined must be(false)
       }
     }

-    runOn(allButVictim:_*) {
+    runOn(allButVictim: _*) {
       markNodeAsUnavailable(victim)
       within(30 seconds) {
         // victim becomes unreachable
-        awaitCond({ val gossip = cluster.latestGossip
+        awaitCond({
+          val gossip = cluster.latestGossip
           gossip.overview.unreachable.size == 1 &&
             gossip.members.size == (roles.size - 1) &&
-            gossip.members.forall(_.status == MemberStatus.Up) })
-        awaitSeenSameState(allButVictim map address:_*)
+            gossip.members.forall(_.status == MemberStatus.Up)
+        })
+        awaitSeenSameState(allButVictim map address: _*)
         // still one unreachable
         cluster.latestGossip.overview.unreachable.size must be(1)
         cluster.latestGossip.overview.unreachable.head.address must be(node(victim).address)
@@ -108,7 +115,7 @@ abstract class UnreachableNodeRejoinsClusterSpec
       cluster down victim
     }

-    runOn(allBut(victim):_*) {
+    runOn(allBut(victim): _*) {
       awaitUpConvergence(roles.size - 1, Seq(victim))
     }

@@ -118,7 +125,7 @@ abstract class UnreachableNodeRejoinsClusterSpec
     "allow node to REJOIN when the network is plugged back in" taggedAs LongRunningTest in {
       runOn(first) {
         // put the network back in
-        allBut(victim).foreach { roleName =>
+        allBut(victim).foreach { roleName ⇒
           testConductor.passThrough(victim, roleName, Direction.Both).await
         }
       }