Make joining to the same node multiple times work, and reenable blackhole test. See #2930
parent f18575e251
commit 5827a27b94

5 changed files with 57 additions and 27 deletions
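The core of the fix shows up in the ClusterCoreDaemon hunks below: a tryingToJoinWith marker turns a repeated Join to the same address into a no-op instead of another state wipe. A minimal standalone Scala sketch of that guard (JoinGuardSketch, Address and localState are illustrative names, not code from this commit):

    // Remember which address we are currently trying to join, and only wipe
    // local state when a Join targets a different address.
    object JoinGuardSketch {
      final case class Address(host: String, port: Int)

      var tryingToJoinWith: Option[Address] = None
      var localState: List[String] = List("gossip", "failure-detector-history")

      def join(address: Address): Unit =
        // Option.forall is true when the Option is empty, so the first join always wipes
        if (tryingToJoinWith.forall(_ != address)) {
          tryingToJoinWith = Some(address)
          localState = Nil // start fresh towards the new target
        } // a repeated join to the same address leaves the in-progress state alone

      def main(args: Array[String]): Unit = {
        val seed = Address("127.0.0.1", 2551)
        join(seed)
        localState = List("partial gossip") // some progress towards joining
        join(seed)                           // retry against the same node
        println(localState.nonEmpty)         // true: the retry did not wipe anything
        join(Address("127.0.0.1", 2552))     // different target
        println(localState.isEmpty)          // true: state wiped for the new join
      }
    }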
@@ -239,6 +239,8 @@ private[cluster] final class ClusterCoreDaemon(publisher: ActorRef) extends Acto
   var seedNodeProcess: Option[ActorRef] = None
 
+  var tryingToJoinWith: Option[Address] = None
+
   /**
    * Looks up and returns the remote cluster command connection for the specific address.
    */
 
@@ -341,7 +343,6 @@ private[cluster] final class ClusterCoreDaemon(publisher: ActorRef) extends Acto
         log.warning("Trying to join member with wrong ActorSystem name, but was ignored, expected [{}] but was [{}]",
           selfAddress.system, address.system)
       else if (!latestGossip.members.exists(_.address == address)) {
-
         // to support manual join when joining to seed nodes is stuck (no seed nodes available)
         val snd = sender
         seedNodeProcess match {
@@ -355,15 +356,18 @@ private[cluster] final class ClusterCoreDaemon(publisher: ActorRef) extends Acto
           case None ⇒ // no seedNodeProcess in progress
         }
 
-        // wipe our state since a node that joins a cluster must be empty
-        latestGossip = Gossip.empty
-        // wipe the failure detector since we are starting fresh and shouldn't care about the past
-        failureDetector.reset()
-        // wipe the publisher since we are starting fresh
-        publisher ! PublishStart
+        // only wipe the state if we're not in the process of joining this address
+        if (tryingToJoinWith.forall(_ != address)) {
+          tryingToJoinWith = Some(address)
+          // wipe our state since a node that joins a cluster must be empty
+          latestGossip = Gossip.empty
+          // wipe the failure detector since we are starting fresh and shouldn't care about the past
+          failureDetector.reset()
+          // wipe the publisher since we are starting fresh
+          publisher ! PublishStart
 
-        publish(latestGossip)
-
+          publish(latestGossip)
+        }
         context.become(initialized)
         if (address == selfAddress)
           joining(address, cluster.selfRoles)
@@ -517,6 +521,10 @@ private[cluster] final class ClusterCoreDaemon(publisher: ActorRef) extends Acto
       } else if (localGossip.overview.isNonDownUnreachable(from)) {
         log.debug("Ignoring received gossip from unreachable [{}] ", from)
       } else {
+        // if we're in the remote gossip and not Removed, then we're not joining
+        if (tryingToJoinWith.nonEmpty && remoteGossip.member(selfAddress).status != Removed)
+          tryingToJoinWith = None
+
         val comparison = remoteGossip.version tryCompareTo localGossip.version
         val conflict = comparison.isEmpty
 
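In the gossip-receive hunk above, the surrounding context decides how to merge by comparing version vectors: remoteGossip.version tryCompareTo localGossip.version returns None when neither history dominates, and that empty comparison is what flags conflicting gossip. A small self-contained sketch of the idea, using an illustrative VClock rather than Akka's VectorClock:

    // tryCompareTo on a partial order: Some(-1/0/1) when comparable, None when concurrent.
    object VersionCompareSketch {
      final case class VClock(counters: Map[String, Long]) {
        def tryCompareTo(that: VClock): Option[Int] = {
          val keys = counters.keySet ++ that.counters.keySet
          val le = keys.forall(k => counters.getOrElse(k, 0L) <= that.counters.getOrElse(k, 0L))
          val ge = keys.forall(k => counters.getOrElse(k, 0L) >= that.counters.getOrElse(k, 0L))
          (le, ge) match {
            case (true, true)   => Some(0)  // identical histories
            case (true, false)  => Some(-1) // strictly older
            case (false, true)  => Some(1)  // strictly newer
            case (false, false) => None     // concurrent: conflicting gossip to merge
          }
        }
      }

      def main(args: Array[String]): Unit = {
        val local  = VClock(Map("a" -> 2L, "b" -> 1L))
        val remote = VClock(Map("a" -> 1L, "b" -> 2L))
        println(remote.tryCompareTo(local).isEmpty) // true: a conflict
      }
    }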
@@ -182,6 +182,27 @@ trait MultiNodeClusterSpec extends Suite with STMultiNodeSpec with WatchedByCoro
     enterBarrier(roles.map(_.name).mkString("-") + "-joined")
   }
 
+  /**
+   * Join the specific node within the given period by sending repeated join
+   * requests at periodic intervals until we succeed.
+   */
+  def joinWithin(joinNode: RoleName, max: Duration = remaining, interval: Duration = 1.second): Unit = {
+    def memberInState(member: Address, status: Seq[MemberStatus]): Boolean =
+      clusterView.members.exists { m ⇒ (m.address == member) && status.contains(m.status) }
+
+    cluster join joinNode
+    awaitCond({
+      clusterView.refreshCurrentState()
+      if (memberInState(joinNode, List(MemberStatus.up)) &&
+        memberInState(myself, List(MemberStatus.Joining, MemberStatus.Up)))
+        true
+      else {
+        cluster join joinNode
+        false
+      }
+    }, max, interval)
+  }
+
   /**
    * Assert that the member addresses match the expected addresses in the
    * sort order used by the cluster.
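The new joinWithin helper above keeps re-sending cluster join until the target is Up and this node is Joining or Up, which is only safe now that the daemon treats repeated joins to the same address as idempotent. A standalone sketch of the same retry-until-deadline pattern (retryWithin is an illustrative helper, not part of the Akka testkit):

    import scala.concurrent.duration._

    object RetryWithinSketch {
      // Re-issue an idempotent action until the condition holds or the deadline passes.
      def retryWithin(max: FiniteDuration, interval: FiniteDuration)(action: () => Unit)(done: () => Boolean): Boolean = {
        val deadline = max.fromNow
        action() // first attempt
        while (!done() && deadline.hasTimeLeft()) {
          Thread.sleep(interval.toMillis)
          if (!done()) action() // e.g. send another Join to the same node
        }
        done()
      }

      def main(args: Array[String]): Unit = {
        var attempts = 0
        val ok = retryWithin(2.seconds, 100.millis)(() => attempts += 1)(() => attempts >= 3)
        println(s"succeeded=$ok after $attempts attempts")
      }
    }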
@@ -25,10 +25,14 @@ case class UnreachableNodeRejoinsClusterMultiNodeConfig(failureDetectorPuppet: B
 
   commonConfig(ConfigFactory.parseString(
     """
+      # this setting is here to limit the number of retries and failures while the
+      # node is being blackholed
+      akka.remote.failure-detector.retry-gate-closed-for = 500 ms
+
       akka.remote.log-remote-lifecycle-events = off
       akka.cluster.publish-stats-interval = 0s
       akka.loglevel = INFO
-      """).withFallback(debugConfig(on = false).withFallback(MultiNodeClusterSpec.clusterConfig)))
+      """).withFallback(debugConfig(on = false).withFallback(MultiNodeClusterSpec.clusterConfig(failureDetectorPuppet))))
 
   testTransport(on = true)
 }
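The config hunk layers the per-spec overrides on top of the shared cluster config via withFallback, so the new retry-gate-closed-for setting applies only to this test. A minimal sketch of that layering with Typesafe Config (the DEBUG default and the object name are illustrative):

    import com.typesafe.config.ConfigFactory

    object ConfigFallbackSketch extends App {
      // Per-test overrides, written the same way as in the spec above.
      val testOverrides = ConfigFactory.parseString(
        """
          akka.remote.failure-detector.retry-gate-closed-for = 500 ms
          akka.loglevel = INFO
        """)
      // Stand-in for the shared defaults, e.g. MultiNodeClusterSpec.clusterConfig(...)
      val defaults = ConfigFactory.parseString("akka.loglevel = DEBUG")

      val merged = testOverrides.withFallback(defaults)
      println(merged.getString("akka.loglevel")) // INFO: the override wins
    }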
@@ -74,8 +78,7 @@ abstract class UnreachableNodeRejoinsClusterSpec(multiNodeConfig: UnreachableNod
       endBarrier
     }
 
-    // FIXME ignored due to ticket #2930 - timeout changing throttler mode
-    "mark a node as UNREACHABLE when we pull the network" taggedAs LongRunningTest ignore {
+    "mark a node as UNREACHABLE when we pull the network" taggedAs LongRunningTest in {
       // let them send at least one heartbeat to each other after the gossip convergence
       // because for new joining nodes we remove them from the failure detector when
       // receive gossip
@@ -125,8 +128,7 @@ abstract class UnreachableNodeRejoinsClusterSpec(multiNodeConfig: UnreachableNod
       endBarrier
     }
 
-    // FIXME ignored due to ticket #2930 - timeout changing throttler mode
-    "mark the node as DOWN" taggedAs LongRunningTest ignore {
+    "mark the node as DOWN" taggedAs LongRunningTest in {
       runOn(master) {
         cluster down victim
       }
@@ -135,13 +137,12 @@ abstract class UnreachableNodeRejoinsClusterSpec(multiNodeConfig: UnreachableNod
         awaitMembersUp(roles.size - 1, Set(victim))
         // eventually removed
         awaitCond(clusterView.unreachableMembers.isEmpty, 15 seconds)
-      }
 
+      }
       endBarrier
     }
 
-    // FIXME ignored due to ticket #2930 - timeout changing throttler mode
-    "allow node to REJOIN when the network is plugged back in" taggedAs LongRunningTest ignore {
+    "allow node to REJOIN when the network is plugged back in" taggedAs LongRunningTest in {
       runOn(first) {
         // put the network back in
         allBut(victim).foreach { roleName ⇒
@@ -152,7 +153,7 @@ abstract class UnreachableNodeRejoinsClusterSpec(multiNodeConfig: UnreachableNod
       enterBarrier("plug_in_victim")
 
       runOn(victim) {
-        cluster join master
+        joinWithin(master, 10.seconds)
       }
 
       awaitMembersUp(roles.size)
@@ -147,10 +147,10 @@ object ThrottlerTransportAdapter {
   * Management Command to force dissocation of an address.
   */
  @SerialVersionUID(1L)
- case class ForceDissociate(address: Address)
+ case class ForceDisassociate(address: Address)
 
  @SerialVersionUID(1L)
- case object ForceDissociateAck {
+ case object ForceDisassociateAck {
   /**
    * Java API: get the singleton instance
    */
@@ -174,8 +174,8 @@ class ThrottlerTransportAdapter(_wrappedTransport: Transport, _system: ExtendedA
     cmd match {
       case s: SetThrottle ⇒
         manager ? s map { case SetThrottleAck ⇒ true }
-      case f: ForceDissociate ⇒
-        manager ? f map { case ForceDissociateAck ⇒ true }
+      case f: ForceDisassociate ⇒
+        manager ? f map { case ForceDisassociateAck ⇒ true }
       case _ ⇒ wrappedTransport.managementCommand(cmd)
     }
   }
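The adapter answers a management command by asking the manager actor and mapping the expected Ack to a Boolean, the same shape as manager ? f map { case ForceDisassociateAck ⇒ true } above. A self-contained sketch of that ask-then-map pattern with an illustrative EchoActor (not part of the throttler code):

    import akka.actor.{ Actor, ActorSystem, Props }
    import akka.pattern.ask
    import akka.util.Timeout
    import scala.concurrent.Await
    import scala.concurrent.duration._

    object AskAckSketch extends App {
      case object Ping
      case object PingAck

      class EchoActor extends Actor {
        def receive = { case Ping => sender ! PingAck }
      }

      val system = ActorSystem("AskAckSketch")
      import system.dispatcher              // ExecutionContext for map
      implicit val timeout = Timeout(3.seconds)

      val echo = system.actorOf(Props[EchoActor], "echo")
      val acked = echo ? Ping map { case PingAck => true } // Future[Boolean]
      println(Await.result(acked, 3.seconds))              // true
      system.shutdown()                     // terminate() in current Akka versions
    }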
@@ -231,13 +231,13 @@ private[transport] class ThrottlerManager(wrappedTransport: Transport) extends A
         case _ ⇒ ok
       }
       Future.sequence(allAcks).map(_ ⇒ SetThrottleAck) pipeTo sender
-    case ForceDissociate(address) ⇒
+    case ForceDisassociate(address) ⇒
       val naked = nakedAddress(address)
       handleTable.foreach {
         case (`naked`, handle) ⇒ handle.disassociate()
         case _ ⇒
       }
-      sender ! ForceDissociateAck
+      sender ! ForceDisassociateAck
 
     case Checkin(origin, handle) ⇒
       val naked: Address = nakedAddress(origin)
@@ -74,10 +74,10 @@ class ThrottlerTransportAdapterSpec extends AkkaSpec(configA) with ImplicitSende
     Await.result(transport.managementCommand(SetThrottle(rootBAddress, direction, mode)), 3.seconds)
   }
 
-  def dissociate(): Boolean = {
+  def disassociate(): Boolean = {
     val rootBAddress = Address("akka", "systemB", "localhost", rootB.address.port.get)
     val transport = system.asInstanceOf[ExtendedActorSystem].provider.asInstanceOf[RemoteActorRefProvider].transport
-    Await.result(transport.managementCommand(ForceDissociate(rootBAddress)), 3.seconds)
+    Await.result(transport.managementCommand(ForceDisassociate(rootBAddress)), 3.seconds)
   }
 
   "ThrottlerTransportAdapter" must {
@@ -99,7 +99,7 @@ class ThrottlerTransportAdapterSpec extends AkkaSpec(configA) with ImplicitSende
 
       here ! "Blackhole 2"
       expectNoMsg(1.seconds)
-      dissociate() must be(true)
+      disassociate() must be(true)
      expectNoMsg(1.seconds)
 
      throttle(Direction.Both, Unthrottled) must be(true)