!clu #2307 Allow transition from unreachable to reachable

* Replace unreachable Set with Reachability table
* Unreachable members stay in member Set
* Downing a live member previously moved it to the unreachable Set,
  and it was then removed from there by the leader. That will not
  work when flipping back to reachable, so a Down member must
  be detected as unreachable before being removed. Similar
  to Exiting. A member shuts itself down if it sees itself as
  Down.
* Flip back to reachable when failure detector monitors it as
  available again
* ReachableMember event
* Can't ignore gossip from aggregated unreachable (see SurviveNetworkInstabilitySpec)
* Make use of ReachableMember event in cluster router
* End heartbeat when acknowledged, EndHeartbeatAck
* Remove nr-of-end-heartbeats from conf
* Full reachability info in JMX cluster status
* Don't use interval after unreachable for AccrualFailureDetector history
* Add QuarantinedEvent to remoting, used for Reachability.Terminated
* Prune reachability table when all reachable
* Update documentation
* Performance testing and optimizations
This commit is contained in:
Patrik Nordwall 2013-08-27 15:14:53 +02:00
parent beba5d9f76
commit dc9fe4f19c
43 changed files with 2425 additions and 1169 deletions

View file

@ -15,6 +15,7 @@ import akka.cluster.MemberStatus._
import akka.cluster.ClusterEvent._
import akka.dispatch.{ UnboundedMessageQueueSemantics, RequiresMessageQueue }
import scala.collection.breakOut
import akka.remote.QuarantinedEvent
/**
* Base trait for all cluster messages. All ClusterMessage's are serializable.
@ -264,13 +265,16 @@ private[cluster] class ClusterCoreDaemon(publisher: ActorRef) extends Actor with
Some(scheduler.schedule(PeriodicTasksInitialDelay.max(d), d, self, PublishStatsTick))
}
override def preStart(): Unit =
override def preStart(): Unit = {
context.system.eventStream.subscribe(self, classOf[QuarantinedEvent])
if (SeedNodes.isEmpty)
logInfo("No seed-nodes configured, manual cluster join required")
else
self ! JoinSeedNodes(SeedNodes)
}
override def postStop(): Unit = {
context.system.eventStream.unsubscribe(self)
gossipTask.cancel()
failureDetectorReaperTask.cancel()
leaderActionsTask.cancel()
@ -323,6 +327,7 @@ private[cluster] class ClusterCoreDaemon(publisher: ActorRef) extends Actor with
case ClusterUserAction.Leave(address) leaving(address)
case SendGossipTo(address) sendGossipTo(address)
case msg: SubscriptionMessage publisher forward msg
case QuarantinedEvent(address, uid) quarantined(UniqueAddress(address, uid))
case ClusterUserAction.JoinTo(address)
logInfo("Trying to join [{}] when already part of a cluster, ignoring", address)
case JoinSeedNodes(seedNodes)
@ -419,12 +424,11 @@ private[cluster] class ClusterCoreDaemon(publisher: ActorRef) extends Actor with
selfAddress.system, node.address.system)
else {
val localMembers = latestGossip.members
val localUnreachable = latestGossip.overview.unreachable
// check by address without uid to make sure that node with same host:port is not allowed
// to join until previous node with that host:port has been removed from the cluster
val alreadyMember = localMembers.exists(_.address == node.address)
val isUnreachable = localUnreachable.exists(_.address == node.address)
val isUnreachable = !latestGossip.overview.reachability.isReachable(node)
if (alreadyMember)
logInfo("Existing member [{}] is trying to join, ignoring", node)
@ -488,14 +492,13 @@ private[cluster] class ClusterCoreDaemon(publisher: ActorRef) extends Actor with
}
/**
* This method is called when a member sees itself as Exiting.
* This method is called when a member sees itself as Exiting or Down.
*/
def shutdown(): Unit = cluster.shutdown()
/**
* State transition to DOW.
* The node to DOWN is removed from the `members` set and put in the `unreachable` set (if not already there)
* and its status is set to DOWN. The node is also removed from the `seen` table.
* State transition to DOWN.
* Its status is set to DOWN. The node is also removed from the `seen` table.
*
* The node will eventually be removed by the leader, and only after removal a new node with same address can
* join the cluster through the normal joining procedure.
@ -505,46 +508,50 @@ private[cluster] class ClusterCoreDaemon(publisher: ActorRef) extends Actor with
val localMembers = localGossip.members
val localOverview = localGossip.overview
val localSeen = localOverview.seen
val localUnreachableMembers = localOverview.unreachable
val localReachability = localOverview.reachability
// 1. check if the node to DOWN is in the `members` set
val downedMember: Option[Member] =
localMembers.collectFirst { case m if m.address == address m.copy(status = Down) }
val newMembers = downedMember match {
// check if the node to DOWN is in the `members` set
localMembers.collectFirst { case m if m.address == address m.copy(status = Down) } match {
case Some(m)
logInfo("Marking node [{}] as [{}]", m.address, Down)
localMembers - m
case None localMembers
if (localReachability.isReachable(m.uniqueAddress))
logInfo("Marking node [{}] as [{}]", m.address, Down)
else
logInfo("Marking unreachable node [{}] as [{}]", m.address, Down)
// replace member (changed status)
val newMembers = localMembers - m + m
// remove nodes marked as DOWN from the `seen` table
val newSeen = localSeen - m.uniqueAddress
// update gossip overview
val newOverview = localOverview copy (seen = newSeen)
val newGossip = localGossip copy (members = newMembers, overview = newOverview) // update gossip
updateLatestGossip(newGossip)
publish(latestGossip)
case None
logInfo("Ignoring down of unknown node [{}] as [{}]", address)
}
// 2. check if the node to DOWN is in the `unreachable` set
val newUnreachableMembers =
localUnreachableMembers.map { member
// no need to DOWN members already DOWN
if (member.address == address && member.status != Down) {
logInfo("Marking unreachable node [{}] as [{}]", member.address, Down)
member copy (status = Down)
} else member
}
}
// 3. add the newly DOWNED members from the `members` (in step 1.) to the `newUnreachableMembers` set.
val newUnreachablePlusNewlyDownedMembers = newUnreachableMembers ++ downedMember
// 4. remove nodes marked as DOWN from the `seen` table
val newSeen = localSeen -- newUnreachablePlusNewlyDownedMembers.collect { case m if m.status == Down m.uniqueAddress }
// update gossip overview
val newOverview = localOverview copy (seen = newSeen, unreachable = newUnreachablePlusNewlyDownedMembers)
val newGossip = localGossip copy (overview = newOverview, members = newMembers) // update gossip
updateLatestGossip(newGossip)
publish(latestGossip)
/**
* Handles a node that has been reported as quarantined.
*
* If the node is a member according to the local gossip, its reachability entry
* is updated via `reachability.terminated(selfUniqueAddress, node)`, the new gossip
* is stored and published, and the node's address is then passed to `downing`.
* Nodes that are not members of the local gossip are ignored.
*
* @param node unique address of the quarantined node
*/
def quarantined(node: UniqueAddress): Unit = {
val localGossip = latestGossip
if (localGossip.hasMember(node)) {
// this node (selfUniqueAddress) is passed as the first argument, the quarantined node as the second
// NOTE(review): presumably (observer, subject) — confirm against Reachability.terminated
val newReachability = latestGossip.overview.reachability.terminated(selfUniqueAddress, node)
val newOverview = localGossip.overview copy (reachability = newReachability)
val newGossip = localGossip copy (overview = newOverview)
updateLatestGossip(newGossip)
log.warning("Cluster Node [{}] - Marking node as TERMINATED [{}], due to quarantine",
selfAddress, node.address)
publish(latestGossip)
// after recording the termination, also run the regular downing procedure for the address
downing(node.address)
}
}
def receiveGossipStatus(status: GossipStatus): Unit = {
val from = status.from
if (latestGossip.overview.unreachable.exists(_.uniqueAddress == from))
if (!latestGossip.overview.reachability.isReachable(selfUniqueAddress, from))
logInfo("Ignoring received gossip status from unreachable [{}] ", from)
else if (latestGossip.members.forall(_.uniqueAddress != from))
log.debug("Cluster Node [{}] - Ignoring received gossip status from unknown [{}]", selfAddress, from)
@ -578,10 +585,10 @@ private[cluster] class ClusterCoreDaemon(publisher: ActorRef) extends Actor with
if (envelope.to != selfUniqueAddress) {
logInfo("Ignoring received gossip intended for someone else, from [{}] to [{}]", from.address, envelope.to)
Ignored
} else if (remoteGossip.overview.unreachable.exists(_.address == selfAddress)) {
} else if (!remoteGossip.overview.reachability.isReachable(selfUniqueAddress)) {
logInfo("Ignoring received gossip with myself as unreachable, from [{}]", from.address)
Ignored
} else if (localGossip.overview.unreachable.exists(_.uniqueAddress == from)) {
} else if (!localGossip.overview.reachability.isReachable(selfUniqueAddress, from)) {
logInfo("Ignoring received gossip from unreachable [{}] ", from)
Ignored
} else if (localGossip.members.forall(_.uniqueAddress != from)) {
@ -634,7 +641,8 @@ private[cluster] class ClusterCoreDaemon(publisher: ActorRef) extends Actor with
publish(latestGossip)
if (latestGossip.member(selfUniqueAddress).status == Exiting)
val selfStatus = latestGossip.member(selfUniqueAddress).status
if (selfStatus == Exiting || selfStatus == Down)
shutdown()
else if (talkback) {
// send back gossip to sender when sender had different view, i.e. merge, or sender had
@ -653,23 +661,26 @@ private[cluster] class ClusterCoreDaemon(publisher: ActorRef) extends Actor with
def gossip(): Unit = {
log.debug("Cluster Node [{}] - Initiating new round of gossip", selfAddress)
if (!isSingletonCluster && isAvailable) {
if (!isSingletonCluster) {
val localGossip = latestGossip
val preferredGossipTargets: Vector[UniqueAddress] =
if (ThreadLocalRandom.current.nextDouble() < GossipDifferentViewProbability) { // If it's time to try to gossip to some nodes with a different view
// gossip to a random alive member with preference to a member with older gossip version
localGossip.members.collect { case m if !localGossip.seenByNode(m.uniqueAddress) m.uniqueAddress }(breakOut)
} else Vector.empty[UniqueAddress]
localGossip.members.collect {
case m if !localGossip.seenByNode(m.uniqueAddress) && validNodeForGossip(m.uniqueAddress)
m.uniqueAddress
}(breakOut)
} else Vector.empty
if (preferredGossipTargets.nonEmpty) {
val peer = selectRandomNode(preferredGossipTargets filterNot (_ == selfUniqueAddress))
val peer = selectRandomNode(preferredGossipTargets)
// send full gossip because it has different view
peer foreach gossipTo
} else {
// Fall back to localGossip; important to not accidentally use `map` of the SortedSet, since the original order is not preserved)
val peer = selectRandomNode(localGossip.members.toIndexedSeq.collect {
case m if m.uniqueAddress != selfUniqueAddress m.uniqueAddress
case m if validNodeForGossip(m.uniqueAddress) m.uniqueAddress
})
peer foreach { node
if (localGossip.seenByNode(node)) gossipStatusTo(node)
@ -684,8 +695,8 @@ private[cluster] class ClusterCoreDaemon(publisher: ActorRef) extends Actor with
* assigning partitions etc.
*/
def leaderActions(): Unit =
if (latestGossip.isLeader(selfUniqueAddress) && isAvailable) {
// only run the leader actions if we are the LEADER and available
if (latestGossip.isLeader(selfUniqueAddress)) {
// only run the leader actions if we are the LEADER
if (AutoDown)
leaderAutoDownActions()
@ -712,7 +723,6 @@ private[cluster] class ClusterCoreDaemon(publisher: ActorRef) extends Actor with
val localMembers = localGossip.members
val localOverview = localGossip.overview
val localSeen = localOverview.seen
val localUnreachableMembers = localOverview.unreachable
val hasPartionHandoffCompletedSuccessfully: Boolean = {
// TODO implement partion handoff and a check if it is completed - now just returns TRUE - e.g. has completed successfully
@ -726,9 +736,11 @@ private[cluster] class ClusterCoreDaemon(publisher: ActorRef) extends Actor with
}
def isJoiningToUp(m: Member): Boolean = m.status == Joining && enoughMembers
val (removedUnreachable, newUnreachable) = localUnreachableMembers partition { m
Gossip.removeUnreachableWithMemberStatus(m.status)
}
val removedUnreachable = for {
node localOverview.reachability.allUnreachableOrTerminated
m = localGossip.member(node)
if Gossip.removeUnreachableWithMemberStatus(m.status)
} yield m
val changedMembers = localMembers collect {
var upNumber = 0
@ -758,12 +770,15 @@ private[cluster] class ClusterCoreDaemon(publisher: ActorRef) extends Actor with
// handle changes
// replace changed members
val newMembers = localMembers -- changedMembers ++ changedMembers
val newMembers = changedMembers ++ localMembers -- removedUnreachable
// removing REMOVED nodes from the `seen` table
val newSeen = localSeen -- removedUnreachable.map(_.uniqueAddress)
val newOverview = localOverview copy (seen = newSeen, unreachable = newUnreachable) // update gossip overview
val newGossip = localGossip copy (members = newMembers, overview = newOverview) // update gossip
val removed = removedUnreachable.map(_.uniqueAddress)
val newSeen = localSeen -- removed
// removing REMOVED nodes from the `reachability` table
val newReachability = localOverview.reachability.remove(removed)
val newOverview = localOverview copy (seen = newSeen, reachability = newReachability)
val newGossip = localGossip copy (members = newMembers, overview = newOverview)
updateLatestGossip(newGossip)
@ -802,25 +817,27 @@ private[cluster] class ClusterCoreDaemon(publisher: ActorRef) extends Actor with
*/
def leaderAutoDownActions(): Unit = {
val localGossip = latestGossip
val localMembers = localGossip.members
val localOverview = localGossip.overview
val localSeen = localOverview.seen
val localUnreachableMembers = localOverview.unreachable
val changedUnreachableMembers = localUnreachableMembers collect {
case m if !Gossip.convergenceSkipUnreachableWithMemberStatus(m.status) m copy (status = Down)
}
val changedUnreachableMembers = for {
node localOverview.reachability.allUnreachableOrTerminated
m = localGossip.member(node)
if m.status != Removed && !Gossip.convergenceSkipUnreachableWithMemberStatus(m.status)
} yield m.copy(status = Down)
if (changedUnreachableMembers.nonEmpty) {
// handle changes
// replace changed unreachable
val newUnreachableMembers = localUnreachableMembers -- changedUnreachableMembers ++ changedUnreachableMembers
val newMembers = localMembers -- changedUnreachableMembers ++ changedUnreachableMembers
// removing nodes marked as Down/Exiting from the `seen` table
val newSeen = localSeen -- changedUnreachableMembers.map(_.uniqueAddress)
val newOverview = localOverview copy (seen = newSeen, unreachable = newUnreachableMembers) // update gossip overview
val newGossip = localGossip copy (overview = newOverview) // update gossip
val newOverview = localOverview copy (seen = newSeen) // update gossip overview
val newGossip = localGossip copy (members = newMembers, overview = newOverview) // update gossip
updateLatestGossip(newGossip)
@ -834,39 +851,54 @@ private[cluster] class ClusterCoreDaemon(publisher: ActorRef) extends Actor with
}
/**
* Reaps the unreachable members (moves them to the `unreachable` list in the cluster overview) according to the failure detector's verdict.
* Reaps the unreachable members according to the failure detector's verdict.
*/
def reapUnreachableMembers(): Unit = {
if (!isSingletonCluster && isAvailable) {
// only scrutinize if we are a non-singleton cluster and available
if (!isSingletonCluster) {
// only scrutinize if we are a non-singleton cluster
val localGossip = latestGossip
val localOverview = localGossip.overview
val localMembers = localGossip.members
val localUnreachableMembers = localGossip.overview.unreachable
val newlyDetectedUnreachableMembers = localMembers filterNot { member
member.uniqueAddress == selfUniqueAddress || failureDetector.isAvailable(member.address)
member.uniqueAddress == selfUniqueAddress ||
localOverview.reachability.status(selfUniqueAddress, member.uniqueAddress) == Reachability.Unreachable ||
localOverview.reachability.status(selfUniqueAddress, member.uniqueAddress) == Reachability.Terminated ||
failureDetector.isAvailable(member.address)
}
if (newlyDetectedUnreachableMembers.nonEmpty) {
val newlyDetectedReachableMembers = localOverview.reachability.allUnreachableFrom(selfUniqueAddress) collect {
case node if node != selfUniqueAddress && failureDetector.isAvailable(node.address)
localGossip.member(node)
}
val newMembers = localMembers -- newlyDetectedUnreachableMembers
val newUnreachableMembers = localUnreachableMembers ++ newlyDetectedUnreachableMembers
if (newlyDetectedUnreachableMembers.nonEmpty || newlyDetectedReachableMembers.nonEmpty) {
val newOverview = localOverview copy (unreachable = newUnreachableMembers)
val newGossip = localGossip copy (overview = newOverview, members = newMembers)
val newReachability1 = (localOverview.reachability /: newlyDetectedUnreachableMembers) {
(reachability, m) reachability.unreachable(selfUniqueAddress, m.uniqueAddress)
}
val newReachability2 = (newReachability1 /: newlyDetectedReachableMembers) {
(reachability, m) reachability.reachable(selfUniqueAddress, m.uniqueAddress)
}
updateLatestGossip(newGossip)
if (newReachability2 ne localOverview.reachability) {
val newOverview = localOverview copy (reachability = newReachability2)
val newGossip = localGossip copy (overview = newOverview)
val (exiting, nonExiting) = newlyDetectedUnreachableMembers.partition(_.status == Exiting)
if (nonExiting.nonEmpty)
log.error("Cluster Node [{}] - Marking node(s) as UNREACHABLE [{}]", selfAddress, nonExiting.mkString(", "))
if (exiting.nonEmpty)
logInfo("Marking exiting node(s) as UNREACHABLE [{}]. This is expected and they will be removed.",
exiting.mkString(", "))
updateLatestGossip(newGossip)
publish(latestGossip)
val (exiting, nonExiting) = newlyDetectedUnreachableMembers.partition(_.status == Exiting)
if (nonExiting.nonEmpty)
log.warning("Cluster Node [{}] - Marking node(s) as UNREACHABLE [{}]", selfAddress, nonExiting.mkString(", "))
if (exiting.nonEmpty)
logInfo("Marking exiting node(s) as UNREACHABLE [{}]. This is expected and they will be removed.",
exiting.mkString(", "))
if (newlyDetectedReachableMembers.nonEmpty)
logInfo("Marking node(s) as REACHABLE [{}]", newlyDetectedReachableMembers.mkString(", "))
publish(latestGossip)
}
}
}
}
@ -877,8 +909,6 @@ private[cluster] class ClusterCoreDaemon(publisher: ActorRef) extends Actor with
def isSingletonCluster: Boolean = latestGossip.isSingletonCluster
def isAvailable: Boolean = !latestGossip.isUnreachable(selfUniqueAddress)
// needed for tests
def sendGossipTo(address: Address): Unit = {
latestGossip.members.foreach(m
@ -906,7 +936,8 @@ private[cluster] class ClusterCoreDaemon(publisher: ActorRef) extends Actor with
clusterCore(node.address) ! GossipStatus(selfUniqueAddress, latestGossip.version)
def validNodeForGossip(node: UniqueAddress): Boolean =
(node != selfUniqueAddress && latestGossip.members.exists(_.uniqueAddress == node))
(node != selfUniqueAddress && latestGossip.hasMember(node) &&
latestGossip.overview.reachability.isReachable(node))
def updateLatestGossip(newGossip: Gossip): Unit = {
// Updating the vclock version for the changes