!clu #2307 Allow transition from unreachable to reachable
* Replace unreachable Set with Reachability table * Unreachable members stay in member Set * Downing a live member was moved it to the unreachable Set, and then removed from there by the leader. That will not work when flipping back to reachable, so a Down member must be detected as unreachable before beeing removed. Similar to Exiting. Member shuts down itself if it sees itself as Down. * Flip back to reachable when failure detector monitors it as available again * ReachableMember event * Can't ignore gossip from aggregated unreachable (see SurviveNetworkInstabilitySpec) * Make use of ReachableMember event in cluster router * End heartbeat when acknowledged, EndHeartbeatAck * Remove nr-of-end-heartbeats from conf * Full reachability info in JMX cluster status * Don't use interval after unreachable for AccrualFailureDetector history * Add QuarantinedEvent to remoting, used for Reachability.Terminated * Prune reachability table when all reachable * Update documentation * Performance testing and optimizations
This commit is contained in:
parent
beba5d9f76
commit
dc9fe4f19c
43 changed files with 2425 additions and 1169 deletions
|
|
@ -15,6 +15,7 @@ import akka.cluster.MemberStatus._
|
|||
import akka.cluster.ClusterEvent._
|
||||
import akka.dispatch.{ UnboundedMessageQueueSemantics, RequiresMessageQueue }
|
||||
import scala.collection.breakOut
|
||||
import akka.remote.QuarantinedEvent
|
||||
|
||||
/**
|
||||
* Base trait for all cluster messages. All ClusterMessage's are serializable.
|
||||
|
|
@ -264,13 +265,16 @@ private[cluster] class ClusterCoreDaemon(publisher: ActorRef) extends Actor with
|
|||
Some(scheduler.schedule(PeriodicTasksInitialDelay.max(d), d, self, PublishStatsTick))
|
||||
}
|
||||
|
||||
override def preStart(): Unit =
|
||||
override def preStart(): Unit = {
|
||||
context.system.eventStream.subscribe(self, classOf[QuarantinedEvent])
|
||||
if (SeedNodes.isEmpty)
|
||||
logInfo("No seed-nodes configured, manual cluster join required")
|
||||
else
|
||||
self ! JoinSeedNodes(SeedNodes)
|
||||
}
|
||||
|
||||
override def postStop(): Unit = {
|
||||
context.system.eventStream.unsubscribe(self)
|
||||
gossipTask.cancel()
|
||||
failureDetectorReaperTask.cancel()
|
||||
leaderActionsTask.cancel()
|
||||
|
|
@ -323,6 +327,7 @@ private[cluster] class ClusterCoreDaemon(publisher: ActorRef) extends Actor with
|
|||
case ClusterUserAction.Leave(address) ⇒ leaving(address)
|
||||
case SendGossipTo(address) ⇒ sendGossipTo(address)
|
||||
case msg: SubscriptionMessage ⇒ publisher forward msg
|
||||
case QuarantinedEvent(address, uid) ⇒ quarantined(UniqueAddress(address, uid))
|
||||
case ClusterUserAction.JoinTo(address) ⇒
|
||||
logInfo("Trying to join [{}] when already part of a cluster, ignoring", address)
|
||||
case JoinSeedNodes(seedNodes) ⇒
|
||||
|
|
@ -419,12 +424,11 @@ private[cluster] class ClusterCoreDaemon(publisher: ActorRef) extends Actor with
|
|||
selfAddress.system, node.address.system)
|
||||
else {
|
||||
val localMembers = latestGossip.members
|
||||
val localUnreachable = latestGossip.overview.unreachable
|
||||
|
||||
// check by address without uid to make sure that node with same host:port is not allowed
|
||||
// to join until previous node with that host:port has been removed from the cluster
|
||||
val alreadyMember = localMembers.exists(_.address == node.address)
|
||||
val isUnreachable = localUnreachable.exists(_.address == node.address)
|
||||
val isUnreachable = !latestGossip.overview.reachability.isReachable(node)
|
||||
|
||||
if (alreadyMember)
|
||||
logInfo("Existing member [{}] is trying to join, ignoring", node)
|
||||
|
|
@ -488,14 +492,13 @@ private[cluster] class ClusterCoreDaemon(publisher: ActorRef) extends Actor with
|
|||
}
|
||||
|
||||
/**
|
||||
* This method is called when a member sees itself as Exiting.
|
||||
* This method is called when a member sees itself as Exiting or Down.
|
||||
*/
|
||||
def shutdown(): Unit = cluster.shutdown()
|
||||
|
||||
/**
|
||||
* State transition to DOW.
|
||||
* The node to DOWN is removed from the `members` set and put in the `unreachable` set (if not already there)
|
||||
* and its status is set to DOWN. The node is also removed from the `seen` table.
|
||||
* State transition to DOWN.
|
||||
* Its status is set to DOWN. The node is also removed from the `seen` table.
|
||||
*
|
||||
* The node will eventually be removed by the leader, and only after removal a new node with same address can
|
||||
* join the cluster through the normal joining procedure.
|
||||
|
|
@ -505,46 +508,50 @@ private[cluster] class ClusterCoreDaemon(publisher: ActorRef) extends Actor with
|
|||
val localMembers = localGossip.members
|
||||
val localOverview = localGossip.overview
|
||||
val localSeen = localOverview.seen
|
||||
val localUnreachableMembers = localOverview.unreachable
|
||||
val localReachability = localOverview.reachability
|
||||
|
||||
// 1. check if the node to DOWN is in the `members` set
|
||||
val downedMember: Option[Member] =
|
||||
localMembers.collectFirst { case m if m.address == address ⇒ m.copy(status = Down) }
|
||||
|
||||
val newMembers = downedMember match {
|
||||
// check if the node to DOWN is in the `members` set
|
||||
localMembers.collectFirst { case m if m.address == address ⇒ m.copy(status = Down) } match {
|
||||
case Some(m) ⇒
|
||||
logInfo("Marking node [{}] as [{}]", m.address, Down)
|
||||
localMembers - m
|
||||
case None ⇒ localMembers
|
||||
if (localReachability.isReachable(m.uniqueAddress))
|
||||
logInfo("Marking node [{}] as [{}]", m.address, Down)
|
||||
else
|
||||
logInfo("Marking unreachable node [{}] as [{}]", m.address, Down)
|
||||
|
||||
// replace member (changed status)
|
||||
val newMembers = localMembers - m + m
|
||||
// remove nodes marked as DOWN from the `seen` table
|
||||
val newSeen = localSeen - m.uniqueAddress
|
||||
|
||||
// update gossip overview
|
||||
val newOverview = localOverview copy (seen = newSeen)
|
||||
val newGossip = localGossip copy (members = newMembers, overview = newOverview) // update gossip
|
||||
updateLatestGossip(newGossip)
|
||||
|
||||
publish(latestGossip)
|
||||
case None ⇒
|
||||
logInfo("Ignoring down of unknown node [{}] as [{}]", address)
|
||||
}
|
||||
|
||||
// 2. check if the node to DOWN is in the `unreachable` set
|
||||
val newUnreachableMembers =
|
||||
localUnreachableMembers.map { member ⇒
|
||||
// no need to DOWN members already DOWN
|
||||
if (member.address == address && member.status != Down) {
|
||||
logInfo("Marking unreachable node [{}] as [{}]", member.address, Down)
|
||||
member copy (status = Down)
|
||||
} else member
|
||||
}
|
||||
}
|
||||
|
||||
// 3. add the newly DOWNED members from the `members` (in step 1.) to the `newUnreachableMembers` set.
|
||||
val newUnreachablePlusNewlyDownedMembers = newUnreachableMembers ++ downedMember
|
||||
|
||||
// 4. remove nodes marked as DOWN from the `seen` table
|
||||
val newSeen = localSeen -- newUnreachablePlusNewlyDownedMembers.collect { case m if m.status == Down ⇒ m.uniqueAddress }
|
||||
|
||||
// update gossip overview
|
||||
val newOverview = localOverview copy (seen = newSeen, unreachable = newUnreachablePlusNewlyDownedMembers)
|
||||
val newGossip = localGossip copy (overview = newOverview, members = newMembers) // update gossip
|
||||
updateLatestGossip(newGossip)
|
||||
|
||||
publish(latestGossip)
|
||||
def quarantined(node: UniqueAddress): Unit = {
|
||||
val localGossip = latestGossip
|
||||
if (localGossip.hasMember(node)) {
|
||||
val newReachability = latestGossip.overview.reachability.terminated(selfUniqueAddress, node)
|
||||
val newOverview = localGossip.overview copy (reachability = newReachability)
|
||||
val newGossip = localGossip copy (overview = newOverview)
|
||||
updateLatestGossip(newGossip)
|
||||
log.warning("Cluster Node [{}] - Marking node as TERMINATED [{}], due to quarantine",
|
||||
selfAddress, node.address)
|
||||
publish(latestGossip)
|
||||
downing(node.address)
|
||||
}
|
||||
}
|
||||
|
||||
def receiveGossipStatus(status: GossipStatus): Unit = {
|
||||
val from = status.from
|
||||
if (latestGossip.overview.unreachable.exists(_.uniqueAddress == from))
|
||||
if (!latestGossip.overview.reachability.isReachable(selfUniqueAddress, from))
|
||||
logInfo("Ignoring received gossip status from unreachable [{}] ", from)
|
||||
else if (latestGossip.members.forall(_.uniqueAddress != from))
|
||||
log.debug("Cluster Node [{}] - Ignoring received gossip status from unknown [{}]", selfAddress, from)
|
||||
|
|
@ -578,10 +585,10 @@ private[cluster] class ClusterCoreDaemon(publisher: ActorRef) extends Actor with
|
|||
if (envelope.to != selfUniqueAddress) {
|
||||
logInfo("Ignoring received gossip intended for someone else, from [{}] to [{}]", from.address, envelope.to)
|
||||
Ignored
|
||||
} else if (remoteGossip.overview.unreachable.exists(_.address == selfAddress)) {
|
||||
} else if (!remoteGossip.overview.reachability.isReachable(selfUniqueAddress)) {
|
||||
logInfo("Ignoring received gossip with myself as unreachable, from [{}]", from.address)
|
||||
Ignored
|
||||
} else if (localGossip.overview.unreachable.exists(_.uniqueAddress == from)) {
|
||||
} else if (!localGossip.overview.reachability.isReachable(selfUniqueAddress, from)) {
|
||||
logInfo("Ignoring received gossip from unreachable [{}] ", from)
|
||||
Ignored
|
||||
} else if (localGossip.members.forall(_.uniqueAddress != from)) {
|
||||
|
|
@ -634,7 +641,8 @@ private[cluster] class ClusterCoreDaemon(publisher: ActorRef) extends Actor with
|
|||
|
||||
publish(latestGossip)
|
||||
|
||||
if (latestGossip.member(selfUniqueAddress).status == Exiting)
|
||||
val selfStatus = latestGossip.member(selfUniqueAddress).status
|
||||
if (selfStatus == Exiting || selfStatus == Down)
|
||||
shutdown()
|
||||
else if (talkback) {
|
||||
// send back gossip to sender when sender had different view, i.e. merge, or sender had
|
||||
|
|
@ -653,23 +661,26 @@ private[cluster] class ClusterCoreDaemon(publisher: ActorRef) extends Actor with
|
|||
def gossip(): Unit = {
|
||||
log.debug("Cluster Node [{}] - Initiating new round of gossip", selfAddress)
|
||||
|
||||
if (!isSingletonCluster && isAvailable) {
|
||||
if (!isSingletonCluster) {
|
||||
val localGossip = latestGossip
|
||||
|
||||
val preferredGossipTargets: Vector[UniqueAddress] =
|
||||
if (ThreadLocalRandom.current.nextDouble() < GossipDifferentViewProbability) { // If it's time to try to gossip to some nodes with a different view
|
||||
// gossip to a random alive member with preference to a member with older gossip version
|
||||
localGossip.members.collect { case m if !localGossip.seenByNode(m.uniqueAddress) ⇒ m.uniqueAddress }(breakOut)
|
||||
} else Vector.empty[UniqueAddress]
|
||||
localGossip.members.collect {
|
||||
case m if !localGossip.seenByNode(m.uniqueAddress) && validNodeForGossip(m.uniqueAddress) ⇒
|
||||
m.uniqueAddress
|
||||
}(breakOut)
|
||||
} else Vector.empty
|
||||
|
||||
if (preferredGossipTargets.nonEmpty) {
|
||||
val peer = selectRandomNode(preferredGossipTargets filterNot (_ == selfUniqueAddress))
|
||||
val peer = selectRandomNode(preferredGossipTargets)
|
||||
// send full gossip because it has different view
|
||||
peer foreach gossipTo
|
||||
} else {
|
||||
// Fall back to localGossip; important to not accidentally use `map` of the SortedSet, since the original order is not preserved)
|
||||
val peer = selectRandomNode(localGossip.members.toIndexedSeq.collect {
|
||||
case m if m.uniqueAddress != selfUniqueAddress ⇒ m.uniqueAddress
|
||||
case m if validNodeForGossip(m.uniqueAddress) ⇒ m.uniqueAddress
|
||||
})
|
||||
peer foreach { node ⇒
|
||||
if (localGossip.seenByNode(node)) gossipStatusTo(node)
|
||||
|
|
@ -684,8 +695,8 @@ private[cluster] class ClusterCoreDaemon(publisher: ActorRef) extends Actor with
|
|||
* assigning partitions etc.
|
||||
*/
|
||||
def leaderActions(): Unit =
|
||||
if (latestGossip.isLeader(selfUniqueAddress) && isAvailable) {
|
||||
// only run the leader actions if we are the LEADER and available
|
||||
if (latestGossip.isLeader(selfUniqueAddress)) {
|
||||
// only run the leader actions if we are the LEADER
|
||||
|
||||
if (AutoDown)
|
||||
leaderAutoDownActions()
|
||||
|
|
@ -712,7 +723,6 @@ private[cluster] class ClusterCoreDaemon(publisher: ActorRef) extends Actor with
|
|||
val localMembers = localGossip.members
|
||||
val localOverview = localGossip.overview
|
||||
val localSeen = localOverview.seen
|
||||
val localUnreachableMembers = localOverview.unreachable
|
||||
|
||||
val hasPartionHandoffCompletedSuccessfully: Boolean = {
|
||||
// TODO implement partion handoff and a check if it is completed - now just returns TRUE - e.g. has completed successfully
|
||||
|
|
@ -726,9 +736,11 @@ private[cluster] class ClusterCoreDaemon(publisher: ActorRef) extends Actor with
|
|||
}
|
||||
def isJoiningToUp(m: Member): Boolean = m.status == Joining && enoughMembers
|
||||
|
||||
val (removedUnreachable, newUnreachable) = localUnreachableMembers partition { m ⇒
|
||||
Gossip.removeUnreachableWithMemberStatus(m.status)
|
||||
}
|
||||
val removedUnreachable = for {
|
||||
node ← localOverview.reachability.allUnreachableOrTerminated
|
||||
m = localGossip.member(node)
|
||||
if Gossip.removeUnreachableWithMemberStatus(m.status)
|
||||
} yield m
|
||||
|
||||
val changedMembers = localMembers collect {
|
||||
var upNumber = 0
|
||||
|
|
@ -758,12 +770,15 @@ private[cluster] class ClusterCoreDaemon(publisher: ActorRef) extends Actor with
|
|||
// handle changes
|
||||
|
||||
// replace changed members
|
||||
val newMembers = localMembers -- changedMembers ++ changedMembers
|
||||
val newMembers = changedMembers ++ localMembers -- removedUnreachable
|
||||
|
||||
// removing REMOVED nodes from the `seen` table
|
||||
val newSeen = localSeen -- removedUnreachable.map(_.uniqueAddress)
|
||||
val newOverview = localOverview copy (seen = newSeen, unreachable = newUnreachable) // update gossip overview
|
||||
val newGossip = localGossip copy (members = newMembers, overview = newOverview) // update gossip
|
||||
val removed = removedUnreachable.map(_.uniqueAddress)
|
||||
val newSeen = localSeen -- removed
|
||||
// removing REMOVED nodes from the `reachability` table
|
||||
val newReachability = localOverview.reachability.remove(removed)
|
||||
val newOverview = localOverview copy (seen = newSeen, reachability = newReachability)
|
||||
val newGossip = localGossip copy (members = newMembers, overview = newOverview)
|
||||
|
||||
updateLatestGossip(newGossip)
|
||||
|
||||
|
|
@ -802,25 +817,27 @@ private[cluster] class ClusterCoreDaemon(publisher: ActorRef) extends Actor with
|
|||
*/
|
||||
def leaderAutoDownActions(): Unit = {
|
||||
val localGossip = latestGossip
|
||||
val localMembers = localGossip.members
|
||||
val localOverview = localGossip.overview
|
||||
val localSeen = localOverview.seen
|
||||
val localUnreachableMembers = localOverview.unreachable
|
||||
|
||||
val changedUnreachableMembers = localUnreachableMembers collect {
|
||||
case m if !Gossip.convergenceSkipUnreachableWithMemberStatus(m.status) ⇒ m copy (status = Down)
|
||||
}
|
||||
val changedUnreachableMembers = for {
|
||||
node ← localOverview.reachability.allUnreachableOrTerminated
|
||||
m = localGossip.member(node)
|
||||
if m.status != Removed && !Gossip.convergenceSkipUnreachableWithMemberStatus(m.status)
|
||||
} yield m.copy(status = Down)
|
||||
|
||||
if (changedUnreachableMembers.nonEmpty) {
|
||||
// handle changes
|
||||
|
||||
// replace changed unreachable
|
||||
val newUnreachableMembers = localUnreachableMembers -- changedUnreachableMembers ++ changedUnreachableMembers
|
||||
val newMembers = localMembers -- changedUnreachableMembers ++ changedUnreachableMembers
|
||||
|
||||
// removing nodes marked as Down/Exiting from the `seen` table
|
||||
val newSeen = localSeen -- changedUnreachableMembers.map(_.uniqueAddress)
|
||||
|
||||
val newOverview = localOverview copy (seen = newSeen, unreachable = newUnreachableMembers) // update gossip overview
|
||||
val newGossip = localGossip copy (overview = newOverview) // update gossip
|
||||
val newOverview = localOverview copy (seen = newSeen) // update gossip overview
|
||||
val newGossip = localGossip copy (members = newMembers, overview = newOverview) // update gossip
|
||||
|
||||
updateLatestGossip(newGossip)
|
||||
|
||||
|
|
@ -834,39 +851,54 @@ private[cluster] class ClusterCoreDaemon(publisher: ActorRef) extends Actor with
|
|||
}
|
||||
|
||||
/**
|
||||
* Reaps the unreachable members (moves them to the `unreachable` list in the cluster overview) according to the failure detector's verdict.
|
||||
* Reaps the unreachable members according to the failure detector's verdict.
|
||||
*/
|
||||
def reapUnreachableMembers(): Unit = {
|
||||
if (!isSingletonCluster && isAvailable) {
|
||||
// only scrutinize if we are a non-singleton cluster and available
|
||||
if (!isSingletonCluster) {
|
||||
// only scrutinize if we are a non-singleton cluster
|
||||
|
||||
val localGossip = latestGossip
|
||||
val localOverview = localGossip.overview
|
||||
val localMembers = localGossip.members
|
||||
val localUnreachableMembers = localGossip.overview.unreachable
|
||||
|
||||
val newlyDetectedUnreachableMembers = localMembers filterNot { member ⇒
|
||||
member.uniqueAddress == selfUniqueAddress || failureDetector.isAvailable(member.address)
|
||||
member.uniqueAddress == selfUniqueAddress ||
|
||||
localOverview.reachability.status(selfUniqueAddress, member.uniqueAddress) == Reachability.Unreachable ||
|
||||
localOverview.reachability.status(selfUniqueAddress, member.uniqueAddress) == Reachability.Terminated ||
|
||||
failureDetector.isAvailable(member.address)
|
||||
}
|
||||
|
||||
if (newlyDetectedUnreachableMembers.nonEmpty) {
|
||||
val newlyDetectedReachableMembers = localOverview.reachability.allUnreachableFrom(selfUniqueAddress) collect {
|
||||
case node if node != selfUniqueAddress && failureDetector.isAvailable(node.address) ⇒
|
||||
localGossip.member(node)
|
||||
}
|
||||
|
||||
val newMembers = localMembers -- newlyDetectedUnreachableMembers
|
||||
val newUnreachableMembers = localUnreachableMembers ++ newlyDetectedUnreachableMembers
|
||||
if (newlyDetectedUnreachableMembers.nonEmpty || newlyDetectedReachableMembers.nonEmpty) {
|
||||
|
||||
val newOverview = localOverview copy (unreachable = newUnreachableMembers)
|
||||
val newGossip = localGossip copy (overview = newOverview, members = newMembers)
|
||||
val newReachability1 = (localOverview.reachability /: newlyDetectedUnreachableMembers) {
|
||||
(reachability, m) ⇒ reachability.unreachable(selfUniqueAddress, m.uniqueAddress)
|
||||
}
|
||||
val newReachability2 = (newReachability1 /: newlyDetectedReachableMembers) {
|
||||
(reachability, m) ⇒ reachability.reachable(selfUniqueAddress, m.uniqueAddress)
|
||||
}
|
||||
|
||||
updateLatestGossip(newGossip)
|
||||
if (newReachability2 ne localOverview.reachability) {
|
||||
val newOverview = localOverview copy (reachability = newReachability2)
|
||||
val newGossip = localGossip copy (overview = newOverview)
|
||||
|
||||
val (exiting, nonExiting) = newlyDetectedUnreachableMembers.partition(_.status == Exiting)
|
||||
if (nonExiting.nonEmpty)
|
||||
log.error("Cluster Node [{}] - Marking node(s) as UNREACHABLE [{}]", selfAddress, nonExiting.mkString(", "))
|
||||
if (exiting.nonEmpty)
|
||||
logInfo("Marking exiting node(s) as UNREACHABLE [{}]. This is expected and they will be removed.",
|
||||
exiting.mkString(", "))
|
||||
updateLatestGossip(newGossip)
|
||||
|
||||
publish(latestGossip)
|
||||
val (exiting, nonExiting) = newlyDetectedUnreachableMembers.partition(_.status == Exiting)
|
||||
if (nonExiting.nonEmpty)
|
||||
log.warning("Cluster Node [{}] - Marking node(s) as UNREACHABLE [{}]", selfAddress, nonExiting.mkString(", "))
|
||||
if (exiting.nonEmpty)
|
||||
logInfo("Marking exiting node(s) as UNREACHABLE [{}]. This is expected and they will be removed.",
|
||||
exiting.mkString(", "))
|
||||
if (newlyDetectedReachableMembers.nonEmpty)
|
||||
logInfo("Marking node(s) as REACHABLE [{}]", newlyDetectedReachableMembers.mkString(", "))
|
||||
|
||||
publish(latestGossip)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -877,8 +909,6 @@ private[cluster] class ClusterCoreDaemon(publisher: ActorRef) extends Actor with
|
|||
|
||||
def isSingletonCluster: Boolean = latestGossip.isSingletonCluster
|
||||
|
||||
def isAvailable: Boolean = !latestGossip.isUnreachable(selfUniqueAddress)
|
||||
|
||||
// needed for tests
|
||||
def sendGossipTo(address: Address): Unit = {
|
||||
latestGossip.members.foreach(m ⇒
|
||||
|
|
@ -906,7 +936,8 @@ private[cluster] class ClusterCoreDaemon(publisher: ActorRef) extends Actor with
|
|||
clusterCore(node.address) ! GossipStatus(selfUniqueAddress, latestGossip.version)
|
||||
|
||||
def validNodeForGossip(node: UniqueAddress): Boolean =
|
||||
(node != selfUniqueAddress && latestGossip.members.exists(_.uniqueAddress == node))
|
||||
(node != selfUniqueAddress && latestGossip.hasMember(node) &&
|
||||
latestGossip.overview.reachability.isReachable(node))
|
||||
|
||||
def updateLatestGossip(newGossip: Gossip): Unit = {
|
||||
// Updating the vclock version for the changes
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue