=clu Improve cluster downing

* avoid using Down and Exiting member from being used for joining
* delay shut down of Down member until the information is spread
  to all reachable members, e.g. downing several nodes via one node
* akka.cluster.down-removal-margin setting
  Margin until shards or singletons that belonged to a
  downed/removed partition are created in surviving partition.
  Used by singleton and sharding.
* remove the retry count parameters/settings for singleton in
  favor of deriving those from the removal-margin
This commit is contained in:
Patrik Nordwall 2015-05-30 16:12:22 +02:00
parent bf28260cd0
commit 2a88f4fb29
12 changed files with 227 additions and 101 deletions

View file

@ -228,6 +228,7 @@ private[cluster] class ClusterCoreDaemon(publisher: ActorRef) extends Actor with
protected def selfUniqueAddress = cluster.selfUniqueAddress
val NumberOfGossipsBeforeShutdownWhenLeaderExits = 3
val MaxGossipsBeforeShuttingDownMyself = 5
def vclockName(node: UniqueAddress): String = node.address + "-" + node.uid
val vclockNode = VectorClock.Node(vclockName(selfUniqueAddress))
@ -371,7 +372,14 @@ private[cluster] class ClusterCoreDaemon(publisher: ActorRef) extends Actor with
case other super.unhandled(other)
}
def initJoin(): Unit = sender() ! InitJoinAck(selfAddress)
def initJoin(): Unit = {
val selfStatus = latestGossip.member(selfUniqueAddress).status
if (Gossip.removeUnreachableWithMemberStatus.contains(selfStatus))
// prevents a Down and Exiting node from being used for joining
sender() ! InitJoinNack(selfAddress)
else
sender() ! InitJoinAck(selfAddress)
}
def joinSeedNodes(newSeedNodes: immutable.IndexedSeq[Address]): Unit = {
if (newSeedNodes.nonEmpty) {
@ -444,12 +452,15 @@ private[cluster] class ClusterCoreDaemon(publisher: ActorRef) extends Actor with
* current gossip state, including the new joining member.
*/
def joining(node: UniqueAddress, roles: Set[String]): Unit = {
val selfStatus = latestGossip.member(selfUniqueAddress).status
if (node.address.protocol != selfAddress.protocol)
log.warning("Member with wrong protocol tried to join, but was ignored, expected [{}] but was [{}]",
selfAddress.protocol, node.address.protocol)
else if (node.address.system != selfAddress.system)
log.warning("Member with wrong ActorSystem name tried to join, but was ignored, expected [{}] but was [{}]",
selfAddress.system, node.address.system)
else if (Gossip.removeUnreachableWithMemberStatus.contains(selfStatus))
logInfo("Trying to join [{}] to [{}] member, ignoring. Use a member that is Up instead.", node, selfStatus)
else {
val localMembers = latestGossip.members
@ -544,15 +555,15 @@ private[cluster] class ClusterCoreDaemon(publisher: ActorRef) extends Actor with
val localReachability = localOverview.reachability
// check if the node to DOWN is in the `members` set
localMembers.collectFirst { case m if m.address == address m.copy(status = Down) } match {
case Some(m)
localMembers.find(_.address == address) match {
case Some(m) if (m.status != Down)
if (localReachability.isReachable(m.uniqueAddress))
logInfo("Marking node [{}] as [{}]", m.address, Down)
else
logInfo("Marking unreachable node [{}] as [{}]", m.address, Down)
// replace member (changed status)
val newMembers = localMembers - m + m
val newMembers = localMembers - m + m.copy(status = Down)
// remove nodes marked as DOWN from the `seen` table
val newSeen = localSeen - m.uniqueAddress
@ -562,6 +573,7 @@ private[cluster] class ClusterCoreDaemon(publisher: ActorRef) extends Actor with
updateLatestGossip(newGossip)
publish(latestGossip)
case Some(_) // already down
case None
logInfo("Ignoring down of unknown node [{}] as [{}]", address)
}
@ -676,7 +688,7 @@ private[cluster] class ClusterCoreDaemon(publisher: ActorRef) extends Actor with
publish(latestGossip)
val selfStatus = latestGossip.member(selfUniqueAddress).status
if (selfStatus == Exiting || selfStatus == Down)
if (selfStatus == Exiting)
shutdown()
else if (talkback) {
// send back gossip to sender() when sender() had different view, i.e. merge, or sender() had
@ -766,7 +778,7 @@ private[cluster] class ClusterCoreDaemon(publisher: ActorRef) extends Actor with
/**
* Runs periodic leader actions, such as member status transitions, assigning partitions etc.
*/
def leaderActions(): Unit =
def leaderActions(): Unit = {
if (latestGossip.isLeader(selfUniqueAddress, selfUniqueAddress)) {
// only run the leader actions if we are the LEADER
val firstNotice = 20
@ -785,6 +797,27 @@ private[cluster] class ClusterCoreDaemon(publisher: ActorRef) extends Actor with
s"${m.address} ${m.status} seen=${latestGossip.seenByNode(m.uniqueAddress)}").mkString(", "))
}
}
shutdownSelfWhenDown()
}
def shutdownSelfWhenDown(): Unit = {
if (latestGossip.member(selfUniqueAddress).status == Down) {
// When all reachable have seen the state this member will shutdown itself when it has
// status Down. The down commands should spread before we shutdown.
val unreachable = latestGossip.overview.reachability.allUnreachableOrTerminated
val downed = latestGossip.members.collect { case m if m.status == Down m.uniqueAddress }
if (downed.forall(node unreachable(node) || latestGossip.seenByNode(node))) {
// the reason for not shutting down immediately is to give the gossip a chance to spread
// the downing information to other downed nodes, so that they can shutdown themselves
logInfo("Shutting down myself")
// not crucial to send gossip, but may speedup removal since fallback to failure detection is not needed
// if other downed know that this node has seen the version
downed.filterNot(n unreachable(n) || n == selfUniqueAddress).take(MaxGossipsBeforeShuttingDownMyself)
.foreach(gossipTo)
shutdown()
}
}
}
/**
* Leader actions are as follows: