Transition from Down to Removed, see #3075
This commit is contained in:
parent
c3c904761f
commit
5c7747e7fa
10 changed files with 52 additions and 72 deletions
|
|
@ -388,20 +388,17 @@ private[cluster] final class ClusterCoreDaemon(publisher: ActorRef) extends Acto
|
|||
val localUnreachable = latestGossip.overview.unreachable
|
||||
|
||||
val alreadyMember = localMembers.exists(_.address == node)
|
||||
val isUnreachable = latestGossip.overview.isNonDownUnreachable(node)
|
||||
val isUnreachable = localUnreachable.exists(_.address == node)
|
||||
|
||||
if (!alreadyMember && !isUnreachable) {
|
||||
// remove the node from the 'unreachable' set in case it is a DOWN node that is rejoining cluster
|
||||
val (rejoiningMember, newUnreachableMembers) = localUnreachable partition { _.address == node }
|
||||
val newOverview = latestGossip.overview copy (unreachable = newUnreachableMembers)
|
||||
|
||||
// remove the node from the failure detector if it is a DOWN node that is rejoining cluster
|
||||
if (rejoiningMember.nonEmpty) failureDetector.remove(node)
|
||||
// remove the node from the failure detector
|
||||
failureDetector.remove(node)
|
||||
|
||||
// add joining node as Joining
|
||||
// add self in case someone else joins before self has joined (Set discards duplicates)
|
||||
val newMembers = localMembers + Member(node, Joining) + Member(selfAddress, Joining)
|
||||
val newGossip = latestGossip copy (overview = newOverview, members = newMembers)
|
||||
val newGossip = latestGossip copy (members = newMembers)
|
||||
|
||||
val versionedGossip = newGossip :+ vclockNode
|
||||
val seenVersionedGossip = versionedGossip seen selfAddress
|
||||
|
|
@ -678,10 +675,11 @@ private[cluster] final class ClusterCoreDaemon(publisher: ActorRef) extends Acto
|
|||
// 3. Non-exiting remain -- When all partition handoff has completed
|
||||
// 4. Move EXITING => REMOVED -- When all nodes have seen that the node is EXITING (convergence) - remove the nodes from the node ring and seen table
|
||||
// 5. Move UNREACHABLE => DOWN -- When the node is in the UNREACHABLE set it can be auto-downed by the leader
|
||||
// 6. Updating the vclock version for the changes
|
||||
// 7. Updating the 'seen' table
|
||||
// 8. Try to update the state with the new gossip
|
||||
// 9. If success - run all the side-effecting processing
|
||||
// 6. Move DOWN => REMOVED -- When all nodes have seen that the node is DOWN (convergence) - remove the nodes from the node ring and seen table
|
||||
// 7. Updating the vclock version for the changes
|
||||
// 8. Updating the 'seen' table
|
||||
// 9. Try to update the state with the new gossip
|
||||
// 10. If success - run all the side-effecting processing
|
||||
|
||||
val (
|
||||
newGossip: Gossip,
|
||||
|
|
@ -699,45 +697,46 @@ private[cluster] final class ClusterCoreDaemon(publisher: ActorRef) extends Acto
|
|||
|
||||
// transform the node member ring
|
||||
val newMembers = localMembers collect {
|
||||
// 1. Move JOINING => UP (once all nodes have seen that this node is JOINING, i.e. we have a convergence)
|
||||
// Move JOINING => UP (once all nodes have seen that this node is JOINING, i.e. we have a convergence)
|
||||
// and minimum number of nodes have joined the cluster
|
||||
case member if isJoiningToUp(member) ⇒ member copy (status = Up)
|
||||
// 2. Move LEAVING => EXITING (once we have a convergence on LEAVING
|
||||
// Move LEAVING => EXITING (once we have a convergence on LEAVING
|
||||
// *and* if we have a successful partition handoff)
|
||||
case member if member.status == Leaving && hasPartionHandoffCompletedSuccessfully ⇒
|
||||
member copy (status = Exiting)
|
||||
// 3. Everyone else that is not Exiting stays as they are
|
||||
case member if member.status != Exiting ⇒ member
|
||||
// 4. Move EXITING => REMOVED - e.g. remove the nodes from the 'members' set/node ring and seen table
|
||||
// Everyone else that is not Exiting stays as they are
|
||||
case member if member.status != Exiting && member.status != Down ⇒ member
|
||||
// Move EXITING => REMOVED, DOWN => REMOVED - i.e. remove the nodes from the 'members' set/node ring and seen table
|
||||
}
|
||||
|
||||
// ----------------------
|
||||
// 5. Store away all stuff needed for the side-effecting processing in 10.
|
||||
// Store away all stuff needed for the side-effecting processing
|
||||
// ----------------------
|
||||
|
||||
// Check for the need to do side-effecting on successful state change
|
||||
// Repeat the checking for transitions between JOINING -> UP, LEAVING -> EXITING, EXITING -> REMOVED
|
||||
// Repeat the checking for transitions between JOINING -> UP, LEAVING -> EXITING, EXITING -> REMOVED, DOWN -> REMOVED
|
||||
// to check for state-changes and to store away removed and exiting members for later notification
|
||||
// 1. check for state-changes to update
|
||||
// 2. store away removed and exiting members so we can separate the pure state changes
|
||||
val (removedMembers, newMembers1) = localMembers partition (_.status == Exiting)
|
||||
val (removedMembers, newMembers1) = localMembers partition (m ⇒ m.status == Exiting || m.status == Down)
|
||||
val removedMembers2 = removedMembers ++ localUnreachableMembers.filter(_.status == Down)
|
||||
|
||||
val (upMembers, newMembers2) = newMembers1 partition (isJoiningToUp(_))
|
||||
|
||||
val exitingMembers = newMembers2 filter (_.status == Leaving && hasPartionHandoffCompletedSuccessfully)
|
||||
|
||||
val hasChangedState = removedMembers.nonEmpty || upMembers.nonEmpty || exitingMembers.nonEmpty
|
||||
val hasChangedState = removedMembers2.nonEmpty || upMembers.nonEmpty || exitingMembers.nonEmpty
|
||||
|
||||
// removing REMOVED nodes from the 'seen' table
|
||||
val newSeen = localSeen -- removedMembers.map(_.address)
|
||||
val newSeen = localSeen -- removedMembers2.map(_.address)
|
||||
|
||||
// removing REMOVED nodes from the 'unreachable' set
|
||||
val newUnreachableMembers = localUnreachableMembers -- removedMembers
|
||||
val newUnreachableMembers = localUnreachableMembers -- removedMembers2
|
||||
|
||||
val newOverview = localOverview copy (seen = newSeen, unreachable = newUnreachableMembers) // update gossip overview
|
||||
val newGossip = localGossip copy (members = newMembers, overview = newOverview) // update gossip
|
||||
|
||||
(newGossip, hasChangedState, upMembers, exitingMembers, removedMembers, Member.none)
|
||||
(newGossip, hasChangedState, upMembers, exitingMembers, removedMembers2, Member.none)
|
||||
|
||||
} else if (AutoDown) {
|
||||
// we don't have convergence - so we might have unreachable nodes
|
||||
|
|
@ -745,7 +744,7 @@ private[cluster] final class ClusterCoreDaemon(publisher: ActorRef) extends Acto
|
|||
// if 'auto-down' is turned on, then try to auto-down any unreachable nodes
|
||||
val newUnreachableMembers = localUnreachableMembers collect {
|
||||
// ----------------------
|
||||
// 6. Move UNREACHABLE => DOWN (auto-downing by leader)
|
||||
// Move UNREACHABLE => DOWN (auto-downing by leader)
|
||||
// ----------------------
|
||||
case member if member.status != Down ⇒ member copy (status = Down)
|
||||
case downMember ⇒ downMember // no need to DOWN members already DOWN
|
||||
|
|
@ -766,12 +765,12 @@ private[cluster] final class ClusterCoreDaemon(publisher: ActorRef) extends Acto
|
|||
|
||||
if (hasChangedState) { // we have a change of state - version it and try to update
|
||||
// ----------------------
|
||||
// 6. Updating the vclock version for the changes
|
||||
// Updating the vclock version for the changes
|
||||
// ----------------------
|
||||
val versionedGossip = newGossip :+ vclockNode
|
||||
|
||||
// ----------------------
|
||||
// 7. Updating the 'seen' table
|
||||
// Updating the 'seen' table
|
||||
// Unless the leader (this node) is part of the removed members, i.e. the leader has moved itself from EXITING -> REMOVED
|
||||
// ----------------------
|
||||
val seenVersionedGossip =
|
||||
|
|
@ -779,12 +778,12 @@ private[cluster] final class ClusterCoreDaemon(publisher: ActorRef) extends Acto
|
|||
else versionedGossip seen selfAddress
|
||||
|
||||
// ----------------------
|
||||
// 8. Update the state with the new gossip
|
||||
// Update the state with the new gossip
|
||||
// ----------------------
|
||||
latestGossip = seenVersionedGossip
|
||||
|
||||
// ----------------------
|
||||
// 9. Run all the side-effecting processing
|
||||
// Run all the side-effecting processing
|
||||
// ----------------------
|
||||
|
||||
// log the move of members from joining to up
|
||||
|
|
|
|||
|
|
@ -282,9 +282,9 @@ object ClusterEvent {
|
|||
}
|
||||
val unreachableDownedEvents = unreachableDownMembers map MemberDowned
|
||||
|
||||
val removedEvents = (oldGossip.members -- newGossip.members -- newGossip.overview.unreachable) map { m ⇒
|
||||
MemberRemoved(m.copy(status = Removed))
|
||||
}
|
||||
val removedMembers = (oldGossip.members -- newGossip.members -- newGossip.overview.unreachable) ++
|
||||
(oldGossip.overview.unreachable -- newGossip.overview.unreachable)
|
||||
val removedEvents = removedMembers.map(m ⇒ MemberRemoved(m.copy(status = Removed)))
|
||||
|
||||
(new VectorBuilder[MemberEvent]() ++= memberEvents ++= downedEvents ++= unreachableDownedEvents
|
||||
++= removedEvents).result()
|
||||
|
|
@ -413,9 +413,7 @@ private[cluster] final class ClusterDomainEventPublisher extends Actor with Acto
|
|||
latestConvergedGossip = newGossip
|
||||
bufferedEvents foreach { event ⇒
|
||||
event match {
|
||||
case m: MemberEvent if m.isInstanceOf[MemberDowned] || m.isInstanceOf[MemberRemoved] ⇒
|
||||
// TODO MemberDowned match should probably be covered by MemberRemoved, see ticket #2788
|
||||
// but right now we don't change Downed to Removed
|
||||
case m: MemberEvent if m.isInstanceOf[MemberRemoved] ⇒
|
||||
publish(event)
|
||||
// notify DeathWatch that the address of the removed node has terminated
|
||||
publish(AddressTerminated(m.member.address))
|
||||
|
|
|
|||
|
|
@ -145,12 +145,7 @@ private[cluster] case class Gossip(
|
|||
val mergedVClock = this.version merge that.version
|
||||
|
||||
// 2. merge unreachable by selecting the single Member with highest MemberStatus out of the Member groups
|
||||
// FIXME allowing Down -> Joining should be adjusted as part of ticket #2788
|
||||
val mergedUnreachable = Member.pickHighestPriority(
|
||||
this.overview.unreachable.filterNot(m1 ⇒
|
||||
m1.status == Down && that.members.exists(m2 ⇒ m2.status == Joining && m2.address == m1.address)),
|
||||
that.overview.unreachable.filterNot(m1 ⇒
|
||||
m1.status == Down && this.members.exists(m2 ⇒ m2.status == Joining && m2.address == m1.address)))
|
||||
val mergedUnreachable = Member.pickHighestPriority(this.overview.unreachable, that.overview.unreachable)
|
||||
|
||||
// 3. merge members by selecting the single Member with highest MemberStatus out of the Member groups,
|
||||
// and exclude unreachable
|
||||
|
|
|
|||
|
|
@ -82,8 +82,11 @@ abstract class ClusterDeathWatchSpec
|
|||
enterBarrier("second-terminated")
|
||||
|
||||
markNodeAsUnavailable(third)
|
||||
awaitCond(clusterView.members.forall(_.address != address(third)))
|
||||
awaitCond(clusterView.unreachableMembers.exists(_.address == address(third)))
|
||||
cluster.down(third)
|
||||
// removed
|
||||
awaitCond(clusterView.unreachableMembers.forall(_.address != address(third)))
|
||||
expectMsg(path3)
|
||||
enterBarrier("third-terminated")
|
||||
|
||||
|
|
@ -95,8 +98,11 @@ abstract class ClusterDeathWatchSpec
|
|||
enterBarrier("watch-established")
|
||||
runOn(third) {
|
||||
markNodeAsUnavailable(second)
|
||||
awaitCond(clusterView.members.forall(_.address != address(second)))
|
||||
awaitCond(clusterView.unreachableMembers.exists(_.address == address(second)))
|
||||
cluster.down(second)
|
||||
// removed
|
||||
awaitCond(clusterView.unreachableMembers.forall(_.address != address(second)))
|
||||
}
|
||||
enterBarrier("second-terminated")
|
||||
enterBarrier("third-terminated")
|
||||
|
|
@ -131,8 +137,11 @@ abstract class ClusterDeathWatchSpec
|
|||
enterBarrier("hello-deployed")
|
||||
|
||||
markNodeAsUnavailable(first)
|
||||
awaitCond(clusterView.members.forall(_.address != address(first)))
|
||||
awaitCond(clusterView.unreachableMembers.exists(_.address == address(first)))
|
||||
cluster.down(first)
|
||||
// removed
|
||||
awaitCond(clusterView.unreachableMembers.forall(_.address != address(first)))
|
||||
|
||||
val t = expectMsgType[Terminated]
|
||||
t.actor must be(hello)
|
||||
|
|
|
|||
|
|
@ -84,11 +84,13 @@ abstract class LeaderElectionSpec(multiNodeConfig: LeaderElectionMultiNodeConfig
|
|||
|
||||
// detect failure
|
||||
markNodeAsUnavailable(leaderAddress)
|
||||
awaitCond(clusterView.unreachableMembers.exists(m ⇒ m.address == leaderAddress))
|
||||
awaitCond(clusterView.unreachableMembers.exists(_.address == leaderAddress))
|
||||
enterBarrier("after-unavailable" + n)
|
||||
|
||||
// user marks the shutdown leader as DOWN
|
||||
cluster.down(leaderAddress)
|
||||
// removed
|
||||
awaitCond(clusterView.unreachableMembers.forall(_.address != leaderAddress))
|
||||
enterBarrier("after-down" + n, "completed" + n)
|
||||
|
||||
case _ if remainingRoles.contains(myself) ⇒
|
||||
|
|
@ -96,7 +98,7 @@ abstract class LeaderElectionSpec(multiNodeConfig: LeaderElectionMultiNodeConfig
|
|||
val leaderAddress = address(leader)
|
||||
enterBarrier("before-shutdown" + n, "after-shutdown" + n)
|
||||
|
||||
awaitCond(clusterView.unreachableMembers.exists(m ⇒ m.address == leaderAddress))
|
||||
awaitCond(clusterView.unreachableMembers.exists(_.address == leaderAddress))
|
||||
enterBarrier("after-unavailable" + n)
|
||||
|
||||
enterBarrier("after-down" + n)
|
||||
|
|
|
|||
|
|
@ -117,6 +117,7 @@ abstract class MBeanSpec
|
|||
runOn(first, second, third) {
|
||||
awaitUpConvergence(3, canNotBePartOfMemberRing = Set(fourthAddress))
|
||||
assertMembers(clusterView.members, first, second, third)
|
||||
awaitCond(mbeanServer.getAttribute(mbeanName, "Unreachable") == "")
|
||||
}
|
||||
|
||||
enterBarrier("after-5")
|
||||
|
|
|
|||
|
|
@ -108,7 +108,7 @@ abstract class RestartFirstSeedNodeSpec
|
|||
}
|
||||
runOn(seed2, seed3) {
|
||||
awaitUpConvergence(2, canNotBePartOfMemberRing = Set(seedNodes.head))
|
||||
awaitCond(clusterView.unreachableMembers.exists(m ⇒ m.status == Down && m.address == seedNodes.head))
|
||||
awaitCond(clusterView.unreachableMembers.forall(_.address != seedNodes.head))
|
||||
}
|
||||
enterBarrier("seed1-shutdown")
|
||||
|
||||
|
|
|
|||
|
|
@ -67,7 +67,7 @@ abstract class SplitBrainSpec(multiNodeConfig: SplitBrainMultiNodeConfig)
|
|||
enterBarrier("after-1")
|
||||
}
|
||||
|
||||
"detect network partition and mark nodes on other side as unreachable" taggedAs LongRunningTest in {
|
||||
"detect network partition and mark nodes on other side as unreachable and form new cluster" taggedAs LongRunningTest in within(30 seconds) {
|
||||
val thirdAddress = address(third)
|
||||
enterBarrier("before-split")
|
||||
|
||||
|
|
@ -86,35 +86,19 @@ abstract class SplitBrainSpec(multiNodeConfig: SplitBrainMultiNodeConfig)
|
|||
for (role ← side1) markNodeAsUnavailable(role)
|
||||
}
|
||||
|
||||
runOn(side1: _*) {
|
||||
awaitCond(clusterView.unreachableMembers.map(_.address) == (side2.toSet map address), 25 seconds)
|
||||
}
|
||||
runOn(side2: _*) {
|
||||
awaitCond(clusterView.unreachableMembers.map(_.address) == (side1.toSet map address), 25 seconds)
|
||||
}
|
||||
|
||||
enterBarrier("after-2")
|
||||
}
|
||||
|
||||
"auto-down the other nodes and form new cluster with potentially new leader" taggedAs LongRunningTest in {
|
||||
|
||||
runOn(side1: _*) {
|
||||
// auto-down = on
|
||||
awaitCond(clusterView.unreachableMembers.forall(m ⇒ m.status == MemberStatus.Down), 15 seconds)
|
||||
clusterView.unreachableMembers.map(_.address) must be(side2.toSet map address)
|
||||
awaitUpConvergence(side1.size, side2.toSet map address)
|
||||
assertLeader(side1: _*)
|
||||
}
|
||||
|
||||
runOn(side2: _*) {
|
||||
// auto-down = on
|
||||
awaitCond(clusterView.unreachableMembers.forall(m ⇒ m.status == MemberStatus.Down), 15 seconds)
|
||||
clusterView.unreachableMembers.map(_.address) must be(side1.toSet map address)
|
||||
awaitUpConvergence(side2.size, side1.toSet map address)
|
||||
assertLeader(side2: _*)
|
||||
}
|
||||
|
||||
enterBarrier("after-3")
|
||||
enterBarrier("after-2")
|
||||
}
|
||||
|
||||
}
|
||||
|
|
|
|||
|
|
@ -131,6 +131,8 @@ abstract class UnreachableNodeRejoinsClusterSpec(multiNodeConfig: UnreachableNod
|
|||
|
||||
runOn(allBut(victim): _*) {
|
||||
awaitUpConvergence(roles.size - 1, Set(victim))
|
||||
// eventually removed
|
||||
awaitCond(clusterView.unreachableMembers.isEmpty, 15 seconds)
|
||||
}
|
||||
|
||||
endBarrier
|
||||
|
|
|
|||
|
|
@ -79,16 +79,6 @@ class GossipSpec extends WordSpec with MustMatchers {
|
|||
|
||||
}
|
||||
|
||||
"merge by allowing Down -> Joining" in {
|
||||
val g1 = Gossip(members = SortedSet(a1, b1), overview = GossipOverview(unreachable = Set(e3)))
|
||||
val g2 = Gossip(members = SortedSet(a1, b1, e1), overview = GossipOverview(unreachable = Set.empty))
|
||||
|
||||
val merged2 = g2 merge g1
|
||||
merged2.members must be(SortedSet(a1, b1, e1))
|
||||
merged2.members.toSeq.map(_.status) must be(Seq(Up, Up, Joining))
|
||||
merged2.overview.unreachable must be(Set.empty)
|
||||
}
|
||||
|
||||
"start with fresh seen table after merge" in {
|
||||
val g1 = Gossip(members = SortedSet(a1, e1)).seen(a1.address).seen(e1.address)
|
||||
val g2 = Gossip(members = SortedSet(a2, e2)).seen(a2.address).seen(e2.address)
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue