Transition from Down to Removed, see #3075

This commit is contained in:
Patrik Nordwall 2013-03-05 15:32:13 +01:00
parent c3c904761f
commit 5c7747e7fa
10 changed files with 52 additions and 72 deletions

View file

@ -388,20 +388,17 @@ private[cluster] final class ClusterCoreDaemon(publisher: ActorRef) extends Acto
val localUnreachable = latestGossip.overview.unreachable val localUnreachable = latestGossip.overview.unreachable
val alreadyMember = localMembers.exists(_.address == node) val alreadyMember = localMembers.exists(_.address == node)
val isUnreachable = latestGossip.overview.isNonDownUnreachable(node) val isUnreachable = localUnreachable.exists(_.address == node)
if (!alreadyMember && !isUnreachable) { if (!alreadyMember && !isUnreachable) {
// remove the node from the 'unreachable' set in case it is a DOWN node that is rejoining cluster
val (rejoiningMember, newUnreachableMembers) = localUnreachable partition { _.address == node }
val newOverview = latestGossip.overview copy (unreachable = newUnreachableMembers)
// remove the node from the failure detector if it is a DOWN node that is rejoining cluster // remove the node from the failure detector
if (rejoiningMember.nonEmpty) failureDetector.remove(node) failureDetector.remove(node)
// add joining node as Joining // add joining node as Joining
// add self in case someone else joins before self has joined (Set discards duplicates) // add self in case someone else joins before self has joined (Set discards duplicates)
val newMembers = localMembers + Member(node, Joining) + Member(selfAddress, Joining) val newMembers = localMembers + Member(node, Joining) + Member(selfAddress, Joining)
val newGossip = latestGossip copy (overview = newOverview, members = newMembers) val newGossip = latestGossip copy (members = newMembers)
val versionedGossip = newGossip :+ vclockNode val versionedGossip = newGossip :+ vclockNode
val seenVersionedGossip = versionedGossip seen selfAddress val seenVersionedGossip = versionedGossip seen selfAddress
@ -678,10 +675,11 @@ private[cluster] final class ClusterCoreDaemon(publisher: ActorRef) extends Acto
// 3. Non-exiting remain -- When all partition handoff has completed // 3. Non-exiting remain -- When all partition handoff has completed
// 4. Move EXITING => REMOVED -- When all nodes have seen that the node is EXITING (convergence) - remove the nodes from the node ring and seen table // 4. Move EXITING => REMOVED -- When all nodes have seen that the node is EXITING (convergence) - remove the nodes from the node ring and seen table
// 5. Move UNREACHABLE => DOWN -- When the node is in the UNREACHABLE set it can be auto-down by leader // 5. Move UNREACHABLE => DOWN -- When the node is in the UNREACHABLE set it can be auto-down by leader
// 6. Updating the vclock version for the changes // 6. Move DOWN => REMOVED -- When all nodes have seen that the node is DOWN (convergence) - remove the nodes from the node ring and seen table
// 7. Updating the 'seen' table // 7. Updating the vclock version for the changes
// 8. Try to update the state with the new gossip // 8. Updating the 'seen' table
// 9. If success - run all the side-effecting processing // 9. Try to update the state with the new gossip
// 10. If success - run all the side-effecting processing
val ( val (
newGossip: Gossip, newGossip: Gossip,
@ -699,45 +697,46 @@ private[cluster] final class ClusterCoreDaemon(publisher: ActorRef) extends Acto
// transform the node member ring // transform the node member ring
val newMembers = localMembers collect { val newMembers = localMembers collect {
// 1. Move JOINING => UP (once all nodes have seen that this node is JOINING, i.e. we have a convergence) // Move JOINING => UP (once all nodes have seen that this node is JOINING, i.e. we have a convergence)
// and minimum number of nodes have joined the cluster // and minimum number of nodes have joined the cluster
case member if isJoiningToUp(member) ⇒ member copy (status = Up) case member if isJoiningToUp(member) ⇒ member copy (status = Up)
// 2. Move LEAVING => EXITING (once we have a convergence on LEAVING // Move LEAVING => EXITING (once we have a convergence on LEAVING
// *and* if we have a successful partition handoff) // *and* if we have a successful partition handoff)
case member if member.status == Leaving && hasPartionHandoffCompletedSuccessfully ⇒ case member if member.status == Leaving && hasPartionHandoffCompletedSuccessfully ⇒
member copy (status = Exiting) member copy (status = Exiting)
// 3. Everyone else that is not Exiting stays as they are // Everyone else that is not Exiting stays as they are
case member if member.status != Exiting ⇒ member case member if member.status != Exiting && member.status != Down ⇒ member
// 4. Move EXITING => REMOVED - e.g. remove the nodes from the 'members' set/node ring and seen table // Move EXITING => REMOVED, DOWN => REMOVED - i.e. remove the nodes from the 'members' set/node ring and seen table
} }
// ---------------------- // ----------------------
// 5. Store away all stuff needed for the side-effecting processing in 10. // Store away all stuff needed for the side-effecting processing
// ---------------------- // ----------------------
// Check for the need to do side-effecting on successful state change // Check for the need to do side-effecting on successful state change
// Repeat the checking for transitions between JOINING -> UP, LEAVING -> EXITING, EXITING -> REMOVED // Repeat the checking for transitions between JOINING -> UP, LEAVING -> EXITING, EXITING -> REMOVED, DOWN -> REMOVED
// to check for state-changes and to store away removed and exiting members for later notification // to check for state-changes and to store away removed and exiting members for later notification
// 1. check for state-changes to update // 1. check for state-changes to update
// 2. store away removed and exiting members so we can separate the pure state changes // 2. store away removed and exiting members so we can separate the pure state changes
val (removedMembers, newMembers1) = localMembers partition (_.status == Exiting) val (removedMembers, newMembers1) = localMembers partition (m ⇒ m.status == Exiting || m.status == Down)
val removedMembers2 = removedMembers ++ localUnreachableMembers.filter(_.status == Down)
val (upMembers, newMembers2) = newMembers1 partition (isJoiningToUp(_)) val (upMembers, newMembers2) = newMembers1 partition (isJoiningToUp(_))
val exitingMembers = newMembers2 filter (_.status == Leaving && hasPartionHandoffCompletedSuccessfully) val exitingMembers = newMembers2 filter (_.status == Leaving && hasPartionHandoffCompletedSuccessfully)
val hasChangedState = removedMembers.nonEmpty || upMembers.nonEmpty || exitingMembers.nonEmpty val hasChangedState = removedMembers2.nonEmpty || upMembers.nonEmpty || exitingMembers.nonEmpty
// removing REMOVED nodes from the 'seen' table // removing REMOVED nodes from the 'seen' table
val newSeen = localSeen -- removedMembers.map(_.address) val newSeen = localSeen -- removedMembers2.map(_.address)
// removing REMOVED nodes from the 'unreachable' set // removing REMOVED nodes from the 'unreachable' set
val newUnreachableMembers = localUnreachableMembers -- removedMembers val newUnreachableMembers = localUnreachableMembers -- removedMembers2
val newOverview = localOverview copy (seen = newSeen, unreachable = newUnreachableMembers) // update gossip overview val newOverview = localOverview copy (seen = newSeen, unreachable = newUnreachableMembers) // update gossip overview
val newGossip = localGossip copy (members = newMembers, overview = newOverview) // update gossip val newGossip = localGossip copy (members = newMembers, overview = newOverview) // update gossip
(newGossip, hasChangedState, upMembers, exitingMembers, removedMembers, Member.none) (newGossip, hasChangedState, upMembers, exitingMembers, removedMembers2, Member.none)
} else if (AutoDown) { } else if (AutoDown) {
// we don't have convergence - so we might have unreachable nodes // we don't have convergence - so we might have unreachable nodes
@ -745,7 +744,7 @@ private[cluster] final class ClusterCoreDaemon(publisher: ActorRef) extends Acto
// if 'auto-down' is turned on, then try to auto-down any unreachable nodes // if 'auto-down' is turned on, then try to auto-down any unreachable nodes
val newUnreachableMembers = localUnreachableMembers collect { val newUnreachableMembers = localUnreachableMembers collect {
// ---------------------- // ----------------------
// 6. Move UNREACHABLE => DOWN (auto-downing by leader) // Move UNREACHABLE => DOWN (auto-downing by leader)
// ---------------------- // ----------------------
case member if member.status != Down ⇒ member copy (status = Down) case member if member.status != Down ⇒ member copy (status = Down)
case downMember ⇒ downMember // no need to DOWN members already DOWN case downMember ⇒ downMember // no need to DOWN members already DOWN
@ -766,25 +765,25 @@ private[cluster] final class ClusterCoreDaemon(publisher: ActorRef) extends Acto
if (hasChangedState) { // we have a change of state - version it and try to update if (hasChangedState) { // we have a change of state - version it and try to update
// ---------------------- // ----------------------
// 6. Updating the vclock version for the changes // Updating the vclock version for the changes
// ---------------------- // ----------------------
val versionedGossip = newGossip :+ vclockNode val versionedGossip = newGossip :+ vclockNode
// ---------------------- // ----------------------
// 7. Updating the 'seen' table // Updating the 'seen' table
// Unless the leader (this node) is part of the removed members, i.e. the leader have moved himself from EXITING -> REMOVED // Unless the leader (this node) is part of the removed members, i.e. the leader have moved himself from EXITING -> REMOVED
// ---------------------- // ----------------------
val seenVersionedGossip = val seenVersionedGossip =
if (removedMembers.exists(_.address == selfAddress)) versionedGossip if (removedMembers.exists(_.address == selfAddress)) versionedGossip
else versionedGossip seen selfAddress else versionedGossip seen selfAddress
// ---------------------- // ----------------------
// 8. Update the state with the new gossip // Update the state with the new gossip
// ---------------------- // ----------------------
latestGossip = seenVersionedGossip latestGossip = seenVersionedGossip
// ---------------------- // ----------------------
// 9. Run all the side-effecting processing // Run all the side-effecting processing
// ---------------------- // ----------------------
// log the move of members from joining to up // log the move of members from joining to up

View file

@ -282,9 +282,9 @@ object ClusterEvent {
} }
val unreachableDownedEvents = unreachableDownMembers map MemberDowned val unreachableDownedEvents = unreachableDownMembers map MemberDowned
val removedEvents = (oldGossip.members -- newGossip.members -- newGossip.overview.unreachable) map { m ⇒ val removedMembers = (oldGossip.members -- newGossip.members -- newGossip.overview.unreachable) ++
MemberRemoved(m.copy(status = Removed)) (oldGossip.overview.unreachable -- newGossip.overview.unreachable)
} val removedEvents = removedMembers.map(m ⇒ MemberRemoved(m.copy(status = Removed)))
(new VectorBuilder[MemberEvent]() ++= memberEvents ++= downedEvents ++= unreachableDownedEvents (new VectorBuilder[MemberEvent]() ++= memberEvents ++= downedEvents ++= unreachableDownedEvents
++= removedEvents).result() ++= removedEvents).result()
@ -413,9 +413,7 @@ private[cluster] final class ClusterDomainEventPublisher extends Actor with Acto
latestConvergedGossip = newGossip latestConvergedGossip = newGossip
bufferedEvents foreach { event ⇒ bufferedEvents foreach { event ⇒
event match { event match {
case m: MemberEvent if m.isInstanceOf[MemberDowned] || m.isInstanceOf[MemberRemoved] ⇒ case m: MemberEvent if m.isInstanceOf[MemberRemoved] ⇒
// TODO MemberDowned match should probably be covered by MemberRemoved, see ticket #2788
// but right now we don't change Downed to Removed
publish(event) publish(event)
// notify DeathWatch about downed node // notify DeathWatch about downed node
publish(AddressTerminated(m.member.address)) publish(AddressTerminated(m.member.address))

View file

@ -145,12 +145,7 @@ private[cluster] case class Gossip(
val mergedVClock = this.version merge that.version val mergedVClock = this.version merge that.version
// 2. merge unreachable by selecting the single Member with highest MemberStatus out of the Member groups // 2. merge unreachable by selecting the single Member with highest MemberStatus out of the Member groups
// FIXME allowing Down -> Joining should be adjusted as part of ticket #2788 val mergedUnreachable = Member.pickHighestPriority(this.overview.unreachable, that.overview.unreachable)
val mergedUnreachable = Member.pickHighestPriority(
this.overview.unreachable.filterNot(m1 ⇒
m1.status == Down && that.members.exists(m2 ⇒ m2.status == Joining && m2.address == m1.address)),
that.overview.unreachable.filterNot(m1 ⇒
m1.status == Down && this.members.exists(m2 ⇒ m2.status == Joining && m2.address == m1.address)))
// 3. merge members by selecting the single Member with highest MemberStatus out of the Member groups, // 3. merge members by selecting the single Member with highest MemberStatus out of the Member groups,
// and exclude unreachable // and exclude unreachable

View file

@ -82,8 +82,11 @@ abstract class ClusterDeathWatchSpec
enterBarrier("second-terminated") enterBarrier("second-terminated")
markNodeAsUnavailable(third) markNodeAsUnavailable(third)
awaitCond(clusterView.members.forall(_.address != address(third)))
awaitCond(clusterView.unreachableMembers.exists(_.address == address(third))) awaitCond(clusterView.unreachableMembers.exists(_.address == address(third)))
cluster.down(third) cluster.down(third)
// removed
awaitCond(clusterView.unreachableMembers.forall(_.address != address(third)))
expectMsg(path3) expectMsg(path3)
enterBarrier("third-terminated") enterBarrier("third-terminated")
@ -95,8 +98,11 @@ abstract class ClusterDeathWatchSpec
enterBarrier("watch-established") enterBarrier("watch-established")
runOn(third) { runOn(third) {
markNodeAsUnavailable(second) markNodeAsUnavailable(second)
awaitCond(clusterView.members.forall(_.address != address(second)))
awaitCond(clusterView.unreachableMembers.exists(_.address == address(second))) awaitCond(clusterView.unreachableMembers.exists(_.address == address(second)))
cluster.down(second) cluster.down(second)
// removed
awaitCond(clusterView.unreachableMembers.forall(_.address != address(second)))
} }
enterBarrier("second-terminated") enterBarrier("second-terminated")
enterBarrier("third-terminated") enterBarrier("third-terminated")
@ -131,8 +137,11 @@ abstract class ClusterDeathWatchSpec
enterBarrier("hello-deployed") enterBarrier("hello-deployed")
markNodeAsUnavailable(first) markNodeAsUnavailable(first)
awaitCond(clusterView.members.forall(_.address != address(first)))
awaitCond(clusterView.unreachableMembers.exists(_.address == address(first))) awaitCond(clusterView.unreachableMembers.exists(_.address == address(first)))
cluster.down(first) cluster.down(first)
// removed
awaitCond(clusterView.unreachableMembers.forall(_.address != address(first)))
val t = expectMsgType[Terminated] val t = expectMsgType[Terminated]
t.actor must be(hello) t.actor must be(hello)

View file

@ -84,11 +84,13 @@ abstract class LeaderElectionSpec(multiNodeConfig: LeaderElectionMultiNodeConfig
// detect failure // detect failure
markNodeAsUnavailable(leaderAddress) markNodeAsUnavailable(leaderAddress)
awaitCond(clusterView.unreachableMembers.exists(m ⇒ m.address == leaderAddress)) awaitCond(clusterView.unreachableMembers.exists(_.address == leaderAddress))
enterBarrier("after-unavailable" + n) enterBarrier("after-unavailable" + n)
// user marks the shutdown leader as DOWN // user marks the shutdown leader as DOWN
cluster.down(leaderAddress) cluster.down(leaderAddress)
// removed
awaitCond(clusterView.unreachableMembers.forall(_.address != leaderAddress))
enterBarrier("after-down" + n, "completed" + n) enterBarrier("after-down" + n, "completed" + n)
case _ if remainingRoles.contains(myself) case _ if remainingRoles.contains(myself)
@ -96,7 +98,7 @@ abstract class LeaderElectionSpec(multiNodeConfig: LeaderElectionMultiNodeConfig
val leaderAddress = address(leader) val leaderAddress = address(leader)
enterBarrier("before-shutdown" + n, "after-shutdown" + n) enterBarrier("before-shutdown" + n, "after-shutdown" + n)
awaitCond(clusterView.unreachableMembers.exists(m ⇒ m.address == leaderAddress)) awaitCond(clusterView.unreachableMembers.exists(_.address == leaderAddress))
enterBarrier("after-unavailable" + n) enterBarrier("after-unavailable" + n)
enterBarrier("after-down" + n) enterBarrier("after-down" + n)

View file

@ -117,6 +117,7 @@ abstract class MBeanSpec
runOn(first, second, third) { runOn(first, second, third) {
awaitUpConvergence(3, canNotBePartOfMemberRing = Set(fourthAddress)) awaitUpConvergence(3, canNotBePartOfMemberRing = Set(fourthAddress))
assertMembers(clusterView.members, first, second, third) assertMembers(clusterView.members, first, second, third)
awaitCond(mbeanServer.getAttribute(mbeanName, "Unreachable") == "")
} }
enterBarrier("after-5") enterBarrier("after-5")

View file

@ -108,7 +108,7 @@ abstract class RestartFirstSeedNodeSpec
} }
runOn(seed2, seed3) { runOn(seed2, seed3) {
awaitUpConvergence(2, canNotBePartOfMemberRing = Set(seedNodes.head)) awaitUpConvergence(2, canNotBePartOfMemberRing = Set(seedNodes.head))
awaitCond(clusterView.unreachableMembers.exists(m ⇒ m.status == Down && m.address == seedNodes.head)) awaitCond(clusterView.unreachableMembers.forall(_.address != seedNodes.head))
} }
enterBarrier("seed1-shutdown") enterBarrier("seed1-shutdown")

View file

@ -67,7 +67,7 @@ abstract class SplitBrainSpec(multiNodeConfig: SplitBrainMultiNodeConfig)
enterBarrier("after-1") enterBarrier("after-1")
} }
"detect network partition and mark nodes on other side as unreachable" taggedAs LongRunningTest in { "detect network partition and mark nodes on other side as unreachable and form new cluster" taggedAs LongRunningTest in within(30 seconds) {
val thirdAddress = address(third) val thirdAddress = address(third)
enterBarrier("before-split") enterBarrier("before-split")
@ -86,35 +86,19 @@ abstract class SplitBrainSpec(multiNodeConfig: SplitBrainMultiNodeConfig)
for (role ← side1) markNodeAsUnavailable(role) for (role ← side1) markNodeAsUnavailable(role)
} }
runOn(side1: _*) {
awaitCond(clusterView.unreachableMembers.map(_.address) == (side2.toSet map address), 25 seconds)
}
runOn(side2: _*) {
awaitCond(clusterView.unreachableMembers.map(_.address) == (side1.toSet map address), 25 seconds)
}
enterBarrier("after-2")
}
"auto-down the other nodes and form new cluster with potentially new leader" taggedAs LongRunningTest in {
runOn(side1: _*) { runOn(side1: _*) {
// auto-down = on // auto-down = on
awaitCond(clusterView.unreachableMembers.forall(m ⇒ m.status == MemberStatus.Down), 15 seconds)
clusterView.unreachableMembers.map(_.address) must be(side2.toSet map address)
awaitUpConvergence(side1.size, side2.toSet map address) awaitUpConvergence(side1.size, side2.toSet map address)
assertLeader(side1: _*) assertLeader(side1: _*)
} }
runOn(side2: _*) { runOn(side2: _*) {
// auto-down = on // auto-down = on
awaitCond(clusterView.unreachableMembers.forall(m ⇒ m.status == MemberStatus.Down), 15 seconds)
clusterView.unreachableMembers.map(_.address) must be(side1.toSet map address)
awaitUpConvergence(side2.size, side1.toSet map address) awaitUpConvergence(side2.size, side1.toSet map address)
assertLeader(side2: _*) assertLeader(side2: _*)
} }
enterBarrier("after-3") enterBarrier("after-2")
} }
} }

View file

@ -131,6 +131,8 @@ abstract class UnreachableNodeRejoinsClusterSpec(multiNodeConfig: UnreachableNod
runOn(allBut(victim): _*) { runOn(allBut(victim): _*) {
awaitUpConvergence(roles.size - 1, Set(victim)) awaitUpConvergence(roles.size - 1, Set(victim))
// eventually removed
awaitCond(clusterView.unreachableMembers.isEmpty, 15 seconds)
} }
endBarrier endBarrier

View file

@ -79,16 +79,6 @@ class GossipSpec extends WordSpec with MustMatchers {
} }
"merge by allowing Down -> Joining" in {
val g1 = Gossip(members = SortedSet(a1, b1), overview = GossipOverview(unreachable = Set(e3)))
val g2 = Gossip(members = SortedSet(a1, b1, e1), overview = GossipOverview(unreachable = Set.empty))
val merged2 = g2 merge g1
merged2.members must be(SortedSet(a1, b1, e1))
merged2.members.toSeq.map(_.status) must be(Seq(Up, Up, Joining))
merged2.overview.unreachable must be(Set.empty)
}
"start with fresh seen table after merge" in { "start with fresh seen table after merge" in {
val g1 = Gossip(members = SortedSet(a1, e1)).seen(a1.address).seen(e1.address) val g1 = Gossip(members = SortedSet(a1, e1)).seen(a1.address).seen(e1.address)
val g2 = Gossip(members = SortedSet(a2, e2)).seen(a2.address).seen(e2.address) val g2 = Gossip(members = SortedSet(a2, e2)).seen(a2.address).seen(e2.address)