Fix regression IllegalStateException: Can't find this Member, see #2117

* Gossiping to myself caused the failure detector to mark myself as unreachable
* Don't heartbeat for myself failure detector
This commit is contained in:
Patrik Nordwall 2012-05-31 17:19:49 +02:00
parent 9d1cbdc601
commit 13a93dfb50
2 changed files with 53 additions and 51 deletions

View file

@ -203,7 +203,7 @@ case class Gossip(
}
/**
* Marks the gossip as seen by this node (remoteAddress) by updating the address entry in the 'gossip.overview.seen'
* Marks the gossip as seen by this node (selfAddress) by updating the address entry in the 'gossip.overview.seen'
* Map with the VectorClock for the new gossip.
*/
def seen(address: Address): Gossip = {
@ -380,11 +380,11 @@ class Cluster(system: ExtendedActorSystem) extends Extension { clusterNode ⇒
val remoteSettings = new RemoteSettings(system.settings.config, system.name)
val clusterSettings = new ClusterSettings(system.settings.config, system.name)
val remoteAddress = remote.transport.address
val selfAddress = remote.transport.address
val failureDetector = new AccrualFailureDetector(
system, remoteAddress, clusterSettings.FailureDetectorThreshold, clusterSettings.FailureDetectorMaxSampleSize)
system, selfAddress, clusterSettings.FailureDetectorThreshold, clusterSettings.FailureDetectorMaxSampleSize)
private val vclockNode = VectorClock.Node(remoteAddress.toString)
private val vclockNode = VectorClock.Node(selfAddress.toString)
private val periodicTasksInitialDelay = clusterSettings.PeriodicTasksInitialDelay
private val gossipFrequency = clusterSettings.GossipFrequency
@ -396,7 +396,7 @@ class Cluster(system: ExtendedActorSystem) extends Extension { clusterNode ⇒
private val autoDown = clusterSettings.AutoDown
private val nrOfDeputyNodes = clusterSettings.NrOfDeputyNodes
private val nrOfGossipDaemons = clusterSettings.NrOfGossipDaemons
private val nodeToJoin: Option[Address] = clusterSettings.NodeToJoin filter (_ != remoteAddress)
private val nodeToJoin: Option[Address] = clusterSettings.NodeToJoin filter (_ != selfAddress)
private val serialization = remote.serialization
@ -406,7 +406,7 @@ class Cluster(system: ExtendedActorSystem) extends Extension { clusterNode ⇒
private val mBeanServer = ManagementFactory.getPlatformMBeanServer
private val clusterMBeanName = new ObjectName("akka:type=Cluster")
log.info("Cluster Node [{}] - is starting up...", remoteAddress)
log.info("Cluster Node [{}] - is starting up...", selfAddress)
// create superisor for daemons under path "/system/cluster"
private val clusterDaemons = {
@ -418,7 +418,7 @@ class Cluster(system: ExtendedActorSystem) extends Extension { clusterNode ⇒
}
private val state = {
val member = Member(remoteAddress, MemberStatus.Joining)
val member = Member(selfAddress, MemberStatus.Joining)
val gossip = Gossip(members = SortedSet.empty[Member] + member) + vclockNode // add me as member and update my vector clock
new AtomicReference[State](State(gossip))
}
@ -447,15 +447,15 @@ class Cluster(system: ExtendedActorSystem) extends Extension { clusterNode ⇒
createMBean()
log.info("Cluster Node [{}] - has started up successfully", remoteAddress)
log.info("Cluster Node [{}] - has started up successfully", selfAddress)
// ======================================================
// ===================== PUBLIC API =====================
// ======================================================
def self: Member = latestGossip.members
.find(_.address == remoteAddress)
.getOrElse(throw new IllegalStateException("Can't find 'this' Member (" + remoteAddress + ") in the cluster membership ring"))
.find(_.address == selfAddress)
.getOrElse(throw new IllegalStateException("Can't find 'this' Member (" + selfAddress + ") in the cluster membership ring"))
/**
* Latest gossip.
@ -472,7 +472,7 @@ class Cluster(system: ExtendedActorSystem) extends Extension { clusterNode ⇒
*/
def isLeader: Boolean = {
val members = latestGossip.members
!members.isEmpty && (remoteAddress == members.head.address)
members.nonEmpty && (selfAddress == members.head.address)
}
/**
@ -502,7 +502,7 @@ class Cluster(system: ExtendedActorSystem) extends Extension { clusterNode ⇒
*/
def shutdown(): Unit = {
if (isRunning.compareAndSet(true, false)) {
log.info("Cluster Node [{}] - Shutting down cluster Node and cluster daemons...", remoteAddress)
log.info("Cluster Node [{}] - Shutting down cluster Node and cluster daemons...", selfAddress)
gossipCanceller.cancel()
failureDetectorReaperCanceller.cancel()
leaderActionsCanceller.cancel()
@ -543,8 +543,8 @@ class Cluster(system: ExtendedActorSystem) extends Extension { clusterNode ⇒
*/
def join(address: Address): Unit = {
val connection = clusterCommandConnectionFor(address)
val command = ClusterAction.Join(remoteAddress)
log.info("Cluster Node [{}] - Trying to send JOIN to [{}] through connection [{}]", remoteAddress, address, connection)
val command = ClusterAction.Join(selfAddress)
log.info("Cluster Node [{}] - Trying to send JOIN to [{}] through connection [{}]", selfAddress, address, connection)
connection ! command
}
@ -579,7 +579,7 @@ class Cluster(system: ExtendedActorSystem) extends Extension { clusterNode ⇒
*/
@tailrec
private[cluster] final def joining(node: Address): Unit = {
log.info("Cluster Node [{}] - Node [{}] is JOINING", remoteAddress, node)
log.info("Cluster Node [{}] - Node [{}] is JOINING", selfAddress, node)
val localState = state.get
val localGossip = localState.latestGossip
@ -595,13 +595,14 @@ class Cluster(system: ExtendedActorSystem) extends Extension { clusterNode ⇒
val newGossip = localGossip copy (overview = newOverview, members = newMembers)
val versionedGossip = newGossip + vclockNode
val seenVersionedGossip = versionedGossip seen remoteAddress
val seenVersionedGossip = versionedGossip seen selfAddress
val newState = localState copy (latestGossip = seenVersionedGossip)
if (!state.compareAndSet(localState, newState)) joining(node) // recur if we failed update
else {
failureDetector heartbeat node // update heartbeat in failure detector
if (node != selfAddress) failureDetector heartbeat node
if (convergence(newState.latestGossip).isDefined) {
newState.memberMembershipChangeListeners foreach { _ notify newMembers }
}
@ -612,28 +613,28 @@ class Cluster(system: ExtendedActorSystem) extends Extension { clusterNode ⇒
* State transition to UP.
*/
private[cluster] final def up(address: Address): Unit = {
log.info("Cluster Node [{}] - Marking node [{}] as UP", remoteAddress, address)
log.info("Cluster Node [{}] - Marking node [{}] as UP", selfAddress, address)
}
/**
* State transition to LEAVING.
*/
private[cluster] final def leaving(address: Address): Unit = {
log.info("Cluster Node [{}] - Marking node [{}] as LEAVING", remoteAddress, address)
log.info("Cluster Node [{}] - Marking node [{}] as LEAVING", selfAddress, address)
}
/**
* State transition to EXITING.
*/
private[cluster] final def exiting(address: Address): Unit = {
log.info("Cluster Node [{}] - Marking node [{}] as EXITING", remoteAddress, address)
log.info("Cluster Node [{}] - Marking node [{}] as EXITING", selfAddress, address)
}
/**
* State transition to REMOVED.
*/
private[cluster] final def removing(address: Address): Unit = {
log.info("Cluster Node [{}] - Marking node [{}] as REMOVED", remoteAddress, address)
log.info("Cluster Node [{}] - Marking node [{}] as REMOVED", selfAddress, address)
}
/**
@ -658,7 +659,7 @@ class Cluster(system: ExtendedActorSystem) extends Extension { clusterNode ⇒
localMembers
.map { member
if (member.address == address) {
log.info("Cluster Node [{}] - Marking node [{}] as DOWN", remoteAddress, member.address)
log.info("Cluster Node [{}] - Marking node [{}] as DOWN", selfAddress, member.address)
val newMember = member copy (status = MemberStatus.Down)
downedMember = Some(newMember)
newMember
@ -672,7 +673,7 @@ class Cluster(system: ExtendedActorSystem) extends Extension { clusterNode ⇒
.filter(_.status != MemberStatus.Down) // no need to DOWN members already DOWN
.map { member
if (member.address == address) {
log.info("Cluster Node [{}] - Marking unreachable node [{}] as DOWN", remoteAddress, member.address)
log.info("Cluster Node [{}] - Marking unreachable node [{}] as DOWN", selfAddress, member.address)
member copy (status = MemberStatus.Down)
} else member
}
@ -691,7 +692,7 @@ class Cluster(system: ExtendedActorSystem) extends Extension { clusterNode ⇒
val newOverview = localOverview copy (seen = newSeen, unreachable = newUnreachablePlusNewlyDownedMembers) // update gossip overview
val newGossip = localGossip copy (overview = newOverview, members = newMembers) // update gossip
val versionedGossip = newGossip + vclockNode
val newState = localState copy (latestGossip = versionedGossip seen remoteAddress)
val newState = localState copy (latestGossip = versionedGossip seen selfAddress)
if (!state.compareAndSet(localState, newState)) downing(address) // recur if we fail the update
else {
@ -730,14 +731,14 @@ class Cluster(system: ExtendedActorSystem) extends Extension { clusterNode ⇒
remoteGossip
}
val newState = localState copy (latestGossip = winningGossip seen remoteAddress)
val newState = localState copy (latestGossip = winningGossip seen selfAddress)
// if we won the race then update else try again
if (!state.compareAndSet(localState, newState)) receive(sender, remoteGossip) // recur if we fail the update
else {
log.debug("Cluster Node [{}] - Receiving gossip from [{}]", remoteAddress, sender.address)
log.debug("Cluster Node [{}] - Receiving gossip from [{}]", selfAddress, sender.address)
failureDetector heartbeat sender.address // update heartbeat in failure detector
if (sender.address != selfAddress) failureDetector heartbeat sender.address
if (convergence(newState.latestGossip).isDefined) {
newState.memberMembershipChangeListeners foreach { _ notify newState.latestGossip.members }
@ -758,7 +759,7 @@ class Cluster(system: ExtendedActorSystem) extends Extension { clusterNode ⇒
* @return the updated new state with the new member status
*/
private def switchMemberStatusTo(newStatus: MemberStatus, state: State): State = {
log.info("Cluster Node [{}] - Switching membership status to [{}]", remoteAddress, newStatus)
log.info("Cluster Node [{}] - Switching membership status to [{}]", selfAddress, newStatus)
val localSelf = self
@ -770,7 +771,7 @@ class Cluster(system: ExtendedActorSystem) extends Extension { clusterNode ⇒
// change my state in 'gossip.members'
val newMembersSet = localMembers map { member
if (member.address == remoteAddress) newSelf
if (member.address == selfAddress) newSelf
else member
}
@ -780,7 +781,7 @@ class Cluster(system: ExtendedActorSystem) extends Extension { clusterNode ⇒
// version my changes
val versionedGossip = newGossip + vclockNode
val seenVersionedGossip = versionedGossip seen remoteAddress
val seenVersionedGossip = versionedGossip seen selfAddress
state copy (latestGossip = seenVersionedGossip)
}
@ -790,7 +791,7 @@ class Cluster(system: ExtendedActorSystem) extends Extension { clusterNode ⇒
*/
private def gossipTo(address: Address): Unit = {
val connection = clusterGossipConnectionFor(address)
log.debug("Cluster Node [{}] - Gossiping to [{}]", remoteAddress, connection)
log.debug("Cluster Node [{}] - Gossiping to [{}]", selfAddress, connection)
connection ! GossipEnvelope(self, latestGossip)
}
@ -800,10 +801,10 @@ class Cluster(system: ExtendedActorSystem) extends Extension { clusterNode ⇒
* @return 'true' if it gossiped to a "deputy" member.
*/
private def gossipToRandomNodeOf(addresses: Iterable[Address]): Boolean = {
log.debug("Cluster Node [{}] - Selecting random node to gossip to [{}]", remoteAddress, addresses.mkString(", "))
log.debug("Cluster Node [{}] - Selecting random node to gossip to [{}]", selfAddress, addresses.mkString(", "))
if (addresses.isEmpty) false
else {
val peers = addresses filter (_ != remoteAddress) // filter out myself
val peers = addresses filter (_ != selfAddress) // filter out myself
val peer = selectRandomNode(peers)
gossipTo(peer)
deputyNodes exists (peer == _)
@ -819,10 +820,10 @@ class Cluster(system: ExtendedActorSystem) extends Extension { clusterNode ⇒
if (isSingletonCluster(localState)) {
// gossip to myself
// TODO could perhaps be optimized, no need to gossip to myself when Up?
gossipTo(remoteAddress)
gossipTo(selfAddress)
} else if (isAvailable(localState)) {
log.debug("Cluster Node [{}] - Initiating new round of gossip", remoteAddress)
log.debug("Cluster Node [{}] - Initiating new round of gossip", selfAddress)
val localGossip = localState.latestGossip
val localMembers = localGossip.members
@ -842,7 +843,7 @@ class Cluster(system: ExtendedActorSystem) extends Extension { clusterNode ⇒
// 3. gossip to a deputy nodes for facilitating partition healing
val deputies = deputyNodes
if ((!gossipedToDeputy || localMembersSize < 1) && !deputies.isEmpty) {
if ((!gossipedToDeputy || localMembersSize < 1) && deputies.nonEmpty) {
if (localMembersSize == 0) gossipToRandomNodeOf(deputies)
else {
val probability = 1.0 / localMembersSize + localUnreachableSize
@ -870,7 +871,7 @@ class Cluster(system: ExtendedActorSystem) extends Extension { clusterNode ⇒
val newlyDetectedUnreachableMembers = localMembers filterNot { member failureDetector.isAvailable(member.address) }
if (!newlyDetectedUnreachableMembers.isEmpty) { // we have newly detected members marked as unavailable
if (newlyDetectedUnreachableMembers.nonEmpty) { // we have newly detected members marked as unavailable
val newMembers = localMembers diff newlyDetectedUnreachableMembers
val newUnreachableMembers: Set[Member] = localUnreachableMembers ++ newlyDetectedUnreachableMembers
@ -880,14 +881,14 @@ class Cluster(system: ExtendedActorSystem) extends Extension { clusterNode ⇒
// updating vclock and 'seen' table
val versionedGossip = newGossip + vclockNode
val seenVersionedGossip = versionedGossip seen remoteAddress
val seenVersionedGossip = versionedGossip seen selfAddress
val newState = localState copy (latestGossip = seenVersionedGossip)
// if we won the race then update else try again
if (!state.compareAndSet(localState, newState)) reapUnreachableMembers() // recur
else {
log.info("Cluster Node [{}] - Marking node(s) as UNREACHABLE [{}]", remoteAddress, newlyDetectedUnreachableMembers.mkString(", "))
log.info("Cluster Node [{}] - Marking node(s) as UNREACHABLE [{}]", selfAddress, newlyDetectedUnreachableMembers.mkString(", "))
if (convergence(newState.latestGossip).isDefined) {
newState.memberMembershipChangeListeners foreach { _ notify newMembers }
@ -906,7 +907,7 @@ class Cluster(system: ExtendedActorSystem) extends Extension { clusterNode ⇒
val localGossip = localState.latestGossip
val localMembers = localGossip.members
val isLeader = !localMembers.isEmpty && (remoteAddress == localMembers.head.address)
val isLeader = localMembers.nonEmpty && (selfAddress == localMembers.head.address)
if (isLeader && isAvailable(localState)) {
// only run the leader actions if we are the LEADER and available
@ -932,14 +933,14 @@ class Cluster(system: ExtendedActorSystem) extends Extension { clusterNode ⇒
localMembers map { member
// 1. Move JOINING => UP
if (member.status == MemberStatus.Joining) {
log.info("Cluster Node [{}] - Leader is moving node [{}] from JOINING to UP", remoteAddress, member.address)
log.info("Cluster Node [{}] - Leader is moving node [{}] from JOINING to UP", selfAddress, member.address)
hasChangedState = true
member copy (status = MemberStatus.Up)
} else member
} map { member
// 2. Move EXITING => REMOVED
if (member.status == MemberStatus.Exiting) {
log.info("Cluster Node [{}] - Leader is moving node [{}] from EXITING to REMOVED", remoteAddress, member.address)
log.info("Cluster Node [{}] - Leader is moving node [{}] from EXITING to REMOVED", selfAddress, member.address)
hasChangedState = true
member copy (status = MemberStatus.Removed)
} else member
@ -955,7 +956,7 @@ class Cluster(system: ExtendedActorSystem) extends Extension { clusterNode ⇒
localUnreachableMembers
.filter(_.status != MemberStatus.Down) // no need to DOWN members already DOWN
.map { member
log.info("Cluster Node [{}] - Leader is marking unreachable node [{}] as DOWN", remoteAddress, member.address)
log.info("Cluster Node [{}] - Leader is marking unreachable node [{}] as DOWN", selfAddress, member.address)
hasChangedState = true
member copy (status = MemberStatus.Down)
}
@ -974,7 +975,7 @@ class Cluster(system: ExtendedActorSystem) extends Extension { clusterNode ⇒
val versionedGossip = newGossip + vclockNode
// 5. Updating the 'seen' table
val seenVersionedGossip = versionedGossip seen remoteAddress
val seenVersionedGossip = versionedGossip seen selfAddress
val newState = localState copy (latestGossip = seenVersionedGossip)
@ -1009,7 +1010,7 @@ class Cluster(system: ExtendedActorSystem) extends Extension { clusterNode ⇒
val views = Set.empty[VectorClock] ++ seen.values
if (views.size == 1) {
log.debug("Cluster Node [{}] - Cluster convergence reached", remoteAddress)
log.debug("Cluster Node [{}] - Cluster convergence reached", selfAddress)
Some(gossip)
} else None
} else None
@ -1022,7 +1023,7 @@ class Cluster(system: ExtendedActorSystem) extends Extension { clusterNode ⇒
val localOverview = localGossip.overview
val localMembers = localGossip.members
val localUnreachableMembers = localOverview.unreachable
val isUnreachable = localUnreachableMembers exists { _.address == remoteAddress }
val isUnreachable = localUnreachableMembers exists { _.address == selfAddress }
val hasUnavailableMemberStatus = localMembers exists { m (m == self) && MemberStatus.isUnavailable(m.status) }
isUnreachable || hasUnavailableMemberStatus
}
@ -1030,7 +1031,7 @@ class Cluster(system: ExtendedActorSystem) extends Extension { clusterNode ⇒
/**
* Looks up and returns the local cluster command connection.
*/
private def clusterCommandDaemon = system.actorFor(RootActorPath(remoteAddress) / "system" / "cluster" / "commands")
private def clusterCommandDaemon = system.actorFor(RootActorPath(selfAddress) / "system" / "cluster" / "commands")
/**
* Looks up and returns the remote cluster command connection for the specific address.
@ -1045,7 +1046,7 @@ class Cluster(system: ExtendedActorSystem) extends Extension { clusterNode ⇒
/**
* Gets an Iterable with the addresses of a all the 'deputy' nodes - excluding this node if part of the group.
*/
private def deputyNodes: Iterable[Address] = state.get.latestGossip.members.toIterable map (_.address) drop 1 take nrOfDeputyNodes filter (_ != remoteAddress)
private def deputyNodes: Iterable[Address] = state.get.latestGossip.members.toIterable map (_.address) drop 1 take nrOfDeputyNodes filter (_ != selfAddress)
private def selectRandomNode(addresses: Iterable[Address]): Address = addresses.toSeq(ThreadLocalRandom.current nextInt addresses.size)
@ -1074,8 +1075,8 @@ class Cluster(system: ExtendedActorSystem) extends Extension { clusterNode ⇒
val unreachable = gossip.overview.unreachable
val metaData = gossip.meta
"\nMembers:\n\t" + gossip.members.mkString("\n\t") +
{ if (!unreachable.isEmpty) "\nUnreachable:\n\t" + unreachable.mkString("\n\t") else "" } +
{ if (!metaData.isEmpty) "\nMeta Data:\t" + metaData.toString else "" }
{ if (unreachable.nonEmpty) "\nUnreachable:\n\t" + unreachable.mkString("\n\t") else "" } +
{ if (metaData.nonEmpty) "\nMeta Data:\t" + metaData.toString else "" }
}
def getMemberStatus: String = clusterNode.status.toString
@ -1100,7 +1101,7 @@ class Cluster(system: ExtendedActorSystem) extends Extension { clusterNode ⇒
def shutdown() = clusterNode.shutdown()
}
log.info("Cluster Node [{}] - registering cluster JMX MBean [{}]", remoteAddress, clusterMBeanName)
log.info("Cluster Node [{}] - registering cluster JMX MBean [{}]", selfAddress, clusterMBeanName)
try {
mBeanServer.registerMBean(mbean, clusterMBeanName)
} catch {