2012-07-05 13:55:08 +02:00
|
|
|
/**
|
2014-02-02 19:05:45 -06:00
|
|
|
* Copyright (C) 2009-2014 Typesafe Inc. <http://www.typesafe.com>
|
2012-07-05 13:55:08 +02:00
|
|
|
*/
|
|
|
|
|
package akka.cluster
|
|
|
|
|
|
2012-07-06 17:04:04 +02:00
|
|
|
import language.postfixOps
|
2012-11-15 12:33:11 +01:00
|
|
|
import scala.collection.immutable
|
2012-09-21 14:50:06 +02:00
|
|
|
import scala.concurrent.duration._
|
2013-03-26 18:17:50 +01:00
|
|
|
import akka.actor.{ ActorLogging, ActorRef, ActorSelection, Address, Actor, RootActorPath, Props }
|
2012-10-01 14:12:20 +02:00
|
|
|
import akka.cluster.ClusterEvent._
|
2013-01-15 09:35:07 +01:00
|
|
|
import akka.routing.MurmurHash
|
2013-11-07 13:52:08 +01:00
|
|
|
import akka.remote.FailureDetectorRegistry
|
2012-07-05 13:55:08 +02:00
|
|
|
|
|
|
|
|
/**
 * INTERNAL API.
 *
 * Answers every incoming [[ClusterHeartbeatSender.Heartbeat]] with a
 * [[ClusterHeartbeatSender.HeartbeatRsp]] that carries the unique address
 * of this node.
 */
private[cluster] final class ClusterHeartbeatReceiver extends Actor with ActorLogging {
  import ClusterHeartbeatSender.{ Heartbeat, HeartbeatRsp }

  // The reply is the same for every heartbeat, so it is created once and reused.
  val selfHeartbeatRsp = HeartbeatRsp(Cluster(context.system).selfUniqueAddress)

  def receive = {
    // The address carried inside the heartbeat is not needed; the reply goes to the sender.
    case _: Heartbeat ⇒ sender() ! selfHeartbeatRsp
  }

}
|
|
|
|
|
|
|
|
|
|
/**
 * INTERNAL API
 *
 * Messages exchanged between [[ClusterHeartbeatSender]] and
 * [[ClusterHeartbeatReceiver]], plus the messages the sender sends to itself.
 * The case classes are `final`: they are plain message types and are not
 * meant to be extended.
 */
private[cluster] object ClusterHeartbeatSender {
  /**
   * Sent at regular intervals for failure detection.
   */
  final case class Heartbeat(from: Address) extends ClusterMessage

  /**
   * Sent as reply to [[Heartbeat]] messages.
   */
  final case class HeartbeatRsp(from: UniqueAddress) extends ClusterMessage

  // sent to self only

  /**
   * Periodic tick that makes the sender send a new round of heartbeats.
   */
  case object HeartbeatTick

  /**
   * Scheduled by the sender when it sends a heartbeat to a node that the
   * failure detector is not yet monitoring.
   */
  final case class ExpectedFirstHeartbeat(from: UniqueAddress)

}
|
|
|
|
|
|
|
|
|
|
/**
 * INTERNAL API
 *
 * This actor is responsible for sending the heartbeat messages to
 * a few other nodes, which will reply and then this actor updates the
 * failure detector.
 *
 * The actor starts in the `initializing` behavior, in which heartbeat ticks
 * are ignored, and switches to `active` once a
 * [[ClusterEvent.CurrentClusterState]] message has been received.
 */
private[cluster] final class ClusterHeartbeatSender extends Actor with ActorLogging {
  import ClusterHeartbeatSender._

  val cluster = Cluster(context.system)
  import cluster.{ selfAddress, selfUniqueAddress, scheduler }
  import cluster.settings._
  import cluster.InfoLogger._
  import context.dispatcher

  // the failureDetector is only updated by this actor, but read from other places
  // (always go through this single reference inside the actor)
  val failureDetector = cluster.failureDetector

  // the heartbeat message is identical for all receivers, create it once
  val selfHeartbeat = Heartbeat(selfAddress)

  // initially the ring contains only this node, it is populated in `init`
  var state = ClusterHeartbeatSenderState(
    ring = HeartbeatNodeRing(selfUniqueAddress, Set(selfUniqueAddress), MonitoredByNrOfMembers),
    unreachable = Set.empty[UniqueAddress],
    failureDetector)

  // start periodic heartbeat to other nodes in cluster
  val heartbeatTask = scheduler.schedule(PeriodicTasksInitialDelay max HeartbeatInterval,
    HeartbeatInterval, self, HeartbeatTick)

  override def preStart(): Unit = {
    cluster.subscribe(self, classOf[MemberEvent])
  }

  /**
   * Removes all active receivers from the failure detector, cancels the
   * periodic tick and unsubscribes from cluster events.
   */
  override def postStop(): Unit = {
    state.activeReceivers.foreach(a ⇒ failureDetector.remove(a.address))
    heartbeatTask.cancel()
    cluster.unsubscribe(self)
  }

  /**
   * Looks up and returns the remote cluster heartbeat connection for the specific address.
   */
  def heartbeatReceiver(address: Address): ActorSelection =
    context.actorSelection(RootActorPath(address) / "system" / "cluster" / "heartbeatReceiver")

  def receive: Actor.Receive = initializing

  /**
   * Behavior used until the first cluster state snapshot has been received.
   */
  def initializing: Actor.Receive = {
    case s: CurrentClusterState ⇒
      init(s)
      context.become(active)
    case HeartbeatTick ⇒ // ignored, no receivers are known before the snapshot has been processed
  }

  /**
   * Behavior used after initialization: sends heartbeats, handles the replies
   * and follows membership changes.
   */
  def active: Actor.Receive = {
    case HeartbeatTick                ⇒ heartbeat()
    case HeartbeatRsp(from)           ⇒ heartbeatRsp(from)
    case MemberUp(m)                  ⇒ addMember(m)
    case MemberRemoved(m, _)          ⇒ removeMember(m)
    case _: MemberEvent               ⇒ // not interested in other types of MemberEvent
    case ExpectedFirstHeartbeat(from) ⇒ triggerFirstHeartbeat(from)
  }

  /**
   * Initializes the state with the members of the snapshot that have status `Up`.
   */
  def init(snapshot: CurrentClusterState): Unit = {
    val nodes: Set[UniqueAddress] = snapshot.members.collect {
      case m if m.status == MemberStatus.Up ⇒ m.uniqueAddress
    }(collection.breakOut)
    state = state.init(nodes)
  }

  /**
   * Adds the member to the state, unless it is this node itself.
   */
  def addMember(m: Member): Unit =
    if (m.uniqueAddress != selfUniqueAddress)
      state = state.addMember(m.uniqueAddress)

  /**
   * Removes the member from the state, or stops this actor when the removed
   * member is this node itself.
   */
  def removeMember(m: Member): Unit =
    if (m.uniqueAddress == selfUniqueAddress) {
      // This cluster node will be shutdown, but stop this actor immediately
      // to avoid further updates
      context stop self
    } else {
      state = state.removeMember(m.uniqueAddress)
    }

  /**
   * Sends `selfHeartbeat` to all active receivers. For a receiver that the
   * failure detector is not yet monitoring an [[ClusterHeartbeatSender.ExpectedFirstHeartbeat]]
   * is additionally scheduled to this actor.
   */
  def heartbeat(): Unit = {
    state.activeReceivers foreach { to ⇒
      if (failureDetector.isMonitoring(to.address))
        log.debug("Cluster Node [{}] - Heartbeat to [{}]", selfAddress, to.address)
      else {
        log.debug("Cluster Node [{}] - First Heartbeat to [{}]", selfAddress, to.address)
        // schedule the expected first heartbeat for later, which will give the
        // other side a chance to reply, and also trigger some resends if needed
        scheduler.scheduleOnce(HeartbeatExpectedResponseAfter, self, ExpectedFirstHeartbeat(to))
      }
      heartbeatReceiver(to.address) ! selfHeartbeat
    }
  }

  /**
   * Handles a heartbeat reply by delegating to the state, which updates the failure detector.
   */
  def heartbeatRsp(from: UniqueAddress): Unit = {
    log.debug("Cluster Node [{}] - Heartbeat response from [{}]", selfAddress, from.address)
    state = state.heartbeatRsp(from)
  }

  /**
   * Records a heartbeat for `from` in the failure detector, but only if `from`
   * is still an active receiver and the failure detector is still not monitoring it.
   */
  def triggerFirstHeartbeat(from: UniqueAddress): Unit =
    if (state.activeReceivers(from) && !failureDetector.isMonitoring(from.address)) {
      log.debug("Cluster Node [{}] - Trigger extra expected heartbeat from [{}]", selfAddress, from.address)
      failureDetector.heartbeat(from.address)
    }

}
|
|
|
|
|
|
|
|
|
|
/**
 * INTERNAL API
 *
 * Bookkeeping used by [[ClusterHeartbeatSender]], kept in a class of its own to
 * facilitate unit testing. An instance is never modified in place, every operation
 * returns the resulting state. The `failureDetector` is however updated as a side
 * effect of the operations.
 *
 * @param ring the node ring from which the regular heartbeat receivers are picked
 * @param unreachable former receivers that the failure detector did not consider
 *   available when they dropped out of the receivers of the ring
 * @param failureDetector the failure detector that is updated by this state
 */
private[cluster] case class ClusterHeartbeatSenderState(
  ring: HeartbeatNodeRing,
  unreachable: Set[UniqueAddress],
  failureDetector: FailureDetectorRegistry[Address]) {

  /**
   * All nodes that heartbeats are sent to: the receivers of the ring and the
   * nodes that are kept in `unreachable`.
   */
  val activeReceivers: Set[UniqueAddress] = ring.myReceivers ++ unreachable

  def selfAddress: UniqueAddress = ring.selfAddress

  /**
   * Replaces the nodes of the ring with the given nodes and this node itself.
   */
  def init(nodes: Set[UniqueAddress]): ClusterHeartbeatSenderState = {
    val initialRing = ring.copy(nodes = nodes + selfAddress)
    copy(ring = initialRing)
  }

  /**
   * Adds the node to the ring.
   */
  def addMember(node: UniqueAddress): ClusterHeartbeatSenderState =
    membershipChange(ring :+ node)

  /**
   * Removes the node from the ring, from the failure detector and from `unreachable`.
   */
  def removeMember(node: UniqueAddress): ClusterHeartbeatSenderState = {
    val afterChange = membershipChange(ring :- node)
    failureDetector.remove(node.address)
    if (afterChange.unreachable.contains(node))
      afterChange.copy(unreachable = afterChange.unreachable - node)
    else
      afterChange
  }

  /**
   * Switches to the new ring. A node that is no longer a receiver is removed from
   * the failure detector when it is available, otherwise it is remembered in
   * `unreachable`.
   */
  private def membershipChange(newRing: HeartbeatNodeRing): ClusterHeartbeatSenderState = {
    val noLongerReceivers = ring.myReceivers -- newRing.myReceivers
    val updatedUnreachable = noLongerReceivers.foldLeft(unreachable) { (acc, node) ⇒
      if (failureDetector.isAvailable(node.address)) {
        failureDetector.remove(node.address)
        acc
      } else
        acc + node
    }
    copy(ring = newRing, unreachable = updatedUnreachable)
  }

  /**
   * Registers a heartbeat reply from the node in the failure detector. Replies
   * from nodes that are not active receivers are ignored.
   */
  def heartbeatRsp(from: UniqueAddress): ClusterHeartbeatSenderState =
    if (!activeReceivers.contains(from))
      this
    else {
      failureDetector.heartbeat(from.address)
      if (!unreachable.contains(from))
        this
      else {
        // back from unreachable, ok to stop heartbeating to it
        if (!ring.myReceivers.contains(from))
          failureDetector.remove(from.address)
        copy(unreachable = unreachable - from)
      }
    }

}
|
|
|
|
|
|
2013-01-15 09:35:07 +01:00
|
|
|
/**
 * INTERNAL API
 *
 * Data structure for picking heartbeat receivers. The node ring is
 * shuffled by deterministic hashing to avoid picking physically co-located
 * neighbors.
 *
 * It is immutable, i.e. the methods return new instances.
 *
 * @param selfAddress the node that owns this ring, it must be one of `nodes`
 * @param nodes all nodes of the ring
 * @param monitoredByNrOfMembers the number of receivers to pick for a sender
 */
private[cluster] case class HeartbeatNodeRing(selfAddress: UniqueAddress, nodes: Set[UniqueAddress], monitoredByNrOfMembers: Int) {

  require(nodes contains selfAddress, s"nodes [${nodes.mkString(", ")}] must contain selfAddress [${selfAddress}]")

  // The nodes ordered by hash code, the address ordering is used to break ties
  // between nodes with the same hash code.
  private val nodeRing: immutable.SortedSet[UniqueAddress] = {
    implicit val ringOrdering: Ordering[UniqueAddress] = Ordering.fromLessThan[UniqueAddress] { (a, b) ⇒
      val ha = a.##
      val hb = b.##
      ha < hb || (ha == hb && Member.addressOrdering.compare(a.address, b.address) < 0)
    }

    immutable.SortedSet() ++ nodes
  }

  /**
   * Receivers for `selfAddress`. Cached for subsequent access.
   */
  lazy val myReceivers: immutable.Set[UniqueAddress] = receivers(selfAddress)

  // when there are not more other nodes than the requested number of receivers
  // everyone except the sender is a receiver
  private val useAllAsReceivers = monitoredByNrOfMembers >= (nodeRing.size - 1)

  /**
   * The receivers to use from a specified sender: the `monitoredByNrOfMembers`
   * nodes that follow the sender in the ring, wrapping around to the beginning
   * of the ring when the end is reached. The sender itself is never included.
   *
   * The sender does not have to be a node of the ring.
   */
  def receivers(sender: UniqueAddress): immutable.Set[UniqueAddress] =
    if (useAllAsReceivers)
      nodeRing - sender
    else {
      // `from(sender)` starts at the sender when it is part of the ring. Remove it
      // with `- sender` instead of `.tail`, because `.tail` throws for an empty
      // range and would drop a real successor when the sender is not in the ring.
      val slice = (nodeRing.from(sender) - sender).take(monitoredByNrOfMembers)
      if (slice.size < monitoredByNrOfMembers)
        (slice ++ nodeRing.take(monitoredByNrOfMembers - slice.size))
      else slice
    }

  /**
   * Add a node to the ring.
   */
  def :+(node: UniqueAddress): HeartbeatNodeRing = if (nodes contains node) this else copy(nodes = nodes + node)

  /**
   * Remove a node from the ring.
   */
  def :-(node: UniqueAddress): HeartbeatNodeRing = if (nodes contains node) copy(nodes = nodes - node) else this

}
|