/**
 * Copyright (C) 2009-2014 Typesafe Inc. <http://www.typesafe.com>
 */

package akka.cluster

import language.existentials
import language.postfixOps

import scala.collection.immutable
import scala.concurrent.duration._
import scala.concurrent.forkjoin.ThreadLocalRandom
import scala.util.control.NonFatal
import akka.actor._
import akka.actor.SupervisorStrategy.Stop
import akka.cluster.MemberStatus._
import akka.cluster.ClusterEvent._
import akka.dispatch.{ UnboundedMessageQueueSemantics, RequiresMessageQueue }
import scala.collection.breakOut
import akka.remote.QuarantinedEvent

/**
 * Base trait for all cluster messages. All ClusterMessages are serializable.
 */
trait ClusterMessage extends Serializable

/**
 * INTERNAL API
 * Cluster commands sent by the USER via
 * [[akka.cluster.Cluster]] extension
 * or JMX.
 */
private[cluster] object ClusterUserAction {

  /**
   * Command to initiate joining another node (represented by `address`).
   * Join will be sent to the other node.
   */
  @SerialVersionUID(1L)
  case class JoinTo(address: Address)

  /**
   * Command to leave the cluster.
   */
  @SerialVersionUID(1L)
  case class Leave(address: Address) extends ClusterMessage

  /**
   * Command to mark node as temporarily down.
   */
  @SerialVersionUID(1L)
  case class Down(address: Address) extends ClusterMessage

}
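
// Usage sketch (not part of this file's API surface): these commands are
// normally issued through the [[akka.cluster.Cluster]] extension rather than
// sent directly, e.g.
//   val cluster = Cluster(system)
//   cluster.join(seedNodeAddress)        // sends ClusterUserAction.JoinTo internally
//   cluster.leave(cluster.selfAddress)
//   cluster.down(someAddress)
// `seedNodeAddress` and `someAddress` are placeholder values.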

/**
 * INTERNAL API
 */
private[cluster] object InternalClusterAction {

  /**
   * Command to join the cluster. Sent when a node wants to join another node (the receiver).
   * @param node the node that wants to join the cluster
   */
  @SerialVersionUID(1L)
  case class Join(node: UniqueAddress, roles: Set[String]) extends ClusterMessage

  /**
   * Reply to Join
   * @param from the sender node in the cluster, i.e. the node that received the Join command
   */
  @SerialVersionUID(1L)
  case class Welcome(from: UniqueAddress, gossip: Gossip) extends ClusterMessage

  /**
   * Command to initiate the process to join the specified
   * seed nodes.
   */
  case class JoinSeedNodes(seedNodes: immutable.IndexedSeq[Address])
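
  // Configuration sketch: the seed nodes that end up in `JoinSeedNodes` are
  // usually taken from the standard akka.cluster config section; host and
  // system names below are placeholders:
  //
  //   akka.cluster.seed-nodes = [
  //     "akka.tcp://ClusterSystem@host1:2552",
  //     "akka.tcp://ClusterSystem@host2:2552"]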

  /**
   * Start message of the process to join one of the seed nodes.
   * The node sends `InitJoin` to all seed nodes, which reply
   * with `InitJoinAck`. The first reply is used, others are discarded.
   * The node sends `Join` command to the seed node that replied first.
   * If a node is uninitialized it will reply to `InitJoin` with
   * `InitJoinNack`.
   */
  case object JoinSeedNode

  /**
   * @see JoinSeedNode
   */
  @SerialVersionUID(1L)
  case object InitJoin extends ClusterMessage

  /**
   * @see JoinSeedNode
   */
  @SerialVersionUID(1L)
  case class InitJoinAck(address: Address) extends ClusterMessage

  /**
   * @see JoinSeedNode
   */
  @SerialVersionUID(1L)
  case class InitJoinNack(address: Address) extends ClusterMessage
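
  // The full handshake, as described in the JoinSeedNode scaladoc above:
  //
  //   joining node                      seed nodes
  //     InitJoin      -->  all seed nodes
  //     InitJoinAck   <--  initialized seeds (first reply wins)
  //     InitJoinNack  <--  uninitialized seeds
  //     Join          -->  the seed that acked first
  //     Welcome       <--  that seed, carrying the current Gossip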

  /**
   * Marker interface for periodic tick messages
   */
  sealed trait Tick

  case object GossipTick extends Tick

  case object GossipSpeedupTick extends Tick

  case object ReapUnreachableTick extends Tick

  case object MetricsTick extends Tick

  case object LeaderActionsTick extends Tick

  case object PublishStatsTick extends Tick

  case class SendGossipTo(address: Address)

  case object GetClusterCoreRef

  case class PublisherCreated(publisher: ActorRef)

  /**
   * Command to [[akka.cluster.ClusterDaemon]] to create a
   * [[akka.cluster.OnMemberUpListener]].
   */
  case class AddOnMemberUpListener(callback: Runnable) extends NoSerializationVerificationNeeded
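
  // Usage sketch: application code does not send AddOnMemberUpListener itself;
  // it is produced by the Cluster extension's registerOnMemberUp hook
  // (assuming the Akka 2.3-era API), e.g.
  //   Cluster(system).registerOnMemberUp(new Runnable {
  //     def run(): Unit = println("self member is Up")
  //   })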

  sealed trait SubscriptionMessage
  case class Subscribe(subscriber: ActorRef, initialStateMode: SubscriptionInitialStateMode, to: Set[Class[_]]) extends SubscriptionMessage
  case class Unsubscribe(subscriber: ActorRef, to: Option[Class[_]]) extends SubscriptionMessage
  /**
   * @param receiver if `receiver` is defined the event will only be sent to that
   *                 actor, otherwise it will be sent to all subscribers via the `eventStream`.
   */
  case class PublishCurrentClusterState(receiver: Option[ActorRef]) extends SubscriptionMessage
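
  // Usage sketch: subscriptions are normally established through the Cluster
  // extension, which wraps them in these messages, e.g. from within an actor:
  //   Cluster(context.system).subscribe(self, classOf[ClusterEvent.MemberEvent])
  //   Cluster(context.system).unsubscribe(self)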

  sealed trait PublishMessage
  case class PublishChanges(newGossip: Gossip) extends PublishMessage
  case class PublishEvent(event: ClusterDomainEvent) extends PublishMessage
}

/**
 * INTERNAL API.
 *
 * Supervisor managing the different Cluster daemons.
 */
private[cluster] final class ClusterDaemon(settings: ClusterSettings) extends Actor with ActorLogging
  with RequiresMessageQueue[UnboundedMessageQueueSemantics] {
  import InternalClusterAction._
  // Important - don't use Cluster(context.system) here because that would
  // cause deadlock. The Cluster extension is currently being created and is waiting
  // for response from GetClusterCoreRef in its constructor.
  val coreSupervisor = context.actorOf(Props[ClusterCoreSupervisor].
    withDispatcher(context.props.dispatcher), name = "core")
  context.actorOf(Props[ClusterHeartbeatReceiver].
    withDispatcher(context.props.dispatcher), name = "heartbeatReceiver")

  def receive = {
    case msg @ GetClusterCoreRef ⇒ coreSupervisor forward msg
    case AddOnMemberUpListener(code) ⇒
      context.actorOf(Props(classOf[OnMemberUpListener], code).withDeploy(Deploy.local))
    case PublisherCreated(publisher) ⇒
      if (settings.MetricsEnabled) {
        // metrics must be started after core/publisher to be able
        // to inject the publisher ref to the ClusterMetricsCollector
        context.actorOf(Props(classOf[ClusterMetricsCollector], publisher).
          withDispatcher(context.props.dispatcher), name = "metrics")
      }
  }

}
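
// Resulting actor hierarchy, pieced together from the names used above and the
// path in clusterCore below (the /system/cluster placement is where the
// Cluster extension is assumed to start this daemon):
//   /system/cluster                    ClusterDaemon
//   /system/cluster/core               ClusterCoreSupervisor
//   /system/cluster/core/daemon        ClusterCoreDaemon
//   /system/cluster/core/publisher     ClusterDomainEventPublisher
//   /system/cluster/heartbeatReceiver  ClusterHeartbeatReceiver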

/**
 * INTERNAL API.
 *
 * ClusterCoreDaemon and ClusterDomainEventPublisher can't be restarted because the state
 * would be obsolete. Shut down the member if any of those actors crashed.
 */
private[cluster] final class ClusterCoreSupervisor extends Actor with ActorLogging
  with RequiresMessageQueue[UnboundedMessageQueueSemantics] {
  import InternalClusterAction._

  val publisher = context.actorOf(Props[ClusterDomainEventPublisher].
    withDispatcher(context.props.dispatcher), name = "publisher")
  val coreDaemon = context.watch(context.actorOf(Props(classOf[ClusterCoreDaemon], publisher).
    withDispatcher(context.props.dispatcher), name = "daemon"))

  context.parent ! PublisherCreated(publisher)

  override val supervisorStrategy =
    OneForOneStrategy() {
      case NonFatal(e) ⇒
        log.error(e, "Cluster node [{}] crashed, [{}] - shutting down...", Cluster(context.system).selfAddress, e.getMessage)
        self ! PoisonPill
        Stop
    }

  override def postStop(): Unit = Cluster(context.system).shutdown()

  def receive = {
    case InternalClusterAction.GetClusterCoreRef ⇒ sender() ! coreDaemon
  }
}

/**
 * INTERNAL API.
 */
private[cluster] class ClusterCoreDaemon(publisher: ActorRef) extends Actor with ActorLogging
  with RequiresMessageQueue[UnboundedMessageQueueSemantics] {
  import InternalClusterAction._

  val cluster = Cluster(context.system)
  import cluster.{ selfAddress, scheduler, failureDetector }
  import cluster.settings.{ AutoDown ⇒ _, _ }
  import cluster.InfoLogger._

  protected def selfUniqueAddress = cluster.selfUniqueAddress

  val NumberOfGossipsBeforeShutdownWhenLeaderExits = 3

  def vclockName(node: UniqueAddress): String = node.address + "-" + node.uid
  val vclockNode = VectorClock.Node(vclockName(selfUniqueAddress))
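
  // Worked example of vclockName: for a node with address
  // akka.tcp://Sys@host1:2552 and uid 123456 (placeholder values) the vector
  // clock node name is "akka.tcp://Sys@host1:2552-123456"; including the uid
  // keeps a restarted node from reusing the clock entry of its previous
  // incarnation at the same host:port.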

  // note that self is not initially a member,
  // and the Gossip is not versioned for this 'Node' yet
  var latestGossip: Gossip = Gossip.empty

  val statsEnabled = PublishStatsInterval.isFinite
  var gossipStats = GossipStats()

  var seedNodeProcess: Option[ActorRef] = None

  /**
   * Looks up and returns the remote cluster command connection for the specific address.
   */
  private def clusterCore(address: Address): ActorSelection =
    context.actorSelection(RootActorPath(address) / "system" / "cluster" / "core" / "daemon")

  import context.dispatcher

  // start periodic gossip to random nodes in cluster
  val gossipTask = scheduler.schedule(PeriodicTasksInitialDelay.max(GossipInterval),
    GossipInterval, self, GossipTick)

  // start periodic cluster failure detector reaping (moving nodes condemned by the failure detector to unreachable list)
  val failureDetectorReaperTask = scheduler.schedule(PeriodicTasksInitialDelay.max(UnreachableNodesReaperInterval),
    UnreachableNodesReaperInterval, self, ReapUnreachableTick)

  // start periodic leader action management (only applies for the current leader)
  val leaderActionsTask = scheduler.schedule(PeriodicTasksInitialDelay.max(LeaderActionsInterval),
    LeaderActionsInterval, self, LeaderActionsTick)

  // start periodic publish of current stats
  val publishStatsTask: Option[Cancellable] = PublishStatsInterval match {
    case Duration.Zero | _: Duration.Infinite ⇒ None
    case d: FiniteDuration ⇒
      Some(scheduler.schedule(PeriodicTasksInitialDelay.max(d), d, self, PublishStatsTick))
  }
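
  // Configuration sketch: the intervals above come from ClusterSettings, which
  // reads the standard akka.cluster config section; assumed keys and defaults
  // (verify against the reference.conf of the Akka version in use):
  //   akka.cluster.periodic-tasks-initial-delay = 1s
  //   akka.cluster.gossip-interval = 1s
  //   akka.cluster.unreachable-nodes-reaper-interval = 1s
  //   akka.cluster.leader-actions-interval = 1s
  //   akka.cluster.publish-stats-interval = off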

  override def preStart(): Unit = {
    context.system.eventStream.subscribe(self, classOf[QuarantinedEvent])

    AutoDownUnreachableAfter match {
      case d: FiniteDuration ⇒
        context.actorOf(AutoDown.props(d) withDispatcher (context.props.dispatcher), name = "autoDown")
      case _ ⇒ // auto-down is disabled
    }

    if (SeedNodes.isEmpty)
      logInfo("No seed-nodes configured, manual cluster join required")
    else
      self ! JoinSeedNodes(SeedNodes)
  }

  override def postStop(): Unit = {
    context.system.eventStream.unsubscribe(self)
    gossipTask.cancel()
    failureDetectorReaperTask.cancel()
    leaderActionsTask.cancel()
    publishStatsTask foreach { _.cancel() }
  }

  def uninitialized: Actor.Receive = {
    case InitJoin ⇒ sender() ! InitJoinNack(selfAddress)
    case ClusterUserAction.JoinTo(address) ⇒ join(address)
    case JoinSeedNodes(seedNodes) ⇒ joinSeedNodes(seedNodes)
    case msg: SubscriptionMessage ⇒ publisher forward msg
  }

  def tryingToJoin(joinWith: Address, deadline: Option[Deadline]): Actor.Receive = {
    case Welcome(from, gossip) ⇒ welcome(joinWith, from, gossip)
    case InitJoin ⇒ sender() ! InitJoinNack(selfAddress)
    case ClusterUserAction.JoinTo(address) ⇒
      becomeUninitialized()
      join(address)
    case JoinSeedNodes(seedNodes) ⇒
      becomeUninitialized()
      joinSeedNodes(seedNodes)
    case msg: SubscriptionMessage ⇒ publisher forward msg
    case _: Tick ⇒
      if (deadline.exists(_.isOverdue)) {
        becomeUninitialized()
        if (SeedNodes.nonEmpty) joinSeedNodes(SeedNodes)
        else join(joinWith)
      }
  }

  def becomeUninitialized(): Unit = {
    // make sure that join process is stopped
    stopSeedNodeProcess()
    context.become(uninitialized)
  }

  def becomeInitialized(): Unit = {
    // start heartbeatSender here, and not in constructor to make sure that
    // heartbeating doesn't start before Welcome is received
    context.actorOf(Props[ClusterHeartbeatSender].
      withDispatcher(UseDispatcher), name = "heartbeatSender")
    // make sure that join process is stopped
    stopSeedNodeProcess()
    context.become(initialized)
  }

  def initialized: Actor.Receive = {
    case msg: GossipEnvelope ⇒ receiveGossip(msg)
    case msg: GossipStatus ⇒ receiveGossipStatus(msg)
    case GossipTick ⇒ gossipTick()
    case GossipSpeedupTick ⇒ gossipSpeedupTick()
    case ReapUnreachableTick ⇒ reapUnreachableMembers()
    case LeaderActionsTick ⇒ leaderActions()
    case PublishStatsTick ⇒ publishInternalStats()
    case InitJoin ⇒ initJoin()
    case Join(node, roles) ⇒ joining(node, roles)
    case ClusterUserAction.Down(address) ⇒ downing(address)
    case ClusterUserAction.Leave(address) ⇒ leaving(address)
    case SendGossipTo(address) ⇒ sendGossipTo(address)
    case msg: SubscriptionMessage ⇒ publisher forward msg
    case QuarantinedEvent(address, uid) ⇒ quarantined(UniqueAddress(address, uid))
    case ClusterUserAction.JoinTo(address) ⇒
      logInfo("Trying to join [{}] when already part of a cluster, ignoring", address)
    case JoinSeedNodes(seedNodes) ⇒
      logInfo("Trying to join seed nodes [{}] when already part of a cluster, ignoring",
        seedNodes.mkString(", "))
  }

  def removed: Actor.Receive = {
    case msg: SubscriptionMessage ⇒ publisher forward msg
  }

  def receive = uninitialized

  override def unhandled(message: Any): Unit = message match {
    case _: Tick ⇒
    case _: GossipEnvelope ⇒
    case _: GossipStatus ⇒
    case other ⇒ super.unhandled(other)
  }

  def initJoin(): Unit = sender() ! InitJoinAck(selfAddress)

  def joinSeedNodes(seedNodes: immutable.IndexedSeq[Address]): Unit = {
    if (seedNodes.nonEmpty) {
      stopSeedNodeProcess()
      seedNodeProcess =
        if (seedNodes == immutable.IndexedSeq(selfAddress)) {
          self ! ClusterUserAction.JoinTo(selfAddress)
          None
        } else if (seedNodes.head == selfAddress) {
          Some(context.actorOf(Props(classOf[FirstSeedNodeProcess], seedNodes).
            withDispatcher(UseDispatcher), name = "firstSeedNodeProcess"))
        } else {
          Some(context.actorOf(Props(classOf[JoinSeedNodeProcess], seedNodes).
            withDispatcher(UseDispatcher), name = "joinSeedNodeProcess"))
        }
    }
  }
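
  // Summary of the three joinSeedNodes branches above: if this node is the only
  // configured seed it simply joins itself; if it is the first seed in the list
  // it runs FirstSeedNodeProcess (which may bootstrap a new cluster); otherwise
  // it runs JoinSeedNodeProcess (which only ever joins an existing cluster).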

  /**
   * Try to join this cluster node with the node specified by `address`.
   * It's only allowed to join from an empty state, i.e. when not already a member.
   * A `Join(selfUniqueAddress)` command is sent to the node to join,
   * which will reply with a `Welcome` message.
   */
  def join(address: Address): Unit = {
    if (address.protocol != selfAddress.protocol)
      log.warning("Trying to join member with wrong protocol, but was ignored, expected [{}] but was [{}]",
        selfAddress.protocol, address.protocol)
    else if (address.system != selfAddress.system)
      log.warning("Trying to join member with wrong ActorSystem name, but was ignored, expected [{}] but was [{}]",
        selfAddress.system, address.system)
    else {
      require(latestGossip.members.isEmpty, "Join can only be done from empty state")

      // to support manual join when joining to seed nodes is stuck (no seed nodes available)
      stopSeedNodeProcess()

      if (address == selfAddress) {
        becomeInitialized()
        joining(selfUniqueAddress, cluster.selfRoles)
      } else {
        val joinDeadline = RetryUnsuccessfulJoinAfter match {
          case d: FiniteDuration ⇒ Some(Deadline.now + d)
          case _ ⇒ None
        }
        context.become(tryingToJoin(address, joinDeadline))
        clusterCore(address) ! Join(selfUniqueAddress, cluster.selfRoles)
      }
    }
  }

  def stopSeedNodeProcess(): Unit = {
    seedNodeProcess match {
      case Some(s) ⇒
        // manual join, abort current seedNodeProcess
        context stop s
        seedNodeProcess = None
      case None ⇒ // no seedNodeProcess in progress
    }
  }

  /**
   * State transition to JOINING - new node joining.
   * Received `Join` message and replies with `Welcome` message, containing
   * current gossip state, including the new joining member.
   */
  def joining(node: UniqueAddress, roles: Set[String]): Unit = {
    if (node.address.protocol != selfAddress.protocol)
      log.warning("Member with wrong protocol tried to join, but was ignored, expected [{}] but was [{}]",
        selfAddress.protocol, node.address.protocol)
    else if (node.address.system != selfAddress.system)
      log.warning("Member with wrong ActorSystem name tried to join, but was ignored, expected [{}] but was [{}]",
        selfAddress.system, node.address.system)
    else {
      val localMembers = latestGossip.members

      // check by address without uid to make sure that node with same host:port is not allowed
      // to join until previous node with that host:port has been removed from the cluster
      val alreadyMember = localMembers.exists(_.address == node.address)
      val isUnreachable = !latestGossip.overview.reachability.isReachable(node)

      if (alreadyMember)
        logInfo("Existing member [{}] is trying to join, ignoring", node)
      else if (isUnreachable)
        logInfo("Unreachable member [{}] is trying to join, ignoring", node)
      else {

        // remove the node from the failure detector
        failureDetector.remove(node.address)

        // add joining node as Joining
        // add self in case someone else joins before self has joined (Set discards duplicates)
        val newMembers = localMembers + Member(node, roles) + Member(selfUniqueAddress, cluster.selfRoles)
        val newGossip = latestGossip copy (members = newMembers)

        updateLatestGossip(newGossip)

        logInfo("Node [{}] is JOINING, roles [{}]", node.address, roles.mkString(", "))
        if (node != selfUniqueAddress) {
          sender() ! Welcome(selfUniqueAddress, latestGossip)
        }

        publish(latestGossip)
      }
    }
  }

  /**
   * Reply from Join request.
   */
  def welcome(joinWith: Address, from: UniqueAddress, gossip: Gossip): Unit = {
    require(latestGossip.members.isEmpty, "Join can only be done from empty state")
    if (joinWith != from.address)
      logInfo("Ignoring welcome from [{}] when trying to join with [{}]", from.address, joinWith)
    else {
      logInfo("Welcome from [{}]", from.address)
      latestGossip = gossip seen selfUniqueAddress
      publish(latestGossip)
      if (from != selfUniqueAddress)
        gossipTo(from, sender())
      becomeInitialized()
    }
  }

  /**
   * State transition to LEAVING.
   * The node will eventually be removed by the leader, after hand-off in EXITING, and only after
   * removal a new node with same address can join the cluster through the normal joining procedure.
   */
  def leaving(address: Address): Unit = {
    // only try to update if the node is available (in the member ring)
    if (latestGossip.members.exists(m ⇒ m.address == address && m.status == Up)) {
      val newMembers = latestGossip.members map { m ⇒ if (m.address == address) m.copy(status = Leaving) else m } // mark node as LEAVING
      val newGossip = latestGossip copy (members = newMembers)

      updateLatestGossip(newGossip)

      logInfo("Marked address [{}] as [{}]", address, Leaving)
      publish(latestGossip)
    }
  }

  /**
   * This method is called when a member sees itself as Exiting or Down.
   */
  def shutdown(): Unit = cluster.shutdown()

  /**
   * State transition to DOWN.
   * The node's status is set to DOWN. The node is also removed from the `seen` table.
   *
   * The node will eventually be removed by the leader, and only after removal a new node with same address can
   * join the cluster through the normal joining procedure.
   */
  def downing(address: Address): Unit = {
    val localGossip = latestGossip
    val localMembers = localGossip.members
    val localOverview = localGossip.overview
    val localSeen = localOverview.seen
    val localReachability = localOverview.reachability

    // check if the node to DOWN is in the `members` set
    localMembers.collectFirst { case m if m.address == address ⇒ m.copy(status = Down) } match {
      case Some(m) ⇒
        if (localReachability.isReachable(m.uniqueAddress))
          logInfo("Marking node [{}] as [{}]", m.address, Down)
        else
          logInfo("Marking unreachable node [{}] as [{}]", m.address, Down)

        // replace member (changed status)
        val newMembers = localMembers - m + m
        // remove nodes marked as DOWN from the `seen` table
        val newSeen = localSeen - m.uniqueAddress

        // update gossip overview
        val newOverview = localOverview copy (seen = newSeen)
        val newGossip = localGossip copy (members = newMembers, overview = newOverview) // update gossip
        updateLatestGossip(newGossip)

        publish(latestGossip)
      case None ⇒
        logInfo("Ignoring down of unknown node [{}]", address)
    }

  }

  def quarantined(node: UniqueAddress): Unit = {
    val localGossip = latestGossip
    if (localGossip.hasMember(node)) {
      val newReachability = latestGossip.overview.reachability.terminated(selfUniqueAddress, node)
      val newOverview = localGossip.overview copy (reachability = newReachability)
      val newGossip = localGossip copy (overview = newOverview)
      updateLatestGossip(newGossip)
      log.warning("Cluster Node [{}] - Marking node as TERMINATED [{}], due to quarantine",
        selfAddress, node.address)
      publish(latestGossip)
      downing(node.address)
    }
  }

  def receiveGossipStatus(status: GossipStatus): Unit = {
    val from = status.from
    if (!latestGossip.overview.reachability.isReachable(selfUniqueAddress, from))
      logInfo("Ignoring received gossip status from unreachable [{}] ", from)
    else if (latestGossip.members.forall(_.uniqueAddress != from))
      log.debug("Cluster Node [{}] - Ignoring received gossip status from unknown [{}]", selfAddress, from)
    else {
      (status.version compareTo latestGossip.version) match {
        case VectorClock.Same ⇒ // same version
        case VectorClock.After ⇒ gossipStatusTo(from, sender()) // remote is newer
        case _ ⇒ gossipTo(from, sender()) // conflicting or local is newer
      }
    }
  }

  /**
   * The types of gossip actions that receive gossip has performed.
   */
  sealed trait ReceiveGossipType
  case object Ignored extends ReceiveGossipType
  case object Older extends ReceiveGossipType
  case object Newer extends ReceiveGossipType
  case object Same extends ReceiveGossipType
  case object Merge extends ReceiveGossipType

  /**
   * Receive new gossip.
   */
  def receiveGossip(envelope: GossipEnvelope): ReceiveGossipType = {
    val from = envelope.from
    val remoteGossip = envelope.gossip
    val localGossip = latestGossip

    if (remoteGossip eq Gossip.empty) {
      log.debug("Cluster Node [{}] - Ignoring received gossip from [{}] to protect against overload", selfAddress, from)
      Ignored
    } else if (envelope.to != selfUniqueAddress) {
      logInfo("Ignoring received gossip intended for someone else, from [{}] to [{}]", from.address, envelope.to)
      Ignored
    } else if (!remoteGossip.overview.reachability.isReachable(selfUniqueAddress)) {
      logInfo("Ignoring received gossip with myself as unreachable, from [{}]", from.address)
      Ignored
    } else if (!localGossip.overview.reachability.isReachable(selfUniqueAddress, from)) {
      logInfo("Ignoring received gossip from unreachable [{}] ", from)
      Ignored
    } else if (localGossip.members.forall(_.uniqueAddress != from)) {
      log.debug("Cluster Node [{}] - Ignoring received gossip from unknown [{}]", selfAddress, from)
      Ignored
    } else if (remoteGossip.members.forall(_.uniqueAddress != selfUniqueAddress)) {
      logInfo("Ignoring received gossip that does not contain myself, from [{}]", from)
      Ignored
    } else {
      val comparison = remoteGossip.version compareTo localGossip.version

      val (winningGossip, talkback, gossipType) = comparison match {
        case VectorClock.Same ⇒
          // same version
          (remoteGossip mergeSeen localGossip, !remoteGossip.seenByNode(selfUniqueAddress), Same)
        case VectorClock.Before ⇒
          // local is newer
          (localGossip, true, Older)
        case VectorClock.After ⇒
          // remote is newer
          (remoteGossip, !remoteGossip.seenByNode(selfUniqueAddress), Newer)
        case _ ⇒
          // conflicting versions, merge
          (remoteGossip merge localGossip, true, Merge)
      }

      latestGossip = winningGossip seen selfUniqueAddress

      // for all new joining nodes we remove them from the failure detector
      latestGossip.members foreach {
        node ⇒ if (node.status == Joining && !localGossip.members(node)) failureDetector.remove(node.address)
      }

      log.debug("Cluster Node [{}] - Receiving gossip from [{}]", selfAddress, from)

      if (comparison == VectorClock.Concurrent) {
        log.debug(
          """Couldn't establish a causal relationship between "remote" gossip and "local" gossip - Remote[{}] - Local[{}] - merged them into [{}]""",
          remoteGossip, localGossip, winningGossip)
      }

      if (statsEnabled) {
        gossipStats = gossipType match {
          case Merge ⇒ gossipStats.incrementMergeCount
          case Same ⇒ gossipStats.incrementSameCount
          case Newer ⇒ gossipStats.incrementNewerCount
          case Older ⇒ gossipStats.incrementOlderCount
        }
      }

      publish(latestGossip)

      val selfStatus = latestGossip.member(selfUniqueAddress).status
      if (selfStatus == Exiting || selfStatus == Down)
        shutdown()
      else if (talkback) {
        // send back gossip to sender() when sender() had different view, i.e. merge, or sender() had
        // older or sender() had newer
        gossipTo(from, sender())
      }
      gossipType
    }
  }

  def gossipTick(): Unit = {
    gossip()
    if (isGossipSpeedupNeeded) {
      scheduler.scheduleOnce(GossipInterval / 3, self, GossipSpeedupTick)
      scheduler.scheduleOnce(GossipInterval * 2 / 3, self, GossipSpeedupTick)
    }
  }

  def gossipSpeedupTick(): Unit =
    if (isGossipSpeedupNeeded) gossip()

  def isGossipSpeedupNeeded: Boolean =
    (latestGossip.overview.seen.size < latestGossip.members.size / 2)
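
  // Worked example: in a 10-member cluster where only 4 members have seen the
  // latest gossip version, 4 < 10 / 2 holds, so two extra GossipSpeedupTick
  // rounds are scheduled at 1/3 and 2/3 of the gossip interval until at least
  // half of the members have seen it.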

  /**
   * Initiates a new round of gossip.
   */
  def gossip(): Unit = {

    if (!isSingletonCluster) {
      val localGossip = latestGossip

      val preferredGossipTargets: Vector[UniqueAddress] =
        if (ThreadLocalRandom.current.nextDouble() < adjustedGossipDifferentViewProbability) {
          // If it's time to try to gossip to some nodes with a different view
          // gossip to a random alive member with preference to a member with older gossip version
          localGossip.members.collect {
            case m if !localGossip.seenByNode(m.uniqueAddress) && validNodeForGossip(m.uniqueAddress) ⇒
              m.uniqueAddress
          }(breakOut)
        } else Vector.empty

      if (preferredGossipTargets.nonEmpty) {
        val peer = selectRandomNode(preferredGossipTargets)
        // send full gossip because it has different view
        peer foreach gossipTo
      } else {
        // Fall back to localGossip; important to not accidentally use `map` of the SortedSet, since the original order is not preserved
        val peer = selectRandomNode(localGossip.members.toIndexedSeq.collect {
          case m if validNodeForGossip(m.uniqueAddress) ⇒ m.uniqueAddress
        })
        peer foreach { node ⇒
          if (localGossip.seenByNode(node)) gossipStatusTo(node)
          else gossipTo(node)
        }
      }
    }
  }

  /**
   * For large clusters we should avoid shooting down individual
   * nodes. Therefore the probability is reduced for large clusters.
   */
  def adjustedGossipDifferentViewProbability: Double = {
    val size = latestGossip.members.size
    val low = ReduceGossipDifferentViewProbability
    val high = low * 3
    // start reduction when cluster is larger than configured ReduceGossipDifferentViewProbability
    if (size <= low)
      GossipDifferentViewProbability
    else {
      // don't go lower than 1/10 of the configured GossipDifferentViewProbability
      val minP = GossipDifferentViewProbability / 10
      if (size >= high)
        minP
      else {
        // linear reduction of the probability with increasing number of nodes
        // from GossipDifferentViewProbability at ReduceGossipDifferentViewProbability nodes
        // to GossipDifferentViewProbability / 10 at ReduceGossipDifferentViewProbability * 3 nodes
        // i.e. default from 0.8 at 400 nodes, to 0.08 at 1200 nodes
        val k = (minP - GossipDifferentViewProbability) / (high - low)
        GossipDifferentViewProbability + (size - low) * k
      }
    }
  }
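
  // Worked example with the defaults cited above (GossipDifferentViewProbability
  // = 0.8, ReduceGossipDifferentViewProbability = 400, hence high = 1200 and
  // minP = 0.08): for a cluster of 800 members,
  //   k = (0.08 - 0.8) / (1200 - 400) = -0.0009
  //   probability = 0.8 + (800 - 400) * -0.0009 = 0.44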

  /**
   * Runs periodic leader actions, such as member status transitions, assigning partitions etc.
   */
  def leaderActions(): Unit =
    if (latestGossip.isLeader(selfUniqueAddress)) {
      // only run the leader actions if we are the LEADER
      if (latestGossip.convergence)
        leaderActionsOnConvergence()
    }

  /**
   * Leader actions are as follows:
   * 1. Move JOINING => UP -- When a node joins the cluster
   * 2. Move LEAVING => EXITING -- When all partition handoff has completed
   * 3. Non-exiting remain -- When all partition handoff has completed
   * 4. Move unreachable EXITING => REMOVED -- When all nodes have seen the EXITING node as unreachable (convergence) -
   *    remove the node from the node ring and seen table
   * 5. Move unreachable DOWN/EXITING => REMOVED -- When all nodes have seen that the node is DOWN/EXITING (convergence) -
   *    remove the node from the node ring and seen table
   * 6. Updating the vclock version for the changes
   * 7. Updating the `seen` table
   * 8. Update the state with the new gossip
   */
  def leaderActionsOnConvergence(): Unit = {
    val localGossip = latestGossip
    val localMembers = localGossip.members
    val localOverview = localGossip.overview
    val localSeen = localOverview.seen

    val hasPartitionHandoffCompletedSuccessfully: Boolean = {
      // TODO implement partition handoff and a check if it is completed - now just returns TRUE - e.g. has completed successfully
      true
    }

    def enoughMembers: Boolean = {
      localMembers.size >= MinNrOfMembers && MinNrOfMembersOfRole.forall {
        case (role, threshold) ⇒ localMembers.count(_.hasRole(role)) >= threshold
      }
    }
    def isJoiningToUp(m: Member): Boolean = m.status == Joining && enoughMembers

    val removedUnreachable = for {
      node ← localOverview.reachability.allUnreachableOrTerminated
      m = localGossip.member(node)
      if Gossip.removeUnreachableWithMemberStatus(m.status)
    } yield m

    val changedMembers = localMembers collect {
      var upNumber = 0

      {
        case m if isJoiningToUp(m) ⇒
          // Move JOINING => UP (once all nodes have seen that this node is JOINING, i.e. we have a convergence)
          // and minimum number of nodes have joined the cluster
          if (upNumber == 0) {
            // It is alright to use same upNumber as already used by a removed member, since the upNumber
            // is only used for comparing age of current cluster members (Member.isOlderThan)
            val youngest = localGossip.youngestMember
            upNumber = 1 + (if (youngest.upNumber == Int.MaxValue) 0 else youngest.upNumber)
          } else {
            upNumber += 1
          }
          m.copyUp(upNumber)

        case m if m.status == Leaving && hasPartitionHandoffCompletedSuccessfully ⇒
          // Move LEAVING => EXITING (once we have a convergence on LEAVING
          // *and* if we have a successful partition handoff)
          m copy (status = Exiting)
      }
    }

    if (removedUnreachable.nonEmpty || changedMembers.nonEmpty) {
      // handle changes

      // replace changed members
      val newMembers = changedMembers ++ localMembers -- removedUnreachable

      // removing REMOVED nodes from the `seen` table
      val removed = removedUnreachable.map(_.uniqueAddress)
      val newSeen = localSeen -- removed
      // removing REMOVED nodes from the `reachability` table
      val newReachability = localOverview.reachability.remove(removed)
      val newOverview = localOverview copy (seen = newSeen, reachability = newReachability)
      val newGossip = localGossip copy (members = newMembers, overview = newOverview)

      updateLatestGossip(newGossip)

      // log status changes
      changedMembers foreach { m ⇒
        logInfo("Leader is moving node [{}] to [{}]", m.address, m.status)
      }

      // log the removal of the unreachable nodes
      removedUnreachable foreach { m ⇒
        val status = if (m.status == Exiting) "exiting" else "unreachable"
        logInfo("Leader is removing {} node [{}]", status, m.address)
      }

      publish(latestGossip)

      if (latestGossip.member(selfUniqueAddress).status == Exiting) {
        // Leader is moving itself from Leaving to Exiting. Let others know (best effort)
        // before shutdown. Otherwise they will not see the Exiting state change
        // and there will not be convergence until they have detected this node as
        // unreachable and the required downing has finished. They will still need to detect
        // unreachable, but Exiting unreachable will be removed without downing, i.e.
        // normally the leaving of a leader will be graceful without the need
        // for downing. However, if those final gossip messages never arrive it is
        // alright to require the downing, because that is probably caused by a
        // network failure anyway.
        for (_ ← 1 to NumberOfGossipsBeforeShutdownWhenLeaderExits) gossip()
        shutdown()
      }

    }
  }

  /**
   * Reaps the unreachable members according to the failure detector's verdict.
   */
  def reapUnreachableMembers(): Unit = {
    if (!isSingletonCluster) {
      // only scrutinize if we are a non-singleton cluster

      val localGossip = latestGossip
      val localOverview = localGossip.overview
      val localMembers = localGossip.members

      val newlyDetectedUnreachableMembers = localMembers filterNot { member ⇒
        member.uniqueAddress == selfUniqueAddress ||
          localOverview.reachability.status(selfUniqueAddress, member.uniqueAddress) == Reachability.Unreachable ||
          localOverview.reachability.status(selfUniqueAddress, member.uniqueAddress) == Reachability.Terminated ||
          failureDetector.isAvailable(member.address)
      }

      val newlyDetectedReachableMembers = localOverview.reachability.allUnreachableFrom(selfUniqueAddress) collect {
        case node if node != selfUniqueAddress && failureDetector.isAvailable(node.address) ⇒
          localGossip.member(node)
      }

      if (newlyDetectedUnreachableMembers.nonEmpty || newlyDetectedReachableMembers.nonEmpty) {

        val newReachability1 = (localOverview.reachability /: newlyDetectedUnreachableMembers) {
          (reachability, m) ⇒ reachability.unreachable(selfUniqueAddress, m.uniqueAddress)
        }
        val newReachability2 = (newReachability1 /: newlyDetectedReachableMembers) {
          (reachability, m) ⇒ reachability.reachable(selfUniqueAddress, m.uniqueAddress)
        }

        if (newReachability2 ne localOverview.reachability) {
          val newOverview = localOverview copy (reachability = newReachability2)
          val newGossip = localGossip copy (overview = newOverview)

          updateLatestGossip(newGossip)

          val (exiting, nonExiting) = newlyDetectedUnreachableMembers.partition(_.status == Exiting)
          if (nonExiting.nonEmpty)
            log.warning("Cluster Node [{}] - Marking node(s) as UNREACHABLE [{}]", selfAddress, nonExiting.mkString(", "))
          if (exiting.nonEmpty)
            logInfo("Marking exiting node(s) as UNREACHABLE [{}]. This is expected and they will be removed.",
              exiting.mkString(", "))
          if (newlyDetectedReachableMembers.nonEmpty)
            logInfo("Marking node(s) as REACHABLE [{}]", newlyDetectedReachableMembers.mkString(", "))

          publish(latestGossip)
        }
      }
    }
  }

  def selectRandomNode(nodes: IndexedSeq[UniqueAddress]): Option[UniqueAddress] =
    if (nodes.isEmpty) None
    else Some(nodes(ThreadLocalRandom.current nextInt nodes.size))

  def isSingletonCluster: Boolean = latestGossip.isSingletonCluster

  // needed for tests
  def sendGossipTo(address: Address): Unit = {
    latestGossip.members.foreach(m ⇒
      if (m.address == address)
        gossipTo(m.uniqueAddress))
  }

  /**
   * Gossips latest gossip to a node.
   */
  def gossipTo(node: UniqueAddress): Unit =
    if (validNodeForGossip(node))
      clusterCore(node.address) ! GossipEnvelope(selfUniqueAddress, node, latestGossip)

  def gossipTo(node: UniqueAddress, destination: ActorRef): Unit =
    if (validNodeForGossip(node))
      destination ! GossipEnvelope(selfUniqueAddress, node, latestGossip)

  def gossipStatusTo(node: UniqueAddress, destination: ActorRef): Unit =
    if (validNodeForGossip(node))
      destination ! GossipStatus(selfUniqueAddress, latestGossip.version)

  def gossipStatusTo(node: UniqueAddress): Unit =
    if (validNodeForGossip(node))
      clusterCore(node.address) ! GossipStatus(selfUniqueAddress, latestGossip.version)

  def validNodeForGossip(node: UniqueAddress): Boolean =
    (node != selfUniqueAddress && latestGossip.hasMember(node) &&
      latestGossip.overview.reachability.isReachable(node))

  def updateLatestGossip(newGossip: Gossip): Unit = {
    // Updating the vclock version for the changes
    val versionedGossip = newGossip :+ vclockNode
    // Nobody else has seen this gossip but us
    val seenVersionedGossip = versionedGossip onlySeen (selfUniqueAddress)
    // Update the state with the new gossip
    latestGossip = seenVersionedGossip
  }
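
  // In other words, every local mutation of the gossip (join, leave, down,
  // reachability change) bumps this node's own entry in the vector clock and
  // resets the seen-set to just this node, so the change must propagate and be
  // re-acknowledged by every member before convergence is reached again.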

  def publish(newGossip: Gossip): Unit = {
    publisher ! PublishChanges(newGossip)
    if (PublishStatsInterval == Duration.Zero) publishInternalStats()
  }

  def publishInternalStats(): Unit = {
    val vclockStats = VectorClockStats(
      versionSize = latestGossip.version.versions.size,
      seenLatest = latestGossip.members.count(m ⇒ latestGossip.seenByNode(m.uniqueAddress)))
    publisher ! CurrentInternalStats(gossipStats, vclockStats)
  }

}

/**
 * INTERNAL API.
 *
 * Used only for the first seed node.
 * Sends InitJoin to all seed nodes (except itself).
 * If other seed nodes are not part of the cluster yet they will reply with
 * InitJoinNack or not respond at all and then the first seed node
 * will join itself to initialize the new cluster. When the first
 * seed node is restarted, and some other seed node is part of the cluster
 * it will reply with InitJoinAck and then the first seed node will join
 * that other seed node to join the existing cluster.
 */
private[cluster] final class FirstSeedNodeProcess(seedNodes: immutable.IndexedSeq[Address]) extends Actor with ActorLogging {
  import InternalClusterAction._
  import ClusterUserAction.JoinTo

  val cluster = Cluster(context.system)
  def selfAddress = cluster.selfAddress

  if (seedNodes.size <= 1 || seedNodes.head != selfAddress)
    throw new IllegalArgumentException("Join seed node should not be done")

  val timeout = Deadline.now + cluster.settings.SeedNodeTimeout

  var remainingSeedNodes = seedNodes.toSet - selfAddress

  // retry until one ack, or all nack, or timeout
  import context.dispatcher
  val retryTask = cluster.scheduler.schedule(1.second, 1.second, self, JoinSeedNode)
  self ! JoinSeedNode

  override def postStop(): Unit = retryTask.cancel()

  def receive = {
    case JoinSeedNode ⇒
      if (timeout.hasTimeLeft) {
        // send InitJoin to remaining seed nodes (except myself)
        remainingSeedNodes foreach { a ⇒ context.actorSelection(context.parent.path.toStringWithAddress(a)) ! InitJoin }
      } else {
        // no InitJoinAck received, initialize new cluster by joining myself
        context.parent ! JoinTo(selfAddress)
        context.stop(self)
      }
    case InitJoinAck(address) ⇒
      // first InitJoinAck reply, join existing cluster
      context.parent ! JoinTo(address)
      context.stop(self)
    case InitJoinNack(address) ⇒
      remainingSeedNodes -= address
      if (remainingSeedNodes.isEmpty) {
        // initialize new cluster by joining myself when nacks from all other seed nodes
        context.parent ! JoinTo(selfAddress)
        context.stop(self)
      }
  }

}

/**
 * INTERNAL API.
 *
 * Sends InitJoin to all seed nodes (except itself) and expects
 * InitJoinAck reply back. The seed node that replied first
 * will be used and joined to. InitJoinAck replies received after the
 * first one are ignored.
 *
 * Retries if no InitJoinAck replies are received within the
 * SeedNodeTimeout.
 * When at least one reply has been received it stops itself after
 * an idle SeedNodeTimeout.
 *
 * The seed nodes can be started in any order, but they will not be "active",
 * until they have been able to join another seed node (seed1).
 * They will retry the join procedure.
 * So one possible startup scenario is:
 * 1. seed2 started, but doesn't get any ack from seed1 or seed3
 * 2. seed3 started, doesn't get any ack from seed1 or seed2 (seed2 doesn't reply)
 * 3. seed1 is started and joins itself
 * 4. seed2 retries the join procedure and gets an ack from seed1, and then joins to seed1
 * 5. seed3 retries the join procedure and gets acks from seed2 first, and then joins to seed2
 *
 */
private[cluster] final class JoinSeedNodeProcess(seedNodes: immutable.IndexedSeq[Address]) extends Actor with ActorLogging {
  import InternalClusterAction._
  import ClusterUserAction.JoinTo

  def selfAddress = Cluster(context.system).selfAddress

  if (seedNodes.isEmpty || seedNodes.head == selfAddress)
    throw new IllegalArgumentException("Join seed node should not be done")

  context.setReceiveTimeout(Cluster(context.system).settings.SeedNodeTimeout)

  override def preStart(): Unit = self ! JoinSeedNode

  def receive = {
    case JoinSeedNode ⇒
      // send InitJoin to all seed nodes (except myself)
      seedNodes.collect {
        case a if a != selfAddress ⇒ context.actorSelection(context.parent.path.toStringWithAddress(a))
      } foreach { _ ! InitJoin }
    case InitJoinAck(address) ⇒
      // first InitJoinAck reply
      context.parent ! JoinTo(address)
      context.become(done)
    case InitJoinNack(_) ⇒ // that seed was uninitialized
    case ReceiveTimeout ⇒
      // no InitJoinAck received, try again
      self ! JoinSeedNode
  }

  def done: Actor.Receive = {
    case InitJoinAck(_) ⇒ // already received one, skip rest
    case ReceiveTimeout ⇒ context.stop(self)
  }
}

/**
 * INTERNAL API
 *
 * The supplied callback will be run, once, when current cluster member is `Up`.
 */
private[cluster] class OnMemberUpListener(callback: Runnable) extends Actor with ActorLogging {
  import ClusterEvent._
  val cluster = Cluster(context.system)
  // subscribe to MemberUp, re-subscribe when restart
  override def preStart(): Unit =
    cluster.subscribe(self, classOf[MemberUp])
  override def postStop(): Unit =
    cluster.unsubscribe(self)

  def receive = {
    case state: CurrentClusterState ⇒
      if (state.members.exists(isSelfUp(_)))
        done()
    case MemberUp(m) ⇒
      if (isSelfUp(m))
        done()
  }

  def done(): Unit = {
    try callback.run() catch {
      case NonFatal(e) ⇒ log.error(e, "OnMemberUp callback failed with [{}]", e.getMessage)
    } finally {
      context stop self
    }
  }

  def isSelfUp(m: Member): Boolean =
    m.uniqueAddress == cluster.selfUniqueAddress && m.status == MemberStatus.Up

}

/**
 * INTERNAL API
 */
@SerialVersionUID(1L)
private[cluster] case class GossipStats(
  receivedGossipCount: Long = 0L,
  mergeCount: Long = 0L,
  sameCount: Long = 0L,
  newerCount: Long = 0L,
  olderCount: Long = 0L) {

  def incrementMergeCount(): GossipStats =
    copy(mergeCount = mergeCount + 1, receivedGossipCount = receivedGossipCount + 1)

  def incrementSameCount(): GossipStats =
    copy(sameCount = sameCount + 1, receivedGossipCount = receivedGossipCount + 1)

  def incrementNewerCount(): GossipStats =
    copy(newerCount = newerCount + 1, receivedGossipCount = receivedGossipCount + 1)

  def incrementOlderCount(): GossipStats =
    copy(olderCount = olderCount + 1, receivedGossipCount = receivedGossipCount + 1)

  def :+(that: GossipStats): GossipStats = {
    GossipStats(
      this.receivedGossipCount + that.receivedGossipCount,
      this.mergeCount + that.mergeCount,
      this.sameCount + that.sameCount,
      this.newerCount + that.newerCount,
      this.olderCount + that.olderCount)
  }

  def :-(that: GossipStats): GossipStats = {
    GossipStats(
      this.receivedGossipCount - that.receivedGossipCount,
      this.mergeCount - that.mergeCount,
      this.sameCount - that.sameCount,
      this.newerCount - that.newerCount,
      this.olderCount - that.olderCount)
  }

}
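
// Usage sketch: since GossipStats is immutable, `:-` can derive the activity in
// a sampling window from two cumulative snapshots, e.g.
//   val delta = statsNow :- statsAtLastSample  // counts since the last sample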

/**
 * INTERNAL API
 */
@SerialVersionUID(1L)
private[cluster] case class VectorClockStats(
  versionSize: Int = 0,
  seenLatest: Int = 0)