2011-10-26 08:48:16 +02:00
|
|
|
/**
|
2013-01-09 01:47:48 +01:00
|
|
|
* Copyright (C) 2009-2013 Typesafe Inc. <http://www.typesafe.com>
|
2011-10-26 08:48:16 +02:00
|
|
|
*/
|
|
|
|
|
|
2012-01-31 13:33:04 +01:00
|
|
|
package akka.cluster
|
2011-10-26 08:48:16 +02:00
|
|
|
|
2012-06-29 13:33:20 +02:00
|
|
|
import language.implicitConversions
|
2011-10-26 08:48:16 +02:00
|
|
|
import akka.actor._
|
|
|
|
|
import akka.actor.Status._
|
2012-06-16 00:00:19 +02:00
|
|
|
import akka.ConfigurationException
|
2012-06-29 16:40:36 +02:00
|
|
|
import akka.dispatch.MonitorableThreadFactory
|
2012-06-16 00:00:19 +02:00
|
|
|
import akka.event.Logging
|
2012-06-21 10:58:35 +02:00
|
|
|
import akka.pattern._
|
2012-06-16 00:00:19 +02:00
|
|
|
import akka.remote._
|
|
|
|
|
import akka.routing._
|
2012-01-31 13:33:04 +01:00
|
|
|
import akka.util._
|
2012-09-21 14:50:06 +02:00
|
|
|
import scala.concurrent.duration._
|
2012-07-06 17:04:04 +02:00
|
|
|
import scala.concurrent.forkjoin.ThreadLocalRandom
|
2012-06-16 00:00:19 +02:00
|
|
|
import scala.annotation.tailrec
|
2012-11-15 12:33:11 +01:00
|
|
|
import scala.collection.immutable
|
2012-07-06 17:04:04 +02:00
|
|
|
import java.io.Closeable
|
|
|
|
|
import java.util.concurrent.atomic.AtomicBoolean
|
|
|
|
|
import java.util.concurrent.atomic.AtomicReference
|
2012-06-16 00:00:19 +02:00
|
|
|
import akka.util.internal.HashedWheelTimer
|
2013-01-31 02:16:39 +01:00
|
|
|
import scala.concurrent.{ ExecutionContext, Await }
|
2013-01-14 23:21:51 +01:00
|
|
|
import com.typesafe.config.ConfigFactory
|
2013-01-29 11:55:33 +01:00
|
|
|
import akka.remote.DefaultFailureDetectorRegistry
|
|
|
|
|
import akka.remote.FailureDetector
|
|
|
|
|
import com.typesafe.config.Config
|
2013-02-13 15:37:22 +01:00
|
|
|
import akka.event.LoggingAdapter
|
|
|
|
|
import java.util.concurrent.ThreadFactory
|
2012-01-24 12:09:32 +01:00
|
|
|
|
2012-02-22 18:40:16 +01:00
|
|
|
/**
 * Extension id for the [[Cluster]] extension. It doubles as the factory that
 * instantiates the extension for an actor system.
 *
 * Usage:
 * {{{
 * Cluster(system).join(address)
 * }}}
 */
object Cluster extends ExtensionId[Cluster] with ExtensionIdProvider {

  /**
   * Creates the [[Cluster]] extension instance bound to the given actor system.
   */
  override def createExtension(system: ExtendedActorSystem): Cluster = {
    new Cluster(system)
  }

  /**
   * The extension id to look up is this object itself.
   */
  override def lookup = Cluster

  /**
   * Delegates to the inherited `get`; overridden so that the result is
   * statically typed as [[Cluster]].
   */
  override def get(system: ActorSystem): Cluster = {
    super.get(system)
  }
}
|
2012-02-14 20:50:12 +01:00
|
|
|
|
2011-10-26 08:48:16 +02:00
|
|
|
/**
 * This module is responsible for Gossiping cluster information. The abstraction maintains the list of live
 * and dead members. Periodically, i.e. every 1 second, this module chooses a random member and initiates a round
 * of Gossip with it.
 * <p/>
 * During each round of gossip exchange it sends Gossip to random node with
 * newer or older state information, if any, based on the current gossip overview,
 * with some probability. Otherwise Gossip to any random live node.
 *
 * Example:
 * {{{
 * Cluster(system).join(address)
 * }}}
 */
class Cluster(val system: ExtendedActorSystem) extends Extension {

  import ClusterEvent._

  /** Cluster settings read from the actor system's configuration. */
  val settings = new ClusterSettings(system.settings.config, system.name)
  import settings._

  /**
   * The address of this cluster node, taken from the transport of the
   * `ClusterActorRefProvider`. Construction fails with a
   * `ConfigurationException` if the actor system uses any other provider.
   */
  val selfAddress: Address = system.provider match {
    case c: ClusterActorRefProvider ⇒ c.transport.defaultAddress
    case other ⇒ throw new ConfigurationException(
      "ActorSystem [%s] needs to have a 'ClusterActorRefProvider' enabled in the configuration, currently uses [%s]".
        format(system, other.getClass.getName))
  }

  // flipped exactly once by shutdown(), which makes shutdown() idempotent
  private val _isTerminated = new AtomicBoolean(false)
  private val log = Logging(system, "Cluster")

  log.info("Cluster Node [{}] - is starting up...", selfAddress)

  /**
   * Failure detector registry keyed by node address. Each detector is
   * instantiated reflectively from `settings.FailureDetectorImplementationClass`
   * with `settings.FailureDetectorConfig` as its constructor argument.
   */
  val failureDetector: FailureDetectorRegistry[Address] = {
    def createFailureDetector(): FailureDetector = {
      import settings.{ FailureDetectorImplementationClass ⇒ fqcn }
      system.dynamicAccess.createInstanceFor[FailureDetector](
        fqcn, List(classOf[Config] -> settings.FailureDetectorConfig)).recover({
          // surface instantiation problems as a configuration error naming the class
          case e ⇒ throw new ConfigurationException(
            s"Could not create custom cluster failure detector [$fqcn] due to: ${e.toString}", e)
        }).get
    }

    new DefaultFailureDetectorRegistry(() ⇒ createFailureDetector())
  }

  // ========================================================
  // ===================== WORK DAEMONS =====================
  // ========================================================

  /**
   * INTERNAL API
   *
   * Scheduler used by the cluster. When the frequency of the system scheduler
   * is lower than what `SchedulerTickDuration` requires, a dedicated scheduler
   * is created; otherwise calls are delegated to `system.scheduler`.
   */
  private[cluster] val scheduler: Scheduler = {
    if (system.scheduler.maxFrequency < 1.second / SchedulerTickDuration) {
      log.info("Using a dedicated scheduler for cluster. Default scheduler can be used if configured " +
        "with 'akka.scheduler.tick-duration' [{} ms] <= 'akka.cluster.scheduler.tick-duration' [{} ms].",
        (1000 / system.scheduler.maxFrequency).toInt, SchedulerTickDuration.toMillis)

      // same configuration as the system, except for the overridden tick duration
      val cfg = ConfigFactory.parseString(
        s"akka.scheduler.tick-duration=${SchedulerTickDuration.toMillis}ms").withFallback(
          system.settings.config)
      val threadFactory = system.threadFactory match {
        case tf: MonitorableThreadFactory ⇒ tf.withName(tf.name + "-cluster-scheduler")
        case tf                           ⇒ tf
      }
      system.dynamicAccess.createInstanceFor[Scheduler](system.settings.SchedulerClass, immutable.Seq(
        classOf[Config] -> cfg,
        classOf[LoggingAdapter] -> log,
        classOf[ThreadFactory] -> threadFactory)).get
    } else {
      // delegate to system.scheduler, but don't close over system
      val systemScheduler = system.scheduler
      new Scheduler with Closeable {
        override def close(): Unit = () // we are using system.scheduler, which we are not responsible for closing

        override def maxFrequency: Double = systemScheduler.maxFrequency

        override def schedule(initialDelay: FiniteDuration, interval: FiniteDuration,
                              runnable: Runnable)(implicit executor: ExecutionContext): Cancellable =
          systemScheduler.schedule(initialDelay, interval, runnable)

        override def scheduleOnce(delay: FiniteDuration,
                                  runnable: Runnable)(implicit executor: ExecutionContext): Cancellable =
          systemScheduler.scheduleOnce(delay, runnable)
      }
    }
  }

  // create supervisor for daemons under path "/system/cluster"
  private val clusterDaemons: ActorRef = {
    system.asInstanceOf[ActorSystemImpl].systemActorOf(Props(new ClusterDaemon(settings)).
      withDispatcher(UseDispatcher), name = "cluster")
  }

  /**
   * INTERNAL API
   *
   * Reference to the cluster core actor, obtained by asking the daemon
   * supervisor. Blocks during construction for at most
   * `system.settings.CreationTimeout`.
   */
  private[cluster] val clusterCore: ActorRef = {
    implicit val timeout = system.settings.CreationTimeout
    Await.result((clusterDaemons ? InternalClusterAction.GetClusterCoreRef).mapTo[ActorRef], timeout.duration)
  }

  // records whether the lazy readView was ever created, so that shutdown()
  // does not instantiate it just to close it
  @volatile
  private var readViewStarted = false
  private[cluster] lazy val readView: ClusterReadView = {
    val readView = new ClusterReadView(this)
    readViewStarted = true
    readView
  }

  system.registerOnTermination(shutdown())

  // the MBean is created here and unregistered again in shutdown()
  private val clusterJmx: Option[ClusterJmx] = {
    val jmx = new ClusterJmx(this, log)
    jmx.createMBean()
    Some(jmx)
  }

  log.info("Cluster Node [{}] - has started up successfully", selfAddress)

  // ======================================================
  // ===================== PUBLIC API =====================
  // ======================================================

  /**
   * Returns true if this cluster instance has been shut down.
   */
  def isTerminated: Boolean = _isTerminated.get

  /**
   * Subscribe to cluster domain events.
   * The `to` Class can be [[akka.cluster.ClusterEvent.ClusterDomainEvent]]
   * or subclass.
   *
   * A snapshot of [[akka.cluster.ClusterEvent.CurrentClusterState]]
   * will be sent to the subscriber as the first event.
   */
  def subscribe(subscriber: ActorRef, to: Class[_]): Unit =
    clusterCore ! InternalClusterAction.Subscribe(subscriber, to)

  /**
   * Unsubscribe from all cluster domain events.
   */
  def unsubscribe(subscriber: ActorRef): Unit =
    clusterCore ! InternalClusterAction.Unsubscribe(subscriber, None)

  /**
   * Unsubscribe from a specific type of cluster domain events,
   * matching previous `subscribe` registration.
   */
  def unsubscribe(subscriber: ActorRef, to: Class[_]): Unit =
    clusterCore ! InternalClusterAction.Unsubscribe(subscriber, Some(to))

  /**
   * Publish current (full) state of the cluster to subscribers,
   * that are subscribing to [[akka.cluster.ClusterEvent.ClusterDomainEvent]]
   * or [[akka.cluster.ClusterEvent.CurrentClusterState]].
   * If you want this to happen periodically you need to schedule a call to
   * this method yourself.
   */
  def publishCurrentClusterState(): Unit =
    clusterCore ! InternalClusterAction.PublishCurrentClusterState(None)

  /**
   * Publish current (full) state of the cluster to the specified
   * receiver. If you want this to happen periodically you need to schedule
   * a call to this method yourself.
   */
  def sendCurrentClusterState(receiver: ActorRef): Unit =
    clusterCore ! InternalClusterAction.PublishCurrentClusterState(Some(receiver))

  /**
   * Try to join this cluster node with the node specified by 'address'.
   * A 'Join(thisNodeAddress)' command is sent to the node to join.
   */
  def join(address: Address): Unit =
    clusterCore ! InternalClusterAction.JoinTo(address)

  /**
   * Send command to issue state transition to LEAVING for the node specified by 'address'.
   */
  def leave(address: Address): Unit =
    clusterCore ! ClusterUserAction.Leave(address)

  /**
   * Send command to DOWN the node specified by 'address'.
   */
  def down(address: Address): Unit =
    clusterCore ! ClusterUserAction.Down(address)

  /**
   * The supplied thunk will be run, once, when current cluster member is `Up`.
   * Typically used together with configuration option `akka.cluster.min-nr-of-members`
   * to defer some action, such as starting actors, until the cluster has reached
   * a certain size.
   */
  def registerOnMemberUp[T](code: ⇒ T): Unit =
    registerOnMemberUp(new Runnable { def run(): Unit = code })

  /**
   * Java API: The supplied callback will be run, once, when current cluster member is `Up`.
   * Typically used together with configuration option `akka.cluster.min-nr-of-members`
   * to defer some action, such as starting actors, until the cluster has reached
   * a certain size.
   */
  def registerOnMemberUp(callback: Runnable): Unit = clusterDaemons ! InternalClusterAction.AddOnMemberUpListener(callback)

  // ========================================================
  // ===================== INTERNAL API =====================
  // ========================================================

  /**
   * Make it possible to join the specified seed nodes without defining them
   * in config. Especially useful from tests when Addresses are unknown
   * before startup time.
   */
  private[cluster] def joinSeedNodes(seedNodes: immutable.IndexedSeq[Address]): Unit =
    clusterCore ! InternalClusterAction.JoinSeedNodes(seedNodes)

  /**
   * INTERNAL API.
   *
   * Shuts down all connections to other members, the cluster daemon and the periodic gossip and cleanup tasks.
   *
   * Should not be called by the user. The user can issue a LEAVE command which will tell the node
   * to go through graceful handoff process `LEAVE -> EXITING -> REMOVED -> SHUTDOWN`.
   */
  private[cluster] def shutdown(): Unit = {
    // compareAndSet guarantees the body runs at most once, even if invoked repeatedly
    if (_isTerminated.compareAndSet(false, true)) {
      log.info("Cluster Node [{}] - Shutting down cluster Node and cluster daemons...", selfAddress)

      system.stop(clusterDaemons)
      if (readViewStarted) readView.close()

      closeScheduler()

      clusterJmx foreach { _.unregisterMBean() }

      log.info("Cluster Node [{}] - Cluster node successfully shut down", selfAddress)
    }
  }

  // closes the scheduler only if it is Closeable; other schedulers are left untouched
  private def closeScheduler(): Unit = scheduler match {
    case x: Closeable ⇒ x.close()
    case _            ⇒
  }

}
|