2018-10-29 17:19:37 +08:00
|
|
|
/*
|
2020-01-02 07:24:59 -05:00
|
|
|
* Copyright (C) 2009-2020 Lightbend Inc. <https://www.lightbend.com>
|
2013-01-14 14:09:53 +01:00
|
|
|
*/
|
|
|
|
|
|
2015-04-27 14:25:10 +02:00
|
|
|
package akka.cluster.singleton
|
2013-01-14 14:09:53 +01:00
|
|
|
|
2013-04-28 22:05:40 +02:00
|
|
|
import scala.collection.immutable
|
2017-11-23 08:48:38 +01:00
|
|
|
import scala.concurrent.Future
|
2019-05-21 17:29:11 +02:00
|
|
|
import scala.concurrent.Promise
|
|
|
|
|
import scala.concurrent.duration._
|
|
|
|
|
import scala.util.control.NonFatal
|
|
|
|
|
|
2020-04-27 20:32:18 +08:00
|
|
|
import com.typesafe.config.Config
|
|
|
|
|
|
2019-05-21 17:29:11 +02:00
|
|
|
import akka.AkkaException
|
|
|
|
|
import akka.Done
|
2013-01-14 14:09:53 +01:00
|
|
|
import akka.actor.Actor
|
|
|
|
|
import akka.actor.ActorRef
|
2013-03-26 18:17:50 +01:00
|
|
|
import akka.actor.ActorSelection
|
2019-05-21 17:29:11 +02:00
|
|
|
import akka.actor.ActorSystem
|
2013-01-14 14:09:53 +01:00
|
|
|
import akka.actor.Address
|
2019-05-21 17:29:11 +02:00
|
|
|
import akka.actor.CoordinatedShutdown
|
2015-08-18 16:32:18 +02:00
|
|
|
import akka.actor.DeadLetterSuppression
|
2019-05-21 17:29:11 +02:00
|
|
|
import akka.actor.Deploy
|
2013-01-14 14:09:53 +01:00
|
|
|
import akka.actor.FSM
|
2019-05-21 17:29:11 +02:00
|
|
|
import akka.actor.NoSerializationVerificationNeeded
|
2013-01-14 14:09:53 +01:00
|
|
|
import akka.actor.Props
|
|
|
|
|
import akka.actor.Terminated
|
2019-04-04 15:35:18 +02:00
|
|
|
import akka.annotation.DoNotInherit
|
2019-05-21 17:29:11 +02:00
|
|
|
import akka.annotation.InternalStableApi
|
|
|
|
|
import akka.cluster._
|
2020-04-27 20:32:18 +08:00
|
|
|
import akka.cluster.ClusterEvent._
|
2019-03-29 14:27:08 +01:00
|
|
|
import akka.coordination.lease.LeaseUsageSettings
|
2019-05-21 17:29:11 +02:00
|
|
|
import akka.coordination.lease.scaladsl.Lease
|
|
|
|
|
import akka.coordination.lease.scaladsl.LeaseProvider
|
|
|
|
|
import akka.dispatch.Dispatchers
|
2019-12-05 11:36:21 +01:00
|
|
|
import akka.event.LogMarker
|
|
|
|
|
import akka.event.Logging
|
2016-12-01 18:49:38 +01:00
|
|
|
import akka.pattern.ask
|
2019-05-21 17:29:11 +02:00
|
|
|
import akka.pattern.pipe
|
|
|
|
|
import akka.util.JavaDurationConverters._
|
2016-12-01 18:49:38 +01:00
|
|
|
import akka.util.Timeout
|
2013-01-14 14:09:53 +01:00
|
|
|
|
2015-04-29 18:23:45 +02:00
|
|
|
object ClusterSingletonManagerSettings {

  /**
   * Create settings from the default configuration
   * `akka.cluster.singleton`.
   */
  def apply(system: ActorSystem): ClusterSingletonManagerSettings =
    apply(system.settings.config.getConfig("akka.cluster.singleton"))
    // note that this setting has some additional logic inside the ClusterSingletonManager
    // falling back to DowningProvider.downRemovalMargin if it is off/Zero
      .withRemovalMargin(Cluster(system).settings.DownRemovalMargin)

  /**
   * Create settings from a configuration with the same layout as
   * the default configuration `akka.cluster.singleton`.
   */
  def apply(config: Config): ClusterSingletonManagerSettings = {
    // An empty "use-lease" path means the lease feature is disabled.
    val lease = Some(config.getString("use-lease")).filter(_.nonEmpty).map { leaseConfigPath =>
      new LeaseUsageSettings(leaseConfigPath, config.getDuration("lease-retry-interval").asScala)
    }
    new ClusterSingletonManagerSettings(
      singletonName = config.getString("singleton-name"),
      role = roleOption(config.getString("role")),
      removalMargin = Duration.Zero, // defaults to ClusterSettings.DownRemovalMargin
      handOverRetryInterval = config.getDuration("hand-over-retry-interval", MILLISECONDS).millis,
      lease)
  }

  /**
   * Java API: Create settings from the default configuration
   * `akka.cluster.singleton`.
   */
  def create(system: ActorSystem): ClusterSingletonManagerSettings = apply(system)

  /**
   * Java API: Create settings from a configuration with the same layout as
   * the default configuration `akka.cluster.singleton`.
   */
  def create(config: Config): ClusterSingletonManagerSettings = apply(config)

  /**
   * INTERNAL API
   */
  private[akka] def roleOption(role: String): Option[String] =
    // empty string (and null) both mean "no role restriction"
    Option(role).filter(_.nonEmpty)

}
|
|
|
|
|
|
|
|
|
|
/**
 * @param singletonName The actor name of the child singleton actor.
 *
 * @param role Singleton among the nodes tagged with specified role.
 *   If the role is not specified it's a singleton among all nodes in
 *   the cluster.
 *
 * @param removalMargin Margin until the singleton instance that belonged to
 *   a downed/removed partition is created in surviving partition. The purpose of
 *   this margin is that in case of a network partition the singleton actors
 *   in the non-surviving partitions must be stopped before corresponding actors
 *   are started somewhere else. This is especially important for persistent
 *   actors.
 *
 * @param handOverRetryInterval When a node is becoming oldest it sends hand-over
 *   request to previous oldest, that might be leaving the cluster. This is
 *   retried with this interval until the previous oldest confirms that the hand
 *   over has started or the previous oldest member is removed from the cluster
 *   (+ `removalMargin`).
 *
 * @param leaseSettings LeaseSettings for acquiring before creating the singleton actor
 */
final class ClusterSingletonManagerSettings(
    val singletonName: String,
    val role: Option[String],
    val removalMargin: FiniteDuration,
    val handOverRetryInterval: FiniteDuration,
    val leaseSettings: Option[LeaseUsageSettings])
    extends NoSerializationVerificationNeeded {

  // bin compat for akka 2.5.21
  def this(
      singletonName: String,
      role: Option[String],
      removalMargin: FiniteDuration,
      handOverRetryInterval: FiniteDuration) =
    this(singletonName, role, removalMargin, handOverRetryInterval, None)

  def withSingletonName(name: String): ClusterSingletonManagerSettings = copy(singletonName = name)

  def withRole(role: String): ClusterSingletonManagerSettings =
    copy(role = ClusterSingletonManagerSettings.roleOption(role))

  def withRole(role: Option[String]): ClusterSingletonManagerSettings = copy(role = role)

  def withRemovalMargin(removalMargin: FiniteDuration): ClusterSingletonManagerSettings =
    copy(removalMargin = removalMargin)

  def withHandOverRetryInterval(retryInterval: FiniteDuration): ClusterSingletonManagerSettings =
    copy(handOverRetryInterval = retryInterval)

  def withLeaseSettings(leaseSettings: LeaseUsageSettings): ClusterSingletonManagerSettings =
    copy(leaseSettings = Some(leaseSettings))

  // single private copy keeps the public `with*` API small and immutable
  private def copy(
      singletonName: String = singletonName,
      role: Option[String] = role,
      removalMargin: FiniteDuration = removalMargin,
      handOverRetryInterval: FiniteDuration = handOverRetryInterval,
      leaseSettings: Option[LeaseUsageSettings] = leaseSettings): ClusterSingletonManagerSettings =
    new ClusterSingletonManagerSettings(singletonName, role, removalMargin, handOverRetryInterval, leaseSettings)
}
|
|
|
|
|
|
2015-08-17 16:46:26 +02:00
|
|
|
/**
 * Marker trait for remote messages with special serializer.
 */
sealed trait ClusterSingletonMessage extends Serializable
|
|
|
|
|
|
2015-04-29 18:23:45 +02:00
|
|
|
object ClusterSingletonManager {

  /**
   * Scala API: Factory method for `ClusterSingletonManager` [[akka.actor.Props]].
   */
  def props(singletonProps: Props, terminationMessage: Any, settings: ClusterSingletonManagerSettings): Props =
    Props(new ClusterSingletonManager(singletonProps, terminationMessage, settings))
      .withDispatcher(Dispatchers.InternalDispatcherId)
      .withDeploy(Deploy.local)

  /**
   * INTERNAL API
   * public due to the `with FSM` type parameters
   */
  sealed trait State

  /**
   * INTERNAL API
   * public due to the `with FSM` type parameters
   */
  sealed trait Data

  /**
   * INTERNAL API
   */
  private[akka] object Internal {

    /**
     * Sent from new oldest to previous oldest to initiate the
     * hand-over process. `HandOverInProgress` and `HandOverDone`
     * are expected replies.
     */
    case object HandOverToMe extends ClusterSingletonMessage with DeadLetterSuppression

    /**
     * Confirmation by the previous oldest that the hand
     * over process, shut down of the singleton actor, has
     * started.
     */
    case object HandOverInProgress extends ClusterSingletonMessage

    /**
     * Confirmation by the previous oldest that the singleton
     * actor has been terminated and the hand-over process is
     * completed.
     */
    case object HandOverDone extends ClusterSingletonMessage

    /**
     * Sent from from previous oldest to new oldest to
     * initiate the normal hand-over process.
     * Especially useful when new node joins and becomes
     * oldest immediately, without knowing who was previous
     * oldest.
     */
    case object TakeOverFromMe extends ClusterSingletonMessage with DeadLetterSuppression

    final case class HandOverRetry(count: Int)
    final case class TakeOverRetry(count: Int)
    case object LeaseRetry
    case object Cleanup
    case object StartOldestChangedBuffer

    case object Start extends State
    case object AcquiringLease extends State
    case object Oldest extends State
    case object Younger extends State
    case object BecomingOldest extends State
    case object WasOldest extends State
    case object HandingOver extends State
    case object TakeOver extends State
    case object Stopping extends State
    case object End extends State

    case object Uninitialized extends Data
    final case class YoungerData(oldest: List[UniqueAddress]) extends Data
    final case class BecomingOldestData(previousOldest: List[UniqueAddress]) extends Data
    final case class OldestData(singleton: Option[ActorRef]) extends Data
    final case class WasOldestData(singleton: Option[ActorRef], newOldestOption: Option[UniqueAddress]) extends Data
    final case class HandingOverData(singleton: ActorRef, handOverTo: Option[ActorRef]) extends Data
    final case class StoppingData(singleton: ActorRef) extends Data
    case object EndData extends Data
    final case class DelayedMemberRemoved(member: Member)
    case object SelfExiting
    case class AcquiringLeaseData(leaseRequestInProgress: Boolean, singleton: Option[ActorRef]) extends Data

    // FSM timer names
    val HandOverRetryTimer = "hand-over-retry"
    val TakeOverRetryTimer = "take-over-retry"
    val CleanupTimer = "cleanup"
    val LeaseRetryTimer = "lease-retry"

    object OldestChangedBuffer {

      /**
       * Request to deliver one more event.
       */
      case object GetNext

      /**
       * The first event, corresponding to CurrentClusterState.
       */
      final case class InitialOldestState(oldest: List[UniqueAddress], safeToBeOldest: Boolean)

      final case class OldestChanged(oldest: Option[UniqueAddress])
    }

    final case class AcquireLeaseResult(holdingLease: Boolean) extends DeadLetterSuppression
    final case class ReleaseLeaseResult(released: Boolean) extends DeadLetterSuppression
    final case class AcquireLeaseFailure(t: Throwable) extends DeadLetterSuppression
    final case class ReleaseLeaseFailure(t: Throwable) extends DeadLetterSuppression
    final case class LeaseLost(reason: Option[Throwable]) extends DeadLetterSuppression

    /**
     * Notifications of member events that track oldest member are tunneled
     * via this actor (child of ClusterSingletonManager) to be able to deliver
     * one change at a time. Avoiding simultaneous changes simplifies
     * the process in ClusterSingletonManager. ClusterSingletonManager requests
     * next event with `GetNext` when it is ready for it. Only one outstanding
     * `GetNext` request is allowed. Incoming events are buffered and delivered
     * upon `GetNext` request.
     */
    class OldestChangedBuffer(role: Option[String]) extends Actor {
      import OldestChangedBuffer._

      val cluster = Cluster(context.system)
      // sort by age, oldest first
      val ageOrdering = Member.ageOrdering
      var membersByAge: immutable.SortedSet[Member] = immutable.SortedSet.empty(ageOrdering)

      // events pending delivery until the parent asks for them with GetNext
      var changes = Vector.empty[AnyRef]

      // subscribe to MemberEvent, re-subscribe when restart
      override def preStart(): Unit = {
        cluster.subscribe(self, classOf[MemberEvent])

        // It's a delicate difference between CoordinatedShutdown.PhaseClusterExiting and MemberExited.
        // MemberExited event is published immediately (leader may have performed that transition on other node),
        // and that will trigger run of CoordinatedShutdown, while PhaseClusterExiting will happen later.
        // Using PhaseClusterExiting in the singleton because the graceful shutdown of sharding region
        // should preferably complete before stopping the singleton sharding coordinator on same node.
        val coordShutdown = CoordinatedShutdown(context.system)
        coordShutdown.addTask(CoordinatedShutdown.PhaseClusterExiting, "singleton-exiting-1") { () =>
          if (cluster.isTerminated || cluster.selfMember.status == MemberStatus.Down) {
            Future.successful(Done)
          } else {
            implicit val timeout: Timeout = Timeout(coordShutdown.timeout(CoordinatedShutdown.PhaseClusterExiting))
            self.ask(SelfExiting).mapTo[Done]
          }
        }
      }

      override def postStop(): Unit = cluster.unsubscribe(self)

      private val selfDc = ClusterSettings.DcRolePrefix + cluster.settings.SelfDataCenter

      // members are only relevant when they are in our own data center and carry the configured role
      def matchingRole(member: Member): Boolean =
        member.hasRole(selfDc) && role.forall(member.hasRole)

      // run `block` and emit an OldestChanged event if the head of membersByAge changed
      def trackChange(block: () => Unit): Unit = {
        val before = membersByAge.headOption
        block()
        val after = membersByAge.headOption
        if (before != after)
          changes :+= OldestChanged(after.map(_.uniqueAddress))
      }

      def handleInitial(state: CurrentClusterState): Unit = {
        // all members except Joining and WeaklyUp
        membersByAge = immutable.SortedSet
          .empty(ageOrdering)
          .union(state.members.filter(m => m.upNumber != Int.MaxValue && matchingRole(m)))

        // If there is some removal in progress of an older node it's not safe to immediately become oldest,
        // removal of younger nodes doesn't matter. Note that it can also be started via restart after
        // ClusterSingletonManagerIsStuck.
        val selfUpNumber = state.members
          .collectFirst { case m if m.uniqueAddress == cluster.selfUniqueAddress => m.upNumber }
          .getOrElse(Int.MaxValue)
        val oldest = membersByAge.takeWhile(_.upNumber <= selfUpNumber)
        val safeToBeOldest = !oldest.exists { m =>
          m.status == MemberStatus.Down || m.status == MemberStatus.Exiting || m.status == MemberStatus.Leaving
        }

        val initial = InitialOldestState(oldest.toList.map(_.uniqueAddress), safeToBeOldest)
        changes :+= initial
      }

      def add(m: Member): Unit = {
        if (matchingRole(m))
          trackChange { () =>
            // replace, it's possible that the upNumber is changed
            membersByAge = membersByAge.filterNot(_.uniqueAddress == m.uniqueAddress)
            membersByAge += m
          }
      }

      def remove(m: Member): Unit = {
        if (matchingRole(m))
          trackChange { () =>
            membersByAge = membersByAge.filterNot(_.uniqueAddress == m.uniqueAddress)
          }
      }

      def sendFirstChange(): Unit = {
        // don't send cluster change events if this node is shutting its self down, just wait for SelfExiting
        if (!cluster.isTerminated) {
          val event = changes.head
          changes = changes.tail
          context.parent ! event
        }
      }

      def receive = {
        case state: CurrentClusterState => handleInitial(state)
        case MemberUp(m)                => add(m)
        case MemberRemoved(m, _)        => remove(m)
        case MemberExited(m) if m.uniqueAddress != cluster.selfUniqueAddress =>
          remove(m)
        case SelfExiting =>
          remove(cluster.readView.self)
          sender() ! Done // reply to ask
        case GetNext if changes.isEmpty =>
          context.become(deliverNext, discardOld = false)
        case GetNext =>
          sendFirstChange()
      }

      // the buffer was empty when GetNext was received, deliver next event immediately
      def deliverNext: Actor.Receive = {
        case state: CurrentClusterState =>
          handleInitial(state)
          sendFirstChange()
          context.unbecome()
        case MemberUp(m) =>
          add(m)
          deliverChanges()
        case MemberRemoved(m, _) =>
          remove(m)
          deliverChanges()
        case MemberExited(m) if m.uniqueAddress != cluster.selfUniqueAddress =>
          remove(m)
          deliverChanges()
        case SelfExiting =>
          remove(cluster.readView.self)
          deliverChanges()
          sender() ! Done // reply to ask
      }

      def deliverChanges(): Unit = {
        if (changes.nonEmpty) {
          sendFirstChange()
          context.unbecome()
        }
      }

      override def unhandled(msg: Any): Unit = {
        msg match {
          case _: MemberEvent => // ok, silence
          case _              => super.unhandled(msg)
        }
      }
    }
  }
}
|
|
|
|
|
|
|
|
|
|
/**
 * Thrown when a consistent state can't be determined within the
 * defined retry limits. Eventually it will reach a stable state and
 * can continue, and that is simplified by starting over with a clean
 * state. Parent supervisor should typically restart the actor, i.e.
 * default decision.
 */
class ClusterSingletonManagerIsStuck(message: String) extends AkkaException(message, null)
|
|
|
|
|
|
|
|
|
|
/**
|
2013-03-14 20:32:43 +01:00
|
|
|
* Manages singleton actor instance among all cluster nodes or a group
|
|
|
|
|
* of nodes tagged with a specific role. At most one singleton instance
|
|
|
|
|
* is running at any point in time.
|
|
|
|
|
*
|
|
|
|
|
* The ClusterSingletonManager is supposed to be started on all nodes,
|
|
|
|
|
* or all nodes with specified role, in the cluster with `actorOf`.
|
2013-04-28 22:05:40 +02:00
|
|
|
* The actual singleton is started on the oldest node by creating a child
|
2013-01-14 14:09:53 +01:00
|
|
|
* actor from the supplied `singletonProps`.
|
|
|
|
|
*
|
2015-04-13 10:07:14 +02:00
|
|
|
* The singleton actor is always running on the oldest member with specified role.
|
|
|
|
|
* The oldest member is determined by [[akka.cluster.Member#isOlderThan]].
|
2013-04-28 22:05:40 +02:00
|
|
|
* This can change when removing members. A graceful hand over can normally
|
|
|
|
|
* be performed when current oldest node is leaving the cluster. Be aware that
|
|
|
|
|
* there is a short time period when there is no active singleton during the
|
2013-01-28 08:47:52 +01:00
|
|
|
* hand-over process.
|
2013-01-14 14:09:53 +01:00
|
|
|
*
|
2013-04-28 22:05:40 +02:00
|
|
|
* The cluster failure detector will notice when oldest node
|
2013-01-28 08:47:52 +01:00
|
|
|
* becomes unreachable due to things like JVM crash, hard shut down,
|
2013-11-19 15:53:40 +01:00
|
|
|
* or network failure. When the crashed node has been removed (via down) from the
|
|
|
|
|
* cluster then a new oldest node will take over and a new singleton actor is
|
|
|
|
|
* created. For these failure scenarios there will not be a graceful hand-over,
|
|
|
|
|
* but more than one active singletons is prevented by all reasonable means. Some
|
|
|
|
|
* corner cases are eventually resolved by configurable timeouts.
|
2013-01-14 14:09:53 +01:00
|
|
|
*
|
2015-06-04 21:21:37 +02:00
|
|
|
* You access the singleton actor with [[ClusterSingletonProxy]].
|
2013-04-28 22:05:40 +02:00
|
|
|
* Alternatively the singleton actor may broadcast its existence when it is started.
|
2013-01-14 14:09:53 +01:00
|
|
|
*
|
2014-05-07 11:09:53 +05:30
|
|
|
* Use factory method [[ClusterSingletonManager#props]] to create the
|
2013-04-17 22:14:19 +02:00
|
|
|
* [[akka.actor.Props]] for the actor.
|
|
|
|
|
*
|
2017-07-26 10:42:13 +02:00
|
|
|
* Not intended for subclassing by user code.
|
|
|
|
|
*
|
2013-01-14 14:09:53 +01:00
|
|
|
*
|
2015-04-29 18:23:45 +02:00
|
|
|
* @param singletonProps [[akka.actor.Props]] of the singleton actor instance.
|
2013-01-14 14:09:53 +01:00
|
|
|
*
|
2015-04-29 18:23:45 +02:00
|
|
|
* @param terminationMessage When handing over to a new oldest node
|
2013-01-14 14:09:53 +01:00
|
|
|
* this `terminationMessage` is sent to the singleton actor to tell
|
2013-09-10 13:35:51 +02:00
|
|
|
* it to finish its work, close resources, and stop.
|
2013-04-28 22:05:40 +02:00
|
|
|
* The hand-over to the new oldest node is completed when the
|
2013-01-14 14:09:53 +01:00
|
|
|
* singleton actor is terminated.
|
|
|
|
|
* Note that [[akka.actor.PoisonPill]] is a perfectly fine
|
|
|
|
|
* `terminationMessage` if you only need to stop the actor.
|
|
|
|
|
*
|
2015-04-29 18:23:45 +02:00
|
|
|
* @param settings see [[ClusterSingletonManagerSettings]]
|
2013-01-14 14:09:53 +01:00
|
|
|
*/
|
2017-07-26 10:42:13 +02:00
|
|
|
@DoNotInherit
|
2019-03-11 10:38:24 +01:00
|
|
|
class ClusterSingletonManager(singletonProps: Props, terminationMessage: Any, settings: ClusterSingletonManagerSettings)
|
|
|
|
|
extends Actor
|
|
|
|
|
with FSM[ClusterSingletonManager.State, ClusterSingletonManager.Data] {
|
2013-01-14 14:09:53 +01:00
|
|
|
|
2019-05-21 17:29:11 +02:00
|
|
|
import ClusterSingletonManager.Internal._
|
2020-04-27 20:32:18 +08:00
|
|
|
import ClusterSingletonManager.Internal.OldestChangedBuffer._
|
2015-04-29 18:23:45 +02:00
|
|
|
import settings._
|
2013-01-14 14:09:53 +01:00
|
|
|
|
|
|
|
|
val cluster = Cluster(context.system)
|
2016-08-19 11:56:55 +02:00
|
|
|
val selfUniqueAddressOption = Some(cluster.selfUniqueAddress)
|
2013-05-23 13:36:35 +02:00
|
|
|
import cluster.settings.LogInfo
|
2013-03-14 20:32:43 +01:00
|
|
|
|
2019-03-13 10:56:20 +01:00
|
|
|
require(
|
|
|
|
|
role.forall(cluster.selfRoles.contains),
|
|
|
|
|
s"This cluster member [${cluster.selfAddress}] doesn't have the role [$role]")
|
2013-03-14 20:32:43 +01:00
|
|
|
|
2019-03-28 13:24:46 +01:00
|
|
|
private val singletonLeaseName = s"${context.system.name}-singleton-${self.path}"
|
|
|
|
|
|
2019-12-05 11:36:21 +01:00
|
|
|
override val log = Logging.withMarker(context.system, this)
|
|
|
|
|
|
2019-03-28 13:24:46 +01:00
|
|
|
val lease: Option[Lease] = settings.leaseSettings.map(
|
|
|
|
|
settings =>
|
|
|
|
|
LeaseProvider(context.system)
|
|
|
|
|
.getLease(singletonLeaseName, settings.leaseImplementation, cluster.selfAddress.hostPort))
|
|
|
|
|
val leaseRetryInterval: FiniteDuration = settings.leaseSettings match {
|
|
|
|
|
case Some(s) => s.leaseRetryInterval
|
|
|
|
|
case None => 5.seconds // won't be used
|
|
|
|
|
}
|
|
|
|
|
|
2015-05-30 16:12:22 +02:00
|
|
|
val removalMargin =
|
2016-04-11 10:33:02 +02:00
|
|
|
if (settings.removalMargin <= Duration.Zero) cluster.downingProvider.downRemovalMargin
|
2015-05-30 16:12:22 +02:00
|
|
|
else settings.removalMargin
|
|
|
|
|
|
|
|
|
|
val (maxHandOverRetries, maxTakeOverRetries) = {
|
|
|
|
|
val n = (removalMargin.toMillis / handOverRetryInterval.toMillis).toInt
|
2019-03-11 10:38:24 +01:00
|
|
|
val minRetries = context.system.settings.config.getInt("akka.cluster.singleton.min-number-of-hand-over-retries")
|
2015-09-16 14:51:00 +02:00
|
|
|
require(minRetries >= 1, "min-number-of-hand-over-retries must be >= 1")
|
|
|
|
|
val handOverRetries = math.max(minRetries, n + 3)
|
|
|
|
|
val takeOverRetries = math.max(1, handOverRetries - 3)
|
|
|
|
|
|
|
|
|
|
(handOverRetries, takeOverRetries)
|
2015-05-30 16:12:22 +02:00
|
|
|
}
|
|
|
|
|
|
2013-01-14 14:09:53 +01:00
|
|
|
// started when when self member is Up
|
2013-04-28 22:05:40 +02:00
|
|
|
var oldestChangedBuffer: ActorRef = _
|
2013-01-14 14:09:53 +01:00
|
|
|
// Previous GetNext request delivered event and new GetNext is to be sent
|
2013-04-28 22:05:40 +02:00
|
|
|
var oldestChangedReceived = true
|
2013-01-14 14:09:53 +01:00
|
|
|
|
2013-05-09 09:49:59 +02:00
|
|
|
var selfExited = false
|
|
|
|
|
|
2013-01-14 14:09:53 +01:00
|
|
|
// keep track of previously removed members
|
2016-08-19 11:56:55 +02:00
|
|
|
var removed = Map.empty[UniqueAddress, Deadline]
|
2013-01-14 14:09:53 +01:00
|
|
|
|
2016-08-19 11:56:55 +02:00
|
|
|
def addRemoved(node: UniqueAddress): Unit =
|
2019-02-09 15:25:39 +01:00
|
|
|
removed += node -> (Deadline.now + 15.minutes)
|
2013-01-14 14:09:53 +01:00
|
|
|
|
|
|
|
|
def cleanupOverdueNotMemberAnyMore(): Unit = {
|
2020-04-27 17:31:16 +07:00
|
|
|
removed = removed.filter { case (_, deadline) => deadline.hasTimeLeft() }
|
2013-01-14 14:09:53 +01:00
|
|
|
}
|
|
|
|
|
|
2016-12-01 18:49:38 +01:00
|
|
|
// for CoordinatedShutdown
val coordShutdown = CoordinatedShutdown(context.system)
// completed when hand-over is done (or when it is known not to be needed),
// see handOverDone and postStop
val memberExitingProgress = Promise[Done]()
// Hold the cluster-exiting phase until the singleton hand-over has completed,
// unless this node is already terminated or downed.
coordShutdown.addTask(CoordinatedShutdown.PhaseClusterExiting, "wait-singleton-exiting") { () =>
  if (cluster.isTerminated || cluster.selfMember.status == MemberStatus.Down)
    Future.successful(Done)
  else
    memberExitingProgress.future
}
// Tell this actor that self is exiting; the ask completes with Done once the
// SelfExiting message has been processed (see the SelfExiting handlers).
coordShutdown.addTask(CoordinatedShutdown.PhaseClusterExiting, "singleton-exiting-2") { () =>
  if (cluster.isTerminated || cluster.selfMember.status == MemberStatus.Down) {
    Future.successful(Done)
  } else {
    implicit val timeout = Timeout(coordShutdown.timeout(CoordinatedShutdown.PhaseClusterExiting))
    self.ask(SelfExiting).mapTo[Done]
  }
}
|
|
|
|
|
|
2013-01-14 14:09:53 +01:00
|
|
|
// Info-level log helpers, no-ops when the LogInfo setting is disabled.
def logInfo(message: String): Unit =
  if (LogInfo) log.info(message)

def logInfo(marker: LogMarker, message: String): Unit =
  if (LogInfo) log.info(marker, message)

def logInfo(template: String, arg1: Any): Unit =
  if (LogInfo) log.info(template, arg1)

def logInfo(marker: LogMarker, template: String, arg1: Any): Unit =
  if (LogInfo) log.info(marker, template, arg1)

def logInfo(template: String, arg1: Any, arg2: Any): Unit =
  if (LogInfo) log.info(template, arg1, arg2)

def logInfo(marker: LogMarker, template: String, arg1: Any, arg2: Any): Unit =
  if (LogInfo) log.info(marker, template, arg1, arg2)

def logInfo(template: String, arg1: Any, arg2: Any, arg3: Any): Unit =
  if (LogInfo) log.info(template, arg1, arg2, arg3)
|
|
|
|
|
|
2013-01-14 14:09:53 +01:00
|
|
|
override def preStart(): Unit = {
  super.preStart()
  require(!cluster.isTerminated, "Cluster node must not be terminated")

  // subscribe to cluster changes, re-subscribe when restart
  cluster.subscribe(self, ClusterEvent.InitialStateAsEvents, classOf[MemberRemoved], classOf[MemberDowned])

  // periodically purge expired entries from `removed`
  startTimerWithFixedDelay(CleanupTimer, Cleanup, 1.minute)

  // defer subscription to avoid some jitter when
  // starting/joining several nodes at the same time
  cluster.registerOnMemberUp(self ! StartOldestChangedBuffer)
}
|
|
|
|
|
|
|
|
|
|
override def postStop(): Unit = {
  cancelTimer(CleanupTimer)
  cluster.unsubscribe(self)
  // don't leave CoordinatedShutdown hanging if this actor stops
  // before the hand-over has completed
  memberExitingProgress.trySuccess(Done)
  super.postStop()
}
|
|
|
|
|
|
2013-03-26 18:17:50 +01:00
|
|
|
// Selection of the ClusterSingletonManager at the same path on node `at`.
def peer(at: Address): ActorSelection = context.actorSelection(self.path.toStringWithAddress(at))

// Request the next oldest-changed event from the buffer, keeping at most
// one outstanding GetNext at a time (tracked via oldestChangedReceived).
def getNextOldestChanged(): Unit =
  if (oldestChangedReceived) {
    oldestChangedReceived = false
    oldestChangedBuffer ! GetNext
  }
|
|
|
|
|
|
|
|
|
|
startWith(Start, Uninitialized)

when(Start) {
  // member is Up (registered in preStart): create the buffer and ask for the initial state
  case Event(StartOldestChangedBuffer, _) =>
    oldestChangedBuffer =
      context.actorOf(Props(classOf[OldestChangedBuffer], role).withDispatcher(context.props.dispatcher))
    getNextOldestChanged()
    stay()

  // initial oldest information from the buffer decides the starting state
  case Event(InitialOldestState(oldest, safeToBeOldest), _) =>
    oldestChangedReceived = true
    if (oldest.headOption == selfUniqueAddressOption && safeToBeOldest)
      // oldest immediately
      tryGotoOldest()
    else if (oldest.headOption == selfUniqueAddressOption)
      goto(BecomingOldest).using(BecomingOldestData(oldest.filterNot(_ == cluster.selfUniqueAddress)))
    else
      goto(Younger).using(YoungerData(oldest.filterNot(_ == cluster.selfUniqueAddress)))
}
|
|
|
|
|
|
2013-04-28 22:05:40 +02:00
|
|
|
when(Younger) {
  case Event(OldestChanged(oldestOption), YoungerData(previousOldest)) =>
    oldestChangedReceived = true
    if (oldestOption == selfUniqueAddressOption) {
      logInfo("Younger observed OldestChanged: [{} -> myself]", previousOldest.headOption.map(_.address))
      if (previousOldest.forall(removed.contains))
        // all previous oldest nodes are known to be removed: take over directly
        tryGotoOldest()
      else {
        // ask the previous oldest to hand over the singleton
        peer(previousOldest.head.address) ! HandOverToMe
        goto(BecomingOldest).using(BecomingOldestData(previousOldest))
      }
    } else {
      logInfo(
        "Younger observed OldestChanged: [{} -> {}]",
        previousOldest.headOption.map(_.address),
        oldestOption.map(_.address))
      getNextOldestChanged()
      // remember the new oldest (most recent first) unless already known
      val newPreviousOldest = oldestOption match {
        case Some(oldest) if !previousOldest.contains(oldest) => oldest :: previousOldest
        case _                                                => previousOldest
      }
      stay().using(YoungerData(newPreviousOldest))
    }

  case Event(MemberDowned(m), _) if m.uniqueAddress == cluster.selfUniqueAddress =>
    logInfo("Self downed, stopping ClusterSingletonManager")
    stop()

  case Event(MemberRemoved(m, _), _) if m.uniqueAddress == cluster.selfUniqueAddress =>
    logInfo("Self removed, stopping ClusterSingletonManager")
    stop()

  // delay acting on the removal by removalMargin (see scheduleDelayedMemberRemoved)
  case Event(MemberRemoved(m, _), _) =>
    scheduleDelayedMemberRemoved(m)
    stay()

  case Event(DelayedMemberRemoved(m), YoungerData(previousOldest)) =>
    if (!selfExited)
      logInfo("Member removed [{}]", m.address)
    addRemoved(m.uniqueAddress)
    // transition when OldestChanged
    stay().using(YoungerData(previousOldest.filterNot(_ == m.uniqueAddress)))

  case Event(HandOverToMe, _) =>
    val selfStatus = cluster.selfMember.status
    if (selfStatus == MemberStatus.Leaving || selfStatus == MemberStatus.Exiting)
      logInfo("Ignoring HandOverToMe in Younger from [{}] because self is [{}].", sender().path.address, selfStatus)
    else {
      // this node was probably quickly restarted with same hostname:port,
      // confirm that the old singleton instance has been stopped
      sender() ! HandOverDone
    }
    stay()
}
|
|
|
|
|
|
2013-04-28 22:05:40 +02:00
|
|
|
when(BecomingOldest) {

  case Event(HandOverInProgress, _) =>
    // confirmation that the hand-over process has started
    logInfo("Hand-over in progress at [{}]", sender().path.address)
    cancelTimer(HandOverRetryTimer)
    stay()

  case Event(HandOverDone, BecomingOldestData(previousOldest)) =>
    previousOldest.headOption match {
      case Some(oldest) =>
        // only accept HandOverDone from the expected previous oldest
        if (sender().path.address == oldest.address)
          tryGotoOldest()
        else {
          logInfo(
            "Ignoring HandOverDone in BecomingOldest from [{}]. Expected previous oldest [{}]",
            sender().path.address,
            oldest.address)
          stay()
        }
      case None =>
        logInfo("Ignoring HandOverDone in BecomingOldest from [{}].", sender().path.address)
        stay()
    }

  case Event(MemberDowned(m), _) if m.uniqueAddress == cluster.selfUniqueAddress =>
    logInfo("Self downed, stopping ClusterSingletonManager")
    stop()

  case Event(MemberRemoved(m, _), _) if m.uniqueAddress == cluster.selfUniqueAddress =>
    logInfo("Self removed, stopping ClusterSingletonManager")
    stop()

  case Event(MemberRemoved(m, _), _) =>
    scheduleDelayedMemberRemoved(m)
    stay()

  case Event(DelayedMemberRemoved(m), BecomingOldestData(previousOldest)) =>
    if (!selfExited)
      logInfo("Member removed [{}], previous oldest [{}]", m.address, previousOldest.map(_.address).mkString(", "))
    addRemoved(m.uniqueAddress)
    if (cluster.isTerminated) {
      // don't act on DelayedMemberRemoved (starting singleton) if this node is shutting its self down,
      // just wait for self MemberRemoved
      stay()
    } else if (previousOldest.contains(m.uniqueAddress) && previousOldest.forall(removed.contains))
      tryGotoOldest()
    else
      stay().using(BecomingOldestData(previousOldest.filterNot(_ == m.uniqueAddress)))

  case Event(TakeOverFromMe, BecomingOldestData(previousOldest)) =>
    val senderAddress = sender().path.address
    // it would have been better to include the UniqueAddress in the TakeOverFromMe message,
    // but can't change due to backwards compatibility
    cluster.state.members.collectFirst { case m if m.address == senderAddress => m.uniqueAddress } match {
      case None =>
        // from unknown node, ignore
        logInfo("Ignoring TakeOver request from unknown node in BecomingOldest from [{}].", senderAddress)
        stay()
      case Some(senderUniqueAddress) =>
        previousOldest.headOption match {
          case Some(oldest) =>
            if (oldest == senderUniqueAddress)
              sender() ! HandOverToMe
            else
              logInfo(
                "Ignoring TakeOver request in BecomingOldest from [{}]. Expected previous oldest [{}]",
                sender().path.address,
                oldest.address)
            stay()
          case None =>
            // no previous oldest known yet: accept the sender as previous oldest
            sender() ! HandOverToMe
            stay().using(BecomingOldestData(senderUniqueAddress :: previousOldest))
        }
    }

  case Event(HandOverRetry(count), BecomingOldestData(previousOldest)) =>
    if (count <= maxHandOverRetries) {
      logInfo("Retry [{}], sending HandOverToMe to [{}]", count, previousOldest.headOption.map(_.address))
      previousOldest.headOption.foreach(node => peer(node.address) ! HandOverToMe)
      startSingleTimer(HandOverRetryTimer, HandOverRetry(count + 1), handOverRetryInterval)
      stay()
    } else if (previousOldest.forall(removed.contains)) {
      // can't send HandOverToMe, previousOldest unknown for new node (or restart)
      // previous oldest might be down or removed, so no TakeOverFromMe message is received
      logInfo("Timeout in BecomingOldest. Previous oldest unknown, removed and no TakeOver request.")
      tryGotoOldest()
    } else if (cluster.isTerminated)
      stop()
    else
      throw new ClusterSingletonManagerIsStuck(
        s"Becoming singleton oldest was stuck because previous oldest [${previousOldest.headOption}] is unresponsive")
}
|
2013-01-14 14:09:53 +01:00
|
|
|
|
2015-05-30 16:12:22 +02:00
|
|
|
// Defer the reaction to a MemberRemoved by removalMargin; with no margin
// configured the DelayedMemberRemoved is delivered immediately.
def scheduleDelayedMemberRemoved(m: Member): Unit = {
  if (removalMargin > Duration.Zero) {
    log.debug("Schedule DelayedMemberRemoved for [{}]", m.address)
    context.system.scheduler.scheduleOnce(removalMargin, self, DelayedMemberRemoved(m))(context.dispatcher)
  } else
    self ! DelayedMemberRemoved(m)
}
|
|
|
|
|
|
2019-03-28 13:24:46 +01:00
|
|
|
// Start an asynchronous lease acquisition and move to AcquiringLease.
// The result is piped back to self as AcquireLeaseResult / AcquireLeaseFailure;
// a later loss of the lease is signalled with LeaseLost.
// NOTE: lease.get — only called when a lease is configured (see tryGotoOldest).
def tryAcquireLease() = {
  import context.dispatcher
  pipe(lease.get.acquire(reason => self ! LeaseLost(reason)).map[Any](AcquireLeaseResult).recover {
    case NonFatal(t) => AcquireLeaseFailure(t)
  }).to(self)
  goto(AcquiringLease).using(AcquiringLeaseData(leaseRequestInProgress = true, None))
}
|
|
|
|
|
|
2019-03-28 13:24:46 +01:00
|
|
|
// Try and go to oldest, taking the lease first if one is configured.
def tryGotoOldest(): State = {
  // check if lease
  lease match {
    case None =>
      gotoOldest()
    case Some(_) =>
      logInfo("Trying to acquire lease before starting singleton")
      tryAcquireLease()
  }
}
|
|
|
|
|
|
|
|
|
|
when(AcquiringLease) {
  case Event(AcquireLeaseResult(result), _) =>
    logInfo("Acquire lease result {}", result)
    if (result) {
      gotoOldest()
    } else {
      // lease not granted: retry after leaseRetryInterval
      startSingleTimer(LeaseRetryTimer, LeaseRetry, leaseRetryInterval)
      stay().using(AcquiringLeaseData(leaseRequestInProgress = false, None))
    }

  case Event(Terminated(ref), AcquiringLeaseData(_, Some(singleton))) if ref == singleton =>
    logInfo(
      ClusterLogMarker.singletonTerminated,
      "Singleton actor terminated. Trying to acquire lease again before re-creating.")
    // tryAcquireLease sets the state to None for singleton actor
    tryAcquireLease()

  case Event(AcquireLeaseFailure(t), _) =>
    log.error(t, "failed to get lease (will be retried)")
    startSingleTimer(LeaseRetryTimer, LeaseRetry, leaseRetryInterval)
    stay().using(AcquiringLeaseData(leaseRequestInProgress = false, None))

  case Event(LeaseRetry, _) =>
    // If lease was lost (so previous state was oldest) then we don't try and get the lease
    // until the old singleton instance has been terminated so we know there isn't an
    // instance in this case
    tryAcquireLease()

  case Event(OldestChanged(oldestOption), AcquiringLeaseData(_, singleton)) =>
    handleOldestChanged(singleton, oldestOption)

  case Event(HandOverToMe, AcquiringLeaseData(_, singleton)) =>
    gotoHandingOver(singleton, Some(sender()))

  case Event(TakeOverFromMe, _) =>
    // already oldest, so confirm and continue like that
    sender() ! HandOverToMe
    stay()

  case Event(SelfExiting, _) =>
    selfMemberExited()
    // complete memberExitingProgress when handOverDone
    sender() ! Done // reply to ask
    stay()

  case Event(MemberDowned(m), _) if m.uniqueAddress == cluster.selfUniqueAddress =>
    logInfo("Self downed, stopping ClusterSingletonManager")
    stop()
}
|
2013-01-14 14:09:53 +01:00
|
|
|
|
2019-05-21 17:29:11 +02:00
|
|
|
// Start the singleton actor (watched for termination) and enter Oldest.
@InternalStableApi
def gotoOldest(): State = {
  logInfo(
    ClusterLogMarker.singletonStarted,
    "Singleton manager starting singleton actor [{}]",
    self.path / singletonName)
  val singleton = context.watch(context.actorOf(singletonProps, singletonName))
  goto(Oldest).using(OldestData(Some(singleton)))
}
|
2013-01-14 14:09:53 +01:00
|
|
|
|
2019-03-28 13:24:46 +01:00
|
|
|
// React to an OldestChanged event while this node currently runs (or is
// acquiring the lease for) the singleton. Returns the next FSM state.
def handleOldestChanged(singleton: Option[ActorRef], oldestOption: Option[UniqueAddress]) = {
  oldestChangedReceived = true
  logInfo("{} observed OldestChanged: [{} -> {}]", stateName, cluster.selfAddress, oldestOption.map(_.address))
  oldestOption match {
    case Some(a) if a == cluster.selfUniqueAddress =>
      // already oldest
      stay()
    case Some(a) if !selfExited && removed.contains(a) =>
      // The member removal was not completed and the old removed node is considered
      // oldest again. Safest is to terminate the singleton instance and goto Younger.
      // This node will become oldest again when the other is removed again.
      gotoHandingOver(singleton, None)
    case Some(a) =>
      // send TakeOver request in case the new oldest doesn't know previous oldest
      peer(a.address) ! TakeOverFromMe
      startSingleTimer(TakeOverRetryTimer, TakeOverRetry(1), handOverRetryInterval)
      goto(WasOldest).using(WasOldestData(singleton, newOldestOption = Some(a)))
    case None =>
      // new oldest will initiate the hand-over
      startSingleTimer(TakeOverRetryTimer, TakeOverRetry(1), handOverRetryInterval)
      goto(WasOldest).using(WasOldestData(singleton, newOldestOption = None))
  }
}
|
|
|
|
|
|
|
|
|
|
when(Oldest) {
  case Event(OldestChanged(oldestOption), OldestData(singleton)) =>
    handleOldestChanged(singleton, oldestOption)

  case Event(HandOverToMe, OldestData(singleton)) =>
    gotoHandingOver(singleton, Some(sender()))

  case Event(TakeOverFromMe, _) =>
    // already oldest, so confirm and continue like that
    sender() ! HandOverToMe
    stay()

  case Event(Terminated(ref), d @ OldestData(Some(singleton))) if ref == singleton =>
    // singleton stopped on its own; stay Oldest with no running instance
    logInfo(ClusterLogMarker.singletonTerminated, "Singleton actor [{}] was terminated", singleton.path)
    stay().using(d.copy(singleton = None))

  case Event(SelfExiting, _) =>
    selfMemberExited()
    // complete memberExitingProgress when handOverDone
    sender() ! Done // reply to ask
    stay()

  case Event(MemberDowned(m), OldestData(singleton)) if m.uniqueAddress == cluster.selfUniqueAddress =>
    singleton match {
      case Some(s) =>
        logInfo("Self downed, stopping")
        gotoStopping(s)
      case None =>
        logInfo("Self downed, stopping ClusterSingletonManager")
        stop()
    }

  case Event(LeaseLost(reason), OldestData(singleton)) =>
    log.warning("Lease has been lost. Reason: {}. Terminating singleton and trying to re-acquire lease", reason)
    singleton match {
      case Some(s) =>
        // stop the instance first; the lease is re-acquired after Terminated
        s ! terminationMessage
        goto(AcquiringLease).using(AcquiringLeaseData(leaseRequestInProgress = false, singleton))
      case None =>
        tryAcquireLease()
    }
}
|
|
|
|
|
|
2013-04-28 22:05:40 +02:00
|
|
|
when(WasOldest) {
  case Event(TakeOverRetry(count), WasOldestData(singleton, newOldestOption)) =>
    if ((cluster.isTerminated || selfExited) && (newOldestOption.isEmpty || count > maxTakeOverRetries)) {
      // shutting down and no (responsive) new oldest: stop (singleton first if running)
      singleton match {
        case Some(s) => gotoStopping(s)
        case None    => stop()
      }
    } else if (count <= maxTakeOverRetries) {
      // only log at info level for the last few retries
      if (maxTakeOverRetries - count <= 3)
        logInfo("Retry [{}], sending TakeOverFromMe to [{}]", count, newOldestOption.map(_.address))
      else
        log.debug("Retry [{}], sending TakeOverFromMe to [{}]", count, newOldestOption.map(_.address))
      newOldestOption.foreach(node => peer(node.address) ! TakeOverFromMe)
      startSingleTimer(TakeOverRetryTimer, TakeOverRetry(count + 1), handOverRetryInterval)
      stay()
    } else
      throw new ClusterSingletonManagerIsStuck(s"Expected hand-over to [$newOldestOption] never occurred")

  case Event(HandOverToMe, WasOldestData(singleton, _)) =>
    gotoHandingOver(singleton, Some(sender()))

  case Event(MemberRemoved(m, _), _) if m.uniqueAddress == cluster.selfUniqueAddress && !selfExited =>
    logInfo("Self removed, stopping ClusterSingletonManager")
    stop()

  // the expected new oldest was removed before taking over: hand over to nobody
  case Event(MemberRemoved(m, _), WasOldestData(singleton, Some(newOldest)))
      if !selfExited && m.uniqueAddress == newOldest =>
    addRemoved(m.uniqueAddress)
    gotoHandingOver(singleton, None)

  case Event(Terminated(ref), d @ WasOldestData(singleton, _)) if singleton.contains(ref) =>
    logInfo(ClusterLogMarker.singletonTerminated, "Singleton actor [{}] was terminated", ref.path)
    stay().using(d.copy(singleton = None))

  case Event(SelfExiting, _) =>
    selfMemberExited()
    // complete memberExitingProgress when handOverDone
    sender() ! Done // reply to ask
    stay()

  case Event(MemberDowned(m), WasOldestData(singleton, _)) if m.uniqueAddress == cluster.selfUniqueAddress =>
    singleton match {
      case None =>
        logInfo("Self downed, stopping ClusterSingletonManager")
        stop()
      case Some(s) =>
        logInfo("Self downed, stopping")
        gotoStopping(s)
    }
}
|
|
|
|
|
|
2019-03-28 13:24:46 +01:00
|
|
|
// Stop the singleton instance (if running) and hand over to `handOverTo`
// (None means there is no new oldest to hand over to).
def gotoHandingOver(singleton: Option[ActorRef], handOverTo: Option[ActorRef]): State = {
  singleton match {
    case None =>
      // no instance running: hand-over is trivially done
      handOverDone(handOverTo)
    case Some(s) =>
      handOverTo.foreach { _ ! HandOverInProgress }
      logInfo("Singleton manager stopping singleton actor [{}]", s.path)
      s ! terminationMessage
      goto(HandingOver).using(HandingOverData(s, handOverTo))
  }
}
|
|
|
|
|
|
|
|
|
|
when(HandingOver) {
  // hand-over completes when the singleton instance has terminated
  case Event(Terminated(ref), HandingOverData(singleton, handOverTo)) if ref == singleton =>
    handOverDone(handOverTo)

  case Event(HandOverToMe, HandingOverData(_, handOverTo)) if handOverTo.contains(sender()) =>
    // retry
    sender() ! HandOverInProgress
    stay()

  case Event(SelfExiting, _) =>
    selfMemberExited()
    // complete memberExitingProgress when handOverDone
    sender() ! Done // reply to ask
    stay()
}
|
|
|
|
|
|
2013-09-10 13:35:51 +02:00
|
|
|
// Finish the hand-over: confirm to the new oldest, unblock CoordinatedShutdown,
// then stop if self was removed, or continue as Younger / End.
def handOverDone(handOverTo: Option[ActorRef]): State = {
  val newOldest = handOverTo.map(_.path.address)
  logInfo(
    ClusterLogMarker.singletonTerminated,
    "Singleton terminated, hand-over done [{} -> {}]",
    cluster.selfAddress,
    newOldest)
  handOverTo.foreach { _ ! HandOverDone }
  memberExitingProgress.trySuccess(Done)
  if (removed.contains(cluster.selfUniqueAddress)) {
    logInfo("Self removed, stopping ClusterSingletonManager")
    stop()
  } else if (handOverTo.isEmpty)
    goto(Younger).using(YoungerData(Nil))
  else
    goto(End).using(EndData)
}
|
|
|
|
|
|
2016-10-28 14:23:18 +02:00
|
|
|
// Ask the singleton instance to terminate and wait for it in Stopping.
def gotoStopping(singleton: ActorRef): State = {
  logInfo("Singleton manager stopping singleton actor [{}]", singleton.path)
  singleton ! terminationMessage
  goto(Stopping).using(StoppingData(singleton))
}
|
|
|
|
|
|
|
|
|
|
when(Stopping) {
  // manager stops once the singleton instance has terminated
  case Event(Terminated(ref), StoppingData(singleton)) if ref == singleton =>
    logInfo(ClusterLogMarker.singletonTerminated, "Singleton actor [{}] was terminated", singleton.path)
    stop()
}
|
|
|
|
|
|
2013-05-09 09:49:59 +02:00
|
|
|
when(End) {
  case Event(MemberRemoved(m, _), _) if m.uniqueAddress == cluster.selfUniqueAddress =>
    logInfo("Self removed, stopping ClusterSingletonManager")
    stop()
  case Event(_: OldestChanged | HandOverToMe, _) =>
    // not interested anymore - waiting for removal
    stay()
}
|
|
|
|
|
|
2016-12-01 18:49:38 +01:00
|
|
|
/** Record that this member has exited the cluster (used to suppress later
 *  "member removed" logging) and log the event. */
def selfMemberExited(): Unit = {
  logInfo("Exited [{}]", cluster.selfAddress)
  selfExited = true
}
|
|
|
|
|
|
2013-01-14 14:09:53 +01:00
|
|
|
// Fallback handlers shared by all states.
whenUnhandled {
  case Event(SelfExiting, _) =>
    selfMemberExited()
    // no hand-over in progress in this state: unblock CoordinatedShutdown directly
    memberExitingProgress.trySuccess(Done)
    sender() ! Done // reply to ask
    stay()
  case Event(MemberRemoved(m, _), _) if m.uniqueAddress == cluster.selfUniqueAddress && !selfExited =>
    logInfo("Self removed, stopping ClusterSingletonManager")
    stop()
  case Event(MemberRemoved(m, _), _) =>
    if (!selfExited) logInfo("Member removed [{}]", m.address)
    addRemoved(m.uniqueAddress)
    stay()
  case Event(DelayedMemberRemoved(m), _) =>
    if (!selfExited) logInfo("Member removed [{}]", m.address)
    addRemoved(m.uniqueAddress)
    stay()
  case Event(TakeOverFromMe, _) =>
    log.debug("Ignoring TakeOver request in [{}] from [{}].", stateName, sender().path.address)
    stay()
  case Event(Cleanup, _) =>
    // periodic purge of the `removed` map (timer started in preStart)
    cleanupOverdueNotMemberAnyMore()
    stay()
  case Event(MemberDowned(m), _) =>
    if (m.uniqueAddress == cluster.selfUniqueAddress)
      logInfo("Self downed, waiting for removal")
    stay()
  case Event(ReleaseLeaseFailure(t), _) =>
    log.error(
      t,
      "Failed to release lease. Singleton may not be able to run on another node until lease timeout occurs")
    stay()
  case Event(ReleaseLeaseResult(released), _) =>
    if (released) {
      logInfo("Lease released")
    } else {
      // TODO we could retry
      log.error(
        "Failed to release lease. Singleton may not be able to run on another node until lease timeout occurs")
    }
    stay()
}
|
|
|
|
|
|
|
|
|
|
// log every state transition
onTransition {
  case from -> to => logInfo("ClusterSingletonManager state change [{} -> {}]", from, to)
}
|
|
|
|
|
|
|
|
|
|
// start the hand-over retry timer when entering BecomingOldest
onTransition {
  case _ -> BecomingOldest => startSingleTimer(HandOverRetryTimer, HandOverRetry(1), handOverRetryInterval)
}
|
|
|
|
|
|
|
|
|
|
// cancel retry timers when leaving the states that use them
onTransition {
  case BecomingOldest -> _ => cancelTimer(HandOverRetryTimer)
  case WasOldest -> _      => cancelTimer(TakeOverRetryTimer)
}
|
|
|
|
|
|
2019-03-28 13:24:46 +01:00
|
|
|
// Release a lease that was being acquired when AcquiringLease is left for any
// state other than Oldest; failures come back to self as ReleaseLeaseFailure.
onTransition {
  case (AcquiringLease, to) if to != Oldest =>
    stateData match {
      case AcquiringLeaseData(true, _) =>
        logInfo("Releasing lease as leaving AcquiringLease going to [{}]", to)
        import context.dispatcher
        lease.foreach(l =>
          pipe(l.release().map[Any](ReleaseLeaseResult).recover {
            case t => ReleaseLeaseFailure(t)
          }).to(self))
      case _ =>
    }
}
|
|
|
|
|
|
|
|
|
|
// Release the lease when leaving Oldest. Consistent with the
// AcquiringLease-exit transition above: a failed release() is converted into
// ReleaseLeaseFailure (handled and logged in whenUnhandled) instead of being
// piped to self as an unhandled Status.Failure.
onTransition {
  case Oldest -> _ =>
    lease.foreach { l =>
      logInfo("Releasing lease as leaving Oldest")
      import context.dispatcher
      pipe(l.release().map[Any](ReleaseLeaseResult).recover {
        case NonFatal(t) => ReleaseLeaseFailure(t)
      }).to(self)
    }
}
|
|
|
|
|
|
2013-01-14 14:09:53 +01:00
|
|
|
// request the next oldest-changed event when entering Younger or Oldest
onTransition {
  case _ -> (Younger | Oldest) => getNextOldestChanged()
}
|
|
|
|
|
|
|
|
|
|
// if self was already removed, stop instead of continuing as Younger/End
onTransition {
  case _ -> (Younger | End) if removed.contains(cluster.selfUniqueAddress) =>
    logInfo("Self removed, stopping ClusterSingletonManager")
    // note that FSM.stop() can't be used in onTransition
    context.stop(self)
}
|
|
|
|
|
|
2014-05-07 11:09:53 +05:30
|
|
|
}
|