/**
 * Copyright (C) 2009-2013 Typesafe Inc. <http://www.typesafe.com>
 */
package akka.cluster

import language.existentials
import language.postfixOps
import scala.collection.immutable
import scala.concurrent.duration._
import scala.concurrent.forkjoin.ThreadLocalRandom
import scala.util.control.NonFatal
import java.util.UUID
import akka.actor.{ Actor, ActorLogging, ActorRef, Address, Cancellable, Props, PoisonPill, ReceiveTimeout, RootActorPath, Scheduler }
import akka.actor.OneForOneStrategy
import akka.actor.SupervisorStrategy.Stop
import akka.cluster.MemberStatus._
import akka.cluster.ClusterEvent._
import akka.actor.ActorSelection
/**
 * Base trait for all cluster messages. All ClusterMessages are serializable.
 */
trait ClusterMessage extends Serializable

/**
 * Cluster commands sent by the USER.
 */
object ClusterUserAction {

  /**
   * Command to join the cluster. Sent when a node (represented by 'address')
   * wants to join another node (the receiver).
   */
  @SerialVersionUID(1L)
  case class Join(address: Address, roles: Set[String]) extends ClusterMessage

  /**
   * Command to leave the cluster.
   */
  @SerialVersionUID(1L)
  case class Leave(address: Address) extends ClusterMessage

  /**
   * Command to mark a node as temporarily down.
   */
  @SerialVersionUID(1L)
  case class Down(address: Address) extends ClusterMessage
}
/**
 * INTERNAL API
 */
private[cluster] object InternalClusterAction {

  /**
   * Command to initiate joining another node (represented by 'address').
   * Join will be sent to the other node.
   */
  case class JoinTo(address: Address)

  /**
   * Command to initiate the process to join the specified
   * seed nodes.
   */
  case class JoinSeedNodes(seedNodes: immutable.IndexedSeq[Address])

  /**
   * Start message of the process to join one of the seed nodes.
   * The node sends `InitJoin` to all seed nodes, which reply
   * with `InitJoinAck`. The first reply is used, others are discarded.
   * The node sends the `Join` command to the seed node that replied first.
   * If a node is uninitialized it will reply to `InitJoin` with
   * `InitJoinNack`.
   */
  case object JoinSeedNode

  /**
   * @see JoinSeedNode
   */
  @SerialVersionUID(1L)
  case object InitJoin extends ClusterMessage

  /**
   * @see JoinSeedNode
   */
  @SerialVersionUID(1L)
  case class InitJoinAck(address: Address) extends ClusterMessage

  /**
   * @see JoinSeedNode
   */
  @SerialVersionUID(1L)
  case class InitJoinNack(address: Address) extends ClusterMessage

  /**
   * Marker interface for periodic tick messages
   */
  sealed trait Tick

  case object GossipTick extends Tick

  case object HeartbeatTick extends Tick

  case object ReapUnreachableTick extends Tick

  case object MetricsTick extends Tick

  case object LeaderActionsTick extends Tick

  case object PublishStatsTick extends Tick

  case class SendGossipTo(address: Address)

  case object GetClusterCoreRef

  case class PublisherCreated(publisher: ActorRef)

  /**
   * Command to [[akka.cluster.ClusterDaemon]] to create a
   * [[akka.cluster.OnMemberUpListener]].
   */
  case class AddOnMemberUpListener(callback: Runnable)

  sealed trait SubscriptionMessage
  case class Subscribe(subscriber: ActorRef, to: Class[_]) extends SubscriptionMessage
  case class Unsubscribe(subscriber: ActorRef, to: Option[Class[_]]) extends SubscriptionMessage
  /**
   * @param receiver if `receiver` is defined the event will only be sent to that
   *   actor, otherwise it will be sent to all subscribers via the `eventStream`.
   */
  case class PublishCurrentClusterState(receiver: Option[ActorRef]) extends SubscriptionMessage

  sealed trait PublishMessage
  case class PublishChanges(newGossip: Gossip) extends PublishMessage
  case class PublishEvent(event: ClusterDomainEvent) extends PublishMessage
  case object PublishStart extends PublishMessage
}
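
// The SubscriptionMessage protocol above backs the user-facing subscription API on the
// Cluster extension (the same `cluster.subscribe`/`cluster.unsubscribe` calls used by
// OnMemberUpListener at the end of this file). A minimal usage sketch from a subscribing
// actor - the actor name is an illustrative assumption only:
//
//   import akka.actor.Actor
//   import akka.cluster.Cluster
//   import akka.cluster.ClusterEvent._
//
//   class ClusterWatcher extends Actor {
//     val cluster = Cluster(context.system)
//     override def preStart(): Unit = cluster.subscribe(self, classOf[MemberUp])
//     override def postStop(): Unit = cluster.unsubscribe(self)
//     def receive = {
//       case state: CurrentClusterState ⇒ // snapshot of the current members
//       case MemberUp(member)           ⇒ // a member was moved to Up
//     }
//   }
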
/**
 * INTERNAL API.
 *
 * Cluster commands sent by the LEADER.
 */
private[cluster] object ClusterLeaderAction {

  /**
   * Command to mark a node to be removed from the cluster immediately.
   * Can only be sent by the leader.
   */
  @SerialVersionUID(1L)
  case class Exit(address: Address) extends ClusterMessage

  /**
   * Command to remove a node from the cluster immediately.
   */
  @SerialVersionUID(1L)
  case class Remove(address: Address) extends ClusterMessage
}
/**
 * INTERNAL API.
 *
 * Supervisor managing the different Cluster daemons.
 */
private[cluster] final class ClusterDaemon(settings: ClusterSettings) extends Actor with ActorLogging {
  import InternalClusterAction._

  // Important - don't use Cluster(context.system) here because that would
  // cause deadlock. The Cluster extension is currently being created and is waiting
  // for response from GetClusterCoreRef in its constructor.
  val coreSupervisor = context.actorOf(Props[ClusterCoreSupervisor].
    withDispatcher(context.props.dispatcher), name = "core")
  context.actorOf(Props[ClusterHeartbeatReceiver].
    withDispatcher(context.props.dispatcher), name = "heartbeatReceiver")

  def receive = {
    case msg @ GetClusterCoreRef ⇒ coreSupervisor forward msg
    case AddOnMemberUpListener(code) ⇒
      context.actorOf(Props(new OnMemberUpListener(code)))
    case PublisherCreated(publisher) ⇒
      if (settings.MetricsEnabled) {
        // metrics must be started after core/publisher to be able
        // to inject the publisher ref to the ClusterMetricsCollector
        context.actorOf(Props(new ClusterMetricsCollector(publisher)).
          withDispatcher(context.props.dispatcher), name = "metrics")
      }
  }
}
/**
 * INTERNAL API.
 *
 * ClusterCoreDaemon and ClusterDomainEventPublisher can't be restarted because the state
 * would be obsolete. Shut down the member if any of those actors crash.
 */
private[cluster] final class ClusterCoreSupervisor extends Actor with ActorLogging {
  import InternalClusterAction._

  val publisher = context.actorOf(Props[ClusterDomainEventPublisher].
    withDispatcher(context.props.dispatcher), name = "publisher")
  val coreDaemon = context.watch(context.actorOf(Props(new ClusterCoreDaemon(publisher)).
    withDispatcher(context.props.dispatcher), name = "daemon"))

  context.parent ! PublisherCreated(publisher)

  override val supervisorStrategy =
    OneForOneStrategy() {
      case NonFatal(e) ⇒
        log.error(e, "Cluster node [{}] crashed, [{}] - shutting down...", Cluster(context.system).selfAddress, e.getMessage)
        self ! PoisonPill
        Stop
    }

  override def postStop(): Unit = Cluster(context.system).shutdown()

  def receive = {
    case InternalClusterAction.GetClusterCoreRef ⇒ sender ! coreDaemon
  }
}
/**
 * INTERNAL API.
 */
private[cluster] final class ClusterCoreDaemon(publisher: ActorRef) extends Actor with ActorLogging {
  import ClusterLeaderAction._
  import InternalClusterAction._

  val cluster = Cluster(context.system)
  import cluster.{ selfAddress, scheduler, failureDetector }
  import cluster.settings._

  // FIXME the UUID should not be needed when Address contains uid, ticket #2788
  val vclockNode = VectorClock.Node(selfAddress.toString + "-" + UUID.randomUUID())

  // note that self is not initially a member,
  // and the Gossip is not versioned for this 'Node' yet
  var latestGossip: Gossip = Gossip.empty

  var stats = ClusterStats()

  var seedNodeProcess: Option[ActorRef] = None

  var tryingToJoinWith: Option[Address] = None

  /**
   * Looks up and returns the remote cluster command connection for the specific address.
   */
  private def clusterCore(address: Address): ActorSelection =
    context.actorSelection(RootActorPath(address) / "system" / "cluster" / "core" / "daemon")

  val heartbeatSender = context.actorOf(Props[ClusterHeartbeatSender].
    withDispatcher(UseDispatcher), name = "heartbeatSender")

  import context.dispatcher

  // start periodic gossip to random nodes in cluster
  val gossipTask = scheduler.schedule(PeriodicTasksInitialDelay.max(GossipInterval),
    GossipInterval, self, GossipTick)

  // start periodic cluster failure detector reaping (moving nodes condemned by the failure detector to unreachable list)
  val failureDetectorReaperTask = scheduler.schedule(PeriodicTasksInitialDelay.max(UnreachableNodesReaperInterval),
    UnreachableNodesReaperInterval, self, ReapUnreachableTick)

  // start periodic leader action management (only applies for the current leader)
  val leaderActionsTask = scheduler.schedule(PeriodicTasksInitialDelay.max(LeaderActionsInterval),
    LeaderActionsInterval, self, LeaderActionsTick)

  // start periodic publish of current stats
  val publishStatsTask: Option[Cancellable] =
    if (PublishStatsInterval == Duration.Zero) None
    else Some(scheduler.schedule(PeriodicTasksInitialDelay.max(PublishStatsInterval),
      PublishStatsInterval, self, PublishStatsTick))

  override def preStart(): Unit = {
    if (AutoJoin) self ! JoinSeedNodes(SeedNodes)
  }

  override def postStop(): Unit = {
    gossipTask.cancel()
    failureDetectorReaperTask.cancel()
    leaderActionsTask.cancel()
    publishStatsTask foreach { _.cancel() }
  }
  def uninitialized: Actor.Receive = {
    case InitJoin                 ⇒ sender ! InitJoinNack(selfAddress)
    case JoinTo(address)          ⇒ join(address)
    case JoinSeedNodes(seedNodes) ⇒ joinSeedNodes(seedNodes)
    case msg: SubscriptionMessage ⇒ publisher forward msg
    case _: Tick                  ⇒ // ignore periodic tasks until initialized
  }

  def initialized: Actor.Receive = {
    case msg: GossipEnvelope                    ⇒ receiveGossip(msg)
    case GossipTick                             ⇒ gossip()
    case ReapUnreachableTick                    ⇒ reapUnreachableMembers()
    case LeaderActionsTick                      ⇒ leaderActions()
    case PublishStatsTick                       ⇒ publishInternalStats()
    case InitJoin                               ⇒ initJoin()
    case JoinTo(address)                        ⇒ join(address)
    case ClusterUserAction.Join(address, roles) ⇒ joining(address, roles)
    case ClusterUserAction.Down(address)        ⇒ downing(address)
    case ClusterUserAction.Leave(address)       ⇒ leaving(address)
    case Exit(address)                          ⇒ exiting(address)
    case Remove(address)                        ⇒ removing(address)
    case SendGossipTo(address)                  ⇒ gossipTo(address)
    case msg: SubscriptionMessage               ⇒ publisher forward msg
  }

  def removed: Actor.Receive = {
    case msg: SubscriptionMessage ⇒ publisher forward msg
    case _: Tick                  ⇒ // ignore periodic tasks
  }

  def receive = uninitialized

  def initJoin(): Unit = sender ! InitJoinAck(selfAddress)

  def joinSeedNodes(seedNodes: immutable.IndexedSeq[Address]): Unit = {
    require(seedNodeProcess.isEmpty, "Join seed nodes is already in progress")
    seedNodeProcess =
      if (seedNodes.isEmpty || seedNodes == immutable.IndexedSeq(selfAddress)) {
        self ! JoinTo(selfAddress)
        None
      } else if (seedNodes.head == selfAddress) {
        Some(context.actorOf(Props(new FirstSeedNodeProcess(seedNodes)).
          withDispatcher(UseDispatcher), name = "firstSeedNodeProcess"))
      } else {
        Some(context.actorOf(Props(new JoinSeedNodeProcess(seedNodes)).
          withDispatcher(UseDispatcher), name = "joinSeedNodeProcess"))
      }
  }
  /**
   * Try to join this cluster node with the node specified by 'address'.
   * A 'Join(thisNodeAddress)' command is sent to the node to join.
   */
  def join(address: Address): Unit = {
    if (address.protocol != selfAddress.protocol)
      log.warning("Trying to join member with wrong protocol, but was ignored, expected [{}] but was [{}]",
        selfAddress.protocol, address.protocol)
    else if (address.system != selfAddress.system)
      log.warning("Trying to join member with wrong ActorSystem name, but was ignored, expected [{}] but was [{}]",
        selfAddress.system, address.system)
    else if (!latestGossip.members.exists(_.address == address)) {
      // to support manual join when joining to seed nodes is stuck (no seed nodes available)
      val snd = sender
      seedNodeProcess match {
        case Some(`snd`) ⇒
          // seedNodeProcess completed, it will stop itself
          seedNodeProcess = None
        case Some(s) ⇒
          // manual join, abort current seedNodeProcess
          context stop s
          seedNodeProcess = None
        case None ⇒ // no seedNodeProcess in progress
      }

      // only wipe the state if we're not in the process of joining this address
      if (tryingToJoinWith.forall(_ != address)) {
        tryingToJoinWith = Some(address)
        // wipe our state since a node that joins a cluster must be empty
        latestGossip = Gossip.empty
        // wipe the failure detector since we are starting fresh and shouldn't care about the past
        failureDetector.reset()
        // wipe the publisher since we are starting fresh
        publisher ! PublishStart

        publish(latestGossip)
      }

      context.become(initialized)
      if (address == selfAddress)
        joining(address, cluster.selfRoles)
      else
        clusterCore(address) ! ClusterUserAction.Join(selfAddress, cluster.selfRoles)
    }
}
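
  // The manual-join path above also backs the public Cluster extension API. A minimal usage
  // sketch (system name, host, port and address protocol are illustrative assumptions only):
  //
  //   import akka.actor.{ ActorSystem, Address }
  //   import akka.cluster.Cluster
  //
  //   val system = ActorSystem("ClusterSystem")
  //   val seed = Address("akka", "ClusterSystem", "127.0.0.1", 2551)
  //   Cluster(system).join(seed)  // arrives here as JoinTo(seed) and ends up in join(seed)
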
  /**
   * State transition to JOINING - new node joining.
   */
  def joining(node: Address, roles: Set[String]): Unit = {
    if (node.protocol != selfAddress.protocol)
      log.warning("Member with wrong protocol tried to join, but was ignored, expected [{}] but was [{}]",
        selfAddress.protocol, node.protocol)
    else if (node.system != selfAddress.system)
      log.warning("Member with wrong ActorSystem name tried to join, but was ignored, expected [{}] but was [{}]",
        selfAddress.system, node.system)
    else {
      val localMembers = latestGossip.members
      val localUnreachable = latestGossip.overview.unreachable

      val alreadyMember = localMembers.exists(_.address == node)
      val isUnreachable = localUnreachable.exists(_.address == node)

      if (!alreadyMember && !isUnreachable) {
        // remove the node from the failure detector
        failureDetector.remove(node)

        // add joining node as Joining
        // add self in case someone else joins before self has joined (Set discards duplicates)
        val newMembers = localMembers + Member(node, Joining, roles) + Member(selfAddress, Joining, cluster.selfRoles)
        val newGossip = latestGossip copy (members = newMembers)

        val versionedGossip = newGossip :+ vclockNode
        val seenVersionedGossip = versionedGossip seen selfAddress

        latestGossip = seenVersionedGossip

        log.info("Cluster Node [{}] - Node [{}] is JOINING, roles [{}]", selfAddress, node, roles.mkString(", "))
        if (node != selfAddress) {
          gossipTo(node)
        }

        publish(latestGossip)
      }
    }
  }
  /**
   * State transition to LEAVING.
   */
  def leaving(address: Address): Unit = {
    // only try to update if the node is available (in the member ring)
    if (latestGossip.members.exists(m ⇒ m.address == address && m.status == Up)) {
      val newMembers = latestGossip.members map { m ⇒ if (m.address == address) m.copy(status = Leaving) else m } // mark node as LEAVING
      val newGossip = latestGossip copy (members = newMembers)

      val versionedGossip = newGossip :+ vclockNode
      val seenVersionedGossip = versionedGossip seen selfAddress

      latestGossip = seenVersionedGossip

      log.info("Cluster Node [{}] - Marked address [{}] as [{}]", selfAddress, address, Leaving)
      publish(latestGossip)
    }
  }

  /**
   * State transition to EXITING.
   */
  def exiting(address: Address): Unit = {
    log.info("Cluster Node [{}] - Marked node [{}] as [{}]", selfAddress, address, Exiting)
    // FIXME implement when we implement hand-off
  }

  /**
   * State transition to REMOVED.
   *
   * This method is for now only called after the LEADER has sent a Removed message - telling the node
   * to shut itself down.
   *
   * In the future we might change this to allow the USER to send a Removed(address) message telling an
   * arbitrary node to be moved directly from UP -> REMOVED.
   */
  def removing(address: Address): Unit = {
    log.info("Cluster Node [{}] - Node has been REMOVED by the leader - shutting down...", selfAddress)
    cluster.shutdown()
  }
  /**
   * The node to DOWN is removed from the 'members' set and put in the 'unreachable' set (if not already there)
   * and its status is set to DOWN. The node is also removed from the 'seen' table.
   *
   * The node will reside as DOWN in the 'unreachable' set until an explicit JOIN command is sent directly
   * to this node and it will then go through the normal JOINING procedure.
   */
  def downing(address: Address): Unit = {
    val localGossip = latestGossip
    val localMembers = localGossip.members
    val localOverview = localGossip.overview
    val localSeen = localOverview.seen
    val localUnreachableMembers = localOverview.unreachable

    // 1. check if the node to DOWN is in the 'members' set
    val downedMember: Option[Member] =
      localMembers.collectFirst { case m if m.address == address ⇒ m.copy(status = Down) }
    val newMembers = downedMember match {
      case Some(m) ⇒
        log.info("Cluster Node [{}] - Marking node [{}] as [{}]", selfAddress, m.address, Down)
        localMembers - m
      case None ⇒ localMembers
    }

    // 2. check if the node to DOWN is in the 'unreachable' set
    val newUnreachableMembers =
      localUnreachableMembers.map { member ⇒
        // no need to DOWN members already DOWN
        if (member.address == address && member.status != Down) {
          log.info("Cluster Node [{}] - Marking unreachable node [{}] as [{}]", selfAddress, member.address, Down)
          member copy (status = Down)
        } else member
      }

    // 3. add the newly DOWNED members from the 'members' (in step 1.) to the 'newUnreachableMembers' set
    val newUnreachablePlusNewlyDownedMembers = newUnreachableMembers ++ downedMember

    // 4. remove nodes marked as DOWN from the 'seen' table
    val newSeen = localSeen -- newUnreachablePlusNewlyDownedMembers.collect { case m if m.status == Down ⇒ m.address }

    // update gossip overview
    val newOverview = localOverview copy (seen = newSeen, unreachable = newUnreachablePlusNewlyDownedMembers)
    val newGossip = localGossip copy (overview = newOverview, members = newMembers) // update gossip
    val versionedGossip = newGossip :+ vclockNode
    latestGossip = versionedGossip seen selfAddress

    publish(latestGossip)
}
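
  // Downing is triggered either by auto-down in leaderActions() below or manually through the
  // Cluster extension. A minimal sketch of the manual path, assuming `system` is an already
  // started ActorSystem in the cluster and the address is an illustrative assumption:
  //
  //   import akka.actor.Address
  //   import akka.cluster.Cluster
  //
  //   val unreachableNode = Address("akka", "ClusterSystem", "10.0.0.7", 2552)
  //   Cluster(system).down(unreachableNode)  // delivered here as ClusterUserAction.Down(...)
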
  /**
   * Receive new gossip.
   */
  def receiveGossip(envelope: GossipEnvelope): Unit = {
    val from = envelope.from
    val remoteGossip = envelope.gossip
    val localGossip = latestGossip

    if (remoteGossip.overview.unreachable.exists(_.address == selfAddress)) {
      log.debug("Ignoring received gossip with self [{}] as unreachable, from [{}]", selfAddress, from)
    } else if (localGossip.overview.isNonDownUnreachable(from)) {
      log.debug("Ignoring received gossip from unreachable [{}] ", from)
    } else {
      // if we're in the remote gossip and not Removed, then we're not joining
      if (tryingToJoinWith.nonEmpty && remoteGossip.member(selfAddress).status != Removed)
        tryingToJoinWith = None

      val comparison = remoteGossip.version tryCompareTo localGossip.version
      val conflict = comparison.isEmpty

      val (winningGossip, talkback, newStats) = comparison match {
        case None ⇒
          // conflicting versions, merge
          (remoteGossip merge localGossip, true, stats.incrementMergeCount)
        case Some(0) ⇒
          // same version
          (remoteGossip mergeSeen localGossip, !remoteGossip.seenByAddress(selfAddress), stats.incrementSameCount)
        case Some(x) if x < 0 ⇒
          // local is newer
          (localGossip, true, stats.incrementNewerCount)
        case _ ⇒
          // remote is newer
          (remoteGossip, !remoteGossip.seenByAddress(selfAddress), stats.incrementOlderCount)
      }

      stats = newStats
      latestGossip = winningGossip seen selfAddress

      // for all new joining nodes we remove them from the failure detector
      latestGossip.members foreach {
        node ⇒ if (node.status == Joining && !localGossip.members(node)) failureDetector.remove(node.address)
      }

      log.debug("Cluster Node [{}] - Receiving gossip from [{}]", selfAddress, from)

      if (conflict) {
        log.debug(
          """Couldn't establish a causal relationship between "remote" gossip and "local" gossip - Remote[{}] - Local[{}] - merged them into [{}]""",
          remoteGossip, localGossip, winningGossip)
      }

      stats = stats.incrementReceivedGossipCount
      publish(latestGossip)

      if (envelope.conversation && talkback) {
        // send back gossip to sender when sender had different view, i.e. merge, or sender had
        // older or sender had newer
        gossipTo(from)
      }
    }
  }

  def mergeRate(count: Long): Double = (count * 1000.0) / GossipInterval.toMillis
  /**
   * Initiates a new round of gossip.
   */
  def gossip(): Unit = {
    log.debug("Cluster Node [{}] - Initiating new round of gossip", selfAddress)

    if (!isSingletonCluster && isAvailable) {
      val localGossip = latestGossip

      val preferredGossipTargets =
        if (ThreadLocalRandom.current.nextDouble() < GossipDifferentViewProbability) { // If it's time to try to gossip to some nodes with a different view
          // gossip to a random alive member with preference to a member with older or newer gossip version
          val localMemberAddressesSet = localGossip.members map { _.address }
          val nodesWithDifferentView = for {
            (address, version) ← localGossip.overview.seen
            if localMemberAddressesSet contains address
            if version != localGossip.version
          } yield address

          nodesWithDifferentView.toIndexedSeq
        } else Vector.empty[Address]

      gossipToRandomNodeOf(
        if (preferredGossipTargets.nonEmpty) preferredGossipTargets
        else localGossip.members.toIndexedSeq.map(_.address) // Fall back to localGossip; important to not accidentally use `map` of the SortedSet, since the original order is not preserved
      )
    }
  }
  /**
   * Runs periodic leader actions, such as auto-downing unreachable nodes, assigning partitions etc.
   */
  def leaderActions(): Unit = {
    val localGossip = latestGossip
    val localMembers = localGossip.members

    val isLeader = localGossip.isLeader(selfAddress)

    if (isLeader && isAvailable) {
      // only run the leader actions if we are the LEADER and available

      val localOverview = localGossip.overview
      val localSeen = localOverview.seen
      val localUnreachableMembers = localOverview.unreachable
      val hasPartionHandoffCompletedSuccessfully: Boolean = {
        // FIXME implement partition handoff and a check if it is completed - now just returns TRUE - e.g. has completed successfully
        true
      }

      // Leader actions are as follows:
      //   1. Move JOINING     => UP      -- When a node joins the cluster
      //   2. Move LEAVING     => EXITING -- When all partition handoff has completed
      //   3. Non-exiting remain          -- When all partition handoff has completed
      //   4. Move EXITING     => REMOVED -- When all nodes have seen that the node is EXITING (convergence) - remove the nodes from the node ring and seen table
      //   5. Move UNREACHABLE => DOWN    -- When the node is in the UNREACHABLE set it can be auto-down by leader
      //   6. Move DOWN        => REMOVED -- When all nodes have seen that the node is DOWN (convergence) - remove the nodes from the node ring and seen table
      //   7. Updating the vclock version for the changes
      //   8. Updating the 'seen' table
      //   9. Try to update the state with the new gossip
      //  10. If success - run all the side-effecting processing
      val (
        newGossip: Gossip,
        hasChangedState: Boolean,
        upMembers,
        exitingMembers,
        removedMembers,
        removedUnreachableMembers,
        unreachableButNotDownedMembers) =

        if (localGossip.convergence) {
          // we have convergence - so we can't have unreachable nodes

          def enoughMembers: Boolean = {
            localMembers.size >= MinNrOfMembers && MinNrOfMembersOfRole.forall {
              case (role, threshold) ⇒ localMembers.count(_.hasRole(role)) >= threshold
            }
          }
          def isJoiningToUp(m: Member): Boolean = m.status == Joining && enoughMembers

          // transform the node member ring
          val newMembers = localMembers collect {
            // Move JOINING => UP (once all nodes have seen that this node is JOINING, i.e. we have a convergence)
            // and minimum number of nodes have joined the cluster
            case member if isJoiningToUp(member) ⇒ member copy (status = Up)
            // Move LEAVING => EXITING (once we have a convergence on LEAVING
            // *and* if we have a successful partition handoff)
            case member if member.status == Leaving && hasPartionHandoffCompletedSuccessfully ⇒
              member copy (status = Exiting)
            // Everyone else that is not Exiting stays as they are
            case member if member.status != Exiting && member.status != Down ⇒ member
            // Move EXITING => REMOVED, DOWN => REMOVED - i.e. remove the nodes from the 'members' set/node ring and seen table
          }

          // ----------------------
          // Store away all stuff needed for the side-effecting processing
          // ----------------------
          // Check for the need to do side-effecting on successful state change
          // Repeat the checking for transitions between JOINING -> UP, LEAVING -> EXITING, EXITING -> REMOVED, DOWN -> REMOVED
          // to check for state-changes and to store away removed and exiting members for later notification
          //   1. check for state-changes to update
          //   2. store away removed and exiting members so we can separate the pure state changes
          val (removedMembers, newMembers1) = localMembers partition (m ⇒ m.status == Exiting || m.status == Down)

          val (removedUnreachable, newUnreachable) = localUnreachableMembers partition (_.status == Down)

          val (upMembers, newMembers2) = newMembers1 partition (isJoiningToUp(_))

          val exitingMembers = newMembers2 filter (_.status == Leaving && hasPartionHandoffCompletedSuccessfully)

          val hasChangedState = removedMembers.nonEmpty || removedUnreachable.nonEmpty || upMembers.nonEmpty || exitingMembers.nonEmpty

          // removing REMOVED nodes from the 'seen' table
          val newSeen = localSeen -- removedMembers.map(_.address) -- removedUnreachable.map(_.address)

          val newOverview = localOverview copy (seen = newSeen, unreachable = newUnreachable) // update gossip overview
          val newGossip = localGossip copy (members = newMembers, overview = newOverview) // update gossip

          (newGossip, hasChangedState, upMembers, exitingMembers, removedMembers, removedUnreachable, Member.none)

        } else if (AutoDown) {
          // we don't have convergence - so we might have unreachable nodes
          // if 'auto-down' is turned on, then try to auto-down any unreachable nodes
          val newUnreachableMembers = localUnreachableMembers collect {
            // ----------------------
            // Move UNREACHABLE => DOWN (auto-downing by leader)
            // ----------------------
            case member if member.status != Down ⇒ member copy (status = Down)
            case downMember                      ⇒ downMember // no need to DOWN members already DOWN
          }

          // Check for the need to do side-effecting on successful state change
          val unreachableButNotDownedMembers = localUnreachableMembers filter (_.status != Down)

          // removing nodes marked as DOWN from the 'seen' table
          val newSeen = localSeen -- newUnreachableMembers.collect { case m if m.status == Down ⇒ m.address }

          val newOverview = localOverview copy (seen = newSeen, unreachable = newUnreachableMembers) // update gossip overview
          val newGossip = localGossip copy (overview = newOverview) // update gossip

          (newGossip, unreachableButNotDownedMembers.nonEmpty, Member.none, Member.none, Member.none, Member.none, unreachableButNotDownedMembers)

        } else (localGossip, false, Member.none, Member.none, Member.none, Member.none, Member.none)
      if (hasChangedState) { // we have a change of state - version it and try to update
        // ----------------------
        // Updating the vclock version for the changes
        // ----------------------
        val versionedGossip = newGossip :+ vclockNode

        // ----------------------
        // Updating the 'seen' table
        // Unless the leader (this node) is part of the removed members, i.e. the leader has moved itself from EXITING -> REMOVED
        // ----------------------
        val seenVersionedGossip =
          if (removedMembers.exists(_.address == selfAddress)) versionedGossip
          else versionedGossip seen selfAddress

        // ----------------------
        // Update the state with the new gossip
        // ----------------------
        latestGossip = seenVersionedGossip

        // ----------------------
        // Run all the side-effecting processing
        // ----------------------

        // log the move of members from joining to up
        upMembers foreach { member ⇒
          log.info("Cluster Node [{}] - Leader is moving node [{}] from [{}] to [{}]",
            selfAddress, member.address, member.status, Up)
        }

        // tell all removed members to remove and shut down themselves
        removedMembers foreach { member ⇒
          val address = member.address
          log.info("Cluster Node [{}] - Leader is moving node [{}] from [{}] to [{}] - and removing node from node ring",
            selfAddress, address, member.status, Removed)
          clusterCore(address) ! ClusterLeaderAction.Remove(address)
        }

        // tell all exiting members to exit
        exitingMembers foreach { member ⇒
          val address = member.address
          log.info("Cluster Node [{}] - Leader is moving node [{}] from [{}] to [{}]",
            selfAddress, address, member.status, Exiting)
          clusterCore(address) ! ClusterLeaderAction.Exit(address) // FIXME should use ? to await completion of handoff?
        }

        // log the auto-downing of the unreachable nodes
        unreachableButNotDownedMembers foreach { member ⇒
          log.info("Cluster Node [{}] - Leader is marking unreachable node [{}] as [{}]", selfAddress, member.address, Down)
        }

        // log the removal of the downed unreachable nodes
        removedUnreachableMembers foreach { member ⇒
          log.info("Cluster Node [{}] - Leader is removing unreachable node [{}]", selfAddress, member.address)
        }

        publish(latestGossip)
      }
    }
  }
  /**
   * Reaps the unreachable members (moves them to the 'unreachable' list in the cluster overview) according to the failure detector's verdict.
   */
  def reapUnreachableMembers(): Unit = {
    if (!isSingletonCluster && isAvailable) {
      // only scrutinize if we are a non-singleton cluster and available

      val localGossip = latestGossip
      val localOverview = localGossip.overview
      val localMembers = localGossip.members
      val localUnreachableMembers = localGossip.overview.unreachable

      val newlyDetectedUnreachableMembers = localMembers filterNot { member ⇒
        member.address == selfAddress || failureDetector.isAvailable(member.address)
      }

      if (newlyDetectedUnreachableMembers.nonEmpty) {
        val newMembers = localMembers -- newlyDetectedUnreachableMembers
        val newUnreachableMembers = localUnreachableMembers ++ newlyDetectedUnreachableMembers

        val newOverview = localOverview copy (unreachable = newUnreachableMembers)
        val newGossip = localGossip copy (overview = newOverview, members = newMembers)

        // updating vclock and 'seen' table
        val versionedGossip = newGossip :+ vclockNode
        val seenVersionedGossip = versionedGossip seen selfAddress

        latestGossip = seenVersionedGossip

        log.error("Cluster Node [{}] - Marking node(s) as UNREACHABLE [{}]", selfAddress, newlyDetectedUnreachableMembers.mkString(", "))

        publish(latestGossip)
      }
    }
  }

  def selectRandomNode(addresses: IndexedSeq[Address]): Option[Address] =
    if (addresses.isEmpty) None
    else Some(addresses(ThreadLocalRandom.current nextInt addresses.size))

  def isSingletonCluster: Boolean = latestGossip.isSingletonCluster

  def isAvailable: Boolean = !latestGossip.isUnreachable(selfAddress)
  /**
   * Gossips latest gossip to a random member in the set of members passed in as argument.
   *
   * @return the used [[akka.actor.Address]] if any
   */
  private def gossipToRandomNodeOf(addresses: immutable.IndexedSeq[Address]): Option[Address] = {
    log.debug("Cluster Node [{}] - Selecting random node to gossip to [{}]", selfAddress, addresses.mkString(", "))
    // filter out myself
    val peer = selectRandomNode(addresses filterNot (_ == selfAddress))
    peer foreach gossipTo
    peer
  }

  /**
   * Gossips latest gossip to an address.
   */
  def gossipTo(address: Address): Unit =
    gossipTo(address, GossipEnvelope(selfAddress, latestGossip, conversation = true))

  def oneWayGossipTo(address: Address): Unit =
    gossipTo(address, GossipEnvelope(selfAddress, latestGossip, conversation = false))

  def gossipTo(address: Address, gossipMsg: GossipEnvelope): Unit =
    if (address != selfAddress && gossipMsg.gossip.members.exists(_.address == address))
      clusterCore(address) ! gossipMsg

  def publish(newGossip: Gossip): Unit = {
    publisher ! PublishChanges(newGossip)
    if (PublishStatsInterval == Duration.Zero) publishInternalStats()
  }

  def publishInternalStats(): Unit = publisher ! CurrentInternalStats(stats)

}

/**
 * INTERNAL API.
 *
 * Used only for the first seed node.
 * Sends InitJoin to all seed nodes (except itself).
 * If other seed nodes are not part of the cluster yet they will reply with
 * InitJoinNack or not respond at all and then the first seed node
 * will join itself to initialize the new cluster. When the first
 * seed node is restarted, and some other seed node is part of the cluster,
 * it will reply with InitJoinAck and then the first seed node will join
 * that other seed node to join the existing cluster.
 */
private[cluster] final class FirstSeedNodeProcess(seedNodes: immutable.IndexedSeq[Address]) extends Actor with ActorLogging {
  import InternalClusterAction._

  val cluster = Cluster(context.system)
  def selfAddress = cluster.selfAddress

  if (seedNodes.size <= 1 || seedNodes.head != selfAddress)
    throw new IllegalArgumentException("Join seed node should not be done")

  val timeout = Deadline.now + cluster.settings.SeedNodeTimeout

  var remainingSeedNodes = seedNodes.toSet - selfAddress

  // retry until one ack, or all nack, or timeout
  import context.dispatcher
  val retryTask = cluster.scheduler.schedule(1.second, 1.second, self, JoinSeedNode)
  self ! JoinSeedNode

  override def postStop(): Unit = retryTask.cancel()

  def receive = {
    case JoinSeedNode ⇒
      if (timeout.hasTimeLeft) {
        // send InitJoin to remaining seed nodes (except myself)
        remainingSeedNodes foreach { a ⇒ context.actorSelection(context.parent.path.toStringWithAddress(a)) ! InitJoin }
      } else {
        // no InitJoinAck received, initialize new cluster by joining myself
        context.parent ! JoinTo(selfAddress)
        context.stop(self)
      }
    case InitJoinAck(address) ⇒
      // first InitJoinAck reply, join existing cluster
      context.parent ! JoinTo(address)
      context.stop(self)
    case InitJoinNack(address) ⇒
      remainingSeedNodes -= address
      if (remainingSeedNodes.isEmpty) {
        // initialize new cluster by joining myself when nacks from all other seed nodes
        context.parent ! JoinTo(selfAddress)
        context.stop(self)
      }
  }
}
/**
 * INTERNAL API.
 *
 * Sends InitJoin to all seed nodes (except itself) and expects
 * InitJoinAck replies back. The seed node that replied first
 * will be the one joined to. InitJoinAck replies received after the
 * first one are ignored.
 *
 * Retries if no InitJoinAck replies are received within the
 * SeedNodeTimeout.
 * When at least one reply has been received it stops itself after
 * an idle SeedNodeTimeout.
 *
 * The seed nodes can be started in any order, but they will not be "active"
 * until they have been able to join another seed node (seed1).
 * They will retry the join procedure.
 * So one possible startup scenario is:
 * 1. seed2 started, but doesn't get any ack from seed1 or seed3
 * 2. seed3 started, doesn't get any ack from seed1 or seed3 (seed2 doesn't reply)
 * 3. seed1 is started and joins itself
 * 4. seed2 retries the join procedure and gets an ack from seed1, and then joins seed1
 * 5. seed3 retries the join procedure and gets an ack from seed2 first, and then joins seed2
 *
 */
private[cluster] final class JoinSeedNodeProcess(seedNodes: immutable.IndexedSeq[Address]) extends Actor with ActorLogging {
  import InternalClusterAction._

  def selfAddress = Cluster(context.system).selfAddress

  if (seedNodes.isEmpty || seedNodes.head == selfAddress)
    throw new IllegalArgumentException("Join seed node should not be done")

  context.setReceiveTimeout(Cluster(context.system).settings.SeedNodeTimeout)

  override def preStart(): Unit = self ! JoinSeedNode

  def receive = {
    case JoinSeedNode ⇒
      // send InitJoin to all seed nodes (except myself)
      seedNodes.collect {
        case a if a != selfAddress ⇒ context.actorSelection(context.parent.path.toStringWithAddress(a))
      } foreach { _ ! InitJoin }
    case InitJoinAck(address) ⇒
      // first InitJoinAck reply
      context.parent ! JoinTo(address)
      context.become(done)
    case InitJoinNack(_) ⇒ // that seed was uninitialized
    case ReceiveTimeout ⇒
      // no InitJoinAck received, try again
      self ! JoinSeedNode
  }

  def done: Actor.Receive = {
    case InitJoinAck(_) ⇒ // already received one, skip rest
    case ReceiveTimeout ⇒ context.stop(self)
  }
}
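
// The two seed node processes above are driven by the configured seed nodes. A minimal,
// illustrative sketch of starting a node with seed nodes through configuration (system name,
// hosts, ports and the exact address protocol are assumptions for illustration only):
//
//   import akka.actor.ActorSystem
//   import com.typesafe.config.ConfigFactory
//
//   val config = ConfigFactory.parseString("""
//     akka.cluster.seed-nodes = [
//       "akka://ClusterSystem@host1:2551",
//       "akka://ClusterSystem@host2:2552"]
//     """).withFallback(ConfigFactory.load())
//   val system = ActorSystem("ClusterSystem", config)
//   // with auto-join enabled, ClusterCoreDaemon sends itself JoinSeedNodes(SeedNodes) in preStart
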

/**
 * INTERNAL API
 *
 * The supplied callback will be run, once, when the current cluster member is `Up`.
 */
private[cluster] class OnMemberUpListener(callback: Runnable) extends Actor with ActorLogging {
  import ClusterEvent._
  val cluster = Cluster(context.system)
  // subscribe to MemberUp, re-subscribe when restart
  override def preStart(): Unit =
    cluster.subscribe(self, classOf[MemberUp])
  override def postStop(): Unit =
    cluster.unsubscribe(self)

  def receive = {
    case state: CurrentClusterState ⇒
      if (state.members.exists(isSelfUp(_)))
        done()
    case MemberUp(m) ⇒
      if (isSelfUp(m))
        done()
  }

  def done(): Unit = {
    try callback.run() catch {
      case NonFatal(e) ⇒ log.error(e, "OnMemberUp callback failed with [{}]", e.getMessage)
    } finally {
      context stop self
    }
  }

  def isSelfUp(m: Member): Boolean =
    m.address == cluster.selfAddress && m.status == MemberStatus.Up
}
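
// This listener is created in response to the AddOnMemberUpListener command handled by
// ClusterDaemon above. A minimal usage sketch, assuming the Cluster extension exposes a
// registerOnMemberUp hook that wraps the callback in AddOnMemberUpListener (the hook name
// is an assumption for illustration):
//
//   import akka.cluster.Cluster
//
//   Cluster(system) registerOnMemberUp {
//     // runs exactly once, after this node has been moved to Up by the leader
//     println("Member is Up - start accepting work")
//   }
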

/**
 * INTERNAL API
 */
@SerialVersionUID(1L)
private[cluster] case class ClusterStats(
  receivedGossipCount: Long = 0L,
  mergeCount: Long = 0L,
  sameCount: Long = 0L,
  newerCount: Long = 0L,
  olderCount: Long = 0L) {

  def incrementReceivedGossipCount(): ClusterStats =
    copy(receivedGossipCount = receivedGossipCount + 1)

  def incrementMergeCount(): ClusterStats =
    copy(mergeCount = mergeCount + 1)

  def incrementSameCount(): ClusterStats =
    copy(sameCount = sameCount + 1)

  def incrementNewerCount(): ClusterStats =
    copy(newerCount = newerCount + 1)

  def incrementOlderCount(): ClusterStats =
    copy(olderCount = olderCount + 1)

  def :+(that: ClusterStats): ClusterStats = {
    ClusterStats(
      this.receivedGossipCount + that.receivedGossipCount,
      this.mergeCount + that.mergeCount,
      this.sameCount + that.sameCount,
      this.newerCount + that.newerCount,
      this.olderCount + that.olderCount)
  }

  def :-(that: ClusterStats): ClusterStats = {
    ClusterStats(
      this.receivedGossipCount - that.receivedGossipCount,
      this.mergeCount - that.mergeCount,
      this.sameCount - that.sameCount,
      this.newerCount - that.newerCount,
      this.olderCount - that.olderCount)
  }
}