From 821dc2199ba7f7ec8179d18c0b5664eacf896f40 Mon Sep 17 00:00:00 2001
From: Heiko Seeberger
Date: Thu, 15 Oct 2015 08:08:01 +0200
Subject: [PATCH] +act #18575 Publish MemberJoined

---
 .../scala/akka/cluster/ClusterEvent.scala     | 16 ++++
 .../scala/akka/cluster/ConvergenceSpec.scala  |  4 +-
 .../scala/akka/cluster/NodeChurnSpec.scala    |  2 +-
 .../ClusterDomainEventPublisherSpec.scala     |  9 +-
 .../akka/cluster/ClusterDomainEventSpec.scala |  4 +-
 akka-docs/rst/java/cluster-usage.rst          | 81 ++++++++---------
 akka-docs/rst/scala/cluster-usage.rst         | 87 ++++++++++---------
 7 files changed, 113 insertions(+), 90 deletions(-)

diff --git a/akka-cluster/src/main/scala/akka/cluster/ClusterEvent.scala b/akka-cluster/src/main/scala/akka/cluster/ClusterEvent.scala
index be95f74148..f97bf55795 100644
--- a/akka-cluster/src/main/scala/akka/cluster/ClusterEvent.scala
+++ b/akka-cluster/src/main/scala/akka/cluster/ClusterEvent.scala
@@ -121,6 +121,13 @@ object ClusterEvent {
     def member: Member
   }
 
+  /**
+   * Member status changed to Joining.
+   */
+  final case class MemberJoined(member: Member) extends MemberEvent {
+    if (member.status != Joining) throw new IllegalArgumentException("Expected Joining status, got: " + member)
+  }
+
   /**
    * Member status changed to WeaklyUp.
    * A joining member can be moved to `WeaklyUp` if convergence
@@ -138,6 +145,13 @@ object ClusterEvent {
     if (member.status != Up) throw new IllegalArgumentException("Expected Up status, got: " + member)
   }
 
+  /**
+   * Member status changed to Leaving.
+   */
+  final case class MemberLeft(member: Member) extends MemberEvent {
+    if (member.status != Leaving) throw new IllegalArgumentException("Expected Leaving status, got: " + member)
+  }
+
   /**
    * Member status changed to `MemberStatus.Exiting` and will be removed
   * when all members have seen the `Exiting` status.
@@ -278,8 +292,10 @@ object ClusterEvent {
       case (_, newMember :: oldMember :: Nil) if newMember.status != oldMember.status ⇒ newMember
     }
     val memberEvents = (newMembers ++ changedMembers) collect {
+      case m if m.status == Joining  ⇒ MemberJoined(m)
       case m if m.status == WeaklyUp ⇒ MemberWeaklyUp(m)
       case m if m.status == Up       ⇒ MemberUp(m)
+      case m if m.status == Leaving  ⇒ MemberLeft(m)
       case m if m.status == Exiting  ⇒ MemberExited(m)
       // no events for other transitions
     }
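The two events above fill in life-cycle transitions that previously produced no published event (Joining and Leaving). A minimal sketch of a subscriber consuming them — illustrative only, not part of this patch; the actor name and log messages are invented:

  import akka.actor.{ Actor, ActorLogging }
  import akka.cluster.Cluster
  import akka.cluster.ClusterEvent._

  // Registers for all MemberEvent subtypes and reacts to the two new ones.
  class MemberLifecycleListener extends Actor with ActorLogging {
    val cluster = Cluster(context.system)

    override def preStart(): Unit =
      cluster.subscribe(self, initialStateMode = InitialStateAsEvents, classOf[MemberEvent])

    override def postStop(): Unit =
      cluster.unsubscribe(self)

    def receive = {
      case MemberJoined(member) => log.info("Member joined: {}", member.address) // status is Joining
      case MemberLeft(member)   => log.info("Member left: {}", member.address)   // status is Leaving
      case _: MemberEvent       => // other transitions (MemberUp, MemberExited, ...)
    }
  }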
diff --git a/akka-cluster/src/multi-jvm/scala/akka/cluster/ConvergenceSpec.scala b/akka-cluster/src/multi-jvm/scala/akka/cluster/ConvergenceSpec.scala
index a54d8a44e1..479b7c1a86 100644
--- a/akka-cluster/src/multi-jvm/scala/akka/cluster/ConvergenceSpec.scala
+++ b/akka-cluster/src/multi-jvm/scala/akka/cluster/ConvergenceSpec.scala
@@ -95,11 +95,11 @@ abstract class ConvergenceSpec(multiNodeConfig: ConvergenceMultiNodeConfig)
 
       runOn(first, second, fourth) {
         for (n ← 1 to 5) {
-          awaitAssert(clusterView.members.size should ===(3))
+          awaitAssert(clusterView.members.size should ===(4))
           awaitSeenSameState(first, second, fourth)
           memberStatus(first) should ===(Some(MemberStatus.Up))
           memberStatus(second) should ===(Some(MemberStatus.Up))
-          memberStatus(fourth) should ===(None)
+          memberStatus(fourth) should ===(Some(MemberStatus.Joining))
           // wait and then check again
           Thread.sleep(1.second.dilated.toMillis)
         }
diff --git a/akka-cluster/src/multi-jvm/scala/akka/cluster/NodeChurnSpec.scala b/akka-cluster/src/multi-jvm/scala/akka/cluster/NodeChurnSpec.scala
index 2eb7d31eba..fbcaba5d49 100644
--- a/akka-cluster/src/multi-jvm/scala/akka/cluster/NodeChurnSpec.scala
+++ b/akka-cluster/src/multi-jvm/scala/akka/cluster/NodeChurnSpec.scala
@@ -67,7 +67,7 @@ abstract class NodeChurnSpec
       additionaSystems.foreach { s ⇒
         val c = Cluster(s)
         c.state.members.size should be(numberOfMembers)
-        c.state.members.forall(_.status == MemberStatus.Up)
+        c.state.members.forall(_.status == MemberStatus.Up) shouldBe true
       }
     }
   }
diff --git a/akka-cluster/src/test/scala/akka/cluster/ClusterDomainEventPublisherSpec.scala b/akka-cluster/src/test/scala/akka/cluster/ClusterDomainEventPublisherSpec.scala
index edba2b5b87..70212aecc5 100644
--- a/akka-cluster/src/test/scala/akka/cluster/ClusterDomainEventPublisherSpec.scala
+++ b/akka-cluster/src/test/scala/akka/cluster/ClusterDomainEventPublisherSpec.scala
@@ -44,7 +44,7 @@ class ClusterDomainEventPublisherSpec extends AkkaSpec(ClusterDomainEventPublish
   val dUp = TestMember(Address("akka.tcp", "sys", "d", 2552), Up, Set("GRP"))
 
   val g0 = Gossip(members = SortedSet(aUp)).seen(aUp.uniqueAddress)
-  val g1 = Gossip(members = SortedSet(aUp, bExiting, cJoining)).seen(aUp.uniqueAddress).seen(bExiting.uniqueAddress).seen(cJoining.uniqueAddress)
+  val g1 = Gossip(members = SortedSet(aUp, cJoining)).seen(aUp.uniqueAddress).seen(cJoining.uniqueAddress)
   val g2 = Gossip(members = SortedSet(aUp, bExiting, cUp)).seen(aUp.uniqueAddress)
   val g3 = g2.seen(bExiting.uniqueAddress).seen(cUp.uniqueAddress)
   val g4 = Gossip(members = SortedSet(a51Up, aUp, bExiting, cUp)).seen(aUp.uniqueAddress)
@@ -71,6 +71,11 @@ class ClusterDomainEventPublisherSpec extends AkkaSpec(ClusterDomainEventPublish
 
   "ClusterDomainEventPublisher" must {
 
+    "publish MemberJoined" in {
+      publisher ! PublishChanges(g1)
+      memberSubscriber.expectMsg(MemberJoined(cJoining))
+    }
+
     "publish MemberUp" in {
       publisher ! PublishChanges(g2)
       publisher ! PublishChanges(g3)
@@ -92,7 +97,7 @@ class ClusterDomainEventPublisherSpec extends AkkaSpec(ClusterDomainEventPublish
       memberSubscriber.expectMsg(MemberExited(bExiting))
       memberSubscriber.expectMsg(MemberUp(cUp))
       publisher ! PublishChanges(g6)
-      memberSubscriber.expectNoMsg(500 millis)
+      memberSubscriber.expectMsg(MemberLeft(aLeaving))
       publisher ! PublishChanges(g7)
       memberSubscriber.expectMsg(MemberExited(aExiting))
       memberSubscriber.expectMsg(LeaderChanged(Some(cUp.address)))
diff --git a/akka-cluster/src/test/scala/akka/cluster/ClusterDomainEventSpec.scala b/akka-cluster/src/test/scala/akka/cluster/ClusterDomainEventSpec.scala
index db5210873d..074bb225b2 100644
--- a/akka-cluster/src/test/scala/akka/cluster/ClusterDomainEventSpec.scala
+++ b/akka-cluster/src/test/scala/akka/cluster/ClusterDomainEventSpec.scala
@@ -51,7 +51,7 @@ class ClusterDomainEventSpec extends WordSpec with Matchers {
       val (g1, _) = converge(Gossip(members = SortedSet(aUp)))
       val (g2, s2) = converge(Gossip(members = SortedSet(aUp, bUp, eJoining)))
 
-      diffMemberEvents(g1, g2) should ===(Seq(MemberUp(bUp)))
+      diffMemberEvents(g1, g2) should ===(Seq(MemberUp(bUp), MemberJoined(eJoining)))
       diffUnreachable(g1, g2, selfDummyAddress) should ===(Seq.empty)
       diffSeen(g1, g2, selfDummyAddress) should ===(Seq(SeenChanged(convergence = true, seenBy = s2.map(_.address))))
     }
@@ -60,7 +60,7 @@ class ClusterDomainEventSpec extends WordSpec with Matchers {
       val (g1, _) = converge(Gossip(members = SortedSet(aJoining, bUp, cUp)))
       val (g2, s2) = converge(Gossip(members = SortedSet(aUp, bUp, cLeaving, eJoining)))
 
-      diffMemberEvents(g1, g2) should ===(Seq(MemberUp(aUp)))
+      diffMemberEvents(g1, g2) should ===(Seq(MemberUp(aUp), MemberLeft(cLeaving), MemberJoined(eJoining)))
       diffUnreachable(g1, g2, selfDummyAddress) should ===(Seq.empty)
       diffSeen(g1, g2, selfDummyAddress) should ===(Seq(SeenChanged(convergence = true, seenBy = s2.map(_.address))))
     }
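The updated expectations follow from the new collect cases in diffMemberEvents: a member first seen in Joining state now yields MemberJoined, and an Up-to-Leaving transition yields MemberLeft. As a hedged illustration (not part of the patch), user code can observe the same membership through the public CurrentClusterState; the helper name is invented:

  import akka.actor.ActorSystem
  import akka.cluster.{ Cluster, MemberStatus }

  object MemberCounts {
    // e.g. Map(Joining -> 1, Up -> 3): Joining members are part of the visible
    // membership, as the ConvergenceSpec assertions above now reflect.
    def byStatus(system: ActorSystem): Map[MemberStatus, Int] =
      Cluster(system).state.members.groupBy(_.status).map { case (s, ms) => s -> ms.size }
  }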
diff --git a/akka-docs/rst/java/cluster-usage.rst b/akka-docs/rst/java/cluster-usage.rst
index a8e4953d46..63e1979614 100644
--- a/akka-docs/rst/java/cluster-usage.rst
+++ b/akka-docs/rst/java/cluster-usage.rst
@@ -67,7 +67,7 @@ it retries this procedure until successful or shutdown.
 You define the seed nodes in the :ref:`cluster_configuration_java` file (application.conf)::
 
   akka.cluster.seed-nodes = [
-    "akka.tcp://ClusterSystem@host1:2552", 
+    "akka.tcp://ClusterSystem@host1:2552",
     "akka.tcp://ClusterSystem@host2:2552"]
 
 This can also be defined as Java system properties when starting the JVM using the following syntax::
@@ -77,11 +77,11 @@ This can also be defined as Java system properties when starting the JVM using t
 
 The seed nodes can be started in any order and it is not necessary to have all
 seed nodes running, but the node configured as the first element in the ``seed-nodes``
-configuration list must be started when initially starting a cluster, otherwise the 
-other seed-nodes will not become initialized and no other node can join the cluster. 
+configuration list must be started when initially starting a cluster, otherwise the
+other seed-nodes will not become initialized and no other node can join the cluster.
 The reason for the special first seed node is to avoid forming separated islands when
 starting from an empty cluster.
 
-It is quickest to start all configured seed nodes at the same time (order doesn't matter), 
+It is quickest to start all configured seed nodes at the same time (order doesn't matter),
 otherwise it can take up to the configured ``seed-node-timeout`` until the nodes
 can join.
@@ -105,7 +105,7 @@ which is attractive when dynamically discovering other nodes at startup by using
 When using ``joinSeedNodes`` you should not include the node itself except for the node
 that is supposed to be the first seed node, and that should be placed first in parameter to ``joinSeedNodes``.
 
-Unsuccessful attempts to contact seed nodes are automatically retried after the time period defined in 
+Unsuccessful attempts to contact seed nodes are automatically retried after the time period defined in
 configuration property ``seed-node-timeout``. Unsuccessful attempt to join a specific seed node is
 automatically retried after the configured ``retry-unsuccessful-join-after``. Retrying means that it
 tries to contact all seed nodes and then joins the node that answers first. The first node in the list
@@ -129,7 +129,7 @@ When a member is considered by the failure detector to be unreachable the
 leader is not allowed to perform its duties, such as changing status of
 new joining members to 'Up'. The node must first become reachable again, or the
 status of the unreachable member must be changed to 'Down'. Changing status to 'Down'
-can be performed automatically or manually. By default it must be done manually, using 
+can be performed automatically or manually. By default it must be done manually, using
 :ref:`cluster_jmx_java` or :ref:`cluster_command_line_java`. It can also be performed
 programmatically with ``Cluster.get(system).down(address)``.
@@ -138,7 +138,7 @@ You can enable automatic downing with configuration::
 
   akka.cluster.auto-down-unreachable-after = 120s
 
-This means that the cluster leader member will change the ``unreachable`` node 
+This means that the cluster leader member will change the ``unreachable`` node
 status to ``down`` automatically after the configured time of unreachability.
 
 Be aware of that using auto-down implies that two separate clusters will
@@ -167,9 +167,9 @@ It can also be performed programmatically with:
 
 .. includecode:: code/docs/cluster/ClusterDocTest.java#leave
 
 Note that this command can be issued to any member in the cluster, not necessarily the
-one that is leaving. The cluster extension, but not the actor system or JVM, of the 
-leaving member will be shutdown after the leader has changed status of the member to 
-`Exiting`. Thereafter the member will be removed from the cluster. Normally this is handled 
+one that is leaving. The cluster extension, but not the actor system or JVM, of the
+leaving member will be shutdown after the leader has changed status of the member to
+`Exiting`. Thereafter the member will be removed from the cluster. Normally this is handled
 automatically, but in case of network failures during this process it might still
 be necessary to set the node’s status to ``Down`` in order to complete the removal.
@@ -199,7 +199,7 @@ members to ``Up``.
 
 You can subscribe to the ``WeaklyUp`` membership event to make use of the members that
 are in this state, but you should be aware of that members on the other side of a network partition
-have no knowledge about the existence of the new members. You should for example not count 
+have no knowledge about the existence of the new members. You should for example not count
 ``WeaklyUp`` members in quorum decisions.
 
 .. warning::
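The leave and down paragraphs above have direct programmatic equivalents. A sketch with placeholder host names and ports (the object name is invented; ``leave`` and ``down`` are the public Cluster extension methods):

  import akka.actor.{ ActorSystem, Address }
  import akka.cluster.Cluster

  object LeaveOrDown extends App {
    val system = ActorSystem("ClusterSystem")
    val cluster = Cluster(system)

    // Graceful removal: may be issued on any member, not only on the leaving node.
    cluster.leave(Address("akka.tcp", "ClusterSystem", "host1", 2552))

    // Manual downing of a member that stays unreachable (alternative to auto-down).
    cluster.down(Address("akka.tcp", "ClusterSystem", "host2", 2552))
  }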
@@ -222,8 +222,8 @@ A snapshot of the full state, ``akka.cluster.ClusterEvent.CurrentClusterState``,
 as the first message, followed by events for incremental updates.
 
 Note that you may receive an empty ``CurrentClusterState``, containing no members,
-if you start the subscription before the initial join procedure has completed. 
-This is expected behavior. When the node has been accepted in the cluster you will 
+if you start the subscription before the initial join procedure has completed.
+This is expected behavior. When the node has been accepted in the cluster you will
 receive ``MemberUp`` for that node, and other nodes.
 
 If you find it inconvenient to handle the ``CurrentClusterState`` you can use
@@ -237,6 +237,7 @@ to the current state and it is not the full history of all changes that actually
 
 The events to track the life-cycle of members are:
 
+* ``ClusterEvent.MemberJoined`` - A new member has joined the cluster and its status has been changed to ``Joining``.
 * ``ClusterEvent.MemberUp`` - A new member has joined the cluster and its status has been changed to ``Up``.
 * ``ClusterEvent.MemberExited`` - A member is leaving the cluster and its status has been changed to ``Exiting``
   Note that the node might already have been shutdown when this event is published on another node.
@@ -252,7 +253,7 @@ for details about the events.
 
 Instead of subscribing to cluster events it can sometimes be convenient to only get the full membership state
 with ``Cluster.get(system).state()``. Note that this state is not necessarily in sync with the events published to a
-cluster subscription. 
+cluster subscription.
 
 Worker Dial-in Example
 ----------------------
@@ -286,10 +287,10 @@ Note that the ``TransformationFrontend`` actor watch the registered backend
 to be able to remove it from its list of available backend workers.
 Death watch uses the cluster failure detector for nodes in the cluster, i.e. it detects
 network failures and JVM crashes, in addition to graceful termination of watched
-actor. Death watch generates the ``Terminated`` message to the watching actor when the 
+actor. Death watch generates the ``Terminated`` message to the watching actor when the
 unreachable cluster node has been downed and removed.
 
-The `Typesafe Activator `_ tutorial named 
+The `Typesafe Activator `_ tutorial named
 `Akka Cluster Samples with Java `_.
 contains the full source code and instructions of how to run the **Worker Dial-in Example**.
@@ -309,7 +310,7 @@ How To Startup when Cluster Size Reached
 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
 
 A common use case is to start actors after the cluster has been initialized,
-members have joined, and the cluster has reached a certain size. 
+members have joined, and the cluster has reached a certain size.
 
 With a configuration option you can define required number of members
 before the leader changes member status of 'Joining' members to 'Up'.
@@ -321,7 +322,7 @@ before the leader changes member status of 'Joining' members to 'Up'.
 
 .. includecode:: ../../../akka-samples/akka-sample-cluster-java/src/main/resources/factorial.conf#role-min-nr-of-members
 
-You can start the actors in a ``registerOnMemberUp`` callback, which will 
+You can start the actors in a ``registerOnMemberUp`` callback, which will
 be invoked when the current member status is changed to 'Up', i.e. the cluster
 has at least the defined number of members.
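A sketch of the ``registerOnMemberUp`` callback described above; ``FrontendActor`` is a stand-in for whatever the application should start once the member is Up:

  import akka.actor.{ Actor, ActorSystem, Props }
  import akka.cluster.Cluster

  // Placeholder for the real application actor.
  class FrontendActor extends Actor {
    def receive = { case _ => () }
  }

  object StartupWhenUp extends App {
    val system = ActorSystem("ClusterSystem")
    Cluster(system).registerOnMemberUp {
      // Runs once this member is Up, i.e. after min-nr-of-members is reached.
      system.actorOf(Props[FrontendActor], "frontend")
    }
  }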
@@ -351,8 +352,8 @@ Cluster Singleton
 For some use cases it is convenient and sometimes also mandatory to ensure that
 you have exactly one actor of a certain type running somewhere in the cluster.
 
-This can be implemented by subscribing to member events, but there are several corner 
-cases to consider. Therefore, this specific use case is made easily accessible by the 
+This can be implemented by subscribing to member events, but there are several corner
+cases to consider. Therefore, this specific use case is made easily accessible by the
 :ref:`cluster-singleton-java`.
 
 Cluster Sharding
 ^^^^^^^^^^^^^^^^
 
 Distributes actors across several nodes in the cluster and supports interaction
 with the actors using their logical identifier, but without having to care about
 their physical location in the cluster.
@@ -400,11 +401,11 @@ mark a node ``unreachable`` to have the rest of the cluster mark that node ``unr
 
 The failure detector will also detect if the node becomes ``reachable`` again.
 When all nodes that monitored the ``unreachable`` node detects it as ``reachable`` again
-the cluster, after gossip dissemination, will consider it as ``reachable``. 
+the cluster, after gossip dissemination, will consider it as ``reachable``.
 
 If system messages cannot be delivered to a node it will be quarantined and then it
 cannot come back from ``unreachable``. This can happen if the there are too many
-unacknowledged system messages (e.g. watch, Terminated, remote actor deployment, 
+unacknowledged system messages (e.g. watch, Terminated, remote actor deployment,
 failures of actors supervised by remote parent). Then the node needs to be moved
 to the ``down`` or ``removed`` states and the actor system must be restarted before
 it can join the cluster again.
@@ -459,10 +460,10 @@ This is how the curve looks like for ``acceptable-heartbeat-pause`` configured t
 
 Death watch uses the cluster failure detector for nodes in the cluster, i.e. it detects
 network failures and JVM crashes, in addition to graceful termination of watched
-actor. Death watch generates the ``Terminated`` message to the watching actor when the 
+actor. Death watch generates the ``Terminated`` message to the watching actor when the
 unreachable cluster node has been downed and removed.
 
-If you encounter suspicious false positives when the system is under load you should 
+If you encounter suspicious false positives when the system is under load you should
 define a separate dispatcher for the cluster actors as described in :ref:`cluster_dispatcher_java`.
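A compact illustration of the death-watch behaviour described above; ``BackendRegistration`` and the actor name are invented, the watch/``Terminated`` handling is the standard pattern:

  import akka.actor.{ Actor, ActorRef, Terminated }

  case object BackendRegistration // illustrative registration message

  class FrontendWatcher extends Actor {
    var backends = IndexedSeq.empty[ActorRef]

    def receive = {
      case BackendRegistration =>
        // Watching works across the cluster; Terminated also arrives once an
        // unreachable node has been downed and removed.
        context.watch(sender())
        backends = backends :+ sender()
      case Terminated(backend) =>
        backends = backends.filterNot(_ == backend)
    }
  }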
 Cluster Aware Routers
 ^^^^^^^^^^^^^^^^^^^^^
@@ -472,25 +473,25 @@ All :ref:`routers ` can be made aware of member nodes in the clust
 deploying new routees or looking up routees on nodes in the cluster.
 When a node becomes unreachable or leaves the cluster
 the routees of that node are automatically unregistered from the router.
 When new nodes join the cluster additional
-routees are added to the router, according to the configuration. Routees are also added 
+routees are added to the router, according to the configuration. Routees are also added
 when a node becomes reachable again, after having been unreachable.
 
 Cluster aware routers make use of members with status :ref:`WeaklyUp `
 if that feature is enabled.
 
-There are two distinct types of routers. 
+There are two distinct types of routers.
 
-* **Group - router that sends messages to the specified path using actor selection** 
-  The routees can be shared between routers running on different nodes in the cluster. 
-  One example of a use case for this type of router is a service running on some backend 
+* **Group - router that sends messages to the specified path using actor selection**
+  The routees can be shared between routers running on different nodes in the cluster.
+  One example of a use case for this type of router is a service running on some backend
   nodes in the cluster and used by routers running on front-end nodes in the cluster.
 
-* **Pool - router that creates routees as child actors and deploys them on remote nodes.** 
+* **Pool - router that creates routees as child actors and deploys them on remote nodes.**
   Each router will have its own routee instances. For example, if you start a router
   on 3 nodes in a 10 nodes cluster you will have 30 routee actors in total if the router is
   configured to use one instance per node. The routees created by the different routers
   will not be shared between the routers. One example of a use case for this type of router
-  is a single master that coordinate jobs and delegates the actual work to routees running 
+  is a single master that coordinate jobs and delegates the actual work to routees running
   on other nodes in the cluster.
 
 Router with Group of Routees
 ----------------------------
@@ -505,12 +506,12 @@ That is not done by the router. The configuration for a group looks like this:
 
 The routee actors should be started as early as possible when starting the actor system,
 because the router will try to use them as soon as the member status is changed to 'Up'.
 
-The actor paths without address information that are defined in ``routees.paths`` are used for selecting the 
+The actor paths without address information that are defined in ``routees.paths`` are used for selecting the
 actors to which the messages will be forwarded to by the router. Messages will be forwarded
 to the routees using :ref:`ActorSelection `, so the same delivery semantics should be expected.
 It is possible to limit the lookup of routees to member nodes tagged with a certain role by
 specifying ``use-role``.
 
-``max-total-nr-of-instances`` defines total number of routees in the cluster. By default ``max-total-nr-of-instances`` 
+``max-total-nr-of-instances`` defines total number of routees in the cluster. By default ``max-total-nr-of-instances``
 is set to a high value (10000) that will result in new routees added to the router when nodes join the cluster.
 Set it to a lower value if you want to limit total number of routees.
@@ -523,7 +524,7 @@ See :ref:`cluster_configuration_java` section for further descriptions of the se
 
 Router Example with Group of Routees
 ------------------------------------
 
-Let's take a look at how to use a cluster aware router with a group of routees, 
+Let's take a look at how to use a cluster aware router with a group of routees,
 i.e. router sending to the paths of the routees.
 
 The example application provides a service to calculate statistics for a text.
@@ -557,7 +558,7 @@ The router is configured with ``routees.paths``:
 
 This means that user requests can be sent to ``StatsService`` on any node and it will use
 ``StatsWorker`` on all nodes.
 
-The `Typesafe Activator `_ tutorial named 
+The `Typesafe Activator `_ tutorial named
 `Akka Cluster Samples with Java `_.
 contains the full source code and instructions of how to run the **Router Example with Group of Routees**.
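The same group-router settings can be supplied in code instead of configuration. A sketch assuming a routee path of ``/user/statsWorker`` as in the sample; the object name and the choice of ``ConsistentHashingGroup`` are illustrative:

  import akka.actor.ActorSystem
  import akka.cluster.routing.{ ClusterRouterGroup, ClusterRouterGroupSettings }
  import akka.routing.ConsistentHashingGroup

  object GroupRouterSketch extends App {
    val system = ActorSystem("ClusterSystem")
    // Mirrors the routees.paths / max-total-nr-of-instances / use-role settings.
    val workerRouter = system.actorOf(
      ClusterRouterGroup(
        ConsistentHashingGroup(Nil),
        ClusterRouterGroupSettings(
          totalInstances = 100,
          routeesPaths = List("/user/statsWorker"),
          allowLocalRoutees = true,
          useRole = None)).props(),
      name = "workerRouter")
  }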
@@ -573,7 +574,7 @@ It is possible to limit the deployment of routees to member nodes tagged with a
 specifying ``use-role``.
 
 ``max-total-nr-of-instances`` defines total number of routees in the cluster, but the number of routees
-per node, ``max-nr-of-instances-per-node``, will not be exceeded. By default ``max-total-nr-of-instances`` 
+per node, ``max-nr-of-instances-per-node``, will not be exceeded. By default ``max-total-nr-of-instances``
 is set to a high value (10000) that will result in new routees added to the router when nodes join the cluster.
 Set it to a lower value if you want to limit total number of routees.
@@ -587,7 +588,7 @@ Router Example with Pool of Remote Deployed Routees
 ---------------------------------------------------
 
 Let's take a look at how to use a cluster aware router on single master node that creates
-and deploys workers. To keep track of a single master we use the :ref:`cluster-singleton-java` 
+and deploys workers. To keep track of a single master we use the :ref:`cluster-singleton-java`
 in the contrib module. The ``ClusterSingletonManager`` is started on each node.
 
 .. includecode:: ../../../akka-samples/akka-sample-cluster-java/src/main/java/sample/cluster/stats/StatsSampleOneMasterMain.java#create-singleton-manager
@@ -604,7 +605,7 @@ All nodes start ``ClusterSingletonProxy`` and the ``ClusterSingletonManager``. T
 
 .. includecode:: ../../../akka-samples/akka-sample-cluster-java/src/main/resources/stats2.conf#config-router-deploy
 
-The `Typesafe Activator `_ tutorial named 
+The `Typesafe Activator `_ tutorial named
 `Akka Cluster Samples with Java `_.
 contains the full source code and instructions of how to run the **Router Example with Pool of Remote Deployed Routees**.
@@ -659,7 +660,7 @@ Run it without parameters to see instructions about how to use the script::
 
     is-singleton - Checks if the cluster is a singleton cluster (single node cluster)
     is-available - Checks if the member node is available
 
-  Where the should be on the format of 
+  Where the should be on the format of
   'akka.://@:'
 
   Examples: bin/akka-cluster localhost 9999 is-available
@@ -681,7 +682,7 @@ Example of system properties to enable remote monitoring and management::
 
 Configuration
 ^^^^^^^^^^^^^
 
-There are several configuration properties for the cluster. We refer to the 
+There are several configuration properties for the cluster. We refer to the
 :ref:`reference configuration ` for more information.
 
 Cluster Info Logging
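For the pool variant, an equivalent in-code sketch; ``RoundRobinPool`` and the ``Worker`` routee are illustrative, and the settings mirror the configuration keys described above (``totalInstances`` caps the cluster-wide count, ``maxInstancesPerNode`` the per-node count):

  import akka.actor.{ Actor, ActorSystem, Props }
  import akka.cluster.routing.{ ClusterRouterPool, ClusterRouterPoolSettings }
  import akka.routing.RoundRobinPool

  // Stand-in routee: echoes each job back to the sender.
  class Worker extends Actor {
    def receive = { case job => sender() ! job }
  }

  object PoolRouterSketch extends App {
    val system = ActorSystem("ClusterSystem")
    // At most 3 routees per node and 100 in total, deployed on remote members only.
    val workerRouter = system.actorOf(
      ClusterRouterPool(
        RoundRobinPool(0),
        ClusterRouterPoolSettings(
          totalInstances = 100,
          maxInstancesPerNode = 3,
          allowLocalRoutees = false,
          useRole = None)).props(Props[Worker]),
      name = "workerRouter")
  }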
diff --git a/akka-docs/rst/scala/cluster-usage.rst b/akka-docs/rst/scala/cluster-usage.rst
index 58aa03d8c3..cafa45da17 100644
--- a/akka-docs/rst/scala/cluster-usage.rst
+++ b/akka-docs/rst/scala/cluster-usage.rst
@@ -61,7 +61,7 @@ it retries this procedure until successful or shutdown.
 You define the seed nodes in the :ref:`cluster_configuration_scala` file (application.conf)::
 
   akka.cluster.seed-nodes = [
-    "akka.tcp://ClusterSystem@host1:2552", 
+    "akka.tcp://ClusterSystem@host1:2552",
     "akka.tcp://ClusterSystem@host2:2552"]
 
 This can also be defined as Java system properties when starting the JVM using the following syntax::
@@ -71,11 +71,11 @@ This can also be defined as Java system properties when starting the JVM using t
 
 The seed nodes can be started in any order and it is not necessary to have all
 seed nodes running, but the node configured as the first element in the ``seed-nodes``
-configuration list must be started when initially starting a cluster, otherwise the 
+configuration list must be started when initially starting a cluster, otherwise the
 other seed-nodes will not become initialized and no other node can join the cluster.
 The reason for the special first seed node is to avoid forming separated islands when
 starting from an empty cluster.
 
-It is quickest to start all configured seed nodes at the same time (order doesn't matter), 
+It is quickest to start all configured seed nodes at the same time (order doesn't matter),
 otherwise it can take up to the configured ``seed-node-timeout`` until the nodes
 can join.
@@ -97,10 +97,10 @@ node must join itself,and then the following nodes could join them to make up a
 You may also use ``Cluster(system).joinSeedNodes`` to join programmatically,
 which is attractive when dynamically discovering other nodes at startup by using some external tool or API.
 When using ``joinSeedNodes`` you should not include the node itself except for the node that is
-supposed to be the first seed node, and that should be placed first in parameter to 
+supposed to be the first seed node, and that should be placed first in parameter to
 ``joinSeedNodes``.
 
-Unsuccessful attempts to contact seed nodes are automatically retried after the time period defined in 
+Unsuccessful attempts to contact seed nodes are automatically retried after the time period defined in
 configuration property ``seed-node-timeout``. Unsuccessful attempt to join a specific seed node is
 automatically retried after the configured ``retry-unsuccessful-join-after``. Retrying means that it
 tries to contact all seed nodes and then joins the node that answers first. The first node in the list
@@ -132,7 +132,7 @@ You can enable automatic downing with configuration::
 
   akka.cluster.auto-down-unreachable-after = 120s
 
-This means that the cluster leader member will change the ``unreachable`` node 
+This means that the cluster leader member will change the ``unreachable`` node
 status to ``down`` automatically after the configured time of unreachability.
 
 Be aware of that using auto-down implies that two separate clusters will
@@ -161,9 +161,9 @@ It can also be performed programmatically with:
 
 .. includecode:: code/docs/cluster/ClusterDocSpec.scala#leave
 
 Note that this command can be issued to any member in the cluster, not necessarily the
-one that is leaving. The cluster extension, but not the actor system or JVM, of the 
-leaving member will be shutdown after the leader has changed status of the member to 
-`Exiting`. Thereafter the member will be removed from the cluster. Normally this is handled 
+one that is leaving. The cluster extension, but not the actor system or JVM, of the
+leaving member will be shutdown after the leader has changed status of the member to
+`Exiting`. Thereafter the member will be removed from the cluster. Normally this is handled
 automatically, but in case of network failures during this process it might still
 be necessary to set the node’s status to ``Down`` in order to complete the removal.
@@ -193,8 +193,8 @@ members to ``Up``.
 
 You can subscribe to the ``WeaklyUp`` membership event to make use of the members that
 are in this state, but you should be aware of that members on the other side of a network partition
-have no knowledge about the existence of the new members. You should for example not count 
-``WeaklyUp`` members in quorum decisions. 
+have no knowledge about the existence of the new members. You should for example not count
+``WeaklyUp`` members in quorum decisions.
 
 .. warning::
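A sketch of the programmatic join described above, with placeholder addresses matching the configuration example (the object name is invented):

  import akka.actor.{ ActorSystem, Address }
  import akka.cluster.Cluster

  object ProgrammaticJoin extends App {
    val system = ActorSystem("ClusterSystem")
    // Same semantics as the seed-nodes configuration shown above.
    Cluster(system).joinSeedNodes(List(
      Address("akka.tcp", "ClusterSystem", "host1", 2552),
      Address("akka.tcp", "ClusterSystem", "host2", 2552)))
  }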
@@ -216,8 +216,8 @@ A snapshot of the full state, ``akka.cluster.ClusterEvent.CurrentClusterState``,
 as the first message, followed by events for incremental updates.
 
 Note that you may receive an empty ``CurrentClusterState``, containing no members,
-if you start the subscription before the initial join procedure has completed. 
-This is expected behavior. When the node has been accepted in the cluster you will 
+if you start the subscription before the initial join procedure has completed.
+This is expected behavior. When the node has been accepted in the cluster you will
 receive ``MemberUp`` for that node, and other nodes.
 
 If you find it inconvenient to handle the ``CurrentClusterState`` you can use
@@ -225,12 +225,13 @@ If you find it inconvenient to handle the ``CurrentClusterState`` you can use
 That means that instead of receiving ``CurrentClusterState`` as the first message you will receive the
 events corresponding to the current state to mimic what you would have seen if you were listening to
 the events when they occurred in the past. Note that those initial events only correspond
-to the current state and it is not the full history of all changes that actually has occurred in the cluster. 
+to the current state and it is not the full history of all changes that actually has occurred in the cluster.
 
 .. includecode:: ../../../akka-samples/akka-sample-cluster-scala/src/main/scala/sample/cluster/simple/SimpleClusterListener.scala#subscribe
 
 The events to track the life-cycle of members are:
 
+* ``ClusterEvent.MemberJoined`` - A new member has joined the cluster and its status has been changed to ``Joining``.
 * ``ClusterEvent.MemberUp`` - A new member has joined the cluster and its status has been changed to ``Up``.
 * ``ClusterEvent.MemberExited`` - A member is leaving the cluster and its status has been changed to ``Exiting``
   Note that the node might already have been shutdown when this event is published on another node.
@@ -246,7 +247,7 @@ for details about the events.
 
 Instead of subscribing to cluster events it can sometimes be convenient to only get the full membership state
 with ``Cluster(system).state``. Note that this state is not necessarily in sync with the events published to a
-cluster subscription. 
+cluster subscription.
 
 Worker Dial-in Example
 ----------------------
@@ -280,10 +281,10 @@ Note that the ``TransformationFrontend`` actor watch the registered backend
 to be able to remove it from its list of available backend workers.
 Death watch uses the cluster failure detector for nodes in the cluster, i.e. it detects
 network failures and JVM crashes, in addition to graceful termination of watched
-actor. Death watch generates the ``Terminated`` message to the watching actor when the 
+actor. Death watch generates the ``Terminated`` message to the watching actor when the
 unreachable cluster node has been downed and removed.
 
-The `Typesafe Activator `_ tutorial named 
+The `Typesafe Activator `_ tutorial named
 `Akka Cluster Samples with Scala `_.
 contains the full source code and instructions of how to run the **Worker Dial-in Example**.
@@ -303,7 +304,7 @@ How To Startup when Cluster Size Reached
 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
 
 A common use case is to start actors after the cluster has been initialized,
-members have joined, and the cluster has reached a certain size. 
+members have joined, and the cluster has reached a certain size.
 
 With a configuration option you can define required number of members
 before the leader changes member status of 'Joining' members to 'Up'.
@@ -315,7 +316,7 @@ before the leader changes member status of 'Joining' members to 'Up'.
 
 .. includecode:: ../../../akka-samples/akka-sample-cluster-scala/src/main/resources/factorial.conf#role-min-nr-of-members
 
-You can start the actors in a ``registerOnMemberUp`` callback, which will 
+You can start the actors in a ``registerOnMemberUp`` callback, which will
 be invoked when the current member status is changed to 'Up', i.e. the cluster
 has at least the defined number of members.
@@ -345,8 +346,8 @@ Cluster Singleton
 For some use cases it is convenient and sometimes also mandatory to ensure that
 you have exactly one actor of a certain type running somewhere in the cluster.
 
-This can be implemented by subscribing to member events, but there are several corner 
-cases to consider. Therefore, this specific use case is made easily accessible by the 
+This can be implemented by subscribing to member events, but there are several corner
+cases to consider. Therefore, this specific use case is made easily accessible by the
 :ref:`cluster-singleton-scala`.
 
 Cluster Sharding
 ^^^^^^^^^^^^^^^^
@@ -356,7 +357,7 @@ Distributes actors across several nodes in the cluster and supports interaction
 with the actors using their logical identifier, but without having to care about
 their physical location in the cluster.
 
-See :ref:`cluster_sharding_scala` 
+See :ref:`cluster_sharding_scala`
 
 Distributed Publish Subscribe
 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
@@ -394,11 +395,11 @@ mark a node ``unreachable`` to have the rest of the cluster mark that node ``unr
 
 The failure detector will also detect if the node becomes ``reachable`` again.
 When all nodes that monitored the ``unreachable`` node detects it as ``reachable`` again
-the cluster, after gossip dissemination, will consider it as ``reachable``. 
+the cluster, after gossip dissemination, will consider it as ``reachable``.
 
 If system messages cannot be delivered to a node it will be quarantined and then it
 cannot come back from ``unreachable``. This can happen if the there are too many
-unacknowledged system messages (e.g. watch, Terminated, remote actor deployment, 
+unacknowledged system messages (e.g. watch, Terminated, remote actor deployment,
 failures of actors supervised by remote parent). Then the node needs to be moved
 to the ``down`` or ``removed`` states and the actor system must be restarted before
 it can join the cluster again.
@@ -454,10 +455,10 @@ This is how the curve looks like for ``acceptable-heartbeat-pause`` configured t
 
 Death watch uses the cluster failure detector for nodes in the cluster, i.e. it detects
 network failures and JVM crashes, in addition to graceful termination of watched
-actor. Death watch generates the ``Terminated`` message to the watching actor when the 
-unreachable cluster node has been downed and removed. 
+actor. Death watch generates the ``Terminated`` message to the watching actor when the
+unreachable cluster node has been downed and removed.
 
-If you encounter suspicious false positives when the system is under load you should 
+If you encounter suspicious false positives when the system is under load you should
 define a separate dispatcher for the cluster actors as described in :ref:`cluster_dispatcher_scala`.
 
 .. _cluster_aware_routers_scala:
@@ -469,25 +470,25 @@ All :ref:`routers ` can be made aware of member nodes in the clus
 deploying new routees or looking up routees on nodes in the cluster.
 When a node becomes unreachable or leaves the cluster
 the routees of that node are automatically unregistered from the router.
 When new nodes join the cluster, additional
-routees are added to the router, according to the configuration. Routees are also added 
+routees are added to the router, according to the configuration. Routees are also added
 when a node becomes reachable again, after having been unreachable.
 
 Cluster aware routers make use of members with status :ref:`WeaklyUp `
 if that feature is enabled.
 
-There are two distinct types of routers. 
+There are two distinct types of routers.
 
-* **Group - router that sends messages to the specified path using actor selection** 
-  The routees can be shared among routers running on different nodes in the cluster. 
-  One example of a use case for this type of router is a service running on some backend 
+* **Group - router that sends messages to the specified path using actor selection**
+  The routees can be shared among routers running on different nodes in the cluster.
+  One example of a use case for this type of router is a service running on some backend
   nodes in the cluster and used by routers running on front-end nodes in the cluster.
 
-* **Pool - router that creates routees as child actors and deploys them on remote nodes.** 
+* **Pool - router that creates routees as child actors and deploys them on remote nodes.**
   Each router will have its own routee instances. For example, if you start a router
   on 3 nodes in a 10-node cluster, you will have 30 routees in total if the router is
   configured to use one instance per node. The routees created by the different routers
   will not be shared among the routers. One example of a use case for this type of router
-  is a single master that coordinates jobs and delegates the actual work to routees running 
+  is a single master that coordinates jobs and delegates the actual work to routees running
   on other nodes in the cluster.
 
 Router with Group of Routees
 ----------------------------
@@ -502,12 +503,12 @@ That is not done by the router. The configuration for a group looks like this:
 
 The routee actors should be started as early as possible when starting the actor system,
 because the router will try to use them as soon as the member status is changed to 'Up'.
 
-The actor paths without address information that are defined in ``routees.paths`` are used for selecting the 
+The actor paths without address information that are defined in ``routees.paths`` are used for selecting the
 actors to which the messages will be forwarded to by the router. Messages will be forwarded
 to the routees using :ref:`ActorSelection `, so the same delivery semantics should be expected.
 It is possible to limit the lookup of routees to member nodes tagged with a certain role by
 specifying ``use-role``.
 
-``max-total-nr-of-instances`` defines total number of routees in the cluster. By default ``max-total-nr-of-instances`` 
+``max-total-nr-of-instances`` defines total number of routees in the cluster. By default ``max-total-nr-of-instances``
 is set to a high value (10000) that will result in new routees added to the router when nodes join the cluster.
 Set it to a lower value if you want to limit total number of routees.
@@ -520,7 +521,7 @@ See :ref:`cluster_configuration_scala` section for further descriptions of the s
 
 Router Example with Group of Routees
 ------------------------------------
 
-Let's take a look at how to use a cluster aware router with a group of routees, 
+Let's take a look at how to use a cluster aware router with a group of routees,
 i.e. router sending to the paths of the routees.
 
 The example application provides a service to calculate statistics for a text.
@@ -552,7 +553,7 @@ The router is configured with ``routees.paths``:
 
 This means that user requests can be sent to ``StatsService`` on any node and it will use
 ``StatsWorker`` on all nodes.
 
-The `Typesafe Activator `_ tutorial named 
+The `Typesafe Activator `_ tutorial named
 `Akka Cluster Samples with Scala `_.
 contains the full source code and instructions of how to run the **Router Example with Group of Routees**.
@@ -568,7 +569,7 @@ It is possible to limit the deployment of routees to member nodes tagged with a
 specifying ``use-role``.
 
 ``max-total-nr-of-instances`` defines total number of routees in the cluster, but the number of routees
-per node, ``max-nr-of-instances-per-node``, will not be exceeded. By default ``max-total-nr-of-instances`` 
+per node, ``max-nr-of-instances-per-node``, will not be exceeded. By default ``max-total-nr-of-instances``
 is set to a high value (10000) that will result in new routees added to the router when nodes join the cluster.
 Set it to a lower value if you want to limit total number of routees.
@@ -582,7 +583,7 @@ Router Example with Pool of Remote Deployed Routees
 ---------------------------------------------------
 
 Let's take a look at how to use a cluster aware router on single master node that creates
-and deploys workers. To keep track of a single master we use the :ref:`cluster-singleton-scala` 
+and deploys workers. To keep track of a single master we use the :ref:`cluster-singleton-scala`
 in the contrib module. The ``ClusterSingletonManager`` is started on each node.
 
 .. includecode:: ../../../akka-samples/akka-sample-cluster-scala/src/main/scala/sample/cluster/stats/StatsSampleOneMaster.scala#create-singleton-manager
@@ -599,7 +600,7 @@ All nodes start ``ClusterSingletonProxy`` and the ``ClusterSingletonManager``. T
 
 .. includecode:: ../../../akka-samples/akka-sample-cluster-scala/src/main/resources/stats2.conf#config-router-deploy
 
-The `Typesafe Activator `_ tutorial named 
+The `Typesafe Activator `_ tutorial named
 `Akka Cluster Samples with Scala `_.
 contains the full source code and instructions of how to run the **Router Example with Pool of Remote Deployed Routees**.
@@ -710,7 +711,7 @@ Run it without parameters to see instructions about how to use the script::
 
     is-singleton - Checks if the cluster is a singleton cluster (single node cluster)
     is-available - Checks if the member node is available
 
-  Where the should be on the format of 
+  Where the should be on the format of
   'akka.://@:'
 
   Examples: bin/akka-cluster localhost 9999 is-available
@@ -732,7 +733,7 @@ Example of system properties to enable remote monitoring and management::
 
 Configuration
 ^^^^^^^^^^^^^
 
-There are several configuration properties for the cluster. We refer to the 
+There are several configuration properties for the cluster. We refer to the
 :ref:`reference configuration ` for more information.
 
 Cluster Info Logging