From f0fba394ea2d1f00b0d2b7615768ef12df4ec088 Mon Sep 17 00:00:00 2001 From: Richard Imaoka Date: Mon, 26 Jun 2017 18:48:19 +0900 Subject: [PATCH] Merge paradox/scala/fault-tolerance.md and java/fault-tolerance.md (#23172) --- .../scala/akka/actor/SupervisorSpec.scala | 50 +++++- .../src/main/paradox/java/fault-tolerance.md | 161 +----------------- .../src/main/paradox/scala/fault-tolerance.md | 104 ++++++++--- .../java/jdocs/actor/FaultHandlingTest.java | 5 +- 4 files changed, 134 insertions(+), 186 deletions(-) mode change 100644 => 120000 akka-docs/src/main/paradox/java/fault-tolerance.md diff --git a/akka-actor-tests/src/test/scala/akka/actor/SupervisorSpec.scala b/akka-actor-tests/src/test/scala/akka/actor/SupervisorSpec.scala index fc29a47aa4..8aa4bace1b 100644 --- a/akka-actor-tests/src/test/scala/akka/actor/SupervisorSpec.scala +++ b/akka-actor-tests/src/test/scala/akka/actor/SupervisorSpec.scala @@ -179,7 +179,13 @@ class SupervisorSpec extends AkkaSpec(SupervisorSpec.config) with BeforeAndAfter def kill(pingPongActor: ActorRef) = { val result = (pingPongActor.?(DieReply)(DilatedTimeout)) - expectMsg(Timeout, ExceptionMessage) + expectMsg(Timeout, ExceptionMessage) //this is sent from PingPongActor's postRestart() + intercept[RuntimeException] { Await.result(result, DilatedTimeout) } + } + + def killExpectNoRestart(pingPongActor: ActorRef) = { + val result = (pingPongActor.?(DieReply)(DilatedTimeout)) + expectNoMsg(500 milliseconds) intercept[RuntimeException] { Await.result(result, DilatedTimeout) } } @@ -496,4 +502,46 @@ class SupervisorSpec extends AkkaSpec(SupervisorSpec.config) with BeforeAndAfter } } + + "restarts a child infinitely if maxNrOfRetries = -1 and withinTimeRange = Duration.Inf" in { + val supervisor = system.actorOf(Props(new Supervisor( + OneForOneStrategy(maxNrOfRetries = -1, withinTimeRange = Duration.Inf)(classOf[Exception] :: Nil)))) + + val pingpong = child(supervisor, Props(new PingPongActor(testActor))) + + kill(pingpong) + 
kill(pingpong) + kill(pingpong) + kill(pingpong) + kill(pingpong) + kill(pingpong) + kill(pingpong) + ping(pingpong) + } + + "treats maxNrOfRetries = -1 as maxNrOfRetries = 1 if withinTimeRange is non-infinite Duration" in { + val supervisor = system.actorOf(Props(new Supervisor( + OneForOneStrategy(maxNrOfRetries = -1, withinTimeRange = 10 seconds)(classOf[Exception] :: Nil)))) + + val pingpong = child(supervisor, Props(new PingPongActor(testActor))) + + ping(pingpong) + kill(pingpong) + ping(pingpong) + killExpectNoRestart(pingpong) + } + + "treats withinTimeRange = Duration.Inf as a single infinite restart window" in { + val supervisor = system.actorOf(Props(new Supervisor( + OneForOneStrategy(maxNrOfRetries = 3, withinTimeRange = Duration.Inf)(classOf[Exception] :: Nil)))) + + val pingpong = child(supervisor, Props(new PingPongActor(testActor))) + + //impossible to confirm if the restart window is infinite, so making sure maxNrOfRetries is respected correctly + kill(pingpong) + kill(pingpong) + kill(pingpong) + killExpectNoRestart(pingpong) + } + } diff --git a/akka-docs/src/main/paradox/java/fault-tolerance.md b/akka-docs/src/main/paradox/java/fault-tolerance.md deleted file mode 100644 index 4908305d66..0000000000 --- a/akka-docs/src/main/paradox/java/fault-tolerance.md +++ /dev/null @@ -1,160 +0,0 @@ -# Fault Tolerance - -As explained in @ref:[Actor Systems](general/actor-systems.md) each actor is the supervisor of its -children, and as such each actor defines fault handling supervisor strategy. -This strategy cannot be changed afterwards as it is an integral part of the -actor system’s structure. - -## Fault Handling in Practice - -First, let us look at a sample that illustrates one way to handle data store errors, -which is a typical source of failure in real world applications. Of course it depends -on the actual application what is possible to do when the data store is unavailable, -but in this sample we use a best effort re-connect approach. 
- -Read the following source code. The inlined comments explain the different pieces of -the fault handling and why they are added. It is also highly recommended to run this -sample as it is easy to follow the log output to understand what is happening in runtime. - -@@toc { depth=1 } - -@@@ index - -* [fault-tolerance-sample](fault-tolerance-sample.md) - -@@@ - -## Creating a Supervisor Strategy - -The following sections explain the fault handling mechanism and alternatives -in more depth. - -For the sake of demonstration let us consider the following strategy: - -@@snip [FaultHandlingTest.java]($code$/java/jdocs/actor/FaultHandlingTest.java) { #strategy } - -I have chosen a few well-known exception types in order to demonstrate the -application of the fault handling directives described in @ref:[supervision](general/supervision.md). -First off, it is a one-for-one strategy, meaning that each child is treated -separately (an all-for-one strategy works very similarly, the only difference -is that any decision is applied to all children of the supervisor, not only the -failing one). There are limits set on the restart frequency, namely maximum 10 -restarts per minute. `-1` and `Duration.Inf()` means that the respective limit -does not apply, leaving the possibility to specify an absolute upper limit on the -restarts or to make the restarts work infinitely. -The child actor is stopped if the limit is exceeded. - -@@@ note - -If the strategy is declared inside the supervising actor (as opposed to -a separate class) its decider has access to all internal state of -the actor in a thread-safe fashion, including obtaining a reference to the -currently failed child (available as the `sender` of the failure message). - -@@@ - -### Default Supervisor Strategy - -`Escalate` is used if the defined strategy doesn't cover the exception that was thrown. 
- -When the supervisor strategy is not defined for an actor the following -exceptions are handled by default: - - * `ActorInitializationException` will stop the failing child actor - * `ActorKilledException` will stop the failing child actor - * `DeathPactException` will stop the failing child actor - * `Exception` will restart the failing child actor - * Other types of `Throwable` will be escalated to parent actor - -If the exception escalate all the way up to the root guardian it will handle it -in the same way as the default strategy defined above. - -### Stopping Supervisor Strategy - -Closer to the Erlang way is the strategy to just stop children when they fail -and then take corrective action in the supervisor when DeathWatch signals the -loss of the child. This strategy is also provided pre-packaged as -`SupervisorStrategy.stoppingStrategy` with an accompanying -`StoppingSupervisorStrategy` configurator to be used when you want the -`"/user"` guardian to apply it. - -### Logging of Actor Failures - -By default the `SupervisorStrategy` logs failures unless they are escalated. -Escalated failures are supposed to be handled, and potentially logged, at a level -higher in the hierarchy. - -You can mute the default logging of a `SupervisorStrategy` by setting -`loggingEnabled` to `false` when instantiating it. Customized logging -can be done inside the `Decider`. Note that the reference to the currently -failed child is available as the `sender` when the `SupervisorStrategy` is -declared inside the supervising actor. - -You may also customize the logging in your own `SupervisorStrategy` implementation -by overriding the `logFailure` method. - -## Supervision of Top-Level Actors - -Toplevel actors means those which are created using `system.actorOf()`, and -they are children of the @ref:[User Guardian](general/supervision.md#user-guardian). There are no -special rules applied in this case, the guardian simply applies the configured -strategy. 
- -## Test Application - -The following section shows the effects of the different directives in practice, -where a test setup is needed. First off, we need a suitable supervisor: - -@@snip [FaultHandlingTest.java]($code$/java/jdocs/actor/FaultHandlingTest.java) { #supervisor } - -This supervisor will be used to create a child, with which we can experiment: - -@@snip [FaultHandlingTest.java]($code$/java/jdocs/actor/FaultHandlingTest.java) { #child } - -The test is easier by using the utilities described in @ref:[TestKit](testing.md), -where `TestProbe` provides an actor ref useful for receiving and inspecting replies. - -@@snip [FaultHandlingTest.java]($code$/java/jdocs/actor/FaultHandlingTest.java) { #testkit } - -Let us create actors: - -@@snip [FaultHandlingTest.java]($code$/java/jdocs/actor/FaultHandlingTest.java) { #create } - -The first test shall demonstrate the `Resume` directive, so we try it out by -setting some non-initial state in the actor and have it fail: - -@@snip [FaultHandlingTest.java]($code$/java/jdocs/actor/FaultHandlingTest.java) { #resume } - -As you can see the value 42 survives the fault handling directive. Now, if we -change the failure to a more serious `NullPointerException`, that will no -longer be the case: - -@@snip [FaultHandlingTest.java]($code$/java/jdocs/actor/FaultHandlingTest.java) { #restart } - -And finally in case of the fatal `IllegalArgumentException` the child will be -terminated by the supervisor: - -@@snip [FaultHandlingTest.java]($code$/java/jdocs/actor/FaultHandlingTest.java) { #stop } - -Up to now the supervisor was completely unaffected by the child’s failure, -because the directives set did handle it. In case of an `Exception`, this is not -true anymore and the supervisor escalates the failure. 
- -@@snip [FaultHandlingTest.java]($code$/java/jdocs/actor/FaultHandlingTest.java) { #escalate-kill } - -The supervisor itself is supervised by the top-level actor provided by the -`ActorSystem`, which has the default policy to restart in case of all -`Exception` cases (with the notable exceptions of -`ActorInitializationException` and `ActorKilledException`). Since the -default directive in case of a restart is to kill all children, we expected our poor -child not to survive this failure. - -In case this is not desired (which depends on the use case), we need to use a -different supervisor which overrides this behavior. - -@@snip [FaultHandlingTest.java]($code$/java/jdocs/actor/FaultHandlingTest.java) { #supervisor2 } - -With this parent, the child survives the escalated restart, as demonstrated in -the last test: - -@@snip [FaultHandlingTest.java]($code$/java/jdocs/actor/FaultHandlingTest.java) { #escalate-restart } diff --git a/akka-docs/src/main/paradox/java/fault-tolerance.md b/akka-docs/src/main/paradox/java/fault-tolerance.md new file mode 120000 index 0000000000..813f37ec73 --- /dev/null +++ b/akka-docs/src/main/paradox/java/fault-tolerance.md @@ -0,0 +1 @@ +../scala/fault-tolerance.md \ No newline at end of file diff --git a/akka-docs/src/main/paradox/scala/fault-tolerance.md b/akka-docs/src/main/paradox/scala/fault-tolerance.md index ae0d49e9e6..ed9d70b8c3 100644 --- a/akka-docs/src/main/paradox/scala/fault-tolerance.md +++ b/akka-docs/src/main/paradox/scala/fault-tolerance.md @@ -31,29 +31,42 @@ in more depth. 
For the sake of demonstration let us consider the following strategy: -@@snip [FaultHandlingDocSpec.scala]($code$/scala/docs/actor/FaultHandlingDocSpec.scala) { #strategy } +Scala +: @@snip [FaultHandlingDocSpec.scala]($code$/scala/docs/actor/FaultHandlingDocSpec.scala) { #strategy } -I have chosen a few well-known exception types in order to demonstrate the +Java +: @@snip [FaultHandlingTest.java]($code$/java/jdocs/actor/FaultHandlingTest.java) { #strategy } + +We have chosen a few well-known exception types in order to demonstrate the application of the fault handling directives described in @ref:[supervision](general/supervision.md). First off, it is a one-for-one strategy, meaning that each child is treated separately (an all-for-one strategy works very similarly, the only difference is that any decision is applied to all children of the supervisor, not only the -failing one). There are limits set on the restart frequency, namely maximum 10 -restarts per minute; each of these settings could be left out, which means -that the respective limit does not apply, leaving the possibility to specify an -absolute upper limit on the restarts or to make the restarts work infinitely. -The child actor is stopped if the limit is exceeded. +failing one). +In the above example, `10` and @scala[`1 minute`]@java[`Duration.create(1, TimeUnit.MINUTES)`] are passed to the `maxNrOfRetries` +and `withinTimeRange` parameters respectively, which means that the strategy restarts a child up to 10 times per minute. +The child actor is stopped if the restart count exceeds `maxNrOfRetries` during the `withinTimeRange` duration. -The match statement which forms the bulk of the body is of type `Decider`, -which is a `PartialFunction[Throwable, Directive]`. This -is the piece which maps child failure types to their corresponding directives. +Also, there are special values for these parameters. 
If you specify: + +* `-1` to `maxNrOfRetries`, and @scala[`Duration.Inf`]@java[`Duration.Inf()`] to `withinTimeRange` + * then the child is always restarted without any limit +* `-1` to `maxNrOfRetries`, and a non-infinite `Duration` to `withinTimeRange` + * `maxNrOfRetries` is treated as `1` +* a non-negative number to `maxNrOfRetries` and @scala[`Duration.Inf`]@java[`Duration.Inf()`] to `withinTimeRange` + * `withinTimeRange` is treated as an infinite duration, i.e. no matter how long it takes, once the restart count exceeds `maxNrOfRetries`, the child actor is stopped + +The match statement which forms the bulk of the body +@scala[is of type `Decider` which is a `PartialFunction[Throwable, Directive]`.] +@java[consists of `PFBuilder` returned by `DeciderBuilder`'s `match` method, where the builder is finished by the `build` method.] +This is the piece which maps child failure types to their corresponding directives. @@@ note If the strategy is declared inside the supervising actor (as opposed to -within a companion object) its decider has access to all internal state of +@scala[within a companion object]@java[a separate class]) its decider has access to all internal state of the actor in a thread-safe fashion, including obtaining a reference to the -currently failed child (available as the `sender` of the failure message). +currently failed child (available as the @scala[`sender`]@java[`getSender()`] of the failure message). @@@ @@ -73,10 +86,14 @@ exceptions are handled by default: If the exception escalate all the way up to the root guardian it will handle it in the same way as the default strategy defined above. 
+@@@ div { .group-scala } + You can combine your own strategy with the default strategy: @@snip [FaultHandlingDocSpec.scala]($code$/scala/docs/actor/FaultHandlingDocSpec.scala) { #default-strategy-fallback } +@@@ + ### Stopping Supervisor Strategy Closer to the Erlang way is the strategy to just stop children when they fail @@ -113,41 +130,74 @@ strategy. The following section shows the effects of the different directives in practice, where a test setup is needed. First off, we need a suitable supervisor: -@@snip [FaultHandlingDocSpec.scala]($code$/scala/docs/actor/FaultHandlingDocSpec.scala) { #supervisor } +Scala +: @@snip [FaultHandlingDocSpec.scala]($code$/scala/docs/actor/FaultHandlingDocSpec.scala) { #supervisor } + +Java +: @@snip [FaultHandlingTest.java]($code$/java/jdocs/actor/FaultHandlingTest.java) { #supervisor } This supervisor will be used to create a child, with which we can experiment: -@@snip [FaultHandlingDocSpec.scala]($code$/scala/docs/actor/FaultHandlingDocSpec.scala) { #child } +Scala +: @@snip [FaultHandlingDocSpec.scala]($code$/scala/docs/actor/FaultHandlingDocSpec.scala) { #child } -The test is easier by using the utilities described in @ref:[Testing Actor Systems](testing.md). +Java +: @@snip [FaultHandlingTest.java]($code$/java/jdocs/actor/FaultHandlingTest.java) { #child } -@@snip [FaultHandlingDocSpec.scala]($code$/scala/docs/actor/FaultHandlingDocSpec.scala) { #testkit } +The test is easier by using the utilities described in @scala[@ref:[Testing Actor Systems](testing.md)]@java[@ref:[TestKit](testing.md)], +where `TestProbe` provides an actor ref useful for receiving and inspecting replies. 
+ +Scala +: @@snip [FaultHandlingDocSpec.scala]($code$/scala/docs/actor/FaultHandlingDocSpec.scala) { #testkit } + +Java +: @@snip [FaultHandlingTest.java]($code$/java/jdocs/actor/FaultHandlingTest.java) { #testkit } Let us create actors: -@@snip [FaultHandlingDocSpec.scala]($code$/scala/docs/actor/FaultHandlingDocSpec.scala) { #create } +Scala +: @@snip [FaultHandlingDocSpec.scala]($code$/scala/docs/actor/FaultHandlingDocSpec.scala) { #create } + +Java +: @@snip [FaultHandlingTest.java]($code$/java/jdocs/actor/FaultHandlingTest.java) { #create } The first test shall demonstrate the `Resume` directive, so we try it out by setting some non-initial state in the actor and have it fail: -@@snip [FaultHandlingDocSpec.scala]($code$/scala/docs/actor/FaultHandlingDocSpec.scala) { #resume } +Scala +: @@snip [FaultHandlingDocSpec.scala]($code$/scala/docs/actor/FaultHandlingDocSpec.scala) { #resume } + +Java +: @@snip [FaultHandlingTest.java]($code$/java/jdocs/actor/FaultHandlingTest.java) { #resume } As you can see the value 42 survives the fault handling directive. 
Now, if we change the failure to a more serious `NullPointerException`, that will no longer be the case: -@@snip [FaultHandlingDocSpec.scala]($code$/scala/docs/actor/FaultHandlingDocSpec.scala) { #restart } +Scala +: @@snip [FaultHandlingDocSpec.scala]($code$/scala/docs/actor/FaultHandlingDocSpec.scala) { #restart } + +Java +: @@snip [FaultHandlingTest.java]($code$/java/jdocs/actor/FaultHandlingTest.java) { #restart } And finally in case of the fatal `IllegalArgumentException` the child will be terminated by the supervisor: -@@snip [FaultHandlingDocSpec.scala]($code$/scala/docs/actor/FaultHandlingDocSpec.scala) { #stop } +Scala +: @@snip [FaultHandlingDocSpec.scala]($code$/scala/docs/actor/FaultHandlingDocSpec.scala) { #stop } + +Java +: @@snip [FaultHandlingTest.java]($code$/java/jdocs/actor/FaultHandlingTest.java) { #stop } Up to now the supervisor was completely unaffected by the child’s failure, because the directives set did handle it. In case of an `Exception`, this is not true anymore and the supervisor escalates the failure. -@@snip [FaultHandlingDocSpec.scala]($code$/scala/docs/actor/FaultHandlingDocSpec.scala) { #escalate-kill } +Scala +: @@snip [FaultHandlingDocSpec.scala]($code$/scala/docs/actor/FaultHandlingDocSpec.scala) { #escalate-kill } + +Java +: @@snip [FaultHandlingTest.java]($code$/java/jdocs/actor/FaultHandlingTest.java) { #escalate-kill } The supervisor itself is supervised by the top-level actor provided by the `ActorSystem`, which has the default policy to restart in case of all @@ -159,9 +209,17 @@ child not to survive this failure. In case this is not desired (which depends on the use case), we need to use a different supervisor which overrides this behavior. 
-@@snip [FaultHandlingDocSpec.scala]($code$/scala/docs/actor/FaultHandlingDocSpec.scala) { #supervisor2 } +Scala +: @@snip [FaultHandlingDocSpec.scala]($code$/scala/docs/actor/FaultHandlingDocSpec.scala) { #supervisor2 } + +Java +: @@snip [FaultHandlingTest.java]($code$/java/jdocs/actor/FaultHandlingTest.java) { #supervisor2 } With this parent, the child survives the escalated restart, as demonstrated in the last test: -@@snip [FaultHandlingDocSpec.scala]($code$/scala/docs/actor/FaultHandlingDocSpec.scala) { #escalate-restart } +Scala +: @@snip [FaultHandlingDocSpec.scala]($code$/scala/docs/actor/FaultHandlingDocSpec.scala) { #escalate-restart } + +Java +: @@snip [FaultHandlingTest.java]($code$/java/jdocs/actor/FaultHandlingTest.java) { #escalate-restart } diff --git a/akka-docs/src/test/java/jdocs/actor/FaultHandlingTest.java b/akka-docs/src/test/java/jdocs/actor/FaultHandlingTest.java index 9bbf4cd217..8cf4aa93e6 100644 --- a/akka-docs/src/test/java/jdocs/actor/FaultHandlingTest.java +++ b/akka-docs/src/test/java/jdocs/actor/FaultHandlingTest.java @@ -11,6 +11,7 @@ import com.typesafe.config.Config; import com.typesafe.config.ConfigFactory; import jdocs.AbstractJavaTest; import java.util.Optional; +import java.util.concurrent.TimeUnit; import static akka.pattern.Patterns.ask; @@ -56,7 +57,7 @@ public class FaultHandlingTest extends AbstractJavaTest { //#strategy private static SupervisorStrategy strategy = - new OneForOneStrategy(10, Duration.create("1 minute"), DeciderBuilder. + new OneForOneStrategy(10, Duration.create(1, TimeUnit.MINUTES), DeciderBuilder. match(ArithmeticException.class, e -> resume()). match(NullPointerException.class, e -> restart()). match(IllegalArgumentException.class, e -> stop()). @@ -87,7 +88,7 @@ public class FaultHandlingTest extends AbstractJavaTest { //#strategy2 private static SupervisorStrategy strategy = - new OneForOneStrategy(10, Duration.create("1 minute"), DeciderBuilder. 
+ new OneForOneStrategy(10, Duration.create(1, TimeUnit.MINUTES), DeciderBuilder. match(ArithmeticException.class, e -> resume()). match(NullPointerException.class, e -> restart()). match(IllegalArgumentException.class, e -> stop()).