diff --git a/akka-actor-tests/src/test/scala/akka/actor/ActorRefSpec.scala b/akka-actor-tests/src/test/scala/akka/actor/ActorRefSpec.scala index 3056dc9e95..a003d25757 100644 --- a/akka-actor-tests/src/test/scala/akka/actor/ActorRefSpec.scala +++ b/akka-actor-tests/src/test/scala/akka/actor/ActorRefSpec.scala @@ -227,7 +227,7 @@ class ActorRefSpec extends AkkaSpec with DefaultTimeout { contextStackMustBeEmpty } - filterException[java.lang.IllegalStateException] { + EventFilter[ActorInitializationException](occurrences = 1) intercept { (intercept[java.lang.IllegalStateException] { wrap(result ⇒ actorOf(Props(new OuterActor(actorOf(Props(promiseIntercept({ throw new IllegalStateException("Ur state be b0rked"); new InnerActor })(result))))))) @@ -257,14 +257,14 @@ class ActorRefSpec extends AkkaSpec with DefaultTimeout { val in = new ObjectInputStream(new ByteArrayInputStream(bytes)) val readA = in.readObject - a.isInstanceOf[LocalActorRef] must be === true - readA.isInstanceOf[LocalActorRef] must be === true + a.isInstanceOf[ActorRefWithCell] must be === true + readA.isInstanceOf[ActorRefWithCell] must be === true (readA eq a) must be === true } val ser = new JavaSerializer(esys) val readA = ser.fromBinary(bytes, None) - readA.isInstanceOf[LocalActorRef] must be === true + readA.isInstanceOf[ActorRefWithCell] must be === true (readA eq a) must be === true } @@ -369,13 +369,13 @@ class ActorRefSpec extends AkkaSpec with DefaultTimeout { val timeout = Timeout(20000) val ref = system.actorOf(Props(new Actor { def receive = { - case 5 ⇒ sender.tell("five") - case null ⇒ sender.tell("null") + case 5 ⇒ sender.tell("five") + case 0 ⇒ sender.tell("null") } })) val ffive = (ref.ask(5)(timeout)).mapTo[String] - val fnull = (ref.ask(null)(timeout)).mapTo[String] + val fnull = (ref.ask(0)(timeout)).mapTo[String] ref ! 
PoisonPill Await.result(ffive, timeout.duration) must be("five") diff --git a/akka-actor-tests/src/test/scala/akka/actor/ActorSystemSpec.scala b/akka-actor-tests/src/test/scala/akka/actor/ActorSystemSpec.scala index 7ae79fea34..1a2d64bb41 100644 --- a/akka-actor-tests/src/test/scala/akka/actor/ActorSystemSpec.scala +++ b/akka-actor-tests/src/test/scala/akka/actor/ActorSystemSpec.scala @@ -10,6 +10,9 @@ import akka.dispatch.Await import akka.util.duration._ import scala.collection.JavaConverters import java.util.concurrent.{ TimeUnit, RejectedExecutionException, CountDownLatch, ConcurrentLinkedQueue } +import akka.pattern.ask +import akka.util.Timeout +import akka.dispatch.Future class JavaExtensionSpec extends JavaExtension with JUnitSuite @@ -21,8 +24,46 @@ object TestExtension extends ExtensionId[TestExtension] with ExtensionIdProvider // Dont't place inside ActorSystemSpec object, since it will not be garbage collected and reference to system remains class TestExtension(val system: ExtendedActorSystem) extends Extension +object ActorSystemSpec { + + class Waves extends Actor { + var master: ActorRef = _ + var terminaters = Set[ActorRef]() + + def receive = { + case n: Int ⇒ + master = sender + terminaters = Set() ++ (for (i ← 1 to n) yield { + val man = context.watch(context.system.actorOf(Props[Terminater])) + man ! "run" + man + }) + case Terminated(child) if terminaters contains child ⇒ + terminaters -= child + if (terminaters.isEmpty) { + master ! "done" + context stop self + } + } + + override def preRestart(cause: Throwable, msg: Option[Any]) { + if (master ne null) { + master ! 
"failed with " + cause + " while processing " + msg + } + context stop self + } + } + + class Terminater extends Actor { + def receive = { + case "run" ⇒ context.stop(self) + } + } + +} + @org.junit.runner.RunWith(classOf[org.scalatest.junit.JUnitRunner]) -class ActorSystemSpec extends AkkaSpec("""akka.extensions = ["akka.actor.TestExtension$"]""") { +class ActorSystemSpec extends AkkaSpec("""akka.extensions = ["akka.actor.TestExtension$"]""") with ImplicitSender { "An ActorSystem" must { @@ -112,6 +153,35 @@ class ActorSystemSpec extends AkkaSpec("""akka.extensions = ["akka.actor.TestExt }.getMessage must be("Must be called prior to system shutdown.") } + "reliably create waves of actors" in { + import system.dispatcher + implicit val timeout = Timeout(30 seconds) + val waves = for (i ← 1 to 3) yield system.actorOf(Props[ActorSystemSpec.Waves]) ? 50000 + Await.result(Future.sequence(waves), timeout.duration + 5.seconds) must be === Seq("done", "done", "done") + } + + "reliable deny creation of actors while shutting down" in { + val system = ActorSystem() + system.scheduler.scheduleOnce(200 millis) { system.shutdown() } + var failing = false + var created = Vector.empty[ActorRef] + while (!system.isTerminated && system.uptime < 5) { + try { + val t = system.actorOf(Props[ActorSystemSpec.Terminater]) + failing must not be true // because once failing => always failing (it’s due to shutdown) + created :+= t + } catch { + case _: IllegalStateException ⇒ failing = true + } + } + if (system.uptime >= 5) { + println(created.last) + println(system.asInstanceOf[ExtendedActorSystem].printTree) + system.uptime must be < 5L + } + created filter (ref ⇒ !ref.isTerminated && !ref.asInstanceOf[ActorRefWithCell].underlying.isInstanceOf[UnstartedCell]) must be(Seq()) + } + } -} \ No newline at end of file +} diff --git a/akka-actor-tests/src/test/scala/akka/actor/DeathWatchSpec.scala b/akka-actor-tests/src/test/scala/akka/actor/DeathWatchSpec.scala index 97eec5be01..8a21f5f070 
100644 --- a/akka-actor-tests/src/test/scala/akka/actor/DeathWatchSpec.scala +++ b/akka-actor-tests/src/test/scala/akka/actor/DeathWatchSpec.scala @@ -143,6 +143,26 @@ trait DeathWatchSpec { this: AkkaSpec with ImplicitSender with DefaultTimeout result must be(Seq(1, 2, 3)) } } + + "be able to watch a child with the same name after the old died" in { + val parent = system.actorOf(Props(new Actor { + def receive = { + case "NKOTB" ⇒ + val currentKid = context.watch(context.actorOf(Props(ctx ⇒ { case "NKOTB" ⇒ ctx stop ctx.self }), "kid")) + currentKid forward "NKOTB" + context become { + case Terminated(`currentKid`) ⇒ + testActor ! "GREEN" + context unbecome + } + } + })) + + parent ! "NKOTB" + expectMsg("GREEN") + parent ! "NKOTB" + expectMsg("GREEN") + } } } diff --git a/akka-actor-tests/src/test/scala/akka/actor/FSMTimingSpec.scala b/akka-actor-tests/src/test/scala/akka/actor/FSMTimingSpec.scala index df47c801bb..76d8df1e92 100644 --- a/akka-actor-tests/src/test/scala/akka/actor/FSMTimingSpec.scala +++ b/akka-actor-tests/src/test/scala/akka/actor/FSMTimingSpec.scala @@ -140,13 +140,13 @@ class FSMTimingSpec extends AkkaSpec with ImplicitSender { object FSMTimingSpec { def suspend(actorRef: ActorRef): Unit = actorRef match { - case l: LocalActorRef ⇒ l.suspend() - case _ ⇒ + case l: ActorRefWithCell ⇒ l.suspend() + case _ ⇒ } def resume(actorRef: ActorRef): Unit = actorRef match { - case l: LocalActorRef ⇒ l.resume() - case _ ⇒ + case l: ActorRefWithCell ⇒ l.resume() + case _ ⇒ } trait State diff --git a/akka-actor-tests/src/test/scala/akka/actor/dispatch/ActorModelSpec.scala b/akka-actor-tests/src/test/scala/akka/actor/dispatch/ActorModelSpec.scala index 4d83c85b82..da789d9dce 100644 --- a/akka-actor-tests/src/test/scala/akka/actor/dispatch/ActorModelSpec.scala +++ b/akka-actor-tests/src/test/scala/akka/actor/dispatch/ActorModelSpec.scala @@ -3,24 +3,23 @@ */ package akka.actor.dispatch -import org.scalatest.Assertions._ -import akka.testkit._ -import 
akka.dispatch._ -import akka.util.Timeout -import java.util.concurrent.atomic.AtomicLong -import java.util.concurrent.atomic.AtomicInteger -import java.util.concurrent.{ ConcurrentHashMap, CountDownLatch, TimeUnit } -import akka.util.Switch import java.rmi.RemoteException -import org.junit.{ After, Test } -import akka.actor._ -import util.control.NoStackTrace -import akka.actor.ActorSystem -import akka.util.duration._ -import akka.event.Logging.Error +import java.util.concurrent.{ TimeUnit, CountDownLatch, ConcurrentHashMap } +import java.util.concurrent.atomic.{ AtomicLong, AtomicInteger } + +import org.junit.runner.RunWith +import org.scalatest.Assertions.{ fail, assert } +import org.scalatest.junit.JUnitRunner + import com.typesafe.config.Config -import akka.util.Duration + +import akka.actor._ +import akka.dispatch._ +import akka.event.Logging.Error import akka.pattern.ask +import akka.testkit._ +import akka.util.{ Timeout, Switch, Duration } +import akka.util.duration._ object ActorModelSpec { @@ -201,7 +200,7 @@ object ActorModelSpec { msgsReceived: Long = statsFor(actorRef, dispatcher).msgsReceived.get(), msgsProcessed: Long = statsFor(actorRef, dispatcher).msgsProcessed.get(), restarts: Long = statsFor(actorRef, dispatcher).restarts.get())(implicit system: ActorSystem) { - val stats = statsFor(actorRef, Option(dispatcher).getOrElse(actorRef.asInstanceOf[LocalActorRef].underlying.dispatcher)) + val stats = statsFor(actorRef, Option(dispatcher).getOrElse(actorRef.asInstanceOf[ActorRefWithCell].underlying.asInstanceOf[ActorCell].dispatcher)) val deadline = System.currentTimeMillis + 1000 try { await(deadline)(stats.suspensions.get() == suspensions) @@ -241,6 +240,13 @@ abstract class ActorModelSpec(config: String) extends AkkaSpec(config) with Defa def newTestActor(dispatcher: String) = system.actorOf(Props[DispatcherActor].withDispatcher(dispatcher)) + def awaitStarted(ref: ActorRef): Unit = { + awaitCond(ref match { + case r: RepointableRef ⇒ r.isStarted + 
case _ ⇒ true + }, 1 second, 10 millis) + } + protected def interceptedDispatcher(): MessageDispatcherInterceptor protected def dispatcherType: String @@ -280,6 +286,7 @@ abstract class ActorModelSpec(config: String) extends AkkaSpec(config) with Defa implicit val dispatcher = interceptedDispatcher() val start, oneAtATime = new CountDownLatch(1) val a = newTestActor(dispatcher.id) + awaitStarted(a) a ! CountDown(start) assertCountDown(start, 3.seconds.dilated.toMillis, "Should process first message within 3 seconds") @@ -328,7 +335,8 @@ abstract class ActorModelSpec(config: String) extends AkkaSpec(config) with Defa "not process messages for a suspended actor" in { implicit val dispatcher = interceptedDispatcher() - val a = newTestActor(dispatcher.id).asInstanceOf[LocalActorRef] + val a = newTestActor(dispatcher.id).asInstanceOf[InternalActorRef] + awaitStarted(a) val done = new CountDownLatch(1) a.suspend a ! CountDown(done) @@ -436,6 +444,7 @@ abstract class ActorModelSpec(config: String) extends AkkaSpec(config) with Defa "not double-deregister" in { implicit val dispatcher = interceptedDispatcher() + for (i ← 1 to 1000) system.actorOf(Props.empty) val a = newTestActor(dispatcher.id) a ! 
DoubleStop awaitCond(statsFor(a, dispatcher).registers.get == 1) diff --git a/akka-actor-tests/src/test/scala/akka/actor/dispatch/BalancingDispatcherSpec.scala b/akka-actor-tests/src/test/scala/akka/actor/dispatch/BalancingDispatcherSpec.scala index 4060587b73..1a5c7e8661 100644 --- a/akka-actor-tests/src/test/scala/akka/actor/dispatch/BalancingDispatcherSpec.scala +++ b/akka-actor-tests/src/test/scala/akka/actor/dispatch/BalancingDispatcherSpec.scala @@ -1,8 +1,12 @@ package akka.actor.dispatch import java.util.concurrent.{ TimeUnit, CountDownLatch } -import akka.dispatch.{ Mailbox, Dispatchers } -import akka.actor.{ LocalActorRef, IllegalActorStateException, Actor, Props } + +import org.junit.runner.RunWith +import org.scalatest.junit.JUnitRunner + +import akka.actor.{ Props, ActorRefWithCell, ActorCell, Actor } +import akka.dispatch.Mailbox import akka.testkit.AkkaSpec object BalancingDispatcherSpec { @@ -51,8 +55,8 @@ class BalancingDispatcherSpec extends AkkaSpec(BalancingDispatcherSpec.config) { "have fast actor stealing work from slow actor" in { val finishedCounter = new CountDownLatch(110) - val slow = system.actorOf(Props(new DelayableActor(50, finishedCounter)).withDispatcher(delayableActorDispatcher)).asInstanceOf[LocalActorRef] - val fast = system.actorOf(Props(new DelayableActor(10, finishedCounter)).withDispatcher(delayableActorDispatcher)).asInstanceOf[LocalActorRef] + val slow = system.actorOf(Props(new DelayableActor(50, finishedCounter)).withDispatcher(delayableActorDispatcher)).asInstanceOf[ActorRefWithCell] + val fast = system.actorOf(Props(new DelayableActor(10, finishedCounter)).withDispatcher(delayableActorDispatcher)).asInstanceOf[ActorRefWithCell] var sentToFast = 0 @@ -76,11 +80,11 @@ class BalancingDispatcherSpec extends AkkaSpec(BalancingDispatcherSpec.config) { } finishedCounter.await(5, TimeUnit.SECONDS) - fast.underlying.mailbox.asInstanceOf[Mailbox].hasMessages must be(false) - 
slow.underlying.mailbox.asInstanceOf[Mailbox].hasMessages must be(false) - fast.underlying.actor.asInstanceOf[DelayableActor].invocationCount must be > sentToFast - fast.underlying.actor.asInstanceOf[DelayableActor].invocationCount must be > - (slow.underlying.actor.asInstanceOf[DelayableActor].invocationCount) + fast.underlying.asInstanceOf[ActorCell].mailbox.asInstanceOf[Mailbox].hasMessages must be(false) + slow.underlying.asInstanceOf[ActorCell].mailbox.asInstanceOf[Mailbox].hasMessages must be(false) + fast.underlying.asInstanceOf[ActorCell].actor.asInstanceOf[DelayableActor].invocationCount must be > sentToFast + fast.underlying.asInstanceOf[ActorCell].actor.asInstanceOf[DelayableActor].invocationCount must be > + (slow.underlying.asInstanceOf[ActorCell].actor.asInstanceOf[DelayableActor].invocationCount) system.stop(slow) system.stop(fast) } diff --git a/akka-actor-tests/src/test/scala/akka/dispatch/MailboxConfigSpec.scala b/akka-actor-tests/src/test/scala/akka/dispatch/MailboxConfigSpec.scala index 4f2d61de65..ba025ffe3c 100644 --- a/akka-actor-tests/src/test/scala/akka/dispatch/MailboxConfigSpec.scala +++ b/akka-actor-tests/src/test/scala/akka/dispatch/MailboxConfigSpec.scala @@ -1,13 +1,17 @@ +/** + * Copyright (C) 2009-2012 Typesafe Inc. 
+ */ package akka.dispatch -import org.scalatest.{ BeforeAndAfterAll, BeforeAndAfterEach } -import java.util.concurrent.{ TimeUnit, BlockingQueue } -import java.util.concurrent.ConcurrentLinkedQueue -import akka.util._ -import akka.util.duration._ -import akka.testkit.AkkaSpec +import java.util.concurrent.{ ConcurrentLinkedQueue, BlockingQueue } + +import org.scalatest.{ BeforeAndAfterEach, BeforeAndAfterAll } + import com.typesafe.config.Config -import akka.actor._ + +import akka.actor.{ RepointableRef, Props, DeadLetter, ActorSystem, ActorRefWithCell, ActorRef, ActorCell } +import akka.testkit.AkkaSpec +import akka.util.duration.intToDurationInt @org.junit.runner.RunWith(classOf[org.scalatest.junit.JUnitRunner]) abstract class MailboxSpec extends AkkaSpec with BeforeAndAfterAll with BeforeAndAfterEach { @@ -75,7 +79,7 @@ abstract class MailboxSpec extends AkkaSpec with BeforeAndAfterAll with BeforeAn result } - def createMessageInvocation(msg: Any): Envelope = Envelope(msg, system.deadLetters)(system) + def createMessageInvocation(msg: Any): Envelope = Envelope(msg, system.deadLetters, system) def ensureInitialMailboxState(config: MailboxType, q: MessageQueue) { q must not be null @@ -136,8 +140,8 @@ abstract class MailboxSpec extends AkkaSpec with BeforeAndAfterAll with BeforeAn class DefaultMailboxSpec extends MailboxSpec { lazy val name = "The default mailbox implementation" def factory = { - case u: UnboundedMailbox ⇒ u.create(None) - case b: BoundedMailbox ⇒ b.create(None) + case u: UnboundedMailbox ⇒ u.create(None, None) + case b: BoundedMailbox ⇒ b.create(None, None) } } @@ -145,8 +149,8 @@ class PriorityMailboxSpec extends MailboxSpec { val comparator = PriorityGenerator(_.##) lazy val name = "The priority mailbox implementation" def factory = { - case UnboundedMailbox() ⇒ new UnboundedPriorityMailbox(comparator).create(None) - case BoundedMailbox(capacity, pushTimeOut) ⇒ new BoundedPriorityMailbox(comparator, capacity, pushTimeOut).create(None) + case 
UnboundedMailbox() ⇒ new UnboundedPriorityMailbox(comparator).create(None, None) + case BoundedMailbox(capacity, pushTimeOut) ⇒ new BoundedPriorityMailbox(comparator, capacity, pushTimeOut).create(None, None) } } @@ -158,13 +162,13 @@ object CustomMailboxSpec { """ class MyMailboxType(settings: ActorSystem.Settings, config: Config) extends MailboxType { - override def create(owner: Option[ActorContext]) = owner match { + override def create(owner: Option[ActorRef], system: Option[ActorSystem]) = owner match { case Some(o) ⇒ new MyMailbox(o) case None ⇒ throw new Exception("no mailbox owner given") } } - class MyMailbox(owner: ActorContext) extends QueueBasedMessageQueue with UnboundedMessageQueueSemantics { + class MyMailbox(owner: ActorRef) extends QueueBasedMessageQueue with UnboundedMessageQueueSemantics { final val queue = new ConcurrentLinkedQueue[Envelope]() } } @@ -174,7 +178,11 @@ class CustomMailboxSpec extends AkkaSpec(CustomMailboxSpec.config) { "Dispatcher configuration" must { "support custom mailboxType" in { val actor = system.actorOf(Props.empty.withDispatcher("my-dispatcher")) - val queue = actor.asInstanceOf[LocalActorRef].underlying.mailbox.messageQueue + awaitCond(actor match { + case r: RepointableRef ⇒ r.isStarted + case _ ⇒ true + }, 1 second, 10 millis) + val queue = actor.asInstanceOf[ActorRefWithCell].underlying.asInstanceOf[ActorCell].mailbox.messageQueue queue.getClass must be(classOf[CustomMailboxSpec.MyMailbox]) } } diff --git a/akka-actor-tests/src/test/scala/akka/dispatch/PriorityDispatcherSpec.scala b/akka-actor-tests/src/test/scala/akka/dispatch/PriorityDispatcherSpec.scala index a9855fef7d..11f8760320 100644 --- a/akka-actor-tests/src/test/scala/akka/dispatch/PriorityDispatcherSpec.scala +++ b/akka-actor-tests/src/test/scala/akka/dispatch/PriorityDispatcherSpec.scala @@ -1,12 +1,14 @@ package akka.dispatch -import akka.actor.{ Props, LocalActorRef, Actor } -import akka.testkit.AkkaSpec -import akka.pattern.ask -import 
akka.util.duration._ -import akka.testkit.DefaultTimeout +import org.junit.runner.RunWith +import org.scalatest.junit.JUnitRunner + import com.typesafe.config.Config -import akka.actor.ActorSystem + +import akka.actor.{ Props, InternalActorRef, ActorSystem, Actor } +import akka.pattern.ask +import akka.testkit.{ DefaultTimeout, AkkaSpec } +import akka.util.duration.intToDurationInt object PriorityDispatcherSpec { val config = """ @@ -54,7 +56,7 @@ class PriorityDispatcherSpec extends AkkaSpec(PriorityDispatcherSpec.config) wit case i: Int ⇒ acc = i :: acc case 'Result ⇒ sender.tell(acc) } - }).withDispatcher(dispatcherKey)).asInstanceOf[LocalActorRef] + }).withDispatcher(dispatcherKey)).asInstanceOf[InternalActorRef] actor.suspend //Make sure the actor isn't treating any messages, let it buffer the incoming messages diff --git a/akka-actor-tests/src/test/scala/akka/event/EventStreamSpec.scala b/akka-actor-tests/src/test/scala/akka/event/EventStreamSpec.scala index a8cd32f5d3..d1846860f3 100644 --- a/akka-actor-tests/src/test/scala/akka/event/EventStreamSpec.scala +++ b/akka-actor-tests/src/test/scala/akka/event/EventStreamSpec.scala @@ -74,6 +74,17 @@ class EventStreamSpec extends AkkaSpec(EventStreamSpec.config) { } } + "not allow null as subscriber" in { + val bus = new EventStream(true) + intercept[IllegalArgumentException] { bus.subscribe(null, classOf[M]) }.getMessage must be("subscriber is null") + } + + "not allow null as unsubscriber" in { + val bus = new EventStream(true) + intercept[IllegalArgumentException] { bus.unsubscribe(null, classOf[M]) }.getMessage must be("subscriber is null") + intercept[IllegalArgumentException] { bus.unsubscribe(null) }.getMessage must be("subscriber is null") + } + "be able to log unhandled messages" in { val sys = ActorSystem("EventStreamSpecUnhandled", configUnhandled) try { diff --git a/akka-actor-tests/src/test/scala/akka/routing/ConfiguredLocalRoutingSpec.scala 
b/akka-actor-tests/src/test/scala/akka/routing/ConfiguredLocalRoutingSpec.scala index 5bedc8fc33..77ac5daf49 100644 --- a/akka-actor-tests/src/test/scala/akka/routing/ConfiguredLocalRoutingSpec.scala +++ b/akka-actor-tests/src/test/scala/akka/routing/ConfiguredLocalRoutingSpec.scala @@ -4,15 +4,14 @@ package akka.routing import java.util.concurrent.atomic.AtomicInteger - import org.junit.runner.RunWith - -import akka.actor.{ Props, LocalActorRef, Deploy, Actor, ActorRef } +import akka.actor.{ Props, Deploy, Actor, ActorRef } import akka.ConfigurationException import akka.dispatch.Await import akka.pattern.{ ask, gracefulStop } import akka.testkit.{ TestLatch, ImplicitSender, DefaultTimeout, AkkaSpec } import akka.util.duration.intToDurationInt +import akka.actor.UnstartedCell object ConfiguredLocalRoutingSpec { val config = """ @@ -47,6 +46,14 @@ object ConfiguredLocalRoutingSpec { @org.junit.runner.RunWith(classOf[org.scalatest.junit.JUnitRunner]) class ConfiguredLocalRoutingSpec extends AkkaSpec(ConfiguredLocalRoutingSpec.config) with DefaultTimeout with ImplicitSender { + def routerConfig(ref: ActorRef): RouterConfig = ref match { + case r: RoutedActorRef ⇒ + r.underlying match { + case c: RoutedActorCell ⇒ c.routerConfig + case _: UnstartedCell ⇒ awaitCond(r.isStarted, 1 second, 10 millis); routerConfig(ref) + } + } + "RouterConfig" must { "be picked up from Props" in { @@ -55,7 +62,7 @@ class ConfiguredLocalRoutingSpec extends AkkaSpec(ConfiguredLocalRoutingSpec.con case "get" ⇒ sender ! context.props } }).withRouter(RoundRobinRouter(12)), "someOther") - actor.asInstanceOf[LocalActorRef].underlying.props.routerConfig must be === RoundRobinRouter(12) + routerConfig(actor) must be === RoundRobinRouter(12) Await.result(gracefulStop(actor, 3 seconds), 3 seconds) } @@ -65,7 +72,7 @@ class ConfiguredLocalRoutingSpec extends AkkaSpec(ConfiguredLocalRoutingSpec.con case "get" ⇒ sender ! 
context.props } }).withRouter(RoundRobinRouter(12)), "config") - actor.asInstanceOf[LocalActorRef].underlying.props.routerConfig must be === RandomRouter(4) + routerConfig(actor) must be === RandomRouter(4) Await.result(gracefulStop(actor, 3 seconds), 3 seconds) } @@ -75,7 +82,7 @@ class ConfiguredLocalRoutingSpec extends AkkaSpec(ConfiguredLocalRoutingSpec.con case "get" ⇒ sender ! context.props } }).withRouter(FromConfig).withDeploy(Deploy(routerConfig = RoundRobinRouter(12))), "someOther") - actor.asInstanceOf[LocalActorRef].underlying.props.routerConfig must be === RoundRobinRouter(12) + routerConfig(actor) must be === RoundRobinRouter(12) Await.result(gracefulStop(actor, 3 seconds), 3 seconds) } @@ -85,7 +92,7 @@ class ConfiguredLocalRoutingSpec extends AkkaSpec(ConfiguredLocalRoutingSpec.con case "get" ⇒ sender ! context.props } }).withRouter(FromConfig).withDeploy(Deploy(routerConfig = RoundRobinRouter(12))), "config") - actor.asInstanceOf[LocalActorRef].underlying.props.routerConfig must be === RandomRouter(4) + routerConfig(actor) must be === RandomRouter(4) Await.result(gracefulStop(actor, 3 seconds), 3 seconds) } diff --git a/akka-actor-tests/src/test/scala/akka/routing/RoutingSpec.scala b/akka-actor-tests/src/test/scala/akka/routing/RoutingSpec.scala index 35631924cf..2b946ec1da 100644 --- a/akka-actor-tests/src/test/scala/akka/routing/RoutingSpec.scala +++ b/akka-actor-tests/src/test/scala/akka/routing/RoutingSpec.scala @@ -12,10 +12,11 @@ import akka.dispatch.Await import akka.util.Duration import akka.ConfigurationException import com.typesafe.config.ConfigFactory -import akka.pattern.ask +import akka.pattern.{ ask, pipe } import java.util.concurrent.ConcurrentHashMap import com.typesafe.config.Config import akka.dispatch.Dispatchers +import akka.util.Timeout object RoutingSpec { @@ -25,6 +26,10 @@ object RoutingSpec { router = round-robin nr-of-instances = 3 } + /router2 { + router = round-robin + nr-of-instances = 3 + } /myrouter { router = 
"akka.routing.RoutingSpec$MyRouter" foo = bar @@ -128,7 +133,7 @@ class RoutingSpec extends AkkaSpec(RoutingSpec.config) with DefaultTimeout with } "use configured nr-of-instances when router is specified" in { - val router = system.actorOf(Props[TestActor].withRouter(RoundRobinRouter(nrOfInstances = 2)), "router1") + val router = system.actorOf(Props[TestActor].withRouter(RoundRobinRouter(nrOfInstances = 2)), "router2") Await.result(router ? CurrentRoutees, 5 seconds).asInstanceOf[RouterRoutees].routees.size must be(3) system.stop(router) } @@ -171,6 +176,18 @@ class RoutingSpec extends AkkaSpec(RoutingSpec.config) with DefaultTimeout with expectMsg("restarted") } + "must start in-line for context.actorOf()" in { + system.actorOf(Props(new Actor { + def receive = { + case "start" ⇒ + context.actorOf(Props(new Actor { + def receive = { case x ⇒ sender ! x } + }).withRouter(RoundRobinRouter(2))) ? "hello" pipeTo sender + } + })) ! "start" + expectMsg("hello") + } + } "no router" must { @@ -528,7 +545,7 @@ class RoutingSpec extends AkkaSpec(RoutingSpec.config) with DefaultTimeout with } } "support custom router" in { - val myrouter = system.actorOf(Props().withRouter(FromConfig), "myrouter") + val myrouter = system.actorOf(Props.empty.withRouter(FromConfig), "myrouter") myrouter.isTerminated must be(false) } } @@ -540,7 +557,7 @@ class RoutingSpec extends AkkaSpec(RoutingSpec.config) with DefaultTimeout with } "count votes as intended - not as in Florida" in { - val routedActor = system.actorOf(Props().withRouter(VoteCountRouter())) + val routedActor = system.actorOf(Props.empty.withRouter(VoteCountRouter())) routedActor ! DemocratVote routedActor ! DemocratVote routedActor ! 
RepublicanVote diff --git a/akka-actor/src/main/java/akka/actor/AbstractActorCell.java b/akka-actor/src/main/java/akka/actor/AbstractActorCell.java index d6005f463c..95fb7368bc 100644 --- a/akka-actor/src/main/java/akka/actor/AbstractActorCell.java +++ b/akka-actor/src/main/java/akka/actor/AbstractActorCell.java @@ -8,10 +8,14 @@ import akka.util.Unsafe; final class AbstractActorCell { final static long mailboxOffset; + final static long childrenOffset; + final static long nextNameOffset; static { try { mailboxOffset = Unsafe.instance.objectFieldOffset(ActorCell.class.getDeclaredField("_mailboxDoNotCallMeDirectly")); + childrenOffset = Unsafe.instance.objectFieldOffset(ActorCell.class.getDeclaredField("_childrenRefsDoNotCallMeDirectly")); + nextNameOffset = Unsafe.instance.objectFieldOffset(ActorCell.class.getDeclaredField("_nextNameDoNotCallMeDirectly")); } catch(Throwable t){ throw new ExceptionInInitializerError(t); } diff --git a/akka-actor/src/main/java/akka/actor/AbstractActorRef.java b/akka-actor/src/main/java/akka/actor/AbstractActorRef.java new file mode 100644 index 0000000000..97ef09c501 --- /dev/null +++ b/akka-actor/src/main/java/akka/actor/AbstractActorRef.java @@ -0,0 +1,19 @@ +/** + * Copyright (C) 2009-2012 Typesafe Inc. 
+ */ + +package akka.actor; + +import akka.util.Unsafe; + +final class AbstractActorRef { + final static long cellOffset; + + static { + try { + cellOffset = Unsafe.instance.objectFieldOffset(RepointableActorRef.class.getDeclaredField("_cellDoNotCallMeDirectly")); + } catch(Throwable t){ + throw new ExceptionInInitializerError(t); + } + } +} diff --git a/akka-actor/src/main/scala/akka/AkkaException.scala b/akka-actor/src/main/scala/akka/AkkaException.scala index 04e820419f..8e49c7cb11 100644 --- a/akka-actor/src/main/scala/akka/AkkaException.scala +++ b/akka-actor/src/main/scala/akka/AkkaException.scala @@ -9,7 +9,6 @@ package akka * */ //TODO add @SerialVersionUID(1L) when SI-4804 is fixed diff --git a/akka-actor/src/main/scala/akka/actor/Actor.scala b/akka-actor/src/main/scala/akka/actor/Actor.scala index 8fc7df93e5..8b9476efe9 100644 --- a/akka-actor/src/main/scala/akka/actor/Actor.scala +++ b/akka-actor/src/main/scala/akka/actor/Actor.scala @@ -58,7 +58,7 @@ case object Kill extends Kill { /** * When Death Watch is used, the watcher will receive a Terminated(watched) message when watched is terminated. */ -case class Terminated(@BeanProperty actor: ActorRef)(@BeanProperty val existenceConfirmed: Boolean) +case class Terminated(@BeanProperty actor: ActorRef)(@BeanProperty val existenceConfirmed: Boolean) extends AutoReceivedMessage abstract class ReceiveTimeout extends PossiblyHarmful @@ -134,8 +134,7 @@ class ActorInitializationException private[akka] (actor: ActorRef, message: Stri * there might be more of them in the future, or not. 
*/ class InvalidMessageException private[akka] (message: String, cause: Throwable = null) - extends AkkaException(message, cause) - with NoStackTrace { + extends AkkaException(message, cause) { def this(msg: String) = this(msg, null) } diff --git a/akka-actor/src/main/scala/akka/actor/ActorCell.scala b/akka-actor/src/main/scala/akka/actor/ActorCell.scala index 72793513e2..e739ffc859 100644 --- a/akka-actor/src/main/scala/akka/actor/ActorCell.scala +++ b/akka-actor/src/main/scala/akka/actor/ActorCell.scala @@ -15,6 +15,7 @@ import akka.serialization.SerializationExtension import akka.event.Logging.LogEventException import collection.immutable.{ TreeSet, TreeMap } import akka.util.{ Unsafe, Duration, Helpers, NonFatal } +import java.util.concurrent.atomic.AtomicLong //TODO: everything here for current compatibility - could be limited more @@ -167,6 +168,78 @@ trait UntypedActorContext extends ActorContext { } +/** + * INTERNAL API + */ +private[akka] trait Cell { + /** + * The “self” reference which this Cell is attached to. + */ + def self: ActorRef + /** + * The system within which this Cell lives. + */ + def system: ActorSystem + /** + * The system internals where this Cell lives. + */ + def systemImpl: ActorSystemImpl + /** + * Recursively suspend this actor and all its children. + */ + def suspend(): Unit + /** + * Recursively resume this actor and all its children. + */ + def resume(): Unit + /** + * Restart this actor (will recursively restart or stop all children). + */ + def restart(cause: Throwable): Unit + /** + * Recursively terminate this actor and all its children. + */ + def stop(): Unit + /** + * Returns “true” if the actor is locally known to be terminated, “false” if + * alive or uncertain. + */ + def isTerminated: Boolean + /** + * The supervisor of this actor. + */ + def parent: InternalActorRef + /** + * All children of this actor, including only reserved-names. 
+ */ + def childrenRefs: ActorCell.ChildrenContainer + /** + * Enqueue a message to be sent to the actor; may or may not actually + * schedule the actor to run, depending on which type of cell it is. + */ + def tell(message: Any, sender: ActorRef): Unit + /** + * Enqueue a message to be sent to the actor; may or may not actually + * schedule the actor to run, depending on which type of cell it is. + */ + def sendSystemMessage(msg: SystemMessage): Unit + /** + * Returns true if the actor is local, i.e. if it is actually scheduled + * on a Thread in the current JVM when run. + */ + def isLocal: Boolean + /** + * If the actor isLocal, returns whether messages are currently queued, + * “false” otherwise. + */ + def hasMessages: Boolean + /** + * If the actor isLocal, returns the number of messages currently queued, + * which may be a costly operation, 0 otherwise. + */ + def numberOfMessages: Int +} + /** * Everything in here is completely Akka PRIVATE. You will not find any * supported APIs in this place. 
This is not the API you were looking @@ -201,10 +274,18 @@ private[akka] object ActorCell { def children: Iterable[ActorRef] def stats: Iterable[ChildRestartStats] def shallDie(actor: ActorRef): ChildrenContainer + /** + * reserve that name or throw an exception + */ + def reserve(name: String): ChildrenContainer + /** + * cancel a reservation + */ + def unreserve(name: String): ChildrenContainer } trait EmptyChildrenContainer extends ChildrenContainer { - val emptyStats = TreeMap.empty[String, ChildRestartStats] + val emptyStats = TreeMap.empty[String, ChildStats] def add(child: ActorRef): ChildrenContainer = new NormalChildrenContainer(emptyStats.updated(child.path.name, ChildRestartStats(child))) def remove(child: ActorRef): ChildrenContainer = this @@ -213,6 +294,8 @@ private[akka] object ActorCell { def children: Iterable[ActorRef] = Nil def stats: Iterable[ChildRestartStats] = Nil def shallDie(actor: ActorRef): ChildrenContainer = this + def reserve(name: String): ChildrenContainer = new NormalChildrenContainer(emptyStats.updated(name, ChildNameReserved)) + def unreserve(name: String): ChildrenContainer = this override def toString = "no children" } @@ -228,6 +311,8 @@ private[akka] object ActorCell { */ object TerminatedChildrenContainer extends EmptyChildrenContainer { override def add(child: ActorRef): ChildrenContainer = this + override def reserve(name: String): ChildrenContainer = + throw new IllegalStateException("cannot reserve actor name '" + name + "': already terminated") } /** @@ -236,32 +321,46 @@ private[akka] object ActorCell { * calling context.stop(child) and processing the ChildTerminated() system * message). 
*/ - class NormalChildrenContainer(c: TreeMap[String, ChildRestartStats]) extends ChildrenContainer { + class NormalChildrenContainer(c: TreeMap[String, ChildStats]) extends ChildrenContainer { - def add(child: ActorRef): ChildrenContainer = new NormalChildrenContainer(c.updated(child.path.name, ChildRestartStats(child))) + def add(child: ActorRef): ChildrenContainer = + new NormalChildrenContainer(c.updated(child.path.name, ChildRestartStats(child))) def remove(child: ActorRef): ChildrenContainer = NormalChildrenContainer(c - child.path.name) - def getByName(name: String): Option[ChildRestartStats] = c get name - - def getByRef(actor: ActorRef): Option[ChildRestartStats] = c get actor.path.name match { - case c @ Some(crs) if (crs.child == actor) ⇒ c - case _ ⇒ None + def getByName(name: String): Option[ChildRestartStats] = c.get(name) match { + case s @ Some(_: ChildRestartStats) ⇒ s.asInstanceOf[Option[ChildRestartStats]] + case _ ⇒ None } - def children: Iterable[ActorRef] = c.values.view.map(_.child) + def getByRef(actor: ActorRef): Option[ChildRestartStats] = c.get(actor.path.name) match { + case c @ Some(crs: ChildRestartStats) if (crs.child == actor) ⇒ c.asInstanceOf[Option[ChildRestartStats]] + case _ ⇒ None + } - def stats: Iterable[ChildRestartStats] = c.values + def children: Iterable[ActorRef] = c.values.view.collect { case ChildRestartStats(child, _, _) ⇒ child } + + def stats: Iterable[ChildRestartStats] = c.values.collect { case c: ChildRestartStats ⇒ c } def shallDie(actor: ActorRef): ChildrenContainer = TerminatingChildrenContainer(c, Set(actor), UserRequest) + def reserve(name: String): ChildrenContainer = + if (c contains name) + throw new InvalidActorNameException("actor name " + name + " is not unique!") + else new NormalChildrenContainer(c.updated(name, ChildNameReserved)) + + def unreserve(name: String): ChildrenContainer = c.get(name) match { + case Some(ChildNameReserved) ⇒ NormalChildrenContainer(c - name) + case _ ⇒ this + } + override 
def toString = if (c.size > 20) c.size + " children" else c.mkString("children:\n ", "\n ", "") } object NormalChildrenContainer { - def apply(c: TreeMap[String, ChildRestartStats]): ChildrenContainer = + def apply(c: TreeMap[String, ChildStats]): ChildrenContainer = if (c.isEmpty) EmptyChildrenContainer else new NormalChildrenContainer(c) } @@ -276,7 +375,7 @@ private[akka] object ActorCell { * type of container, depending on whether or not children are left and whether or not * the reason was “Terminating”. */ - case class TerminatingChildrenContainer(c: TreeMap[String, ChildRestartStats], toDie: Set[ActorRef], reason: SuspendReason) + case class TerminatingChildrenContainer(c: TreeMap[String, ChildStats], toDie: Set[ActorRef], reason: SuspendReason) extends ChildrenContainer { def add(child: ActorRef): ChildrenContainer = copy(c.updated(child.path.name, ChildRestartStats(child))) @@ -290,19 +389,35 @@ private[akka] object ActorCell { else copy(c - child.path.name, t) } - def getByName(name: String): Option[ChildRestartStats] = c get name - - def getByRef(actor: ActorRef): Option[ChildRestartStats] = c get actor.path.name match { - case c @ Some(crs) if (crs.child == actor) ⇒ c - case _ ⇒ None + def getByName(name: String): Option[ChildRestartStats] = c.get(name) match { + case s @ Some(_: ChildRestartStats) ⇒ s.asInstanceOf[Option[ChildRestartStats]] + case _ ⇒ None } - def children: Iterable[ActorRef] = c.values.view.map(_.child) + def getByRef(actor: ActorRef): Option[ChildRestartStats] = c.get(actor.path.name) match { + case c @ Some(crs: ChildRestartStats) if (crs.child == actor) ⇒ c.asInstanceOf[Option[ChildRestartStats]] + case _ ⇒ None + } - def stats: Iterable[ChildRestartStats] = c.values + def children: Iterable[ActorRef] = c.values.view.collect { case ChildRestartStats(child, _, _) ⇒ child } + + def stats: Iterable[ChildRestartStats] = c.values.collect { case c: ChildRestartStats ⇒ c } def shallDie(actor: ActorRef): ChildrenContainer = copy(toDie = 
toDie + actor) + def reserve(name: String): ChildrenContainer = reason match { + case Termination ⇒ throw new IllegalStateException("cannot reserve actor name '" + name + "': terminating") + case _ ⇒ + if (c contains name) + throw new InvalidActorNameException("actor name " + name + " is not unique!") + else copy(c = c.updated(name, ChildNameReserved)) + } + + def unreserve(name: String): ChildrenContainer = c.get(name) match { + case Some(ChildNameReserved) ⇒ copy(c = c - name) + case _ ⇒ this + } + override def toString = if (c.size > 20) c.size + " children" else c.mkString("children (" + toDie.size + " terminating):\n ", "\n ", "\n") + toDie @@ -316,10 +431,13 @@ private[akka] class ActorCell( val system: ActorSystemImpl, val self: InternalActorRef, val props: Props, - @volatile var parent: InternalActorRef) extends UntypedActorContext { - import AbstractActorCell.mailboxOffset + @volatile var parent: InternalActorRef) extends UntypedActorContext with Cell { + + import AbstractActorCell.{ mailboxOffset, childrenOffset, nextNameOffset } import ActorCell._ + final def isLocal = true + final def systemImpl = system protected final def guardian = self @@ -353,7 +471,46 @@ private[akka] class ActorCell( var receiveTimeoutData: (Long, Cancellable) = emptyReceiveTimeoutData @volatile - var childrenRefs: ChildrenContainer = EmptyChildrenContainer + private var _childrenRefsDoNotCallMeDirectly: ChildrenContainer = EmptyChildrenContainer + + def childrenRefs: ChildrenContainer = Unsafe.instance.getObjectVolatile(this, childrenOffset).asInstanceOf[ChildrenContainer] + + private def swapChildrenRefs(oldChildren: ChildrenContainer, newChildren: ChildrenContainer): Boolean = + Unsafe.instance.compareAndSwapObject(this, childrenOffset, oldChildren, newChildren) + + @tailrec private def reserveChild(name: String): Boolean = { + val c = childrenRefs + swapChildrenRefs(c, c.reserve(name)) || reserveChild(name) + } + + @tailrec private def unreserveChild(name: String): Boolean = 
{ + val c = childrenRefs + swapChildrenRefs(c, c.unreserve(name)) || unreserveChild(name) + } + + @tailrec private def addChild(ref: ActorRef): Boolean = { + val c = childrenRefs + swapChildrenRefs(c, c.add(ref)) || addChild(ref) + } + + @tailrec private def shallDie(ref: ActorRef): Boolean = { + val c = childrenRefs + swapChildrenRefs(c, c.shallDie(ref)) || shallDie(ref) + } + + @tailrec private def removeChild(ref: ActorRef): ChildrenContainer = { + val c = childrenRefs + val n = c.remove(ref) + if (swapChildrenRefs(c, n)) n + else removeChild(ref) + } + + @tailrec private def setChildrenTerminationReason(reason: SuspendReason): Boolean = { + childrenRefs match { + case c: TerminatingChildrenContainer ⇒ swapChildrenRefs(c, c.copy(reason = reason)) || setChildrenTerminationReason(reason) + case _ ⇒ false + } + } private def isTerminating = childrenRefs match { case TerminatingChildrenContainer(_, _, Termination) ⇒ true @@ -365,7 +522,7 @@ private[akka] class ActorCell( case _ ⇒ true } - private def _actorOf(props: Props, name: String): ActorRef = { + private def _actorOf(props: Props, name: String, async: Boolean): ActorRef = { if (system.settings.SerializeAllCreators && !props.creator.isInstanceOf[NoSerializationVerificationNeeded]) { val ser = SerializationExtension(system) ser.serialize(props.creator) match { @@ -376,53 +533,74 @@ private[akka] class ActorCell( } } } - // in case we are currently terminating, swallow creation requests and return EmptyLocalActorRef - if (isTerminating) provider.actorFor(self, Seq(name)) + /* + * in case we are currently terminating, fail external attachChild requests + * (internal calls cannot happen anyway because we are suspended) + */ + if (isTerminating) throw new IllegalStateException("cannot create children while terminating or terminated") else { - val actor = provider.actorOf(systemImpl, props, self, self.path / name, false, None, true) - childrenRefs = childrenRefs.add(actor) + reserveChild(name) + // this name will 
either be unreserved or overwritten with a real child below + val actor = + try { + provider.actorOf(systemImpl, props, self, self.path / name, + systemService = false, deploy = None, lookupDeploy = true, async = async) + } catch { + case NonFatal(e) ⇒ + unreserveChild(name) + throw e + } + addChild(actor) actor } } - def actorOf(props: Props): ActorRef = _actorOf(props, randomName()) + def actorOf(props: Props): ActorRef = _actorOf(props, randomName(), async = false) - def actorOf(props: Props, name: String): ActorRef = { + def actorOf(props: Props, name: String): ActorRef = _actorOf(props, checkName(name), async = false) + + private def checkName(name: String): String = { import ActorPath.ElementRegex name match { case null ⇒ throw new InvalidActorNameException("actor name must not be null") case "" ⇒ throw new InvalidActorNameException("actor name must not be empty") - case ElementRegex() ⇒ // this is fine + case ElementRegex() ⇒ name case _ ⇒ throw new InvalidActorNameException("illegal actor name '" + name + "', must conform to " + ElementRegex) } - childrenRefs.getByName(name) match { - case None ⇒ _actorOf(props, name) - case _ ⇒ throw new InvalidActorNameException("actor name " + name + " is not unique!") - } } + private[akka] def attachChild(props: Props, name: String): ActorRef = + _actorOf(props, checkName(name), async = true) + + private[akka] def attachChild(props: Props): ActorRef = + _actorOf(props, randomName(), async = true) + final def stop(actor: ActorRef): Unit = { - if (childrenRefs.getByRef(actor).isDefined) childrenRefs = childrenRefs.shallDie(actor) + val started = actor match { + case r: RepointableRef ⇒ r.isStarted + case _ ⇒ true + } + if (childrenRefs.getByRef(actor).isDefined && started) shallDie(actor) actor.asInstanceOf[InternalActorRef].stop() } var currentMessage: Envelope = _ var actor: Actor = _ private var behaviorStack: List[Actor.Receive] = emptyBehaviorStack - @volatile var _mailboxDoNotCallMeDirectly: Mailbox = _ //This must 
be volatile since it isn't protected by the mailbox status - var nextNameSequence: Long = 0 var watching: Set[ActorRef] = emptyActorRefSet var watchedBy: Set[ActorRef] = emptyActorRefSet - //Not thread safe, so should only be used inside the actor that inhabits this ActorCell + @volatile private var _nextNameDoNotCallMeDirectly = 0L final protected def randomName(): String = { - val n = nextNameSequence - nextNameSequence = n + 1 - Helpers.base64(n) + @tailrec def inc(): Long = { + val current = Unsafe.instance.getLongVolatile(this, nextNameOffset) + if (Unsafe.instance.compareAndSwapLong(this, nextNameOffset, current, current + 1)) current + else inc() + } + Helpers.base64(inc()) } - @inline - final val dispatcher: MessageDispatcher = system.dispatchers.lookup(props.dispatcher) + @volatile private var _mailboxDoNotCallMeDirectly: Mailbox = _ //This must be volatile since it isn't protected by the mailbox status /** * INTERNAL API @@ -442,6 +620,12 @@ private[akka] class ActorCell( else oldMailbox } + final def hasMessages: Boolean = mailbox.hasMessages + + final def numberOfMessages: Int = mailbox.numberOfMessages + + val dispatcher: MessageDispatcher = system.dispatchers.lookup(props.dispatcher) + /** * UntypedActorContext impl */ @@ -449,20 +633,22 @@ private[akka] class ActorCell( final def isTerminated: Boolean = mailbox.isClosed - final def start(): Unit = { + final def start(): this.type = { + /* * Create the mailbox and enqueue the Create() message to ensure that * this is processed before anything else. */ swapMailbox(dispatcher.createMailbox(this)) + mailbox.setActor(this) + // ➡➡➡ NEVER SEND THE SAME SYSTEM MESSAGE OBJECT TO TWO ACTORS ⬅⬅⬅ mailbox.systemEnqueue(self, Create()) - // ➡➡➡ NEVER SEND THE SAME SYSTEM MESSAGE OBJECT TO TWO ACTORS ⬅⬅⬅ - parent.sendSystemMessage(akka.dispatch.Supervise(self)) - // This call is expected to start off the actor by scheduling its mailbox. 
dispatcher.attach(this) + + this } // ➡➡➡ NEVER SEND THE SAME SYSTEM MESSAGE OBJECT TO TWO ACTORS ⬅⬅⬅ @@ -500,8 +686,10 @@ private[akka] class ActorCell( final def getChildren(): java.lang.Iterable[ActorRef] = scala.collection.JavaConverters.asJavaIterableConverter(children).asJava - final def tell(message: Any, sender: ActorRef): Unit = - dispatcher.dispatch(this, Envelope(message, if (sender eq null) system.deadLetters else sender)(system)) + def tell(message: Any, sender: ActorRef): Unit = + dispatcher.dispatch(this, Envelope(message, if (sender eq null) system.deadLetters else sender, system)) + + override def sendSystemMessage(message: SystemMessage): Unit = dispatcher.systemDispatch(this, message) final def sender: ActorRef = currentMessage match { case null ⇒ system.deadLetters @@ -564,7 +752,7 @@ private[akka] class ActorCell( } childrenRefs match { case ct: TerminatingChildrenContainer ⇒ - childrenRefs = ct.copy(reason = Recreation(cause)) + setChildrenTerminationReason(Recreation(cause)) dispatcher suspend this case _ ⇒ doRecreate(cause, failedActor) @@ -622,7 +810,7 @@ private[akka] class ActorCell( childrenRefs match { case ct: TerminatingChildrenContainer ⇒ - childrenRefs = ct.copy(reason = Termination) + setChildrenTerminationReason(Termination) // do not process normal messages while waiting for all children to terminate dispatcher suspend this if (system.settings.DebugLifecycle) system.eventStream.publish(Debug(self.path.toString, clazz(actor), "stopping")) @@ -631,7 +819,8 @@ private[akka] class ActorCell( } def supervise(child: ActorRef): Unit = if (!isTerminating) { - if (childrenRefs.getByRef(child).isEmpty) childrenRefs = childrenRefs.add(child) + if (childrenRefs.getByRef(child).isEmpty) addChild(child) + handleSupervise(child) if (system.settings.DebugLifecycle) system.eventStream.publish(Debug(self.path.toString, clazz(actor), "now supervising " + child)) } @@ -646,6 +835,7 @@ private[akka] class ActorCell( case Terminate() ⇒ terminate() 
case Supervise(child) ⇒ supervise(child) case ChildTerminated(child) ⇒ handleChildTerminated(child) + case NoMessage ⇒ // only here to suppress warning } } catch { case e @ (_: InterruptedException | NonFatal(_)) ⇒ handleInvokeFailure(e, "error while processing " + message) @@ -706,6 +896,7 @@ private[akka] class ActorCell( msg.message match { case Failed(cause) ⇒ handleFailure(sender, cause) + case t: Terminated ⇒ watching -= t.actor; receiveMessage(t) case Kill ⇒ throw new ActorKilledException("Kill") case PoisonPill ⇒ self.stop() case SelectParent(m) ⇒ parent.tell(m, msg.sender) @@ -794,8 +985,7 @@ private[akka] class ActorCell( final def handleChildTerminated(child: ActorRef): Unit = try { childrenRefs match { case tc @ TerminatingChildrenContainer(_, _, reason) ⇒ - val n = tc.remove(child) - childrenRefs = n + val n = removeChild(child) actor.supervisorStrategy.handleChildTerminated(this, child, children) if (!n.isInstanceOf[TerminatingChildrenContainer]) reason match { case Recreation(cause) ⇒ doRecreate(cause, actor) // doRecreate since this is the continuation of "recreate" @@ -803,7 +993,7 @@ private[akka] class ActorCell( case _ ⇒ } case _ ⇒ - childrenRefs = childrenRefs.remove(child) + removeChild(child) actor.supervisorStrategy.handleChildTerminated(this, child, children) } } catch { @@ -816,6 +1006,11 @@ private[akka] class ActorCell( } } + protected def handleSupervise(child: ActorRef): Unit = child match { + case r: RepointableActorRef ⇒ r.activate() + case _ ⇒ + } + // ➡➡➡ NEVER SEND THE SAME SYSTEM MESSAGE OBJECT TO TWO ACTORS ⬅⬅⬅ final def restart(cause: Throwable): Unit = dispatcher.systemDispatch(this, Recreate(cause)) diff --git a/akka-actor/src/main/scala/akka/actor/ActorPath.scala b/akka-actor/src/main/scala/akka/actor/ActorPath.scala index aa93dbcc47..1112b90f31 100644 --- a/akka-actor/src/main/scala/akka/actor/ActorPath.scala +++ b/akka-actor/src/main/scala/akka/actor/ActorPath.scala @@ -192,7 +192,7 @@ final class ChildActorPath(val 
parent: ActorPath, val name: String) extends Acto // TODO RK investigate Phil’s hash from scala.collection.mutable.HashTable.improve override def hashCode: Int = { - import scala.util.MurmurHash._ + import akka.routing.MurmurHash._ @tailrec def rec(p: ActorPath, h: Int, c: Int, k: Int): Int = p match { diff --git a/akka-actor/src/main/scala/akka/actor/ActorRef.scala b/akka-actor/src/main/scala/akka/actor/ActorRef.scala index 0620a73a28..8d42714b00 100644 --- a/akka-actor/src/main/scala/akka/actor/ActorRef.scala +++ b/akka-actor/src/main/scala/akka/actor/ActorRef.scala @@ -163,10 +163,24 @@ private[akka] trait ActorRefScope { def isLocal: Boolean } +/** + * Refs which are statically known to be local inherit from this Scope + */ private[akka] trait LocalRef extends ActorRefScope { final def isLocal = true } +/** + * RepointableActorRef (and potentially others) may change their locality at + * runtime, meaning that isLocal might not be stable. RepointableActorRef has + * the feature that it starts out “not fully started” (but you can send to it), + * which is why `isStarted` features here; it is not improbable that cluster + * actor refs will have the same behavior. + */ +private[akka] trait RepointableRef extends ActorRefScope { + def isStarted: Boolean +} + /** * Internal trait for assembling all the functionality needed internally on * ActorRefs. NOTE THAT THIS IS NOT A STABLE EXTERNAL INTERFACE! @@ -210,6 +224,16 @@ private[akka] abstract class InternalActorRef extends ActorRef with ScalaActorRe def isLocal: Boolean } +/** + * Common trait of all actor refs which actually have a Cell, most notably + * LocalActorRef and RepointableActorRef. The former specializes the return + * type of `underlying` so that follow-up calls can use invokevirtual instead + * of invokeinterface. 
+ */ +private[akka] abstract class ActorRefWithCell extends InternalActorRef { this: ActorRefScope ⇒ + def underlying: Cell +} + /** * This is an internal look-up failure token, not useful for anything else. */ @@ -228,21 +252,21 @@ private[akka] class LocalActorRef private[akka] ( _props: Props, _supervisor: InternalActorRef, override val path: ActorPath) - extends InternalActorRef with LocalRef { + extends ActorRefWithCell with LocalRef { /* - * actorCell.start() publishes actorCell & this to the dispatcher, which - * means that messages may be processed theoretically before the constructor - * ends. The JMM guarantees visibility for final fields only after the end - * of the constructor, so publish the actorCell safely by making it a - * @volatile var which is NOT TO BE WRITTEN TO. The alternative would be to - * move start() outside of the constructor, which would basically require - * us to use purely factory methods for creating LocalActorRefs. + * Safe publication of this class’s fields is guaranteed by mailbox.setActor() + * which is called indirectly from actorCell.start() (if you’re wondering why + * this is at all important, remember that under the JMM final fields are only + * frozen at the _end_ of the constructor, but we are publishing “this” before + * that is reached). 
*/ - @volatile - private var actorCell = newActorCell(_system, this, _props, _supervisor) + private val actorCell: ActorCell = newActorCell(_system, this, _props, _supervisor) actorCell.start() + // ➡➡➡ NEVER SEND THE SAME SYSTEM MESSAGE OBJECT TO TWO ACTORS ⬅⬅⬅ + _supervisor.sendSystemMessage(akka.dispatch.Supervise(this)) + protected def newActorCell(system: ActorSystemImpl, ref: InternalActorRef, props: Props, supervisor: InternalActorRef): ActorCell = new ActorCell(system, ref, props, supervisor) @@ -313,9 +337,9 @@ private[akka] class LocalActorRef private[akka] ( // ========= AKKA PROTECTED FUNCTIONS ========= - protected[akka] def underlying: ActorCell = actorCell + def underlying: ActorCell = actorCell - override def sendSystemMessage(message: SystemMessage): Unit = underlying.dispatcher.systemDispatch(underlying, message) + override def sendSystemMessage(message: SystemMessage): Unit = actorCell.sendSystemMessage(message) override def !(message: Any)(implicit sender: ActorRef = null): Unit = actorCell.tell(message, sender) diff --git a/akka-actor/src/main/scala/akka/actor/ActorRefProvider.scala b/akka-actor/src/main/scala/akka/actor/ActorRefProvider.scala index 4c200b204c..bbb84144c5 100644 --- a/akka-actor/src/main/scala/akka/actor/ActorRefProvider.scala +++ b/akka-actor/src/main/scala/akka/actor/ActorRefProvider.scala @@ -26,12 +26,12 @@ trait ActorRefProvider { /** * Reference to the supervisor used for all top-level user actors. */ - def guardian: InternalActorRef + def guardian: LocalActorRef /** * Reference to the supervisor used for all top-level system actors. */ - def systemGuardian: InternalActorRef + def systemGuardian: LocalActorRef /** * Dead letter destination for this provider. 
@@ -104,7 +104,8 @@ trait ActorRefProvider { path: ActorPath, systemService: Boolean, deploy: Option[Deploy], - lookupDeploy: Boolean): InternalActorRef + lookupDeploy: Boolean, + async: Boolean): InternalActorRef /** * Create actor reference for a specified local or remote path. If no such @@ -481,11 +482,10 @@ class LocalActorRefProvider( } } - lazy val guardian: InternalActorRef = - actorOf(system, guardianProps, rootGuardian, rootPath / "user", true, None, false) + lazy val guardian: LocalActorRef = new LocalActorRef(system, guardianProps, rootGuardian, rootPath / "user") - lazy val systemGuardian: InternalActorRef = - actorOf(system, guardianProps.withCreator(new SystemGuardian), rootGuardian, rootPath / "system", true, None, false) + lazy val systemGuardian: LocalActorRef = + new LocalActorRef(system, guardianProps.withCreator(new SystemGuardian), rootGuardian, rootPath / "system") lazy val tempContainer = new VirtualPathContainer(system.provider, tempNode, rootGuardian, log) @@ -539,22 +539,20 @@ class LocalActorRefProvider( } def actorOf(system: ActorSystemImpl, props: Props, supervisor: InternalActorRef, path: ActorPath, - systemService: Boolean, deploy: Option[Deploy], lookupDeploy: Boolean): InternalActorRef = { + systemService: Boolean, deploy: Option[Deploy], lookupDeploy: Boolean, async: Boolean): InternalActorRef = { props.routerConfig match { - case NoRouter ⇒ new LocalActorRef(system, props, supervisor, path) // create a local actor + case NoRouter ⇒ + if (async) new RepointableActorRef(system, props, supervisor, path).initialize() + else new LocalActorRef(system, props, supervisor, path) case router ⇒ val lookup = if (lookupDeploy) deployer.lookup(path) else None val fromProps = Iterator(props.deploy.copy(routerConfig = props.deploy.routerConfig withFallback router)) val d = fromProps ++ deploy.iterator ++ lookup.iterator reduce ((a, b) ⇒ b withFallback a) - new RoutedActorRef(system, props.withRouter(d.routerConfig), supervisor, path) + val ref = 
new RoutedActorRef(system, props.withRouter(d.routerConfig), supervisor, path).initialize() + if (async) ref else ref.activate() } } def getExternalAddressFor(addr: Address): Option[Address] = if (addr == rootPath.address) Some(addr) else None } -private[akka] class GuardianCell(_system: ActorSystemImpl, _self: InternalActorRef, _props: Props, _parent: InternalActorRef) - extends ActorCell(_system, _self, _props, _parent) { - -} - diff --git a/akka-actor/src/main/scala/akka/actor/ActorSystem.scala b/akka-actor/src/main/scala/akka/actor/ActorSystem.scala index 0d13f2451a..eb0f241177 100644 --- a/akka-actor/src/main/scala/akka/actor/ActorSystem.scala +++ b/akka-actor/src/main/scala/akka/actor/ActorSystem.scala @@ -422,6 +422,13 @@ abstract class ExtendedActorSystem extends ActorSystem { * creation. */ def dynamicAccess: DynamicAccess + + /** + * For debugging: traverse actor hierarchy and make string representation. + * Careful, this may OOM on large actor systems, and it is only meant for + * helping debugging in case something already went terminally wrong. + */ + private[akka] def printTree: String } private[akka] class ActorSystemImpl(val name: String, applicationConfig: Config, classLoader: ClassLoader) extends ExtendedActorSystem { @@ -479,20 +486,11 @@ private[akka] class ActorSystemImpl(val name: String, applicationConfig: Config, protected def systemImpl: ActorSystemImpl = this - private[akka] def systemActorOf(props: Props, name: String): ActorRef = { - implicit val timeout = settings.CreationTimeout - Await.result((systemGuardian ? CreateChild(props, name)).mapTo[ActorRef], timeout.duration) - } + private[akka] def systemActorOf(props: Props, name: String): ActorRef = systemGuardian.underlying.attachChild(props, name) - def actorOf(props: Props, name: String): ActorRef = { - implicit val timeout = settings.CreationTimeout - Await.result((guardian ? 
CreateChild(props, name)).mapTo[ActorRef], timeout.duration) - } + def actorOf(props: Props, name: String): ActorRef = guardian.underlying.attachChild(props, name) - def actorOf(props: Props): ActorRef = { - implicit val timeout = settings.CreationTimeout - Await.result((guardian ? CreateRandomNameChild(props)).mapTo[ActorRef], timeout.duration) - } + def actorOf(props: Props): ActorRef = guardian.underlying.attachChild(props) def stop(actor: ActorRef): Unit = { implicit val timeout = settings.CreationTimeout @@ -539,10 +537,10 @@ private[akka] class ActorSystemImpl(val name: String, applicationConfig: Config, def dequeue() = null def hasMessages = false def numberOfMessages = 0 - def cleanUp(owner: ActorContext, deadLetters: MessageQueue): Unit = () + def cleanUp(owner: ActorRef, deadLetters: MessageQueue): Unit = () } //FIXME Why do we need this at all? - val deadLetterMailbox: Mailbox = new Mailbox(null, deadLetterQueue) { + val deadLetterMailbox: Mailbox = new Mailbox(deadLetterQueue) { becomeClosed() def systemEnqueue(receiver: ActorRef, handle: SystemMessage): Unit = deadLetters ! 
DeadLetter(handle, receiver, receiver) @@ -557,8 +555,8 @@ private[akka] class ActorSystemImpl(val name: String, applicationConfig: Config, def terminationFuture: Future[Unit] = provider.terminationFuture def lookupRoot: InternalActorRef = provider.rootGuardian - def guardian: InternalActorRef = provider.guardian - def systemGuardian: InternalActorRef = provider.systemGuardian + def guardian: LocalActorRef = provider.guardian + def systemGuardian: LocalActorRef = provider.systemGuardian def /(actorName: String): ActorPath = guardian.path / actorName def /(path: Iterable[String]): ActorPath = guardian.path / path @@ -682,6 +680,31 @@ private[akka] class ActorSystemImpl(val name: String, applicationConfig: Config, override def toString: String = lookupRoot.path.root.address.toString + override def printTree: String = { + def printNode(node: ActorRef, indent: String): String = { + node match { + case wc: ActorRefWithCell ⇒ + val cell = wc.underlying + indent + "-> " + node.path.name + " " + Logging.simpleName(node) + " " + + (cell match { + case real: ActorCell ⇒ if (real.actor ne null) real.actor.getClass else "null" + case _ ⇒ Logging.simpleName(cell) + }) + + " " + (cell.childrenRefs match { + case ActorCell.TerminatingChildrenContainer(_, toDie, reason) ⇒ + "Terminating(" + reason + ")" + + (toDie.toSeq.sorted mkString ("\n" + indent + " toDie: ", "\n" + indent + " ", "")) + case x ⇒ Logging.simpleName(x) + }) + + (if (cell.childrenRefs.children.isEmpty) "" else "\n") + + (cell.childrenRefs.children.toSeq.sorted map (printNode(_, indent + " |")) mkString ("\n")) + case _ ⇒ + indent + node.path.name + " " + Logging.simpleName(node) + } + } + printNode(actorFor("/"), "") + } + final class TerminationCallbacks extends Runnable with Awaitable[Unit] { private val lock = new ReentrantGuard private var callbacks: List[Runnable] = _ //non-volatile since guarded by the lock diff --git a/akka-actor/src/main/scala/akka/actor/FaultHandling.scala 
b/akka-actor/src/main/scala/akka/actor/FaultHandling.scala index 27a9f346db..76eed2eca9 100644 --- a/akka-actor/src/main/scala/akka/actor/FaultHandling.scala +++ b/akka-actor/src/main/scala/akka/actor/FaultHandling.scala @@ -9,11 +9,22 @@ import scala.collection.JavaConversions._ import java.lang.{ Iterable ⇒ JIterable } import akka.util.Duration +/** + * INTERNAL API + */ +private[akka] sealed trait ChildStats + +/** + * INTERNAL API + */ +private[akka] case object ChildNameReserved extends ChildStats + /** * ChildRestartStats is the statistics kept by every parent Actor for every child Actor * and is used for SupervisorStrategies to know how to deal with problems that occur for the children. */ -case class ChildRestartStats(val child: ActorRef, var maxNrOfRetriesCount: Int = 0, var restartTimeWindowStartNanos: Long = 0L) { +case class ChildRestartStats(val child: ActorRef, var maxNrOfRetriesCount: Int = 0, var restartTimeWindowStartNanos: Long = 0L) + extends ChildStats { //FIXME How about making ChildRestartStats immutable and then move these methods into the actual supervisor strategies? def requestRestartPermission(retriesWindow: (Option[Int], Option[Int])): Boolean = diff --git a/akka-actor/src/main/scala/akka/actor/RepointableActorRef.scala b/akka-actor/src/main/scala/akka/actor/RepointableActorRef.scala new file mode 100644 index 0000000000..ad9a7cb0c4 --- /dev/null +++ b/akka-actor/src/main/scala/akka/actor/RepointableActorRef.scala @@ -0,0 +1,214 @@ +/** + * Copyright (C) 2009-2012 Typesafe Inc. 
+ */ + +package akka.actor + +import akka.util.Unsafe +import scala.annotation.tailrec +import akka.dispatch.SystemMessage +import akka.dispatch.Mailbox +import akka.dispatch.Terminate +import akka.dispatch.Envelope +import akka.dispatch.Supervise +import akka.dispatch.Create +import akka.dispatch.MessageDispatcher +import java.util.concurrent.locks.ReentrantLock +import akka.event.Logging.Warning +import scala.collection.mutable.Queue + +/** + * This actor ref starts out with some dummy cell (by default just enqueuing + * messages into vectors protected by ReentrantLock), it must be initialize()’d + * before it can be sent to, and it will be activate()’d by its supervisor in + * response to the Supervise() message, which will replace the contained Cell + * with a fully functional one, transfer all messages from dummy to real queue + * and swap out the cell ref. + */ +private[akka] class RepointableActorRef( + val system: ActorSystemImpl, + val props: Props, + val supervisor: InternalActorRef, + val path: ActorPath) + extends ActorRefWithCell with RepointableRef { + + import AbstractActorRef.cellOffset + + @volatile private var _cellDoNotCallMeDirectly: Cell = _ + + def underlying: Cell = Unsafe.instance.getObjectVolatile(this, cellOffset).asInstanceOf[Cell] + + @tailrec final def swapCell(next: Cell): Cell = { + val old = underlying + if (Unsafe.instance.compareAndSwapObject(this, cellOffset, old, next)) old else swapCell(next) + } + + /** + * Initialize: make a dummy cell which holds just a mailbox, then tell our + * supervisor that we exist so that he can create the real Cell in + * handleSupervise(). + * + * Call twice on your own peril! + * + * This is protected so that others can have different initialization. 
+ */ + def initialize(): this.type = { + swapCell(new UnstartedCell(system, this, props, supervisor)) + supervisor.sendSystemMessage(Supervise(this)) + this + } + + /** + * This method is supposed to be called by the supervisor in handleSupervise() + * to replace the UnstartedCell with the real one. It assumes no concurrent + * modification of the `underlying` field, though it is safe to send messages + * at any time. + */ + def activate(): this.type = { + underlying match { + case u: UnstartedCell ⇒ u.replaceWith(newCell()) + case _ ⇒ // this happens routinely for things which were created async=false + } + this + } + + /** + * This is called by activate() to obtain the cell which is to replace the + * unstarted cell. The cell must be fully functional. + */ + def newCell(): Cell = new ActorCell(system, this, props, supervisor).start() + + def suspend(): Unit = underlying.suspend() + + def resume(): Unit = underlying.resume() + + def stop(): Unit = underlying.stop() + + def restart(cause: Throwable): Unit = underlying.restart(cause) + + def isStarted: Boolean = !underlying.isInstanceOf[UnstartedCell] + + def isTerminated: Boolean = underlying.isTerminated + + def provider: ActorRefProvider = system.provider + + def isLocal: Boolean = underlying.isLocal + + def getParent: InternalActorRef = underlying.parent + + def getChild(name: Iterator[String]): InternalActorRef = + if (name.hasNext) { + name.next match { + case ".." 
⇒ getParent.getChild(name) + case "" ⇒ getChild(name) + case other ⇒ + underlying.childrenRefs.getByName(other) match { + case Some(crs) ⇒ crs.child.asInstanceOf[InternalActorRef].getChild(name) + case None ⇒ Nobody + } + } + } else this + + def !(message: Any)(implicit sender: ActorRef = null) = underlying.tell(message, sender) + + def sendSystemMessage(message: SystemMessage) = underlying.sendSystemMessage(message) + + @throws(classOf[java.io.ObjectStreamException]) + protected def writeReplace(): AnyRef = SerializedActorRef(path) +} + +private[akka] class UnstartedCell(val systemImpl: ActorSystemImpl, val self: RepointableActorRef, val props: Props, val supervisor: InternalActorRef) + extends Cell { + + /* + * This lock protects all accesses to this cell’s queues. It also ensures + * safe switching to the started ActorCell. + */ + val lock = new ReentrantLock + + // use Envelope to keep on-send checks in the same place + val queue: Queue[Envelope] = Queue() + val systemQueue: Queue[SystemMessage] = Queue() + + def replaceWith(cell: Cell): Unit = { + lock.lock() + try { + /* + * The CallingThreadDispatcher nicely dives under the ReentrantLock and + * breaks things by enqueueing into stale queues from within the message + * processing which happens in-line for sendSystemMessage() and tell(). + * Since this is the only possible way to f*ck things up within this + * lock, double-tap (well, N-tap, really); concurrent modification is + * still not possible because we’re the only thread accessing the queues. 
+ */ + var interrupted = false + while (systemQueue.nonEmpty || queue.nonEmpty) { + while (systemQueue.nonEmpty) { + val msg = systemQueue.dequeue() + try cell.sendSystemMessage(msg) + catch { + case _: InterruptedException ⇒ interrupted = true + } + } + if (queue.nonEmpty) { + val envelope = queue.dequeue() + try cell.tell(envelope.message, envelope.sender) + catch { + case _: InterruptedException ⇒ interrupted = true + } + } + } + if (interrupted) throw new InterruptedException + } finally try + self.swapCell(cell) + finally + lock.unlock() + } + + def system: ActorSystem = systemImpl + def suspend(): Unit = {} + def resume(): Unit = {} + def restart(cause: Throwable): Unit = {} + def stop(): Unit = sendSystemMessage(Terminate()) + def isTerminated: Boolean = false + def parent: InternalActorRef = supervisor + def childrenRefs: ActorCell.ChildrenContainer = ActorCell.EmptyChildrenContainer + def tell(message: Any, sender: ActorRef): Unit = { + lock.lock() + try { + if (self.underlying eq this) queue enqueue Envelope(message, sender, system) + else self.underlying.tell(message, sender) + } finally { + lock.unlock() + } + } + def sendSystemMessage(msg: SystemMessage): Unit = { + lock.lock() + try { + if (self.underlying eq this) systemQueue enqueue msg + else self.underlying.sendSystemMessage(msg) + } finally { + lock.unlock() + } + } + def isLocal = true + def hasMessages: Boolean = { + lock.lock() + try { + if (self.underlying eq this) !queue.isEmpty + else self.underlying.hasMessages + } finally { + lock.unlock() + } + } + def numberOfMessages: Int = { + lock.lock() + try { + if (self.underlying eq this) queue.size + else self.underlying.numberOfMessages + } finally { + lock.unlock() + } + } + +} \ No newline at end of file diff --git a/akka-actor/src/main/scala/akka/actor/TypedActor.scala b/akka-actor/src/main/scala/akka/actor/TypedActor.scala index 9bb560417b..1933015e88 100644 --- a/akka-actor/src/main/scala/akka/actor/TypedActor.scala +++ 
b/akka-actor/src/main/scala/akka/actor/TypedActor.scala @@ -592,7 +592,7 @@ case class TypedProps[T <: AnyRef] protected[TypedProps] ( /** * Returns the akka.actor.Props representation of this TypedProps */ - def actorProps(): Props = if (dispatcher == Props().dispatcher) Props() else Props(dispatcher = dispatcher) + def actorProps(): Props = if (dispatcher == Props.default.dispatcher) Props.default else Props(dispatcher = dispatcher) } /** diff --git a/akka-actor/src/main/scala/akka/dispatch/AbstractDispatcher.scala b/akka-actor/src/main/scala/akka/dispatch/AbstractDispatcher.scala index 12eea14ffc..546373c33f 100644 --- a/akka-actor/src/main/scala/akka/dispatch/AbstractDispatcher.scala +++ b/akka-actor/src/main/scala/akka/dispatch/AbstractDispatcher.scala @@ -16,8 +16,10 @@ import akka.event.Logging.LogEventException import akka.jsr166y.{ ForkJoinTask, ForkJoinPool } import akka.util.{ Unsafe, Duration, NonFatal, Index } -final case class Envelope(val message: Any, val sender: ActorRef)(system: ActorSystem) { - if (message.isInstanceOf[AnyRef]) { +final case class Envelope private (val message: Any, val sender: ActorRef) + +object Envelope { + def apply(message: Any, sender: ActorRef, system: ActorSystem): Envelope = { val msg = message.asInstanceOf[AnyRef] if (msg eq null) throw new InvalidMessageException("Message is null") if (system.settings.SerializeAllMessages && !msg.isInstanceOf[NoSerializationVerificationNeeded]) { @@ -30,6 +32,7 @@ final case class Envelope(val message: Any, val sender: ActorRef)(system: ActorS } } } + new Envelope(message, sender) } } @@ -228,8 +231,8 @@ private[akka] object MessageDispatcher { } { val status = if (a.isTerminated) " (terminated)" else " (alive)" val messages = a match { - case l: LocalActorRef ⇒ " " + l.underlying.mailbox.numberOfMessages + " messages" - case _ ⇒ " " + a.getClass + case r: ActorRefWithCell ⇒ " " + r.underlying.numberOfMessages + " messages" + case _ ⇒ " " + a.getClass } val parent = a match { case i: 
InternalActorRef ⇒ ", parent: " + i.getParent @@ -265,7 +268,7 @@ abstract class MessageDispatcher(val prerequisites: DispatcherPrerequisites) ext /** * Creates and returns a mailbox for the given actor. */ - protected[akka] def createMailbox(actor: ActorCell): Mailbox //FIXME should this really be private[akka]? + protected[akka] def createMailbox(actor: Cell): Mailbox //FIXME should this really be private[akka]? /** * Identifier of this dispatcher, corresponds to the full key diff --git a/akka-actor/src/main/scala/akka/dispatch/BalancingDispatcher.scala b/akka-actor/src/main/scala/akka/dispatch/BalancingDispatcher.scala index 6beee3c9da..5b8c5209b0 100644 --- a/akka-actor/src/main/scala/akka/dispatch/BalancingDispatcher.scala +++ b/akka-actor/src/main/scala/akka/dispatch/BalancingDispatcher.scala @@ -9,6 +9,7 @@ import annotation.tailrec import akka.util.{ Duration, Helpers } import java.util.{ Comparator, Iterator } import java.util.concurrent.{ Executor, LinkedBlockingQueue, ConcurrentLinkedQueue, ConcurrentSkipListSet } +import akka.actor.ActorSystemImpl /** * An executor based event driven dispatcher which will try to redistribute work from busy actors to idle actors. 
It is assumed @@ -46,24 +47,25 @@ class BalancingDispatcher( /** * INTERNAL USE ONLY */ - private[akka] val messageQueue: MessageQueue = mailboxType.create(None) + private[akka] val messageQueue: MessageQueue = mailboxType.create(None, None) - private class SharingMailbox(_actor: ActorCell, _messageQueue: MessageQueue) extends Mailbox(_actor, _messageQueue) with DefaultSystemMessageQueue { + private class SharingMailbox(val system: ActorSystemImpl, _messageQueue: MessageQueue) + extends Mailbox(_messageQueue) with DefaultSystemMessageQueue { override def cleanUp(): Unit = { - val dlq = actor.systemImpl.deadLetterMailbox + val dlq = system.deadLetterMailbox //Don't call the original implementation of this since it scraps all messages, and we don't want to do that var message = systemDrain(NoMessage) while (message ne null) { // message must be “virgin” before being able to systemEnqueue again val next = message.next message.next = null - dlq.systemEnqueue(actor.self, message) + dlq.systemEnqueue(system.deadLetters, message) message = next } } } - protected[akka] override def createMailbox(actor: ActorCell): Mailbox = new SharingMailbox(actor, messageQueue) + protected[akka] override def createMailbox(actor: akka.actor.Cell): Mailbox = new SharingMailbox(actor.systemImpl, messageQueue) protected[akka] override def register(actor: ActorCell): Unit = { super.register(actor) diff --git a/akka-actor/src/main/scala/akka/dispatch/Dispatcher.scala b/akka-actor/src/main/scala/akka/dispatch/Dispatcher.scala index 3c17ab8db4..d382cc5ecc 100644 --- a/akka-actor/src/main/scala/akka/dispatch/Dispatcher.scala +++ b/akka-actor/src/main/scala/akka/dispatch/Dispatcher.scala @@ -82,7 +82,8 @@ class Dispatcher( /** * INTERNAL USE ONLY */ - protected[akka] def createMailbox(actor: ActorCell): Mailbox = new Mailbox(actor, mailboxType.create(Some(actor))) with DefaultSystemMessageQueue + protected[akka] def createMailbox(actor: akka.actor.Cell): Mailbox = + new 
Mailbox(mailboxType.create(Some(actor.self), Some(actor.system))) with DefaultSystemMessageQueue /** * INTERNAL USE ONLY diff --git a/akka-actor/src/main/scala/akka/dispatch/Mailbox.scala b/akka-actor/src/main/scala/akka/dispatch/Mailbox.scala index 25fc0250af..36b386cef1 100644 --- a/akka-actor/src/main/scala/akka/dispatch/Mailbox.scala +++ b/akka-actor/src/main/scala/akka/dispatch/Mailbox.scala @@ -6,6 +6,7 @@ package akka.dispatch import akka.AkkaException import java.util.{ Comparator, PriorityQueue, Queue, Deque } import akka.util._ +import akka.actor.{ ActorCell, ActorRef, Cell } import java.util.concurrent._ import annotation.tailrec import akka.event.Logging.Error @@ -41,11 +42,32 @@ private[akka] object Mailbox { * * INTERNAL API */ -private[akka] abstract class Mailbox(val actor: ActorCell, val messageQueue: MessageQueue) +private[akka] abstract class Mailbox(val messageQueue: MessageQueue) extends SystemMessageQueue with Runnable { import Mailbox._ + /* + * This is needed for actually executing the mailbox, i.e. invoking the + * ActorCell. There are situations (e.g. RepointableActorRef) where a Mailbox + * is constructed but we know that we will not execute it, in which case this + * will be null. It must be a var to support switching into an “active” + * mailbox, should the owning ActorRef turn local. + * + * ANOTHER THING, IMPORTANT: + * + * actorCell.start() publishes actorCell & self to the dispatcher, which + * means that messages may be processed theoretically before self’s constructor + * ends. The JMM guarantees visibility for final fields only after the end + * of the constructor, so safe publication requires that THIS WRITE BELOW + * stay as it is. + */ + @volatile + var actor: ActorCell = _ + def setActor(cell: ActorCell): Unit = actor = cell + + def dispatcher: MessageDispatcher = actor.dispatcher + /** * Try to enqueue the message to this queue, or throw an exception. 
*/ @@ -230,11 +252,12 @@ private[akka] abstract class Mailbox(val actor: ActorCell, val messageQueue: Mes * if we closed the mailbox, we must dump the remaining system messages * to deadLetters (this is essential for DeathWatch) */ + val dlm = actor.systemImpl.deadLetterMailbox while (nextMessage ne null) { val msg = nextMessage nextMessage = nextMessage.next msg.next = null - try actor.systemImpl.deadLetterMailbox.systemEnqueue(actor.self, msg) + try dlm.systemEnqueue(actor.self, msg) catch { case NonFatal(e) ⇒ actor.system.eventStream.publish( Error(e, actor.self.path.toString, this.getClass, "error while enqueuing " + msg + " to deadLetters: " + e.getMessage)) @@ -244,9 +267,6 @@ private[akka] abstract class Mailbox(val actor: ActorCell, val messageQueue: Mes if (failure ne null) actor.handleInvokeFailure(failure, failure.getMessage) } - @inline - final def dispatcher: MessageDispatcher = actor.dispatcher - /** * Overridable callback to clean up the mailbox, * called when an actor is unregistered. @@ -265,7 +285,7 @@ private[akka] abstract class Mailbox(val actor: ActorCell, val messageQueue: Mes } if (messageQueue ne null) // needed for CallingThreadDispatcher, which never calls Mailbox.run() - messageQueue.cleanUp(actor, actor.systemImpl.deadLetterQueue) + messageQueue.cleanUp(actor.self, actor.systemImpl.deadLetterQueue) } } @@ -303,7 +323,7 @@ trait MessageQueue { * which is passed in. The owner of this MessageQueue is passed in if * available (e.g. for creating DeadLetters()), “/deadletters” otherwise. 
*/ - def cleanUp(owner: ActorContext, deadLetters: MessageQueue): Unit + def cleanUp(owner: ActorRef, deadLetters: MessageQueue): Unit } /** @@ -331,10 +351,11 @@ private[akka] trait DefaultSystemMessageQueue { self: Mailbox ⇒ @tailrec final def systemEnqueue(receiver: ActorRef, message: SystemMessage): Unit = { assert(message.next eq null) - if (Mailbox.debug) println(actor.self + " having enqueued " + message) + if (Mailbox.debug) println(receiver + " having enqueued " + message) val head = systemQueueGet - if (head == NoMessage) actor.system.deadLetterMailbox.systemEnqueue(receiver, message) - else { + if (head == NoMessage) { + if (actor ne null) actor.systemImpl.deadLetterMailbox.systemEnqueue(receiver, message) + } else { /* * this write is safely published by the compareAndSet contained within * systemQueuePut; “Intra-Thread Semantics” on page 12 of the JSR133 spec @@ -366,11 +387,11 @@ trait QueueBasedMessageQueue extends MessageQueue { def queue: Queue[Envelope] def numberOfMessages = queue.size def hasMessages = !queue.isEmpty - def cleanUp(owner: ActorContext, deadLetters: MessageQueue): Unit = { + def cleanUp(owner: ActorRef, deadLetters: MessageQueue): Unit = { if (hasMessages) { var envelope = dequeue while (envelope ne null) { - deadLetters.enqueue(owner.self, envelope) + deadLetters.enqueue(owner, envelope) envelope = dequeue } } @@ -445,10 +466,20 @@ trait BoundedDequeBasedMessageQueueSemantics extends DequeBasedMessageQueue { } /** - * MailboxType is a factory to create MessageQueues for an optionally provided ActorContext + * MailboxType is a factory to create MessageQueues for an optionally + * provided ActorContext. + * + * Possibly Important Notice + * + * When implementing a custom mailbox type, be aware that there is special + * semantics attached to `system.actorOf()` in that sending to the returned + * ActorRef may—for a short period of time—enqueue the messages first in a + * dummy queue. 
Top-level actors are created in two steps, and only after the + * guardian actor has performed that second step will all previously sent + * messages be transferred from the dummy queue into the real mailbox. */ trait MailboxType { - def create(owner: Option[ActorContext]): MessageQueue + def create(owner: Option[ActorRef], system: Option[ActorSystem]): MessageQueue } /** @@ -458,7 +489,7 @@ case class UnboundedMailbox() extends MailboxType { def this(settings: ActorSystem.Settings, config: Config) = this() - final override def create(owner: Option[ActorContext]): MessageQueue = + final override def create(owner: Option[ActorRef], system: Option[ActorSystem]): MessageQueue = new ConcurrentLinkedQueue[Envelope]() with QueueBasedMessageQueue with UnboundedMessageQueueSemantics { final def queue: Queue[Envelope] = this } @@ -475,7 +506,7 @@ case class BoundedMailbox( final val capacity: Int, final val pushTimeOut: Durat if (capacity < 0) throw new IllegalArgumentException("The capacity for BoundedMailbox can not be negative") if (pushTimeOut eq null) throw new IllegalArgumentException("The push time-out for BoundedMailbox can not be null") - final override def create(owner: Option[ActorContext]): MessageQueue = + final override def create(owner: Option[ActorRef], system: Option[ActorSystem]): MessageQueue = new LinkedBlockingQueue[Envelope](capacity) with QueueBasedMessageQueue with BoundedMessageQueueSemantics { final def queue: BlockingQueue[Envelope] = this final val pushTimeOut = BoundedMailbox.this.pushTimeOut @@ -488,7 +519,7 @@ case class BoundedMailbox( final val capacity: Int, final val pushTimeOut: Durat */ class UnboundedPriorityMailbox( final val cmp: Comparator[Envelope], final val initialCapacity: Int) extends MailboxType { def this(cmp: Comparator[Envelope]) = this(cmp, 11) - final override def create(owner: Option[ActorContext]): MessageQueue = + final override def create(owner: Option[ActorRef], system: Option[ActorSystem]): MessageQueue = new 
PriorityBlockingQueue[Envelope](initialCapacity, cmp) with QueueBasedMessageQueue with UnboundedMessageQueueSemantics { final def queue: Queue[Envelope] = this } @@ -503,7 +534,7 @@ class BoundedPriorityMailbox( final val cmp: Comparator[Envelope], final val cap if (capacity < 0) throw new IllegalArgumentException("The capacity for BoundedMailbox can not be negative") if (pushTimeOut eq null) throw new IllegalArgumentException("The push time-out for BoundedMailbox can not be null") - final override def create(owner: Option[ActorContext]): MessageQueue = + final override def create(owner: Option[ActorRef], system: Option[ActorSystem]): MessageQueue = new BoundedBlockingQueue[Envelope](capacity, new PriorityQueue[Envelope](11, cmp)) with QueueBasedMessageQueue with BoundedMessageQueueSemantics { final def queue: BlockingQueue[Envelope] = this final val pushTimeOut = BoundedPriorityMailbox.this.pushTimeOut @@ -517,7 +548,7 @@ case class UnboundedDequeBasedMailbox() extends MailboxType { def this(settings: ActorSystem.Settings, config: Config) = this() - final override def create(owner: Option[ActorContext]): MessageQueue = + final override def create(owner: Option[ActorRef], system: Option[ActorSystem]): MessageQueue = new LinkedBlockingDeque[Envelope]() with DequeBasedMessageQueue with UnboundedDequeBasedMessageQueueSemantics { final val queue = this } @@ -534,7 +565,7 @@ case class BoundedDequeBasedMailbox( final val capacity: Int, final val pushTime if (capacity < 0) throw new IllegalArgumentException("The capacity for BoundedDequeBasedMailbox can not be negative") if (pushTimeOut eq null) throw new IllegalArgumentException("The push time-out for BoundedDequeBasedMailbox can not be null") - final override def create(owner: Option[ActorContext]): MessageQueue = + final override def create(owner: Option[ActorRef], system: Option[ActorSystem]): MessageQueue = new LinkedBlockingDeque[Envelope](capacity) with DequeBasedMessageQueue with 
BoundedDequeBasedMessageQueueSemantics { final val queue = this final val pushTimeOut = BoundedDequeBasedMailbox.this.pushTimeOut diff --git a/akka-actor/src/main/scala/akka/event/EventBus.scala b/akka-actor/src/main/scala/akka/event/EventBus.scala index 6a5cc67cc4..cad7351bbb 100644 --- a/akka-actor/src/main/scala/akka/event/EventBus.scala +++ b/akka-actor/src/main/scala/akka/event/EventBus.scala @@ -324,7 +324,17 @@ trait ActorClassification { this: ActorEventBus with ActorClassifier ⇒ case some ⇒ some foreach { _ ! event } } - def subscribe(subscriber: Subscriber, to: Classifier): Boolean = associate(to, subscriber) - def unsubscribe(subscriber: Subscriber, from: Classifier): Boolean = dissociate(from, subscriber) - def unsubscribe(subscriber: Subscriber): Unit = dissociate(subscriber) + def subscribe(subscriber: Subscriber, to: Classifier): Boolean = + if (subscriber eq null) throw new IllegalArgumentException("Subscriber is null") + else if (to eq null) throw new IllegalArgumentException("Classifier is null") + else associate(to, subscriber) + + def unsubscribe(subscriber: Subscriber, from: Classifier): Boolean = + if (subscriber eq null) throw new IllegalArgumentException("Subscriber is null") + else if (from eq null) throw new IllegalArgumentException("Classifier is null") + else dissociate(from, subscriber) + + def unsubscribe(subscriber: Subscriber): Unit = + if (subscriber eq null) throw new IllegalArgumentException("Subscriber is null") + else dissociate(subscriber) } diff --git a/akka-actor/src/main/scala/akka/event/EventStream.scala b/akka-actor/src/main/scala/akka/event/EventStream.scala index 172cf052ca..2cc9bf8c2b 100644 --- a/akka-actor/src/main/scala/akka/event/EventStream.scala +++ b/akka-actor/src/main/scala/akka/event/EventStream.scala @@ -39,17 +39,20 @@ class EventStream(private val debug: Boolean = false) extends LoggingBus with Su } override def subscribe(subscriber: ActorRef, channel: Class[_]): Boolean = { + if (subscriber eq null) throw 
new IllegalArgumentException("subscriber is null") if (debug) publish(Logging.Debug(simpleName(this), this.getClass, "subscribing " + subscriber + " to channel " + channel)) super.subscribe(subscriber, channel) } override def unsubscribe(subscriber: ActorRef, channel: Class[_]): Boolean = { + if (subscriber eq null) throw new IllegalArgumentException("subscriber is null") val ret = super.unsubscribe(subscriber, channel) if (debug) publish(Logging.Debug(simpleName(this), this.getClass, "unsubscribing " + subscriber + " from channel " + channel)) ret } override def unsubscribe(subscriber: ActorRef) { + if (subscriber eq null) throw new IllegalArgumentException("subscriber is null") super.unsubscribe(subscriber) if (debug) publish(Logging.Debug(simpleName(this), this.getClass, "unsubscribing " + subscriber + " from all channels")) } diff --git a/akka-actor/src/main/scala/akka/routing/Routing.scala b/akka-actor/src/main/scala/akka/routing/Routing.scala index bcd92794da..cb0f5ee09b 100644 --- a/akka-actor/src/main/scala/akka/routing/Routing.scala +++ b/akka-actor/src/main/scala/akka/routing/Routing.scala @@ -23,42 +23,28 @@ import scala.runtime.ScalaRunTime * send a message to on (or more) of these actors. */ private[akka] class RoutedActorRef(_system: ActorSystemImpl, _props: Props, _supervisor: InternalActorRef, _path: ActorPath) - extends LocalActorRef( - _system, - _props.copy(creator = () ⇒ _props.routerConfig.createActor(), dispatcher = _props.routerConfig.routerDispatcher), - _supervisor, - _path) { + extends RepointableActorRef(_system, _props, _supervisor, _path) { - /* - * CAUTION: RoutedActorRef is PROBLEMATIC - * ====================================== - * - * We are constructing/assembling the children outside of the scope of the - * Router actor, inserting them in its childrenRef list, which is not at all - * synchronized. This is done exactly once at start-up, all other accesses - * are done from the Router actor. 
This means that the only thing which is - * really hairy is making sure that the Router does not touch its childrenRefs - * before we are done with them: lock the monitor of the actor cell (hence the - * override of newActorCell) and use that to block the Router constructor for - * as long as it takes to setup the RoutedActorRef itself. - * - * ===> I M P O R T A N T N O T I C E <=== - * - * DO NOT THROW ANY EXCEPTIONS BEFORE THE FOLLOWING TRY-BLOCK WITHOUT - * EXITING THE MONITOR OF THE actorCell! - * - * This is important, just don’t do it! No kidding. - */ - override def newActorCell( - system: ActorSystemImpl, - ref: InternalActorRef, - props: Props, - supervisor: InternalActorRef): ActorCell = { - val cell = super.newActorCell(system, ref, props, supervisor) - Unsafe.instance.monitorEnter(cell) - cell + // verify that a BalancingDispatcher is not used with a Router + if (_props.routerConfig != NoRouter && _system.dispatchers.isBalancingDispatcher(_props.routerConfig.routerDispatcher)) { + throw new ConfigurationException( + "Configuration for " + this + + " is invalid - you can not use a 'BalancingDispatcher' as a Router's dispatcher, you can however use it for the routees.") } + _props.routerConfig.verifyConfig() + + override def newCell(): Cell = new RoutedActorCell(system, this, props, supervisor) + +} + +private[akka] class RoutedActorCell(_system: ActorSystemImpl, _ref: InternalActorRef, _props: Props, _supervisor: InternalActorRef) + extends ActorCell( + _system, + _ref, + _props.copy(creator = () ⇒ _props.routerConfig.createActor(), dispatcher = _props.routerConfig.routerDispatcher), + _supervisor) { + private[akka] val routerConfig = _props.routerConfig private[akka] val routeeProps = _props.copy(routerConfig = NoRouter) private[akka] val resizeInProgress = new AtomicBoolean @@ -72,39 +58,28 @@ private[akka] class RoutedActorRef(_system: ActorSystemImpl, _props: Props, _sup private var _routeeProvider: RouteeProvider = _ def routeeProvider = 
_routeeProvider - val route = - try { - // verify that a BalancingDispatcher is not used with a Router - if (_props.routerConfig != NoRouter && _system.dispatchers.isBalancingDispatcher(_props.routerConfig.routerDispatcher)) { - actorContext.stop(actorContext.self) - throw new ConfigurationException( - "Configuration for actor [" + _path.toString + - "] is invalid - you can not use a 'BalancingDispatcher' as a Router's dispatcher, you can however use it for the routees.") - } - - _routeeProvider = routerConfig.createRouteeProvider(actorContext) - val r = routerConfig.createRoute(routeeProps, routeeProvider) - // initial resize, before message send - routerConfig.resizer foreach { r ⇒ - if (r.isTimeForResize(resizeCounter.getAndIncrement())) - r.resize(routeeProps, routeeProvider) - } - r - } finally { - assert(Thread.holdsLock(actorContext)) - Unsafe.instance.monitorExit(actorContext) // unblock Router’s constructor + val route = { + _routeeProvider = routerConfig.createRouteeProvider(this) + val r = routerConfig.createRoute(routeeProps, routeeProvider) + // initial resize, before message send + routerConfig.resizer foreach { r ⇒ + if (r.isTimeForResize(resizeCounter.getAndIncrement())) + r.resize(routeeProps, routeeProvider) } + r + } if (routerConfig.resizer.isEmpty && _routees.isEmpty) throw new ActorInitializationException("router " + routerConfig + " did not register routees!") + start() + /* * end of construction */ def applyRoute(sender: ActorRef, message: Any): Iterable[Destination] = message match { - case _: AutoReceivedMessage ⇒ Destination(this, this) :: Nil - case Terminated(_) ⇒ Destination(this, this) :: Nil + case _: AutoReceivedMessage ⇒ Destination(self, self) :: Nil case CurrentRoutees ⇒ sender ! 
RouterRoutees(_routees) Nil @@ -122,7 +97,7 @@ private[akka] class RoutedActorRef(_system: ActorSystemImpl, _props: Props, _sup private[akka] def addRoutees(newRoutees: IndexedSeq[ActorRef]): Unit = { _routees = _routees ++ newRoutees // subscribe to Terminated messages for all route destinations, to be handled by Router actor - newRoutees foreach underlying.watch + newRoutees foreach watch } /** @@ -133,13 +108,13 @@ private[akka] class RoutedActorRef(_system: ActorSystemImpl, _props: Props, _sup */ private[akka] def removeRoutees(abandonedRoutees: IndexedSeq[ActorRef]): Unit = { _routees = _routees diff abandonedRoutees - abandonedRoutees foreach underlying.unwatch + abandonedRoutees foreach unwatch } - override def !(message: Any)(implicit sender: ActorRef = null): Unit = { + override def tell(message: Any, sender: ActorRef): Unit = { resize() - val s = if (sender eq null) underlying.system.deadLetters else sender + val s = if (sender eq null) system.deadLetters else sender val msg = message match { case Broadcast(m) ⇒ m @@ -147,15 +122,18 @@ private[akka] class RoutedActorRef(_system: ActorSystemImpl, _props: Props, _sup } applyRoute(s, message) match { - case Destination(_, x) :: Nil if x eq this ⇒ super.!(message)(s) - case refs ⇒ refs foreach (p ⇒ p.recipient.!(msg)(p.sender)) + case Destination(_, x) :: Nil if x == self ⇒ super.tell(message, s) + case refs ⇒ + refs foreach (p ⇒ + if (p.recipient == self) super.tell(msg, p.sender) + else p.recipient.!(msg)(p.sender)) } } def resize(): Unit = { for (r ← routerConfig.resizer) { if (r.isTimeForResize(resizeCounter.getAndIncrement()) && resizeInProgress.compareAndSet(false, true)) - super.!(Router.Resize) + super.tell(Router.Resize, self) } } } @@ -212,6 +190,11 @@ trait RouterConfig { */ def resizer: Option[Resizer] = None + /** + * Check that everything is there which is needed. Called in constructor of RoutedActorRef to fail early. 
+ */ + def verifyConfig(): Unit = {} + } /** @@ -227,7 +210,7 @@ class RouteeProvider(val context: ActorContext, val resizer: Option[Resizer]) { * Not thread safe, but intended to be called from protected points, such as * `RouterConfig.createRoute` and `Resizer.resize`. */ - def registerRoutees(routees: IndexedSeq[ActorRef]): Unit = routedRef.addRoutees(routees) + def registerRoutees(routees: IndexedSeq[ActorRef]): Unit = routedCell.addRoutees(routees) /** * Adds the routees to the router. @@ -247,7 +230,7 @@ class RouteeProvider(val context: ActorContext, val resizer: Option[Resizer]) { * Not thread safe, but intended to be called from protected points, such as * `Resizer.resize`. */ - def unregisterRoutees(routees: IndexedSeq[ActorRef]): Unit = routedRef.removeRoutees(routees) + def unregisterRoutees(routees: IndexedSeq[ActorRef]): Unit = routedCell.removeRoutees(routees) def createRoutees(props: Props, nrOfInstances: Int, routees: Iterable[String]): IndexedSeq[ActorRef] = (nrOfInstances, routees) match { @@ -264,9 +247,9 @@ class RouteeProvider(val context: ActorContext, val resizer: Option[Resizer]) { /** * All routees of the router */ - def routees: IndexedSeq[ActorRef] = routedRef.routees + def routees: IndexedSeq[ActorRef] = routedCell.routees - private def routedRef = context.self.asInstanceOf[RoutedActorRef] + private def routedCell = context.asInstanceOf[RoutedActorCell] } /** @@ -298,12 +281,9 @@ trait CustomRoute { */ trait Router extends Actor { - // make sure that we synchronize properly to get the childrenRefs into our CPU cache - val ref = context.synchronized { - self match { - case x: RoutedActorRef ⇒ x - case _ ⇒ throw new ActorInitializationException("Router actor can only be used in RoutedActorRef") - } + val ref = context match { + case x: RoutedActorCell ⇒ x + case _ ⇒ throw new ActorInitializationException("Router actor can only be used in RoutedActorRef, not in " + context.getClass) } final def receive = ({ @@ -417,8 +397,10 @@ class 
FromConfig(val routerDispatcher: String = Dispatchers.DefaultDispatcherId) def this() = this(Dispatchers.DefaultDispatcherId) - def createRoute(props: Props, routeeProvider: RouteeProvider): Route = - throw new ConfigurationException("router " + routeeProvider.context.self + " needs external configuration from file (e.g. application.conf)") + override def verifyConfig(): Unit = + throw new ConfigurationException("router needs external configuration from file (e.g. application.conf)") + + def createRoute(props: Props, routeeProvider: RouteeProvider): Route = null def supervisorStrategy: SupervisorStrategy = Router.defaultSupervisorStrategy } @@ -774,9 +756,11 @@ trait SmallestMailboxLike { this: RouterConfig ⇒ * routers based on mailbox and actor internal state. */ protected def isProcessingMessage(a: ActorRef): Boolean = a match { - case x: LocalActorRef ⇒ - val cell = x.underlying - cell.mailbox.isScheduled && cell.currentMessage != null + case x: ActorRefWithCell ⇒ + x.underlying match { + case cell: ActorCell ⇒ cell.mailbox.isScheduled && cell.currentMessage != null + case _ ⇒ false + } case _ ⇒ false } @@ -788,8 +772,8 @@ trait SmallestMailboxLike { this: RouterConfig ⇒ * routers based on mailbox and actor internal state. */ protected def hasMessages(a: ActorRef): Boolean = a match { - case x: LocalActorRef ⇒ x.underlying.mailbox.hasMessages - case _ ⇒ false + case x: ActorRefWithCell ⇒ x.underlying.hasMessages + case _ ⇒ false } /** @@ -799,8 +783,12 @@ trait SmallestMailboxLike { this: RouterConfig ⇒ * routers based on mailbox and actor internal state. */ protected def isSuspended(a: ActorRef): Boolean = a match { - case x: LocalActorRef ⇒ x.underlying.mailbox.isSuspended - case _ ⇒ false + case x: ActorRefWithCell ⇒ + x.underlying match { + case cell: ActorCell ⇒ cell.mailbox.isSuspended + case _ ⇒ true + } + case _ ⇒ false } /** @@ -810,8 +798,8 @@ trait SmallestMailboxLike { this: RouterConfig ⇒ * routers based on mailbox and actor internal state. 
*/ protected def numberOfMessages(a: ActorRef): Int = a match { - case x: LocalActorRef ⇒ x.underlying.mailbox.numberOfMessages - case _ ⇒ 0 + case x: ActorRefWithCell ⇒ x.underlying.numberOfMessages + case _ ⇒ 0 } def createRoute(props: Props, routeeProvider: RouteeProvider): Route = { @@ -1283,12 +1271,20 @@ case class DefaultResizer( */ def pressure(routees: IndexedSeq[ActorRef]): Int = { routees count { - case a: LocalActorRef ⇒ - val cell = a.underlying - pressureThreshold match { - case 1 ⇒ cell.mailbox.isScheduled && cell.mailbox.hasMessages - case i if i < 1 ⇒ cell.mailbox.isScheduled && cell.currentMessage != null - case threshold ⇒ cell.mailbox.numberOfMessages >= threshold + case a: ActorRefWithCell ⇒ + a.underlying match { + case cell: ActorCell ⇒ + pressureThreshold match { + case 1 ⇒ cell.mailbox.isScheduled && cell.mailbox.hasMessages + case i if i < 1 ⇒ cell.mailbox.isScheduled && cell.currentMessage != null + case threshold ⇒ cell.mailbox.numberOfMessages >= threshold + } + case cell ⇒ + pressureThreshold match { + case 1 ⇒ cell.hasMessages + case i if i < 1 ⇒ true // unstarted cells are always busy, for example + case threshold ⇒ cell.numberOfMessages >= threshold + } } case x ⇒ false diff --git a/akka-actor/src/main/scala/akka/util/Duration.scala b/akka-actor/src/main/scala/akka/util/Duration.scala index b37cf24c3b..3a1c2e80c8 100644 --- a/akka-actor/src/main/scala/akka/util/Duration.scala +++ b/akka-actor/src/main/scala/akka/util/Duration.scala @@ -9,16 +9,22 @@ import TimeUnit._ import java.lang.{ Double ⇒ JDouble } //TODO add @SerialVersionUID(1L) when SI-4804 is fixed -case class Deadline private (time: Duration) { +case class Deadline private (time: Duration) extends Ordered[Deadline] { def +(other: Duration): Deadline = copy(time = time + other) def -(other: Duration): Deadline = copy(time = time - other) def -(other: Deadline): Duration = time - other.time def timeLeft: Duration = this - Deadline.now def hasTimeLeft(): Boolean = 
!isOverdue() //Code reuse FTW def isOverdue(): Boolean = (time.toNanos - System.nanoTime()) < 0 + def compare(that: Deadline) = this.time compare that.time } + object Deadline { def now: Deadline = Deadline(Duration(System.nanoTime, NANOSECONDS)) + + implicit object DeadlineIsOrdered extends Ordering[Deadline] { + def compare(a: Deadline, b: Deadline) = a compare b + } } object Duration { diff --git a/akka-agent/src/main/scala/akka/agent/Agent.scala b/akka-agent/src/main/scala/akka/agent/Agent.scala index 64834178a8..ea3d8719cd 100644 --- a/akka-agent/src/main/scala/akka/agent/Agent.scala +++ b/akka-agent/src/main/scala/akka/agent/Agent.scala @@ -97,7 +97,7 @@ object Agent { */ class Agent[T](initialValue: T, system: ActorSystem) { private val ref = Ref(initialValue) - private val updater = system.actorOf(Props(new AgentUpdater(this, ref))).asInstanceOf[LocalActorRef] //TODO can we avoid this somehow? + private val updater = system.actorOf(Props(new AgentUpdater(this, ref))).asInstanceOf[InternalActorRef] //TODO can we avoid this somehow? /** * Read the internal state of the agent. diff --git a/akka-cluster/src/main/resources/reference.conf b/akka-cluster/src/main/resources/reference.conf index b9104fe6cf..d226506acc 100644 --- a/akka-cluster/src/main/resources/reference.conf +++ b/akka-cluster/src/main/resources/reference.conf @@ -8,9 +8,19 @@ akka { cluster { - # node to join - the full URI defined by a string on the form of "akka://system@hostname:port" - # leave as empty string if the node should be a singleton cluster - node-to-join = "" + # Initial contact points of the cluster. Nodes to join at startup if auto-join = on. + # The seed nodes also play the role of deputy nodes (the nodes responsible + # for breaking network partitions). + # Comma separated full URIs defined by a string on the form of "akka://system@hostname:port" + # Leave as empty if the node should be a singleton cluster. 
+ seed-nodes = [] + + # how long to wait for one of the seed nodes to reply to initial join request + seed-node-timeout = 5s + + # Automatic join the seed-nodes at startup. + # If seed-nodes is empty it will join itself and become a single node cluster. + auto-join = on # should the 'leader' in the cluster be allowed to automatically mark unreachable nodes as DOWN? auto-down = on @@ -36,6 +46,10 @@ akka { # how often should the node move nodes, marked as unreachable by the failure detector, out of the membership ring? unreachable-nodes-reaper-interval = 1s + # A joining node stops sending heartbeats to the node to join if it hasn't become member + # of the cluster within this deadline. + join-timeout = 60s + failure-detector { # defines the failure detector threshold @@ -43,9 +57,23 @@ akka { # a quick detection in the event of a real crash. Conversely, a high # threshold generates fewer mistakes but needs more time to detect # actual crashes - threshold = 8 + threshold = 8.0 - implementation-class = "" + # Minimum standard deviation to use for the normal distribution in + # AccrualFailureDetector. Too low standard deviation might result in + # too much sensitivity for sudden, but normal, deviations in heartbeat + # inter arrival times. + min-std-deviation = 100 ms + + # Number of potentially lost/delayed heartbeats that will be + # accepted before considering it to be an anomaly. + # It is a factor of heartbeat-interval. + # This margin is important to be able to survive sudden, occasional, + # pauses in heartbeat arrivals, due to for example garbage collect or + # network drop. 
+ acceptable-heartbeat-pause = 3s + + implementation-class = "akka.cluster.AccrualFailureDetector" max-sample-size = 1000 } diff --git a/akka-cluster/src/main/scala/akka/cluster/AccrualFailureDetector.scala b/akka-cluster/src/main/scala/akka/cluster/AccrualFailureDetector.scala index 6632111f00..c397d065e5 100644 --- a/akka-cluster/src/main/scala/akka/cluster/AccrualFailureDetector.scala +++ b/akka-cluster/src/main/scala/akka/cluster/AccrualFailureDetector.scala @@ -7,50 +7,98 @@ package akka.cluster import akka.actor.{ ActorSystem, Address, ExtendedActorSystem } import akka.remote.RemoteActorRefProvider import akka.event.Logging - import scala.collection.immutable.Map import scala.annotation.tailrec - import java.util.concurrent.atomic.AtomicReference +import java.util.concurrent.TimeUnit.NANOSECONDS +import akka.util.Duration +import akka.util.duration._ +object AccrualFailureDetector { + private def realClock: () ⇒ Long = () ⇒ NANOSECONDS.toMillis(System.nanoTime) +} /** * Implementation of 'The Phi Accrual Failure Detector' by Hayashibara et al. as defined in their paper: * [http://ddg.jaist.ac.jp/pub/HDY+04.pdf] - *

- * A low threshold is prone to generate many wrong suspicions but ensures a quick detection in the event - * of a real crash. Conversely, a high threshold generates fewer mistakes but needs more time to detect - * actual crashes - *

- * Default threshold is 8, but can be configured in the Akka config. + * + * The suspicion level of failure is given by a value called φ (phi). + * The basic idea of the φ failure detector is to express the value of φ on a scale that + * is dynamically adjusted to reflect current network conditions. A configurable + * threshold is used to decide if φ is considered to be a failure. + * + * The value of φ is calculated as: + * + * {{{ + * φ = -log10(1 - F(timeSinceLastHeartbeat)) + * }}} + * where F is the cumulative distribution function of a normal distribution with mean + * and standard deviation estimated from historical heartbeat inter-arrival times. + * + * + * @param system Belongs to the [[akka.actor.ActorSystem]]. Used for logging. + * + * @param threshold A low threshold is prone to generate many wrong suspicions but ensures a quick detection in the event + * of a real crash. Conversely, a high threshold generates fewer mistakes but needs more time to detect + * actual crashes + * + * @param maxSampleSize Number of samples to use for calculation of mean and standard deviation of + * inter-arrival times. + * + * @param minStdDeviation Minimum standard deviation to use for the normal distribution used when calculating phi. + * Too low standard deviation might result in too much sensitivity for sudden, but normal, deviations + * in heartbeat inter arrival times. + * + * @param acceptableHeartbeatPause Duration corresponding to number of potentially lost/delayed + * heartbeats that will be accepted before considering it to be an anomaly. + * This margin is important to be able to survive sudden, occasional, pauses in heartbeat + * arrivals, due to, for example, garbage collection or network drops. 
+ * + * @param firstHeartbeatEstimate Bootstrap the stats with heartbeats that correspond to + * this duration, with a rather high standard deviation (since environment is unknown + * in the beginning) + * + * @param clock The clock, returning current time in milliseconds, but can be faked for testing + * purposes. It is only used for measuring intervals (duration). + * */ class AccrualFailureDetector( val system: ActorSystem, - val threshold: Int = 8, - val maxSampleSize: Int = 1000, - val timeMachine: () ⇒ Long = System.currentTimeMillis) extends FailureDetector { + val threshold: Double, + val maxSampleSize: Int, + val minStdDeviation: Duration, + val acceptableHeartbeatPause: Duration, + val firstHeartbeatEstimate: Duration, + val clock: () ⇒ Long = AccrualFailureDetector.realClock) extends FailureDetector { + import AccrualFailureDetector._ + + /** + * Constructor that picks configuration from the settings. + */ def this( system: ActorSystem, - settings: ClusterSettings, - timeMachine: () ⇒ Long = System.currentTimeMillis) = + settings: ClusterSettings) = this( system, settings.FailureDetectorThreshold, settings.FailureDetectorMaxSampleSize, - timeMachine) - - private final val PhiFactor = 1.0 / math.log(10.0) + minStdDeviation = settings.FailureDetectorMinStdDeviation, + acceptableHeartbeatPause = settings.FailureDetectorAcceptableHeartbeatPause, + firstHeartbeatEstimate = settings.HeartbeatInterval, + clock = AccrualFailureDetector.realClock) private val log = Logging(system, "FailureDetector") - /** - * Holds the failure statistics for a specific node Address. 
- */ - private case class FailureStats(mean: Double = 0.0, variance: Double = 0.0, deviation: Double = 0.0) - // guess statistics for first heartbeat, - // important so that connections with only one heartbeat becomes unavailble - private val failureStatsFirstHeartbeat = FailureStats(mean = 1000.0) + // important so that connections with only one heartbeat becomes unavailable + private val firstHeartbeat: HeartbeatHistory = { + // bootstrap with 2 entries with rather high standard deviation + val mean = firstHeartbeatEstimate.toMillis + val stdDeviation = mean / 4 + HeartbeatHistory(maxSampleSize) :+ (mean - stdDeviation) :+ (mean + stdDeviation) + } + + private val acceptableHeartbeatPauseMillis = acceptableHeartbeatPause.toMillis /** * Implement using optimistic lockless concurrency, all state is represented @@ -58,8 +106,7 @@ class AccrualFailureDetector( */ private case class State( version: Long = 0L, - failureStats: Map[Address, FailureStats] = Map.empty[Address, FailureStats], - intervalHistory: Map[Address, IndexedSeq[Long]] = Map.empty[Address, IndexedSeq[Long]], + history: Map[Address, HeartbeatHistory] = Map.empty, timestamps: Map[Address, Long] = Map.empty[Address, Long], explicitRemovals: Set[Address] = Set.empty[Address]) @@ -78,96 +125,76 @@ class AccrualFailureDetector( final def heartbeat(connection: Address) { log.debug("Heartbeat from connection [{}] ", connection) + val timestamp = clock() val oldState = state.get - val latestTimestamp = oldState.timestamps.get(connection) - if (latestTimestamp.isEmpty) { - // this is heartbeat from a new connection - // add starter records for this new connection - val newState = oldState copy ( - version = oldState.version + 1, - failureStats = oldState.failureStats + (connection -> failureStatsFirstHeartbeat), - intervalHistory = oldState.intervalHistory + (connection -> IndexedSeq.empty[Long]), - timestamps = oldState.timestamps + (connection -> timeMachine()), - explicitRemovals = oldState.explicitRemovals 
- connection) - - // if we won the race then update else try again - if (!state.compareAndSet(oldState, newState)) heartbeat(connection) // recur - - } else { - // this is a known connection - val timestamp = timeMachine() - val interval = timestamp - latestTimestamp.get - - val newIntervalsForConnection = (oldState.intervalHistory.get(connection) match { - case Some(history) if history.size >= maxSampleSize ⇒ - // reached max history, drop first interval - history drop 1 - case Some(history) ⇒ history - case _ ⇒ IndexedSeq.empty[Long] - }) :+ interval - - val newFailureStats = { - val newMean: Double = newIntervalsForConnection.sum.toDouble / newIntervalsForConnection.size - - val oldConnectionFailureStats = oldState.failureStats.get(connection).getOrElse { - throw new IllegalStateException("Can't calculate new failure statistics due to missing heartbeat history") - } - - val deviationSum = (0.0d /: newIntervalsForConnection) { (mean, interval) ⇒ - mean + interval.toDouble - newMean - } - - val newVariance: Double = deviationSum / newIntervalsForConnection.size - val newDeviation: Double = math.sqrt(newVariance) - - val newFailureStats = oldConnectionFailureStats copy (mean = newMean, deviation = newDeviation, variance = newVariance) - oldState.failureStats + (connection -> newFailureStats) - } - - val newState = oldState copy (version = oldState.version + 1, - failureStats = newFailureStats, - intervalHistory = oldState.intervalHistory + (connection -> newIntervalsForConnection), - timestamps = oldState.timestamps + (connection -> timestamp), // record new timestamp, - explicitRemovals = oldState.explicitRemovals - connection) - - // if we won the race then update else try again - if (!state.compareAndSet(oldState, newState)) heartbeat(connection) // recur + val newHistory = oldState.timestamps.get(connection) match { + case None ⇒ + // this is heartbeat from a new connection + // add starter records for this new connection + firstHeartbeat + case 
Some(latestTimestamp) ⇒ + // this is a known connection + val interval = timestamp - latestTimestamp + oldState.history(connection) :+ interval } + + val newState = oldState copy (version = oldState.version + 1, + history = oldState.history + (connection -> newHistory), + timestamps = oldState.timestamps + (connection -> timestamp), // record new timestamp, + explicitRemovals = oldState.explicitRemovals - connection) + + // if we won the race then update else try again + if (!state.compareAndSet(oldState, newState)) heartbeat(connection) // recur } /** - * Calculates how likely it is that the connection has failed. - *

+ * The suspicion level of the accrual failure detector. + * * If a connection does not have any records in failure detector then it is * considered healthy. - *

- * Implementations of 'Cumulative Distribution Function' for Exponential Distribution. - * For a discussion on the math read [https://issues.apache.org/jira/browse/CASSANDRA-2597]. */ def phi(connection: Address): Double = { val oldState = state.get val oldTimestamp = oldState.timestamps.get(connection) - val phi = - // if connection has been removed explicitly - if (oldState.explicitRemovals.contains(connection)) Double.MaxValue - else if (oldTimestamp.isEmpty) 0.0 // treat unmanaged connections, e.g. with zero heartbeats, as healthy connections - else { - val timestampDiff = timeMachine() - oldTimestamp.get + // if connection has been removed explicitly + if (oldState.explicitRemovals.contains(connection)) Double.MaxValue + else if (oldTimestamp.isEmpty) 0.0 // treat unmanaged connections, e.g. with zero heartbeats, as healthy connections + else { + val timeDiff = clock() - oldTimestamp.get - val mean = oldState.failureStats.get(connection) match { - case Some(FailureStats(mean, _, _)) ⇒ mean - case _ ⇒ throw new IllegalStateException("Can't calculate Failure Detector Phi value for a node that have no heartbeat history") - } + val history = oldState.history(connection) + val mean = history.mean + val stdDeviation = ensureValidStdDeviation(history.stdDeviation) - if (mean == 0.0) 0.0 - else PhiFactor * timestampDiff / mean - } + val φ = phi(timeDiff, mean + acceptableHeartbeatPauseMillis, stdDeviation) - // FIXME change to debug log level, when failure detector is stable - log.info("Phi value [{}] and threshold [{}] for connection [{}] ", phi, threshold, connection) - phi + // FIXME change to debug log level, when failure detector is stable + if (φ > 1.0) log.info("Phi value [{}] for connection [{}], after [{} ms], based on [{}]", + φ, connection, timeDiff, "N(" + mean + ", " + stdDeviation + ")") + + φ + } + } + + private[cluster] def phi(timeDiff: Long, mean: Double, stdDeviation: Double): Double = { + val cdf = cumulativeDistributionFunction(timeDiff, mean, 
stdDeviation) + -math.log10(1.0 - cdf) + } + + private val minStdDeviationMillis = minStdDeviation.toMillis + + private def ensureValidStdDeviation(stdDeviation: Double): Double = math.max(stdDeviation, minStdDeviationMillis) + + /** + * Cumulative distribution function for N(mean, stdDeviation) normal distribution. + * This is an approximation defined in β Mathematics Handbook. + */ + private[cluster] def cumulativeDistributionFunction(x: Double, mean: Double, stdDeviation: Double): Double = { + val y = (x - mean) / stdDeviation + // Cumulative distribution function for N(0, 1) + 1.0 / (1.0 + math.exp(-y * (1.5976 + 0.070566 * y * y))) } /** @@ -178,10 +205,9 @@ class AccrualFailureDetector( log.debug("Remove connection [{}] ", connection) val oldState = state.get - if (oldState.failureStats.contains(connection)) { + if (oldState.history.contains(connection)) { val newState = oldState copy (version = oldState.version + 1, - failureStats = oldState.failureStats - connection, - intervalHistory = oldState.intervalHistory - connection, + history = oldState.history - connection, timestamps = oldState.timestamps - connection, explicitRemovals = oldState.explicitRemovals + connection) @@ -190,3 +216,66 @@ class AccrualFailureDetector( } } } + +private[cluster] object HeartbeatHistory { + + /** + * Create an empty HeartbeatHistory, without any history. + * Can only be used as starting point for appending intervals. + * The stats (mean, variance, stdDeviation) are not defined for + * for empty HeartbeatHistory, i.e. throws AritmeticException. + */ + def apply(maxSampleSize: Int): HeartbeatHistory = HeartbeatHistory( + maxSampleSize = maxSampleSize, + intervals = IndexedSeq.empty, + intervalSum = 0L, + squaredIntervalSum = 0L) + +} + +/** + * Holds the heartbeat statistics for a specific node Address. + * It is capped by the number of samples specified in `maxSampleSize`. 
+ * + * The stats (mean, variance, stdDeviation) are not defined for + * for empty HeartbeatHistory, i.e. throws AritmeticException. + */ +private[cluster] case class HeartbeatHistory private ( + maxSampleSize: Int, + intervals: IndexedSeq[Long], + intervalSum: Long, + squaredIntervalSum: Long) { + + if (maxSampleSize < 1) + throw new IllegalArgumentException("maxSampleSize must be >= 1, got [%s]" format maxSampleSize) + if (intervalSum < 0L) + throw new IllegalArgumentException("intervalSum must be >= 0, got [%s]" format intervalSum) + if (squaredIntervalSum < 0L) + throw new IllegalArgumentException("squaredIntervalSum must be >= 0, got [%s]" format squaredIntervalSum) + + def mean: Double = intervalSum.toDouble / intervals.size + + def variance: Double = (squaredIntervalSum.toDouble / intervals.size) - (mean * mean) + + def stdDeviation: Double = math.sqrt(variance) + + @tailrec + final def :+(interval: Long): HeartbeatHistory = { + if (intervals.size < maxSampleSize) + HeartbeatHistory( + maxSampleSize, + intervals = intervals :+ interval, + intervalSum = intervalSum + interval, + squaredIntervalSum = squaredIntervalSum + pow2(interval)) + else + dropOldest :+ interval // recur + } + + private def dropOldest: HeartbeatHistory = HeartbeatHistory( + maxSampleSize, + intervals = intervals drop 1, + intervalSum = intervalSum - intervals.head, + squaredIntervalSum = squaredIntervalSum - pow2(intervals.head)) + + private def pow2(x: Long) = x * x +} \ No newline at end of file diff --git a/akka-cluster/src/main/scala/akka/cluster/Cluster.scala b/akka-cluster/src/main/scala/akka/cluster/Cluster.scala index c495e470ce..3eddb5bf60 100644 --- a/akka-cluster/src/main/scala/akka/cluster/Cluster.scala +++ b/akka-cluster/src/main/scala/akka/cluster/Cluster.scala @@ -6,27 +6,27 @@ package akka.cluster import akka.actor._ import akka.actor.Status._ +import akka.ConfigurationException +import akka.dispatch.Await +import akka.dispatch.MonitorableThreadFactory +import 
akka.event.Logging +import akka.jsr166y.ThreadLocalRandom +import akka.pattern._ import akka.remote._ import akka.routing._ -import akka.event.Logging -import akka.dispatch.Await -import akka.pattern.ask import akka.util._ import akka.util.duration._ -import akka.ConfigurationException -import java.util.concurrent.atomic.{ AtomicReference, AtomicBoolean } -import java.util.concurrent.TimeUnit._ -import java.util.concurrent.TimeoutException -import akka.jsr166y.ThreadLocalRandom -import java.lang.management.ManagementFactory -import java.io.Closeable -import javax.management._ -import scala.collection.immutable.{ Map, SortedSet } -import scala.annotation.tailrec -import com.google.protobuf.ByteString import akka.util.internal.HashedWheelTimer -import akka.dispatch.MonitorableThreadFactory +import com.google.protobuf.ByteString +import java.io.Closeable +import java.lang.management.ManagementFactory +import java.util.concurrent.atomic.{ AtomicReference, AtomicBoolean } +import java.util.concurrent.TimeoutException +import java.util.concurrent.TimeUnit._ +import javax.management._ import MemberStatus._ +import scala.annotation.tailrec +import scala.collection.immutable.{ Map, SortedSet } /** * Interface for membership change listener. @@ -52,14 +52,32 @@ sealed trait ClusterMessage extends Serializable /** * Cluster commands sent by the USER. */ -object ClusterAction { +object ClusterUserAction { /** - * Command to join the cluster. Sent when a node (reprsesented by 'address') + * Command to join the cluster. Sent when a node (represented by 'address') * wants to join another node (the receiver). */ case class Join(address: Address) extends ClusterMessage + /** + * Start message of the process to join one of the seed nodes. + * The node sends `InitJoin` to all seed nodes, which replies + * with `InitJoinAck`. The first reply is used others are discarded. + * The node sends `Join` command to the seed node that replied first. 
+ */ + case object JoinSeedNode extends ClusterMessage + + /** + * @see JoinSeedNode + */ + case object InitJoin extends ClusterMessage + + /** + * @see JoinSeedNode + */ + case class InitJoinAck(address: Address) extends ClusterMessage + /** * Command to leave the cluster. */ @@ -69,22 +87,33 @@ object ClusterAction { * Command to mark node as temporary down. */ case class Down(address: Address) extends ClusterMessage +} + +/** + * Cluster commands sent by the LEADER. + */ +object ClusterLeaderAction { /** - * Command to remove a node from the cluster immediately. - */ - case class Remove(address: Address) extends ClusterMessage - - /** + * INTERNAL API. + * * Command to mark a node to be removed from the cluster immediately. * Can only be sent by the leader. */ - private[akka] case class Exit(address: Address) extends ClusterMessage + private[cluster] case class Exit(address: Address) extends ClusterMessage + + /** + * INTERNAL API. + * + * Command to remove a node from the cluster immediately. + */ + private[cluster] case class Remove(address: Address) extends ClusterMessage } /** * Represents the address and the current status of a cluster member node. * + * Note: `hashCode` and `equals` are solely based on the underlying `Address`, not its `MemberStatus`. */ class Member(val address: Address, val status: MemberStatus) extends ClusterMessage { override def hashCode = address.## @@ -94,12 +123,12 @@ class Member(val address: Address, val status: MemberStatus) extends ClusterMess } /** - * Factory and Utility module for Member instances. + * Module with factory and ordering methods for Member instances. */ object Member { /** - * Sort Address by host and port + * `Address` ordering type class, sorts addresses by host and port. 
*/ implicit val addressOrdering: Ordering[Address] = Ordering.fromLessThan[Address] { (a, b) ⇒ if (a.host != b.host) a.host.getOrElse("").compareTo(b.host.getOrElse("")) < 0 @@ -107,8 +136,14 @@ object Member { else false } - implicit val ordering: Ordering[Member] = new Ordering[Member] { - def compare(x: Member, y: Member) = addressOrdering.compare(x.address, y.address) + /** + * `Member` ordering type class, sorts members by host and port with the exception that + * it puts all members that are in MemberStatus.EXITING last. + */ + implicit val ordering: Ordering[Member] = Ordering.fromLessThan[Member] { (a, b) ⇒ + if (a.status == Exiting && b.status != Exiting) false + else if (a.status != Exiting && b.status == Exiting) true + else addressOrdering.compare(a.address, b.address) < 0 } def apply(address: Address, status: MemberStatus): Member = new Member(address, status) @@ -157,10 +192,11 @@ case class GossipEnvelope(from: Address, gossip: Gossip) extends ClusterMessage * Can be one of: Joining, Up, Leaving, Exiting and Down. */ sealed trait MemberStatus extends ClusterMessage { + /** - * Using the same notion for 'unavailable' as 'non-convergence': DOWN and REMOVED. + * Using the same notion for 'unavailable' as 'non-convergence': DOWN */ - def isUnavailable: Boolean = this == Down || this == Removed + def isUnavailable: Boolean = this == Down } object MemberStatus { @@ -176,8 +212,11 @@ object MemberStatus { * Represents the overview of the cluster, holds the cluster convergence table and set with unreachable nodes. 
*/ case class GossipOverview( - seen: Map[Address, VectorClock] = Map.empty[Address, VectorClock], - unreachable: Set[Member] = Set.empty[Member]) { + seen: Map[Address, VectorClock] = Map.empty, + unreachable: Set[Member] = Set.empty) { + + def isNonDownUnreachable(address: Address): Boolean = + unreachable.exists { m ⇒ m.address == address && m.status != Down } override def toString = "GossipOverview(seen = [" + seen.mkString(", ") + @@ -193,39 +232,44 @@ object Gossip { * Represents the state of the cluster; cluster ring membership, ring convergence, meta data - * all versioned by a vector clock. * - * When a node is joining the Member, with status Joining, is added to `members`. - * If the joining node was downed it is moved from `overview.unreachable` (status Down) - * to `members` (status Joining). It cannot rejoin if not first downed. + * When a node is joining the `Member`, with status `Joining`, is added to `members`. + * If the joining node was downed it is moved from `overview.unreachable` (status `Down`) + * to `members` (status `Joining`). It cannot rejoin if not first downed. * - * When convergence is reached the leader change status of `members` from Joining - * to Up. + * When convergence is reached the leader change status of `members` from `Joining` + * to `Up`. * * When failure detector consider a node as unavailble it will be moved from * `members` to `overview.unreachable`. * - * When a node is downed, either manually or automatically, its status is changed to Down. - * It is also removed from `overview.seen` table. - * The node will reside as Down in the `overview.unreachable` set until joining - * again and it will then go through the normal joining procedure. + * When a node is downed, either manually or automatically, its status is changed to `Down`. + * It is also removed from `overview.seen` table. 
The node will reside as `Down` in the + * `overview.unreachable` set until joining again and it will then go through the normal + * joining procedure. * - * When a Gossip is received the version (vector clock) is used to determine if the - * received Gossip is newer or older than the current local Gossip. The received Gossip - * and local Gossip is merged in case of conflicting version, i.e. vector clocks without + * When a `Gossip` is received the version (vector clock) is used to determine if the + * received `Gossip` is newer or older than the current local `Gossip`. The received `Gossip` + * and local `Gossip` is merged in case of conflicting version, i.e. vector clocks without * same history. When merged the seen table is cleared. * - * TODO document leaving, exiting and removed when that is implemented - * + * When a node is told by the user to leave the cluster the leader will move it to `Leaving` + * and then rebalance and repartition the cluster and start hand-off by migrating the actors + * from the leaving node to the new partitions. Once this process is complete the leader will + * move the node to the `Exiting` state and once a convergence is complete move the node to + * `Removed` by removing it from the `members` set and sending a `Removed` command to the + * removed node telling it to shut itself down. 
*/ case class Gossip( overview: GossipOverview = GossipOverview(), members: SortedSet[Member], // sorted set of members with their status, sorted by address - meta: Map[String, Array[Byte]] = Map.empty[String, Array[Byte]], + meta: Map[String, Array[Byte]] = Map.empty, version: VectorClock = VectorClock()) // vector clock version extends ClusterMessage // is a serializable cluster message with Versioned[Gossip] { // FIXME can be disabled as optimization assertInvariants + private def assertInvariants: Unit = { val unreachableAndLive = members.intersect(overview.unreachable) if (unreachableAndLive.nonEmpty) @@ -251,14 +295,17 @@ case class Gossip( */ def :+(node: VectorClock.Node): Gossip = copy(version = version :+ node) + /** + * Adds a member to the member node ring. + */ def :+(member: Member): Gossip = { if (members contains member) this else this copy (members = members + member) } /** - * Marks the gossip as seen by this node (selfAddress) by updating the address entry in the 'gossip.overview.seen' - * Map with the VectorClock for the new gossip. + * Marks the gossip as seen by this node (address) by updating the address entry in the 'gossip.overview.seen' + * Map with the VectorClock (version) for the new gossip. */ def seen(address: Address): Gossip = { if (overview.seen.contains(address) && overview.seen(address) == version) this @@ -282,8 +329,7 @@ case class Gossip( // 4. merge members by selecting the single Member with highest MemberStatus out of the Member groups, // and exclude unreachable - val mergedMembers = Gossip.emptyMembers ++ Member.pickHighestPriority(this.members, that.members). - filterNot(mergedUnreachable.contains) + val mergedMembers = Gossip.emptyMembers ++ Member.pickHighestPriority(this.members, that.members).filterNot(mergedUnreachable.contains) // 5. 
fresh seen table val mergedSeen = Map.empty[Address, VectorClock] @@ -306,30 +352,56 @@ case class Gossip( case class Heartbeat(from: Address) extends ClusterMessage /** + * INTERNAL API. + * * Manages routing of the different cluster commands. * Instantiated as a single instance for each Cluster - e.g. commands are serialized to Cluster message after message. */ -private[akka] final class ClusterCommandDaemon(cluster: Cluster) extends Actor { - import ClusterAction._ +private[cluster] final class ClusterCommandDaemon(cluster: Cluster) extends Actor { + import ClusterUserAction._ + import ClusterLeaderAction._ val log = Logging(context.system, this) def receive = { - case Join(address) ⇒ cluster.joining(address) - case Down(address) ⇒ cluster.downing(address) - case Leave(address) ⇒ cluster.leaving(address) - case Exit(address) ⇒ cluster.exiting(address) - case Remove(address) ⇒ cluster.removing(address) + case JoinSeedNode ⇒ joinSeedNode() + case InitJoin ⇒ sender ! InitJoinAck(cluster.selfAddress) + case InitJoinAck(address) ⇒ cluster.join(address) + case Join(address) ⇒ cluster.joining(address) + case Down(address) ⇒ cluster.downing(address) + case Leave(address) ⇒ cluster.leaving(address) + case Exit(address) ⇒ cluster.exiting(address) + case Remove(address) ⇒ cluster.removing(address) + case Failure(e: AskTimeoutException) ⇒ joinSeedNodeTimeout() } + def joinSeedNode(): Unit = { + val seedRoutees = for (address ← cluster.seedNodes; if address != cluster.selfAddress) + yield self.path.toStringWithAddress(address) + if (seedRoutees.isEmpty) { + cluster join cluster.selfAddress + } else { + implicit val within = Timeout(cluster.clusterSettings.SeedNodeTimeout) + val seedRouter = context.actorOf( + Props.empty.withRouter(ScatterGatherFirstCompletedRouter( + routees = seedRoutees, within = within.duration))) + seedRouter ? InitJoin pipeTo self + seedRouter ! 
PoisonPill + } + } + + def joinSeedNodeTimeout(): Unit = cluster join cluster.selfAddress + override def unhandled(unknown: Any) = log.error("Illegal command [{}]", unknown) } /** + * INTERNAL API. + * * Pooled and routed with N number of configurable instances. * Concurrent access to Cluster. */ -private[akka] final class ClusterGossipDaemon(cluster: Cluster) extends Actor { +private[cluster] final class ClusterGossipDaemon(cluster: Cluster) extends Actor { val log = Logging(context.system, this) def receive = { @@ -341,9 +413,11 @@ private[akka] final class ClusterGossipDaemon(cluster: Cluster) extends Actor { } /** + * INTERNAL API. + * * Supervisor managing the different Cluster daemons. */ -private[akka] final class ClusterDaemonSupervisor(cluster: Cluster) extends Actor { +private[cluster] final class ClusterDaemonSupervisor(cluster: Cluster) extends Actor { val log = Logging(context.system, this) private val commands = context.actorOf(Props(new ClusterCommandDaemon(cluster)), "commands") @@ -371,14 +445,12 @@ object Cluster extends ExtensionId[Cluster] with ExtensionIdProvider { override def createExtension(system: ExtendedActorSystem): Cluster = { val clusterSettings = new ClusterSettings(system.settings.config, system.name) - val failureDetector = clusterSettings.FailureDetectorImplementationClass match { - case None ⇒ new AccrualFailureDetector(system, clusterSettings) - case Some(fqcn) ⇒ - system.dynamicAccess.createInstanceFor[FailureDetector]( - fqcn, Seq((classOf[ActorSystem], system), (classOf[ClusterSettings], clusterSettings))) match { - case Right(fd) ⇒ fd - case Left(e) ⇒ throw new ConfigurationException("Could not create custom failure detector [" + fqcn + "] due to:" + e.toString) - } + val failureDetector = { + import clusterSettings.{ FailureDetectorImplementationClass ⇒ fqcn } + system.dynamicAccess.createInstanceFor[FailureDetector]( + fqcn, Seq(classOf[ActorSystem] -> system, classOf[ClusterSettings] -> clusterSettings)).fold( + e ⇒ throw 
new ConfigurationException("Could not create custom failure detector [" + fqcn + "] due to:" + e.toString), + identity) } new Cluster(system, failureDetector) @@ -396,27 +468,22 @@ trait ClusterNodeMBean { def isSingleton: Boolean def isConvergence: Boolean def isAvailable: Boolean + def isRunning: Boolean def join(address: String) def leave(address: String) def down(address: String) - def remove(address: String) - - def shutdown() } /** * This module is responsible for Gossiping cluster information. The abstraction maintains the list of live * and dead members. Periodically i.e. every 1 second this module chooses a random member and initiates a round - * of Gossip with it. Whenever it gets gossip updates it updates the Failure Detector with the liveness - * information. + * of Gossip with it. *

- * During each of these runs the member initiates gossip exchange according to following rules (as defined in the - * Cassandra documentation [http://wiki.apache.org/cassandra/ArchitectureGossip]: + * During each of these runs the member initiates gossip exchange according to following rules: *

  *   1) Gossip to random live member (if any)
- *   2) Gossip to random unreachable member with certain probability depending on number of unreachable and live members
- *   3) If the member gossiped to at (1) was not deputy, or the number of live members is less than number of deputy list,
+ *   2) If the member gossiped to at (1) was not deputy, or the number of live members is less than number of deputy list,
  *       gossip to random deputy with certain probability depending on number of unreachable, deputy and live members.
  * 
* @@ -433,7 +500,8 @@ class Cluster(system: ExtendedActorSystem, val failureDetector: FailureDetector) */ private case class State( latestGossip: Gossip, - memberMembershipChangeListeners: Set[MembershipChangeListener] = Set.empty[MembershipChangeListener]) + joinInProgress: Map[Address, Deadline] = Map.empty, + memberMembershipChangeListeners: Set[MembershipChangeListener] = Set.empty) if (!system.provider.isInstanceOf[RemoteActorRefProvider]) throw new ConfigurationException("ActorSystem[" + system + "] needs to have a 'RemoteActorRefProvider' enabled in the configuration") @@ -451,11 +519,9 @@ class Cluster(system: ExtendedActorSystem, val failureDetector: FailureDetector) implicit private val defaultTimeout = Timeout(remoteSettings.RemoteSystemDaemonAckTimeout) - private val nodeToJoin: Option[Address] = NodeToJoin filter (_ != selfAddress) - private val serialization = remote.serialization - private val isRunning = new AtomicBoolean(true) + private val _isRunning = new AtomicBoolean(true) private val log = Logging(system, "Node") private val mBeanServer = ManagementFactory.getPlatformMBeanServer @@ -473,14 +539,13 @@ class Cluster(system: ExtendedActorSystem, val failureDetector: FailureDetector) } private val state = { - val member = Member(selfAddress, Joining) - val versionedGossip = Gossip(members = Gossip.emptyMembers + member) :+ vclockNode // add me as member and update my vector clock - val seenVersionedGossip = versionedGossip seen selfAddress - new AtomicReference[State](State(seenVersionedGossip)) + // note that self is not initially member, + // and the Gossip is not versioned for this 'Node' yet + new AtomicReference[State](State(Gossip(members = Gossip.emptyMembers))) } - // try to join the node defined in the 'akka.cluster.node-to-join' option - autoJoin() + // try to join one of the nodes defined in the 'akka.cluster.seed-nodes' + if (AutoJoin) joinSeedNode() // ======================================================== // ===================== 
WORK DAEMONS ===================== @@ -566,15 +631,27 @@ class Cluster(system: ExtendedActorSystem, val failureDetector: FailureDetector) } } + /** + * Returns true if the cluster node is up and running, false if it is shut down. + */ + def isRunning: Boolean = _isRunning.get + /** * Latest gossip. */ def latestGossip: Gossip = state.get.latestGossip /** - * Member status for this node. + * Member status for this node (`MemberStatus`). + * + * NOTE: If the node has been removed from the cluster (and shut down) then it's status is set to the 'REMOVED' tombstone state + * and is no longer present in the node ring or any other part of the gossiping state. However in order to maintain the + * model and the semantics the user would expect, this method will in this situation return `MemberStatus.Removed`. */ - def status: MemberStatus = self.status + def status: MemberStatus = { + if (isRunning) self.status + else MemberStatus.Removed + } /** * Is this node the leader? @@ -607,31 +684,10 @@ class Cluster(system: ExtendedActorSystem, val failureDetector: FailureDetector) def isAvailable: Boolean = !isUnavailable(state.get) /** - * Shuts down all connections to other members, the cluster daemon and the periodic gossip and cleanup tasks. + * Make it possible to override/configure seedNodes from tests without + * specifying in config. Addresses are unknown before startup time. 
*/ - def shutdown(): Unit = { - if (isRunning.compareAndSet(true, false)) { - log.info("Cluster Node [{}] - Shutting down cluster Node and cluster daemons...", selfAddress) - - // cancel the periodic tasks, note that otherwise they will be run when scheduler is shutdown - gossipTask.cancel() - heartbeatTask.cancel() - failureDetectorReaperTask.cancel() - leaderActionsTask.cancel() - clusterScheduler.close() - - // FIXME isTerminated check can be removed when ticket #2221 is fixed - // now it prevents logging if system is shutdown (or in progress of shutdown) - if (!clusterDaemons.isTerminated) - system.stop(clusterDaemons) - - try { - mBeanServer.unregisterMBean(clusterMBeanName) - } catch { - case e: InstanceNotFoundException ⇒ // ignore - we are running multiple cluster nodes in the same JVM (probably for testing) - } - } - } + def seedNodes: IndexedSeq[Address] = SeedNodes /** * Registers a listener to subscribe to cluster membership changes. @@ -659,32 +715,32 @@ class Cluster(system: ExtendedActorSystem, val failureDetector: FailureDetector) * Try to join this cluster node with the node specified by 'address'. * A 'Join(thisNodeAddress)' command is sent to the node to join. */ - def join(address: Address): Unit = { - val connection = clusterCommandConnectionFor(address) - val command = ClusterAction.Join(selfAddress) - log.info("Cluster Node [{}] - Trying to send JOIN to [{}] through connection [{}]", selfAddress, address, connection) - connection ! 
command + @tailrec + final def join(address: Address): Unit = { + val localState = state.get + val newState = localState copy (joinInProgress = localState.joinInProgress + + (address -> (Deadline.now + JoinTimeout))) + if (!state.compareAndSet(localState, newState)) join(address) // recur + else { + val connection = clusterCommandConnectionFor(address) + val command = ClusterUserAction.Join(selfAddress) + log.info("Cluster Node [{}] - Trying to send JOIN to [{}] through connection [{}]", selfAddress, address, connection) + connection ! command + } } /** * Send command to issue state transition to LEAVING for the node specified by 'address'. */ def leave(address: Address): Unit = { - clusterCommandDaemon ! ClusterAction.Leave(address) + clusterCommandDaemon ! ClusterUserAction.Leave(address) } /** - * Send command to issue state transition to from DOWN to EXITING for the node specified by 'address'. + * Send command to DOWN the node specified by 'address'. */ def down(address: Address): Unit = { - clusterCommandDaemon ! ClusterAction.Down(address) - } - - /** - * Send command to issue state transition to REMOVED for the node specified by 'address'. - */ - def remove(address: Address): Unit = { - clusterCommandDaemon ! ClusterAction.Remove(address) + clusterCommandDaemon ! ClusterUserAction.Down(address) } // ======================================================== @@ -692,22 +748,52 @@ class Cluster(system: ExtendedActorSystem, val failureDetector: FailureDetector) // ======================================================== /** - * State transition to JOINING. - * New node joining. + * INTERNAL API. + * + * Shuts down all connections to other members, the cluster daemon and the periodic gossip and cleanup tasks. + * + * Should not called by the user. The user can issue a LEAVE command which will tell the node + * to go through graceful handoff process `LEAVE -> EXITING -> REMOVED -> SHUTDOWN`. 
+ */ + private[cluster] def shutdown(): Unit = { + if (_isRunning.compareAndSet(true, false)) { + log.info("Cluster Node [{}] - Shutting down cluster Node and cluster daemons...", selfAddress) + + // cancel the periodic tasks, note that otherwise they will be run when scheduler is shutdown + gossipTask.cancel() + heartbeatTask.cancel() + failureDetectorReaperTask.cancel() + leaderActionsTask.cancel() + clusterScheduler.close() + + // FIXME isTerminated check can be removed when ticket #2221 is fixed + // now it prevents logging if system is shutdown (or in progress of shutdown) + if (!clusterDaemons.isTerminated) + system.stop(clusterDaemons) + + try { + mBeanServer.unregisterMBean(clusterMBeanName) + } catch { + case e: InstanceNotFoundException ⇒ // ignore - we are running multiple cluster nodes in the same JVM (probably for testing) + } + log.info("Cluster Node [{}] - Cluster node successfully shut down", selfAddress) + } + } + + /** + * INTERNAL API. + * + * State transition to JOINING - new node joining. 
*/ @tailrec private[cluster] final def joining(node: Address): Unit = { - log.info("Cluster Node [{}] - Node [{}] is JOINING", selfAddress, node) - val localState = state.get val localGossip = localState.latestGossip val localMembers = localGossip.members val localUnreachable = localGossip.overview.unreachable val alreadyMember = localMembers.exists(_.address == node) - val isUnreachable = localUnreachable.exists { m ⇒ - m.address == node && m.status != Down && m.status != Removed - } + val isUnreachable = localGossip.overview.isNonDownUnreachable(node) if (!alreadyMember && !isUnreachable) { @@ -715,7 +801,9 @@ class Cluster(system: ExtendedActorSystem, val failureDetector: FailureDetector) val newUnreachableMembers = localUnreachable filterNot { _.address == node } val newOverview = localGossip.overview copy (unreachable = newUnreachableMembers) - val newMembers = localMembers + Member(node, Joining) // add joining node as Joining + // add joining node as Joining + // add self in case someone else joins before self has joined (Set discards duplicates) + val newMembers = localMembers + Member(node, Joining) + Member(selfAddress, Joining) val newGossip = localGossip copy (overview = newOverview, members = newMembers) val versionedGossip = newGossip :+ vclockNode @@ -725,6 +813,7 @@ class Cluster(system: ExtendedActorSystem, val failureDetector: FailureDetector) if (!state.compareAndSet(localState, newState)) joining(node) // recur if we failed update else { + log.info("Cluster Node [{}] - Node [{}] is JOINING", selfAddress, node) // treat join as initial heartbeat, so that it becomes unavailable if nothing more happens if (node != selfAddress) failureDetector heartbeat node notifyMembershipChangeListeners(localState, newState) @@ -733,52 +822,60 @@ class Cluster(system: ExtendedActorSystem, val failureDetector: FailureDetector) } /** + * INTERNAL API. + * * State transition to LEAVING. 
*/ @tailrec private[cluster] final def leaving(address: Address) { - log.info("Cluster Node [{}] - Marking address [{}] as LEAVING", selfAddress, address) - val localState = state.get val localGossip = localState.latestGossip - val localMembers = localGossip.members + if (localGossip.members.exists(_.address == address)) { // only try to update if the node is available (in the member ring) + val newMembers = localGossip.members map { member ⇒ if (member.address == address) Member(address, Leaving) else member } // mark node as LEAVING + val newGossip = localGossip copy (members = newMembers) - val newMembers = localMembers + Member(address, Leaving) // mark node as LEAVING - val newGossip = localGossip copy (members = newMembers) + val versionedGossip = newGossip :+ vclockNode + val seenVersionedGossip = versionedGossip seen selfAddress - val versionedGossip = newGossip :+ vclockNode - val seenVersionedGossip = versionedGossip seen selfAddress + val newState = localState copy (latestGossip = seenVersionedGossip) - val newState = localState copy (latestGossip = seenVersionedGossip) - - if (!state.compareAndSet(localState, newState)) leaving(address) // recur if we failed update - else { - notifyMembershipChangeListeners(localState, newState) + if (!state.compareAndSet(localState, newState)) leaving(address) // recur if we failed update + else { + log.info("Cluster Node [{}] - Marked address [{}] as LEAVING", selfAddress, address) + notifyMembershipChangeListeners(localState, newState) + } } } - private def notifyMembershipChangeListeners(oldState: State, newState: State): Unit = { - val oldMembersStatus = oldState.latestGossip.members.toSeq.map(m ⇒ (m.address, m.status)) - val newMembersStatus = newState.latestGossip.members.toSeq.map(m ⇒ (m.address, m.status)) - if (newMembersStatus != oldMembersStatus) - newState.memberMembershipChangeListeners foreach { _ notify newState.latestGossip.members } - } - /** + * INTERNAL API. + * * State transition to EXITING. 
*/ private[cluster] final def exiting(address: Address): Unit = { - log.info("Cluster Node [{}] - Marking node [{}] as EXITING", selfAddress, address) + log.info("Cluster Node [{}] - Marked node [{}] as EXITING", selfAddress, address) + // FIXME implement when we implement hand-off } /** + * INTERNAL API. + * * State transition to REMOVED. + * + * This method is for now only called after the LEADER have sent a Removed message - telling the node + * to shut down himself. + * + * In the future we might change this to allow the USER to send a Removed(address) message telling an + * arbitrary node to be moved direcly from UP -> REMOVED. */ private[cluster] final def removing(address: Address): Unit = { - log.info("Cluster Node [{}] - Marking node [{}] as REMOVED", selfAddress, address) + log.info("Cluster Node [{}] - Node has been REMOVED by the leader - shutting down...", selfAddress) + shutdown() } /** + * INTERNAL API. + * * The node to DOWN is removed from the 'members' set and put in the 'unreachable' set (if not already there) * and its status is set to DOWN. The node is also removed from the 'seen' table. * @@ -836,6 +933,8 @@ class Cluster(system: ExtendedActorSystem, val failureDetector: FailureDetector) } /** + * INTERNAL API. + * * Receive new gossip. 
*/ @tailrec @@ -843,54 +942,64 @@ class Cluster(system: ExtendedActorSystem, val failureDetector: FailureDetector) val localState = state.get val localGossip = localState.latestGossip - val winningGossip = - if (remoteGossip.version <> localGossip.version) { - // concurrent - val mergedGossip = remoteGossip merge localGossip - val versionedMergedGossip = mergedGossip :+ vclockNode + if (!localGossip.overview.isNonDownUnreachable(from)) { - // FIXME change to debug log level, when failure detector is stable - log.info( - """Can't establish a causal relationship between "remote" gossip [{}] and "local" gossip [{}] - merging them into [{}]""", - remoteGossip, localGossip, versionedMergedGossip) + val winningGossip = + if (remoteGossip.version <> localGossip.version) { + // concurrent + val mergedGossip = remoteGossip merge localGossip + val versionedMergedGossip = mergedGossip :+ vclockNode - versionedMergedGossip + log.debug( + """Can't establish a causal relationship between "remote" gossip and "local" gossip - Remote[{}] - Local[{}] - merging them into [{}]""", + remoteGossip, localGossip, versionedMergedGossip) - } else if (remoteGossip.version < localGossip.version) { - // local gossip is newer - localGossip + versionedMergedGossip - } else { - // remote gossip is newer - remoteGossip + } else if (remoteGossip.version < localGossip.version) { + // local gossip is newer + localGossip + + } else { + // remote gossip is newer + remoteGossip + } + + val newJoinInProgress = + if (localState.joinInProgress.isEmpty) localState.joinInProgress + else localState.joinInProgress -- + winningGossip.members.map(_.address) -- + winningGossip.overview.unreachable.map(_.address) + + val newState = localState copy ( + latestGossip = winningGossip seen selfAddress, + joinInProgress = newJoinInProgress) + + // if we won the race then update else try again + if (!state.compareAndSet(localState, newState)) receiveGossip(from, remoteGossip) // recur if we fail the update + else { + 
log.debug("Cluster Node [{}] - Receiving gossip from [{}]", selfAddress, from) + notifyMembershipChangeListeners(localState, newState) } - - val newState = localState copy (latestGossip = winningGossip seen selfAddress) - - // if we won the race then update else try again - if (!state.compareAndSet(localState, newState)) receiveGossip(from, remoteGossip) // recur if we fail the update - else { - log.debug("Cluster Node [{}] - Receiving gossip from [{}]", selfAddress, from) - notifyMembershipChangeListeners(localState, newState) } } /** - * INTERNAL API + * INTERNAL API. */ private[cluster] def receiveHeartbeat(from: Address): Unit = failureDetector heartbeat from /** - * Joins the pre-configured contact point. + * Joins the pre-configured contact points. */ - private def autoJoin(): Unit = nodeToJoin foreach join + private def joinSeedNode(): Unit = clusterCommandDaemon ! ClusterUserAction.JoinSeedNode /** - * INTERNAL API + * INTERNAL API. * * Gossips latest gossip to an address. */ - private[akka] def gossipTo(address: Address): Unit = { + private[cluster] def gossipTo(address: Address): Unit = { val connection = clusterGossipConnectionFor(address) log.debug("Cluster Node [{}] - Gossiping to [{}]", selfAddress, connection) connection ! GossipEnvelope(selfAddress, latestGossip) @@ -910,18 +1019,9 @@ class Cluster(system: ExtendedActorSystem, val failureDetector: FailureDetector) } /** - * INTERNAL API + * INTERNAL API. 
*/ - private[akka] def gossipToUnreachableProbablity(membersSize: Int, unreachableSize: Int): Double = - (membersSize + unreachableSize) match { - case 0 ⇒ 0.0 - case sum ⇒ unreachableSize.toDouble / sum - } - - /** - * INTERNAL API - */ - private[akka] def gossipToDeputyProbablity(membersSize: Int, unreachableSize: Int, nrOfDeputyNodes: Int): Double = { + private[cluster] def gossipToDeputyProbablity(membersSize: Int, unreachableSize: Int, nrOfDeputyNodes: Int): Double = { if (nrOfDeputyNodes > membersSize) 1.0 else if (nrOfDeputyNodes == 0) 0.0 else (membersSize + unreachableSize) match { @@ -931,11 +1031,11 @@ class Cluster(system: ExtendedActorSystem, val failureDetector: FailureDetector) } /** - * INTERNAL API + * INTERNAL API. * * Initates a new round of gossip. */ - private[akka] def gossip(): Unit = { + private[cluster] def gossip(): Unit = { val localState = state.get log.debug("Cluster Node [{}] - Initiating new round of gossip", selfAddress) @@ -953,18 +1053,11 @@ class Cluster(system: ExtendedActorSystem, val failureDetector: FailureDetector) // 1. gossip to alive members val gossipedToAlive = gossipToRandomNodeOf(localMemberAddresses) - // 2. gossip to unreachable members - if (localUnreachableSize > 0) { - val probability = gossipToUnreachableProbablity(localMembersSize, localUnreachableSize) - if (ThreadLocalRandom.current.nextDouble() < probability) - gossipToRandomNodeOf(localUnreachableMembers.map(_.address)) - } - - // 3. gossip to a deputy nodes for facilitating partition healing + // 2. 
gossip to a deputy nodes for facilitating partition healing val deputies = deputyNodes(localMemberAddresses) val alreadyGossipedToDeputy = gossipedToAlive.map(deputies.contains(_)).getOrElse(false) - if ((!alreadyGossipedToDeputy || localMembersSize < NrOfDeputyNodes) && deputies.nonEmpty) { - val probability = gossipToDeputyProbablity(localMembersSize, localUnreachableSize, NrOfDeputyNodes) + if ((!alreadyGossipedToDeputy || localMembersSize < seedNodes.size) && deputies.nonEmpty) { + val probability = gossipToDeputyProbablity(localMembersSize, localUnreachableSize, seedNodes.size) if (ThreadLocalRandom.current.nextDouble() < probability) gossipToRandomNodeOf(deputies) } @@ -972,29 +1065,28 @@ class Cluster(system: ExtendedActorSystem, val failureDetector: FailureDetector) } /** - * INTERNAL API + * INTERNAL API. */ - private[akka] def heartbeat(): Unit = { + private[cluster] def heartbeat(): Unit = { + removeOverdueJoinInProgress() val localState = state.get - if (!isSingletonCluster(localState)) { - val liveMembers = localState.latestGossip.members.toIndexedSeq + val beatTo = localState.latestGossip.members.toSeq.map(_.address) ++ localState.joinInProgress.keys - for (member ← liveMembers; if member.address != selfAddress) { - val connection = clusterGossipConnectionFor(member.address) - log.debug("Cluster Node [{}] - Heartbeat to [{}]", selfAddress, connection) - connection ! selfHeartbeat - } + for (address ← beatTo; if address != selfAddress) { + val connection = clusterGossipConnectionFor(address) + log.debug("Cluster Node [{}] - Heartbeat to [{}]", selfAddress, connection) + connection ! selfHeartbeat } } /** - * INTERNAL API + * INTERNAL API. * * Reaps the unreachable members (moves them to the 'unreachable' list in the cluster overview) according to the failure detector's verdict. 
*/ @tailrec - final private[akka] def reapUnreachableMembers(): Unit = { + final private[cluster] def reapUnreachableMembers(): Unit = { val localState = state.get if (!isSingletonCluster(localState) && isAvailable(localState)) { @@ -1033,124 +1125,198 @@ class Cluster(system: ExtendedActorSystem, val failureDetector: FailureDetector) } /** - * INTERNAL API + * INTERNAL API. + * + * Removes overdue joinInProgress from State. + */ + @tailrec + final private[cluster] def removeOverdueJoinInProgress(): Unit = { + val localState = state.get + val overdueJoins = localState.joinInProgress collect { + case (address, deadline) if deadline.isOverdue ⇒ address + } + if (overdueJoins.nonEmpty) { + val newState = localState copy (joinInProgress = localState.joinInProgress -- overdueJoins) + if (!state.compareAndSet(localState, newState)) removeOverdueJoinInProgress() // recur + } + } + + /** + * INTERNAL API. * * Runs periodic leader actions, such as auto-downing unreachable nodes, assigning partitions etc. */ @tailrec - final private[akka] def leaderActions(): Unit = { + final private[cluster] def leaderActions(): Unit = { val localState = state.get val localGossip = localState.latestGossip val localMembers = localGossip.members val isLeader = localMembers.nonEmpty && (selfAddress == localMembers.head.address) - // FIXME implement partion handoff and a check if it is completed - now just returns TRUE - e.g. has completed successfully - def hasPartionHandoffCompletedSuccessfully(gossip: Gossip): Boolean = { - true - } - if (isLeader && isAvailable(localState)) { // only run the leader actions if we are the LEADER and available val localOverview = localGossip.overview val localSeen = localOverview.seen val localUnreachableMembers = localOverview.unreachable + val hasPartionHandoffCompletedSuccessfully: Boolean = { + // FIXME implement partion handoff and a check if it is completed - now just returns TRUE - e.g. 
has completed successfully + true + } // Leader actions are as follows: - // 1. Move JOINING => UP -- When a node joins the cluster - // 2. Move EXITING => REMOVED -- When all nodes have seen that the node is EXITING (convergence) + // 1. Move EXITING => REMOVED -- When all nodes have seen that the node is EXITING (convergence) - remove the nodes from the node ring and seen table + // 2. Move JOINING => UP -- When a node joins the cluster // 3. Move LEAVING => EXITING -- When all partition handoff has completed // 4. Move UNREACHABLE => DOWN -- When the node is in the UNREACHABLE set it can be auto-down by leader - // 5. Updating the vclock version for the changes - // 6. Updating the 'seen' table + // 5. Store away all stuff needed for the side-effecting processing in 10. + // 6. Updating the vclock version for the changes + // 7. Updating the 'seen' table + // 8. Try to update the state with the new gossip + // 9. If failure - retry + // 10. If success - run all the side-effecting processing - var hasChangedState = false - val newGossip = + val ( + newGossip: Gossip, + hasChangedState: Boolean, + upMembers, + exitingMembers, + removedMembers, + unreachableButNotDownedMembers) = if (convergence(localGossip).isDefined) { // we have convergence - so we can't have unreachable nodes + // transform the node member ring - filterNot/map/map val newMembers = - - localMembers map { member ⇒ + localMembers filterNot { member ⇒ // ---------------------- - // 1. Move JOINING => UP (once all nodes have seen that this node is JOINING e.g. we have a convergence) + // 1. Move EXITING => REMOVED - e.g. 
remove the nodes from the 'members' set/node ring and seen table // ---------------------- - if (member.status == Joining) { - log.info("Cluster Node [{}] - Leader is moving node [{}] from JOINING to UP", selfAddress, member.address) - hasChangedState = true - member copy (status = Up) - } else member + member.status == MemberStatus.Exiting } map { member ⇒ // ---------------------- - // 2. Move EXITING => REMOVED (once all nodes have seen that this node is EXITING e.g. we have a convergence) + // 2. Move JOINING => UP (once all nodes have seen that this node is JOINING e.g. we have a convergence) // ---------------------- - if (member.status == Exiting) { - log.info("Cluster Node [{}] - Leader is moving node [{}] from EXITING to REMOVED", selfAddress, member.address) - hasChangedState = true - member copy (status = Removed) - } else member + if (member.status == Joining) member copy (status = Up) + else member } map { member ⇒ // ---------------------- // 3. Move LEAVING => EXITING (once we have a convergence on LEAVING *and* if we have a successful partition handoff) // ---------------------- - if (member.status == Leaving && hasPartionHandoffCompletedSuccessfully(localGossip)) { - log.info("Cluster Node [{}] - Leader is moving node [{}] from LEAVING to EXITING", selfAddress, member.address) - hasChangedState = true - member copy (status = Exiting) - } else member - + if (member.status == Leaving && hasPartionHandoffCompletedSuccessfully) member copy (status = Exiting) + else member } - localGossip copy (members = newMembers) // update gossip + + // ---------------------- + // 5. Store away all stuff needed for the side-effecting processing in 10. + // ---------------------- + + // Check for the need to do side-effecting on successful state change + // Repeat the checking for transitions between JOINING -> UP, LEAVING -> EXITING, EXITING -> REMOVED + // to check for state-changes and to store away removed and exiting members for later notification + // 1. 
check for state-changes to update + // 2. store away removed and exiting members so we can separate the pure state changes (that can be retried on collision) and the side-effecting message sending + val (removedMembers, newMembers1) = localMembers partition (_.status == Exiting) + + val (upMembers, newMembers2) = newMembers1 partition (_.status == Joining) + + val (exitingMembers, newMembers3) = newMembers2 partition (_.status == Leaving && hasPartionHandoffCompletedSuccessfully) + + val hasChangedState = removedMembers.nonEmpty || upMembers.nonEmpty || exitingMembers.nonEmpty + + // removing REMOVED nodes from the 'seen' table + val newSeen = localSeen -- removedMembers.map(_.address) + + // removing REMOVED nodes from the 'unreachable' set + val newUnreachableMembers = localUnreachableMembers -- removedMembers + + val newOverview = localOverview copy (seen = newSeen, unreachable = newUnreachableMembers) // update gossip overview + val newGossip = localGossip copy (members = newMembers, overview = newOverview) // update gossip + + (newGossip, hasChangedState, upMembers, exitingMembers, removedMembers, Set.empty[Member]) } else if (AutoDown) { // we don't have convergence - so we might have unreachable nodes + // if 'auto-down' is turned on, then try to auto-down any unreachable nodes - - // ---------------------- - // 4. 
Move UNREACHABLE => DOWN (auto-downing by leader) - // ---------------------- - val newUnreachableMembers = - localUnreachableMembers.map { member ⇒ - // no need to DOWN members already DOWN - if (member.status == Down) member - else { - log.info("Cluster Node [{}] - Leader is marking unreachable node [{}] as DOWN", selfAddress, member.address) - hasChangedState = true - member copy (status = Down) - } - } - - // removing nodes marked as DOWN from the 'seen' table - val newSeen = localSeen -- newUnreachableMembers.collect { - case m if m.status == Down ⇒ m.address + val newUnreachableMembers = localUnreachableMembers.map { member ⇒ + // ---------------------- + // 5. Move UNREACHABLE => DOWN (auto-downing by leader) + // ---------------------- + if (member.status == Down) member // no need to DOWN members already DOWN + else member copy (status = Down) } - val newOverview = localOverview copy (seen = newSeen, unreachable = newUnreachableMembers) // update gossip overview - localGossip copy (overview = newOverview) // update gossip + // Check for the need to do side-effecting on successful state change + val (unreachableButNotDownedMembers, _) = localUnreachableMembers partition (_.status != Down) - } else localGossip + // removing nodes marked as DOWN from the 'seen' table + val newSeen = localSeen -- newUnreachableMembers.collect { case m if m.status == Down ⇒ m.address } + + val newOverview = localOverview copy (seen = newSeen, unreachable = newUnreachableMembers) // update gossip overview + val newGossip = localGossip copy (overview = newOverview) // update gossip + + (newGossip, unreachableButNotDownedMembers.nonEmpty, Set.empty[Member], Set.empty[Member], Set.empty[Member], unreachableButNotDownedMembers) + + } else (localGossip, false, Set.empty[Member], Set.empty[Member], Set.empty[Member], Set.empty[Member]) if (hasChangedState) { // we have a change of state - version it and try to update - // ---------------------- - // 5. 
Updating the vclock version for the changes + // 6. Updating the vclock version for the changes // ---------------------- val versionedGossip = newGossip :+ vclockNode // ---------------------- - // 6. Updating the 'seen' table + // 7. Updating the 'seen' table + // Unless the leader (this node) is part of the removed members, i.e. the leader have moved himself from EXITING -> REMOVED // ---------------------- - val seenVersionedGossip = versionedGossip seen selfAddress + val seenVersionedGossip = + if (removedMembers.exists(_.address == selfAddress)) versionedGossip + else versionedGossip seen selfAddress val newState = localState copy (latestGossip = seenVersionedGossip) - // if we won the race then update else try again - if (!state.compareAndSet(localState, newState)) leaderActions() // recur - else { + // ---------------------- + // 8. Try to update the state with the new gossip + // ---------------------- + if (!state.compareAndSet(localState, newState)) { + + // ---------------------- + // 9. Failure - retry + // ---------------------- + leaderActions() // recur + + } else { + // ---------------------- + // 10. Success - run all the side-effecting processing + // ---------------------- + + // log the move of members from joining to up + upMembers foreach { member ⇒ log.info("Cluster Node [{}] - Leader is moving node [{}] from JOINING to UP", selfAddress, member.address) } + + // tell all removed members to remove and shut down themselves + removedMembers foreach { member ⇒ + val address = member.address + log.info("Cluster Node [{}] - Leader is moving node [{}] from EXITING to REMOVED - and removing node from node ring", selfAddress, address) + clusterCommandConnectionFor(address) ! 
ClusterLeaderAction.Remove(address) + } + + // tell all exiting members to exit + exitingMembers foreach { member ⇒ + val address = member.address + log.info("Cluster Node [{}] - Leader is moving node [{}] from LEAVING to EXITING", selfAddress, address) + clusterCommandConnectionFor(address) ! ClusterLeaderAction.Exit(address) // FIXME should use ? to await completion of handoff? + } + + // log the auto-downing of the unreachable nodes + unreachableButNotDownedMembers foreach { member ⇒ + log.info("Cluster Node [{}] - Leader is marking unreachable node [{}] as DOWN", selfAddress, member.address) + } + notifyMembershipChangeListeners(localState, newState) } } @@ -1174,9 +1340,7 @@ class Cluster(system: ExtendedActorSystem, val failureDetector: FailureDetector) // Else we can't continue to check for convergence // When that is done we check that all the entries in the 'seen' table have the same vector clock version // and that all members exists in seen table - val hasUnreachable = unreachable.nonEmpty && unreachable.exists { m ⇒ - m.status != Down && m.status != Removed - } + val hasUnreachable = unreachable.nonEmpty && unreachable.exists { _.status != Down } val allMembersInSeen = gossip.members.forall(m ⇒ seen.contains(m.address)) if (hasUnreachable) { @@ -1205,14 +1369,18 @@ class Cluster(system: ExtendedActorSystem, val failureDetector: FailureDetector) private def isUnavailable(state: State): Boolean = { val localGossip = state.latestGossip - val localOverview = localGossip.overview - val localMembers = localGossip.members - val localUnreachableMembers = localOverview.unreachable - val isUnreachable = localUnreachableMembers exists { _.address == selfAddress } - val hasUnavailableMemberStatus = localMembers exists { m ⇒ (m == self) && m.status.isUnavailable } + val isUnreachable = localGossip.overview.unreachable exists { _.address == selfAddress } + val hasUnavailableMemberStatus = localGossip.members exists { m ⇒ (m == self) && m.status.isUnavailable } 
isUnreachable || hasUnavailableMemberStatus } + private def notifyMembershipChangeListeners(oldState: State, newState: State): Unit = { + val oldMembersStatus = oldState.latestGossip.members.map(m ⇒ (m.address, m.status)) + val newMembersStatus = newState.latestGossip.members.map(m ⇒ (m.address, m.status)) + if (newMembersStatus != oldMembersStatus) + newState.memberMembershipChangeListeners foreach { _ notify newState.latestGossip.members } + } + /** * Looks up and returns the local cluster command connection. */ @@ -1232,12 +1400,12 @@ class Cluster(system: ExtendedActorSystem, val failureDetector: FailureDetector) * Gets the addresses of a all the 'deputy' nodes - excluding this node if part of the group. */ private def deputyNodes(addresses: IndexedSeq[Address]): IndexedSeq[Address] = - addresses drop 1 take NrOfDeputyNodes filterNot (_ == selfAddress) + addresses filterNot (_ == selfAddress) intersect seedNodes /** - * INTERNAL API + * INTERNAL API. */ - private[akka] def selectRandomNode(addresses: IndexedSeq[Address]): Option[Address] = + private[cluster] def selectRandomNode(addresses: IndexedSeq[Address]): Option[Address] = if (addresses.isEmpty) None else Some(addresses(ThreadLocalRandom.current nextInt addresses.size)) @@ -1280,6 +1448,8 @@ class Cluster(system: ExtendedActorSystem, val failureDetector: FailureDetector) def isAvailable: Boolean = clusterNode.isAvailable + def isRunning: Boolean = clusterNode.isRunning + // JMX commands def join(address: String) = clusterNode.join(AddressFromURIString(address)) @@ -1287,10 +1457,6 @@ class Cluster(system: ExtendedActorSystem, val failureDetector: FailureDetector) def leave(address: String) = clusterNode.leave(AddressFromURIString(address)) def down(address: String) = clusterNode.down(AddressFromURIString(address)) - - def remove(address: String) = clusterNode.remove(AddressFromURIString(address)) - - def shutdown() = clusterNode.shutdown() } log.info("Cluster Node [{}] - registering cluster JMX MBean 
[{}]", selfAddress, clusterMBeanName) try { diff --git a/akka-cluster/src/main/scala/akka/cluster/ClusterSettings.scala b/akka-cluster/src/main/scala/akka/cluster/ClusterSettings.scala index ee4f6a03d2..08a9b5160d 100644 --- a/akka-cluster/src/main/scala/akka/cluster/ClusterSettings.scala +++ b/akka-cluster/src/main/scala/akka/cluster/ClusterSettings.scala @@ -13,24 +13,29 @@ import akka.actor.AddressFromURIString class ClusterSettings(val config: Config, val systemName: String) { import config._ - final val FailureDetectorThreshold = getInt("akka.cluster.failure-detector.threshold") + + final val FailureDetectorThreshold = getDouble("akka.cluster.failure-detector.threshold") final val FailureDetectorMaxSampleSize = getInt("akka.cluster.failure-detector.max-sample-size") - final val FailureDetectorImplementationClass: Option[String] = getString("akka.cluster.failure-detector.implementation-class") match { - case "" ⇒ None - case fqcn ⇒ Some(fqcn) - } - final val NodeToJoin: Option[Address] = getString("akka.cluster.node-to-join") match { - case "" ⇒ None - case AddressFromURIString(addr) ⇒ Some(addr) - } - final val PeriodicTasksInitialDelay = Duration(getMilliseconds("akka.cluster.periodic-tasks-initial-delay"), MILLISECONDS) - final val GossipInterval = Duration(getMilliseconds("akka.cluster.gossip-interval"), MILLISECONDS) - final val HeartbeatInterval = Duration(getMilliseconds("akka.cluster.heartbeat-interval"), MILLISECONDS) - final val LeaderActionsInterval = Duration(getMilliseconds("akka.cluster.leader-actions-interval"), MILLISECONDS) - final val UnreachableNodesReaperInterval = Duration(getMilliseconds("akka.cluster.unreachable-nodes-reaper-interval"), MILLISECONDS) - final val NrOfGossipDaemons = getInt("akka.cluster.nr-of-gossip-daemons") - final val NrOfDeputyNodes = getInt("akka.cluster.nr-of-deputy-nodes") - final val AutoDown = getBoolean("akka.cluster.auto-down") - final val SchedulerTickDuration = 
Duration(getMilliseconds("akka.cluster.scheduler.tick-duration"), MILLISECONDS) - final val SchedulerTicksPerWheel = getInt("akka.cluster.scheduler.ticks-per-wheel") + final val FailureDetectorImplementationClass = getString("akka.cluster.failure-detector.implementation-class") + final val FailureDetectorMinStdDeviation: Duration = + Duration(getMilliseconds("akka.cluster.failure-detector.min-std-deviation"), MILLISECONDS) + final val FailureDetectorAcceptableHeartbeatPause: Duration = + Duration(getMilliseconds("akka.cluster.failure-detector.acceptable-heartbeat-pause"), MILLISECONDS) + + final val SeedNodes: IndexedSeq[Address] = getStringList("akka.cluster.seed-nodes").asScala.map { + case AddressFromURIString(addr) ⇒ addr + }.toIndexedSeq + final val SeedNodeTimeout: Duration = Duration(getMilliseconds("akka.cluster.seed-node-timeout"), MILLISECONDS) + final val PeriodicTasksInitialDelay: Duration = Duration(getMilliseconds("akka.cluster.periodic-tasks-initial-delay"), MILLISECONDS) + final val GossipInterval: Duration = Duration(getMilliseconds("akka.cluster.gossip-interval"), MILLISECONDS) + final val HeartbeatInterval: Duration = Duration(getMilliseconds("akka.cluster.heartbeat-interval"), MILLISECONDS) + final val LeaderActionsInterval: Duration = Duration(getMilliseconds("akka.cluster.leader-actions-interval"), MILLISECONDS) + final val UnreachableNodesReaperInterval: Duration = Duration(getMilliseconds("akka.cluster.unreachable-nodes-reaper-interval"), MILLISECONDS) + final val NrOfGossipDaemons: Int = getInt("akka.cluster.nr-of-gossip-daemons") + final val NrOfDeputyNodes: Int = getInt("akka.cluster.nr-of-deputy-nodes") + final val AutoJoin: Boolean = getBoolean("akka.cluster.auto-join") + final val AutoDown: Boolean = getBoolean("akka.cluster.auto-down") + final val JoinTimeout: Duration = Duration(getMilliseconds("akka.cluster.join-timeout"), MILLISECONDS) + final val SchedulerTickDuration: Duration = 
Duration(getMilliseconds("akka.cluster.scheduler.tick-duration"), MILLISECONDS) + final val SchedulerTicksPerWheel: Int = getInt("akka.cluster.scheduler.ticks-per-wheel") } diff --git a/akka-cluster/src/multi-jvm/scala/akka/cluster/ClientDowningNodeThatIsUnreachableSpec.scala b/akka-cluster/src/multi-jvm/scala/akka/cluster/ClientDowningNodeThatIsUnreachableSpec.scala index 343f0c7c17..8112aeab25 100644 --- a/akka-cluster/src/multi-jvm/scala/akka/cluster/ClientDowningNodeThatIsUnreachableSpec.scala +++ b/akka-cluster/src/multi-jvm/scala/akka/cluster/ClientDowningNodeThatIsUnreachableSpec.scala @@ -37,7 +37,7 @@ abstract class ClientDowningNodeThatIsUnreachableSpec "Client of a 4 node cluster" must { "be able to DOWN a node that is UNREACHABLE (killed)" taggedAs LongRunningTest in { - val thirdAddress = node(third).address + val thirdAddress = address(third) awaitClusterUp(first, second, third, fourth) runOn(first) { @@ -47,23 +47,23 @@ abstract class ClientDowningNodeThatIsUnreachableSpec // mark 'third' node as DOWN cluster.down(thirdAddress) - testConductor.enter("down-third-node") + enterBarrier("down-third-node") awaitUpConvergence(numberOfMembers = 3, canNotBePartOfMemberRing = Seq(thirdAddress)) cluster.latestGossip.members.exists(_.address == thirdAddress) must be(false) } runOn(third) { - testConductor.enter("down-third-node") + enterBarrier("down-third-node") } runOn(second, fourth) { - testConductor.enter("down-third-node") + enterBarrier("down-third-node") awaitUpConvergence(numberOfMembers = 3, canNotBePartOfMemberRing = Seq(thirdAddress)) } - testConductor.enter("await-completion") + enterBarrier("await-completion") } } } diff --git a/akka-cluster/src/multi-jvm/scala/akka/cluster/ClientDowningNodeThatIsUpSpec.scala b/akka-cluster/src/multi-jvm/scala/akka/cluster/ClientDowningNodeThatIsUpSpec.scala index 95eeefd982..4c65e85054 100644 --- a/akka-cluster/src/multi-jvm/scala/akka/cluster/ClientDowningNodeThatIsUpSpec.scala +++ 
b/akka-cluster/src/multi-jvm/scala/akka/cluster/ClientDowningNodeThatIsUpSpec.scala @@ -37,13 +37,13 @@ abstract class ClientDowningNodeThatIsUpSpec "Client of a 4 node cluster" must { "be able to DOWN a node that is UP (healthy and available)" taggedAs LongRunningTest in { - val thirdAddress = node(third).address + val thirdAddress = address(third) awaitClusterUp(first, second, third, fourth) runOn(first) { // mark 'third' node as DOWN cluster.down(thirdAddress) - testConductor.enter("down-third-node") + enterBarrier("down-third-node") markNodeAsUnavailable(thirdAddress) @@ -52,16 +52,16 @@ abstract class ClientDowningNodeThatIsUpSpec } runOn(third) { - testConductor.enter("down-third-node") + enterBarrier("down-third-node") } runOn(second, fourth) { - testConductor.enter("down-third-node") + enterBarrier("down-third-node") awaitUpConvergence(numberOfMembers = 3, canNotBePartOfMemberRing = Seq(thirdAddress)) } - testConductor.enter("await-completion") + enterBarrier("await-completion") } } } diff --git a/akka-cluster/src/multi-jvm/scala/akka/cluster/ClusterAccrualFailureDetectorSpec.scala b/akka-cluster/src/multi-jvm/scala/akka/cluster/ClusterAccrualFailureDetectorSpec.scala new file mode 100644 index 0000000000..d5d41b52aa --- /dev/null +++ b/akka-cluster/src/multi-jvm/scala/akka/cluster/ClusterAccrualFailureDetectorSpec.scala @@ -0,0 +1,63 @@ +/** + * Copyright (C) 2009-2012 Typesafe Inc. + */ +package akka.cluster + +import com.typesafe.config.ConfigFactory +import akka.remote.testkit.MultiNodeConfig +import akka.remote.testkit.MultiNodeSpec +import akka.util.duration._ +import akka.testkit._ + +object ClusterAccrualFailureDetectorMultiJvmSpec extends MultiNodeConfig { + val first = role("first") + val second = role("second") + val third = role("third") + + commonConfig(debugConfig(on = false). + withFallback(ConfigFactory.parseString("akka.cluster.failure-detector.threshold = 4")). 
+ withFallback(MultiNodeClusterSpec.clusterConfig)) +} + +class ClusterAccrualFailureDetectorMultiJvmNode1 extends ClusterAccrualFailureDetectorSpec with AccrualFailureDetectorStrategy +class ClusterAccrualFailureDetectorMultiJvmNode2 extends ClusterAccrualFailureDetectorSpec with AccrualFailureDetectorStrategy +class ClusterAccrualFailureDetectorMultiJvmNode3 extends ClusterAccrualFailureDetectorSpec with AccrualFailureDetectorStrategy + +abstract class ClusterAccrualFailureDetectorSpec + extends MultiNodeSpec(ClusterAccrualFailureDetectorMultiJvmSpec) + with MultiNodeClusterSpec { + + import ClusterAccrualFailureDetectorMultiJvmSpec._ + + "A heartbeat driven Failure Detector" must { + + "receive heartbeats so that all member nodes in the cluster are marked 'available'" taggedAs LongRunningTest in { + awaitClusterUp(first, second, third) + + 5.seconds.dilated.sleep // let them heartbeat + cluster.failureDetector.isAvailable(first) must be(true) + cluster.failureDetector.isAvailable(second) must be(true) + cluster.failureDetector.isAvailable(third) must be(true) + + enterBarrier("after-1") + } + + "mark node as 'unavailable' if a node in the cluster is shut down (and its heartbeats stops)" taggedAs LongRunningTest in { + runOn(first) { + testConductor.shutdown(third, 0) + } + + enterBarrier("third-shutdown") + + runOn(first, second) { + // remaining nodes should detect failure... 
+ awaitCond(!cluster.failureDetector.isAvailable(third), 15.seconds) + // other connections still ok + cluster.failureDetector.isAvailable(first) must be(true) + cluster.failureDetector.isAvailable(second) must be(true) + } + + enterBarrier("after-2") + } + } +} diff --git a/akka-cluster/src/multi-jvm/scala/akka/cluster/ConvergenceSpec.scala b/akka-cluster/src/multi-jvm/scala/akka/cluster/ConvergenceSpec.scala index 52206f1b8c..6d92a6f094 100644 --- a/akka-cluster/src/multi-jvm/scala/akka/cluster/ConvergenceSpec.scala +++ b/akka-cluster/src/multi-jvm/scala/akka/cluster/ConvergenceSpec.scala @@ -46,12 +46,12 @@ abstract class ConvergenceSpec // doesn't join immediately } - testConductor.enter("after-1") + enterBarrier("after-1") } "not reach convergence while any nodes are unreachable" taggedAs LongRunningTest in { - val thirdAddress = node(third).address - testConductor.enter("before-shutdown") + val thirdAddress = address(third) + enterBarrier("before-shutdown") runOn(first) { // kill 'third' node @@ -60,15 +60,13 @@ abstract class ConvergenceSpec } runOn(first, second) { - val firstAddress = node(first).address - val secondAddress = node(second).address within(28 seconds) { // third becomes unreachable awaitCond(cluster.latestGossip.overview.unreachable.size == 1) awaitCond(cluster.latestGossip.members.size == 2) awaitCond(cluster.latestGossip.members.forall(_.status == MemberStatus.Up)) - awaitSeenSameState(Seq(firstAddress, secondAddress)) + awaitSeenSameState(first, second) // still one unreachable cluster.latestGossip.overview.unreachable.size must be(1) cluster.latestGossip.overview.unreachable.head.address must be(thirdAddress) @@ -78,30 +76,26 @@ abstract class ConvergenceSpec } } - testConductor.enter("after-2") + enterBarrier("after-2") } "not move a new joining node to Up while there is no convergence" taggedAs LongRunningTest in { runOn(fourth) { // try to join - cluster.join(node(first).address) + cluster.join(first) } - val firstAddress = 
node(first).address - val secondAddress = node(second).address - val fourthAddress = node(fourth).address - def memberStatus(address: Address): Option[MemberStatus] = cluster.latestGossip.members.collectFirst { case m if m.address == address ⇒ m.status } def assertNotMovedUp: Unit = { within(20 seconds) { awaitCond(cluster.latestGossip.members.size == 3) - awaitSeenSameState(Seq(firstAddress, secondAddress, fourthAddress)) - memberStatus(firstAddress) must be(Some(MemberStatus.Up)) - memberStatus(secondAddress) must be(Some(MemberStatus.Up)) + awaitSeenSameState(first, second, fourth) + memberStatus(first) must be(Some(MemberStatus.Up)) + memberStatus(second) must be(Some(MemberStatus.Up)) // leader is not allowed to move the new node to Up - memberStatus(fourthAddress) must be(Some(MemberStatus.Joining)) + memberStatus(fourth) must be(Some(MemberStatus.Joining)) // still no convergence cluster.convergence.isDefined must be(false) } @@ -116,7 +110,7 @@ abstract class ConvergenceSpec } } - testConductor.enter("after-3") + enterBarrier("after-3") } } } diff --git a/akka-cluster/src/multi-jvm/scala/akka/cluster/FailureDetectorStrategy.scala b/akka-cluster/src/multi-jvm/scala/akka/cluster/FailureDetectorStrategy.scala index dcbb65d0f1..86e03f9457 100644 --- a/akka-cluster/src/multi-jvm/scala/akka/cluster/FailureDetectorStrategy.scala +++ b/akka-cluster/src/multi-jvm/scala/akka/cluster/FailureDetectorStrategy.scala @@ -55,7 +55,7 @@ trait AccrualFailureDetectorStrategy extends FailureDetectorStrategy { self: Mul override val failureDetector: FailureDetector = new AccrualFailureDetector(system, new ClusterSettings(system.settings.config, system.name)) - override def markNodeAsAvailable(address: Address): Unit = { /* no-op */ } + override def markNodeAsAvailable(address: Address): Unit = () - override def markNodeAsUnavailable(address: Address): Unit = { /* no-op */ } + override def markNodeAsUnavailable(address: Address): Unit = () } diff --git 
a/akka-cluster/src/multi-jvm/scala/akka/cluster/GossipingAccrualFailureDetectorSpec.scala b/akka-cluster/src/multi-jvm/scala/akka/cluster/GossipingAccrualFailureDetectorSpec.scala deleted file mode 100644 index b14c0d927c..0000000000 --- a/akka-cluster/src/multi-jvm/scala/akka/cluster/GossipingAccrualFailureDetectorSpec.scala +++ /dev/null @@ -1,65 +0,0 @@ -/** - * Copyright (C) 2009-2012 Typesafe Inc. - */ -package akka.cluster - -import com.typesafe.config.ConfigFactory -import akka.remote.testkit.MultiNodeConfig -import akka.remote.testkit.MultiNodeSpec -import akka.util.duration._ -import akka.testkit._ - -object GossipingAccrualFailureDetectorMultiJvmSpec extends MultiNodeConfig { - val first = role("first") - val second = role("second") - val third = role("third") - - commonConfig(debugConfig(on = false). - withFallback(ConfigFactory.parseString("akka.cluster.failure-detector.threshold = 4")). - withFallback(MultiNodeClusterSpec.clusterConfig)) -} - -class GossipingWithAccrualFailureDetectorMultiJvmNode1 extends GossipingAccrualFailureDetectorSpec with AccrualFailureDetectorStrategy -class GossipingWithAccrualFailureDetectorMultiJvmNode2 extends GossipingAccrualFailureDetectorSpec with AccrualFailureDetectorStrategy -class GossipingWithAccrualFailureDetectorMultiJvmNode3 extends GossipingAccrualFailureDetectorSpec with AccrualFailureDetectorStrategy - -abstract class GossipingAccrualFailureDetectorSpec - extends MultiNodeSpec(GossipingAccrualFailureDetectorMultiJvmSpec) - with MultiNodeClusterSpec { - - import GossipingAccrualFailureDetectorMultiJvmSpec._ - - lazy val firstAddress = node(first).address - lazy val secondAddress = node(second).address - lazy val thirdAddress = node(third).address - - "A Gossip-driven Failure Detector" must { - - "receive gossip heartbeats so that all member nodes in the cluster are marked 'available'" taggedAs LongRunningTest in { - awaitClusterUp(first, second, third) - - 5.seconds.dilated.sleep // let them gossip - 
cluster.failureDetector.isAvailable(firstAddress) must be(true) - cluster.failureDetector.isAvailable(secondAddress) must be(true) - cluster.failureDetector.isAvailable(thirdAddress) must be(true) - - testConductor.enter("after-1") - } - - "mark node as 'unavailable' if a node in the cluster is shut down (and its heartbeats stops)" taggedAs LongRunningTest in { - runOn(first) { - testConductor.shutdown(third, 0) - } - - runOn(first, second) { - // remaning nodes should detect failure... - awaitCond(!cluster.failureDetector.isAvailable(thirdAddress), 10.seconds) - // other connections still ok - cluster.failureDetector.isAvailable(firstAddress) must be(true) - cluster.failureDetector.isAvailable(secondAddress) must be(true) - } - - testConductor.enter("after-2") - } - } -} diff --git a/akka-cluster/src/multi-jvm/scala/akka/cluster/JoinInProgressSpec.scala b/akka-cluster/src/multi-jvm/scala/akka/cluster/JoinInProgressSpec.scala new file mode 100644 index 0000000000..256b7d563d --- /dev/null +++ b/akka-cluster/src/multi-jvm/scala/akka/cluster/JoinInProgressSpec.scala @@ -0,0 +1,65 @@ +/** + * Copyright (C) 2009-2012 Typesafe Inc. 
+ */ +package akka.cluster + +import com.typesafe.config.ConfigFactory +import org.scalatest.BeforeAndAfter +import akka.remote.testkit.MultiNodeConfig +import akka.remote.testkit.MultiNodeSpec +import akka.testkit._ +import akka.util.duration._ +import akka.util.Deadline + +object JoinInProgressMultiJvmSpec extends MultiNodeConfig { + val first = role("first") + val second = role("second") + + commonConfig( + debugConfig(on = false) + .withFallback(ConfigFactory.parseString(""" + akka.cluster { + # simulate delay in gossip by turning it off + gossip-interval = 300 s + failure-detector { + threshold = 4 + acceptable-heartbeat-pause = 1 second + } + }""") // increase the leader action task interval + .withFallback(MultiNodeClusterSpec.clusterConfig))) +} + +class JoinInProgressMultiJvmNode1 extends JoinInProgressSpec with AccrualFailureDetectorStrategy +class JoinInProgressMultiJvmNode2 extends JoinInProgressSpec with AccrualFailureDetectorStrategy + +abstract class JoinInProgressSpec + extends MultiNodeSpec(JoinInProgressMultiJvmSpec) + with MultiNodeClusterSpec { + + import JoinInProgressMultiJvmSpec._ + + "A cluster node" must { + "send heartbeats immediately when joining to avoid false failure detection due to delayed gossip" taggedAs LongRunningTest in { + + runOn(first) { + startClusterNode() + } + + enterBarrier("first-started") + + runOn(second) { + cluster.join(first) + } + + runOn(first) { + val until = Deadline.now + 5.seconds + while (!until.isOverdue) { + 200.millis.sleep + cluster.failureDetector.isAvailable(second) must be(true) + } + } + + enterBarrier("after") + } + } +} diff --git a/akka-cluster/src/multi-jvm/scala/akka/cluster/JoinSeedNodeSpec.scala b/akka-cluster/src/multi-jvm/scala/akka/cluster/JoinSeedNodeSpec.scala new file mode 100644 index 0000000000..20dec26a45 --- /dev/null +++ b/akka-cluster/src/multi-jvm/scala/akka/cluster/JoinSeedNodeSpec.scala @@ -0,0 +1,65 @@ +/** + * Copyright (C) 2009-2012 Typesafe Inc. 
+ */ +package akka.cluster + +import com.typesafe.config.ConfigFactory +import org.scalatest.BeforeAndAfter +import akka.remote.testkit.MultiNodeConfig +import akka.remote.testkit.MultiNodeSpec +import akka.testkit._ +import akka.util.duration._ + +object JoinSeedNodeMultiJvmSpec extends MultiNodeConfig { + val seed1 = role("seed1") + val seed2 = role("seed2") + val ordinary1 = role("ordinary1") + val ordinary2 = role("ordinary2") + + commonConfig(debugConfig(on = false). + withFallback(ConfigFactory.parseString("akka.cluster.auto-join = on")). + withFallback(MultiNodeClusterSpec.clusterConfig)) +} + +class JoinSeedNodeMultiJvmNode1 extends JoinSeedNodeSpec with FailureDetectorPuppetStrategy +class JoinSeedNodeMultiJvmNode2 extends JoinSeedNodeSpec with FailureDetectorPuppetStrategy +class JoinSeedNodeMultiJvmNode3 extends JoinSeedNodeSpec with FailureDetectorPuppetStrategy +class JoinSeedNodeMultiJvmNode4 extends JoinSeedNodeSpec with FailureDetectorPuppetStrategy + +abstract class JoinSeedNodeSpec + extends MultiNodeSpec(JoinSeedNodeMultiJvmSpec) + with MultiNodeClusterSpec { + + import JoinSeedNodeMultiJvmSpec._ + + override def seedNodes = IndexedSeq(seed1, seed2) + + "A cluster with configured seed nodes" must { + "start the seed nodes sequentially" taggedAs LongRunningTest in { + runOn(seed1) { + startClusterNode() + } + enterBarrier("seed1-started") + + runOn(seed2) { + startClusterNode() + } + enterBarrier("seed2-started") + + runOn(seed1, seed2) { + awaitUpConvergence(2) + } + enterBarrier("after-1") + } + + "join the seed nodes at startup" taggedAs LongRunningTest in { + + startClusterNode() + enterBarrier("all-started") + + awaitUpConvergence(4) + + enterBarrier("after-2") + } + } +} diff --git a/akka-cluster/src/multi-jvm/scala/akka/cluster/JoinTwoClustersSpec.scala b/akka-cluster/src/multi-jvm/scala/akka/cluster/JoinTwoClustersSpec.scala index 4b64bb6e58..d34a48f48e 100644 --- a/akka-cluster/src/multi-jvm/scala/akka/cluster/JoinTwoClustersSpec.scala 
+++ b/akka-cluster/src/multi-jvm/scala/akka/cluster/JoinTwoClustersSpec.scala @@ -33,10 +33,6 @@ abstract class JoinTwoClustersSpec import JoinTwoClustersMultiJvmSpec._ - lazy val a1Address = node(a1).address - lazy val b1Address = node(b1).address - lazy val c1Address = node(c1).address - "Three different clusters (A, B and C)" must { "be able to 'elect' a single leader after joining (A -> B)" taggedAs LongRunningTest in { @@ -44,16 +40,16 @@ abstract class JoinTwoClustersSpec runOn(a1, b1, c1) { startClusterNode() } - testConductor.enter("first-started") + enterBarrier("first-started") runOn(a1, a2) { - cluster.join(a1Address) + cluster.join(a1) } runOn(b1, b2) { - cluster.join(b1Address) + cluster.join(b1) } runOn(c1, c2) { - cluster.join(c1Address) + cluster.join(c1) } awaitUpConvergence(numberOfMembers = 2) @@ -62,10 +58,10 @@ abstract class JoinTwoClustersSpec assertLeader(b1, b2) assertLeader(c1, c2) - testConductor.enter("two-members") + enterBarrier("two-members") runOn(b2) { - cluster.join(a1Address) + cluster.join(a1) } runOn(a1, a2, b1, b2) { @@ -75,20 +71,20 @@ abstract class JoinTwoClustersSpec assertLeader(a1, a2, b1, b2) assertLeader(c1, c2) - testConductor.enter("four-members") + enterBarrier("four-members") } "be able to 'elect' a single leader after joining (C -> A + B)" taggedAs LongRunningTest in { runOn(b2) { - cluster.join(c1Address) + cluster.join(c1) } awaitUpConvergence(numberOfMembers = 6) assertLeader(a1, a2, b1, b2, c1, c2) - testConductor.enter("six-members") + enterBarrier("six-members") } } } diff --git a/akka-cluster/src/multi-jvm/scala/akka/cluster/LeaderDowningNodeThatIsUnreachableSpec.scala b/akka-cluster/src/multi-jvm/scala/akka/cluster/LeaderDowningNodeThatIsUnreachableSpec.scala index 5e2545394d..d1640be511 100644 --- a/akka-cluster/src/multi-jvm/scala/akka/cluster/LeaderDowningNodeThatIsUnreachableSpec.scala +++ b/akka-cluster/src/multi-jvm/scala/akka/cluster/LeaderDowningNodeThatIsUnreachableSpec.scala @@ -42,11 +42,11 @@ 
abstract class LeaderDowningNodeThatIsUnreachableSpec "be able to DOWN a 'last' node that is UNREACHABLE" taggedAs LongRunningTest in { awaitClusterUp(first, second, third, fourth) - val fourthAddress = node(fourth).address + val fourthAddress = address(fourth) runOn(first) { // kill 'fourth' node testConductor.shutdown(fourth, 0) - testConductor.enter("down-fourth-node") + enterBarrier("down-fourth-node") // mark the node as unreachable in the failure detector markNodeAsUnavailable(fourthAddress) @@ -57,26 +57,26 @@ abstract class LeaderDowningNodeThatIsUnreachableSpec } runOn(fourth) { - testConductor.enter("down-fourth-node") + enterBarrier("down-fourth-node") } runOn(second, third) { - testConductor.enter("down-fourth-node") + enterBarrier("down-fourth-node") awaitUpConvergence(numberOfMembers = 3, canNotBePartOfMemberRing = Seq(fourthAddress), 30.seconds) } - testConductor.enter("await-completion-1") + enterBarrier("await-completion-1") } "be able to DOWN a 'middle' node that is UNREACHABLE" taggedAs LongRunningTest in { - val secondAddress = node(second).address + val secondAddress = address(second) - testConductor.enter("before-down-second-node") + enterBarrier("before-down-second-node") runOn(first) { // kill 'second' node testConductor.shutdown(second, 0) - testConductor.enter("down-second-node") + enterBarrier("down-second-node") // mark the node as unreachable in the failure detector markNodeAsUnavailable(secondAddress) @@ -87,16 +87,16 @@ abstract class LeaderDowningNodeThatIsUnreachableSpec } runOn(second) { - testConductor.enter("down-second-node") + enterBarrier("down-second-node") } runOn(third) { - testConductor.enter("down-second-node") + enterBarrier("down-second-node") awaitUpConvergence(numberOfMembers = 2, canNotBePartOfMemberRing = Seq(secondAddress), 30 seconds) } - testConductor.enter("await-completion-2") + enterBarrier("await-completion-2") } } } diff --git a/akka-cluster/src/multi-jvm/scala/akka/cluster/LeaderElectionSpec.scala 
b/akka-cluster/src/multi-jvm/scala/akka/cluster/LeaderElectionSpec.scala index e161206ba0..9ed8f27ad4 100644 --- a/akka-cluster/src/multi-jvm/scala/akka/cluster/LeaderElectionSpec.scala +++ b/akka-cluster/src/multi-jvm/scala/akka/cluster/LeaderElectionSpec.scala @@ -50,7 +50,7 @@ abstract class LeaderElectionSpec assertLeaderIn(sortedRoles) } - testConductor.enter("after") + enterBarrier("after-1") } def shutdownLeaderAndVerifyNewLeader(alreadyShutdown: Int): Unit = { @@ -63,44 +63,46 @@ abstract class LeaderElectionSpec myself match { case `controller` ⇒ - val leaderAddress = node(leader).address - testConductor.enter("before-shutdown") + val leaderAddress = address(leader) + enterBarrier("before-shutdown") testConductor.shutdown(leader, 0) - testConductor.enter("after-shutdown", "after-down", "completed") + enterBarrier("after-shutdown", "after-down", "completed") markNodeAsUnavailable(leaderAddress) case `leader` ⇒ - testConductor.enter("before-shutdown", "after-shutdown") + enterBarrier("before-shutdown", "after-shutdown") // this node will be shutdown by the controller and doesn't participate in more barriers case `aUser` ⇒ - val leaderAddress = node(leader).address - testConductor.enter("before-shutdown", "after-shutdown") + val leaderAddress = address(leader) + enterBarrier("before-shutdown", "after-shutdown") // user marks the shutdown leader as DOWN cluster.down(leaderAddress) - testConductor.enter("after-down", "completed") + enterBarrier("after-down", "completed") markNodeAsUnavailable(leaderAddress) case _ if remainingRoles.contains(myself) ⇒ // remaining cluster nodes, not shutdown - testConductor.enter("before-shutdown", "after-shutdown", "after-down") + enterBarrier("before-shutdown", "after-shutdown", "after-down") awaitUpConvergence(currentRoles.size - 1) val nextExpectedLeader = remainingRoles.head cluster.isLeader must be(myself == nextExpectedLeader) assertLeaderIn(remainingRoles) - testConductor.enter("completed") + enterBarrier("completed") } 
} "be able to 're-elect' a single leader after leader has left" taggedAs LongRunningTest in { shutdownLeaderAndVerifyNewLeader(alreadyShutdown = 0) + enterBarrier("after-2") } "be able to 're-elect' a single leader after leader has left (again)" taggedAs LongRunningTest in { shutdownLeaderAndVerifyNewLeader(alreadyShutdown = 1) + enterBarrier("after-3") } } } diff --git a/akka-cluster/src/multi-jvm/scala/akka/cluster/LeaderLeavingSpec.scala b/akka-cluster/src/multi-jvm/scala/akka/cluster/LeaderLeavingSpec.scala new file mode 100644 index 0000000000..54154b6973 --- /dev/null +++ b/akka-cluster/src/multi-jvm/scala/akka/cluster/LeaderLeavingSpec.scala @@ -0,0 +1,85 @@ +/** + * Copyright (C) 2009-2012 Typesafe Inc. + */ +package akka.cluster + +import scala.collection.immutable.SortedSet +import com.typesafe.config.ConfigFactory +import akka.remote.testkit.MultiNodeConfig +import akka.remote.testkit.MultiNodeSpec +import akka.testkit._ +import akka.util.duration._ + +object LeaderLeavingMultiJvmSpec extends MultiNodeConfig { + val first = role("first") + val second = role("second") + val third = role("third") + + commonConfig( + debugConfig(on = false) + .withFallback(ConfigFactory.parseString(""" + akka.cluster { + leader-actions-interval = 5 s # increase the leader action task frequency to make sure we get a chance to test the LEAVING state + unreachable-nodes-reaper-interval = 30 s + }""") + .withFallback(MultiNodeClusterSpec.clusterConfig))) +} + +class LeaderLeavingMultiJvmNode1 extends LeaderLeavingSpec with FailureDetectorPuppetStrategy +class LeaderLeavingMultiJvmNode2 extends LeaderLeavingSpec with FailureDetectorPuppetStrategy +class LeaderLeavingMultiJvmNode3 extends LeaderLeavingSpec with FailureDetectorPuppetStrategy + +abstract class LeaderLeavingSpec + extends MultiNodeSpec(LeaderLeavingMultiJvmSpec) + with MultiNodeClusterSpec { + + import LeaderLeavingMultiJvmSpec._ + + val leaderHandoffWaitingTime = 30.seconds.dilated + + "A LEADER that is LEAVING" 
must { + + "be moved to LEAVING, then to EXITING, then to REMOVED, then be shut down and then a new LEADER should be elected" taggedAs LongRunningTest in { + + awaitClusterUp(first, second, third) + + val oldLeaderAddress = cluster.leader + + if (cluster.isLeader) { + + cluster.leave(oldLeaderAddress) + enterBarrier("leader-left") + + // verify that a NEW LEADER has taken over + awaitCond(!cluster.isLeader) + + // verify that the LEADER is shut down + awaitCond(!cluster.isRunning, 30.seconds.dilated) + + // verify that the LEADER is REMOVED + awaitCond(cluster.status == MemberStatus.Removed) + + } else { + + enterBarrier("leader-left") + + // verify that the LEADER is LEAVING + awaitCond(cluster.latestGossip.members.exists(m ⇒ m.status == MemberStatus.Leaving && m.address == oldLeaderAddress), leaderHandoffWaitingTime) // wait on LEAVING + + // verify that the LEADER is EXITING + awaitCond(cluster.latestGossip.members.exists(m ⇒ m.status == MemberStatus.Exiting && m.address == oldLeaderAddress), leaderHandoffWaitingTime) // wait on EXITING + + // verify that the LEADER is no longer part of the 'members' set + awaitCond(cluster.latestGossip.members.forall(_.address != oldLeaderAddress), leaderHandoffWaitingTime) + + // verify that the LEADER is not part of the 'unreachable' set + awaitCond(cluster.latestGossip.overview.unreachable.forall(_.address != oldLeaderAddress), leaderHandoffWaitingTime) + + // verify that we have a new LEADER + awaitCond(cluster.leader != oldLeaderAddress, leaderHandoffWaitingTime) + } + + enterBarrier("finished") + } + } +} diff --git a/akka-cluster/src/multi-jvm/scala/akka/cluster/MembershipChangeListenerExitingSpec.scala b/akka-cluster/src/multi-jvm/scala/akka/cluster/MembershipChangeListenerExitingSpec.scala index 88cee08191..ee74584953 100644 --- a/akka-cluster/src/multi-jvm/scala/akka/cluster/MembershipChangeListenerExitingSpec.scala +++ b/akka-cluster/src/multi-jvm/scala/akka/cluster/MembershipChangeListenerExitingSpec.scala @@ 
-37,37 +37,33 @@ abstract class MembershipChangeListenerExitingSpec import MembershipChangeListenerExitingMultiJvmSpec._ - lazy val firstAddress = node(first).address - lazy val secondAddress = node(second).address - lazy val thirdAddress = node(third).address - "A registered MembershipChangeListener" must { "be notified when new node is EXITING" taggedAs LongRunningTest in { awaitClusterUp(first, second, third) runOn(first) { - testConductor.enter("registered-listener") - cluster.leave(secondAddress) + enterBarrier("registered-listener") + cluster.leave(second) } runOn(second) { - testConductor.enter("registered-listener") + enterBarrier("registered-listener") } runOn(third) { val exitingLatch = TestLatch() cluster.registerListener(new MembershipChangeListener { def notify(members: SortedSet[Member]) { - if (members.size == 3 && members.exists(m ⇒ m.address == secondAddress && m.status == MemberStatus.Exiting)) + if (members.size == 3 && members.exists(m ⇒ m.address == address(second) && m.status == MemberStatus.Exiting)) exitingLatch.countDown() } }) - testConductor.enter("registered-listener") + enterBarrier("registered-listener") exitingLatch.await } - testConductor.enter("finished") + enterBarrier("finished") } } } diff --git a/akka-cluster/src/multi-jvm/scala/akka/cluster/MembershipChangeListenerJoinSpec.scala b/akka-cluster/src/multi-jvm/scala/akka/cluster/MembershipChangeListenerJoinSpec.scala index 536fb3b58d..eacec24109 100644 --- a/akka-cluster/src/multi-jvm/scala/akka/cluster/MembershipChangeListenerJoinSpec.scala +++ b/akka-cluster/src/multi-jvm/scala/akka/cluster/MembershipChangeListenerJoinSpec.scala @@ -30,34 +30,31 @@ abstract class MembershipChangeListenerJoinSpec import MembershipChangeListenerJoinMultiJvmSpec._ - lazy val firstAddress = node(first).address - lazy val secondAddress = node(second).address - "A registered MembershipChangeListener" must { "be notified when new node is JOINING" taggedAs LongRunningTest in { runOn(first) { val 
joinLatch = TestLatch() - val expectedAddresses = Set(firstAddress, secondAddress) + val expectedAddresses = Set(first, second) map address cluster.registerListener(new MembershipChangeListener { def notify(members: SortedSet[Member]) { if (members.map(_.address) == expectedAddresses && members.exists(_.status == MemberStatus.Joining)) joinLatch.countDown() } }) - testConductor.enter("registered-listener") + enterBarrier("registered-listener") joinLatch.await } runOn(second) { - testConductor.enter("registered-listener") - cluster.join(firstAddress) + enterBarrier("registered-listener") + cluster.join(first) } awaitUpConvergence(2) - testConductor.enter("after") + enterBarrier("after") } } } diff --git a/akka-cluster/src/multi-jvm/scala/akka/cluster/MembershipChangeListenerLeavingSpec.scala b/akka-cluster/src/multi-jvm/scala/akka/cluster/MembershipChangeListenerLeavingSpec.scala index 0640e58175..e6430314d4 100644 --- a/akka-cluster/src/multi-jvm/scala/akka/cluster/MembershipChangeListenerLeavingSpec.scala +++ b/akka-cluster/src/multi-jvm/scala/akka/cluster/MembershipChangeListenerLeavingSpec.scala @@ -9,6 +9,7 @@ import com.typesafe.config.ConfigFactory import akka.remote.testkit.MultiNodeConfig import akka.remote.testkit.MultiNodeSpec import akka.testkit._ +import akka.actor.Address object MembershipChangeListenerLeavingMultiJvmSpec extends MultiNodeConfig { val first = role("first") @@ -34,39 +35,35 @@ abstract class MembershipChangeListenerLeavingSpec import MembershipChangeListenerLeavingMultiJvmSpec._ - lazy val firstAddress = node(first).address - lazy val secondAddress = node(second).address - lazy val thirdAddress = node(third).address - "A registered MembershipChangeListener" must { "be notified when new node is LEAVING" taggedAs LongRunningTest in { awaitClusterUp(first, second, third) runOn(first) { - testConductor.enter("registered-listener") - cluster.leave(secondAddress) + enterBarrier("registered-listener") + cluster.leave(second) } runOn(second) { 
- testConductor.enter("registered-listener") + enterBarrier("registered-listener") } runOn(third) { val latch = TestLatch() - val expectedAddresses = Set(firstAddress, secondAddress, thirdAddress) + val expectedAddresses = Set(first, second, third) map address cluster.registerListener(new MembershipChangeListener { def notify(members: SortedSet[Member]) { if (members.map(_.address) == expectedAddresses && - members.exists(m ⇒ m.address == secondAddress && m.status == MemberStatus.Leaving)) + members.exists(m ⇒ m.address == address(second) && m.status == MemberStatus.Leaving)) latch.countDown() } }) - testConductor.enter("registered-listener") + enterBarrier("registered-listener") latch.await } - testConductor.enter("finished") + enterBarrier("finished") } } } diff --git a/akka-cluster/src/multi-jvm/scala/akka/cluster/MembershipChangeListenerUpSpec.scala b/akka-cluster/src/multi-jvm/scala/akka/cluster/MembershipChangeListenerUpSpec.scala index f48f9c8d9b..5638399b59 100644 --- a/akka-cluster/src/multi-jvm/scala/akka/cluster/MembershipChangeListenerUpSpec.scala +++ b/akka-cluster/src/multi-jvm/scala/akka/cluster/MembershipChangeListenerUpSpec.scala @@ -27,10 +27,6 @@ abstract class MembershipChangeListenerUpSpec import MembershipChangeListenerUpMultiJvmSpec._ - lazy val firstAddress = node(first).address - lazy val secondAddress = node(second).address - lazy val thirdAddress = node(third).address - "A set of connected cluster systems" must { "(when two nodes) after cluster convergence updates the membership table then all MembershipChangeListeners should be triggered" taggedAs LongRunningTest in { @@ -39,44 +35,44 @@ abstract class MembershipChangeListenerUpSpec runOn(first, second) { val latch = TestLatch() - val expectedAddresses = Set(firstAddress, secondAddress) + val expectedAddresses = Set(first, second) map address cluster.registerListener(new MembershipChangeListener { def notify(members: SortedSet[Member]) { if (members.map(_.address) == expectedAddresses && 
members.forall(_.status == MemberStatus.Up)) latch.countDown() } }) - testConductor.enter("listener-1-registered") - cluster.join(firstAddress) + enterBarrier("listener-1-registered") + cluster.join(first) latch.await } runOn(third) { - testConductor.enter("listener-1-registered") + enterBarrier("listener-1-registered") } - testConductor.enter("after-1") + enterBarrier("after-1") } "(when three nodes) after cluster convergence updates the membership table then all MembershipChangeListeners should be triggered" taggedAs LongRunningTest in { val latch = TestLatch() - val expectedAddresses = Set(firstAddress, secondAddress, thirdAddress) + val expectedAddresses = Set(first, second, third) map address cluster.registerListener(new MembershipChangeListener { def notify(members: SortedSet[Member]) { if (members.map(_.address) == expectedAddresses && members.forall(_.status == MemberStatus.Up)) latch.countDown() } }) - testConductor.enter("listener-2-registered") + enterBarrier("listener-2-registered") runOn(third) { - cluster.join(firstAddress) + cluster.join(first) } latch.await - testConductor.enter("after-2") + enterBarrier("after-2") } } } diff --git a/akka-cluster/src/multi-jvm/scala/akka/cluster/MultiNodeClusterSpec.scala b/akka-cluster/src/multi-jvm/scala/akka/cluster/MultiNodeClusterSpec.scala index b5afaf404c..ed95013bf4 100644 --- a/akka-cluster/src/multi-jvm/scala/akka/cluster/MultiNodeClusterSpec.scala +++ b/akka-cluster/src/multi-jvm/scala/akka/cluster/MultiNodeClusterSpec.scala @@ -13,18 +13,20 @@ import akka.util.duration._ import akka.util.Duration import org.scalatest.Suite import org.scalatest.TestFailedException -import scala.util.control.NoStackTrace +import java.util.concurrent.ConcurrentHashMap +import akka.actor.ActorPath +import akka.actor.RootActorPath object MultiNodeClusterSpec { def clusterConfig: Config = ConfigFactory.parseString(""" akka.cluster { + auto-join = off auto-down = off gossip-interval = 200 ms heartbeat-interval = 400 ms 
leader-actions-interval = 200 ms unreachable-nodes-reaper-interval = 200 ms periodic-tasks-initial-delay = 300 ms - nr-of-deputy-nodes = 2 } akka.test { single-expect-default = 5 s @@ -36,6 +38,27 @@ trait MultiNodeClusterSpec extends FailureDetectorStrategy with Suite { self: Mu override def initialParticipants = roles.size + private val cachedAddresses = new ConcurrentHashMap[RoleName, Address] + + /** + * Lookup the Address for the role. + * + * Implicit conversion from RoleName to Address. + * + * It is cached, which has the implication that stopping + * and then restarting a role (jvm) with another address is not + * supported. + */ + implicit def address(role: RoleName): Address = { + cachedAddresses.get(role) match { + case null ⇒ + val address = node(role).address + cachedAddresses.put(role, address) + address + case address ⇒ address + } + } + // Cluster tests are written so that if previous step (test method) failed // it will most likely not be possible to run next step. This ensures // fail fast of steps after the first failure. @@ -54,10 +77,22 @@ trait MultiNodeClusterSpec extends FailureDetectorStrategy with Suite { self: Mu throw t } + /** + * Make it possible to override/configure seedNodes from tests without + * specifying in config. Addresses are unknown before startup time. + */ + protected def seedNodes: IndexedSeq[RoleName] = IndexedSeq.empty + /** * The cluster node instance. Needs to be lazily created. */ - private lazy val clusterNode = new Cluster(system.asInstanceOf[ExtendedActorSystem], failureDetector) + private lazy val clusterNode = new Cluster(system.asInstanceOf[ExtendedActorSystem], failureDetector) { + override def seedNodes: IndexedSeq[Address] = { + val testSeedNodes = MultiNodeClusterSpec.this.seedNodes + if (testSeedNodes.isEmpty) super.seedNodes + else testSeedNodes map address + } + } /** * Get the cluster node to use. 
@@ -65,10 +100,15 @@ trait MultiNodeClusterSpec extends FailureDetectorStrategy with Suite { self: Mu def cluster: Cluster = clusterNode /** - * Use this method instead of 'cluster.self' - * for the initial startup of the cluster node. + * Use this method for the initial startup of the cluster node. */ - def startClusterNode(): Unit = cluster.self + def startClusterNode(): Unit = { + if (cluster.latestGossip.members.isEmpty) { + cluster join myself + awaitCond(cluster.latestGossip.members.exists(_.address == address(myself))) + } else + cluster.self + } /** * Initialize the cluster with the specified member @@ -92,14 +132,14 @@ trait MultiNodeClusterSpec extends FailureDetectorStrategy with Suite { self: Mu // make sure that the node-to-join is started before other join startClusterNode() } - testConductor.enter(roles.head.name + "-started") + enterBarrier(roles.head.name + "-started") if (roles.tail.contains(myself)) { - cluster.join(node(roles.head).address) + cluster.join(roles.head) } if (upConvergence && roles.contains(myself)) { awaitUpConvergence(numberOfMembers = roles.length) } - testConductor.enter(roles.map(_.name).mkString("-") + "-joined") + enterBarrier(roles.map(_.name).mkString("-") + "-joined") } /** @@ -150,7 +190,7 @@ trait MultiNodeClusterSpec extends FailureDetectorStrategy with Suite { self: Mu /** * Wait until the specified nodes have seen the same gossip overview. 
*/ - def awaitSeenSameState(addresses: Seq[Address]): Unit = { + def awaitSeenSameState(addresses: Address*): Unit = { awaitCond { val seen = cluster.latestGossip.overview.seen val seenVectorClocks = addresses.flatMap(seen.get(_)) @@ -168,10 +208,9 @@ trait MultiNodeClusterSpec extends FailureDetectorStrategy with Suite { self: Mu */ implicit val clusterOrdering: Ordering[RoleName] = new Ordering[RoleName] { import Member.addressOrdering - def compare(x: RoleName, y: RoleName) = addressOrdering.compare(node(x).address, node(y).address) + def compare(x: RoleName, y: RoleName) = addressOrdering.compare(address(x), address(y)) } - def roleName(address: Address): Option[RoleName] = { - roles.find(node(_).address == address) - } + def roleName(addr: Address): Option[RoleName] = roles.find(address(_) == addr) + } diff --git a/akka-cluster/src/multi-jvm/scala/akka/cluster/NodeJoinSpec.scala b/akka-cluster/src/multi-jvm/scala/akka/cluster/NodeJoinSpec.scala index 6cf5fc220d..50656a6a9d 100644 --- a/akka-cluster/src/multi-jvm/scala/akka/cluster/NodeJoinSpec.scala +++ b/akka-cluster/src/multi-jvm/scala/akka/cluster/NodeJoinSpec.scala @@ -17,7 +17,7 @@ object NodeJoinMultiJvmSpec extends MultiNodeConfig { commonConfig( debugConfig(on = false) .withFallback(ConfigFactory.parseString("akka.cluster.leader-actions-interval = 5 s") // increase the leader action task interval - .withFallback(MultiNodeClusterSpec.clusterConfig))) + .withFallback(MultiNodeClusterSpec.clusterConfig))) } class NodeJoinMultiJvmNode1 extends NodeJoinSpec with FailureDetectorPuppetStrategy @@ -29,9 +29,6 @@ abstract class NodeJoinSpec import NodeJoinMultiJvmSpec._ - lazy val firstAddress = node(first).address - lazy val secondAddress = node(second).address - "A cluster node" must { "join another cluster and get status JOINING - when sending a 'Join' command" taggedAs LongRunningTest in { @@ -39,13 +36,15 @@ abstract class NodeJoinSpec startClusterNode() } + enterBarrier("first-started") + runOn(second) { 
- cluster.join(firstAddress) + cluster.join(first) } - awaitCond(cluster.latestGossip.members.exists { member ⇒ member.address == secondAddress && member.status == MemberStatus.Joining }) + awaitCond(cluster.latestGossip.members.exists { member ⇒ member.address == address(second) && member.status == MemberStatus.Joining }) - testConductor.enter("after") + enterBarrier("after") } } } diff --git a/akka-cluster/src/multi-jvm/scala/akka/cluster/NodeLeavingAndExitingAndBeingRemovedSpec.scala b/akka-cluster/src/multi-jvm/scala/akka/cluster/NodeLeavingAndExitingAndBeingRemovedSpec.scala index 01e5f8aa74..64f52c4549 100644 --- a/akka-cluster/src/multi-jvm/scala/akka/cluster/NodeLeavingAndExitingAndBeingRemovedSpec.scala +++ b/akka-cluster/src/multi-jvm/scala/akka/cluster/NodeLeavingAndExitingAndBeingRemovedSpec.scala @@ -18,9 +18,9 @@ object NodeLeavingAndExitingAndBeingRemovedMultiJvmSpec extends MultiNodeConfig commonConfig(debugConfig(on = false).withFallback(MultiNodeClusterSpec.clusterConfig)) } -class NodeLeavingAndExitingAndBeingRemovedMultiJvmNode1 extends NodeLeavingAndExitingAndBeingRemovedSpec with AccrualFailureDetectorStrategy -class NodeLeavingAndExitingAndBeingRemovedMultiJvmNode2 extends NodeLeavingAndExitingAndBeingRemovedSpec with AccrualFailureDetectorStrategy -class NodeLeavingAndExitingAndBeingRemovedMultiJvmNode3 extends NodeLeavingAndExitingAndBeingRemovedSpec with AccrualFailureDetectorStrategy +class NodeLeavingAndExitingAndBeingRemovedMultiJvmNode1 extends NodeLeavingAndExitingAndBeingRemovedSpec with FailureDetectorPuppetStrategy +class NodeLeavingAndExitingAndBeingRemovedMultiJvmNode2 extends NodeLeavingAndExitingAndBeingRemovedSpec with FailureDetectorPuppetStrategy +class NodeLeavingAndExitingAndBeingRemovedMultiJvmNode3 extends NodeLeavingAndExitingAndBeingRemovedSpec with FailureDetectorPuppetStrategy abstract class NodeLeavingAndExitingAndBeingRemovedSpec extends MultiNodeSpec(NodeLeavingAndExitingAndBeingRemovedMultiJvmSpec) @@ -28,38 
+28,34 @@ abstract class NodeLeavingAndExitingAndBeingRemovedSpec import NodeLeavingAndExitingAndBeingRemovedMultiJvmSpec._ - lazy val firstAddress = node(first).address - lazy val secondAddress = node(second).address - lazy val thirdAddress = node(third).address - val reaperWaitingTime = 30.seconds.dilated "A node that is LEAVING a non-singleton cluster" must { - // FIXME make it work and remove ignore - "be moved to EXITING and then to REMOVED by the reaper" taggedAs LongRunningTest ignore { + "eventually set to REMOVED by the reaper, and removed from membership ring and seen table" taggedAs LongRunningTest in { awaitClusterUp(first, second, third) runOn(first) { - cluster.leave(secondAddress) + cluster.leave(second) } - testConductor.enter("second-left") + enterBarrier("second-left") runOn(first, third) { // verify that the 'second' node is no longer part of the 'members' set - awaitCond(cluster.latestGossip.members.forall(_.address != secondAddress), reaperWaitingTime) + awaitCond(cluster.latestGossip.members.forall(_.address != address(second)), reaperWaitingTime) - // verify that the 'second' node is part of the 'unreachable' set - awaitCond(cluster.latestGossip.overview.unreachable.exists(_.status == MemberStatus.Removed), reaperWaitingTime) - - // verify node that got removed is 'second' node - val isRemoved = cluster.latestGossip.overview.unreachable.find(_.status == MemberStatus.Removed) - isRemoved must be('defined) - isRemoved.get.address must be(secondAddress) + // verify that the 'second' node is not part of the 'unreachable' set + awaitCond(cluster.latestGossip.overview.unreachable.forall(_.address != address(second)), reaperWaitingTime) } - testConductor.enter("finished") + runOn(second) { + // verify that the second node is shut down and has status REMOVED + awaitCond(!cluster.isRunning, reaperWaitingTime) + awaitCond(cluster.status == MemberStatus.Removed, reaperWaitingTime) + } + + enterBarrier("finished") } } } diff --git 
a/akka-cluster/src/multi-jvm/scala/akka/cluster/NodeLeavingAndExitingSpec.scala b/akka-cluster/src/multi-jvm/scala/akka/cluster/NodeLeavingAndExitingSpec.scala index fc62c17c1d..5f9efb0b47 100644 --- a/akka-cluster/src/multi-jvm/scala/akka/cluster/NodeLeavingAndExitingSpec.scala +++ b/akka-cluster/src/multi-jvm/scala/akka/cluster/NodeLeavingAndExitingSpec.scala @@ -36,21 +36,16 @@ abstract class NodeLeavingAndExitingSpec import NodeLeavingAndExitingMultiJvmSpec._ - lazy val firstAddress = node(first).address - lazy val secondAddress = node(second).address - lazy val thirdAddress = node(third).address - "A node that is LEAVING a non-singleton cluster" must { - // FIXME make it work and remove ignore - "be moved to EXITING by the leader" taggedAs LongRunningTest ignore { + "be moved to EXITING by the leader" taggedAs LongRunningTest in { awaitClusterUp(first, second, third) runOn(first) { - cluster.leave(secondAddress) + cluster.leave(second) } - testConductor.enter("second-left") + enterBarrier("second-left") runOn(first, third) { @@ -60,16 +55,16 @@ abstract class NodeLeavingAndExitingSpec awaitCond(cluster.latestGossip.members.exists(_.status == MemberStatus.Leaving)) // wait on LEAVING val hasLeft = cluster.latestGossip.members.find(_.status == MemberStatus.Leaving) // verify node that left hasLeft must be('defined) - hasLeft.get.address must be(secondAddress) + hasLeft.get.address must be(address(second)) // 2. 
Verify that 'second' node is set to EXITING awaitCond(cluster.latestGossip.members.exists(_.status == MemberStatus.Exiting)) // wait on EXITING val hasExited = cluster.latestGossip.members.find(_.status == MemberStatus.Exiting) // verify node that exited hasExited must be('defined) - hasExited.get.address must be(secondAddress) + hasExited.get.address must be(address(second)) } - testConductor.enter("finished") + enterBarrier("finished") } } } diff --git a/akka-cluster/src/multi-jvm/scala/akka/cluster/NodeLeavingSpec.scala b/akka-cluster/src/multi-jvm/scala/akka/cluster/NodeLeavingSpec.scala index 8ea21e9380..9ece38aae8 100644 --- a/akka-cluster/src/multi-jvm/scala/akka/cluster/NodeLeavingSpec.scala +++ b/akka-cluster/src/multi-jvm/scala/akka/cluster/NodeLeavingSpec.scala @@ -30,31 +30,26 @@ abstract class NodeLeavingSpec import NodeLeavingMultiJvmSpec._ - lazy val firstAddress = node(first).address - lazy val secondAddress = node(second).address - lazy val thirdAddress = node(third).address - "A node that is LEAVING a non-singleton cluster" must { - // FIXME make it work and remove ignore - "be marked as LEAVING in the converged membership table" taggedAs LongRunningTest ignore { + "be marked as LEAVING in the converged membership table" taggedAs LongRunningTest in { awaitClusterUp(first, second, third) runOn(first) { - cluster.leave(secondAddress) + cluster.leave(second) } - testConductor.enter("second-left") + enterBarrier("second-left") runOn(first, third) { awaitCond(cluster.latestGossip.members.exists(_.status == MemberStatus.Leaving)) val hasLeft = cluster.latestGossip.members.find(_.status == MemberStatus.Leaving) hasLeft must be('defined) - hasLeft.get.address must be(secondAddress) + hasLeft.get.address must be(address(second)) } - testConductor.enter("finished") + enterBarrier("finished") } } } diff --git a/akka-cluster/src/multi-jvm/scala/akka/cluster/NodeMembershipSpec.scala b/akka-cluster/src/multi-jvm/scala/akka/cluster/NodeMembershipSpec.scala index 
fb0573f77f..364edca08b 100644 --- a/akka-cluster/src/multi-jvm/scala/akka/cluster/NodeMembershipSpec.scala +++ b/akka-cluster/src/multi-jvm/scala/akka/cluster/NodeMembershipSpec.scala @@ -26,10 +26,6 @@ abstract class NodeMembershipSpec import NodeMembershipMultiJvmSpec._ - lazy val firstAddress = node(first).address - lazy val secondAddress = node(second).address - lazy val thirdAddress = node(third).address - "A set of connected cluster systems" must { "(when two nodes) start gossiping to each other so that both nodes gets the same gossip info" taggedAs LongRunningTest in { @@ -38,35 +34,35 @@ abstract class NodeMembershipSpec runOn(first) { startClusterNode() } - testConductor.enter("first-started") + enterBarrier("first-started") runOn(first, second) { - cluster.join(firstAddress) + cluster.join(first) awaitCond(cluster.latestGossip.members.size == 2) - assertMembers(cluster.latestGossip.members, firstAddress, secondAddress) + assertMembers(cluster.latestGossip.members, first, second) awaitCond { cluster.latestGossip.members.forall(_.status == MemberStatus.Up) } awaitCond(cluster.convergence.isDefined) } - testConductor.enter("after-1") + enterBarrier("after-1") } "(when three nodes) start gossiping to each other so that all nodes gets the same gossip info" taggedAs LongRunningTest in { runOn(third) { - cluster.join(firstAddress) + cluster.join(first) } awaitCond(cluster.latestGossip.members.size == 3) - assertMembers(cluster.latestGossip.members, firstAddress, secondAddress, thirdAddress) + assertMembers(cluster.latestGossip.members, first, second, third) awaitCond { cluster.latestGossip.members.forall(_.status == MemberStatus.Up) } awaitCond(cluster.convergence.isDefined) - testConductor.enter("after-2") + enterBarrier("after-2") } } } diff --git a/akka-cluster/src/multi-jvm/scala/akka/cluster/NodeUpSpec.scala b/akka-cluster/src/multi-jvm/scala/akka/cluster/NodeUpSpec.scala index 0fdc3c89b8..3da6b2715a 100644 --- 
a/akka-cluster/src/multi-jvm/scala/akka/cluster/NodeUpSpec.scala +++ b/akka-cluster/src/multi-jvm/scala/akka/cluster/NodeUpSpec.scala @@ -33,7 +33,7 @@ abstract class NodeUpSpec awaitClusterUp(first, second) - testConductor.enter("after-1") + enterBarrier("after-1") } "be unaffected when joining again" taggedAs LongRunningTest in { @@ -45,12 +45,12 @@ abstract class NodeUpSpec unexpected.set(members) } }) - testConductor.enter("listener-registered") + enterBarrier("listener-registered") runOn(second) { - cluster.join(node(first).address) + cluster.join(first) } - testConductor.enter("joined-again") + enterBarrier("joined-again") // let it run for a while to make sure that nothing bad happens for (n ← 1 to 20) { @@ -59,7 +59,7 @@ abstract class NodeUpSpec cluster.latestGossip.members.forall(_.status == MemberStatus.Up) must be(true) } - testConductor.enter("after-2") + enterBarrier("after-2") } } } diff --git a/akka-cluster/src/multi-jvm/scala/akka/cluster/SingletonClusterSpec.scala b/akka-cluster/src/multi-jvm/scala/akka/cluster/SingletonClusterSpec.scala index cada29e210..3c35e95333 100644 --- a/akka-cluster/src/multi-jvm/scala/akka/cluster/SingletonClusterSpec.scala +++ b/akka-cluster/src/multi-jvm/scala/akka/cluster/SingletonClusterSpec.scala @@ -16,6 +16,7 @@ object SingletonClusterMultiJvmSpec extends MultiNodeConfig { commonConfig(debugConfig(on = false). 
withFallback(ConfigFactory.parseString(""" akka.cluster { + auto-join = on auto-down = on failure-detector.threshold = 4 } @@ -38,17 +39,25 @@ abstract class SingletonClusterSpec "A cluster of 2 nodes" must { - "not be singleton cluster when joined" taggedAs LongRunningTest in { + "become singleton cluster when started with 'auto-join=on' and 'seed-nodes=[]'" taggedAs LongRunningTest in { + startClusterNode() + awaitUpConvergence(1) + cluster.isSingletonCluster must be(true) + + enterBarrier("after-1") + } + + "not be singleton cluster when joined with other node" taggedAs LongRunningTest in { awaitClusterUp(first, second) cluster.isSingletonCluster must be(false) assertLeader(first, second) - testConductor.enter("after-1") + enterBarrier("after-2") } "become singleton cluster when one node is shutdown" taggedAs LongRunningTest in { runOn(first) { - val secondAddress = node(second).address + val secondAddress = address(second) testConductor.shutdown(second, 0) markNodeAsUnavailable(secondAddress) @@ -58,7 +67,7 @@ abstract class SingletonClusterSpec assertLeader(first) } - testConductor.enter("after-2") + enterBarrier("after-3") } } } diff --git a/akka-cluster/src/multi-jvm/scala/akka/cluster/SunnyWeatherSpec.scala b/akka-cluster/src/multi-jvm/scala/akka/cluster/SunnyWeatherSpec.scala index ef420ab302..3be082d2f3 100644 --- a/akka-cluster/src/multi-jvm/scala/akka/cluster/SunnyWeatherSpec.scala +++ b/akka-cluster/src/multi-jvm/scala/akka/cluster/SunnyWeatherSpec.scala @@ -19,11 +19,13 @@ object SunnyWeatherMultiJvmSpec extends MultiNodeConfig { val fourth = role("fourth") val fifth = role("fifth") + // Note that this test uses default configuration, + // not MultiNodeClusterSpec.clusterConfig commonConfig(ConfigFactory.parseString(""" akka.cluster { - nr-of-deputy-nodes = 0 # FIXME remove this (use default) when ticket #2239 has been fixed gossip-interval = 400 ms + auto-join = off } akka.loglevel = INFO """)) @@ -63,7 +65,7 @@ abstract class SunnyWeatherSpec }) for 
(n ← 1 to 30) { - testConductor.enter("period-" + n) + enterBarrier("period-" + n) unexpected.get must be(null) awaitUpConvergence(roles.size) assertLeaderIn(roles) @@ -71,7 +73,7 @@ abstract class SunnyWeatherSpec 1.seconds.sleep } - testConductor.enter("after") + enterBarrier("after") } } } diff --git a/akka-cluster/src/multi-jvm/scala/akka/cluster/TransitionSpec.scala b/akka-cluster/src/multi-jvm/scala/akka/cluster/TransitionSpec.scala index 0fb3cb03c4..0376545b41 100644 --- a/akka-cluster/src/multi-jvm/scala/akka/cluster/TransitionSpec.scala +++ b/akka-cluster/src/multi-jvm/scala/akka/cluster/TransitionSpec.scala @@ -20,8 +20,7 @@ object TransitionMultiJvmSpec extends MultiNodeConfig { val fifth = role("fifth") commonConfig(debugConfig(on = false). - withFallback(ConfigFactory.parseString( - "akka.cluster.periodic-tasks-initial-delay = 300 s # turn off all periodic tasks")). + withFallback(ConfigFactory.parseString("akka.cluster.periodic-tasks-initial-delay = 300 s # turn off all periodic tasks")). 
withFallback(MultiNodeClusterSpec.clusterConfig)) } @@ -61,7 +60,7 @@ abstract class TransitionSpec } def awaitSeen(addresses: Address*): Unit = awaitCond { - seenLatestGossip.map(node(_).address) == addresses.toSet + (seenLatestGossip map address) == addresses.toSet } def awaitMembers(addresses: Address*): Unit = awaitCond { @@ -69,12 +68,9 @@ abstract class TransitionSpec } def awaitMemberStatus(address: Address, status: MemberStatus): Unit = awaitCond { - memberStatus(address) == Up + memberStatus(address) == status } - // implicit conversion from RoleName to Address - implicit def role2Address(role: RoleName): Address = node(role).address - // DSL sugar for `role1 gossipTo role2` implicit def roleExtras(role: RoleName): RoleWrapper = new RoleWrapper(role) var gossipBarrierCounter = 0 @@ -83,18 +79,18 @@ abstract class TransitionSpec gossipBarrierCounter += 1 runOn(toRole) { val g = cluster.latestGossip - testConductor.enter("before-gossip-" + gossipBarrierCounter) + enterBarrier("before-gossip-" + gossipBarrierCounter) awaitCond(cluster.latestGossip != g) // received gossip - testConductor.enter("after-gossip-" + gossipBarrierCounter) + enterBarrier("after-gossip-" + gossipBarrierCounter) } runOn(fromRole) { - testConductor.enter("before-gossip-" + gossipBarrierCounter) - cluster.gossipTo(node(toRole).address) // send gossip - testConductor.enter("after-gossip-" + gossipBarrierCounter) + enterBarrier("before-gossip-" + gossipBarrierCounter) + cluster.gossipTo(toRole) // send gossip + enterBarrier("after-gossip-" + gossipBarrierCounter) } runOn(roles.filterNot(r ⇒ r == fromRole || r == toRole): _*) { - testConductor.enter("before-gossip-" + gossipBarrierCounter) - testConductor.enter("after-gossip-" + gossipBarrierCounter) + enterBarrier("before-gossip-" + gossipBarrierCounter) + enterBarrier("after-gossip-" + gossipBarrierCounter) } } } @@ -110,7 +106,7 @@ abstract class TransitionSpec cluster.leaderActions() cluster.status must be(Up) - 
testConductor.enter("after-1") + enterBarrier("after-1") } "perform correct transitions when second joining first" taggedAs LongRunningTest in { @@ -122,43 +118,28 @@ abstract class TransitionSpec awaitMembers(first, second) memberStatus(first) must be(Up) memberStatus(second) must be(Joining) + seenLatestGossip must be(Set(first)) cluster.convergence.isDefined must be(false) } - testConductor.enter("second-joined") + enterBarrier("second-joined") first gossipTo second - runOn(second) { - members must be(Set(first, second)) - memberStatus(first) must be(Up) - memberStatus(second) must be(Joining) - // we got a conflicting version in second, and therefore not convergence in second - seenLatestGossip must be(Set(second)) - cluster.convergence.isDefined must be(false) - } - second gossipTo first - runOn(first) { - seenLatestGossip must be(Set(first, second)) - } - - first gossipTo second - runOn(second) { - seenLatestGossip must be(Set(first, second)) - } runOn(first, second) { memberStatus(first) must be(Up) memberStatus(second) must be(Joining) + seenLatestGossip must be(Set(first, second)) cluster.convergence.isDefined must be(true) } - testConductor.enter("convergence-joining-2") + enterBarrier("convergence-joining-2") runOn(leader(first, second)) { cluster.leaderActions() memberStatus(first) must be(Up) memberStatus(second) must be(Up) } - testConductor.enter("leader-actions-2") + enterBarrier("leader-actions-2") leader(first, second) gossipTo nonLeader(first, second).head runOn(nonLeader(first, second).head) { @@ -176,7 +157,7 @@ abstract class TransitionSpec cluster.convergence.isDefined must be(true) } - testConductor.enter("after-2") + enterBarrier("after-2") } "perform correct transitions when third joins second" taggedAs LongRunningTest in { @@ -190,51 +171,29 @@ abstract class TransitionSpec memberStatus(third) must be(Joining) seenLatestGossip must be(Set(second)) } - testConductor.enter("third-joined-second") + enterBarrier("third-joined-second") second 
gossipTo first runOn(first) { members must be(Set(first, second, third)) - cluster.convergence.isDefined must be(false) memberStatus(third) must be(Joining) + seenLatestGossip must be(Set(first, second)) + cluster.convergence.isDefined must be(false) } first gossipTo third - runOn(third) { - members must be(Set(first, second, third)) - cluster.convergence.isDefined must be(false) - memberStatus(third) must be(Joining) - // conflicting version - seenLatestGossip must be(Set(third)) - } - third gossipTo first third gossipTo second - runOn(first, second) { - seenLatestGossip must be(Set(myself, third)) - } - - first gossipTo second - runOn(second) { - seenLatestGossip must be(Set(first, second, third)) - cluster.convergence.isDefined must be(true) - } - - runOn(first, third) { - cluster.convergence.isDefined must be(false) - } - - second gossipTo first - second gossipTo third runOn(first, second, third) { - seenLatestGossip must be(Set(first, second, third)) + members must be(Set(first, second, third)) memberStatus(first) must be(Up) memberStatus(second) must be(Up) memberStatus(third) must be(Joining) + seenLatestGossip must be(Set(first, second, third)) cluster.convergence.isDefined must be(true) } - testConductor.enter("convergence-joining-3") + enterBarrier("convergence-joining-3") runOn(leader(first, second, third)) { cluster.leaderActions() @@ -242,7 +201,7 @@ abstract class TransitionSpec memberStatus(second) must be(Up) memberStatus(third) must be(Up) } - testConductor.enter("leader-actions-3") + enterBarrier("leader-actions-3") // leader gossipTo first non-leader leader(first, second, third) gossipTo nonLeader(first, second, third).head @@ -255,7 +214,7 @@ abstract class TransitionSpec // first non-leader gossipTo the other non-leader nonLeader(first, second, third).head gossipTo nonLeader(first, second, third).tail.head runOn(nonLeader(first, second, third).head) { - cluster.gossipTo(node(nonLeader(first, second, third).tail.head).address) + 
cluster.gossipTo(nonLeader(first, second, third).tail.head) } runOn(nonLeader(first, second, third).tail.head) { memberStatus(third) must be(Up) @@ -281,27 +240,29 @@ abstract class TransitionSpec cluster.convergence.isDefined must be(true) } - testConductor.enter("after-3") + enterBarrier("after-3") } "startup a second separated cluster consisting of nodes fourth and fifth" taggedAs LongRunningTest in { runOn(fourth) { cluster.join(fifth) - awaitMembers(fourth, fifth) - cluster.gossipTo(fifth) - awaitSeen(fourth, fifth) - cluster.convergence.isDefined must be(true) } runOn(fifth) { awaitMembers(fourth, fifth) - cluster.gossipTo(fourth) - awaitSeen(fourth, fifth) - cluster.gossipTo(fourth) + } + testConductor.enter("fourth-joined") + + fifth gossipTo fourth + fourth gossipTo fifth + + runOn(fourth, fifth) { + memberStatus(fourth) must be(Joining) + memberStatus(fifth) must be(Up) + seenLatestGossip must be(Set(fourth, fifth)) cluster.convergence.isDefined must be(true) } - testConductor.enter("fourth-joined-fifth") - testConductor.enter("after-4") + enterBarrier("after-4") } "perform correct transitions when second cluster (node fourth) joins first cluster (node third)" taggedAs LongRunningTest in { @@ -313,7 +274,7 @@ abstract class TransitionSpec awaitMembers(first, second, third, fourth) seenLatestGossip must be(Set(third)) } - testConductor.enter("fourth-joined-third") + enterBarrier("fourth-joined-third") third gossipTo second runOn(second) { @@ -365,7 +326,7 @@ abstract class TransitionSpec memberStatus(fifth) must be(Up) cluster.convergence.isDefined must be(true) - testConductor.enter("convergence-joining-3") + enterBarrier("convergence-joining-3") runOn(leader(roles: _*)) { cluster.leaderActions() @@ -378,7 +339,7 @@ abstract class TransitionSpec x gossipTo y } - testConductor.enter("spread-5") + enterBarrier("spread-5") seenLatestGossip must be(roles.toSet) memberStatus(first) must be(Up) @@ -388,7 +349,7 @@ abstract class TransitionSpec 
memberStatus(fifth) must be(Up) cluster.convergence.isDefined must be(true) - testConductor.enter("after-5") + enterBarrier("after-5") } "perform correct transitions when second becomes unavailble" taggedAs LongRunningTest in { @@ -399,6 +360,8 @@ abstract class TransitionSpec seenLatestGossip must be(Set(fifth)) } + enterBarrier("after-second-unavailble") + // spread the word val gossipRound = List(fifth, fourth, third, first, third, fourth, fifth) for (x :: y :: Nil ← gossipRound.sliding(2)) { @@ -415,6 +378,8 @@ abstract class TransitionSpec awaitMemberStatus(second, Down) } + enterBarrier("after-second-down") + // spread the word val gossipRound2 = List(third, fourth, fifth, first, third, fourth, fifth) for (x :: y :: Nil ← gossipRound2.sliding(2)) { @@ -428,7 +393,7 @@ abstract class TransitionSpec cluster.convergence.isDefined must be(true) } - testConductor.enter("after-6") + enterBarrier("after-6") } } diff --git a/akka-cluster/src/test/scala/akka/cluster/AccrualFailureDetectorSpec.scala b/akka-cluster/src/test/scala/akka/cluster/AccrualFailureDetectorSpec.scala index bd4d5d2c52..5c7186502c 100644 --- a/akka-cluster/src/test/scala/akka/cluster/AccrualFailureDetectorSpec.scala +++ b/akka-cluster/src/test/scala/akka/cluster/AccrualFailureDetectorSpec.scala @@ -6,6 +6,9 @@ package akka.cluster import akka.actor.Address import akka.testkit.{ LongRunningTest, AkkaSpec } +import scala.collection.immutable.TreeMap +import akka.util.duration._ +import akka.util.Duration @org.junit.runner.RunWith(classOf[org.scalatest.junit.JUnitRunner]) class AccrualFailureDetectorSpec extends AkkaSpec(""" @@ -27,33 +30,72 @@ class AccrualFailureDetectorSpec extends AkkaSpec(""" timeGenerator } + val defaultFakeTimeIntervals = Vector.fill(20)(1000L) + def createFailureDetector( + threshold: Double = 8.0, + maxSampleSize: Int = 1000, + minStdDeviation: Duration = 10.millis, + acceptableLostDuration: Duration = Duration.Zero, + firstHeartbeatEstimate: Duration = 1.second, + clock: () 
⇒ Long = fakeTimeGenerator(defaultFakeTimeIntervals)): AccrualFailureDetector = + new AccrualFailureDetector(system, + threshold, + maxSampleSize, + minStdDeviation, + acceptableLostDuration, + firstHeartbeatEstimate = firstHeartbeatEstimate, + clock = clock) + + "use good enough cumulative distribution function" in { + val fd = createFailureDetector() + fd.cumulativeDistributionFunction(0.0, 0, 1) must be(0.5 plusOrMinus (0.001)) + fd.cumulativeDistributionFunction(0.6, 0, 1) must be(0.7257 plusOrMinus (0.001)) + fd.cumulativeDistributionFunction(1.5, 0, 1) must be(0.9332 plusOrMinus (0.001)) + fd.cumulativeDistributionFunction(2.0, 0, 1) must be(0.97725 plusOrMinus (0.01)) + fd.cumulativeDistributionFunction(2.5, 0, 1) must be(0.9379 plusOrMinus (0.1)) + fd.cumulativeDistributionFunction(3.5, 0, 1) must be(0.99977 plusOrMinus (0.1)) + fd.cumulativeDistributionFunction(4.0, 0, 1) must be(0.99997 plusOrMinus (0.1)) + + for (x :: y :: Nil ← (0.0 to 4.0 by 0.1).toList.sliding(2)) { + fd.cumulativeDistributionFunction(x, 0, 1) must be < ( + fd.cumulativeDistributionFunction(y, 0, 1)) + } + + fd.cumulativeDistributionFunction(2.2, 2.0, 0.3) must be(0.7475 plusOrMinus (0.001)) + } + + "return realistic phi values" in { + val fd = createFailureDetector() + val test = TreeMap(0 -> 0.0, 500 -> 0.1, 1000 -> 0.3, 1200 -> 1.6, 1400 -> 4.7, 1600 -> 10.8, 1700 -> 15.3) + for ((timeDiff, expectedPhi) ← test) { + fd.phi(timeDiff = timeDiff, mean = 1000.0, stdDeviation = 100.0) must be(expectedPhi plusOrMinus (0.1)) + } + + // larger stdDeviation results => lower phi + fd.phi(timeDiff = 1100, mean = 1000.0, stdDeviation = 500.0) must be < ( + fd.phi(timeDiff = 1100, mean = 1000.0, stdDeviation = 100.0)) + } + "return phi value of 0.0 on startup for each address, when no heartbeats" in { - val fd = new AccrualFailureDetector(system) + val fd = createFailureDetector() fd.phi(conn) must be(0.0) fd.phi(conn2) must be(0.0) } "return phi based on guess when only one heartbeat" in { - // 
1 second ticks - val timeInterval = Vector.fill(30)(1000L) - val fd = new AccrualFailureDetector(system, - timeMachine = fakeTimeGenerator(timeInterval)) + val timeInterval = List[Long](0, 1000, 1000, 1000, 1000) + val fd = createFailureDetector(firstHeartbeatEstimate = 1.seconds, + clock = fakeTimeGenerator(timeInterval)) fd.heartbeat(conn) - fd.phi(conn) must be > (0.0) - // let time go - for (n ← 2 to 8) - fd.phi(conn) must be < (4.0) - for (n ← 9 to 18) - fd.phi(conn) must be < (8.0) - - fd.phi(conn) must be > (8.0) + fd.phi(conn) must be(0.3 plusOrMinus 0.2) + fd.phi(conn) must be(4.5 plusOrMinus 0.3) + fd.phi(conn) must be > (15.0) } "return phi value using first interval after second heartbeat" in { val timeInterval = List[Long](0, 100, 100, 100) - val fd = new AccrualFailureDetector(system, - timeMachine = fakeTimeGenerator(timeInterval)) + val fd = createFailureDetector(clock = fakeTimeGenerator(timeInterval)) fd.heartbeat(conn) fd.phi(conn) must be > (0.0) @@ -63,8 +105,7 @@ class AccrualFailureDetectorSpec extends AkkaSpec(""" "mark node as available after a series of successful heartbeats" in { val timeInterval = List[Long](0, 1000, 100, 100) - val fd = new AccrualFailureDetector(system, - timeMachine = fakeTimeGenerator(timeInterval)) + val fd = createFailureDetector(clock = fakeTimeGenerator(timeInterval)) fd.heartbeat(conn) fd.heartbeat(conn) @@ -75,8 +116,7 @@ class AccrualFailureDetectorSpec extends AkkaSpec(""" "mark node as dead after explicit removal of connection" in { val timeInterval = List[Long](0, 1000, 100, 100, 100) - val fd = new AccrualFailureDetector(system, - timeMachine = fakeTimeGenerator(timeInterval)) + val fd = createFailureDetector(clock = fakeTimeGenerator(timeInterval)) fd.heartbeat(conn) fd.heartbeat(conn) @@ -89,8 +129,7 @@ class AccrualFailureDetectorSpec extends AkkaSpec(""" "mark node as available after explicit removal of connection and receiving heartbeat again" in { val timeInterval = List[Long](0, 1000, 100, 1100, 
1100, 1100, 1100, 1100, 100) - val fd = new AccrualFailureDetector(system, - timeMachine = fakeTimeGenerator(timeInterval)) + val fd = createFailureDetector(clock = fakeTimeGenerator(timeInterval)) fd.heartbeat(conn) //0 @@ -112,40 +151,65 @@ class AccrualFailureDetectorSpec extends AkkaSpec(""" } "mark node as dead if heartbeat are missed" in { - val timeInterval = List[Long](0, 1000, 100, 100, 5000) + val timeInterval = List[Long](0, 1000, 100, 100, 7000) val ft = fakeTimeGenerator(timeInterval) - val fd = new AccrualFailureDetector(system, threshold = 3, - timeMachine = fakeTimeGenerator(timeInterval)) + val fd = createFailureDetector(threshold = 3, clock = fakeTimeGenerator(timeInterval)) fd.heartbeat(conn) //0 fd.heartbeat(conn) //1000 fd.heartbeat(conn) //1100 fd.isAvailable(conn) must be(true) //1200 - fd.isAvailable(conn) must be(false) //6200 + fd.isAvailable(conn) must be(false) //8200 } "mark node as available if it starts heartbeat again after being marked dead due to detection of failure" in { - val timeInterval = List[Long](0, 1000, 100, 1100, 5000, 100, 1000, 100, 100) - val fd = new AccrualFailureDetector(system, threshold = 3, - timeMachine = fakeTimeGenerator(timeInterval)) + val timeInterval = List[Long](0, 1000, 100, 1100, 7000, 100, 1000, 100, 100) + val fd = createFailureDetector(threshold = 3, clock = fakeTimeGenerator(timeInterval)) fd.heartbeat(conn) //0 fd.heartbeat(conn) //1000 fd.heartbeat(conn) //1100 fd.isAvailable(conn) must be(true) //1200 - fd.isAvailable(conn) must be(false) //6200 - fd.heartbeat(conn) //6300 - fd.heartbeat(conn) //7300 - fd.heartbeat(conn) //7400 + fd.isAvailable(conn) must be(false) //8200 + fd.heartbeat(conn) //8300 + fd.heartbeat(conn) //9300 + fd.heartbeat(conn) //9400 - fd.isAvailable(conn) must be(true) //7500 + fd.isAvailable(conn) must be(true) //9500 + } + + "accept some configured missing heartbeats" in { + val timeInterval = List[Long](0, 1000, 1000, 1000, 4000, 1000, 1000) + val fd = 
createFailureDetector(acceptableLostDuration = 3.seconds, clock = fakeTimeGenerator(timeInterval)) + + fd.heartbeat(conn) + fd.heartbeat(conn) + fd.heartbeat(conn) + fd.heartbeat(conn) + fd.isAvailable(conn) must be(true) + fd.heartbeat(conn) + fd.isAvailable(conn) must be(true) + } + + "fail after configured acceptable missing heartbeats" in { + val timeInterval = List[Long](0, 1000, 1000, 1000, 1000, 1000, 500, 500, 5000) + val fd = createFailureDetector(acceptableLostDuration = 3.seconds, clock = fakeTimeGenerator(timeInterval)) + + fd.heartbeat(conn) + fd.heartbeat(conn) + fd.heartbeat(conn) + fd.heartbeat(conn) + fd.heartbeat(conn) + fd.heartbeat(conn) + fd.isAvailable(conn) must be(true) + fd.heartbeat(conn) + fd.isAvailable(conn) must be(false) } "use maxSampleSize heartbeats" in { val timeInterval = List[Long](0, 100, 100, 100, 100, 600, 1000, 1000, 1000, 1000, 1000) - val fd = new AccrualFailureDetector(system, maxSampleSize = 3, - timeMachine = fakeTimeGenerator(timeInterval)) + val fd = createFailureDetector(maxSampleSize = 3, clock = fakeTimeGenerator(timeInterval)) // 100 ms interval fd.heartbeat(conn) //0 @@ -163,4 +227,33 @@ class AccrualFailureDetectorSpec extends AkkaSpec(""" } } + + "Statistics for heartbeats" must { + + "calculate correct mean and variance" in { + val samples = Seq(100, 200, 125, 340, 130) + val stats = (HeartbeatHistory(maxSampleSize = 20) /: samples) { (stats, value) ⇒ stats :+ value } + stats.mean must be(179.0 plusOrMinus 0.00001) + stats.variance must be(7584.0 plusOrMinus 0.00001) + } + + "have 0.0 variance for one sample" in { + (HeartbeatHistory(600) :+ 1000L).variance must be(0.0 plusOrMinus 0.00001) + } + + "be capped by the specified maxSampleSize" in { + val history3 = HeartbeatHistory(maxSampleSize = 3) :+ 100 :+ 110 :+ 90 + history3.mean must be(100.0 plusOrMinus 0.00001) + history3.variance must be(66.6666667 plusOrMinus 0.00001) + + val history4 = history3 :+ 140 + history4.mean must be(113.333333 plusOrMinus 
0.00001) + history4.variance must be(422.222222 plusOrMinus 0.00001) + + val history5 = history4 :+ 80 + history5.mean must be(103.333333 plusOrMinus 0.00001) + history5.variance must be(688.88888889 plusOrMinus 0.00001) + + } + } } diff --git a/akka-cluster/src/test/scala/akka/cluster/ClusterConfigSpec.scala b/akka-cluster/src/test/scala/akka/cluster/ClusterConfigSpec.scala index 481d9f7e5a..92e219a540 100644 --- a/akka-cluster/src/test/scala/akka/cluster/ClusterConfigSpec.scala +++ b/akka-cluster/src/test/scala/akka/cluster/ClusterConfigSpec.scala @@ -16,17 +16,21 @@ class ClusterConfigSpec extends AkkaSpec { "be able to parse generic cluster config elements" in { val settings = new ClusterSettings(system.settings.config, system.name) import settings._ - FailureDetectorThreshold must be(8) + FailureDetectorThreshold must be(8.0 plusOrMinus 0.0001) FailureDetectorMaxSampleSize must be(1000) - FailureDetectorImplementationClass must be(None) - NodeToJoin must be(None) + FailureDetectorImplementationClass must be(classOf[AccrualFailureDetector].getName) + FailureDetectorMinStdDeviation must be(100 millis) + FailureDetectorAcceptableHeartbeatPause must be(3 seconds) + SeedNodes must be(Seq.empty[String]) + SeedNodeTimeout must be(5 seconds) PeriodicTasksInitialDelay must be(1 seconds) GossipInterval must be(1 second) HeartbeatInterval must be(1 second) LeaderActionsInterval must be(1 second) UnreachableNodesReaperInterval must be(1 second) + JoinTimeout must be(60 seconds) NrOfGossipDaemons must be(4) - NrOfDeputyNodes must be(3) + AutoJoin must be(true) AutoDown must be(true) SchedulerTickDuration must be(33 millis) SchedulerTicksPerWheel must be(512) diff --git a/akka-cluster/src/test/scala/akka/cluster/ClusterSpec.scala b/akka-cluster/src/test/scala/akka/cluster/ClusterSpec.scala index 03f6460ea1..e818847969 100644 --- a/akka-cluster/src/test/scala/akka/cluster/ClusterSpec.scala +++ b/akka-cluster/src/test/scala/akka/cluster/ClusterSpec.scala @@ -11,12 +11,13 @@ 
import akka.actor.ExtendedActorSystem import akka.actor.Address import java.util.concurrent.atomic.AtomicInteger import org.scalatest.BeforeAndAfter +import akka.remote.RemoteActorRefProvider object ClusterSpec { val config = """ akka.cluster { + auto-join = off auto-down = off - nr-of-deputy-nodes = 3 periodic-tasks-initial-delay = 120 seconds // turn off scheduled tasks } akka.actor.provider = "akka.remote.RemoteActorRefProvider" @@ -31,9 +32,23 @@ object ClusterSpec { class ClusterSpec extends AkkaSpec(ClusterSpec.config) with BeforeAndAfter { import ClusterSpec._ + val selfAddress = system.asInstanceOf[ExtendedActorSystem].provider.asInstanceOf[RemoteActorRefProvider].transport.address + val addresses = IndexedSeq( + selfAddress, + Address("akka", system.name, selfAddress.host.get, selfAddress.port.get + 1), + Address("akka", system.name, selfAddress.host.get, selfAddress.port.get + 2), + Address("akka", system.name, selfAddress.host.get, selfAddress.port.get + 3), + Address("akka", system.name, selfAddress.host.get, selfAddress.port.get + 4), + Address("akka", system.name, selfAddress.host.get, selfAddress.port.get + 5)) + val deterministicRandom = new AtomicInteger - val cluster = new Cluster(system.asInstanceOf[ExtendedActorSystem], new FailureDetectorPuppet(system)) { + val failureDetector = new FailureDetectorPuppet(system) + + val cluster = new Cluster(system.asInstanceOf[ExtendedActorSystem], failureDetector) { + + // 3 deputy nodes (addresses index 1, 2, 3) + override def seedNodes = addresses.slice(1, 4) override def selectRandomNode(addresses: IndexedSeq[Address]): Option[Address] = { if (addresses.isEmpty) None @@ -48,14 +63,6 @@ class ClusterSpec extends AkkaSpec(ClusterSpec.config) with BeforeAndAfter { testActor ! 
GossipTo(address) } - @volatile - var _gossipToUnreachableProbablity = 0.0 - - override def gossipToUnreachableProbablity(membersSize: Int, unreachableSize: Int): Double = { - if (_gossipToUnreachableProbablity < 0.0) super.gossipToUnreachableProbablity(membersSize, unreachableSize) - else _gossipToUnreachableProbablity - } - @volatile var _gossipToDeputyProbablity = 0.0 @@ -64,41 +71,28 @@ class ClusterSpec extends AkkaSpec(ClusterSpec.config) with BeforeAndAfter { else _gossipToDeputyProbablity } - @volatile - var _unavailable: Set[Address] = Set.empty - - override val failureDetector = new FailureDetectorPuppet(system) { - override def isAvailable(connection: Address): Boolean = { - if (_unavailable.contains(connection)) false - else super.isAvailable(connection) - } - } - } - val selfAddress = cluster.self.address - val addresses = IndexedSeq( - selfAddress, - Address("akka", system.name, selfAddress.host.get, selfAddress.port.get + 1), - Address("akka", system.name, selfAddress.host.get, selfAddress.port.get + 2), - Address("akka", system.name, selfAddress.host.get, selfAddress.port.get + 3), - Address("akka", system.name, selfAddress.host.get, selfAddress.port.get + 4), - Address("akka", system.name, selfAddress.host.get, selfAddress.port.get + 5)) - def memberStatus(address: Address): Option[MemberStatus] = cluster.latestGossip.members.collectFirst { case m if m.address == address ⇒ m.status } before { - cluster._gossipToUnreachableProbablity = 0.0 cluster._gossipToDeputyProbablity = 0.0 - cluster._unavailable = Set.empty + addresses foreach failureDetector.remove deterministicRandom.set(0) } "A Cluster" must { - "initially be singleton cluster and reach convergence immediately" in { - cluster.isSingletonCluster must be(true) + "use the address of the remote transport" in { + cluster.selfAddress must be(selfAddress) + } + + "initially become singleton cluster when joining itself and reach convergence" in { + cluster.isSingletonCluster must be(false) // 
auto-join = off + cluster.join(selfAddress) + awaitCond(cluster.isSingletonCluster) + cluster.self.address must be(selfAddress) cluster.latestGossip.members.map(_.address) must be(Set(selfAddress)) memberStatus(selfAddress) must be(Some(MemberStatus.Joining)) cluster.convergence.isDefined must be(true) @@ -141,17 +135,6 @@ class ClusterSpec extends AkkaSpec(ClusterSpec.config) with BeforeAndAfter { expectNoMsg(1 second) } - "use certain probability for gossiping to unreachable node depending on the number of unreachable and live nodes" in { - cluster._gossipToUnreachableProbablity = -1.0 // use real impl - cluster.gossipToUnreachableProbablity(10, 1) must be < (cluster.gossipToUnreachableProbablity(9, 1)) - cluster.gossipToUnreachableProbablity(10, 1) must be < (cluster.gossipToUnreachableProbablity(10, 2)) - cluster.gossipToUnreachableProbablity(10, 5) must be < (cluster.gossipToUnreachableProbablity(10, 9)) - cluster.gossipToUnreachableProbablity(0, 10) must be <= (1.0) - cluster.gossipToUnreachableProbablity(1, 10) must be <= (1.0) - cluster.gossipToUnreachableProbablity(10, 0) must be(0.0 plusOrMinus (0.0001)) - cluster.gossipToUnreachableProbablity(0, 0) must be(0.0 plusOrMinus (0.0001)) - } - "use certain probability for gossiping to deputy node depending on the number of unreachable and live nodes" in { cluster._gossipToDeputyProbablity = -1.0 // use real impl cluster.gossipToDeputyProbablity(10, 1, 2) must be < (cluster.gossipToDeputyProbablity(9, 1, 2)) @@ -169,7 +152,7 @@ class ClusterSpec extends AkkaSpec(ClusterSpec.config) with BeforeAndAfter { "gossip to duputy node" in { cluster._gossipToDeputyProbablity = 1.0 // always - // we have configured 2 deputy nodes + // we have configured 3 deputy nodes (seedNodes) cluster.gossip() // 1 is deputy cluster.gossip() // 2 is deputy cluster.gossip() // 3 is deputy @@ -186,27 +169,11 @@ class ClusterSpec extends AkkaSpec(ClusterSpec.config) with BeforeAndAfter { } - "gossip to random unreachable node" in { - val 
dead = Set(addresses(1)) - cluster._unavailable = dead - cluster._gossipToUnreachableProbablity = 1.0 // always - - cluster.reapUnreachableMembers() - cluster.latestGossip.overview.unreachable.map(_.address) must be(dead) - - cluster.gossip() - - expectMsg(GossipTo(addresses(2))) // first available - expectMsg(GossipTo(addresses(1))) // the unavailable - - expectNoMsg(1 second) - } - "gossip to random deputy node if number of live nodes is less than number of deputy nodes" in { cluster._gossipToDeputyProbablity = -1.0 // real impl // 0 and 2 still alive val dead = Set(addresses(1), addresses(3), addresses(4), addresses(5)) - cluster._unavailable = dead + dead foreach failureDetector.markNodeAsUnavailable cluster.reapUnreachableMembers() cluster.latestGossip.overview.unreachable.map(_.address) must be(dead) diff --git a/akka-cluster/src/test/scala/akka/cluster/MemberOrderingSpec.scala b/akka-cluster/src/test/scala/akka/cluster/MemberOrderingSpec.scala new file mode 100644 index 0000000000..d8687312da --- /dev/null +++ b/akka-cluster/src/test/scala/akka/cluster/MemberOrderingSpec.scala @@ -0,0 +1,138 @@ +/** + * Copyright (C) 2009-2012 Typesafe Inc. 
+ */ + +package akka.cluster + +import akka.actor.{ Address, AddressFromURIString } +import java.net.InetSocketAddress +import org.scalatest.matchers.MustMatchers +import org.scalatest.WordSpec +import scala.collection.immutable.SortedSet +import scala.util.Random + +@org.junit.runner.RunWith(classOf[org.scalatest.junit.JUnitRunner]) +class MemberOrderingSpec extends WordSpec with MustMatchers { + import Member.ordering + import Member.addressOrdering + import MemberStatus._ + + "An Ordering[Member]" must { + + "order non-exiting members by host:port" in { + val members = SortedSet.empty[Member] + + Member(AddressFromURIString("akka://sys@darkstar:1112"), Up) + + Member(AddressFromURIString("akka://sys@darkstar:1113"), Joining) + + Member(AddressFromURIString("akka://sys@darkstar:1111"), Up) + + val seq = members.toSeq + seq.size must equal(3) + seq(0) must equal(Member(AddressFromURIString("akka://sys@darkstar:1111"), Up)) + seq(1) must equal(Member(AddressFromURIString("akka://sys@darkstar:1112"), Up)) + seq(2) must equal(Member(AddressFromURIString("akka://sys@darkstar:1113"), Joining)) + } + + "order exiting members by last" in { + val members = SortedSet.empty[Member] + + Member(AddressFromURIString("akka://sys@darkstar:1112"), Exiting) + + Member(AddressFromURIString("akka://sys@darkstar:1113"), Up) + + Member(AddressFromURIString("akka://sys@darkstar:1111"), Joining) + + val seq = members.toSeq + seq.size must equal(3) + seq(0) must equal(Member(AddressFromURIString("akka://sys@darkstar:1111"), Joining)) + seq(1) must equal(Member(AddressFromURIString("akka://sys@darkstar:1113"), Up)) + seq(2) must equal(Member(AddressFromURIString("akka://sys@darkstar:1112"), Exiting)) + } + + "order multiple exiting members by last but internally by host:port" in { + val members = SortedSet.empty[Member] + + Member(AddressFromURIString("akka://sys@darkstar:1112"), Exiting) + + Member(AddressFromURIString("akka://sys@darkstar:1113"), Leaving) + + 
Member(AddressFromURIString("akka://sys@darkstar:1111"), Up) + + Member(AddressFromURIString("akka://sys@darkstar:1110"), Exiting) + + val seq = members.toSeq + seq.size must equal(4) + seq(0) must equal(Member(AddressFromURIString("akka://sys@darkstar:1111"), Up)) + seq(1) must equal(Member(AddressFromURIString("akka://sys@darkstar:1113"), Leaving)) + seq(2) must equal(Member(AddressFromURIString("akka://sys@darkstar:1110"), Exiting)) + seq(3) must equal(Member(AddressFromURIString("akka://sys@darkstar:1112"), Exiting)) + } + + "be sorted by address correctly" in { + import Member.ordering + // sorting should be done on host and port, only + val m1 = Member(Address("akka", "sys1", "host1", 9000), MemberStatus.Up) + val m2 = Member(Address("akka", "sys1", "host1", 10000), MemberStatus.Up) + val m3 = Member(Address("cluster", "sys2", "host2", 8000), MemberStatus.Up) + val m4 = Member(Address("cluster", "sys2", "host2", 9000), MemberStatus.Up) + val m5 = Member(Address("cluster", "sys1", "host2", 10000), MemberStatus.Up) + + val expected = IndexedSeq(m1, m2, m3, m4, m5) + val shuffled = Random.shuffle(expected) + shuffled.sorted must be(expected) + (SortedSet.empty[Member] ++ shuffled).toIndexedSeq must be(expected) + } + + "have stable equals and hashCode" in { + val m1 = Member(Address("akka", "sys1", "host1", 9000), MemberStatus.Joining) + val m2 = Member(Address("akka", "sys1", "host1", 9000), MemberStatus.Up) + val m3 = Member(Address("akka", "sys1", "host1", 10000), MemberStatus.Up) + + m1 must be(m2) + m1.hashCode must be(m2.hashCode) + + m3 must not be (m2) + m3 must not be (m1) + } + } + + "An Ordering[Address]" must { + + "order addresses by port" in { + val addresses = SortedSet.empty[Address] + + AddressFromURIString("akka://sys@darkstar:1112") + + AddressFromURIString("akka://sys@darkstar:1113") + + AddressFromURIString("akka://sys@darkstar:1110") + + AddressFromURIString("akka://sys@darkstar:1111") + + val seq = addresses.toSeq + seq.size must equal(4) 
+ seq(0) must equal(AddressFromURIString("akka://sys@darkstar:1110")) + seq(1) must equal(AddressFromURIString("akka://sys@darkstar:1111")) + seq(2) must equal(AddressFromURIString("akka://sys@darkstar:1112")) + seq(3) must equal(AddressFromURIString("akka://sys@darkstar:1113")) + } + + "order addresses by hostname" in { + val addresses = SortedSet.empty[Address] + + AddressFromURIString("akka://sys@darkstar2:1110") + + AddressFromURIString("akka://sys@darkstar1:1110") + + AddressFromURIString("akka://sys@darkstar3:1110") + + AddressFromURIString("akka://sys@darkstar0:1110") + + val seq = addresses.toSeq + seq.size must equal(4) + seq(0) must equal(AddressFromURIString("akka://sys@darkstar0:1110")) + seq(1) must equal(AddressFromURIString("akka://sys@darkstar1:1110")) + seq(2) must equal(AddressFromURIString("akka://sys@darkstar2:1110")) + seq(3) must equal(AddressFromURIString("akka://sys@darkstar3:1110")) + } + + "order addresses by hostname and port" in { + val addresses = SortedSet.empty[Address] + + AddressFromURIString("akka://sys@darkstar2:1110") + + AddressFromURIString("akka://sys@darkstar0:1111") + + AddressFromURIString("akka://sys@darkstar2:1111") + + AddressFromURIString("akka://sys@darkstar0:1110") + + val seq = addresses.toSeq + seq.size must equal(4) + seq(0) must equal(AddressFromURIString("akka://sys@darkstar0:1110")) + seq(1) must equal(AddressFromURIString("akka://sys@darkstar0:1111")) + seq(2) must equal(AddressFromURIString("akka://sys@darkstar2:1110")) + seq(3) must equal(AddressFromURIString("akka://sys@darkstar2:1111")) + } + } +} diff --git a/akka-cluster/src/test/scala/akka/cluster/MemberSpec.scala b/akka-cluster/src/test/scala/akka/cluster/MemberSpec.scala deleted file mode 100644 index bc1f70ae86..0000000000 --- a/akka-cluster/src/test/scala/akka/cluster/MemberSpec.scala +++ /dev/null @@ -1,45 +0,0 @@ -/** - * Copyright (C) 2009-2012 Typesafe Inc. 
- */ - -package akka.cluster - -import org.scalatest.WordSpec -import org.scalatest.matchers.MustMatchers -import akka.actor.Address -import scala.util.Random -import scala.collection.immutable.SortedSet - -@org.junit.runner.RunWith(classOf[org.scalatest.junit.JUnitRunner]) -class MemberSpec extends WordSpec with MustMatchers { - - "Member" must { - - "be sorted by address correctly" in { - import Member.ordering - // sorting should be done on host and port, only - val m1 = Member(Address("akka", "sys1", "host1", 9000), MemberStatus.Up) - val m2 = Member(Address("akka", "sys1", "host1", 10000), MemberStatus.Up) - val m3 = Member(Address("cluster", "sys2", "host2", 8000), MemberStatus.Up) - val m4 = Member(Address("cluster", "sys2", "host2", 9000), MemberStatus.Up) - val m5 = Member(Address("cluster", "sys1", "host2", 10000), MemberStatus.Up) - - val expected = IndexedSeq(m1, m2, m3, m4, m5) - val shuffled = Random.shuffle(expected) - shuffled.sorted must be(expected) - (SortedSet.empty[Member] ++ shuffled).toIndexedSeq must be(expected) - } - - "have stable equals and hashCode" in { - val m1 = Member(Address("akka", "sys1", "host1", 9000), MemberStatus.Joining) - val m2 = Member(Address("akka", "sys1", "host1", 9000), MemberStatus.Up) - val m3 = Member(Address("akka", "sys1", "host1", 10000), MemberStatus.Up) - - m1 must be(m2) - m1.hashCode must be(m2.hashCode) - - m3 must not be (m2) - m3 must not be (m1) - } - } -} diff --git a/akka-docs/cluster/cluster.rst b/akka-docs/cluster/cluster.rst index fb53f13131..1812c33561 100644 --- a/akka-docs/cluster/cluster.rst +++ b/akka-docs/cluster/cluster.rst @@ -5,8 +5,7 @@ Cluster Specification ###################### -.. note:: *This document describes the new clustering coming in Akka Coltrane and -is not available in the latest stable release)* +.. 
note:: *This document describes the new clustering coming in Akka Coltrane and is not available in the latest stable release* Intro ===== @@ -164,8 +163,8 @@ After gossip convergence a ``leader`` for the cluster can be determined. There i ``leader`` election process, the ``leader`` can always be recognised deterministically by any node whenever there is gossip convergence. The ``leader`` is simply the first node in sorted order that is able to take the leadership role, where the only -allowed member states for a ``leader`` are ``up`` or ``leaving`` (see below for more -information about member states). +allowed member states for a ``leader`` are ``up``, ``leaving`` or ``exiting`` (see +below for more information about member states). The role of the ``leader`` is to shift members in and out of the cluster, changing ``joining`` members to the ``up`` state or ``exiting`` members to the @@ -184,14 +183,20 @@ according to the Failure Detector is considered unreachable. This means setting the unreachable node status to ``down`` automatically. +Seed Nodes +^^^^^^^^^^ + +The seed nodes are configured contact points for initial join of the cluster. +When a new node is started it sends a message to all seed nodes and +then sends a join command to the one that answers first. + +It is possible to turn off automatic join. + Deputy Nodes ^^^^^^^^^^^^ -After gossip convergence a set of ``deputy`` nodes for the cluster can be -determined. As with the ``leader``, there is no ``deputy`` election process, -the deputies can always be recognised deterministically by any node whenever there -is gossip convergence. The list of ``deputy`` nodes is simply the N - 1 number -of nodes (e.g. starting with the first node after the ``leader``) in sorted order. +The deputy nodes are the live members of the configured seed nodes. +It is preferred to use deputy nodes in different racks/data centers.
The nodes defined as ``deputy`` nodes are just regular member nodes whose only "special role" is to help breaking logical partitions as seen in the gossip @@ -214,7 +219,7 @@ nodes involved in a gossip exchange. Periodically, the default is every 1 second, each node chooses another random node to initiate a round of gossip with. The choice of node is random but can -also include extra gossiping for unreachable nodes, ``deputy`` nodes, and nodes with +also include extra gossiping for ``deputy`` nodes, and nodes with either newer or older state versions. The gossip overview contains the current state version for all nodes and also a @@ -229,14 +234,11 @@ During each round of gossip exchange the following process is used: 1. Gossip to random live node (if any) -2. Gossip to random unreachable node with certain probability depending on the - number of unreachable and live nodes - -3. If the node gossiped to at (1) was not a ``deputy`` node, or the number of live +2. If the node gossiped to at (1) was not a ``deputy`` node, or the number of live nodes is less than number of ``deputy`` nodes, gossip to random ``deputy`` node with certain probability depending on number of unreachable, ``deputy``, and live nodes. -4. Gossip to random node with newer or older state information, based on the +3. Gossip to random node with newer or older state information, based on the current gossip overview, with some probability (?) The gossiper only sends the gossip overview to the chosen node. The recipient of @@ -302,10 +304,6 @@ handoff has completed then the node will change to the ``exiting`` state. Once all nodes have seen the exiting state (convergence) the ``leader`` will remove the node from the cluster, marking it as ``removed``. -A node can also be removed forcefully by moving it directly to the ``removed`` -state using the ``remove`` action. The cluster will rebalance based on the new -cluster membership. 
- If a node is unreachable then gossip convergence is not possible and therefore any ``leader`` actions are also not possible (for instance, allowing a node to become a part of the cluster, or changing actor distribution). To be able to @@ -314,11 +312,12 @@ unreachable node is experiencing only transient difficulties then it can be explicitly marked as ``down`` using the ``down`` user action. When this node comes back up and begins gossiping it will automatically go through the joining process again. If the unreachable node will be permanently down then it can be -removed from the cluster directly with the ``remove`` user action. The cluster -can also *auto-down* a node using the accrual failure detector. +removed from the cluster directly by shutting the actor system down or killing it +through an external ``SIGKILL`` signal, invocation of ``System.exit(status)`` or +similar. The cluster can, through the leader, also *auto-down* a node. -This means that nodes can join and leave the cluster at any point in time, -e.g. provide cluster elasticity. +This means that nodes can join and leave the cluster at any point in time, i.e. +provide cluster elasticity. 
State Diagram for the Member States @@ -339,12 +338,12 @@ Member States - **leaving** / **exiting** states during graceful removal -- **removed** - tombstone state (no longer a member) - - **down** marked as down/offline/unreachable +- **removed** + tombstone state (no longer a member) + User Actions ^^^^^^^^^^^^ @@ -359,9 +358,6 @@ User Actions - **down** mark a node as temporarily down -- **remove** - remove a node from the cluster immediately - Leader Actions ^^^^^^^^^^^^^^ diff --git a/akka-docs/general/message-send-semantics.rst b/akka-docs/general/message-send-semantics.rst index d9488d1f2b..41eb727358 100644 --- a/akka-docs/general/message-send-semantics.rst +++ b/akka-docs/general/message-send-semantics.rst @@ -48,14 +48,14 @@ At-most-once Actual transports may provide stronger semantics, but at-most-once is the semantics you should expect. -The alternatives would be once-and-only-once, which is extremely costly, +The alternatives would be once-and-only-once, which is extremely costly, or at-least-once which essentially requires idempotency of message processing, which is a user-level concern. Ordering is preserved on a per-sender basis ------------------------------------------- -Actor ``A1` sends messages ``M1``, ``M2``, ``M3`` to ``A2`` +Actor ``A1`` sends messages ``M1``, ``M2``, ``M3`` to ``A2`` Actor ``A3`` sends messages ``M4``, ``M5``, ``M6`` to ``A2`` This means that: @@ -66,4 +66,4 @@ This means that: 5) ``A2`` can see messages from ``A1`` interleaved with messages from ``A3`` 6) Since there is no guaranteed delivery, none, some or all of the messages may arrive to ``A2`` -.. _Erlang documentation: http://www.erlang.org/faq/academic.html \ No newline at end of file +.. 
_Erlang documentation: http://www.erlang.org/faq/academic.html diff --git a/akka-docs/java/code/docs/dispatcher/DispatcherDocTestBase.java b/akka-docs/java/code/docs/dispatcher/DispatcherDocTestBase.java index 94e4b38121..ca5569657e 100644 --- a/akka-docs/java/code/docs/dispatcher/DispatcherDocTestBase.java +++ b/akka-docs/java/code/docs/dispatcher/DispatcherDocTestBase.java @@ -24,6 +24,15 @@ import com.typesafe.config.Config; //#imports-prio-mailbox +//#imports-custom +import akka.dispatch.Envelope; +import akka.dispatch.MessageQueue; +import akka.dispatch.MailboxType; +import java.util.Queue; +import java.util.concurrent.ConcurrentLinkedQueue; + +//#imports-custom + import org.junit.After; import org.junit.Before; import org.junit.Test; @@ -136,4 +145,32 @@ public class DispatcherDocTestBase { } } //#prio-mailbox + + //#mailbox-implementation-example + class MyUnboundedMailbox implements MailboxType { + + // This constructor signature must exist, it will be called by Akka + public MyUnboundedMailbox(ActorSystem.Settings settings, Config config) { + // put your initialization code here + } + + // The create method is called to create the MessageQueue + public MessageQueue create(Option owner, Option system) { + return new MessageQueue() { + private final Queue queue = new ConcurrentLinkedQueue(); + + // these must be implemented; queue used as example + public void enqueue(ActorRef receiver, Envelope handle) { queue.offer(handle); } + public Envelope dequeue() { return queue.poll(); } + public int numberOfMessages() { return queue.size(); } + public boolean hasMessages() { return !queue.isEmpty(); } + public void cleanUp(ActorRef owner, MessageQueue deadLetters) { + for (Envelope handle: queue) { + deadLetters.enqueue(owner, handle); + } + } + }; + } + } + //#mailbox-implementation-example } diff --git a/akka-docs/java/dispatchers.rst b/akka-docs/java/dispatchers.rst index 2723883e9c..577740d78c 100644 --- a/akka-docs/java/dispatchers.rst +++ 
b/akka-docs/java/dispatchers.rst @@ -183,3 +183,46 @@ And then an example on how you would use it: the configuration which describes the dispatcher using this mailbox type; the mailbox type will be instantiated once for each dispatcher using it. +Creating your own Mailbox type +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +An example is worth a thousand quacks: + +.. includecode:: code/docs/dispatcher/DispatcherDocTestBase.java#imports-custom + +.. includecode:: code/docs/dispatcher/DispatcherDocTestBase.java#mailbox-implementation-example + +And then you just specify the FQCN of your MailboxType as the value of the "mailbox-type" in the dispatcher configuration. + +.. note:: + + Make sure to include a constructor which takes + ``akka.actor.ActorSystem.Settings`` and ``com.typesafe.config.Config`` + arguments, as this constructor is invoked reflectively to construct your + mailbox type. The config passed in as second argument is that section from + the configuration which describes the dispatcher using this mailbox type; the + mailbox type will be instantiated once for each dispatcher using it. + + +Special Semantics of ``system.actorOf`` +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +In order to make ``system.actorOf`` both synchronous and non-blocking while +keeping the return type :class:`ActorRef` (and the semantics that the returned +ref is fully functional), special handling takes place for this case. Behind +the scenes, a hollow kind of actor reference is constructed, which is sent to +the system’s guardian actor who actually creates the actor and its context and +puts those inside the reference. Until that has happened, messages sent to the +:class:`ActorRef` will be queued locally, and only upon swapping the real +filling in will they be transferred into the real mailbox. Thus, + +.. code-block:: scala + + final Props props = ... 
+ // this actor uses MyCustomMailbox, which is assumed to be a singleton + system.actorOf(props.withDispatcher("myCustomMailbox")).tell("bang"); + assert(MyCustomMailbox.getInstance().getLastEnqueued().equals("bang")); + +will probably fail; you will have to allow for some time to pass and retry the +check à la :meth:`TestKit.awaitCond`. + diff --git a/akka-docs/java/untyped-actors.rst b/akka-docs/java/untyped-actors.rst index ac911fd216..57dbaa5604 100644 --- a/akka-docs/java/untyped-actors.rst +++ b/akka-docs/java/untyped-actors.rst @@ -82,13 +82,6 @@ that is used in log messages and for identifying actors. The name must not be em or start with ``$``. If the given name is already in use by another child to the same parent actor an `InvalidActorNameException` is thrown. -.. warning:: - - Creating top-level actors with ``system.actorOf`` is a blocking operation, - hence it may dead-lock due to starvation if the default dispatcher is - overloaded. To avoid problems, do not call this method from within actors or - futures which run on the default dispatcher. - Actors are automatically started asynchronously when created. When you create the ``UntypedActor`` then it will automatically call the ``preStart`` callback method on the ``UntypedActor`` class.
This is an excellent place to diff --git a/akka-docs/modules/code/docs/actor/mailbox/DurableMailboxDocSpec.scala b/akka-docs/modules/code/docs/actor/mailbox/DurableMailboxDocSpec.scala index b51c7bb170..fc62cd940d 100644 --- a/akka-docs/modules/code/docs/actor/mailbox/DurableMailboxDocSpec.scala +++ b/akka-docs/modules/code/docs/actor/mailbox/DurableMailboxDocSpec.scala @@ -11,7 +11,7 @@ import akka.actor.Props import org.scalatest.{ BeforeAndAfterAll, WordSpec } import org.scalatest.matchers.MustMatchers import akka.testkit.AkkaSpec -import akka.actor.Actor +import akka.actor.{ Actor, ExtendedActorSystem } class MyActor extends Actor { def receive = { @@ -56,20 +56,20 @@ import akka.util.duration._ class MyMailboxType(systemSettings: ActorSystem.Settings, config: Config) extends MailboxType { - override def create(owner: Option[ActorContext]): MessageQueue = owner match { - case Some(o) ⇒ new MyMessageQueue(o) + override def create(owner: Option[ActorRef], system: Option[ActorSystem]): MessageQueue = owner zip system headOption match { + case Some((o, s: ExtendedActorSystem)) ⇒ new MyMessageQueue(o, s) case None ⇒ throw new IllegalArgumentException( "requires an owner (i.e. 
does not work with BalancingDispatcher)") } } -class MyMessageQueue(_owner: ActorContext) - extends DurableMessageQueue(_owner) with DurableMessageSerialization { +class MyMessageQueue(_owner: ActorRef, _system: ExtendedActorSystem) + extends DurableMessageQueue(_owner, _system) with DurableMessageSerialization { val storage = new QueueStorage // A real-world implmentation would use configuration to set the last // three parameters below - val breaker = CircuitBreaker(_owner.system.scheduler, 5, 30.seconds, 1.minute) + val breaker = CircuitBreaker(system.scheduler, 5, 30.seconds, 1.minute) def enqueue(receiver: ActorRef, envelope: Envelope): Unit = breaker.withSyncCircuitBreaker { val data: Array[Byte] = serialize(envelope) @@ -91,7 +91,7 @@ class MyMessageQueue(_owner: ActorContext) * but the purpose of a durable mailbox is to continue * with the same message queue when the actor is started again. */ - def cleanUp(owner: ActorContext, deadLetters: MessageQueue): Unit = () + def cleanUp(owner: ActorRef, deadLetters: MessageQueue): Unit = () } //#custom-mailbox diff --git a/akka-docs/project/licenses.rst b/akka-docs/project/licenses.rst index b83b6a5f46..7dbcf5ef9f 100644 --- a/akka-docs/project/licenses.rst +++ b/akka-docs/project/licenses.rst @@ -196,4 +196,4 @@ Licenses for Dependency Libraries --------------------------------- Each dependency and its license can be seen in the project build file (the comment on the side of each dependency): -``_ +``_ diff --git a/akka-docs/scala/actors.rst b/akka-docs/scala/actors.rst index 9b2cb9a7e5..47a2318e53 100644 --- a/akka-docs/scala/actors.rst +++ b/akka-docs/scala/actors.rst @@ -76,13 +76,6 @@ that is used in log messages and for identifying actors. The name must not be em or start with ``$``. If the given name is already in use by another child to the same parent actor an `InvalidActorNameException` is thrown. -.. 
warning:: - - Creating top-level actors with ``system.actorOf`` is a blocking operation, - hence it may dead-lock due to starvation if the default dispatcher is - overloaded. To avoid problems, do not call this method from within actors or - futures which run on the default dispatcher. - Actors are automatically started asynchronously when created. When you create the ``Actor`` then it will automatically call the ``preStart`` callback method on the ``Actor`` trait. This is an excellent place to diff --git a/akka-docs/scala/code/docs/dispatcher/DispatcherDocSpec.scala b/akka-docs/scala/code/docs/dispatcher/DispatcherDocSpec.scala index 3ff8d9c1ea..7fdd0cd9bf 100644 --- a/akka-docs/scala/code/docs/dispatcher/DispatcherDocSpec.scala +++ b/akka-docs/scala/code/docs/dispatcher/DispatcherDocSpec.scala @@ -134,8 +134,8 @@ object DispatcherDocSpec { } //#mailbox-implementation-example - case class MyUnboundedMailbox() extends akka.dispatch.MailboxType { - import akka.actor.ActorContext + class MyUnboundedMailbox extends akka.dispatch.MailboxType { + import akka.actor.{ ActorRef, ActorSystem } import com.typesafe.config.Config import java.util.concurrent.ConcurrentLinkedQueue import akka.dispatch.{ @@ -149,12 +149,12 @@ object DispatcherDocSpec { def this(settings: ActorSystem.Settings, config: Config) = this() // The create method is called to create the MessageQueue - final override def create(owner: Option[ActorContext]): MessageQueue = + final override def create(owner: Option[ActorRef], system: Option[ActorSystem]): MessageQueue = new QueueBasedMessageQueue with UnboundedMessageQueueSemantics { final val queue = new ConcurrentLinkedQueue[Envelope]() } - //#mailbox-implementation-example } + //#mailbox-implementation-example } class DispatcherDocSpec extends AkkaSpec(DispatcherDocSpec.config) { diff --git a/akka-docs/scala/dispatchers.rst b/akka-docs/scala/dispatchers.rst index cea9ee6e0a..4253d3a1e4 100644 --- a/akka-docs/scala/dispatchers.rst +++ 
b/akka-docs/scala/dispatchers.rst @@ -198,3 +198,25 @@ And then you just specify the FQCN of your MailboxType as the value of the "mail the configuration which describes the dispatcher using this mailbox type; the mailbox type will be instantiated once for each dispatcher using it. +Special Semantics of ``system.actorOf`` +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +In order to make ``system.actorOf`` both synchronous and non-blocking while +keeping the return type :class:`ActorRef` (and the semantics that the returned +ref is fully functional), special handling takes place for this case. Behind +the scenes, a hollow kind of actor reference is constructed, which is sent to +the system’s guardian actor who actually creates the actor and its context and +puts those inside the reference. Until that has happened, messages sent to the +:class:`ActorRef` will be queued locally, and only upon swapping the real +filling in will they be transferred into the real mailbox. Thus, + +.. code-block:: scala + + val props: Props = ... + // this actor uses MyCustomMailbox, which is assumed to be a singleton + system.actorOf(props.withDispatcher("myCustomMailbox")) ! "bang" + assert(MyCustomMailbox.instance.getLastEnqueuedMessage == "bang") + +will probably fail; you will have to allow for some time to pass and retry the +check à la :meth:`TestKit.awaitCond`. + diff --git a/akka-durable-mailboxes/akka-file-mailbox/src/main/resources/reference.conf b/akka-durable-mailboxes/akka-file-mailbox/src/main/resources/reference.conf index f454716af0..1fb5cceeb1 100644 --- a/akka-durable-mailboxes/akka-file-mailbox/src/main/resources/reference.conf +++ b/akka-durable-mailboxes/akka-file-mailbox/src/main/resources/reference.conf @@ -13,50 +13,50 @@ akka { file-based { # directory below which this queue resides directory-path = "./_mb" - + # attempting to add an item after the queue reaches this size (in items) will fail. 
max-items = 2147483647 - + # attempting to add an item after the queue reaches this size (in bytes) will fail. max-size = 2147483647 bytes - + # attempting to add an item larger than this size (in bytes) will fail. max-item-size = 2147483647 bytes - + # maximum expiration time for this queue (seconds). max-age = 0s - + # maximum journal size before the journal should be rotated. max-journal-size = 16 MiB - + # maximum size of a queue before it drops into read-behind mode. max-memory-size = 128 MiB - + # maximum overflow (multiplier) of a journal file before we re-create it. max-journal-overflow = 10 - + # absolute maximum size of a journal file until we rebuild it, no matter what. max-journal-size-absolute = 9223372036854775807 bytes - + # whether to drop older items (instead of newer) when the queue is full - discard-old-when-full = on - + discard-old-when-full = on + # whether to keep a journal file at all - keep-journal = on - + keep-journal = on + # whether to sync the journal after each transaction sync-journal = off # circuit breaker configuration circuit-breaker { - # maximum number of failures before opening breaker - max-failures = 3 + # maximum number of failures before opening breaker + max-failures = 3 - # duration of time beyond which a call is assumed to be timed out and considered a failure - call-timeout = 3 seconds + # duration of time beyond which a call is assumed to be timed out and considered a failure + call-timeout = 3 seconds - # duration of time to wait until attempting to reset the breaker during which all calls fail-fast - reset-timeout = 30 seconds + # duration of time to wait until attempting to reset the breaker during which all calls fail-fast + reset-timeout = 30 seconds } } } diff --git a/akka-durable-mailboxes/akka-file-mailbox/src/main/scala/akka/actor/mailbox/FileBasedMailbox.scala b/akka-durable-mailboxes/akka-file-mailbox/src/main/scala/akka/actor/mailbox/FileBasedMailbox.scala index fccb6b5aea..c703bf0b49 100644 --- 
a/akka-durable-mailboxes/akka-file-mailbox/src/main/scala/akka/actor/mailbox/FileBasedMailbox.scala +++ b/akka-durable-mailboxes/akka-file-mailbox/src/main/scala/akka/actor/mailbox/FileBasedMailbox.scala @@ -13,26 +13,28 @@ import akka.actor.ActorSystem import akka.dispatch._ import akka.util.{ Duration, NonFatal } import akka.pattern.{ CircuitBreakerOpenException, CircuitBreaker } +import akka.actor.ExtendedActorSystem class FileBasedMailboxType(systemSettings: ActorSystem.Settings, config: Config) extends MailboxType { private val settings = new FileBasedMailboxSettings(systemSettings, config) - override def create(owner: Option[ActorContext]): MessageQueue = owner match { - case Some(o) ⇒ new FileBasedMessageQueue(o, settings) - case None ⇒ throw new ConfigurationException("creating a durable mailbox requires an owner (i.e. does not work with BalancingDispatcher)") + override def create(owner: Option[ActorRef], system: Option[ActorSystem]): MessageQueue = owner zip system headOption match { + case Some((o, s: ExtendedActorSystem)) ⇒ new FileBasedMessageQueue(o, s, settings) + case None ⇒ throw new ConfigurationException("creating a durable mailbox requires an owner (i.e. does not work with BalancingDispatcher)") } } -class FileBasedMessageQueue(_owner: ActorContext, val settings: FileBasedMailboxSettings) extends DurableMessageQueue(_owner) with DurableMessageSerialization { +class FileBasedMessageQueue(_owner: ActorRef, _system: ExtendedActorSystem, val settings: FileBasedMailboxSettings) + extends DurableMessageQueue(_owner, _system) with DurableMessageSerialization { // TODO Is it reasonable for all FileBasedMailboxes to have their own logger? 
private val log = Logging(system, "FileBasedMessageQueue") - val breaker = CircuitBreaker(_owner.system.scheduler, settings.CircuitBreakerMaxFailures, settings.CircuitBreakerCallTimeout, settings.CircuitBreakerResetTimeout) + val breaker = CircuitBreaker(system.scheduler, settings.CircuitBreakerMaxFailures, settings.CircuitBreakerCallTimeout, settings.CircuitBreakerResetTimeout) private val queue = try { (new java.io.File(settings.QueuePath)) match { case dir if dir.exists && !dir.isDirectory ⇒ throw new IllegalStateException("Path already occupied by non-directory " + dir) case dir if !dir.exists ⇒ if (!dir.mkdirs() && !dir.isDirectory) throw new IllegalStateException("Creation of directory failed " + dir) - case _ ⇒ //All good + case _ ⇒ // All good } val queue = new filequeue.PersistentQueue(settings.QueuePath, name, settings, log) queue.setup // replays journal @@ -79,5 +81,5 @@ class FileBasedMessageQueue(_owner: ActorContext, val settings: FileBasedMailbox case NonFatal(_) ⇒ false } - def cleanUp(owner: ActorContext, deadLetters: MessageQueue): Unit = () + def cleanUp(owner: ActorRef, deadLetters: MessageQueue): Unit = () } diff --git a/akka-durable-mailboxes/akka-file-mailbox/src/main/scala/akka/actor/mailbox/FileBasedMailboxSettings.scala b/akka-durable-mailboxes/akka-file-mailbox/src/main/scala/akka/actor/mailbox/FileBasedMailboxSettings.scala index dff4021d96..27088dfc92 100644 --- a/akka-durable-mailboxes/akka-file-mailbox/src/main/scala/akka/actor/mailbox/FileBasedMailboxSettings.scala +++ b/akka-durable-mailboxes/akka-file-mailbox/src/main/scala/akka/actor/mailbox/FileBasedMailboxSettings.scala @@ -16,20 +16,20 @@ class FileBasedMailboxSettings(val systemSettings: ActorSystem.Settings, val use val config = initialize import config._ - val QueuePath: String = getString("directory-path") - val MaxItems: Int = getInt("max-items") - val MaxSize: Long = getBytes("max-size") - val MaxItemSize: Long = getBytes("max-item-size") - val MaxAge: Duration = 
Duration(getMilliseconds("max-age"), MILLISECONDS) - val MaxJournalSize: Long = getBytes("max-journal-size") - val MaxMemorySize: Long = getBytes("max-memory-size") - val MaxJournalOverflow: Int = getInt("max-journal-overflow") - val MaxJournalSizeAbsolute: Long = getBytes("max-journal-size-absolute") - val DiscardOldWhenFull: Boolean = getBoolean("discard-old-when-full") - val KeepJournal: Boolean = getBoolean("keep-journal") - val SyncJournal: Boolean = getBoolean("sync-journal") + final val QueuePath: String = getString("directory-path") + final val MaxItems: Int = getInt("max-items") + final val MaxSize: Long = getBytes("max-size") + final val MaxItemSize: Long = getBytes("max-item-size") + final val MaxAge: Duration = Duration(getMilliseconds("max-age"), MILLISECONDS) + final val MaxJournalSize: Long = getBytes("max-journal-size") + final val MaxMemorySize: Long = getBytes("max-memory-size") + final val MaxJournalOverflow: Int = getInt("max-journal-overflow") + final val MaxJournalSizeAbsolute: Long = getBytes("max-journal-size-absolute") + final val DiscardOldWhenFull: Boolean = getBoolean("discard-old-when-full") + final val KeepJournal: Boolean = getBoolean("keep-journal") + final val SyncJournal: Boolean = getBoolean("sync-journal") - val CircuitBreakerMaxFailures = getInt("circuit-breaker.max-failures") - val CircuitBreakerCallTimeout = Duration.fromNanos(getNanoseconds("circuit-breaker.call-timeout")) - val CircuitBreakerResetTimeout = Duration.fromNanos(getNanoseconds("circuit-breaker.reset-timeout")) -} \ No newline at end of file + final val CircuitBreakerMaxFailures = getInt("circuit-breaker.max-failures") + final val CircuitBreakerCallTimeout = Duration.fromNanos(getNanoseconds("circuit-breaker.call-timeout")) + final val CircuitBreakerResetTimeout = Duration.fromNanos(getNanoseconds("circuit-breaker.reset-timeout")) +} diff --git a/akka-durable-mailboxes/akka-file-mailbox/src/main/scala/akka/actor/mailbox/filequeue/PersistentQueue.scala 
b/akka-durable-mailboxes/akka-file-mailbox/src/main/scala/akka/actor/mailbox/filequeue/PersistentQueue.scala index 1a5ddf4a8c..152b29406c 100644 --- a/akka-durable-mailboxes/akka-file-mailbox/src/main/scala/akka/actor/mailbox/filequeue/PersistentQueue.scala +++ b/akka-durable-mailboxes/akka-file-mailbox/src/main/scala/akka/actor/mailbox/filequeue/PersistentQueue.scala @@ -68,44 +68,44 @@ class PersistentQueue(persistencePath: String, val name: String, val settings: F def overlay[T](base: ⇒ T) = new OverlaySetting(base) // attempting to add an item after the queue reaches this size (in items) will fail. - val maxItems = overlay(PersistentQueue.maxItems) + final val maxItems = overlay(PersistentQueue.maxItems) // attempting to add an item after the queue reaches this size (in bytes) will fail. - val maxSize = overlay(PersistentQueue.maxSize) + final val maxSize = overlay(PersistentQueue.maxSize) // attempting to add an item larger than this size (in bytes) will fail. - val maxItemSize = overlay(PersistentQueue.maxItemSize) + final val maxItemSize = overlay(PersistentQueue.maxItemSize) // maximum expiration time for this queue (seconds). - val maxAge = overlay(PersistentQueue.maxAge) + final val maxAge = overlay(PersistentQueue.maxAge) // maximum journal size before the journal should be rotated. - val maxJournalSize = overlay(PersistentQueue.maxJournalSize) + final val maxJournalSize = overlay(PersistentQueue.maxJournalSize) // maximum size of a queue before it drops into read-behind mode. - val maxMemorySize = overlay(PersistentQueue.maxMemorySize) + final val maxMemorySize = overlay(PersistentQueue.maxMemorySize) // maximum overflow (multiplier) of a journal file before we re-create it. - val maxJournalOverflow = overlay(PersistentQueue.maxJournalOverflow) + final val maxJournalOverflow = overlay(PersistentQueue.maxJournalOverflow) // absolute maximum size of a journal file until we rebuild it, no matter what. 
- val maxJournalSizeAbsolute = overlay(PersistentQueue.maxJournalSizeAbsolute) + final val maxJournalSizeAbsolute = overlay(PersistentQueue.maxJournalSizeAbsolute) // whether to drop older items (instead of newer) when the queue is full - val discardOldWhenFull = overlay(PersistentQueue.discardOldWhenFull) + final val discardOldWhenFull = overlay(PersistentQueue.discardOldWhenFull) // whether to keep a journal file at all - val keepJournal = overlay(PersistentQueue.keepJournal) + final val keepJournal = overlay(PersistentQueue.keepJournal) // whether to sync the journal after each transaction - val syncJournal = overlay(PersistentQueue.syncJournal) + final val syncJournal = overlay(PersistentQueue.syncJournal) // (optional) move expired items over to this queue - val expiredQueue = overlay(PersistentQueue.expiredQueue) + final val expiredQueue = overlay(PersistentQueue.expiredQueue) private var journal = new Journal(new File(persistencePath, name).getCanonicalPath, syncJournal(), log) - // track tentative removals + // track tentative removals private var xidCounter: Int = 0 private val openTransactions = new mutable.HashMap[Int, QItem] def openTransactionCount = openTransactions.size diff --git a/akka-durable-mailboxes/akka-mailboxes-common/src/main/scala/akka/actor/mailbox/DurableMailbox.scala b/akka-durable-mailboxes/akka-mailboxes-common/src/main/scala/akka/actor/mailbox/DurableMailbox.scala index b21878d00e..79ece7625d 100644 --- a/akka-durable-mailboxes/akka-mailboxes-common/src/main/scala/akka/actor/mailbox/DurableMailbox.scala +++ b/akka-durable-mailboxes/akka-mailboxes-common/src/main/scala/akka/actor/mailbox/DurableMailbox.scala @@ -13,11 +13,10 @@ private[akka] object DurableExecutableMailboxConfig { val Name = "[\\.\\/\\$\\s]".r } -abstract class DurableMessageQueue(val owner: ActorContext) extends MessageQueue { +abstract class DurableMessageQueue(val owner: ActorRef, val system: ExtendedActorSystem) extends MessageQueue { import
DurableExecutableMailboxConfig._ - def system: ExtendedActorSystem = owner.system.asInstanceOf[ExtendedActorSystem] - def ownerPath: ActorPath = owner.self.path + def ownerPath: ActorPath = owner.path val ownerPathString: String = ownerPath.elements.mkString("/") val name: String = "mailbox_" + Name.replaceAllIn(ownerPathString, "_") @@ -42,7 +41,7 @@ trait DurableMessageSerialization { this: DurableMessageQueue ⇒ val message = MessageSerializer.serialize(system, durableMessage.message.asInstanceOf[AnyRef]) val builder = RemoteMessageProtocol.newBuilder .setMessage(message) - .setRecipient(serializeActorRef(owner.self)) + .setRecipient(serializeActorRef(owner)) .setSender(serializeActorRef(durableMessage.sender)) builder.build.toByteArray @@ -60,7 +59,7 @@ trait DurableMessageSerialization { this: DurableMessageQueue ⇒ val message = MessageSerializer.deserialize(system, durableMessage.getMessage) val sender = deserializeActorRef(durableMessage.getSender) - Envelope(message, sender)(system) + Envelope(message, sender, system) } } @@ -69,11 +68,15 @@ trait DurableMessageSerialization { this: DurableMessageQueue ⇒ * Conventional organization of durable mailbox settings: * * {{{ - * my-durable-dispatcher { - * mailbox-type = "my.durable.mailbox" - * my-durable-mailbox { - * setting1 = 1 - * setting2 = 2 + * akka { + * actor { + * my-durable-dispatcher { + * mailbox-type = "my.durable.mailbox" + * my-durable-mailbox { + * setting1 = 1 + * setting2 = 2 + * } + * } * } * } * }}} diff --git a/akka-durable-mailboxes/akka-mailboxes-common/src/test/scala/akka/actor/mailbox/DurableMailboxSpec.scala b/akka-durable-mailboxes/akka-mailboxes-common/src/test/scala/akka/actor/mailbox/DurableMailboxSpec.scala index 9081a5fcb0..8264bd0348 100644 --- a/akka-durable-mailboxes/akka-mailboxes-common/src/test/scala/akka/actor/mailbox/DurableMailboxSpec.scala +++ b/akka-durable-mailboxes/akka-mailboxes-common/src/test/scala/akka/actor/mailbox/DurableMailboxSpec.scala @@ -3,25 +3,21 @@ */ 
package akka.actor.mailbox -import DurableMailboxSpecActorFactory.AccumulatorActor -import DurableMailboxSpecActorFactory.MailboxTestActor -import akka.actor.Actor -import akka.actor.ActorRef -import akka.actor.ActorSystem -import akka.actor.LocalActorRef -import akka.actor.Props -import akka.actor.actorRef2Scala +import java.io.InputStream +import java.util.concurrent.TimeoutException + +import scala.annotation.tailrec + +import org.scalatest.{ WordSpec, BeforeAndAfterAll } +import org.scalatest.matchers.MustMatchers + +import com.typesafe.config.{ ConfigFactory, Config } + +import DurableMailboxSpecActorFactory.{ MailboxTestActor, AccumulatorActor } +import akka.actor.{ RepointableRef, Props, ActorSystem, ActorRefWithCell, ActorRef, ActorCell, Actor } import akka.dispatch.Mailbox import akka.testkit.TestKit import akka.util.duration.intToDurationInt -import com.typesafe.config.Config -import com.typesafe.config.ConfigFactory -import java.io.InputStream -import java.util.concurrent.TimeoutException -import org.scalatest.BeforeAndAfterAll -import org.scalatest.WordSpec -import org.scalatest.matchers.MustMatchers -import scala.annotation.tailrec object DurableMailboxSpecActorFactory { @@ -115,9 +111,15 @@ abstract class DurableMailboxSpec(system: ActorSystem, val backendName: String) if (!result.contains(words)) throw new Exception("stream did not contain '" + words + "':\n" + result) } - def createMailboxTestActor(props: Props = Props[MailboxTestActor], id: String = ""): ActorRef = id match { - case null | "" ⇒ system.actorOf(props.withDispatcher(backendName + "-dispatcher")) - case some ⇒ system.actorOf(props.withDispatcher(backendName + "-dispatcher"), some) + def createMailboxTestActor(props: Props = Props[MailboxTestActor], id: String = ""): ActorRef = { + val ref = id match { + case null | "" ⇒ system.actorOf(props.withDispatcher(backendName + "-dispatcher")) + case some ⇒ system.actorOf(props.withDispatcher(backendName + "-dispatcher"), some) + } + 
awaitCond(ref match { + case r: RepointableRef ⇒ r.isStarted + }, 1 second, 10 millis) + ref } private def isDurableMailbox(m: Mailbox): Boolean = @@ -127,9 +129,11 @@ abstract class DurableMailboxSpec(system: ActorSystem, val backendName: String) "get a new, unique, durable mailbox" in { val a1, a2 = createMailboxTestActor() - isDurableMailbox(a1.asInstanceOf[LocalActorRef].underlying.mailbox) must be(true) - isDurableMailbox(a2.asInstanceOf[LocalActorRef].underlying.mailbox) must be(true) - (a1.asInstanceOf[LocalActorRef].underlying.mailbox ne a2.asInstanceOf[LocalActorRef].underlying.mailbox) must be(true) + val mb1 = a1.asInstanceOf[ActorRefWithCell].underlying.asInstanceOf[ActorCell].mailbox + val mb2 = a2.asInstanceOf[ActorRefWithCell].underlying.asInstanceOf[ActorCell].mailbox + isDurableMailbox(mb1) must be(true) + isDurableMailbox(mb2) must be(true) + (mb1 ne mb2) must be(true) } "deliver messages at most once" in { @@ -148,7 +152,7 @@ abstract class DurableMailboxSpec(system: ActorSystem, val backendName: String) "support having multiple actors at the same time" in { val actors = Vector.fill(3)(createMailboxTestActor(Props[AccumulatorActor])) - actors foreach { a ⇒ isDurableMailbox(a.asInstanceOf[LocalActorRef].underlying.mailbox) must be(true) } + actors foreach { a ⇒ isDurableMailbox(a.asInstanceOf[ActorRefWithCell].underlying.asInstanceOf[ActorCell].mailbox) must be(true) } val msgs = 1 to 3 diff --git a/akka-kernel/src/main/dist/bin/akka-cluster b/akka-kernel/src/main/dist/bin/akka-cluster index 3e76cdbb11..fe3af38449 100755 --- a/akka-kernel/src/main/dist/bin/akka-cluster +++ b/akka-kernel/src/main/dist/bin/akka-cluster @@ -63,20 +63,6 @@ case "$2" in $JMX_CLIENT $HOST akka:type=Cluster leave=$ACTOR_SYSTEM_URL ;; - remove) - if [ $# -ne 3 ]; then - echo "Usage: $SELF remove " - exit 1 - fi - - ensureNodeIsRunningAndAvailable - shift - - ACTOR_SYSTEM_URL=$2 - echo "Scheduling $ACTOR_SYSTEM_URL to REMOVE" - $JMX_CLIENT $HOST akka:type=Cluster 
remove=$ACTOR_SYSTEM_URL - ;; - down) if [ $# -ne 3 ]; then echo "Usage: $SELF down " @@ -169,19 +155,32 @@ case "$2" in $JMX_CLIENT $HOST akka:type=Cluster Available ;; + is-running) + if [ $# -ne 2 ]; then + echo "Usage: $SELF is-running" + exit 1 + fi + + ensureNodeIsRunningAndAvailable + shift + + echo "Checking if member node on $HOST is AVAILABLE" + $JMX_CLIENT $HOST akka:type=Cluster Running + ;; + *) printf "Usage: bin/$SELF ...\n" printf "\n" printf "Supported commands are:\n" printf "%26s - %s\n" "join " "Sends request a JOIN node with the specified URL" printf "%26s - %s\n" "leave " "Sends a request for node with URL to LEAVE the cluster" - printf "%26s - %s\n" "remove " "Sends a request for node with URL to be instantly REMOVED from the cluster" printf "%26s - %s\n" "down " "Sends a request for marking node with URL as DOWN" printf "%26s - %s\n" member-status "Asks the member node for its current status" printf "%26s - %s\n" cluster-status "Asks the cluster for its current status (member ring, unavailable nodes, meta data etc.)" printf "%26s - %s\n" leader "Asks the cluster who the current leader is" printf "%26s - %s\n" is-singleton "Checks if the cluster is a singleton cluster (single node cluster)" printf "%26s - %s\n" is-available "Checks if the member node is available" + printf "%26s - %s\n" is-running "Checks if the member node is running" printf "%26s - %s\n" has-convergence "Checks if there is a cluster convergence" printf "Where the should be on the format of 'akka://actor-system-name@hostname:port'\n" printf "\n" diff --git a/akka-remote-tests/src/main/java/akka/remote/testconductor/TestConductorProtocol.java b/akka-remote-tests/src/main/java/akka/remote/testconductor/TestConductorProtocol.java index 99c33e6728..bd8de8a052 100644 --- a/akka-remote-tests/src/main/java/akka/remote/testconductor/TestConductorProtocol.java +++ b/akka-remote-tests/src/main/java/akka/remote/testconductor/TestConductorProtocol.java @@ -8,6 +8,81 @@ public final 
class TestConductorProtocol { public static void registerAllExtensions( com.google.protobuf.ExtensionRegistry registry) { } + public enum BarrierOp + implements com.google.protobuf.ProtocolMessageEnum { + Enter(0, 1), + Fail(1, 2), + Succeeded(2, 3), + Failed(3, 4), + ; + + public static final int Enter_VALUE = 1; + public static final int Fail_VALUE = 2; + public static final int Succeeded_VALUE = 3; + public static final int Failed_VALUE = 4; + + + public final int getNumber() { return value; } + + public static BarrierOp valueOf(int value) { + switch (value) { + case 1: return Enter; + case 2: return Fail; + case 3: return Succeeded; + case 4: return Failed; + default: return null; + } + } + + public static com.google.protobuf.Internal.EnumLiteMap + internalGetValueMap() { + return internalValueMap; + } + private static com.google.protobuf.Internal.EnumLiteMap + internalValueMap = + new com.google.protobuf.Internal.EnumLiteMap() { + public BarrierOp findValueByNumber(int number) { + return BarrierOp.valueOf(number); + } + }; + + public final com.google.protobuf.Descriptors.EnumValueDescriptor + getValueDescriptor() { + return getDescriptor().getValues().get(index); + } + public final com.google.protobuf.Descriptors.EnumDescriptor + getDescriptorForType() { + return getDescriptor(); + } + public static final com.google.protobuf.Descriptors.EnumDescriptor + getDescriptor() { + return akka.remote.testconductor.TestConductorProtocol.getDescriptor().getEnumTypes().get(0); + } + + private static final BarrierOp[] VALUES = { + Enter, Fail, Succeeded, Failed, + }; + + public static BarrierOp valueOf( + com.google.protobuf.Descriptors.EnumValueDescriptor desc) { + if (desc.getType() != getDescriptor()) { + throw new java.lang.IllegalArgumentException( + "EnumValueDescriptor is not for this type."); + } + return VALUES[desc.getIndex()]; + } + + private final int index; + private final int value; + + private BarrierOp(int index, int value) { + this.index = index; + 
this.value = value; + } + + // @@protoc_insertion_point(enum_scope:BarrierOp) + } + public enum FailType implements com.google.protobuf.ProtocolMessageEnum { Throttle(0, 1), @@ -56,7 +131,7 @@ public final class TestConductorProtocol { } public static final com.google.protobuf.Descriptors.EnumDescriptor getDescriptor() { - return akka.remote.testconductor.TestConductorProtocol.getDescriptor().getEnumTypes().get(0); + return akka.remote.testconductor.TestConductorProtocol.getDescriptor().getEnumTypes().get(1); } private static final FailType[] VALUES = { @@ -128,7 +203,7 @@ public final class TestConductorProtocol { } public static final com.google.protobuf.Descriptors.EnumDescriptor getDescriptor() { - return akka.remote.testconductor.TestConductorProtocol.getDescriptor().getEnumTypes().get(1); + return akka.remote.testconductor.TestConductorProtocol.getDescriptor().getEnumTypes().get(2); } private static final Direction[] VALUES = { @@ -1699,9 +1774,13 @@ public final class TestConductorProtocol { boolean hasName(); String getName(); - // optional bool status = 2; - boolean hasStatus(); - boolean getStatus(); + // required .BarrierOp op = 2; + boolean hasOp(); + akka.remote.testconductor.TestConductorProtocol.BarrierOp getOp(); + + // optional int64 timeout = 3; + boolean hasTimeout(); + long getTimeout(); } public static final class EnterBarrier extends com.google.protobuf.GeneratedMessage @@ -1764,19 +1843,30 @@ public final class TestConductorProtocol { } } - // optional bool status = 2; - public static final int STATUS_FIELD_NUMBER = 2; - private boolean status_; - public boolean hasStatus() { + // required .BarrierOp op = 2; + public static final int OP_FIELD_NUMBER = 2; + private akka.remote.testconductor.TestConductorProtocol.BarrierOp op_; + public boolean hasOp() { return ((bitField0_ & 0x00000002) == 0x00000002); } - public boolean getStatus() { - return status_; + public akka.remote.testconductor.TestConductorProtocol.BarrierOp getOp() { + return op_; + 
} + + // optional int64 timeout = 3; + public static final int TIMEOUT_FIELD_NUMBER = 3; + private long timeout_; + public boolean hasTimeout() { + return ((bitField0_ & 0x00000004) == 0x00000004); + } + public long getTimeout() { + return timeout_; } private void initFields() { name_ = ""; - status_ = false; + op_ = akka.remote.testconductor.TestConductorProtocol.BarrierOp.Enter; + timeout_ = 0L; } private byte memoizedIsInitialized = -1; public final boolean isInitialized() { @@ -1787,6 +1877,10 @@ public final class TestConductorProtocol { memoizedIsInitialized = 0; return false; } + if (!hasOp()) { + memoizedIsInitialized = 0; + return false; + } memoizedIsInitialized = 1; return true; } @@ -1798,7 +1892,10 @@ public final class TestConductorProtocol { output.writeBytes(1, getNameBytes()); } if (((bitField0_ & 0x00000002) == 0x00000002)) { - output.writeBool(2, status_); + output.writeEnum(2, op_.getNumber()); + } + if (((bitField0_ & 0x00000004) == 0x00000004)) { + output.writeInt64(3, timeout_); } getUnknownFields().writeTo(output); } @@ -1815,7 +1912,11 @@ public final class TestConductorProtocol { } if (((bitField0_ & 0x00000002) == 0x00000002)) { size += com.google.protobuf.CodedOutputStream - .computeBoolSize(2, status_); + .computeEnumSize(2, op_.getNumber()); + } + if (((bitField0_ & 0x00000004) == 0x00000004)) { + size += com.google.protobuf.CodedOutputStream + .computeInt64Size(3, timeout_); } size += getUnknownFields().getSerializedSize(); memoizedSerializedSize = size; @@ -1943,8 +2044,10 @@ public final class TestConductorProtocol { super.clear(); name_ = ""; bitField0_ = (bitField0_ & ~0x00000001); - status_ = false; + op_ = akka.remote.testconductor.TestConductorProtocol.BarrierOp.Enter; bitField0_ = (bitField0_ & ~0x00000002); + timeout_ = 0L; + bitField0_ = (bitField0_ & ~0x00000004); return this; } @@ -1990,7 +2093,11 @@ public final class TestConductorProtocol { if (((from_bitField0_ & 0x00000002) == 0x00000002)) { to_bitField0_ |= 
0x00000002; } - result.status_ = status_; + result.op_ = op_; + if (((from_bitField0_ & 0x00000004) == 0x00000004)) { + to_bitField0_ |= 0x00000004; + } + result.timeout_ = timeout_; result.bitField0_ = to_bitField0_; onBuilt(); return result; @@ -2010,8 +2117,11 @@ public final class TestConductorProtocol { if (other.hasName()) { setName(other.getName()); } - if (other.hasStatus()) { - setStatus(other.getStatus()); + if (other.hasOp()) { + setOp(other.getOp()); + } + if (other.hasTimeout()) { + setTimeout(other.getTimeout()); } this.mergeUnknownFields(other.getUnknownFields()); return this; @@ -2022,6 +2132,10 @@ public final class TestConductorProtocol { return false; } + if (!hasOp()) { + + return false; + } return true; } @@ -2054,8 +2168,19 @@ public final class TestConductorProtocol { break; } case 16: { - bitField0_ |= 0x00000002; - status_ = input.readBool(); + int rawValue = input.readEnum(); + akka.remote.testconductor.TestConductorProtocol.BarrierOp value = akka.remote.testconductor.TestConductorProtocol.BarrierOp.valueOf(rawValue); + if (value == null) { + unknownFields.mergeVarintField(2, rawValue); + } else { + bitField0_ |= 0x00000002; + op_ = value; + } + break; + } + case 24: { + bitField0_ |= 0x00000004; + timeout_ = input.readInt64(); break; } } @@ -2100,23 +2225,47 @@ public final class TestConductorProtocol { onChanged(); } - // optional bool status = 2; - private boolean status_ ; - public boolean hasStatus() { + // required .BarrierOp op = 2; + private akka.remote.testconductor.TestConductorProtocol.BarrierOp op_ = akka.remote.testconductor.TestConductorProtocol.BarrierOp.Enter; + public boolean hasOp() { return ((bitField0_ & 0x00000002) == 0x00000002); } - public boolean getStatus() { - return status_; + public akka.remote.testconductor.TestConductorProtocol.BarrierOp getOp() { + return op_; } - public Builder setStatus(boolean value) { + public Builder setOp(akka.remote.testconductor.TestConductorProtocol.BarrierOp value) { + if (value == 
null) { + throw new NullPointerException(); + } bitField0_ |= 0x00000002; - status_ = value; + op_ = value; onChanged(); return this; } - public Builder clearStatus() { + public Builder clearOp() { bitField0_ = (bitField0_ & ~0x00000002); - status_ = false; + op_ = akka.remote.testconductor.TestConductorProtocol.BarrierOp.Enter; + onChanged(); + return this; + } + + // optional int64 timeout = 3; + private long timeout_ ; + public boolean hasTimeout() { + return ((bitField0_ & 0x00000004) == 0x00000004); + } + public long getTimeout() { + return timeout_; + } + public Builder setTimeout(long value) { + bitField0_ |= 0x00000004; + timeout_ = value; + onChanged(); + return this; + } + public Builder clearTimeout() { + bitField0_ = (bitField0_ & ~0x00000004); + timeout_ = 0L; onChanged(); return this; } @@ -4056,19 +4205,21 @@ public final class TestConductorProtocol { "\0132\r.EnterBarrier\022\037\n\007failure\030\003 \001(\0132\016.Inje" + "ctFailure\022\014\n\004done\030\004 \001(\t\022\035\n\004addr\030\005 \001(\0132\017." 
+ "AddressRequest\"0\n\005Hello\022\014\n\004name\030\001 \002(\t\022\031\n" + - "\007address\030\002 \002(\0132\010.Address\",\n\014EnterBarrier" + - "\022\014\n\004name\030\001 \002(\t\022\016\n\006status\030\002 \001(\010\"6\n\016Addres" + - "sRequest\022\014\n\004node\030\001 \002(\t\022\026\n\004addr\030\002 \001(\0132\010.A" + - "ddress\"G\n\007Address\022\020\n\010protocol\030\001 \002(\t\022\016\n\006s" + - "ystem\030\002 \002(\t\022\014\n\004host\030\003 \002(\t\022\014\n\004port\030\004 \002(\005\"", - "\212\001\n\rInjectFailure\022\032\n\007failure\030\001 \002(\0162\t.Fai" + - "lType\022\035\n\tdirection\030\002 \001(\0162\n.Direction\022\031\n\007" + - "address\030\003 \001(\0132\010.Address\022\020\n\010rateMBit\030\006 \001(" + - "\002\022\021\n\texitValue\030\007 \001(\005*A\n\010FailType\022\014\n\010Thro" + - "ttle\020\001\022\016\n\nDisconnect\020\002\022\t\n\005Abort\020\003\022\014\n\010Shu" + - "tdown\020\004*,\n\tDirection\022\010\n\004Send\020\001\022\013\n\007Receiv" + - "e\020\002\022\010\n\004Both\020\003B\035\n\031akka.remote.testconduct" + - "orH\001" + "\007address\030\002 \002(\0132\010.Address\"E\n\014EnterBarrier" + + "\022\014\n\004name\030\001 \002(\t\022\026\n\002op\030\002 \002(\0162\n.BarrierOp\022\017" + + "\n\007timeout\030\003 \001(\003\"6\n\016AddressRequest\022\014\n\004nod" + + "e\030\001 \002(\t\022\026\n\004addr\030\002 \001(\0132\010.Address\"G\n\007Addre" + + "ss\022\020\n\010protocol\030\001 \002(\t\022\016\n\006system\030\002 \002(\t\022\014\n\004", + "host\030\003 \002(\t\022\014\n\004port\030\004 \002(\005\"\212\001\n\rInjectFailu" + + "re\022\032\n\007failure\030\001 \002(\0162\t.FailType\022\035\n\tdirect" + + "ion\030\002 \001(\0162\n.Direction\022\031\n\007address\030\003 \001(\0132\010" + + ".Address\022\020\n\010rateMBit\030\006 \001(\002\022\021\n\texitValue\030" + + "\007 \001(\005*;\n\tBarrierOp\022\t\n\005Enter\020\001\022\010\n\004Fail\020\002\022" + + 
"\r\n\tSucceeded\020\003\022\n\n\006Failed\020\004*A\n\010FailType\022\014" + + "\n\010Throttle\020\001\022\016\n\nDisconnect\020\002\022\t\n\005Abort\020\003\022" + + "\014\n\010Shutdown\020\004*,\n\tDirection\022\010\n\004Send\020\001\022\013\n\007" + + "Receive\020\002\022\010\n\004Both\020\003B\035\n\031akka.remote.testc" + + "onductorH\001" }; com.google.protobuf.Descriptors.FileDescriptor.InternalDescriptorAssigner assigner = new com.google.protobuf.Descriptors.FileDescriptor.InternalDescriptorAssigner() { @@ -4096,7 +4247,7 @@ public final class TestConductorProtocol { internal_static_EnterBarrier_fieldAccessorTable = new com.google.protobuf.GeneratedMessage.FieldAccessorTable( internal_static_EnterBarrier_descriptor, - new java.lang.String[] { "Name", "Status", }, + new java.lang.String[] { "Name", "Op", "Timeout", }, akka.remote.testconductor.TestConductorProtocol.EnterBarrier.class, akka.remote.testconductor.TestConductorProtocol.EnterBarrier.Builder.class); internal_static_AddressRequest_descriptor = diff --git a/akka-remote-tests/src/main/protocol/TestConductorProtocol.proto b/akka-remote-tests/src/main/protocol/TestConductorProtocol.proto index 648234614e..1ff8a83c24 100644 --- a/akka-remote-tests/src/main/protocol/TestConductorProtocol.proto +++ b/akka-remote-tests/src/main/protocol/TestConductorProtocol.proto @@ -7,8 +7,10 @@ option optimize_for = SPEED; /****************************************** Compile with: - cd ./akka-remote/src/main/protocol + cd ./akka-remote-tests/src/main/protocol protoc TestConductorProtocol.proto --java_out ../java + cd ../../../.. 
+ ./scripts/fix-protobuf.sh *******************************************/ message Wrapper { @@ -24,9 +26,17 @@ message Hello { required Address address = 2; } +enum BarrierOp { + Enter = 1; + Fail = 2; + Succeeded = 3; + Failed = 4; +} + message EnterBarrier { required string name = 1; - optional bool status = 2; + required BarrierOp op = 2; + optional int64 timeout = 3; } message AddressRequest { @@ -47,11 +57,13 @@ enum FailType { Abort = 3; Shutdown = 4; } + enum Direction { Send = 1; Receive = 2; Both = 3; } + message InjectFailure { required FailType failure = 1; optional Direction direction = 2; diff --git a/akka-remote-tests/src/main/scala/akka/remote/testconductor/Conductor.scala b/akka-remote-tests/src/main/scala/akka/remote/testconductor/Conductor.scala index 17a2bfcd5f..eba0fffe63 100644 --- a/akka-remote-tests/src/main/scala/akka/remote/testconductor/Conductor.scala +++ b/akka-remote-tests/src/main/scala/akka/remote/testconductor/Conductor.scala @@ -8,8 +8,6 @@ import RemoteConnection.getAddrString import TestConductorProtocol._ import org.jboss.netty.channel.{ Channel, SimpleChannelUpstreamHandler, ChannelHandlerContext, ChannelStateEvent, MessageEvent } import com.typesafe.config.ConfigFactory -import akka.util.Timeout -import akka.util.Duration import akka.util.duration._ import akka.pattern.ask import java.util.concurrent.TimeUnit.MILLISECONDS @@ -26,6 +24,7 @@ import akka.actor.OneForOneStrategy import akka.actor.SupervisorStrategy import java.util.concurrent.ConcurrentHashMap import akka.actor.Status +import akka.util.{ Deadline, Timeout, Duration } sealed trait Direction { def includes(other: Direction): Boolean @@ -283,6 +282,8 @@ private[akka] class ServerFSM(val controller: ActorRef, val channel: Channel) ex import akka.actor.FSM._ import Controller._ + var roleName: RoleName = null + startWith(Initial, None) whenUnhandled { @@ -293,12 +294,15 @@ private[akka] class ServerFSM(val controller: ActorRef, val channel: Channel) ex } onTermination { 
- case _ ⇒ controller ! ClientDisconnected + case _ ⇒ + controller ! ClientDisconnected(roleName) + channel.close() } when(Initial, stateTimeout = 10 seconds) { case Event(Hello(name, addr), _) ⇒ - controller ! NodeInfo(RoleName(name), addr, self) + roleName = RoleName(name) + controller ! NodeInfo(roleName, addr, self) goto(Ready) case Event(x: NetworkOp, _) ⇒ log.warning("client {} sent no Hello in first message (instead {}), disconnecting", getAddrString(channel), x) @@ -335,10 +339,6 @@ private[akka] class ServerFSM(val controller: ActorRef, val channel: Channel) ex } initialize - - onTermination { - case _ ⇒ channel.close() - } } /** @@ -376,7 +376,8 @@ private[akka] class Controller(private var initialParticipants: Int, controllerP * BarrierTimeouts in the players). */ override def supervisorStrategy = OneForOneStrategy() { - case BarrierTimeout(data) ⇒ SupervisorStrategy.Resume + case BarrierTimeout(data) ⇒ failBarrier(data) + case FailedBarrier(data) ⇒ failBarrier(data) case BarrierEmpty(data, msg) ⇒ SupervisorStrategy.Resume case WrongBarrier(name, client, data) ⇒ client ! ToClient(BarrierResult(name, false)); failBarrier(data) case ClientLost(data, node) ⇒ failBarrier(data) @@ -426,6 +427,7 @@ private[akka] class Controller(private var initialParticipants: Int, controllerP case op: ServerOp ⇒ op match { case _: EnterBarrier ⇒ barrier forward op + case _: FailBarrier ⇒ barrier forward op case GetAddress(node) ⇒ if (nodes contains node) sender ! 
ToClient(AddressReply(node, nodes(node).addr)) else addrInterest += node -> ((addrInterest get node getOrElse Set()) + sender) @@ -463,7 +465,7 @@ private[akka] object BarrierCoordinator { case class RemoveClient(name: RoleName) - case class Data(clients: Set[Controller.NodeInfo], barrier: String, arrived: List[ActorRef]) + case class Data(clients: Set[Controller.NodeInfo], barrier: String, arrived: List[ActorRef], deadline: Deadline) trait Printer { this: Product with Throwable with NoStackTrace ⇒ override def toString = productPrefix + productIterator.mkString("(", ", ", ")") @@ -471,6 +473,8 @@ private[akka] object BarrierCoordinator { case class BarrierTimeout(data: Data) extends RuntimeException("timeout while waiting for barrier '" + data.barrier + "'") with NoStackTrace with Printer + case class FailedBarrier(data: Data) + extends RuntimeException("failing barrier '" + data.barrier + "'") with NoStackTrace with Printer case class DuplicateNode(data: Data, node: Controller.NodeInfo) extends RuntimeException(node.toString) with NoStackTrace with Printer case class WrongBarrier(barrier: String, client: ActorRef, data: Data) @@ -497,61 +501,77 @@ private[akka] class BarrierCoordinator extends Actor with LoggingFSM[BarrierCoor import BarrierCoordinator._ import akka.actor.FSM._ import Controller._ + import akka.util.{ Timeout ⇒ auTimeout } - // this shall be set to false if all subsequent barriers shall fail + // this shall be set to true if all subsequent barriers shall fail var failed = false + override def preRestart(reason: Throwable, message: Option[Any]) {} override def postRestart(reason: Throwable) { failed = true } // TODO what happens with the other waiting players in case of a test failure? 
- startWith(Idle, Data(Set(), "", Nil)) + startWith(Idle, Data(Set(), "", Nil, null)) whenUnhandled { - case Event(n: NodeInfo, d @ Data(clients, _, _)) ⇒ + case Event(n: NodeInfo, d @ Data(clients, _, _, _)) ⇒ if (clients.find(_.name == n.name).isDefined) throw new DuplicateNode(d, n) stay using d.copy(clients = clients + n) - case Event(ClientDisconnected(name), d @ Data(clients, _, arrived)) ⇒ - if (clients.isEmpty) throw BarrierEmpty(d, "cannot disconnect " + name + ": no client to disconnect") - (clients find (_.name == name)) match { - case None ⇒ stay - case Some(c) ⇒ throw ClientLost(d.copy(clients = clients - c, arrived = arrived filterNot (_ == c.fsm)), name) + case Event(ClientDisconnected(name), d @ Data(clients, _, arrived, _)) ⇒ + if (arrived.isEmpty) + stay using d.copy(clients = clients.filterNot(_.name == name)) + else { + (clients find (_.name == name)) match { + case None ⇒ stay + case Some(c) ⇒ throw ClientLost(d.copy(clients = clients - c, arrived = arrived filterNot (_ == c.fsm)), name) + } } } when(Idle) { - case Event(EnterBarrier(name), d @ Data(clients, _, _)) ⇒ + case Event(EnterBarrier(name, timeout), d @ Data(clients, _, _, _)) ⇒ if (failed) stay replying ToClient(BarrierResult(name, false)) else if (clients.map(_.fsm) == Set(sender)) stay replying ToClient(BarrierResult(name, true)) else if (clients.find(_.fsm == sender).isEmpty) stay replying ToClient(BarrierResult(name, false)) - else - goto(Waiting) using d.copy(barrier = name, arrived = sender :: Nil) - case Event(RemoveClient(name), d @ Data(clients, _, _)) ⇒ + else { + goto(Waiting) using d.copy(barrier = name, arrived = sender :: Nil, + deadline = getDeadline(timeout)) + } + case Event(RemoveClient(name), d @ Data(clients, _, _, _)) ⇒ if (clients.isEmpty) throw BarrierEmpty(d, "cannot remove " + name + ": no client to remove") stay using d.copy(clients = clients filterNot (_.name == name)) } onTransition { - case Idle -> Waiting ⇒ setTimer("Timeout", StateTimeout, 
TestConductor().Settings.BarrierTimeout.duration, false) + case Idle -> Waiting ⇒ setTimer("Timeout", StateTimeout, nextStateData.deadline.timeLeft, false) case Waiting -> Idle ⇒ cancelTimer("Timeout") } when(Waiting) { - case Event(EnterBarrier(name), d @ Data(clients, barrier, arrived)) ⇒ + case Event(EnterBarrier(name, timeout), d @ Data(clients, barrier, arrived, deadline)) ⇒ if (name != barrier) throw WrongBarrier(name, sender, d) val together = if (clients.exists(_.fsm == sender)) sender :: arrived else arrived - handleBarrier(d.copy(arrived = together)) - case Event(RemoveClient(name), d @ Data(clients, barrier, arrived)) ⇒ + val enterDeadline = getDeadline(timeout) + // we only allow the deadlines to get shorter + if (enterDeadline < deadline) { + setTimer("Timeout", StateTimeout, enterDeadline.timeLeft, false) + handleBarrier(d.copy(arrived = together, deadline = enterDeadline)) + } else + handleBarrier(d.copy(arrived = together)) + case Event(RemoveClient(name), d @ Data(clients, barrier, arrived, _)) ⇒ clients find (_.name == name) match { case None ⇒ stay case Some(client) ⇒ handleBarrier(d.copy(clients = clients - client, arrived = arrived filterNot (_ == client.fsm))) } - case Event(StateTimeout, data) ⇒ - throw BarrierTimeout(data) + case Event(FailBarrier(name), d @ Data(_, barrier, _, _)) ⇒ + if (name != barrier) throw WrongBarrier(name, sender, d) + throw FailedBarrier(d) + case Event(StateTimeout, d) ⇒ + throw BarrierTimeout(d) } initialize @@ -568,5 +588,9 @@ private[akka] class BarrierCoordinator extends Actor with LoggingFSM[BarrierCoor } } + def getDeadline(timeout: Option[Duration]): Deadline = { + Deadline.now + timeout.getOrElse(TestConductor().Settings.BarrierTimeout.duration) + } + } diff --git a/akka-remote-tests/src/main/scala/akka/remote/testconductor/DataTypes.scala b/akka-remote-tests/src/main/scala/akka/remote/testconductor/DataTypes.scala index 022ae2d89b..830b32e485 100644 --- 
a/akka-remote-tests/src/main/scala/akka/remote/testconductor/DataTypes.scala +++ b/akka-remote-tests/src/main/scala/akka/remote/testconductor/DataTypes.scala @@ -10,6 +10,8 @@ import akka.remote.testconductor.{ TestConductorProtocol ⇒ TCP } import com.google.protobuf.Message import akka.actor.Address import org.jboss.netty.handler.codec.oneone.OneToOneDecoder +import akka.util.Duration +import akka.remote.testconductor.TestConductorProtocol.BarrierOp case class RoleName(name: String) @@ -28,7 +30,8 @@ private[akka] sealed trait ConfirmedClientOp extends ClientOp */ private[akka] case class Hello(name: String, addr: Address) extends NetworkOp -private[akka] case class EnterBarrier(name: String) extends ServerOp with NetworkOp +private[akka] case class EnterBarrier(name: String, timeout: Option[Duration]) extends ServerOp with NetworkOp +private[akka] case class FailBarrier(name: String) extends ServerOp with NetworkOp private[akka] case class BarrierResult(name: String, success: Boolean) extends UnconfirmedClientOp with NetworkOp private[akka] case class Throttle(node: RoleName, target: RoleName, direction: Direction, rateMBit: Float) extends CommandOp @@ -72,10 +75,16 @@ private[akka] class MsgEncoder extends OneToOneEncoder { x match { case Hello(name, addr) ⇒ w.setHello(TCP.Hello.newBuilder.setName(name).setAddress(addr)) - case EnterBarrier(name) ⇒ - w.setBarrier(TCP.EnterBarrier.newBuilder.setName(name)) + case EnterBarrier(name, timeout) ⇒ + val barrier = TCP.EnterBarrier.newBuilder.setName(name) + timeout foreach (t ⇒ barrier.setTimeout(t.toNanos)) + barrier.setOp(BarrierOp.Enter) + w.setBarrier(barrier) case BarrierResult(name, success) ⇒ - w.setBarrier(TCP.EnterBarrier.newBuilder.setName(name).setStatus(success)) + val res = if (success) BarrierOp.Succeeded else BarrierOp.Failed + w.setBarrier(TCP.EnterBarrier.newBuilder.setName(name).setOp(res)) + case FailBarrier(name) ⇒ + w.setBarrier(TCP.EnterBarrier.newBuilder.setName(name).setOp(BarrierOp.Fail)) case 
ThrottleMsg(target, dir, rate) ⇒ w.setFailure(TCP.InjectFailure.newBuilder.setAddress(target) .setFailure(TCP.FailType.Throttle).setDirection(dir).setRateMBit(rate)) @@ -114,8 +123,13 @@ private[akka] class MsgDecoder extends OneToOneDecoder { Hello(h.getName, h.getAddress) } else if (w.hasBarrier) { val barrier = w.getBarrier - if (barrier.hasStatus) BarrierResult(barrier.getName, barrier.getStatus) - else EnterBarrier(w.getBarrier.getName) + barrier.getOp match { + case BarrierOp.Succeeded ⇒ BarrierResult(barrier.getName, true) + case BarrierOp.Failed ⇒ BarrierResult(barrier.getName, false) + case BarrierOp.Fail ⇒ FailBarrier(barrier.getName) + case BarrierOp.Enter ⇒ EnterBarrier(barrier.getName, + if (barrier.hasTimeout) Option(Duration.fromNanos(barrier.getTimeout)) else None) + } } else if (w.hasFailure) { val f = w.getFailure import TCP.{ FailType ⇒ FT } diff --git a/akka-remote-tests/src/main/scala/akka/remote/testconductor/Player.scala b/akka-remote-tests/src/main/scala/akka/remote/testconductor/Player.scala index 53c03d5d40..c7f69091cf 100644 --- a/akka-remote-tests/src/main/scala/akka/remote/testconductor/Player.scala +++ b/akka-remote-tests/src/main/scala/akka/remote/testconductor/Player.scala @@ -11,7 +11,7 @@ import com.typesafe.config.ConfigFactory import akka.util.Timeout import akka.util.Duration import java.util.concurrent.TimeUnit.MILLISECONDS -import akka.pattern.{ ask, pipe } +import akka.pattern.{ ask, pipe, AskTimeoutException } import akka.dispatch.Await import scala.util.control.NoStackTrace import akka.actor.Status @@ -26,6 +26,7 @@ import org.jboss.netty.channel.WriteCompletionEvent import java.net.ConnectException import akka.util.Deadline import akka.actor.Scheduler +import java.util.concurrent.TimeoutException /** * The Player is the client component of the @@ -76,10 +77,31 @@ trait Player { this: TestConductorExt ⇒ * throw an exception in case of timeouts or other errors. 
*/ def enter(name: String*) { + enter(Settings.BarrierTimeout, name) + } + + /** + * Enter the named barriers, one after the other, in the order given. Will + * throw an exception in case of timeouts or other errors. + */ + def enter(timeout: Timeout, name: Seq[String]) { system.log.debug("entering barriers " + name.mkString("(", ", ", ")")) + val stop = Deadline.now + timeout.duration name foreach { b ⇒ - import Settings.BarrierTimeout - Await.result(client ? ToServer(EnterBarrier(b)), Duration.Inf) + val barrierTimeout = stop.timeLeft + if (barrierTimeout < Duration.Zero) { + client ! ToServer(FailBarrier(b)) + throw new TimeoutException("Server timed out while waiting for barrier " + b); + } + try { + implicit val timeout = Timeout(barrierTimeout + Settings.QueryTimeout.duration) + Await.result(client ? ToServer(EnterBarrier(b, Option(barrierTimeout))), Duration.Inf) + } catch { + case e: AskTimeoutException ⇒ + client ! ToServer(FailBarrier(b)) + // Why don't TimeoutException have a constructor that takes a cause? + throw new TimeoutException("Client timed out while waiting for barrier " + b); + } system.log.debug("passed barrier {}", b) } } @@ -88,7 +110,7 @@ trait Player { this: TestConductorExt ⇒ * Query remote transport address of named node. */ def getAddressFor(name: RoleName): Future[Address] = { - import Settings.BarrierTimeout + import Settings.QueryTimeout client ? 
ToServer(GetAddress(name)) mapTo } } @@ -168,8 +190,8 @@ private[akka] class ClientFSM(name: RoleName, controllerAddr: InetSocketAddress) case Event(ToServer(msg), d @ Data(Some(channel), None)) ⇒ channel.write(msg) val token = msg match { - case EnterBarrier(barrier) ⇒ barrier - case GetAddress(node) ⇒ node.name + case EnterBarrier(barrier, timeout) ⇒ barrier + case GetAddress(node) ⇒ node.name } stay using d.copy(runningOp = Some(token, sender)) case Event(ToServer(op), Data(channel, Some((token, _)))) ⇒ diff --git a/akka-remote-tests/src/multi-jvm/scala/akka/remote/LookupRemoteActorSpec.scala b/akka-remote-tests/src/multi-jvm/scala/akka/remote/LookupRemoteActorSpec.scala index cfbbae67dc..f49dc53e2b 100644 --- a/akka-remote-tests/src/multi-jvm/scala/akka/remote/LookupRemoteActorSpec.scala +++ b/akka-remote-tests/src/multi-jvm/scala/akka/remote/LookupRemoteActorSpec.scala @@ -47,7 +47,7 @@ class LookupRemoteActorSpec extends MultiNodeSpec(LookupRemoteActorMultiJvmSpec) val masterAddress = testConductor.getAddressFor(master).await (hello ? 
"identify").await.asInstanceOf[ActorRef].path.address must equal(masterAddress) } - testConductor.enter("done") + enterBarrier("done") } } diff --git a/akka-remote-tests/src/multi-jvm/scala/akka/remote/NewRemoteActorSpec.scala b/akka-remote-tests/src/multi-jvm/scala/akka/remote/NewRemoteActorSpec.scala index 5aa79eb775..eca91495d6 100644 --- a/akka-remote-tests/src/multi-jvm/scala/akka/remote/NewRemoteActorSpec.scala +++ b/akka-remote-tests/src/multi-jvm/scala/akka/remote/NewRemoteActorSpec.scala @@ -56,7 +56,7 @@ class NewRemoteActorSpec extends MultiNodeSpec(NewRemoteActorMultiJvmSpec) system.stop(actor) } - testConductor.enter("done") + enterBarrier("done") } "be locally instantiated on a remote node and be able to communicate through its RemoteActorRef (with deployOnAll)" taggedAs LongRunningTest in { @@ -74,7 +74,7 @@ class NewRemoteActorSpec extends MultiNodeSpec(NewRemoteActorMultiJvmSpec) system.stop(actor) } - testConductor.enter("done") + enterBarrier("done") } } } diff --git a/akka-remote-tests/src/multi-jvm/scala/akka/remote/router/RandomRoutedRemoteActorSpec.scala b/akka-remote-tests/src/multi-jvm/scala/akka/remote/router/RandomRoutedRemoteActorSpec.scala index 58f230e487..44c7ae5047 100644 --- a/akka-remote-tests/src/multi-jvm/scala/akka/remote/router/RandomRoutedRemoteActorSpec.scala +++ b/akka-remote-tests/src/multi-jvm/scala/akka/remote/router/RandomRoutedRemoteActorSpec.scala @@ -55,11 +55,11 @@ class RandomRoutedRemoteActorSpec extends MultiNodeSpec(RandomRoutedRemoteActorM "be locally instantiated on a remote node and be able to communicate through its RemoteActorRef" taggedAs LongRunningTest in { runOn(first, second, third) { - testConductor.enter("start", "broadcast-end", "end", "done") + enterBarrier("start", "broadcast-end", "end", "done") } runOn(fourth) { - testConductor.enter("start") + enterBarrier("start") val actor = system.actorOf(Props[SomeActor].withRouter(RandomRouter()), "service-hello") actor.isInstanceOf[RoutedActorRef] must 
be(true) @@ -76,17 +76,17 @@ class RandomRoutedRemoteActorSpec extends MultiNodeSpec(RandomRoutedRemoteActorM case (replyMap, address) ⇒ replyMap + (address -> (replyMap(address) + 1)) } - testConductor.enter("broadcast-end") + enterBarrier("broadcast-end") actor ! Broadcast(PoisonPill) - testConductor.enter("end") + enterBarrier("end") replies.values foreach { _ must be > (0) } replies.get(node(fourth).address) must be(None) // shut down the actor before we let the other node(s) shut down so we don't try to send // "Terminate" to a shut down node system.stop(actor) - testConductor.enter("done") + enterBarrier("done") } } } diff --git a/akka-remote-tests/src/multi-jvm/scala/akka/remote/router/RoundRobinRoutedRemoteActorSpec.scala b/akka-remote-tests/src/multi-jvm/scala/akka/remote/router/RoundRobinRoutedRemoteActorSpec.scala index c72644899e..76a7e41ad1 100644 --- a/akka-remote-tests/src/multi-jvm/scala/akka/remote/router/RoundRobinRoutedRemoteActorSpec.scala +++ b/akka-remote-tests/src/multi-jvm/scala/akka/remote/router/RoundRobinRoutedRemoteActorSpec.scala @@ -55,11 +55,11 @@ class RoundRobinRoutedRemoteActorSpec extends MultiNodeSpec(RoundRobinRoutedRemo "be locally instantiated on a remote node and be able to communicate through its RemoteActorRef" taggedAs LongRunningTest in { runOn(first, second, third) { - testConductor.enter("start", "broadcast-end", "end", "done") + enterBarrier("start", "broadcast-end", "end", "done") } runOn(fourth) { - testConductor.enter("start") + enterBarrier("start") val actor = system.actorOf(Props[SomeActor].withRouter(RoundRobinRouter()), "service-hello") actor.isInstanceOf[RoutedActorRef] must be(true) @@ -76,17 +76,17 @@ class RoundRobinRoutedRemoteActorSpec extends MultiNodeSpec(RoundRobinRoutedRemo case (replyMap, address) ⇒ replyMap + (address -> (replyMap(address) + 1)) } - testConductor.enter("broadcast-end") + enterBarrier("broadcast-end") actor ! 
Broadcast(PoisonPill) - testConductor.enter("end") + enterBarrier("end") replies.values foreach { _ must be(iterationCount) } replies.get(node(fourth).address) must be(None) // shut down the actor before we let the other node(s) shut down so we don't try to send // "Terminate" to a shut down node system.stop(actor) - testConductor.enter("done") + enterBarrier("done") } } } diff --git a/akka-remote-tests/src/multi-jvm/scala/akka/remote/router/ScatterGatherRoutedRemoteActorSpec.scala b/akka-remote-tests/src/multi-jvm/scala/akka/remote/router/ScatterGatherRoutedRemoteActorSpec.scala index 10a007e772..b77b0c196e 100644 --- a/akka-remote-tests/src/multi-jvm/scala/akka/remote/router/ScatterGatherRoutedRemoteActorSpec.scala +++ b/akka-remote-tests/src/multi-jvm/scala/akka/remote/router/ScatterGatherRoutedRemoteActorSpec.scala @@ -55,11 +55,11 @@ class ScatterGatherRoutedRemoteActorSpec extends MultiNodeSpec(ScatterGatherRout "be locally instantiated on a remote node and be able to communicate through its RemoteActorRef" taggedAs LongRunningTest in { runOn(first, second, third) { - testConductor.enter("start", "broadcast-end", "end", "done") + enterBarrier("start", "broadcast-end", "end", "done") } runOn(fourth) { - testConductor.enter("start") + enterBarrier("start") val actor = system.actorOf(Props[SomeActor].withRouter(ScatterGatherFirstCompletedRouter(within = 10 seconds)), "service-hello") actor.isInstanceOf[RoutedActorRef] must be(true) @@ -76,17 +76,17 @@ class ScatterGatherRoutedRemoteActorSpec extends MultiNodeSpec(ScatterGatherRout case (replyMap, address) ⇒ replyMap + (address -> (replyMap(address) + 1)) } - testConductor.enter("broadcast-end") + enterBarrier("broadcast-end") actor ! 
Broadcast(PoisonPill) - testConductor.enter("end") + enterBarrier("end") replies.values.sum must be === connectionCount * iterationCount replies.get(node(fourth).address) must be(None) // shut down the actor before we let the other node(s) shut down so we don't try to send // "Terminate" to a shut down node system.stop(actor) - testConductor.enter("done") + enterBarrier("done") } } } diff --git a/akka-remote-tests/src/multi-jvm/scala/akka/remote/testconductor/TestConductorSpec.scala b/akka-remote-tests/src/multi-jvm/scala/akka/remote/testconductor/TestConductorSpec.scala index 624347be69..86fabc489d 100644 --- a/akka-remote-tests/src/multi-jvm/scala/akka/remote/testconductor/TestConductorSpec.scala +++ b/akka-remote-tests/src/multi-jvm/scala/akka/remote/testconductor/TestConductorSpec.scala @@ -46,7 +46,7 @@ class TestConductorSpec extends MultiNodeSpec(TestConductorMultiJvmSpec) with Im }), "echo") } - testConductor.enter("name") + enterBarrier("name") } "support throttling of network connections" taggedAs LongRunningTest in { @@ -62,7 +62,7 @@ class TestConductorSpec extends MultiNodeSpec(TestConductorMultiJvmSpec) with Im testConductor.throttle(slave, master, Direction.Send, rateMBit = 0.01).await } - testConductor.enter("throttled_send") + enterBarrier("throttled_send") runOn(slave) { for (i ← 0 to 9) echo ! i @@ -73,14 +73,14 @@ class TestConductorSpec extends MultiNodeSpec(TestConductorMultiJvmSpec) with Im receiveN(9) must be(1 to 9) } - testConductor.enter("throttled_send2") + enterBarrier("throttled_send2") runOn(master) { testConductor.throttle(slave, master, Direction.Send, -1).await testConductor.throttle(slave, master, Direction.Receive, rateMBit = 0.01).await } - testConductor.enter("throttled_recv") + enterBarrier("throttled_recv") runOn(slave) { for (i ← 10 to 19) echo ! 
i @@ -98,7 +98,7 @@ class TestConductorSpec extends MultiNodeSpec(TestConductorMultiJvmSpec) with Im receiveN(9) must be(11 to 19) } - testConductor.enter("throttled_recv2") + enterBarrier("throttled_recv2") runOn(master) { testConductor.throttle(slave, master, Direction.Receive, -1).await diff --git a/akka-remote-tests/src/multi-jvm/scala/akka/remote/testkit/MultiNodeSpecSpec.scala b/akka-remote-tests/src/multi-jvm/scala/akka/remote/testkit/MultiNodeSpecSpec.scala new file mode 100644 index 0000000000..2a709a99a7 --- /dev/null +++ b/akka-remote-tests/src/multi-jvm/scala/akka/remote/testkit/MultiNodeSpecSpec.scala @@ -0,0 +1,36 @@ +/** + * Copyright (C) 2009-2012 Typesafe Inc. + */ +package akka.remote.testkit + +import akka.testkit.LongRunningTest + +object MultiNodeSpecMultiJvmSpec extends MultiNodeConfig { + commonConfig(debugConfig(on = false)) + + val node1 = role("node1") + val node2 = role("node2") + val node3 = role("node3") + val node4 = role("node4") +} + +class MultiNodeSpecSpecMultiJvmNode1 extends MultiNodeSpecSpec +class MultiNodeSpecSpecMultiJvmNode2 extends MultiNodeSpecSpec +class MultiNodeSpecSpecMultiJvmNode3 extends MultiNodeSpecSpec +class MultiNodeSpecSpecMultiJvmNode4 extends MultiNodeSpecSpec + +class MultiNodeSpecSpec extends MultiNodeSpec(MultiNodeSpecMultiJvmSpec) { + + import MultiNodeSpecMultiJvmSpec._ + + def initialParticipants = 4 + + "A MultiNodeSpec" must { + + "wait for all nodes to remove themselves before we shut the conductor down" taggedAs LongRunningTest in { + enterBarrier("startup") + // this test is empty here since it only exercises the shutdown code in the MultiNodeSpec + } + + } +} diff --git a/akka-remote-tests/src/test/scala/akka/remote/testconductor/BarrierSpec.scala b/akka-remote-tests/src/test/scala/akka/remote/testconductor/BarrierSpec.scala index 37ebd0a193..8ff95d0831 100644 --- a/akka-remote-tests/src/test/scala/akka/remote/testconductor/BarrierSpec.scala +++ 
b/akka-remote-tests/src/test/scala/akka/remote/testconductor/BarrierSpec.scala @@ -19,6 +19,7 @@ import org.scalatest.BeforeAndAfterEach import java.net.InetSocketAddress import java.net.InetAddress import akka.testkit.TimingTest +import akka.util.{ Timeout, Duration } object BarrierSpec { case class Failed(ref: ActorRef, thr: Throwable) @@ -28,10 +29,10 @@ object BarrierSpec { akka.remote.netty.port = 0 akka.actor.debug.fsm = on akka.actor.debug.lifecycle = on - """ + """ } -class BarrierSpec extends AkkaSpec(BarrierSpec.config) with ImplicitSender with BeforeAndAfterEach { +class BarrierSpec extends AkkaSpec(BarrierSpec.config) with ImplicitSender { import BarrierSpec._ import Controller._ @@ -41,10 +42,6 @@ class BarrierSpec extends AkkaSpec(BarrierSpec.config) with ImplicitSender with val B = RoleName("b") val C = RoleName("c") - override def afterEach { - system.eventStream.setLogLevel(Logging.WarningLevel) - } - "A BarrierCoordinator" must { "register clients and remove them" taggedAs TimingTest in { @@ -55,27 +52,22 @@ class BarrierSpec extends AkkaSpec(BarrierSpec.config) with ImplicitSender with EventFilter[BarrierEmpty](occurrences = 1) intercept { b ! RemoveClient(A) } - expectMsg(Failed(b, BarrierEmpty(Data(Set(), "", Nil), "cannot remove RoleName(a): no client to remove"))) + expectMsg(Failed(b, BarrierEmpty(Data(Set(), "", Nil, null), "cannot remove RoleName(a): no client to remove"))) } "register clients and disconnect them" taggedAs TimingTest in { val b = getBarrier() b ! NodeInfo(A, AddressFromURIString("akka://sys"), system.deadLetters) b ! ClientDisconnected(B) - EventFilter[ClientLost](occurrences = 1) intercept { - b ! ClientDisconnected(A) - } - expectMsg(Failed(b, ClientLost(Data(Set(), "", Nil), A))) - EventFilter[BarrierEmpty](occurrences = 1) intercept { - b ! ClientDisconnected(A) - } - expectMsg(Failed(b, BarrierEmpty(Data(Set(), "", Nil), "cannot disconnect RoleName(a): no client to disconnect"))) + expectNoMsg(1 second) + b ! 
ClientDisconnected(A) + expectNoMsg(1 second) } "fail entering barrier when nobody registered" taggedAs TimingTest in { val b = getBarrier() - b ! EnterBarrier("b") - expectMsg(ToClient(BarrierResult("b", false))) + b ! EnterBarrier("bar1", None) + expectMsg(ToClient(BarrierResult("bar1", false))) } "enter barrier" taggedAs TimingTest in { @@ -83,12 +75,12 @@ class BarrierSpec extends AkkaSpec(BarrierSpec.config) with ImplicitSender with val a, b = TestProbe() barrier ! NodeInfo(A, AddressFromURIString("akka://sys"), a.ref) barrier ! NodeInfo(B, AddressFromURIString("akka://sys"), b.ref) - a.send(barrier, EnterBarrier("bar")) + a.send(barrier, EnterBarrier("bar2", None)) noMsg(a, b) - within(2 second) { - b.send(barrier, EnterBarrier("bar")) - a.expectMsg(ToClient(BarrierResult("bar", true))) - b.expectMsg(ToClient(BarrierResult("bar", true))) + within(2 seconds) { + b.send(barrier, EnterBarrier("bar2", None)) + a.expectMsg(ToClient(BarrierResult("bar2", true))) + b.expectMsg(ToClient(BarrierResult("bar2", true))) } } @@ -97,15 +89,15 @@ class BarrierSpec extends AkkaSpec(BarrierSpec.config) with ImplicitSender with val a, b, c = TestProbe() barrier ! NodeInfo(A, AddressFromURIString("akka://sys"), a.ref) barrier ! NodeInfo(B, AddressFromURIString("akka://sys"), b.ref) - a.send(barrier, EnterBarrier("bar")) + a.send(barrier, EnterBarrier("bar3", None)) barrier ! 
NodeInfo(C, AddressFromURIString("akka://sys"), c.ref) - b.send(barrier, EnterBarrier("bar")) + b.send(barrier, EnterBarrier("bar3", None)) noMsg(a, b, c) - within(2 second) { - c.send(barrier, EnterBarrier("bar")) - a.expectMsg(ToClient(BarrierResult("bar", true))) - b.expectMsg(ToClient(BarrierResult("bar", true))) - c.expectMsg(ToClient(BarrierResult("bar", true))) + within(2 seconds) { + c.send(barrier, EnterBarrier("bar3", None)) + a.expectMsg(ToClient(BarrierResult("bar3", true))) + b.expectMsg(ToClient(BarrierResult("bar3", true))) + c.expectMsg(ToClient(BarrierResult("bar3", true))) } } @@ -115,14 +107,14 @@ class BarrierSpec extends AkkaSpec(BarrierSpec.config) with ImplicitSender with barrier ! NodeInfo(A, AddressFromURIString("akka://sys"), a.ref) barrier ! NodeInfo(B, AddressFromURIString("akka://sys"), b.ref) barrier ! NodeInfo(C, AddressFromURIString("akka://sys"), c.ref) - a.send(barrier, EnterBarrier("bar")) - b.send(barrier, EnterBarrier("bar")) + a.send(barrier, EnterBarrier("bar4", None)) + b.send(barrier, EnterBarrier("bar4", None)) barrier ! RemoveClient(A) barrier ! ClientDisconnected(A) noMsg(a, b, c) - b.within(2 second) { + b.within(2 seconds) { barrier ! RemoveClient(C) - b.expectMsg(ToClient(BarrierResult("bar", true))) + b.expectMsg(ToClient(BarrierResult("bar4", true))) } barrier ! ClientDisconnected(C) expectNoMsg(1 second) @@ -133,9 +125,9 @@ class BarrierSpec extends AkkaSpec(BarrierSpec.config) with ImplicitSender with val a, b = TestProbe() barrier ! NodeInfo(A, AddressFromURIString("akka://sys"), a.ref) barrier ! NodeInfo(B, AddressFromURIString("akka://sys"), b.ref) - a.send(barrier, EnterBarrier("bar")) + a.send(barrier, EnterBarrier("bar5", None)) barrier ! 
RemoveClient(A) - b.send(barrier, EnterBarrier("foo")) + b.send(barrier, EnterBarrier("foo", None)) b.expectMsg(ToClient(BarrierResult("foo", true))) } @@ -145,11 +137,15 @@ class BarrierSpec extends AkkaSpec(BarrierSpec.config) with ImplicitSender with val nodeA = NodeInfo(A, AddressFromURIString("akka://sys"), a.ref) barrier ! nodeA barrier ! NodeInfo(B, AddressFromURIString("akka://sys"), b.ref) - a.send(barrier, EnterBarrier("bar")) + a.send(barrier, EnterBarrier("bar6", None)) EventFilter[ClientLost](occurrences = 1) intercept { barrier ! ClientDisconnected(B) } - expectMsg(Failed(barrier, ClientLost(Data(Set(nodeA), "bar", a.ref :: Nil), B))) + val msg = expectMsgType[Failed] + msg match { + case Failed(barrier, thr: ClientLost) if (thr == ClientLost(Data(Set(nodeA), "bar6", a.ref :: Nil, thr.data.deadline), B)) ⇒ + case x ⇒ fail("Expected " + Failed(barrier, ClientLost(Data(Set(nodeA), "bar6", a.ref :: Nil, null), B)) + " but got " + x) + } } "fail barrier with disconnecing node who already arrived" taggedAs TimingTest in { @@ -160,12 +156,16 @@ class BarrierSpec extends AkkaSpec(BarrierSpec.config) with ImplicitSender with barrier ! nodeA barrier ! NodeInfo(B, AddressFromURIString("akka://sys"), b.ref) barrier ! nodeC - a.send(barrier, EnterBarrier("bar")) - b.send(barrier, EnterBarrier("bar")) + a.send(barrier, EnterBarrier("bar7", None)) + b.send(barrier, EnterBarrier("bar7", None)) EventFilter[ClientLost](occurrences = 1) intercept { barrier ! 
ClientDisconnected(B) } - expectMsg(Failed(barrier, ClientLost(Data(Set(nodeA, nodeC), "bar", a.ref :: Nil), B))) + val msg = expectMsgType[Failed] + msg match { + case Failed(barrier, thr: ClientLost) if (thr == ClientLost(Data(Set(nodeA, nodeC), "bar7", a.ref :: Nil, thr.data.deadline), B)) ⇒ + case x ⇒ fail("Expected " + Failed(barrier, ClientLost(Data(Set(nodeA, nodeC), "bar7", a.ref :: Nil, null), B)) + " but got " + x) + } } "fail when entering wrong barrier" taggedAs TimingTest in { @@ -175,11 +175,15 @@ class BarrierSpec extends AkkaSpec(BarrierSpec.config) with ImplicitSender with barrier ! nodeA val nodeB = NodeInfo(B, AddressFromURIString("akka://sys"), b.ref) barrier ! nodeB - a.send(barrier, EnterBarrier("bar")) + a.send(barrier, EnterBarrier("bar8", None)) EventFilter[WrongBarrier](occurrences = 1) intercept { - b.send(barrier, EnterBarrier("foo")) + b.send(barrier, EnterBarrier("foo", None)) + } + val msg = expectMsgType[Failed] + msg match { + case Failed(barrier, thr: WrongBarrier) if (thr == WrongBarrier("foo", b.ref, Data(Set(nodeA, nodeB), "bar8", a.ref :: Nil, thr.data.deadline))) ⇒ + case x ⇒ fail("Expected " + Failed(barrier, WrongBarrier("foo", b.ref, Data(Set(nodeA, nodeB), "bar8", a.ref :: Nil, null))) + " but got " + x) } - expectMsg(Failed(barrier, WrongBarrier("foo", b.ref, Data(Set(nodeA, nodeB), "bar", a.ref :: Nil)))) } "fail barrier after first failure" taggedAs TimingTest in { @@ -188,10 +192,14 @@ class BarrierSpec extends AkkaSpec(BarrierSpec.config) with ImplicitSender with EventFilter[BarrierEmpty](occurrences = 1) intercept { barrier ! 
RemoveClient(A) } - expectMsg(Failed(barrier, BarrierEmpty(Data(Set(), "", Nil), "cannot remove RoleName(a): no client to remove"))) + val msg = expectMsgType[Failed] + msg match { + case Failed(barrier, thr: BarrierEmpty) if (thr == BarrierEmpty(Data(Set(), "", Nil, thr.data.deadline), "cannot remove RoleName(a): no client to remove")) ⇒ + case x ⇒ fail("Expected " + Failed(barrier, BarrierEmpty(Data(Set(), "", Nil, null), "cannot remove RoleName(a): no client to remove")) + " but got " + x) + } barrier ! NodeInfo(A, AddressFromURIString("akka://sys"), a.ref) - a.send(barrier, EnterBarrier("right")) - a.expectMsg(ToClient(BarrierResult("right", false))) + a.send(barrier, EnterBarrier("bar9", None)) + a.expectMsg(ToClient(BarrierResult("bar9", false))) } "fail after barrier timeout" taggedAs TimingTest in { @@ -201,9 +209,13 @@ class BarrierSpec extends AkkaSpec(BarrierSpec.config) with ImplicitSender with val nodeB = NodeInfo(B, AddressFromURIString("akka://sys"), b.ref) barrier ! nodeA barrier ! nodeB - a.send(barrier, EnterBarrier("right")) + a.send(barrier, EnterBarrier("bar10", None)) EventFilter[BarrierTimeout](occurrences = 1) intercept { - expectMsg(7 seconds, Failed(barrier, BarrierTimeout(Data(Set(nodeA, nodeB), "right", a.ref :: Nil)))) + val msg = expectMsgType[Failed](7 seconds) + msg match { + case Failed(barrier, thr: BarrierTimeout) if (thr == BarrierTimeout(Data(Set(nodeA, nodeB), "bar10", a.ref :: Nil, thr.data.deadline))) ⇒ + case x ⇒ fail("Expected " + Failed(barrier, BarrierTimeout(Data(Set(nodeA, nodeB), "bar10", a.ref :: Nil, null))) + " but got " + x) + } } } @@ -216,7 +228,11 @@ class BarrierSpec extends AkkaSpec(BarrierSpec.config) with ImplicitSender with EventFilter[DuplicateNode](occurrences = 1) intercept { barrier ! 
nodeB } - expectMsg(Failed(barrier, DuplicateNode(Data(Set(nodeA), "", Nil), nodeB))) + val msg = expectMsgType[Failed] + msg match { + case Failed(barrier, thr: DuplicateNode) if (thr == DuplicateNode(Data(Set(nodeA), "", Nil, thr.data.deadline), nodeB)) ⇒ + case x ⇒ fail("Expected " + Failed(barrier, DuplicateNode(Data(Set(nodeA), "", Nil, null), nodeB)) + " but got " + x) + } } "finally have no failure messages left" taggedAs TimingTest in { @@ -243,17 +259,14 @@ class BarrierSpec extends AkkaSpec(BarrierSpec.config) with ImplicitSender with b ! NodeInfo(A, AddressFromURIString("akka://sys"), testActor) expectMsg(ToClient(Done)) b ! ClientDisconnected(B) - EventFilter[ClientLost](occurrences = 1) intercept { - b ! ClientDisconnected(A) - } - EventFilter[BarrierEmpty](occurrences = 1) intercept { - b ! ClientDisconnected(A) - } + expectNoMsg(1 second) + b ! ClientDisconnected(A) + expectNoMsg(1 second) } "fail entering barrier when nobody registered" taggedAs TimingTest in { val b = getController(0) - b ! EnterBarrier("b") + b ! EnterBarrier("b", None) expectMsg(ToClient(BarrierResult("b", false))) } @@ -264,12 +277,12 @@ class BarrierSpec extends AkkaSpec(BarrierSpec.config) with ImplicitSender with barrier ! NodeInfo(B, AddressFromURIString("akka://sys"), b.ref) a.expectMsg(ToClient(Done)) b.expectMsg(ToClient(Done)) - a.send(barrier, EnterBarrier("bar")) + a.send(barrier, EnterBarrier("bar11", None)) noMsg(a, b) - within(2 second) { - b.send(barrier, EnterBarrier("bar")) - a.expectMsg(ToClient(BarrierResult("bar", true))) - b.expectMsg(ToClient(BarrierResult("bar", true))) + within(2 seconds) { + b.send(barrier, EnterBarrier("bar11", None)) + a.expectMsg(ToClient(BarrierResult("bar11", true))) + b.expectMsg(ToClient(BarrierResult("bar11", true))) } } @@ -280,16 +293,16 @@ class BarrierSpec extends AkkaSpec(BarrierSpec.config) with ImplicitSender with barrier ! 
NodeInfo(B, AddressFromURIString("akka://sys"), b.ref) a.expectMsg(ToClient(Done)) b.expectMsg(ToClient(Done)) - a.send(barrier, EnterBarrier("bar")) + a.send(barrier, EnterBarrier("bar12", None)) barrier ! NodeInfo(C, AddressFromURIString("akka://sys"), c.ref) c.expectMsg(ToClient(Done)) - b.send(barrier, EnterBarrier("bar")) + b.send(barrier, EnterBarrier("bar12", None)) noMsg(a, b, c) - within(2 second) { - c.send(barrier, EnterBarrier("bar")) - a.expectMsg(ToClient(BarrierResult("bar", true))) - b.expectMsg(ToClient(BarrierResult("bar", true))) - c.expectMsg(ToClient(BarrierResult("bar", true))) + within(2 seconds) { + c.send(barrier, EnterBarrier("bar12", None)) + a.expectMsg(ToClient(BarrierResult("bar12", true))) + b.expectMsg(ToClient(BarrierResult("bar12", true))) + c.expectMsg(ToClient(BarrierResult("bar12", true))) } } @@ -302,14 +315,14 @@ class BarrierSpec extends AkkaSpec(BarrierSpec.config) with ImplicitSender with a.expectMsg(ToClient(Done)) b.expectMsg(ToClient(Done)) c.expectMsg(ToClient(Done)) - a.send(barrier, EnterBarrier("bar")) - b.send(barrier, EnterBarrier("bar")) + a.send(barrier, EnterBarrier("bar13", None)) + b.send(barrier, EnterBarrier("bar13", None)) barrier ! Remove(A) barrier ! ClientDisconnected(A) noMsg(a, b, c) - b.within(2 second) { + b.within(2 seconds) { barrier ! Remove(C) - b.expectMsg(ToClient(BarrierResult("bar", true))) + b.expectMsg(ToClient(BarrierResult("bar13", true))) } barrier ! ClientDisconnected(C) expectNoMsg(1 second) @@ -322,9 +335,9 @@ class BarrierSpec extends AkkaSpec(BarrierSpec.config) with ImplicitSender with barrier ! NodeInfo(B, AddressFromURIString("akka://sys"), b.ref) a.expectMsg(ToClient(Done)) b.expectMsg(ToClient(Done)) - a.send(barrier, EnterBarrier("bar")) + a.send(barrier, EnterBarrier("bar14", None)) barrier ! 
Remove(A) - b.send(barrier, EnterBarrier("foo")) + b.send(barrier, EnterBarrier("foo", None)) b.expectMsg(ToClient(BarrierResult("foo", true))) } @@ -336,13 +349,13 @@ class BarrierSpec extends AkkaSpec(BarrierSpec.config) with ImplicitSender with barrier ! NodeInfo(B, AddressFromURIString("akka://sys"), b.ref) a.expectMsg(ToClient(Done)) b.expectMsg(ToClient(Done)) - a.send(barrier, EnterBarrier("bar")) + a.send(barrier, EnterBarrier("bar15", None)) barrier ! ClientDisconnected(RoleName("unknown")) noMsg(a) EventFilter[ClientLost](occurrences = 1) intercept { barrier ! ClientDisconnected(B) } - a.expectMsg(ToClient(BarrierResult("bar", false))) + a.expectMsg(ToClient(BarrierResult("bar15", false))) } "fail barrier with disconnecing node who already arrived" taggedAs TimingTest in { @@ -356,12 +369,12 @@ class BarrierSpec extends AkkaSpec(BarrierSpec.config) with ImplicitSender with a.expectMsg(ToClient(Done)) b.expectMsg(ToClient(Done)) c.expectMsg(ToClient(Done)) - a.send(barrier, EnterBarrier("bar")) - b.send(barrier, EnterBarrier("bar")) + a.send(barrier, EnterBarrier("bar16", None)) + b.send(barrier, EnterBarrier("bar16", None)) EventFilter[ClientLost](occurrences = 1) intercept { barrier ! ClientDisconnected(B) } - a.expectMsg(ToClient(BarrierResult("bar", false))) + a.expectMsg(ToClient(BarrierResult("bar16", false))) } "fail when entering wrong barrier" taggedAs TimingTest in { @@ -373,15 +386,15 @@ class BarrierSpec extends AkkaSpec(BarrierSpec.config) with ImplicitSender with barrier ! 
nodeB a.expectMsg(ToClient(Done)) b.expectMsg(ToClient(Done)) - a.send(barrier, EnterBarrier("bar")) + a.send(barrier, EnterBarrier("bar17", None)) EventFilter[WrongBarrier](occurrences = 1) intercept { - b.send(barrier, EnterBarrier("foo")) + b.send(barrier, EnterBarrier("foo", None)) } - a.expectMsg(ToClient(BarrierResult("bar", false))) + a.expectMsg(ToClient(BarrierResult("bar17", false))) b.expectMsg(ToClient(BarrierResult("foo", false))) } - "not really fail after barrier timeout" taggedAs TimingTest in { + "fail after barrier timeout" taggedAs TimingTest in { val barrier = getController(2) val a, b = TestProbe() val nodeA = NodeInfo(A, AddressFromURIString("akka://sys"), a.ref) @@ -390,13 +403,13 @@ class BarrierSpec extends AkkaSpec(BarrierSpec.config) with ImplicitSender with barrier ! nodeB a.expectMsg(ToClient(Done)) b.expectMsg(ToClient(Done)) - a.send(barrier, EnterBarrier("right")) + a.send(barrier, EnterBarrier("bar18", Option(2 seconds))) EventFilter[BarrierTimeout](occurrences = 1) intercept { - Thread.sleep(5000) + Thread.sleep(4000) } - b.send(barrier, EnterBarrier("right")) - a.expectMsg(ToClient(BarrierResult("right", true))) - b.expectMsg(ToClient(BarrierResult("right", true))) + b.send(barrier, EnterBarrier("bar18", None)) + a.expectMsg(ToClient(BarrierResult("bar18", false))) + b.expectMsg(ToClient(BarrierResult("bar18", false))) } "fail if a node registers twice" taggedAs TimingTest in { @@ -423,8 +436,75 @@ class BarrierSpec extends AkkaSpec(BarrierSpec.config) with ImplicitSender with controller ! 
nodeB b.expectMsg(ToClient(BarrierResult("initial startup", false))) } - a.send(controller, EnterBarrier("x")) - a.expectMsg(ToClient(BarrierResult("x", false))) + a.send(controller, EnterBarrier("bar19", None)) + a.expectMsg(ToClient(BarrierResult("bar19", false))) + } + + "fail subsequent barriers after foreced failure" taggedAs TimingTest in { + val barrier = getController(2) + val a, b = TestProbe() + val nodeA = NodeInfo(A, AddressFromURIString("akka://sys"), a.ref) + val nodeB = NodeInfo(B, AddressFromURIString("akka://sys"), b.ref) + barrier ! nodeA + barrier ! nodeB + a.expectMsg(ToClient(Done)) + b.expectMsg(ToClient(Done)) + a.send(barrier, EnterBarrier("bar20", Option(2 seconds))) + EventFilter[FailedBarrier](occurrences = 1) intercept { + b.send(barrier, FailBarrier("bar20")) + a.expectMsg(ToClient(BarrierResult("bar20", false))) + b.expectNoMsg(1 second) + } + a.send(barrier, EnterBarrier("bar21", None)) + b.send(barrier, EnterBarrier("bar21", None)) + a.expectMsg(ToClient(BarrierResult("bar21", false))) + b.expectMsg(ToClient(BarrierResult("bar21", false))) + } + + "timeout within the shortest timeout if the new timeout is shorter" taggedAs TimingTest in { + val barrier = getController(3) + val a, b, c = TestProbe() + val nodeA = NodeInfo(A, AddressFromURIString("akka://sys"), a.ref) + val nodeB = NodeInfo(B, AddressFromURIString("akka://sys"), b.ref) + val nodeC = NodeInfo(C, AddressFromURIString("akka://sys"), c.ref) + barrier ! nodeA + barrier ! nodeB + barrier ! 
nodeC + a.expectMsg(ToClient(Done)) + b.expectMsg(ToClient(Done)) + c.expectMsg(ToClient(Done)) + a.send(barrier, EnterBarrier("bar22", Option(10 seconds))) + b.send(barrier, EnterBarrier("bar22", Option(2 seconds))) + EventFilter[BarrierTimeout](occurrences = 1) intercept { + Thread.sleep(4000) + } + c.send(barrier, EnterBarrier("bar22", None)) + a.expectMsg(ToClient(BarrierResult("bar22", false))) + b.expectMsg(ToClient(BarrierResult("bar22", false))) + c.expectMsg(ToClient(BarrierResult("bar22", false))) + } + + "timeout within the shortest timeout if the new timeout is longer" taggedAs TimingTest in { + val barrier = getController(3) + val a, b, c = TestProbe() + val nodeA = NodeInfo(A, AddressFromURIString("akka://sys"), a.ref) + val nodeB = NodeInfo(B, AddressFromURIString("akka://sys"), b.ref) + val nodeC = NodeInfo(C, AddressFromURIString("akka://sys"), c.ref) + barrier ! nodeA + barrier ! nodeB + barrier ! nodeC + a.expectMsg(ToClient(Done)) + b.expectMsg(ToClient(Done)) + c.expectMsg(ToClient(Done)) + a.send(barrier, EnterBarrier("bar23", Option(2 seconds))) + b.send(barrier, EnterBarrier("bar23", Option(10 seconds))) + EventFilter[BarrierTimeout](occurrences = 1) intercept { + Thread.sleep(4000) + } + c.send(barrier, EnterBarrier("bar23", None)) + a.expectMsg(ToClient(BarrierResult("bar23", false))) + b.expectMsg(ToClient(BarrierResult("bar23", false))) + c.expectMsg(ToClient(BarrierResult("bar23", false))) } "finally have no failure messages left" taggedAs TimingTest in { @@ -469,4 +549,7 @@ class BarrierSpec extends AkkaSpec(BarrierSpec.config) with ImplicitSender with probes foreach (_.msgAvailable must be(false)) } + private def data(clients: Set[Controller.NodeInfo], barrier: String, arrived: List[ActorRef], previous: Data): Data = { + Data(clients, barrier, arrived, previous.deadline) + } } \ No newline at end of file diff --git a/akka-remote-tests/src/test/scala/akka/remote/testkit/MultiNodeSpec.scala 
b/akka-remote-tests/src/test/scala/akka/remote/testkit/MultiNodeSpec.scala index faaab5cdc4..25bb8df7dc 100644 --- a/akka-remote-tests/src/test/scala/akka/remote/testkit/MultiNodeSpec.scala +++ b/akka-remote-tests/src/test/scala/akka/remote/testkit/MultiNodeSpec.scala @@ -7,12 +7,13 @@ import java.net.InetSocketAddress import com.typesafe.config.{ ConfigObject, ConfigFactory, Config } -import akka.actor.{ RootActorPath, Deploy, ActorPath, ActorSystem, ExtendedActorSystem } +import akka.actor.{ RootActorPath, ActorPath, ActorSystem, ExtendedActorSystem } import akka.dispatch.Await import akka.dispatch.Await.Awaitable import akka.remote.testconductor.{ TestConductorExt, TestConductor, RoleName } import akka.testkit.AkkaSpec -import akka.util.{ NonFatal, Duration } +import akka.util.{ Timeout, NonFatal } +import akka.util.duration._ /** * Configure the role names and participants of the test, including configuration settings. @@ -182,6 +183,14 @@ abstract class MultiNodeSpec(val myself: RoleName, _system: ActorSystem, _roles: if (nodes exists (_ == myself)) yes else no } + /** + * Enter the named barriers in the order given. Use the remaining duration from + * the innermost enclosing `within` block or the default `BarrierTimeout` + */ + def enterBarrier(name: String*) { + testConductor.enter(Timeout.durationToTimeout(remainingOr(testConductor.Settings.BarrierTimeout.duration)), name) + } + /** * Query the controller for the transport address of the given node (by role name) and * return that as an ActorPath for easy composition: @@ -193,11 +202,12 @@ abstract class MultiNodeSpec(val myself: RoleName, _system: ActorSystem, _roles: def node(role: RoleName): ActorPath = RootActorPath(testConductor.getAddressFor(role).await) /** - * Enrich `.await()` onto all Awaitables, using BarrierTimeout. + * Enrich `.await()` onto all Awaitables, using remaining duration from the innermost + * enclosing `within` block or QueryTimeout. 
*/ implicit def awaitHelper[T](w: Awaitable[T]) = new AwaitHelper(w) class AwaitHelper[T](w: Awaitable[T]) { - def await: T = Await.result(w, testConductor.Settings.BarrierTimeout.duration) + def await: T = Await.result(w, remainingOr(testConductor.Settings.QueryTimeout.duration)) } /* @@ -206,9 +216,11 @@ abstract class MultiNodeSpec(val myself: RoleName, _system: ActorSystem, _roles: private val controllerAddr = new InetSocketAddress(nodeNames(0), 4711) if (selfIndex == 0) { - testConductor.startController(initialParticipants, myself, controllerAddr).await + Await.result(testConductor.startController(initialParticipants, myself, controllerAddr), + testConductor.Settings.BarrierTimeout.duration) } else { - testConductor.startClient(myself, controllerAddr).await + Await.result(testConductor.startClient(myself, controllerAddr), + testConductor.Settings.BarrierTimeout.duration) } // now add deployments, if so desired @@ -250,4 +262,16 @@ abstract class MultiNodeSpec(val myself: RoleName, _system: ActorSystem, _roles: // useful to see which jvm is running which role log.info("Role [{}] started", myself.name) + // wait for all nodes to remove themselves before we shut the conductor down + final override def beforeShutdown() = { + if (selfIndex == 0) { + testConductor.removeNode(myself) + within(testConductor.Settings.BarrierTimeout.duration) { + awaitCond { + testConductor.getNodes.await.filterNot(_ == myself).isEmpty + } + } + } + } + } diff --git a/akka-remote/src/main/protocol/RemoteProtocol.proto b/akka-remote/src/main/protocol/RemoteProtocol.proto index 7d86d8a82b..ddcfe26d1d 100644 --- a/akka-remote/src/main/protocol/RemoteProtocol.proto +++ b/akka-remote/src/main/protocol/RemoteProtocol.proto @@ -9,6 +9,8 @@ option optimize_for = SPEED; Compile with: cd ./akka-remote/src/main/protocol protoc RemoteProtocol.proto --java_out ../java + cd ../../../.. 
+ ./scripts/fix-protobuf.sh *******************************************/ message AkkaRemoteProtocol { diff --git a/akka-remote/src/main/resources/reference.conf b/akka-remote/src/main/resources/reference.conf index e2c0a45346..a8d2cb2680 100644 --- a/akka-remote/src/main/resources/reference.conf +++ b/akka-remote/src/main/resources/reference.conf @@ -122,7 +122,8 @@ akka { # (I) Length in akka.time-unit how long core threads will be kept alive if idling execution-pool-keepalive = 60s - # (I) Size of the core pool of the remote execution unit + # (I) Size in number of threads of the core pool of the remote execution unit. + # A value of 0 will turn this off, which can lead to deadlocks under some configurations! execution-pool-size = 4 # (I) Maximum channel size, 0 for off @@ -193,7 +194,7 @@ akka { # Examples: [ "TLS_RSA_WITH_AES_128_CBC_SHA", "TLS_RSA_WITH_AES_256_CBC_SHA" ] # You need to install the JCE Unlimited Strength Jurisdiction Policy Files to use AES 256 # More info here: http://docs.oracle.com/javase/7/docs/technotes/guides/security/SunProviders.html#SunJCEProvider - supported-algorithms = ["TLS_RSA_WITH_AES_128_CBC_SHA"] + enabled-algorithms = ["TLS_RSA_WITH_AES_128_CBC_SHA"] # Using /dev/./urandom is only necessary when using SHA1PRNG on Linux to prevent blocking # It is NOT as secure because it reuses the seed @@ -204,11 +205,11 @@ akka { # There are three options, in increasing order of security: # "" or SecureRandom => (default) # "SHA1PRNG" => Can be slow because of blocking issues on Linux - # "AES128CounterRNGFast" => fastest startup and based on AES encryption algorithm + # "AES128CounterSecureRNG" => fastest startup and based on AES encryption algorithm # The following use one of 3 possible seed sources, depending on availability: /dev/random, random.org and SecureRandom (provided by Java) - # "AES128CounterRNGSecure" - # "AES256CounterRNGSecure" (Install JCE Unlimited Strength Jurisdiction Policy Files first) - # Setting a value here may
require you to supply the appropriate cipher suite (see supported-algorithms section above) + # "AES128CounterInetRNG" + # "AES256CounterInetRNG" (Install JCE Unlimited Strength Jurisdiction Policy Files first) + # Setting a value here may require you to supply the appropriate cipher suite (see enabled-algorithms section above) random-number-generator = "" } } diff --git a/akka-remote/src/main/scala/akka/remote/RemoteActorRefProvider.scala b/akka-remote/src/main/scala/akka/remote/RemoteActorRefProvider.scala index eaecf67792..cdf9ad9d70 100644 --- a/akka-remote/src/main/scala/akka/remote/RemoteActorRefProvider.scala +++ b/akka-remote/src/main/scala/akka/remote/RemoteActorRefProvider.scala @@ -36,8 +36,8 @@ private[akka] class RemoteActorRefProvider( // these are only available after init() override def rootGuardian: InternalActorRef = local.rootGuardian - override def guardian: InternalActorRef = local.guardian - override def systemGuardian: InternalActorRef = local.systemGuardian + override def guardian: LocalActorRef = local.guardian + override def systemGuardian: LocalActorRef = local.systemGuardian override def terminationFuture: Promise[Unit] = local.terminationFuture override def dispatcher: MessageDispatcher = local.dispatcher override def registerTempActor(actorRef: InternalActorRef, path: ActorPath): Unit = local.registerTempActor(actorRef, path) @@ -96,8 +96,8 @@ private[akka] class RemoteActorRefProvider( } def actorOf(system: ActorSystemImpl, props: Props, supervisor: InternalActorRef, path: ActorPath, - systemService: Boolean, deploy: Option[Deploy], lookupDeploy: Boolean): InternalActorRef = { - if (systemService) local.actorOf(system, props, supervisor, path, systemService, deploy, lookupDeploy) + systemService: Boolean, deploy: Option[Deploy], lookupDeploy: Boolean, async: Boolean): InternalActorRef = { + if (systemService) local.actorOf(system, props, supervisor, path, systemService, deploy, lookupDeploy, async) else { /* @@ -155,14 +155,14 @@ 
private[akka] class RemoteActorRefProvider( Iterator(props.deploy) ++ deployment.iterator reduce ((a, b) ⇒ b withFallback a) match { case d @ Deploy(_, _, _, RemoteScope(addr)) ⇒ if (addr == rootPath.address || addr == transport.address) { - local.actorOf(system, props, supervisor, path, false, deployment.headOption, false) + local.actorOf(system, props, supervisor, path, false, deployment.headOption, false, async) } else { val rpath = RootActorPath(addr) / "remote" / transport.address.hostPort / path.elements useActorOnNode(rpath, props, d, supervisor) new RemoteActorRef(this, transport, rpath, supervisor) } - case _ ⇒ local.actorOf(system, props, supervisor, path, systemService, deployment.headOption, false) + case _ ⇒ local.actorOf(system, props, supervisor, path, systemService, deployment.headOption, false, async) } } } diff --git a/akka-remote/src/main/scala/akka/remote/RemoteDaemon.scala b/akka-remote/src/main/scala/akka/remote/RemoteDaemon.scala index ddab54b2ad..53023687c0 100644 --- a/akka-remote/src/main/scala/akka/remote/RemoteDaemon.scala +++ b/akka-remote/src/main/scala/akka/remote/RemoteDaemon.scala @@ -5,10 +5,11 @@ package akka.remote import scala.annotation.tailrec - import akka.actor.{ VirtualPathContainer, Terminated, Deploy, Props, Nobody, LocalActorRef, InternalActorRef, Address, ActorSystemImpl, ActorRef, ActorPathExtractor, ActorPath, Actor } import akka.event.LoggingAdapter import akka.dispatch.Watch +import akka.actor.ActorRefWithCell +import akka.actor.ActorRefScope private[akka] sealed trait DaemonMsg private[akka] case class DaemonMsgCreate(props: Props, deploy: Deploy, path: String, supervisor: ActorRef) extends DaemonMsg @@ -60,7 +61,7 @@ private[akka] class RemoteSystemDaemon(system: ActorSystemImpl, _path: ActorPath val subpath = elems.drop(1) val path = this.path / subpath val actor = system.provider.actorOf(system, props, supervisor.asInstanceOf[InternalActorRef], - path, false, Some(deploy), true) + path, systemService = false, 
Some(deploy), lookupDeploy = true, async = false) addChild(subpath.mkString("/"), actor) this.sendSystemMessage(Watch(actor, this)) case _ ⇒ @@ -68,11 +69,12 @@ private[akka] class RemoteSystemDaemon(system: ActorSystemImpl, _path: ActorPath } } - case Terminated(child: LocalActorRef) ⇒ removeChild(child.path.elements.drop(1).mkString("/")) + case Terminated(child: ActorRefWithCell) if child.asInstanceOf[ActorRefScope].isLocal ⇒ + removeChild(child.path.elements.drop(1).mkString("/")) - case t: Terminated ⇒ + case t: Terminated ⇒ - case unknown ⇒ log.warning("Unknown message {} received by {}", unknown, this) + case unknown ⇒ log.warning("Unknown message {} received by {}", unknown, this) } } diff --git a/akka-remote/src/main/scala/akka/remote/RemoteTransport.scala b/akka-remote/src/main/scala/akka/remote/RemoteTransport.scala index aefd34ec74..c48cc430f2 100644 --- a/akka-remote/src/main/scala/akka/remote/RemoteTransport.scala +++ b/akka-remote/src/main/scala/akka/remote/RemoteTransport.scala @@ -106,6 +106,7 @@ case class RemoteServerShutdown( case class RemoteServerError( @BeanProperty val cause: Throwable, @transient @BeanProperty remote: RemoteTransport) extends RemoteServerLifeCycleEvent { + override def logLevel: Logging.LogLevel = Logging.ErrorLevel override def toString: String = "RemoteServerError@" + remote + "] Error[" + cause + "]" } @@ -269,14 +270,14 @@ abstract class RemoteTransport(val system: ExtendedActorSystem, val provider: Re } case x ⇒ log.warning("remoteDaemon received illegal message {} from {}", x, remoteMessage.sender) } - case l: LocalRef ⇒ + case l @ (_: LocalRef | _: RepointableRef) if l.isLocal ⇒ if (provider.remoteSettings.LogReceive) log.debug("received local message {}", remoteMessage) remoteMessage.payload match { case msg: PossiblyHarmful if useUntrustedMode ⇒ log.warning("operating in UntrustedMode, dropping inbound PossiblyHarmful message of type {}", msg.getClass) case msg: SystemMessage ⇒ l.sendSystemMessage(msg) case msg ⇒ 
l.!(msg)(remoteMessage.sender) } - case r: RemoteRef ⇒ + case r @ (_: RemoteRef | _: RepointableRef) if !r.isLocal ⇒ if (provider.remoteSettings.LogReceive) log.debug("received remote-destined message {}", remoteMessage) remoteMessage.originalReceiver match { case AddressFromURIString(address) if address == provider.transport.address ⇒ @@ -284,7 +285,7 @@ abstract class RemoteTransport(val system: ExtendedActorSystem, val provider: Re r.!(remoteMessage.payload)(remoteMessage.sender) case r ⇒ log.error("dropping message {} for non-local recipient {} arriving at {} inbound address is {}", remoteMessage.payload, r, address, provider.transport.address) } - case r ⇒ log.error("dropping message {} for non-local recipient {} arriving at {} inbound address is {}", remoteMessage.payload, r, address, provider.transport.address) + case r ⇒ log.error("dropping message {} for unknown recipient {} arriving at {} inbound address is {}", remoteMessage.payload, r, address, provider.transport.address) } } } diff --git a/akka-remote/src/main/scala/akka/remote/netty/Client.scala b/akka-remote/src/main/scala/akka/remote/netty/Client.scala index 0917086d4d..35c0674d23 100644 --- a/akka-remote/src/main/scala/akka/remote/netty/Client.scala +++ b/akka-remote/src/main/scala/akka/remote/netty/Client.scala @@ -18,6 +18,7 @@ import akka.AkkaException import akka.event.Logging import akka.actor.{ DeadLetter, Address, ActorRef } import akka.util.{ NonFatal, Switch } +import org.jboss.netty.handler.ssl.SslHandler /** * This is the abstract baseclass for netty remote clients, currently there's only an @@ -115,15 +116,27 @@ private[akka] class ActiveRemoteClient private[akka] ( */ def connect(reconnectIfAlreadyConnected: Boolean = false): Boolean = { - def sendSecureCookie(connection: ChannelFuture) { - val handshake = RemoteControlProtocol.newBuilder.setCommandType(CommandType.CONNECT) - if (settings.SecureCookie.nonEmpty) handshake.setCookie(settings.SecureCookie.get) - 
handshake.setOrigin(RemoteProtocol.AddressProtocol.newBuilder - .setSystem(localAddress.system) - .setHostname(localAddress.host.get) - .setPort(localAddress.port.get) - .build) - connection.getChannel.write(netty.createControlEnvelope(handshake.build)) + // Returns whether the handshake was written to the channel or not + def sendSecureCookie(connection: ChannelFuture): Boolean = { + val future = + if (!connection.isSuccess || !settings.EnableSSL) connection + else connection.getChannel.getPipeline.get[SslHandler](classOf[SslHandler]).handshake().awaitUninterruptibly() + + if (!future.isSuccess) { + notifyListeners(RemoteClientError(future.getCause, netty, remoteAddress)) + false + } else { + ChannelAddress.set(connection.getChannel, Some(remoteAddress)) + val handshake = RemoteControlProtocol.newBuilder.setCommandType(CommandType.CONNECT) + if (settings.SecureCookie.nonEmpty) handshake.setCookie(settings.SecureCookie.get) + handshake.setOrigin(RemoteProtocol.AddressProtocol.newBuilder + .setSystem(localAddress.system) + .setHostname(localAddress.host.get) + .setPort(localAddress.port.get) + .build) + connection.getChannel.write(netty.createControlEnvelope(handshake.build)) + true + } } def attemptReconnect(): Boolean = { @@ -131,14 +144,7 @@ private[akka] class ActiveRemoteClient private[akka] ( log.debug("Remote client reconnecting to [{}|{}]", remoteAddress, remoteIP) connection = bootstrap.connect(new InetSocketAddress(remoteIP, remoteAddress.port.get)) openChannels.add(connection.awaitUninterruptibly.getChannel) // Wait until the connection attempt succeeds or fails. 
- - if (!connection.isSuccess) { - notifyListeners(RemoteClientError(connection.getCause, netty, remoteAddress)) - false - } else { - sendSecureCookie(connection) - true - } + sendSecureCookie(connection) } runSwitch switchOn { @@ -163,24 +169,19 @@ private[akka] class ActiveRemoteClient private[akka] ( openChannels.add(connection.awaitUninterruptibly.getChannel) // Wait until the connection attempt succeeds or fails. - if (!connection.isSuccess) { - notifyListeners(RemoteClientError(connection.getCause, netty, remoteAddress)) - false - } else { - ChannelAddress.set(connection.getChannel, Some(remoteAddress)) - sendSecureCookie(connection) + if (sendSecureCookie(connection)) { notifyListeners(RemoteClientStarted(netty, remoteAddress)) true + } else { + connection.getChannel.close() + openChannels.remove(connection.getChannel) + false } } match { case true ⇒ true case false if reconnectIfAlreadyConnected ⇒ - connection.getChannel.close() - openChannels.remove(connection.getChannel) - log.debug("Remote client reconnecting to [{}]", remoteAddress) attemptReconnect() - case false ⇒ false } } diff --git a/akka-remote/src/main/scala/akka/remote/netty/NettyRemoteSupport.scala b/akka-remote/src/main/scala/akka/remote/netty/NettyRemoteSupport.scala index 5f62bb58c8..9c6e4c85f2 100644 --- a/akka-remote/src/main/scala/akka/remote/netty/NettyRemoteSupport.scala +++ b/akka-remote/src/main/scala/akka/remote/netty/NettyRemoteSupport.scala @@ -24,7 +24,7 @@ import akka.remote.{ RemoteTransportException, RemoteTransport, RemoteActorRefPr import akka.util.NonFatal import akka.actor.{ ExtendedActorSystem, Address, ActorRef } -object ChannelAddress extends ChannelLocal[Option[Address]] { +private[akka] object ChannelAddress extends ChannelLocal[Option[Address]] { override def initialValue(ch: Channel): Option[Address] = None } @@ -54,9 +54,7 @@ private[akka] class NettyRemoteTransport(_system: ExtendedActorSystem, _provider * in implementations of ChannelPipelineFactory. 
*/ def apply(handlers: Seq[ChannelHandler]): DefaultChannelPipeline = - handlers.foldLeft(new DefaultChannelPipeline) { - (pipe, handler) ⇒ pipe.addLast(Logging.simpleName(handler.getClass), handler); pipe - } + (new DefaultChannelPipeline /: handlers) { (p, h) ⇒ p.addLast(Logging.simpleName(h.getClass), h); p } /** * Constructs the NettyRemoteTransport default pipeline with the give “head” handler, which @@ -65,21 +63,18 @@ private[akka] class NettyRemoteTransport(_system: ExtendedActorSystem, _provider * @param withTimeout determines whether an IdleStateHandler shall be included */ def apply(endpoint: ⇒ Seq[ChannelHandler], withTimeout: Boolean, isClient: Boolean): ChannelPipelineFactory = - new ChannelPipelineFactory { - def getPipeline = apply(defaultStack(withTimeout, isClient) ++ endpoint) - } + new ChannelPipelineFactory { override def getPipeline = apply(defaultStack(withTimeout, isClient) ++ endpoint) } /** * Construct a default protocol stack, excluding the “head” handler (i.e. the one which * actually dispatches the received messages to the local target actors). */ def defaultStack(withTimeout: Boolean, isClient: Boolean): Seq[ChannelHandler] = - (if (settings.EnableSSL) NettySSLSupport(settings, NettyRemoteTransport.this.log, isClient) :: Nil else Nil) ::: - (if (withTimeout) timeout :: Nil else Nil) ::: + (if (settings.EnableSSL) List(NettySSLSupport(settings, NettyRemoteTransport.this.log, isClient)) else Nil) ::: + (if (withTimeout) List(timeout) else Nil) ::: msgFormat ::: authenticator ::: - executionHandler :: - Nil + executionHandler /** * Construct an IdleStateHandler which uses [[akka.remote.netty.NettyRemoteTransport]].timer. @@ -103,20 +98,22 @@ private[akka] class NettyRemoteTransport(_system: ExtendedActorSystem, _provider * happen on a netty thread (that could be bad if re-sending over the network for * remote-deployed actors). 
*/ - val executionHandler = new ExecutionHandler(new OrderedMemoryAwareThreadPoolExecutor( - settings.ExecutionPoolSize, - settings.MaxChannelMemorySize, - settings.MaxTotalMemorySize, - settings.ExecutionPoolKeepalive.length, - settings.ExecutionPoolKeepalive.unit, - system.threadFactory)) + val executionHandler = if (settings.ExecutionPoolSize != 0) + List(new ExecutionHandler(new OrderedMemoryAwareThreadPoolExecutor( + settings.ExecutionPoolSize, + settings.MaxChannelMemorySize, + settings.MaxTotalMemorySize, + settings.ExecutionPoolKeepalive.length, + settings.ExecutionPoolKeepalive.unit, + system.threadFactory))) + else Nil /** * Construct and authentication handler which uses the SecureCookie to somewhat * protect the TCP port from unauthorized use (don’t rely on it too much, though, * as this is NOT a cryptographic feature). */ - def authenticator = if (settings.RequireCookie) new RemoteServerAuthenticationHandler(settings.SecureCookie) :: Nil else Nil + def authenticator = if (settings.RequireCookie) List(new RemoteServerAuthenticationHandler(settings.SecureCookie)) else Nil } /** diff --git a/akka-remote/src/main/scala/akka/remote/netty/NettySSLSupport.scala b/akka-remote/src/main/scala/akka/remote/netty/NettySSLSupport.scala index 9440c09c95..690b4522ec 100644 --- a/akka-remote/src/main/scala/akka/remote/netty/NettySSLSupport.scala +++ b/akka-remote/src/main/scala/akka/remote/netty/NettySSLSupport.scala @@ -9,14 +9,17 @@ import javax.net.ssl.{ KeyManagerFactory, TrustManager, TrustManagerFactory, SSL import akka.remote.RemoteTransportException import akka.event.LoggingAdapter import java.io.{ IOException, FileNotFoundException, FileInputStream } -import java.security.{ SecureRandom, GeneralSecurityException, KeyStore, Security } import akka.security.provider.AkkaProvider +import java.security._ /** * Used for adding SSL support to Netty pipeline * Internal use only */ private[akka] object NettySSLSupport { + + Security addProvider AkkaProvider + /** * 
Construct a SSLHandler which can be inserted into a Netty server/client pipeline */ @@ -29,19 +32,20 @@ private[akka] object NettySSLSupport { * Using /dev/./urandom is only necessary when using SHA1PRNG on Linux * Use 'new SecureRandom()' instead of 'SecureRandom.getInstance("SHA1PRNG")' to avoid having problems */ - sourceOfRandomness foreach { path ⇒ System.setProperty("java.security.egd", path) } + sourceOfRandomness foreach { path ⇒ + System.setProperty("java.security.egd", path) + System.setProperty("securerandom.source", path) + } val rng = rngName match { - case Some(r @ ("AES128CounterRNGFast" | "AES128CounterRNGSecure" | "AES256CounterRNGSecure")) ⇒ + case Some(r @ ("AES128CounterSecureRNG" | "AES128CounterInetRNG" | "AES256CounterInetRNG")) ⇒ log.debug("SSL random number generator set to: {}", r) - val akka = new AkkaProvider - Security.addProvider(akka) - SecureRandom.getInstance(r, akka) - case Some("SHA1PRNG") ⇒ - log.debug("SSL random number generator set to: SHA1PRNG") - // This needs /dev/urandom to be the source on Linux to prevent problems with /dev/random blocking + SecureRandom.getInstance(r, AkkaProvider) + case Some(s @ ("SHA1PRNG" | "NativePRNG")) ⇒ + log.debug("SSL random number generator set to: " + s) + // SHA1PRNG needs /dev/urandom to be the source on Linux to prevent problems with /dev/random blocking // However, this also makes the seed source insecure as the seed is reused to avoid blocking (not a problem on FreeBSD). 
- SecureRandom.getInstance("SHA1PRNG") + SecureRandom.getInstance(s) case Some(unknown) ⇒ log.debug("Unknown SSLRandomNumberGenerator [{}] falling back to SecureRandom", unknown) new SecureRandom @@ -58,12 +62,18 @@ private[akka] object NettySSLSupport { def constructClientContext(settings: NettySettings, log: LoggingAdapter, trustStorePath: String, trustStorePassword: String, protocol: String): Option[SSLContext] = try { - val trustManagerFactory = TrustManagerFactory.getInstance(TrustManagerFactory.getDefaultAlgorithm) - val trustStore = KeyStore.getInstance(KeyStore.getDefaultType) - trustStore.load(new FileInputStream(trustStorePath), trustStorePassword.toCharArray) //FIXME does the FileInputStream need to be closed? - trustManagerFactory.init(trustStore) - val trustManagers: Array[TrustManager] = trustManagerFactory.getTrustManagers - Option(SSLContext.getInstance(protocol)) map { ctx ⇒ ctx.init(null, trustManagers, initializeCustomSecureRandom(settings.SSLRandomNumberGenerator, settings.SSLRandomSource, log)); ctx } + val rng = initializeCustomSecureRandom(settings.SSLRandomNumberGenerator, settings.SSLRandomSource, log) + val trustManagers: Array[TrustManager] = { + val trustManagerFactory = TrustManagerFactory.getInstance(TrustManagerFactory.getDefaultAlgorithm) + trustManagerFactory.init({ + val trustStore = KeyStore.getInstance(KeyStore.getDefaultType) + val fin = new FileInputStream(trustStorePath) + try trustStore.load(fin, trustStorePassword.toCharArray) finally fin.close() + trustStore + }) + trustManagerFactory.getTrustManagers + } + Option(SSLContext.getInstance(protocol)) map { ctx ⇒ ctx.init(null, trustManagers, rng); ctx } } catch { case e: FileNotFoundException ⇒ throw new RemoteTransportException("Client SSL connection could not be established because trust store could not be loaded", e) case e: IOException ⇒ throw new RemoteTransportException("Client SSL connection could not be established because: " + e.getMessage, e) @@ -80,10 +90,12 @@ 
private[akka] object NettySSLSupport { }) match { case Some(context) ⇒ log.debug("Using client SSL context to create SSLEngine ...") - val sslEngine = context.createSSLEngine - sslEngine.setUseClientMode(true) - sslEngine.setEnabledCipherSuites(settings.SSLSupportedAlgorithms.toArray.map(_.toString)) - new SslHandler(sslEngine) + new SslHandler({ + val sslEngine = context.createSSLEngine + sslEngine.setUseClientMode(true) + sslEngine.setEnabledCipherSuites(settings.SSLEnabledAlgorithms.toArray) + sslEngine + }) case None ⇒ throw new GeneralSecurityException( """Failed to initialize client SSL because SSL context could not be found." + @@ -99,11 +111,15 @@ private[akka] object NettySSLSupport { def constructServerContext(settings: NettySettings, log: LoggingAdapter, keyStorePath: String, keyStorePassword: String, protocol: String): Option[SSLContext] = try { + val rng = initializeCustomSecureRandom(settings.SSLRandomNumberGenerator, settings.SSLRandomSource, log) val factory = KeyManagerFactory.getInstance(KeyManagerFactory.getDefaultAlgorithm) - val keyStore = KeyStore.getInstance(KeyStore.getDefaultType) - keyStore.load(new FileInputStream(keyStorePath), keyStorePassword.toCharArray) //FIXME does the FileInputStream need to be closed? 
- factory.init(keyStore, keyStorePassword.toCharArray) - Option(SSLContext.getInstance(protocol)) map { ctx ⇒ ctx.init(factory.getKeyManagers, null, initializeCustomSecureRandom(settings.SSLRandomNumberGenerator, settings.SSLRandomSource, log)); ctx } + factory.init({ + val keyStore = KeyStore.getInstance(KeyStore.getDefaultType) + val fin = new FileInputStream(keyStorePath) + try keyStore.load(fin, keyStorePassword.toCharArray) finally fin.close() + keyStore + }, keyStorePassword.toCharArray) + Option(SSLContext.getInstance(protocol)) map { ctx ⇒ ctx.init(factory.getKeyManagers, null, rng); ctx } } catch { case e: FileNotFoundException ⇒ throw new RemoteTransportException("Server SSL connection could not be established because key store could not be loaded", e) case e: IOException ⇒ throw new RemoteTransportException("Server SSL connection could not be established because: " + e.getMessage, e) @@ -119,7 +135,7 @@ private[akka] object NettySSLSupport { log.debug("Using server SSL context to create SSLEngine ...") val sslEngine = context.createSSLEngine sslEngine.setUseClientMode(false) - sslEngine.setEnabledCipherSuites(settings.SSLSupportedAlgorithms.toArray.map(_.toString)) + sslEngine.setEnabledCipherSuites(settings.SSLEnabledAlgorithms.toArray) new SslHandler(sslEngine) case None ⇒ throw new GeneralSecurityException( """Failed to initialize server SSL because SSL context could not be found. 
diff --git a/akka-remote/src/main/scala/akka/remote/netty/Settings.scala b/akka-remote/src/main/scala/akka/remote/netty/Settings.scala index 024ed104c3..9babf6005c 100644 --- a/akka-remote/src/main/scala/akka/remote/netty/Settings.scala +++ b/akka-remote/src/main/scala/akka/remote/netty/Settings.scala @@ -8,6 +8,7 @@ import akka.util.Duration import java.util.concurrent.TimeUnit._ import java.net.InetAddress import akka.ConfigurationException +import scala.collection.JavaConverters.iterableAsScalaIterableConverter private[akka] class NettySettings(config: Config, val systemName: String) { @@ -72,7 +73,7 @@ private[akka] class NettySettings(config: Config, val systemName: String) { val ExecutionPoolKeepalive: Duration = Duration(getMilliseconds("execution-pool-keepalive"), MILLISECONDS) val ExecutionPoolSize: Int = getInt("execution-pool-size") match { - case sz if sz < 1 ⇒ throw new IllegalArgumentException("akka.remote.netty.execution-pool-size is less than 1") + case sz if sz < 0 ⇒ throw new IllegalArgumentException("akka.remote.netty.execution-pool-size is less than 0") case sz ⇒ sz } @@ -106,7 +107,7 @@ private[akka] class NettySettings(config: Config, val systemName: String) { case password ⇒ Some(password) } - val SSLSupportedAlgorithms = getStringList("ssl.supported-algorithms").toArray.toSet + val SSLEnabledAlgorithms = iterableAsScalaIterableConverter(getStringList("ssl.enabled-algorithms")).asScala.toSet[String] val SSLProtocol = getString("ssl.protocol") match { case "" ⇒ None diff --git a/akka-remote/src/main/scala/akka/routing/RemoteRouterConfig.scala b/akka-remote/src/main/scala/akka/routing/RemoteRouterConfig.scala index 9a71f309fc..a0b7ae4a49 100644 --- a/akka-remote/src/main/scala/akka/routing/RemoteRouterConfig.scala +++ b/akka-remote/src/main/scala/akka/routing/RemoteRouterConfig.scala @@ -71,7 +71,8 @@ class RemoteRouteeProvider(nodes: Iterable[Address], _context: ActorContext, _re IndexedSeq.empty[ActorRef] ++ (for (i ← 1 to nrOfInstances) 
yield { val name = "c" + i val deploy = Deploy("", ConfigFactory.empty(), props.routerConfig, RemoteScope(nodeAddressIter.next)) - impl.provider.actorOf(impl, props, context.self.asInstanceOf[InternalActorRef], context.self.path / name, false, Some(deploy), false) + impl.provider.actorOf(impl, props, context.self.asInstanceOf[InternalActorRef], context.self.path / name, + systemService = false, Some(deploy), lookupDeploy = false, async = false) }) case (_, xs, _) ⇒ throw new ConfigurationException("Remote target.nodes can not be combined with routees for [%s]" diff --git a/akka-remote/src/main/scala/akka/security/provider/AES128CounterRNGSecure.scala b/akka-remote/src/main/scala/akka/security/provider/AES128CounterInetRNG.scala similarity index 64% rename from akka-remote/src/main/scala/akka/security/provider/AES128CounterRNGSecure.scala rename to akka-remote/src/main/scala/akka/security/provider/AES128CounterInetRNG.scala index 846476cc2d..41d12b275f 100644 --- a/akka-remote/src/main/scala/akka/security/provider/AES128CounterRNGSecure.scala +++ b/akka-remote/src/main/scala/akka/security/provider/AES128CounterInetRNG.scala @@ -7,12 +7,16 @@ import org.uncommons.maths.random.{ AESCounterRNG, DefaultSeedGenerator } /** * Internal API + * This class is a wrapper around the 128-bit AESCounterRNG algorithm provided by http://maths.uncommons.org/ + * It uses the default seed generator which uses one of the following 3 random seed sources: + * Depending on availability: /dev/random, random.org and SecureRandom (provided by Java) + * The only method used by netty ssl is engineNextBytes(bytes) */ -class AES128CounterRNGSecure extends java.security.SecureRandomSpi { +class AES128CounterInetRNG extends java.security.SecureRandomSpi { private val rng = new AESCounterRNG() /** - * This is managed internally only + * This is managed internally by AESCounterRNG */ override protected def engineSetSeed(seed: Array[Byte]): Unit = () @@ -24,6 +28,7 @@ class AES128CounterRNGSecure 
extends java.security.SecureRandomSpi { override protected def engineNextBytes(bytes: Array[Byte]): Unit = rng.nextBytes(bytes) /** + * Unused method * Returns the given number of seed bytes. This call may be used to * seed other random number generators. * diff --git a/akka-remote/src/main/scala/akka/security/provider/AES128CounterRNGFast.scala b/akka-remote/src/main/scala/akka/security/provider/AES128CounterSecureRNG.scala similarity index 64% rename from akka-remote/src/main/scala/akka/security/provider/AES128CounterRNGFast.scala rename to akka-remote/src/main/scala/akka/security/provider/AES128CounterSecureRNG.scala index c355f5a548..cda59ee03b 100644 --- a/akka-remote/src/main/scala/akka/security/provider/AES128CounterRNGFast.scala +++ b/akka-remote/src/main/scala/akka/security/provider/AES128CounterSecureRNG.scala @@ -4,16 +4,18 @@ package akka.security.provider import org.uncommons.maths.random.{ AESCounterRNG, SecureRandomSeedGenerator } -import java.security.SecureRandom /** * Internal API + * This class is a wrapper around the AESCounterRNG algorithm provided by http://maths.uncommons.org/ * + * The only method used by netty ssl is engineNextBytes(bytes) + * This RNG is good to use to prevent startup delay when you don't have Internet access to random.org */ -class AES128CounterRNGFast extends java.security.SecureRandomSpi { +class AES128CounterSecureRNG extends java.security.SecureRandomSpi { private val rng = new AESCounterRNG(new SecureRandomSeedGenerator()) /** - * This is managed internally only + * This is managed internally by AESCounterRNG */ override protected def engineSetSeed(seed: Array[Byte]): Unit = () @@ -25,12 +27,13 @@ class AES128CounterRNGFast extends java.security.SecureRandomSpi { override protected def engineNextBytes(bytes: Array[Byte]): Unit = rng.nextBytes(bytes) /** + * Unused method * Returns the given number of seed bytes. This call may be used to * seed other random number generators. 
* * @param numBytes the number of seed bytes to generate. * @return the seed bytes. */ - override protected def engineGenerateSeed(numBytes: Int): Array[Byte] = (new SecureRandom).generateSeed(numBytes) + override protected def engineGenerateSeed(numBytes: Int): Array[Byte] = (new SecureRandomSeedGenerator()).generateSeed(numBytes) } diff --git a/akka-remote/src/main/scala/akka/security/provider/AES256CounterRNGSecure.scala b/akka-remote/src/main/scala/akka/security/provider/AES256CounterInetRNG.scala similarity index 52% rename from akka-remote/src/main/scala/akka/security/provider/AES256CounterRNGSecure.scala rename to akka-remote/src/main/scala/akka/security/provider/AES256CounterInetRNG.scala index d942938411..076d4fcd7f 100644 --- a/akka-remote/src/main/scala/akka/security/provider/AES256CounterRNGSecure.scala +++ b/akka-remote/src/main/scala/akka/security/provider/AES256CounterInetRNG.scala @@ -7,12 +7,22 @@ import org.uncommons.maths.random.{ AESCounterRNG, DefaultSeedGenerator } /** * Internal API + * This class is a wrapper around the 256-bit AESCounterRNG algorithm provided by http://maths.uncommons.org/ + * It uses the default seed generator which uses one of the following 3 random seed sources: + * Depending on availability: /dev/random, random.org and SecureRandom (provided by Java) + * The only method used by netty ssl is engineNextBytes(bytes) */ -class AES256CounterRNGSecure extends java.security.SecureRandomSpi { - private val rng = new AESCounterRNG(32) // Magic number is magic +class AES256CounterInetRNG extends java.security.SecureRandomSpi { + /** + * From AESCounterRNG API docs: + * Valid values are 16 (128 bits), 24 (192 bits) and 32 (256 bits). + * Any other values will result in an exception from the AES implementation. 
+ */ + private val AES_256_BIT = 32 // Magic number is magic + private val rng = new AESCounterRNG(AES_256_BIT) /** - * This is managed internally only + * This is managed internally by AESCounterRNG */ override protected def engineSetSeed(seed: Array[Byte]): Unit = () @@ -24,6 +34,7 @@ class AES256CounterRNGSecure extends java.security.SecureRandomSpi { override protected def engineNextBytes(bytes: Array[Byte]): Unit = rng.nextBytes(bytes) /** + * Unused method * Returns the given number of seed bytes. This call may be used to * seed other random number generators. * diff --git a/akka-remote/src/main/scala/akka/security/provider/AkkaProvider.scala b/akka-remote/src/main/scala/akka/security/provider/AkkaProvider.scala index f44aeae584..707ad0c399 100644 --- a/akka-remote/src/main/scala/akka/security/provider/AkkaProvider.scala +++ b/akka-remote/src/main/scala/akka/security/provider/AkkaProvider.scala @@ -3,23 +3,23 @@ */ package akka.security.provider -import java.security.{ PrivilegedAction, AccessController, Provider } +import java.security.{ PrivilegedAction, AccessController, Provider, Security } /** * A provider that for AES128CounterRNGFast, a cryptographically secure random number generator through SecureRandom */ -final class AkkaProvider extends Provider("Akka", 1.0, "Akka provider 1.0 that implements a secure AES random number generator") { - AccessController.doPrivileged(new PrivilegedAction[AkkaProvider] { +object AkkaProvider extends Provider("Akka", 1.0, "Akka provider 1.0 that implements a secure AES random number generator") { + AccessController.doPrivileged(new PrivilegedAction[this.type] { def run = { //SecureRandom - put("SecureRandom.AES128CounterRNGFast", "akka.security.provider.AES128CounterRNGFast") - put("SecureRandom.AES128CounterRNGSecure", "akka.security.provider.AES128CounterRNGSecure") - put("SecureRandom.AES256CounterRNGSecure", "akka.security.provider.AES256CounterRNGSecure") + put("SecureRandom.AES128CounterSecureRNG", 
classOf[AES128CounterSecureRNG].getName) + put("SecureRandom.AES128CounterInetRNG", classOf[AES128CounterInetRNG].getName) + put("SecureRandom.AES256CounterInetRNG", classOf[AES256CounterInetRNG].getName) //Implementation type: software or hardware - put("SecureRandom.AES128CounterRNGFast ImplementedIn", "Software") - put("SecureRandom.AES128CounterRNGSecure ImplementedIn", "Software") - put("SecureRandom.AES256CounterRNGSecure ImplementedIn", "Software") + put("SecureRandom.AES128CounterSecureRNG ImplementedIn", "Software") + put("SecureRandom.AES128CounterInetRNG ImplementedIn", "Software") + put("SecureRandom.AES256CounterInetRNG ImplementedIn", "Software") null //Magic null is magic } }) diff --git a/akka-remote/src/test/protocol/ProtobufProtocol.proto b/akka-remote/src/test/protocol/ProtobufProtocol.proto index ccb92aa1e3..0ff2663821 100644 --- a/akka-remote/src/test/protocol/ProtobufProtocol.proto +++ b/akka-remote/src/test/protocol/ProtobufProtocol.proto @@ -4,11 +4,13 @@ package akka.actor; -/* +/****************************************** Compile with: cd ./akka-remote/src/test/protocol protoc ProtobufProtocol.proto --java_out ../java -*/ + cd ../../../.. + ./scripts/fix-protobuf.sh +*******************************************/ message MyMessage { required uint64 id = 1; diff --git a/akka-remote/src/test/scala/akka/remote/RemoteCommunicationSpec.scala b/akka-remote/src/test/scala/akka/remote/RemoteCommunicationSpec.scala index ac4127fe17..7f92e3089b 100644 --- a/akka-remote/src/test/scala/akka/remote/RemoteCommunicationSpec.scala +++ b/akka-remote/src/test/scala/akka/remote/RemoteCommunicationSpec.scala @@ -118,7 +118,7 @@ akka { val r = expectMsgType[ActorRef] r ! 
(Props[Echo], "grandchild") val remref = expectMsgType[ActorRef] - remref.isInstanceOf[LocalActorRef] must be(true) + remref.asInstanceOf[ActorRefScope].isLocal must be(true) val myref = system.actorFor(system / "looker" / "child" / "grandchild") myref.isInstanceOf[RemoteActorRef] must be(true) myref ! 43 diff --git a/akka-remote/src/test/scala/akka/remote/Ticket1978CommunicationSpec.scala b/akka-remote/src/test/scala/akka/remote/Ticket1978CommunicationSpec.scala index 505ce180cf..64408f15b1 100644 --- a/akka-remote/src/test/scala/akka/remote/Ticket1978CommunicationSpec.scala +++ b/akka-remote/src/test/scala/akka/remote/Ticket1978CommunicationSpec.scala @@ -9,12 +9,12 @@ import com.typesafe.config._ import akka.dispatch.{ Await, Future } import akka.pattern.ask import java.io.File -import akka.event.{ NoLogging, LoggingAdapter } import java.security.{ NoSuchAlgorithmException, SecureRandom, PrivilegedAction, AccessController } import netty.{ NettySettings, NettySSLSupport } import javax.net.ssl.SSLException import akka.util.{ Timeout, Duration } import akka.util.duration._ +import akka.event.{ Logging, NoLogging, LoggingAdapter } object Configuration { // set this in your JAVA_OPTS to see all ssl debug info: "-Djavax.net.debug=ssl,keymanager" @@ -24,169 +24,122 @@ object Configuration { private val conf = """ akka { actor.provider = "akka.remote.RemoteActorRefProvider" + test { + single-expect-default = 10s + filter-leeway = 10s + default-timeout = 10s + } + remote.netty { hostname = localhost + port = %d ssl { enable = on trust-store = "%s" key-store = "%s" random-number-generator = "%s" - supported-algorithms = [%s] + enabled-algorithms = [%s] sha1prng-random-source = "/dev/./urandom" } } - actor.deployment { - /blub.remote = "akka://remote-sys@localhost:12346" - /looker/child.remote = "akka://remote-sys@localhost:12346" - /looker/child/grandchild.remote = "akka://Ticket1978CommunicationSpec@localhost:12345" - } } """ - def getCipherConfig(cipher: String, 
enabled: String*): (String, Boolean, Config) = try { + case class CipherConfig(runTest: Boolean, config: Config, cipher: String, localPort: Int, remotePort: Int) - val config = ConfigFactory.parseString("akka.remote.netty.port=12345").withFallback(ConfigFactory.parseString(conf.format(trustStore, keyStore, cipher, enabled.mkString(", ")))) - val fullConfig = config.withFallback(AkkaSpec.testConf).withFallback(ConfigFactory.load).getConfig("akka.remote.netty") - val settings = new NettySettings(fullConfig, "placeholder") + def getCipherConfig(cipher: String, enabled: String*): CipherConfig = { + val localPort, remotePort = { val s = new java.net.ServerSocket(0); try s.getLocalPort finally s.close() } + try { + //if (true) throw new IllegalArgumentException("Ticket1978*Spec isn't enabled") - val rng = NettySSLSupport.initializeCustomSecureRandom(settings.SSLRandomNumberGenerator, settings.SSLRandomSource, NoLogging) + val config = ConfigFactory.parseString(conf.format(localPort, trustStore, keyStore, cipher, enabled.mkString(", "))) + val fullConfig = config.withFallback(AkkaSpec.testConf).withFallback(ConfigFactory.load).getConfig("akka.remote.netty") + val settings = new NettySettings(fullConfig, "placeholder") - rng.nextInt() // Has to work - settings.SSLRandomNumberGenerator foreach { sRng ⇒ rng.getAlgorithm == sRng || (throw new NoSuchAlgorithmException(sRng)) } + val rng = NettySSLSupport.initializeCustomSecureRandom(settings.SSLRandomNumberGenerator, settings.SSLRandomSource, NoLogging) - val engine = NettySSLSupport.initializeServerSSL(settings, NoLogging).getEngine - val gotAllSupported = enabled.toSet -- engine.getSupportedCipherSuites.toSet - val gotAllEnabled = enabled.toSet -- engine.getEnabledCipherSuites.toSet - gotAllSupported.isEmpty || (throw new IllegalArgumentException("Cipher Suite not supported: " + gotAllSupported)) - gotAllEnabled.isEmpty || (throw new IllegalArgumentException("Cipher Suite not enabled: " + gotAllEnabled)) - 
engine.getSupportedProtocols.contains(settings.SSLProtocol.get) || (throw new IllegalArgumentException(settings.SSLProtocol.get)) + rng.nextInt() // Has to work + settings.SSLRandomNumberGenerator foreach { sRng ⇒ rng.getAlgorithm == sRng || (throw new NoSuchAlgorithmException(sRng)) } - (cipher, true, config) - } catch { - case (_: IllegalArgumentException) | (_: NoSuchAlgorithmException) | (_: SSLException) ⇒ (cipher, false, AkkaSpec.testConf) // Cannot match against the message since the message might be localized :S + val engine = NettySSLSupport.initializeClientSSL(settings, NoLogging).getEngine + val gotAllSupported = enabled.toSet -- engine.getSupportedCipherSuites.toSet + val gotAllEnabled = enabled.toSet -- engine.getEnabledCipherSuites.toSet + gotAllSupported.isEmpty || (throw new IllegalArgumentException("Cipher Suite not supported: " + gotAllSupported)) + gotAllEnabled.isEmpty || (throw new IllegalArgumentException("Cipher Suite not enabled: " + gotAllEnabled)) + engine.getSupportedProtocols.contains(settings.SSLProtocol.get) || (throw new IllegalArgumentException("Protocol not supported: " + settings.SSLProtocol.get)) + + CipherConfig(true, config, cipher, localPort, remotePort) + } catch { + case (_: IllegalArgumentException) | (_: NoSuchAlgorithmException) ⇒ CipherConfig(false, AkkaSpec.testConf, cipher, localPort, remotePort) // Cannot match against the message since the message might be localized :S + } } } -import Configuration.getCipherConfig +import Configuration.{ CipherConfig, getCipherConfig } @org.junit.runner.RunWith(classOf[org.scalatest.junit.JUnitRunner]) class Ticket1978SHA1PRNGSpec extends Ticket1978CommunicationSpec(getCipherConfig("SHA1PRNG", "TLS_RSA_WITH_AES_128_CBC_SHA")) @org.junit.runner.RunWith(classOf[org.scalatest.junit.JUnitRunner]) -class Ticket1978AES128CounterRNGFastSpec extends Ticket1978CommunicationSpec(getCipherConfig("AES128CounterRNGFast", "TLS_RSA_WITH_AES_128_CBC_SHA", "TLS_RSA_WITH_AES_256_CBC_SHA")) +class 
Ticket1978AES128CounterSecureRNGSpec extends Ticket1978CommunicationSpec(getCipherConfig("AES128CounterSecureRNG", "TLS_RSA_WITH_AES_128_CBC_SHA", "TLS_RSA_WITH_AES_256_CBC_SHA")) /** - * Both of the Secure variants require access to the Internet to access random.org. + * Both of the Inet variants require access to the Internet to access random.org. */ @org.junit.runner.RunWith(classOf[org.scalatest.junit.JUnitRunner]) -class Ticket1978AES128CounterRNGSecureSpec extends Ticket1978CommunicationSpec(getCipherConfig("AES128CounterRNGSecure", "TLS_RSA_WITH_AES_128_CBC_SHA")) +class Ticket1978AES128CounterInetRNGSpec extends Ticket1978CommunicationSpec(getCipherConfig("AES128CounterInetRNG", "TLS_RSA_WITH_AES_128_CBC_SHA")) /** - * Both of the Secure variants require access to the Internet to access random.org. + * Both of the Inet variants require access to the Internet to access random.org. */ @org.junit.runner.RunWith(classOf[org.scalatest.junit.JUnitRunner]) -class Ticket1978AES256CounterRNGSecureSpec extends Ticket1978CommunicationSpec(getCipherConfig("AES256CounterRNGSecure", "TLS_RSA_WITH_AES_256_CBC_SHA")) +class Ticket1978AES256CounterInetRNGSpec extends Ticket1978CommunicationSpec(getCipherConfig("AES256CounterInetRNG", "TLS_RSA_WITH_AES_256_CBC_SHA")) @org.junit.runner.RunWith(classOf[org.scalatest.junit.JUnitRunner]) class Ticket1978DefaultRNGSecureSpec extends Ticket1978CommunicationSpec(getCipherConfig("", "TLS_RSA_WITH_AES_128_CBC_SHA")) @org.junit.runner.RunWith(classOf[org.scalatest.junit.JUnitRunner]) -class Ticket1978NonExistingRNGSecureSpec extends Ticket1978CommunicationSpec(("NonExistingRNG", false, AkkaSpec.testConf)) +class Ticket1978CrappyRSAWithMD5OnlyHereToMakeSureThingsWorkSpec extends Ticket1978CommunicationSpec(getCipherConfig("", "SSL_RSA_WITH_NULL_MD5")) -abstract class Ticket1978CommunicationSpec(val cipherEnabledconfig: (String, Boolean, Config)) extends AkkaSpec(cipherEnabledconfig._3) with ImplicitSender { 
+@org.junit.runner.RunWith(classOf[org.scalatest.junit.JUnitRunner]) +class Ticket1978NonExistingRNGSecureSpec extends Ticket1978CommunicationSpec(CipherConfig(false, AkkaSpec.testConf, "NonExistingRNG", 12345, 12346)) - implicit val timeout: Timeout = Timeout(30 seconds) +abstract class Ticket1978CommunicationSpec(val cipherConfig: CipherConfig) extends AkkaSpec(cipherConfig.config) with ImplicitSender { + + implicit val timeout: Timeout = Timeout(10 seconds) import RemoteCommunicationSpec._ - val other = ActorSystem("remote-sys", ConfigFactory.parseString("akka.remote.netty.port=12346").withFallback(system.settings.config)) + lazy val other: ActorSystem = ActorSystem( + "remote-sys", + ConfigFactory.parseString("akka.remote.netty.port=" + cipherConfig.remotePort).withFallback(system.settings.config)) override def atTermination() { - other.shutdown() + if (cipherConfig.runTest) { + other.shutdown() + other.awaitTermination() + } } - "SSL Remoting" must { - if (cipherEnabledconfig._2) { - val remote = other.actorOf(Props(new Actor { def receive = { case "ping" ⇒ sender ! (("pong", sender)) } }), "echo") + ("-") must { + if (cipherConfig.runTest) { + val ignoreMe = other.actorOf(Props(new Actor { def receive = { case ("ping", x) ⇒ sender ! ((("pong", x), sender)) } }), "echo") + val otherAddress = other.asInstanceOf[ExtendedActorSystem].provider.asInstanceOf[RemoteActorRefProvider].transport.address - val here = system.actorFor("akka://remote-sys@localhost:12346/user/echo") + "support tell" in { + val here = system.actorFor(otherAddress.toString + "/user/echo") - "support remote look-ups" in { - here ! "ping" - expectMsgPF(timeout.duration) { - case ("pong", s: AnyRef) if s eq testActor ⇒ true - } - } - - "send error message for wrong address" in { - within(timeout.duration) { - EventFilter.error(start = "dropping", occurrences = 1).intercept { - system.actorFor("akka://remotesys@localhost:12346/user/echo") ! "ping" - }(other) - } + for (i ← 1 to 1000) here ! 
(("ping", i)) + for (i ← 1 to 1000) expectMsgPF(timeout.duration) { case (("pong", i), `testActor`) ⇒ true } } "support ask" in { - Await.result(here ? "ping", timeout.duration) match { - case ("pong", s: akka.pattern.PromiseActorRef) ⇒ // good - case m ⇒ fail(m + " was not (pong, AskActorRef)") - } + val here = system.actorFor(otherAddress.toString + "/user/echo") + + val f = for (i ← 1 to 1000) yield here ? (("ping", i)) mapTo manifest[((String, Int), ActorRef)] + Await.result(Future.sequence(f), timeout.duration).map(_._1._1).toSet must be(Set("pong")) } - "send dead letters on remote if actor does not exist" in { - within(timeout.duration) { - EventFilter.warning(pattern = "dead.*buh", occurrences = 1).intercept { - system.actorFor("akka://remote-sys@localhost:12346/does/not/exist") ! "buh" - }(other) - } - } - - "create and supervise children on remote node" in { - within(timeout.duration) { - val r = system.actorOf(Props[Echo], "blub") - r.path.toString must be === "akka://remote-sys@localhost:12346/remote/Ticket1978CommunicationSpec@localhost:12345/user/blub" - r ! 42 - expectMsg(42) - EventFilter[Exception]("crash", occurrences = 1).intercept { - r ! new Exception("crash") - }(other) - expectMsg("preRestart") - r ! 42 - expectMsg(42) - system.stop(r) - expectMsg("postStop") - } - } - - "look-up actors across node boundaries" in { - within(timeout.duration) { - val l = system.actorOf(Props(new Actor { - def receive = { - case (p: Props, n: String) ⇒ sender ! context.actorOf(p, n) - case s: String ⇒ sender ! context.actorFor(s) - } - }), "looker") - l ! (Props[Echo], "child") - val r = expectMsgType[ActorRef] - r ! (Props[Echo], "grandchild") - val remref = expectMsgType[ActorRef] - remref.isInstanceOf[LocalActorRef] must be(true) - val myref = system.actorFor(system / "looker" / "child" / "grandchild") - myref.isInstanceOf[RemoteActorRef] must be(true) - myref ! 
43 - expectMsg(43) - lastSender must be theSameInstanceAs remref - r.asInstanceOf[RemoteActorRef].getParent must be(l) - system.actorFor("/user/looker/child") must be theSameInstanceAs r - Await.result(l ? "child/..", timeout.duration).asInstanceOf[AnyRef] must be theSameInstanceAs l - Await.result(system.actorFor(system / "looker" / "child") ? "..", timeout.duration).asInstanceOf[AnyRef] must be theSameInstanceAs l - } - } - - "not fail ask across node boundaries" in { - val f = for (_ ← 1 to 1000) yield here ? "ping" mapTo manifest[(String, ActorRef)] - Await.result(Future.sequence(f), timeout.duration).map(_._1).toSet must be(Set("pong")) - } } else { "not be run when the cipher is not supported by the platform this test is currently being executed on" ignore { diff --git a/akka-remote/src/test/scala/akka/remote/Ticket1978ConfigSpec.scala b/akka-remote/src/test/scala/akka/remote/Ticket1978ConfigSpec.scala index 4017f1cfcc..0a39d20a9a 100644 --- a/akka-remote/src/test/scala/akka/remote/Ticket1978ConfigSpec.scala +++ b/akka-remote/src/test/scala/akka/remote/Ticket1978ConfigSpec.scala @@ -15,12 +15,7 @@ akka { actor.provider = "akka.remote.RemoteActorRefProvider" remote.netty { hostname = localhost - port = 12345 - } - actor.deployment { - /blub.remote = "akka://remote-sys@localhost:12346" - /looker/child.remote = "akka://remote-sys@localhost:12346" - /looker/child/grandchild.remote = "akka://RemoteCommunicationSpec@localhost:12345" + port = 0 } } """) with ImplicitSender with DefaultTimeout { @@ -40,7 +35,7 @@ akka { SSLTrustStore must be(Some("truststore")) SSLTrustStorePassword must be(Some("changeme")) SSLProtocol must be(Some("TLSv1")) - SSLSupportedAlgorithms must be(Set("TLS_RSA_WITH_AES_128_CBC_SHA")) + SSLEnabledAlgorithms must be(Set("TLS_RSA_WITH_AES_128_CBC_SHA")) SSLRandomSource must be(None) SSLRandomNumberGenerator must be(None) } diff --git a/akka-testkit/src/main/scala/akka/testkit/CallingThreadDispatcher.scala 
b/akka-testkit/src/main/scala/akka/testkit/CallingThreadDispatcher.scala index 2fe664d7b6..1732d5faf3 100644 --- a/akka-testkit/src/main/scala/akka/testkit/CallingThreadDispatcher.scala +++ b/akka-testkit/src/main/scala/akka/testkit/CallingThreadDispatcher.scala @@ -128,7 +128,7 @@ class CallingThreadDispatcher( override def id: String = Id - protected[akka] override def createMailbox(actor: ActorCell) = new CallingThreadMailbox(actor, mailboxType) + protected[akka] override def createMailbox(actor: akka.actor.Cell) = new CallingThreadMailbox(actor, mailboxType) protected[akka] override def shutdown() {} @@ -281,17 +281,21 @@ class NestingQueue(val q: MessageQueue) { def isActive = active } -class CallingThreadMailbox(_receiver: ActorCell, val mailboxType: MailboxType) extends Mailbox(_receiver, null) with DefaultSystemMessageQueue { +class CallingThreadMailbox(_receiver: akka.actor.Cell, val mailboxType: MailboxType) + extends Mailbox(null) with DefaultSystemMessageQueue { + + val system = _receiver.system + val self = _receiver.self private val q = new ThreadLocal[NestingQueue]() { override def initialValue = { - val queue = new NestingQueue(mailboxType.create(Some(actor))) - CallingThreadDispatcherQueues(actor.system).registerQueue(CallingThreadMailbox.this, queue) + val queue = new NestingQueue(mailboxType.create(Some(self), Some(system))) + CallingThreadDispatcherQueues(system).registerQueue(CallingThreadMailbox.this, queue) queue } } - override def enqueue(receiver: ActorRef, msg: Envelope): Unit = throw new UnsupportedOperationException("CallingThreadMailbox cannot enqueue normally") + override def enqueue(receiver: ActorRef, msg: Envelope): Unit = q.get.q.enqueue(receiver, msg) override def dequeue(): Envelope = throw new UnsupportedOperationException("CallingThreadMailbox cannot dequeue normally") override def hasMessages: Boolean = q.get.q.hasMessages override def numberOfMessages: Int = 0 @@ -311,7 +315,7 @@ class CallingThreadMailbox(_receiver: 
ActorCell, val mailboxType: MailboxType) e val q = queue CallingThreadDispatcherQueues(actor.system).gatherFromAllOtherQueues(this, q) super.cleanUp() - q.q.cleanUp(actor, actor.systemImpl.deadLetterQueue) + q.q.cleanUp(actor.self, actor.systemImpl.deadLetterQueue) } } } diff --git a/akka-testkit/src/main/scala/akka/testkit/TestActorRef.scala b/akka-testkit/src/main/scala/akka/testkit/TestActorRef.scala index f8efe4e2e5..73658cf985 100644 --- a/akka-testkit/src/main/scala/akka/testkit/TestActorRef.scala +++ b/akka-testkit/src/main/scala/akka/testkit/TestActorRef.scala @@ -56,7 +56,7 @@ class TestActorRef[T <: Actor]( * become/unbecome. */ def receive(o: Any, sender: ActorRef): Unit = try { - underlying.currentMessage = Envelope(o, if (sender eq null) underlying.system.deadLetters else sender)(underlying.system) + underlying.currentMessage = Envelope(o, if (sender eq null) underlying.system.deadLetters else sender, underlying.system) underlying.receiveMessage(o) } finally underlying.currentMessage = null diff --git a/akka-testkit/src/main/scala/akka/testkit/TestKit.scala b/akka-testkit/src/main/scala/akka/testkit/TestKit.scala index 4a5a880bb0..5f75ba8dfa 100644 --- a/akka-testkit/src/main/scala/akka/testkit/TestKit.scala +++ b/akka-testkit/src/main/scala/akka/testkit/TestKit.scala @@ -97,9 +97,14 @@ trait TestKitBase { */ lazy val testActor: ActorRef = { val impl = system.asInstanceOf[ActorSystemImpl] //TODO ticket #1559 - impl.systemActorOf(Props(new TestActor(queue)) + val ref = impl.systemActorOf(Props(new TestActor(queue)) .withDispatcher(CallingThreadDispatcher.Id), "testActor" + TestKit.testActorId.incrementAndGet) + awaitCond(ref match { + case r: RepointableRef ⇒ r.isStarted + case _ ⇒ true + }, 1 second, 10 millis) + ref } private var end: Duration = Duration.Undefined @@ -158,7 +163,13 @@ trait TestKitBase { * block or missing that it returns the properly dilated default for this * case from settings (key "akka.test.single-expect-default"). 
*/ - def remaining: Duration = if (end == Duration.Undefined) testKitSettings.SingleExpectDefaultTimeout.dilated else end - now + def remaining: Duration = remainingOr(testKitSettings.SingleExpectDefaultTimeout.dilated) + + /** + * Obtain time remaining for execution of the innermost enclosing `within` + * block or missing that it returns the given duration. + */ + def remainingOr(duration: Duration): Duration = if (end == Duration.Undefined) duration else end - now /** * Query queue status. @@ -607,12 +618,6 @@ object TestKit { /** * Await until the given condition evaluates to `true` or the timeout * expires, whichever comes first. - * - * If no timeout is given, take it from the innermost enclosing `within` - * block. - * - * Note that the timeout is scaled using Duration.dilated, which uses the - * configuration entry "akka.test.timefactor" */ def awaitCond(p: ⇒ Boolean, max: Duration, interval: Duration = 100.millis, noThrow: Boolean = false): Boolean = { val stop = now + max diff --git a/akka-testkit/src/test/scala/akka/testkit/AkkaSpec.scala b/akka-testkit/src/test/scala/akka/testkit/AkkaSpec.scala index 424c913662..f9ee989e1c 100644 --- a/akka-testkit/src/test/scala/akka/testkit/AkkaSpec.scala +++ b/akka-testkit/src/test/scala/akka/testkit/AkkaSpec.scala @@ -74,6 +74,7 @@ abstract class AkkaSpec(_system: ActorSystem) } final override def afterAll { + beforeShutdown() system.shutdown() try system.awaitTermination(5 seconds) catch { case _: TimeoutException ⇒ system.log.warning("Failed to stop [{}] within 5 seconds", system.name) @@ -83,6 +84,8 @@ abstract class AkkaSpec(_system: ActorSystem) protected def atStartup() {} + protected def beforeShutdown() {} + protected def atTermination() {} def spawn(dispatcherId: String = Dispatchers.DefaultDispatcherId)(body: ⇒ Unit) { diff --git a/akka-zeromq/src/main/scala/akka/zeromq/ConcurrentSocketActor.scala b/akka-zeromq/src/main/scala/akka/zeromq/ConcurrentSocketActor.scala index 71b7b185f0..e1b1ba4ddf 100644 --- 
a/akka-zeromq/src/main/scala/akka/zeromq/ConcurrentSocketActor.scala +++ b/akka-zeromq/src/main/scala/akka/zeromq/ConcurrentSocketActor.scala @@ -190,29 +190,24 @@ private[zeromq] class ConcurrentSocketActor(params: Seq[SocketOption]) extends A } @tailrec private def doPoll(mode: PollMsg, togo: Int = 10): Unit = - receiveMessage(mode) match { - case null ⇒ // receiveMessage has already done something special here - case Seq() ⇒ doPollTimeout(mode) - case frames ⇒ - notifyListener(deserializer(frames)) - if (togo > 0) doPoll(mode, togo - 1) - else self ! mode + if (togo <= 0) self ! mode + else receiveMessage(mode) match { + case Seq() ⇒ doPollTimeout(mode) + case frames ⇒ notifyListener(deserializer(frames)); doPoll(mode, togo - 1) } - @tailrec private def receiveMessage(mode: PollMsg, currentFrames: Vector[Frame] = Vector.empty): Seq[Frame] = { - val result = mode match { - case Poll ⇒ socket.recv(JZMQ.NOBLOCK) - case PollCareful ⇒ if (poller.poll(0) > 0) socket.recv(0) else null + @tailrec private def receiveMessage(mode: PollMsg, currentFrames: Vector[Frame] = Vector.empty): Seq[Frame] = + if (mode == PollCareful && (poller.poll(0) <= 0)) { + if (currentFrames.isEmpty) currentFrames else throw new IllegalStateException("Received partial transmission!") + } else { + socket.recv(if (mode == Poll) JZMQ.NOBLOCK else 0) match { + case null ⇒ /*EAGAIN*/ + if (currentFrames.isEmpty) currentFrames else receiveMessage(mode, currentFrames) + case bytes ⇒ + val frames = currentFrames :+ Frame(if (bytes.length == 0) noBytes else bytes) + if (socket.hasReceiveMore) receiveMessage(mode, frames) else frames + } } - result match { - case null ⇒ - if (currentFrames.isEmpty) currentFrames - else throw new IllegalStateException("no more frames available while socket.hasReceivedMore==true") - case bytes ⇒ - val frames = currentFrames :+ Frame(if (bytes.length == 0) noBytes else bytes) - if (socket.hasReceiveMore) receiveMessage(mode, frames) else frames - } - } private val 
listenerOpt = params collectFirst { case Listener(l) ⇒ l } private def watchListener(): Unit = listenerOpt foreach context.watch diff --git a/project/AkkaBuild.scala b/project/AkkaBuild.scala index c0e3a72a68..d9d5517703 100644 --- a/project/AkkaBuild.scala +++ b/project/AkkaBuild.scala @@ -397,8 +397,8 @@ object AkkaBuild extends Build { if (tags.isEmpty) Seq.empty else Seq(Tests.Argument("-n", tags.mkString(" "))) }, - // show full stack traces - testOptions in Test += Tests.Argument("-oF") + // show full stack traces and test case durations + testOptions in Test += Tests.Argument("-oDF") ) lazy val formatSettings = ScalariformPlugin.scalariformSettings ++ Seq( @@ -496,7 +496,7 @@ object Dependency { object V { val Camel = "2.8.0" val Logback = "1.0.4" - val Netty = "3.5.0.Final" + val Netty = "3.5.1.Final" val OSGi = "4.2.0" val Protobuf = "2.4.1" val ScalaStm = "0.5" diff --git a/project/Sphinx.scala b/project/Sphinx.scala index 43b7e60358..4707215875 100644 --- a/project/Sphinx.scala +++ b/project/Sphinx.scala @@ -87,16 +87,15 @@ object Sphinx { def pdfTask = (sphinxLatex, streams) map { (latex, s) => { - val empty = (latex * "*.pdf").get.isEmpty + val pdf = latex / "Akka.pdf" def failed = sys.error("Failed to build Sphinx pdf documentation.") - if (empty) { + if (!pdf.exists) { s.log.info("Building Sphinx pdf documentation...") val logger = newLogger(s) val exitCode = Process(Seq("make", "all-pdf"), latex) ! logger if (exitCode != 0) failed + s.log.info("Sphinx pdf documentation created: %s" format pdf) } - val pdf = (latex * "*.pdf").get.headOption.getOrElse(failed) - if (empty) s.log.info("Sphinx pdf documentation created: %s" format pdf) pdf } }