Quarantine and cleanup idle associations, #24972

* fix NPE in shutdownTransport
  * perhaps because shutdown before started
  * system.dispatcher is used in other places of the shutdown
* improve logging of compression advertisement progress
* adjust RestartFlow.withBackoff parameters
* quarantine after ActorSystemTerminating signal
  (will clean up compressions)
* Quarantine idle associations
  * liveness checks by sending extra HandshakeReq and updating
    lastUsed when a reply is received
  * conservative default value to survive network partition, in
    case no other messages are sent
* Adjust logging and QuarantinedEvent for harmless quarantine
  * Harmless if it was via the shutdown signal or cluster leaving
This commit is contained in:
Patrik Nordwall 2018-04-25 08:38:27 +02:00
parent f976f8d793
commit 7fc7744049
15 changed files with 297 additions and 176 deletions

View file

@ -54,7 +54,7 @@ object RemoteWatcherSpec {
// that doesn't interfere with the real watch that is going on in the background
context.system.eventStream.publish(TestRemoteWatcher.AddressTerm(address))
override def quarantine(address: Address, uid: Option[Long], reason: String): Unit = {
override def quarantine(address: Address, uid: Option[Long], reason: String, harmless: Boolean): Unit = {
// don't quarantine in remoting, but publish a testable message
context.system.eventStream.publish(TestRemoteWatcher.Quarantined(address, uid))
}

View file

@ -33,11 +33,13 @@ class OutboundHandshakeSpec extends AkkaSpec with ImplicitSender {
private def setupStream(
outboundContext: OutboundContext, timeout: FiniteDuration = 5.seconds,
retryInterval: FiniteDuration = 10.seconds,
injectHandshakeInterval: FiniteDuration = 10.seconds): (TestPublisher.Probe[String], TestSubscriber.Probe[Any]) = {
injectHandshakeInterval: FiniteDuration = 10.seconds,
livenessProbeInterval: Duration = Duration.Undefined): (TestPublisher.Probe[String], TestSubscriber.Probe[Any]) = {
TestSource.probe[String]
.map(msg outboundEnvelopePool.acquire().init(OptionVal.None, msg, OptionVal.None))
.via(new OutboundHandshake(system, outboundContext, outboundEnvelopePool, timeout, retryInterval, injectHandshakeInterval))
.via(new OutboundHandshake(system, outboundContext, outboundEnvelopePool, timeout, retryInterval,
injectHandshakeInterval, livenessProbeInterval))
.map(env env.message)
.toMat(TestSink.probe[Any])(Keep.both)
.run()
@ -130,6 +132,21 @@ class OutboundHandshakeSpec extends AkkaSpec with ImplicitSender {
downstream.cancel()
}
// Liveness probing: with livenessProbeInterval set to 200 ms, the test expects
// further HandshakeReq messages downstream after the handshake has completed.
"send HandshakeReq for liveness probing" in {
val inboundContext = new TestInboundContext(localAddress = addressA)
val outboundContext = inboundContext.association(addressB.address)
val (upstream, downstream) = setupStream(outboundContext, livenessProbeInterval = 200.millis)
// request more elements than the three that are asserted below
downstream.request(10)
// the first HandshakeReq is the initial one, expected before the handshake is completed
downstream.expectNext(HandshakeReq(addressA, addressB.address))
inboundContext.completeHandshake(addressB)
// these two arrive after the handshake is completed; they are from livenessProbeInterval
downstream.expectNext(HandshakeReq(addressA, addressB.address))
downstream.expectNext(HandshakeReq(addressA, addressB.address))
downstream.cancel()
}
}
}

View file

@ -105,9 +105,6 @@ class OutboundIdleShutdownSpec extends ArteryMultiNodeSpec(s"""
assertStreamActive(association, Association.OrdinaryQueueIndex, expected = false)
}
Thread.sleep(2000)
// localArtery.quarantine(remoteAddress, Some(remoteUid), "Test")
// the outbound streams are inactive and association quarantined, then it's completely removed
eventually {
localArtery.remoteAddresses should not contain remoteAddress

View file

@ -55,7 +55,7 @@ object RemoteWatcherSpec {
// that doesn't interfere with the real watch that is going on in the background
context.system.eventStream.publish(TestRemoteWatcher.AddressTerm(address))
override def quarantine(address: Address, uid: Option[Long], reason: String): Unit = {
override def quarantine(address: Address, uid: Option[Long], reason: String, harmless: Boolean): Unit = {
// don't quarantine in remoting, but publish a testable message
context.system.eventStream.publish(TestRemoteWatcher.Quarantined(address, uid))
}