+rem #18353: Prune reliable deliver actors
(cherry picked from commit 6643f56)
This commit is contained in:
parent
fc75eb361a
commit
c4e326c9dd
6 changed files with 248 additions and 20 deletions
|
|
@ -21,7 +21,7 @@ import akka.serialization.Serialization
|
|||
import akka.util.ByteString
|
||||
import akka.{ OnlyCauseStackTrace, AkkaException }
|
||||
import java.io.NotSerializableException
|
||||
import java.util.concurrent.ConcurrentHashMap
|
||||
import java.util.concurrent.{ TimeUnit, TimeoutException, ConcurrentHashMap }
|
||||
import scala.annotation.tailrec
|
||||
import scala.concurrent.duration.{ Duration, Deadline }
|
||||
import scala.util.control.NonFatal
|
||||
|
|
@ -168,6 +168,7 @@ private[remote] object ReliableDeliverySupervisor {
|
|||
|
||||
case object IsIdle
|
||||
case object Idle
|
||||
case object TooLongIdle
|
||||
|
||||
def props(
|
||||
handleOrActive: Option[AkkaProtocolHandle],
|
||||
|
|
@ -200,6 +201,8 @@ private[remote] class ReliableDeliverySupervisor(
|
|||
val autoResendTimer = context.system.scheduler.schedule(
|
||||
settings.SysResendTimeout, settings.SysResendTimeout, self, AttemptSysMsgRedelivery)
|
||||
|
||||
private var bufferWasInUse = false
|
||||
|
||||
override val supervisorStrategy = OneForOneStrategy(loggingEnabled = false) {
|
||||
case e @ (_: AssociationProblem) ⇒ Escalate
|
||||
case NonFatal(e) ⇒
|
||||
|
|
@ -207,12 +210,14 @@ private[remote] class ReliableDeliverySupervisor(
|
|||
log.warning("Association with remote system [{}] has failed, address is now gated for [{}] ms. Reason: [{}] {}",
|
||||
remoteAddress, settings.RetryGateClosedFor.toMillis, e.getMessage, causedBy)
|
||||
uidConfirmed = false // Need confirmation of UID again
|
||||
if ((resendBuffer.nacked.nonEmpty || resendBuffer.nonAcked.nonEmpty) && bailoutAt.isEmpty)
|
||||
bailoutAt = Some(Deadline.now + settings.InitialSysMsgDeliveryTimeout)
|
||||
context.become(gated)
|
||||
currentHandle = None
|
||||
context.parent ! StoppedReading(self)
|
||||
Stop
|
||||
if (bufferWasInUse) {
|
||||
if ((resendBuffer.nacked.nonEmpty || resendBuffer.nonAcked.nonEmpty) && bailoutAt.isEmpty)
|
||||
bailoutAt = Some(Deadline.now + settings.InitialSysMsgDeliveryTimeout)
|
||||
context.become(gated)
|
||||
currentHandle = None
|
||||
context.parent ! StoppedReading(self)
|
||||
Stop
|
||||
} else Escalate
|
||||
}
|
||||
|
||||
var currentHandle: Option[AkkaProtocolHandle] = handleOrActive
|
||||
|
|
@ -237,6 +242,7 @@ private[remote] class ReliableDeliverySupervisor(
|
|||
var writer: ActorRef = createWriter()
|
||||
var uid: Option[Int] = handleOrActive map { _.handshakeInfo.uid }
|
||||
var bailoutAt: Option[Deadline] = None
|
||||
var maxSilenceTimer: Option[Cancellable] = None
|
||||
// Processing of Acks has to be delayed until the UID after a reconnect is discovered. Depending on whether the
|
||||
// UID matches the expected one, pending Acks can be processed, or must be dropped. It is guaranteed that for
|
||||
// any inbound connections (calling createWriter()) the first message from that connection is GotUid() therefore
|
||||
|
|
@ -255,6 +261,7 @@ private[remote] class ReliableDeliverySupervisor(
|
|||
(resendBuffer.nacked ++ resendBuffer.nonAcked) foreach { s ⇒ context.system.deadLetters ! s.copy(seqOpt = None) }
|
||||
receiveBuffers.remove(Link(localAddress, remoteAddress))
|
||||
autoResendTimer.cancel()
|
||||
maxSilenceTimer.foreach(_.cancel())
|
||||
}
|
||||
|
||||
override def postRestart(reason: Throwable): Unit = {
|
||||
|
|
@ -291,7 +298,7 @@ private[remote] class ReliableDeliverySupervisor(
|
|||
context.parent ! StoppedReading(self)
|
||||
if (resendBuffer.nonAcked.nonEmpty || resendBuffer.nacked.nonEmpty)
|
||||
context.system.scheduler.scheduleOnce(settings.SysResendTimeout, self, AttemptSysMsgRedelivery)
|
||||
context.become(idle)
|
||||
goToIdle()
|
||||
case g @ GotUid(receivedUid, _) ⇒
|
||||
bailoutAt = None
|
||||
context.parent ! g
|
||||
|
|
@ -321,8 +328,8 @@ private[remote] class ReliableDeliverySupervisor(
|
|||
new java.util.concurrent.TimeoutException("Delivery of system messages timed out and they were dropped."))
|
||||
writer = createWriter()
|
||||
// Resending will be triggered by the incoming GotUid message after the connection finished
|
||||
context.become(receive)
|
||||
} else context.become(idle)
|
||||
goToActive()
|
||||
} else goToIdle()
|
||||
case AttemptSysMsgRedelivery ⇒ // Ignore
|
||||
case s @ Send(msg: SystemMessage, _, _, _) ⇒ tryBuffer(s.copy(seqOpt = Some(nextSeq())))
|
||||
case s: Send ⇒ context.system.deadLetters ! s
|
||||
|
|
@ -338,18 +345,34 @@ private[remote] class ReliableDeliverySupervisor(
|
|||
writer = createWriter()
|
||||
// Resending will be triggered by the incoming GotUid message after the connection finished
|
||||
handleSend(s)
|
||||
context.become(receive)
|
||||
goToActive()
|
||||
case AttemptSysMsgRedelivery ⇒
|
||||
if (resendBuffer.nacked.nonEmpty || resendBuffer.nonAcked.nonEmpty) {
|
||||
writer = createWriter()
|
||||
// Resending will be triggered by the incoming GotUid message after the connection finished
|
||||
context.become(receive)
|
||||
goToActive()
|
||||
}
|
||||
case TooLongIdle ⇒
|
||||
throw new HopelessAssociation(localAddress, remoteAddress, uid,
|
||||
new TimeoutException("Remote system has been silent for too long. " +
|
||||
s"(more than ${settings.QuarantineSilentSystemTimeout.toUnit(TimeUnit.HOURS)} hours)"))
|
||||
case EndpointWriter.FlushAndStop ⇒ context.stop(self)
|
||||
case EndpointWriter.StopReading(w, replyTo) ⇒
|
||||
replyTo ! EndpointWriter.StoppedReading(w)
|
||||
}
|
||||
|
||||
/**
 * Switches this actor's behavior to `idle`, arming the silence timer first when needed.
 *
 * The timer is only armed if `bufferWasInUse` is set and no timer is currently stored in
 * `maxSilenceTimer`, so repeated calls do not stack several scheduled `TooLongIdle` messages.
 */
private def goToIdle(): Unit = {
|
||||
// Arm at most one timer: skip when the buffer was never used or a timer is already pending.
if (bufferWasInUse && maxSilenceTimer.isEmpty)
|
||||
// Schedule a `TooLongIdle` message to `self` after `settings.QuarantineSilentSystemTimeout` and keep
// the returned handle so that it can be cancelled later.
maxSilenceTimer = Some(context.system.scheduler.scheduleOnce(settings.QuarantineSilentSystemTimeout, self, TooLongIdle))
|
||||
context.become(idle)
|
||||
}
|
||||
|
||||
/**
 * Switches this actor's behavior to `receive`, disarming any pending silence timer first.
 *
 * Cancels the handle stored in `maxSilenceTimer` (if any) and resets the field to `None`.
 */
private def goToActive(): Unit = {
|
||||
// No-op when no timer is stored.
maxSilenceTimer.foreach(_.cancel())
|
||||
// Clear the field so that a later goToIdle() sees `isEmpty` and can arm a fresh timer.
maxSilenceTimer = None
|
||||
context.become(receive)
|
||||
}
|
||||
|
||||
def flushWait: Receive = {
|
||||
case IsIdle ⇒ // Do not reply, we will Terminate soon, which will do the inbound connection unstashing
|
||||
case Terminated(_) ⇒
|
||||
|
|
@ -381,6 +404,7 @@ private[remote] class ReliableDeliverySupervisor(
|
|||
/**
 * Adds `s` to `resendBuffer` and records that the buffer has been used.
 *
 * Any non-fatal failure while buffering is rethrown as a `HopelessAssociation` carrying the
 * local address, remote address, current `uid` and the original cause.
 *
 * @param s the message to append to the resend buffer
 */
private def tryBuffer(s: Send): Unit =
|
||||
try {
|
||||
resendBuffer = resendBuffer buffer s
|
||||
// Only reached when buffering succeeded; the flag is read by goToIdle() and the supervisor strategy.
bufferWasInUse = true
|
||||
} catch {
|
||||
// NonFatal lets fatal errors propagate unchanged; everything else is wrapped.
case NonFatal(e) ⇒ throw new HopelessAssociation(localAddress, remoteAddress, uid, e)
|
||||
}
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue