+rem #18353: Prune reliable deliver actors

(cherry picked from commit 6643f56)
This commit is contained in:
Endre Sándor Varga 2015-09-16 15:26:24 +02:00
parent fc75eb361a
commit c4e326c9dd
6 changed files with 248 additions and 20 deletions

View file

@ -21,7 +21,7 @@ import akka.serialization.Serialization
import akka.util.ByteString
import akka.{ OnlyCauseStackTrace, AkkaException }
import java.io.NotSerializableException
import java.util.concurrent.ConcurrentHashMap
import java.util.concurrent.{ TimeUnit, TimeoutException, ConcurrentHashMap }
import scala.annotation.tailrec
import scala.concurrent.duration.{ Duration, Deadline }
import scala.util.control.NonFatal
@ -168,6 +168,7 @@ private[remote] object ReliableDeliverySupervisor {
case object IsIdle
case object Idle
case object TooLongIdle
def props(
handleOrActive: Option[AkkaProtocolHandle],
@ -200,6 +201,8 @@ private[remote] class ReliableDeliverySupervisor(
// Recurring timer that sends AttemptSysMsgRedelivery to this actor; settings.SysResendTimeout is
// passed as both the first and the second duration argument of schedule(...).
val autoResendTimer = context.system.scheduler.schedule(
settings.SysResendTimeout, settings.SysResendTimeout, self, AttemptSysMsgRedelivery)
// Set to true by tryBuffer once a Send has been stored in resendBuffer. It is read by goToIdle
// (to decide whether to arm the TooLongIdle timer) and by the supervisor strategy.
// NOTE(review): no visible code resets it to false — confirm that this is intended.
private var bufferWasInUse = false
override val supervisorStrategy = OneForOneStrategy(loggingEnabled = false) {
case e @ (_: AssociationProblem) Escalate
case NonFatal(e)
@ -207,12 +210,14 @@ private[remote] class ReliableDeliverySupervisor(
log.warning("Association with remote system [{}] has failed, address is now gated for [{}] ms. Reason: [{}] {}",
remoteAddress, settings.RetryGateClosedFor.toMillis, e.getMessage, causedBy)
uidConfirmed = false // Need confirmation of UID again
if ((resendBuffer.nacked.nonEmpty || resendBuffer.nonAcked.nonEmpty) && bailoutAt.isEmpty)
bailoutAt = Some(Deadline.now + settings.InitialSysMsgDeliveryTimeout)
context.become(gated)
currentHandle = None
context.parent ! StoppedReading(self)
Stop
if (bufferWasInUse) {
if ((resendBuffer.nacked.nonEmpty || resendBuffer.nonAcked.nonEmpty) && bailoutAt.isEmpty)
bailoutAt = Some(Deadline.now + settings.InitialSysMsgDeliveryTimeout)
context.become(gated)
currentHandle = None
context.parent ! StoppedReading(self)
Stop
} else Escalate
}
var currentHandle: Option[AkkaProtocolHandle] = handleOrActive
@ -237,6 +242,7 @@ private[remote] class ReliableDeliverySupervisor(
// Reference returned by createWriter(); the gated and idle handlers reassign it with a fresh
// createWriter() call when a new connection attempt is started.
var writer: ActorRef = createWriter()
// UID taken from the handshake info of the initially supplied handle, if there was one.
var uid: Option[Int] = handleOrActive map { _.handshakeInfo.uid }
// Deadline of Deadline.now + settings.InitialSysMsgDeliveryTimeout, set by the supervisor strategy
// when a failure occurs while the resend buffer still holds messages; cleared when GotUid arrives.
var bailoutAt: Option[Deadline] = None
// Handle of the one-shot TooLongIdle timer armed by goToIdle; cancelled and cleared by goToActive.
var maxSilenceTimer: Option[Cancellable] = None
// Processing of Acks has to be delayed until the UID after a reconnect is discovered. Depending whether the
// UID matches the expected one, pending Acks can be processed, or must be dropped. It is guaranteed that for
// any inbound connections (calling createWriter()) the first message from that connection is GotUid() therefore
@ -255,6 +261,7 @@ private[remote] class ReliableDeliverySupervisor(
(resendBuffer.nacked ++ resendBuffer.nonAcked) foreach { s context.system.deadLetters ! s.copy(seqOpt = None) }
receiveBuffers.remove(Link(localAddress, remoteAddress))
autoResendTimer.cancel()
maxSilenceTimer.foreach(_.cancel())
}
override def postRestart(reason: Throwable): Unit = {
@ -291,7 +298,7 @@ private[remote] class ReliableDeliverySupervisor(
context.parent ! StoppedReading(self)
if (resendBuffer.nonAcked.nonEmpty || resendBuffer.nacked.nonEmpty)
context.system.scheduler.scheduleOnce(settings.SysResendTimeout, self, AttemptSysMsgRedelivery)
context.become(idle)
goToIdle()
case g @ GotUid(receivedUid, _)
bailoutAt = None
context.parent ! g
@ -321,8 +328,8 @@ private[remote] class ReliableDeliverySupervisor(
new java.util.concurrent.TimeoutException("Delivery of system messages timed out and they were dropped."))
writer = createWriter()
// Resending will be triggered by the incoming GotUid message after the connection finished
context.become(receive)
} else context.become(idle)
goToActive()
} else goToIdle()
case AttemptSysMsgRedelivery // Ignore
case s @ Send(msg: SystemMessage, _, _, _) tryBuffer(s.copy(seqOpt = Some(nextSeq())))
case s: Send context.system.deadLetters ! s
@ -338,18 +345,34 @@ private[remote] class ReliableDeliverySupervisor(
writer = createWriter()
// Resending will be triggered by the incoming GotUid message after the connection finished
handleSend(s)
context.become(receive)
goToActive()
case AttemptSysMsgRedelivery
if (resendBuffer.nacked.nonEmpty || resendBuffer.nonAcked.nonEmpty) {
writer = createWriter()
// Resending will be triggered by the incoming GotUid message after the connection finished
context.become(receive)
goToActive()
}
case TooLongIdle
throw new HopelessAssociation(localAddress, remoteAddress, uid,
new TimeoutException("Remote system has been silent for too long. " +
s"(more than ${settings.QuarantineSilentSystemTimeout.toUnit(TimeUnit.HOURS)} hours)"))
case EndpointWriter.FlushAndStop context.stop(self)
case EndpointWriter.StopReading(w, replyTo)
replyTo ! EndpointWriter.StoppedReading(w)
}
/**
 * Switches this actor to the `idle` behavior.
 *
 * If the resend buffer has been used and no silence timer is currently stored, a one-shot
 * `TooLongIdle` message to `self` is scheduled after `settings.QuarantineSilentSystemTimeout`
 * and its handle is kept in `maxSilenceTimer`.
 */
private def goToIdle(): Unit = {
  val silenceTimerNeeded = bufferWasInUse && maxSilenceTimer.isEmpty
  if (silenceTimerNeeded) {
    val pendingTimer =
      context.system.scheduler.scheduleOnce(settings.QuarantineSilentSystemTimeout, self, TooLongIdle)
    maxSilenceTimer = Some(pendingTimer)
  }
  context.become(idle)
}
/**
 * Switches this actor to the `receive` behavior, first cancelling and forgetting any silence
 * timer that is stored in `maxSilenceTimer`.
 */
private def goToActive(): Unit = {
  for (pendingTimer <- maxSilenceTimer) pendingTimer.cancel()
  maxSilenceTimer = None
  context.become(receive)
}
def flushWait: Receive = {
case IsIdle // Do not reply, we will Terminate soon, which will do the inbound connection unstashing
case Terminated(_)
@ -381,6 +404,7 @@ private[remote] class ReliableDeliverySupervisor(
/**
 * Stores `s` in the resend buffer and, once that has succeeded, records that the buffer has
 * been used.
 *
 * Any non-fatal failure raised while buffering is rethrown as a `HopelessAssociation` for
 * this local/remote address pair and the current `uid`, carrying the original failure as cause.
 *
 * @param s the send to append to the resend buffer
 */
private def tryBuffer(s: Send): Unit =
  try {
    resendBuffer = resendBuffer buffer s
    // Only reached when buffering did not throw.
    bufferWasInUse = true
  } catch {
    case NonFatal(e) => throw new HopelessAssociation(localAddress, remoteAddress, uid, e)
  }