From 9f175f56deb69025137f11436ecd2f491b44b162 Mon Sep 17 00:00:00 2001 From: Patrik Nordwall Date: Wed, 21 Sep 2016 20:27:04 +0200 Subject: [PATCH] fix problem with quick restart, #21512 * image-liveness-timeout must be less than the handshake-timeout, otherwise the publication for the handshake will give up too early when the previous image is still considered alive --- .../scala/akka/cluster/SharedMediaDriverSupport.scala | 2 +- akka-remote/src/main/resources/reference.conf | 6 ++++-- .../src/main/scala/akka/remote/artery/ArterySettings.scala | 3 ++- .../src/main/scala/akka/remote/artery/ArteryTransport.scala | 2 +- .../test/scala/akka/remote/artery/HandshakeDenySpec.scala | 1 + .../scala/akka/remote/artery/HandshakeFailureSpec.scala | 1 + .../test/scala/akka/remote/artery/HandshakeRetrySpec.scala | 1 + .../src/test/scala/akka/remote/artery/LateConnectSpec.scala | 1 + .../compress/HandshakeShouldDropCompressionTableSpec.scala | 1 + 9 files changed, 13 insertions(+), 5 deletions(-) diff --git a/akka-cluster/src/multi-jvm/scala/akka/cluster/SharedMediaDriverSupport.scala b/akka-cluster/src/multi-jvm/scala/akka/cluster/SharedMediaDriverSupport.scala index 925bfbc4d5..f45a54b245 100644 --- a/akka-cluster/src/multi-jvm/scala/akka/cluster/SharedMediaDriverSupport.scala +++ b/akka-cluster/src/multi-jvm/scala/akka/cluster/SharedMediaDriverSupport.scala @@ -35,7 +35,7 @@ object SharedMediaDriverSupport { val driverContext = new MediaDriver.Context driverContext.aeronDirectoryName(aeronDir) driverContext.clientLivenessTimeoutNs(arterySettings.Advanced.ClientLivenessTimeout.toNanos) - driverContext.imageLivenessTimeoutNs(arterySettings.Advanced.ImageLivenessTimeoutNs.toNanos) + driverContext.imageLivenessTimeoutNs(arterySettings.Advanced.ImageLivenessTimeout.toNanos) driverContext.driverTimeoutMs(arterySettings.Advanced.DriverTimeout.toMillis) val idleCpuLevel = arterySettings.Advanced.IdleCpuLevel diff --git a/akka-remote/src/main/resources/reference.conf 
b/akka-remote/src/main/resources/reference.conf index f79a1be708..ef2116885f 100644 --- a/akka-remote/src/main/resources/reference.conf +++ b/akka-remote/src/main/resources/reference.conf @@ -227,7 +227,8 @@ akka { system-message-resend-interval = 1 second # The timeout for outbound associations to perform the handshake. - handshake-timeout = 15 s + # This timeout must be greater than the 'image-liveness-timeout'. + handshake-timeout = 20 s # incomplete handshake attempt is retried with this interval handshake-retry-interval = 1 second @@ -274,7 +275,8 @@ akka { # Timeout for each the INACTIVE and LINGER stages an aeron image # will be retained for when it is no longer referenced. - image-liveness-timeout = 20 seconds + # This timeout must be less than the 'handshake-timeout'. + image-liveness-timeout = 10 seconds # Timeout after which the aeron driver is considered dead # if it does not update its C'n'C timestamp. diff --git a/akka-remote/src/main/scala/akka/remote/artery/ArterySettings.scala b/akka-remote/src/main/scala/akka/remote/artery/ArterySettings.scala index 630eb9585f..dcb54fa34d 100644 --- a/akka-remote/src/main/scala/akka/remote/artery/ArterySettings.scala +++ b/akka-remote/src/main/scala/akka/remote/artery/ArterySettings.scala @@ -103,8 +103,9 @@ private[akka] final class ArterySettings private (config: Config) { interval > Duration.Zero, "stop-quarantined-after-idle must be more than zero") val ClientLivenessTimeout = config.getMillisDuration("client-liveness-timeout").requiring(interval ⇒ interval > Duration.Zero, "client-liveness-timeout must be more than zero") - val ImageLivenessTimeoutNs = config.getMillisDuration("image-liveness-timeout").requiring(interval ⇒ + val ImageLivenessTimeout = config.getMillisDuration("image-liveness-timeout").requiring(interval ⇒ interval > Duration.Zero, "image-liveness-timeout must be more than zero") + require(ImageLivenessTimeout < HandshakeTimeout, "image-liveness-timeout must be less than handshake-timeout") 
val DriverTimeout = config.getMillisDuration("driver-timeout").requiring(interval ⇒ interval > Duration.Zero, "driver-timeout must be more than zero") val FlightRecorderEnabled: Boolean = getBoolean("flight-recorder.enabled") diff --git a/akka-remote/src/main/scala/akka/remote/artery/ArteryTransport.scala b/akka-remote/src/main/scala/akka/remote/artery/ArteryTransport.scala index 2e17a80c19..5dbbcf7513 100644 --- a/akka-remote/src/main/scala/akka/remote/artery/ArteryTransport.scala +++ b/akka-remote/src/main/scala/akka/remote/artery/ArteryTransport.scala @@ -443,7 +443,7 @@ private[remote] class ArteryTransport(_system: ExtendedActorSystem, _provider: R driverContext.aeronDirectoryName(randomName) } driverContext.clientLivenessTimeoutNs(settings.Advanced.ClientLivenessTimeout.toNanos) - driverContext.imageLivenessTimeoutNs(settings.Advanced.ImageLivenessTimeoutNs.toNanos) + driverContext.imageLivenessTimeoutNs(settings.Advanced.ImageLivenessTimeout.toNanos) driverContext.driverTimeoutMs(settings.Advanced.DriverTimeout.toMillis) val idleCpuLevel = settings.Advanced.IdleCpuLevel diff --git a/akka-remote/src/test/scala/akka/remote/artery/HandshakeDenySpec.scala b/akka-remote/src/test/scala/akka/remote/artery/HandshakeDenySpec.scala index 864c0bbe49..fe6e9993ca 100644 --- a/akka-remote/src/test/scala/akka/remote/artery/HandshakeDenySpec.scala +++ b/akka-remote/src/test/scala/akka/remote/artery/HandshakeDenySpec.scala @@ -20,6 +20,7 @@ object HandshakeDenySpec { remote.artery.canonical.hostname = localhost remote.artery.canonical.port = 0 remote.artery.advanced.handshake-timeout = 2s + remote.artery.advanced.image-liveness-timeout = 1.9s } """) diff --git a/akka-remote/src/test/scala/akka/remote/artery/HandshakeFailureSpec.scala b/akka-remote/src/test/scala/akka/remote/artery/HandshakeFailureSpec.scala index 802f5a58f8..ec72c0bd4a 100644 --- a/akka-remote/src/test/scala/akka/remote/artery/HandshakeFailureSpec.scala +++ 
b/akka-remote/src/test/scala/akka/remote/artery/HandshakeFailureSpec.scala @@ -24,6 +24,7 @@ object HandshakeFailureSpec { remote.artery.canonical.hostname = localhost remote.artery.canonical.port = 0 remote.artery.advanced.handshake-timeout = 2s + remote.artery.advanced.image-liveness-timeout = 1.9s } """) diff --git a/akka-remote/src/test/scala/akka/remote/artery/HandshakeRetrySpec.scala b/akka-remote/src/test/scala/akka/remote/artery/HandshakeRetrySpec.scala index 2ce40262e0..8573ad34f1 100644 --- a/akka-remote/src/test/scala/akka/remote/artery/HandshakeRetrySpec.scala +++ b/akka-remote/src/test/scala/akka/remote/artery/HandshakeRetrySpec.scala @@ -23,6 +23,7 @@ object HandshakeRetrySpec { remote.artery.canonical.hostname = localhost remote.artery.canonical.port = 0 remote.artery.advanced.handshake-timeout = 10s + remote.artery.advanced.image-liveness-timeout = 7s } """) diff --git a/akka-remote/src/test/scala/akka/remote/artery/LateConnectSpec.scala b/akka-remote/src/test/scala/akka/remote/artery/LateConnectSpec.scala index 36f1690dd2..e50b71d91a 100644 --- a/akka-remote/src/test/scala/akka/remote/artery/LateConnectSpec.scala +++ b/akka-remote/src/test/scala/akka/remote/artery/LateConnectSpec.scala @@ -24,6 +24,7 @@ object LateConnectSpec { remote.artery.canonical.hostname = localhost remote.artery.canonical.port = 0 remote.artery.advanced.handshake-timeout = 3s + remote.artery.advanced.image-liveness-timeout = 2.9s } """) diff --git a/akka-remote/src/test/scala/akka/remote/artery/compress/HandshakeShouldDropCompressionTableSpec.scala b/akka-remote/src/test/scala/akka/remote/artery/compress/HandshakeShouldDropCompressionTableSpec.scala index 869d8c0ff1..8cc12fc4d8 100644 --- a/akka-remote/src/test/scala/akka/remote/artery/compress/HandshakeShouldDropCompressionTableSpec.scala +++ b/akka-remote/src/test/scala/akka/remote/artery/compress/HandshakeShouldDropCompressionTableSpec.scala @@ -31,6 +31,7 @@ object HandshakeShouldDropCompressionTableSpec { 
remote.artery.canonical.hostname = localhost remote.artery.canonical.port = 0 remote.artery.advanced.handshake-timeout = 10s + remote.artery.advanced.image-liveness-timeout = 7s remote.artery.advanced.compression { actor-refs {