Merge pull request #1361 from akka/wip-3247-UnreachableNodeJoinsAgainSpec-patriknw

Solve UnreachableNodeJoinsAgainSpec problem, see #3247

commit b62f5f46f5
12 changed files with 59 additions and 43 deletions
@@ -72,6 +72,8 @@ class RoutingSpec extends AkkaSpec(RoutingSpec.config) with DefaultTimeout with
   implicit val ec = system.dispatcher
   import akka.routing.RoutingSpec._
 
+  muteDeadLetters("DeathWatchNotification.*")()
+
   "routers in general" must {
 
     "evict terminated routees" in {
@@ -503,7 +503,11 @@ private[akka] class EmptyLocalActorRef(override val provider: ActorRefProvider,
       sender ! ActorIdentity(messageId, None)
       true
     case s: SelectChildName ⇒
-      s.identifyRequest foreach { x ⇒ sender ! ActorIdentity(x.messageId, None) }
+      s.identifyRequest match {
+        case Some(identify) ⇒ sender ! ActorIdentity(identify.messageId, None)
+        case None ⇒
+          eventStream.publish(DeadLetter(s.wrappedMessage, if (sender eq Actor.noSender) provider.deadLetters else sender, this))
+      }
       true
     case _ ⇒ false
   }
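This is the behavioral core of the fix: a selection message that reaches a nonexistent actor without an identify request is now published as a `DeadLetter` instead of vanishing. A minimal sketch of observing the effect, assuming Akka 2.2-era APIs (the `DeadLetterDemo` object and its listener are hypothetical):

```scala
import akka.actor.{ Actor, ActorSystem, DeadLetter, Props }

object DeadLetterDemo extends App {
  val system = ActorSystem("demo")
  // Subscribe a listener to DeadLetter events on the event stream.
  val listener = system.actorOf(Props(new Actor {
    def receive = { case d: DeadLetter ⇒ println(s"dead letter: ${d.message}") }
  }), "listener")
  system.eventStream.subscribe(listener, classOf[DeadLetter])
  // Nothing lives at this path, so the wrapped message surfaces as a DeadLetter.
  system.actorSelection("/user/does-not-exist") ! "hello"
  Thread.sleep(1000) // give the event stream time to deliver before shutdown
  system.shutdown()
}
```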
@@ -533,15 +537,7 @@ private[akka] class DeadLetterActorRef(_provider: ActorRefProvider,
       w.watcher.sendSystemMessage(
         DeathWatchNotification(w.watchee, existenceConfirmed = false, addressTerminated = false))
       true
-    case w: Unwatch ⇒ true // Just ignore
-    case Identify(messageId) ⇒
-      sender ! ActorIdentity(messageId, None)
-      true
-    case s: SelectChildName ⇒
-      s.identifyRequest foreach { x ⇒ sender ! ActorIdentity(x.messageId, None) }
-      true
-    case NullMessage ⇒ true
-    case _ ⇒ false
+    case _ ⇒ super.specialHandle(msg, sender)
   }
 
   @throws(classOf[java.io.ObjectStreamException])
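For context (not part of the diff itself): `DeadLetterActorRef` extends `EmptyLocalActorRef`, so the `super.specialHandle(msg, sender)` delegation reuses the `Identify`, `SelectChildName` and `NullMessage` cases that the deleted lines duplicated — including the new dead-letter publishing added in the previous hunk. A sketch of that relationship (constructor parameter list assumed, abridged from akka-actor's ActorRef.scala):

```scala
// Assumed shape: the dead-letter ref inherits specialHandle from EmptyLocalActorRef.
private[akka] class DeadLetterActorRef(_provider: ActorRefProvider, _path: ActorPath, _eventStream: EventStream)
  extends EmptyLocalActorRef(_provider, _path, _eventStream)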
@@ -104,14 +104,18 @@ trait MultiNodeClusterSpec extends Suite with STMultiNodeSpec with WatchedByCoro
       sys.eventStream.publish(Mute(EventFilter.info(pattern = s)))
     }
 
-    Seq(".*received dead letter from.*ClientDisconnected",
-      ".*received dead letter from.*deadLetters.*PoisonPill",
-      ".*received dead letter from.*Disassociated",
-      ".*received dead letter from.*DisassociateUnderlying",
-      ".*received dead letter from.*HandleListenerRegistered",
-      ".*installing context org.jboss.netty.channel.DefaultChannelPipeline.*") foreach { s ⇒
-      sys.eventStream.publish(Mute(EventFilter.warning(pattern = s)))
-    }
+    muteDeadLetters(
+      "Heartbeat.*",
+      "GossipEnvelope.*",
+      "ClusterMetricsChanged.*",
+      "Disassociated.*",
+      "DisassociateUnderlying.*",
+      "HandleListenerRegistered.*",
+      "PoisonPill.*",
+      "DeathWatchNotification.*",
+      "NullMessage.*",
+      "InboundPayload.*")(sys)
+
   }
 }
 
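The replacement list drops the verbose full-line regexes in favor of message-class suffixes: the new helper (defined on `MultiNodeSpec` later in this diff) prefixes each suffix with `.*received dead.*` before installing the warning filter. A runnable check of that expansion (the sample log line is hypothetical):

```scala
object MutePatternCheck extends App {
  // muteDeadLetters("Heartbeat.*")(sys) installs an EventFilter whose pattern is:
  val pattern = ".*received dead.*" + "Heartbeat.*"
  val sample = "received dead letter from Actor[akka://Sys/user/x]: Heartbeat(...)"
  println(sample.matches(pattern)) // prints: true
}
```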
@@ -119,13 +123,9 @@ trait MultiNodeClusterSpec extends Suite with STMultiNodeSpec with WatchedByCoro
     if (!sys.log.isDebugEnabled)
       sys.eventStream.publish(Mute(EventFilter.error(pattern = ".*Marking.* as UNREACHABLE.*")))
 
-  def muteDeadLetters(sys: ActorSystem = system): Unit =
-    if (!sys.log.isDebugEnabled)
-      sys.eventStream.publish(Mute(EventFilter.warning(pattern = ".*received dead letter from.*")))
-
   override def afterAll(): Unit = {
     if (!log.isDebugEnabled) {
-      muteDeadLetters()
+      muteDeadLetters()()
       system.eventStream.setLogLevel(ErrorLevel)
     }
     super.afterAll()
@@ -60,6 +60,7 @@ private[cluster] object StressMultiJvmSpec extends MultiNodeConfig {
   // not MultiNodeClusterSpec.clusterConfig
   commonConfig(ConfigFactory.parseString("""
     akka.test.cluster-stress-spec {
+      log-stats = off
       # scale the nr-of-nodes* settings with this factor
       nr-of-nodes-factor = 1
       nr-of-nodes = 13
@@ -147,6 +148,7 @@ private[cluster] object StressMultiJvmSpec extends MultiNodeConfig {
 
     private def getDuration(name: String): FiniteDuration = Duration(getMilliseconds(name), MILLISECONDS)
 
+    val logStats = getBoolean("log-stats")
     val nFactor = getInt("nr-of-nodes-factor")
     val totalNumberOfNodes = getInt("nr-of-nodes") * nFactor ensuring (
       _ >= 10, "nr-of-nodes must be >= 10")
@@ -211,7 +213,9 @@ private[cluster] object StressMultiJvmSpec extends MultiNodeConfig {
    * expected results has been collected. It shuts down
    * itself when expected results has been collected.
    */
-  class ClusterResultAggregator(title: String, expectedResults: Int, reportMetricsInterval: FiniteDuration) extends Actor with ActorLogging {
+  class ClusterResultAggregator(title: String, expectedResults: Int, settings: Settings) extends Actor with ActorLogging {
+    import settings.reportMetricsInterval
+    import settings.logStats
     val cluster = Cluster(context.system)
     var reportTo: Option[ActorRef] = None
     var results = Vector.empty[ClusterResult]
@@ -242,12 +246,14 @@ private[cluster] object StressMultiJvmSpec extends MultiNodeConfig {
       case PhiResult(from, phiValues) ⇒ phiValuesObservedByNode += from -> phiValues
       case StatsResult(from, stats) ⇒ clusterStatsObservedByNode += from -> stats
       case ReportTick ⇒
-        log.info(s"[${title}] in progress\n${formatMetrics}\n\n${formatPhi}\n\n${formatStats}")
+        if (logStats)
+          log.info(s"[${title}] in progress\n${formatMetrics}\n\n${formatPhi}\n\n${formatStats}")
       case r: ClusterResult ⇒
         results :+= r
         if (results.size == expectedResults) {
           val aggregated = AggregatedClusterResult(title, maxDuration, totalClusterStats)
-          log.info(s"[${title}] completed in [${aggregated.duration.toMillis}] ms\n${aggregated.clusterStats}\n${formatMetrics}\n\n${formatPhi}\n\n${formatStats}")
+          if (logStats)
+            log.info(s"[${title}] completed in [${aggregated.duration.toMillis}] ms\n${aggregated.clusterStats}\n${formatMetrics}\n\n${formatPhi}\n\n${formatStats}")
           reportTo foreach { _ ! aggregated }
           context stop self
         }
@@ -665,6 +671,7 @@ abstract class StressSpec
     sys.eventStream.publish(Mute(EventFilter.warning(pattern = ".*PhiResult.*")))
     sys.eventStream.publish(Mute(EventFilter.warning(pattern = ".*SendBatch.*")))
     sys.eventStream.publish(Mute(EventFilter.warning(pattern = ".*ClusterStats.*")))
+    muteDeadLetters("SimpleJob.*", "Tick.*", "AggregatedClusterResult.*")(sys)
   }
 
   val seedNodes = roles.take(numberOfSeedNodes)
@@ -680,7 +687,7 @@ abstract class StressSpec
 
   def createResultAggregator(title: String, expectedResults: Int, includeInHistory: Boolean): Unit = {
     runOn(roles.head) {
-      val aggregator = system.actorOf(Props(new ClusterResultAggregator(title, expectedResults, reportMetricsInterval)),
+      val aggregator = system.actorOf(Props(new ClusterResultAggregator(title, expectedResults, settings)),
        name = "result" + step)
      if (includeInHistory) aggregator ! ReportTo(Some(clusterResultHistory))
      else aggregator ! ReportTo(None)
@@ -698,7 +705,9 @@ abstract class StressSpec
     identifyProbe.expectMsgType[ActorIdentity].ref
   }
 
-  lazy val clusterResultHistory = system.actorOf(Props[ClusterResultHistory], "resultHistory")
+  lazy val clusterResultHistory =
+    if (settings.logStats) system.actorOf(Props[ClusterResultHistory], "resultHistory")
+    else system.deadLetters
 
   lazy val phiObserver = system.actorOf(Props[PhiObserver], "phiObserver")
 
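With stats logging off, the history sink becomes `system.deadLetters`: a null-object stand-in that keeps senders on a single code path, while the resulting dead letters are silenced by the `muteDeadLetters(..., "AggregatedClusterResult.*")(sys)` call earlier in this diff. A minimal sketch of the idiom (all names hypothetical):

```scala
import akka.actor.{ Actor, ActorRef, ActorSystem, Props }

class Printer extends Actor {
  def receive = { case msg ⇒ println(s"report: $msg") }
}

object NullObjectSink extends App {
  val system = ActorSystem("demo")
  val logStats = false // flip to true to route reports to a real actor

  // deadLetters silently absorbs messages, so senders need no if/else.
  val sink: ActorRef =
    if (logStats) system.actorOf(Props[Printer], "resultHistory")
    else system.deadLetters

  sink ! "aggregated result" // absorbed when logStats is off
  system.shutdown()
}
```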
@@ -953,9 +962,10 @@ abstract class StressSpec
 
   def awaitWorkResult: WorkResult = {
     val workResult = expectMsgType[WorkResult]
-    log.info("{} result, [{}] jobs/s, retried [{}] of [{}] msg", masterName,
-      workResult.jobsPerSecond.form,
-      workResult.retryCount, workResult.sendCount)
+    if (settings.logStats)
+      log.info("{} result, [{}] jobs/s, retried [{}] of [{}] msg", masterName,
+        workResult.jobsPerSecond.form,
+        workResult.retryCount, workResult.sendCount)
     master match {
       case Some(m) ⇒
         watch(m)
@@ -28,10 +28,6 @@ object UnreachableNodeJoinsAgainMultiNodeConfig extends MultiNodeConfig {
 
   commonConfig(ConfigFactory.parseString(
     """
-      # this setting is here to limit the number of retries and failures while the
-      # node is being blackholed
-      akka.remote.retry-gate-closed-for = 500 ms
-
       akka.remote.log-remote-lifecycle-events = off
       akka.cluster.publish-stats-interval = 0s
     """).withFallback(debugConfig(on = false).withFallback(MultiNodeClusterSpec.clusterConfig)))
@@ -166,7 +162,6 @@ abstract class UnreachableNodeJoinsAgainSpec
     runOn(victim) {
       val victimAddress = system.asInstanceOf[ExtendedActorSystem].provider.getDefaultAddress
       system.awaitTermination(10 seconds)
-      Thread.sleep(5000)
       // create new ActorSystem with same host:port
       val freshSystem = ActorSystem(system.name, ConfigFactory.parseString(s"""
         akka.remote.netty.tcp {
@@ -177,7 +172,6 @@ abstract class UnreachableNodeJoinsAgainSpec
 
       try {
         Cluster(freshSystem).join(masterAddress)
-        Thread.sleep(5000)
         within(15 seconds) {
           awaitAssert(Cluster(freshSystem).readView.members.map(_.address) must contain(victimAddress))
           awaitAssert(Cluster(freshSystem).readView.members.size must be(expectedNumberOfMembers))
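Both fixed `Thread.sleep(5000)` calls can go because `awaitAssert` already polls: it retries the enclosed assertion (every 100 ms by default) until it succeeds or the time remaining in the enclosing `within` block runs out, then rethrows the last failure. The test therefore waits exactly as long as cluster convergence takes, as in the block kept above:

```scala
// Excerpt from the spec above; requires the MultiNodeSpec/TestKit context.
within(15 seconds) {
  awaitAssert(Cluster(freshSystem).readView.members.map(_.address) must contain(victimAddress))
  awaitAssert(Cluster(freshSystem).readView.members.size must be(expectedNumberOfMembers))
}
```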
@@ -16,6 +16,7 @@ import akka.util.Timeout
 import akka.remote.testconductor.{ TestConductorExt, TestConductor, RoleName }
 import akka.remote.RemoteActorRefProvider
 import akka.testkit._
+import akka.testkit.TestEvent._
 import scala.concurrent.duration._
 import akka.remote.testconductor.RoleName
 import akka.actor.RootActorPath
@@ -101,7 +102,6 @@ abstract class MultiNodeConfig {
     if (_testTransport) ConfigFactory.parseString(
       """
       akka.remote.netty.tcp.applied-adapters = [trttl, gremlin]
-      akka.remote.retry-gate-closed-for = 1 s
       """)
     else ConfigFactory.empty
 
@@ -359,6 +359,14 @@ abstract class MultiNodeSpec(val myself: RoleName, _system: ActorSystem, _roles:
    */
   def node(role: RoleName): ActorPath = RootActorPath(testConductor.getAddressFor(role).await)
 
+  def muteDeadLetters(endPatterns: String*)(sys: ActorSystem = system): Unit =
+    if (!sys.log.isDebugEnabled) {
+      def mute(suffix: String): Unit =
+        sys.eventStream.publish(Mute(EventFilter.warning(pattern = ".*received dead.*" + suffix)))
+      if (endPatterns.isEmpty) mute("")
+      else endPatterns foreach mute
+    }
+
   /**
    * Enrich `.await()` onto all Awaitables, using remaining duration from the innermost
    * enclosing `within` block or QueryTimeout.
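The helper is curried: the first parameter list takes zero or more message-class patterns (each appended to `.*received dead.*`), the second the `ActorSystem` to mute, defaulting to `system`. Because the second list has a default, a trailing `()` is still required, which is why `MultiNodeClusterSpec.afterAll` earlier in this diff calls `muteDeadLetters()()`. Call shapes as they appear elsewhere in this diff:

```scala
muteDeadLetters("DeathWatchNotification.*")()                              // one pattern, default system
muteDeadLetters("SimpleJob.*", "Tick.*", "AggregatedClusterResult.*")(sys) // explicit system
muteDeadLetters()()                                                        // no patterns: mute all dead-letter warnings
```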
@@ -89,6 +89,8 @@ class RemoteWatcherSpec extends AkkaSpec(
   val remoteAddress = remoteSystem.asInstanceOf[ExtendedActorSystem].provider.getDefaultAddress
   def remoteAddressUid = AddressUidExtension(remoteSystem).addressUid
 
+  Seq(system, remoteSystem).foreach(muteDeadLetters("Disassociated.*", "DisassociateUnderlying.*")(_))
+
   override def afterTermination() {
     remoteSystem.shutdown()
   }
@@ -54,8 +54,6 @@ class AkkaProtocolSpec extends AkkaSpec("""akka.actor.provider = "akka.remote.Re
 
       startup-timeout = 5 s
 
-      retry-gate-closed-for = 0 s
-
       use-passive-connections = on
     }
   """)
@@ -16,7 +16,6 @@ object AkkaProtocolStressTest {
     #loglevel = DEBUG
     actor.provider = "akka.remote.RemoteActorRefProvider"
 
-    remote.retry-gate-closed-for = 0 s
     remote.log-remote-lifecycle-events = on
 
     remote.transport-failure-detector {
@@ -37,7 +37,6 @@ object SystemMessageDeliveryStressTest {
     #loglevel = DEBUG
     actor.provider = "akka.remote.RemoteActorRefProvider"
 
-    remote.retry-gate-closed-for = 0 s
     remote.log-remote-lifecycle-events = on
 
    remote.failure-detector {
@@ -18,7 +18,6 @@ object ThrottlerTransportAdapterSpec {
     actor.provider = "akka.remote.RemoteActorRefProvider"
 
     remote.netty.tcp.hostname = "localhost"
-    remote.retry-gate-closed-for = 0 s
     remote.log-remote-lifecycle-events = off
 
     remote.netty.tcp.applied-adapters = ["trttl"]
@@ -15,6 +15,7 @@ import com.typesafe.config.{ Config, ConfigFactory }
 import java.util.concurrent.TimeoutException
 import akka.dispatch.Dispatchers
 import akka.pattern.ask
+import akka.testkit.TestEvent._
 
 object AkkaSpec {
   val testConf: Config = ConfigFactory.parseString("""
@@ -93,4 +94,12 @@ abstract class AkkaSpec(_system: ActorSystem)
 
   override def expectedTestDuration: FiniteDuration = 60 seconds
 
+  def muteDeadLetters(endPatterns: String*)(sys: ActorSystem = system): Unit =
+    if (!sys.log.isDebugEnabled) {
+      def mute(suffix: String): Unit =
+        sys.eventStream.publish(Mute(EventFilter.warning(pattern = ".*received dead.*" + suffix)))
+      if (endPatterns.isEmpty) mute("")
+      else endPatterns foreach mute
+    }
+
 }
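The same helper is duplicated on `AkkaSpec` so plain (non-multi-node) tests get it too; `RoutingSpec` and `RemoteWatcherSpec` above use this copy:

```scala
muteDeadLetters("DeathWatchNotification.*")()                                // RoutingSpec
Seq(system, remoteSystem).foreach(
  muteDeadLetters("Disassociated.*", "DisassociateUnderlying.*")(_))         // RemoteWatcherSpec
```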