Merge pull request #1361 from akka/wip-3247-UnreachableNodeJoinsAgainSpec-patriknw

Solve UnreachableNodeJoinsAgainSpec problem, see #3247
This commit is contained in:
Patrik Nordwall 2013-04-25 05:32:08 -07:00
commit b62f5f46f5
12 changed files with 59 additions and 43 deletions

View file

@ -72,6 +72,8 @@ class RoutingSpec extends AkkaSpec(RoutingSpec.config) with DefaultTimeout with
implicit val ec = system.dispatcher implicit val ec = system.dispatcher
import akka.routing.RoutingSpec._ import akka.routing.RoutingSpec._
muteDeadLetters("DeathWatchNotification.*")()
"routers in general" must { "routers in general" must {
"evict terminated routees" in { "evict terminated routees" in {

View file

@ -503,7 +503,11 @@ private[akka] class EmptyLocalActorRef(override val provider: ActorRefProvider,
sender ! ActorIdentity(messageId, None) sender ! ActorIdentity(messageId, None)
true true
case s: SelectChildName case s: SelectChildName
s.identifyRequest foreach { x sender ! ActorIdentity(x.messageId, None) } s.identifyRequest match {
case Some(identify) sender ! ActorIdentity(identify.messageId, None)
case None
eventStream.publish(DeadLetter(s.wrappedMessage, if (sender eq Actor.noSender) provider.deadLetters else sender, this))
}
true true
case _ false case _ false
} }
@ -533,15 +537,7 @@ private[akka] class DeadLetterActorRef(_provider: ActorRefProvider,
w.watcher.sendSystemMessage( w.watcher.sendSystemMessage(
DeathWatchNotification(w.watchee, existenceConfirmed = false, addressTerminated = false)) DeathWatchNotification(w.watchee, existenceConfirmed = false, addressTerminated = false))
true true
case w: Unwatch true // Just ignore case _ super.specialHandle(msg, sender)
case Identify(messageId)
sender ! ActorIdentity(messageId, None)
true
case s: SelectChildName
s.identifyRequest foreach { x sender ! ActorIdentity(x.messageId, None) }
true
case NullMessage true
case _ false
} }
@throws(classOf[java.io.ObjectStreamException]) @throws(classOf[java.io.ObjectStreamException])

View file

@ -104,14 +104,18 @@ trait MultiNodeClusterSpec extends Suite with STMultiNodeSpec with WatchedByCoro
sys.eventStream.publish(Mute(EventFilter.info(pattern = s))) sys.eventStream.publish(Mute(EventFilter.info(pattern = s)))
} }
Seq(".*received dead letter from.*ClientDisconnected", muteDeadLetters(
".*received dead letter from.*deadLetters.*PoisonPill", "Heartbeat.*",
".*received dead letter from.*Disassociated", "GossipEnvelope.*",
".*received dead letter from.*DisassociateUnderlying", "ClusterMetricsChanged.*",
".*received dead letter from.*HandleListenerRegistered", "Disassociated.*",
".*installing context org.jboss.netty.channel.DefaultChannelPipeline.*") foreach { s "DisassociateUnderlying.*",
sys.eventStream.publish(Mute(EventFilter.warning(pattern = s))) "HandleListenerRegistered.*",
} "PoisonPill.*",
"DeathWatchNotification.*",
"NullMessage.*",
"InboundPayload.*")(sys)
} }
} }
@ -119,13 +123,9 @@ trait MultiNodeClusterSpec extends Suite with STMultiNodeSpec with WatchedByCoro
if (!sys.log.isDebugEnabled) if (!sys.log.isDebugEnabled)
sys.eventStream.publish(Mute(EventFilter.error(pattern = ".*Marking.* as UNREACHABLE.*"))) sys.eventStream.publish(Mute(EventFilter.error(pattern = ".*Marking.* as UNREACHABLE.*")))
/**
 * Mutes all "received dead letter from" warnings on the given system's
 * event stream. Does nothing when debug logging is enabled for `sys`.
 *
 * @param sys the actor system whose event stream receives the mute filter
 */
def muteDeadLetters(sys: ActorSystem = system): Unit = {
  val debugEnabled = sys.log.isDebugEnabled
  if (!debugEnabled) {
    val deadLetterFilter = EventFilter.warning(pattern = ".*received dead letter from.*")
    sys.eventStream.publish(Mute(deadLetterFilter))
  }
}
override def afterAll(): Unit = { override def afterAll(): Unit = {
if (!log.isDebugEnabled) { if (!log.isDebugEnabled) {
muteDeadLetters() muteDeadLetters()()
system.eventStream.setLogLevel(ErrorLevel) system.eventStream.setLogLevel(ErrorLevel)
} }
super.afterAll() super.afterAll()

View file

@ -60,6 +60,7 @@ private[cluster] object StressMultiJvmSpec extends MultiNodeConfig {
// not MultiNodeClusterSpec.clusterConfig // not MultiNodeClusterSpec.clusterConfig
commonConfig(ConfigFactory.parseString(""" commonConfig(ConfigFactory.parseString("""
akka.test.cluster-stress-spec { akka.test.cluster-stress-spec {
log-stats = off
# scale the nr-of-nodes* settings with this factor # scale the nr-of-nodes* settings with this factor
nr-of-nodes-factor = 1 nr-of-nodes-factor = 1
nr-of-nodes = 13 nr-of-nodes = 13
@ -147,6 +148,7 @@ private[cluster] object StressMultiJvmSpec extends MultiNodeConfig {
private def getDuration(name: String): FiniteDuration = Duration(getMilliseconds(name), MILLISECONDS) private def getDuration(name: String): FiniteDuration = Duration(getMilliseconds(name), MILLISECONDS)
val logStats = getBoolean("log-stats")
val nFactor = getInt("nr-of-nodes-factor") val nFactor = getInt("nr-of-nodes-factor")
val totalNumberOfNodes = getInt("nr-of-nodes") * nFactor ensuring ( val totalNumberOfNodes = getInt("nr-of-nodes") * nFactor ensuring (
_ >= 10, "nr-of-nodes must be >= 10") _ >= 10, "nr-of-nodes must be >= 10")
@ -211,7 +213,9 @@ private[cluster] object StressMultiJvmSpec extends MultiNodeConfig {
* expected results have been collected. It shuts * expected results have been collected. It shuts
* itself down when the expected results have been collected. * itself down when the expected results have been collected.
*/ */
class ClusterResultAggregator(title: String, expectedResults: Int, reportMetricsInterval: FiniteDuration) extends Actor with ActorLogging { class ClusterResultAggregator(title: String, expectedResults: Int, settings: Settings) extends Actor with ActorLogging {
import settings.reportMetricsInterval
import settings.logStats
val cluster = Cluster(context.system) val cluster = Cluster(context.system)
var reportTo: Option[ActorRef] = None var reportTo: Option[ActorRef] = None
var results = Vector.empty[ClusterResult] var results = Vector.empty[ClusterResult]
@ -242,12 +246,14 @@ private[cluster] object StressMultiJvmSpec extends MultiNodeConfig {
case PhiResult(from, phiValues) phiValuesObservedByNode += from -> phiValues case PhiResult(from, phiValues) phiValuesObservedByNode += from -> phiValues
case StatsResult(from, stats) clusterStatsObservedByNode += from -> stats case StatsResult(from, stats) clusterStatsObservedByNode += from -> stats
case ReportTick case ReportTick
log.info(s"[${title}] in progress\n${formatMetrics}\n\n${formatPhi}\n\n${formatStats}") if (logStats)
log.info(s"[${title}] in progress\n${formatMetrics}\n\n${formatPhi}\n\n${formatStats}")
case r: ClusterResult case r: ClusterResult
results :+= r results :+= r
if (results.size == expectedResults) { if (results.size == expectedResults) {
val aggregated = AggregatedClusterResult(title, maxDuration, totalClusterStats) val aggregated = AggregatedClusterResult(title, maxDuration, totalClusterStats)
log.info(s"[${title}] completed in [${aggregated.duration.toMillis}] ms\n${aggregated.clusterStats}\n${formatMetrics}\n\n${formatPhi}\n\n${formatStats}") if (logStats)
log.info(s"[${title}] completed in [${aggregated.duration.toMillis}] ms\n${aggregated.clusterStats}\n${formatMetrics}\n\n${formatPhi}\n\n${formatStats}")
reportTo foreach { _ ! aggregated } reportTo foreach { _ ! aggregated }
context stop self context stop self
} }
@ -665,6 +671,7 @@ abstract class StressSpec
sys.eventStream.publish(Mute(EventFilter.warning(pattern = ".*PhiResult.*"))) sys.eventStream.publish(Mute(EventFilter.warning(pattern = ".*PhiResult.*")))
sys.eventStream.publish(Mute(EventFilter.warning(pattern = ".*SendBatch.*"))) sys.eventStream.publish(Mute(EventFilter.warning(pattern = ".*SendBatch.*")))
sys.eventStream.publish(Mute(EventFilter.warning(pattern = ".*ClusterStats.*"))) sys.eventStream.publish(Mute(EventFilter.warning(pattern = ".*ClusterStats.*")))
muteDeadLetters("SimpleJob.*", "Tick.*", "AggregatedClusterResult.*")(sys)
} }
val seedNodes = roles.take(numberOfSeedNodes) val seedNodes = roles.take(numberOfSeedNodes)
@ -680,7 +687,7 @@ abstract class StressSpec
def createResultAggregator(title: String, expectedResults: Int, includeInHistory: Boolean): Unit = { def createResultAggregator(title: String, expectedResults: Int, includeInHistory: Boolean): Unit = {
runOn(roles.head) { runOn(roles.head) {
val aggregator = system.actorOf(Props(new ClusterResultAggregator(title, expectedResults, reportMetricsInterval)), val aggregator = system.actorOf(Props(new ClusterResultAggregator(title, expectedResults, settings)),
name = "result" + step) name = "result" + step)
if (includeInHistory) aggregator ! ReportTo(Some(clusterResultHistory)) if (includeInHistory) aggregator ! ReportTo(Some(clusterResultHistory))
else aggregator ! ReportTo(None) else aggregator ! ReportTo(None)
@ -698,7 +705,9 @@ abstract class StressSpec
identifyProbe.expectMsgType[ActorIdentity].ref identifyProbe.expectMsgType[ActorIdentity].ref
} }
lazy val clusterResultHistory = system.actorOf(Props[ClusterResultHistory], "resultHistory") lazy val clusterResultHistory =
if (settings.logStats) system.actorOf(Props[ClusterResultHistory], "resultHistory")
else system.deadLetters
lazy val phiObserver = system.actorOf(Props[PhiObserver], "phiObserver") lazy val phiObserver = system.actorOf(Props[PhiObserver], "phiObserver")
@ -953,9 +962,10 @@ abstract class StressSpec
def awaitWorkResult: WorkResult = { def awaitWorkResult: WorkResult = {
val workResult = expectMsgType[WorkResult] val workResult = expectMsgType[WorkResult]
log.info("{} result, [{}] jobs/s, retried [{}] of [{}] msg", masterName, if (settings.logStats)
workResult.jobsPerSecond.form, log.info("{} result, [{}] jobs/s, retried [{}] of [{}] msg", masterName,
workResult.retryCount, workResult.sendCount) workResult.jobsPerSecond.form,
workResult.retryCount, workResult.sendCount)
master match { master match {
case Some(m) case Some(m)
watch(m) watch(m)

View file

@ -28,10 +28,6 @@ object UnreachableNodeJoinsAgainMultiNodeConfig extends MultiNodeConfig {
commonConfig(ConfigFactory.parseString( commonConfig(ConfigFactory.parseString(
""" """
# this setting is here to limit the number of retries and failures while the
# node is being blackholed
akka.remote.retry-gate-closed-for = 500 ms
akka.remote.log-remote-lifecycle-events = off akka.remote.log-remote-lifecycle-events = off
akka.cluster.publish-stats-interval = 0s akka.cluster.publish-stats-interval = 0s
""").withFallback(debugConfig(on = false).withFallback(MultiNodeClusterSpec.clusterConfig))) """).withFallback(debugConfig(on = false).withFallback(MultiNodeClusterSpec.clusterConfig)))
@ -166,7 +162,6 @@ abstract class UnreachableNodeJoinsAgainSpec
runOn(victim) { runOn(victim) {
val victimAddress = system.asInstanceOf[ExtendedActorSystem].provider.getDefaultAddress val victimAddress = system.asInstanceOf[ExtendedActorSystem].provider.getDefaultAddress
system.awaitTermination(10 seconds) system.awaitTermination(10 seconds)
Thread.sleep(5000)
// create new ActorSystem with same host:port // create new ActorSystem with same host:port
val freshSystem = ActorSystem(system.name, ConfigFactory.parseString(s""" val freshSystem = ActorSystem(system.name, ConfigFactory.parseString(s"""
akka.remote.netty.tcp { akka.remote.netty.tcp {
@ -177,7 +172,6 @@ abstract class UnreachableNodeJoinsAgainSpec
try { try {
Cluster(freshSystem).join(masterAddress) Cluster(freshSystem).join(masterAddress)
Thread.sleep(5000)
within(15 seconds) { within(15 seconds) {
awaitAssert(Cluster(freshSystem).readView.members.map(_.address) must contain(victimAddress)) awaitAssert(Cluster(freshSystem).readView.members.map(_.address) must contain(victimAddress))
awaitAssert(Cluster(freshSystem).readView.members.size must be(expectedNumberOfMembers)) awaitAssert(Cluster(freshSystem).readView.members.size must be(expectedNumberOfMembers))

View file

@ -16,6 +16,7 @@ import akka.util.Timeout
import akka.remote.testconductor.{ TestConductorExt, TestConductor, RoleName } import akka.remote.testconductor.{ TestConductorExt, TestConductor, RoleName }
import akka.remote.RemoteActorRefProvider import akka.remote.RemoteActorRefProvider
import akka.testkit._ import akka.testkit._
import akka.testkit.TestEvent._
import scala.concurrent.duration._ import scala.concurrent.duration._
import akka.remote.testconductor.RoleName import akka.remote.testconductor.RoleName
import akka.actor.RootActorPath import akka.actor.RootActorPath
@ -101,7 +102,6 @@ abstract class MultiNodeConfig {
if (_testTransport) ConfigFactory.parseString( if (_testTransport) ConfigFactory.parseString(
""" """
akka.remote.netty.tcp.applied-adapters = [trttl, gremlin] akka.remote.netty.tcp.applied-adapters = [trttl, gremlin]
akka.remote.retry-gate-closed-for = 1 s
""") """)
else ConfigFactory.empty else ConfigFactory.empty
@ -359,6 +359,14 @@ abstract class MultiNodeSpec(val myself: RoleName, _system: ActorSystem, _roles:
*/ */
def node(role: RoleName): ActorPath = RootActorPath(testConductor.getAddressFor(role).await) def node(role: RoleName): ActorPath = RootActorPath(testConductor.getAddressFor(role).await)
/**
 * Mutes dead-letter warnings on the event stream of `sys`, unless debug
 * logging is enabled for that system.
 *
 * One warning filter is published per entry in `endPatterns`; each filter's
 * regex is ".*received dead.*" followed by the entry. When no entries are
 * given, a single filter with an empty suffix is published instead.
 *
 * @param endPatterns regex suffixes appended to the dead-letter prefix pattern
 * @param sys the actor system whose event stream receives the mute filters
 */
def muteDeadLetters(endPatterns: String*)(sys: ActorSystem = system): Unit = {
  // No explicit patterns means one filter with an empty suffix.
  val suffixes: Seq[String] = if (endPatterns.isEmpty) Seq("") else endPatterns
  if (!sys.log.isDebugEnabled)
    for (suffix <- suffixes)
      sys.eventStream.publish(Mute(EventFilter.warning(pattern = ".*received dead.*" + suffix)))
}
/** /**
* Enrich `.await()` onto all Awaitables, using remaining duration from the innermost * Enrich `.await()` onto all Awaitables, using remaining duration from the innermost
* enclosing `within` block or QueryTimeout. * enclosing `within` block or QueryTimeout.

View file

@ -89,6 +89,8 @@ class RemoteWatcherSpec extends AkkaSpec(
val remoteAddress = remoteSystem.asInstanceOf[ExtendedActorSystem].provider.getDefaultAddress val remoteAddress = remoteSystem.asInstanceOf[ExtendedActorSystem].provider.getDefaultAddress
def remoteAddressUid = AddressUidExtension(remoteSystem).addressUid def remoteAddressUid = AddressUidExtension(remoteSystem).addressUid
Seq(system, remoteSystem).foreach(muteDeadLetters("Disassociated.*", "DisassociateUnderlying.*")(_))
override def afterTermination() { override def afterTermination() {
remoteSystem.shutdown() remoteSystem.shutdown()
} }

View file

@ -54,8 +54,6 @@ class AkkaProtocolSpec extends AkkaSpec("""akka.actor.provider = "akka.remote.Re
startup-timeout = 5 s startup-timeout = 5 s
retry-gate-closed-for = 0 s
use-passive-connections = on use-passive-connections = on
} }
""") """)

View file

@ -16,7 +16,6 @@ object AkkaProtocolStressTest {
#loglevel = DEBUG #loglevel = DEBUG
actor.provider = "akka.remote.RemoteActorRefProvider" actor.provider = "akka.remote.RemoteActorRefProvider"
remote.retry-gate-closed-for = 0 s
remote.log-remote-lifecycle-events = on remote.log-remote-lifecycle-events = on
remote.transport-failure-detector { remote.transport-failure-detector {

View file

@ -37,7 +37,6 @@ object SystemMessageDeliveryStressTest {
#loglevel = DEBUG #loglevel = DEBUG
actor.provider = "akka.remote.RemoteActorRefProvider" actor.provider = "akka.remote.RemoteActorRefProvider"
remote.retry-gate-closed-for = 0 s
remote.log-remote-lifecycle-events = on remote.log-remote-lifecycle-events = on
remote.failure-detector { remote.failure-detector {

View file

@ -18,7 +18,6 @@ object ThrottlerTransportAdapterSpec {
actor.provider = "akka.remote.RemoteActorRefProvider" actor.provider = "akka.remote.RemoteActorRefProvider"
remote.netty.tcp.hostname = "localhost" remote.netty.tcp.hostname = "localhost"
remote.retry-gate-closed-for = 0 s
remote.log-remote-lifecycle-events = off remote.log-remote-lifecycle-events = off
remote.netty.tcp.applied-adapters = ["trttl"] remote.netty.tcp.applied-adapters = ["trttl"]

View file

@ -15,6 +15,7 @@ import com.typesafe.config.{ Config, ConfigFactory }
import java.util.concurrent.TimeoutException import java.util.concurrent.TimeoutException
import akka.dispatch.Dispatchers import akka.dispatch.Dispatchers
import akka.pattern.ask import akka.pattern.ask
import akka.testkit.TestEvent._
object AkkaSpec { object AkkaSpec {
val testConf: Config = ConfigFactory.parseString(""" val testConf: Config = ConfigFactory.parseString("""
@ -93,4 +94,12 @@ abstract class AkkaSpec(_system: ActorSystem)
override def expectedTestDuration: FiniteDuration = 60 seconds override def expectedTestDuration: FiniteDuration = 60 seconds
/**
 * Silences dead-letter warnings for `sys` by publishing `Mute` events to its
 * event stream. Nothing is published when debug logging is enabled.
 *
 * Each element of `endPatterns` is appended to ".*received dead.*" to form
 * the regex of one warning filter. With no elements, one filter using an
 * empty suffix is published.
 *
 * @param endPatterns regex suffixes appended to the dead-letter prefix pattern
 * @param sys the actor system whose event stream receives the mute filters
 */
def muteDeadLetters(endPatterns: String*)(sys: ActorSystem = system): Unit = {
  // Fall back to a single empty suffix when the caller passes no patterns.
  val suffixes: Seq[String] = if (endPatterns.isEmpty) Seq("") else endPatterns
  if (!sys.log.isDebugEnabled)
    for (suffix <- suffixes)
      sys.eventStream.publish(Mute(EventFilter.warning(pattern = ".*received dead.*" + suffix)))
}
} }