Make joining the same node multiple times work, and re-enable the blackhole test. See #2930

2013-03-20 10:32:18 +01:00 · 2013-03-20 10:32:18 +01:00 · 5827a27b94
commit 5827a27b94
parent f18575e251
5 changed files with 57 additions and 27 deletions
--- a/akka-cluster/src/multi-jvm/scala/akka/cluster/MultiNodeClusterSpec.scala
+++ b/akka-cluster/src/multi-jvm/scala/akka/cluster/MultiNodeClusterSpec.scala
@ -182,6 +182,27 @@ trait MultiNodeClusterSpec extends Suite with STMultiNodeSpec with WatchedByCoro
    enterBarrier(roles.map(_.name).mkString("-") + "-joined")
  }

+  /**
+   * Join `joinNode`, re-sending the join request on every failed poll until the
+   * local cluster view shows `joinNode` as Up and this node as Joining or Up.
+   * Polling is delegated to `awaitCond`, which is given `max` and `interval`.
+   */
+  def joinWithin(joinNode: RoleName, max: Duration = remaining, interval: Duration = 1.second): Unit = {
+    def memberInState(member: Address, status: Seq[MemberStatus]): Boolean =
+      clusterView.members.exists { m ⇒ (m.address == member) && status.contains(m.status) }
+
+    def joined: Boolean =
+      memberInState(joinNode, List(MemberStatus.Up)) &&
+        memberInState(myself, List(MemberStatus.Joining, MemberStatus.Up))
+
+    cluster join joinNode
+    awaitCond({
+      clusterView.refreshCurrentState()
+      // not joined yet: send another join request before the next poll
+      joined || { cluster join joinNode; false }
+    }, max, interval)
+  }
+
  /**
   * Assert that the member addresses match the expected addresses in the
   * sort order used by the cluster.
--- a/akka-cluster/src/multi-jvm/scala/akka/cluster/UnreachableNodeRejoinsClusterSpec.scala
+++ b/akka-cluster/src/multi-jvm/scala/akka/cluster/UnreachableNodeRejoinsClusterSpec.scala
@ -25,10 +25,14 @@ case class UnreachableNodeRejoinsClusterMultiNodeConfig(failureDetectorPuppet: B

  commonConfig(ConfigFactory.parseString(
    """
+      # this setting is here to limit the number of retries and failures while the
+      # node is being blackholed
+      akka.remote.failure-detector.retry-gate-closed-for = 500 ms
+
      akka.remote.log-remote-lifecycle-events = off
      akka.cluster.publish-stats-interval = 0s
      akka.loglevel = INFO
-    """).withFallback(debugConfig(on = false).withFallback(MultiNodeClusterSpec.clusterConfig)))
+    """).withFallback(debugConfig(on = false).withFallback(MultiNodeClusterSpec.clusterConfig(failureDetectorPuppet))))

  testTransport(on = true)
 }
@ -74,8 +78,7 @@ abstract class UnreachableNodeRejoinsClusterSpec(multiNodeConfig: UnreachableNod
      endBarrier
    }

-    // FIXME ignored due to ticket #2930 - timeout changing throttler mode
-    "mark a node as UNREACHABLE when we pull the network" taggedAs LongRunningTest ignore {
+    "mark a node as UNREACHABLE when we pull the network" taggedAs LongRunningTest in {
      // let them send at least one heartbeat to each other after the gossip convergence
      // because for new joining nodes we remove them from the failure detector when
      // receive gossip
@ -125,8 +128,7 @@ abstract class UnreachableNodeRejoinsClusterSpec(multiNodeConfig: UnreachableNod
      endBarrier
    }

-    // FIXME ignored due to ticket #2930 - timeout changing throttler mode
-    "mark the node as DOWN" taggedAs LongRunningTest ignore {
+    "mark the node as DOWN" taggedAs LongRunningTest in {
      runOn(master) {
        cluster down victim
      }
@ -135,13 +137,12 @@ abstract class UnreachableNodeRejoinsClusterSpec(multiNodeConfig: UnreachableNod
        awaitMembersUp(roles.size - 1, Set(victim))
        // eventually removed
        awaitCond(clusterView.unreachableMembers.isEmpty, 15 seconds)
-      }

+      }
      endBarrier
    }

-    // FIXME ignored due to ticket #2930 - timeout changing throttler mode
-    "allow node to REJOIN when the network is plugged back in" taggedAs LongRunningTest ignore {
+    "allow node to REJOIN when the network is plugged back in" taggedAs LongRunningTest in {
      runOn(first) {
        // put the network back in
        allBut(victim).foreach { roleName ⇒
@ -152,7 +153,7 @@ abstract class UnreachableNodeRejoinsClusterSpec(multiNodeConfig: UnreachableNod
      enterBarrier("plug_in_victim")

      runOn(victim) {
-        cluster join master
+        joinWithin(master, 10.seconds)
      }

      awaitMembersUp(roles.size)