Fixed remaining issues with gossip based failure detection and removal of unreachable nodes.

* Completed gossip based failure detection. * Completed removal of unreachable nodes according to failure detector. * Added passing tests. * Misc other fixes, more logging, more comments. Signed-off-by: Jonas Bonér <jonas@jonasboner.com>
2012-02-18 17:48:07 +01:00 · 2012-02-18 17:48:07 +01:00 · cfd04bba3d
commit cfd04bba3d
parent db1e1da7e7
4 changed files with 173 additions and 129 deletions
--- a/akka-cluster/src/main/scala/akka/cluster/AccrualFailureDetector.scala
+++ b/akka-cluster/src/main/scala/akka/cluster/AccrualFailureDetector.scala
@ -23,14 +23,17 @@ import System.{ currentTimeMillis ⇒ newTimestamp }
 * <p/>
 * Default threshold is 8, but can be configured in the Akka config.
 */
-class AccrualFailureDetector(system: ActorSystem, val threshold: Int = 8, val maxSampleSize: Int = 1000) {
+class AccrualFailureDetector(system: ActorSystem, address: Address, val threshold: Int = 8, val maxSampleSize: Int = 1000) {

  private final val PhiFactor = 1.0 / math.log(10.0)

-  private case class FailureStats(mean: Double = 0.0D, variance: Double = 0.0D, deviation: Double = 0.0D)
-
  private val log = Logging(system, "FailureDetector")

+  /**
+   * Holds the failure statistics for a specific node Address.
+   */
+  private case class FailureStats(mean: Double = 0.0D, variance: Double = 0.0D, deviation: Double = 0.0D)
+
  /**
   * Implement using optimistic lockless concurrency, all state is represented
   * by this immutable case class and managed by an AtomicReference.
@ -54,22 +57,26 @@ class AccrualFailureDetector(system: ActorSystem, val threshold: Int = 8, val ma
   */
  @tailrec
  final def heartbeat(connection: Address) {
-    log.debug("Heartbeat from connection [{}] ", connection)
-    val oldState = state.get
+    log.debug("Node [{}] - Heartbeat from connection [{}] ", address, connection)

+    val oldState = state.get
+    val oldFailureStats = oldState.failureStats
+    val oldTimestamps = oldState.timestamps
    val latestTimestamp = oldState.timestamps.get(connection)
+
    if (latestTimestamp.isEmpty) {

      // this is heartbeat from a new connection
      // add starter records for this new connection
-      val failureStats = oldState.failureStats + (connection -> FailureStats())
-      val intervalHistory = oldState.intervalHistory + (connection -> Vector.empty[Long])
-      val timestamps = oldState.timestamps + (connection -> newTimestamp)
+      val newFailureStats = oldFailureStats + (connection -> FailureStats())
+      val newIntervalHistory = oldState.intervalHistory + (connection -> Vector.empty[Long])
+      val newTimestamps = oldTimestamps + (connection -> newTimestamp)

-      val newState = oldState copy (version = oldState.version + 1,
-        failureStats = failureStats,
-        intervalHistory = intervalHistory,
-        timestamps = timestamps)
+      val newState = oldState copy (
+        version = oldState.version + 1,
+        failureStats = newFailureStats,
+        intervalHistory = newIntervalHistory,
+        timestamps = newTimestamps)

      // if we won the race then update else try again
      if (!state.compareAndSet(oldState, newState)) heartbeat(connection) // recur
@ -79,7 +86,7 @@ class AccrualFailureDetector(system: ActorSystem, val threshold: Int = 8, val ma
      val timestamp = newTimestamp
      val interval = timestamp - latestTimestamp.get

-      val timestamps = oldState.timestamps + (connection -> timestamp) // record new timestamp
+      val newTimestamps = oldTimestamps + (connection -> timestamp) // record new timestamp

      var newIntervalsForConnection =
        oldState.intervalHistory.get(connection).getOrElse(Vector.empty[Long]) :+ interval // append the new interval to history
@ -89,36 +96,33 @@ class AccrualFailureDetector(system: ActorSystem, val threshold: Int = 8, val ma
        newIntervalsForConnection = newIntervalsForConnection drop 0
      }

-      val failureStats =
+      val newFailureStats =
        if (newIntervalsForConnection.size > 1) {

-          val mean: Double = newIntervalsForConnection.sum / newIntervalsForConnection.size.toDouble
-
-          val oldFailureStats = oldState.failureStats.get(connection).getOrElse(FailureStats())
+          val newMean: Double = newIntervalsForConnection.sum / newIntervalsForConnection.size.toDouble
+          val oldConnectionFailureStats = oldFailureStats.get(connection).getOrElse(throw new IllegalStateException("Can't calculate new failure statistics due to missing heartbeat history"))

          val deviationSum =
            newIntervalsForConnection
              .map(_.toDouble)
-              .foldLeft(0.0D)((x, y) ⇒ x + (y - mean))
+              .foldLeft(0.0D)((x, y) ⇒ x + (y - newMean))

-          val variance: Double = deviationSum / newIntervalsForConnection.size.toDouble
-          val deviation: Double = math.sqrt(variance)
+          val newVariance: Double = deviationSum / newIntervalsForConnection.size.toDouble
+          val newDeviation: Double = math.sqrt(newVariance)

-          val newFailureStats = oldFailureStats copy (mean = mean,
-            deviation = deviation,
-            variance = variance)
+          val newFailureStats = oldConnectionFailureStats copy (mean = newMean, deviation = newDeviation, variance = newVariance)
+          oldFailureStats + (connection -> newFailureStats)

-          oldState.failureStats + (connection -> newFailureStats)
        } else {
-          oldState.failureStats
+          oldFailureStats
        }

-      val intervalHistory = oldState.intervalHistory + (connection -> newIntervalsForConnection)
+      val newIntervalHistory = oldState.intervalHistory + (connection -> newIntervalsForConnection)

      val newState = oldState copy (version = oldState.version + 1,
-        failureStats = failureStats,
-        intervalHistory = intervalHistory,
-        timestamps = timestamps)
+        failureStats = newFailureStats,
+        intervalHistory = newIntervalHistory,
+        timestamps = newTimestamps)

      // if we won the race then update else try again
      if (!state.compareAndSet(oldState, newState)) heartbeat(connection) // recur
@ -138,17 +142,21 @@ class AccrualFailureDetector(system: ActorSystem, val threshold: Int = 8, val ma
  def phi(connection: Address): Double = {
    val oldState = state.get
    val oldTimestamp = oldState.timestamps.get(connection)
+
    val phi =
      if (oldTimestamp.isEmpty) 0.0D // treat unmanaged connections, e.g. with zero heartbeats, as healthy connections
      else {
        val timestampDiff = newTimestamp - oldTimestamp.get
-        val mean = oldState.failureStats.get(connection).getOrElse(FailureStats()).mean
+
+        val stats = oldState.failureStats.get(connection)
+        val mean = stats.getOrElse(throw new IllegalStateException("Can't calculate Failure Detector Phi value for a node that have no heartbeat history")).mean
+
        if (mean == 0.0D) 0.0D
        else PhiFactor * timestampDiff / mean
      }

    // only log if PHI value is starting to get interesting
-    if (phi > 0.0D) log.debug("Phi value [{}] and threshold [{}] for connection [{}] ", phi, threshold, connection)
+    if (phi > 0.0D) log.debug("Node [{}] - Phi value [{}] and threshold [{}] for connection [{}] ", address, phi, threshold, connection)
    phi
  }

--- a/akka-cluster/src/main/scala/akka/cluster/Gossiper.scala
+++ b/akka-cluster/src/main/scala/akka/cluster/Gossiper.scala
@ -210,11 +210,12 @@ case class Gossiper(system: ActorSystemImpl, remote: RemoteActorRefProvider) {

  implicit val defaultTimeout = Timeout(remoteSettings.RemoteSystemDaemonAckTimeout)

+  val failureDetector = new AccrualFailureDetector(
+    system, remoteAddress, clusterSettings.FailureDetectorThreshold, clusterSettings.FailureDetectorMaxSampleSize)
+
  private val nodeToJoin: Option[Address] = clusterSettings.NodeToJoin filter (_ != remoteAddress)

  private val serialization = remote.serialization
-  private val failureDetector = new AccrualFailureDetector(
-    system, clusterSettings.FailureDetectorThreshold, clusterSettings.FailureDetectorMaxSampleSize)

  private val isRunning = new AtomicBoolean(true)
  private val log = Logging(system, "Gossiper")
@ -279,12 +280,10 @@ case class Gossiper(system: ActorSystemImpl, remote: RemoteActorRefProvider) {
    if (isRunning.compareAndSet(true, false)) {
      log.info("Node [{}] - Shutting down Gossiper and ClusterDaemon...", remoteAddress)

-      try connectionManager.shutdown() finally {
-        try system.stop(clusterDaemon) finally {
-          try gossipCanceller.cancel() finally {
-            try scrutinizeCanceller.cancel() finally {
-              log.info("Node [{}] - Gossiper and ClusterDaemon shut down successfully", remoteAddress)
-            }
+      try system.stop(clusterDaemon) finally {
+        try gossipCanceller.cancel() finally {
+          try scrutinizeCanceller.cancel() finally {
+            log.info("Node [{}] - Gossiper and ClusterDaemon shut down successfully", remoteAddress)
          }
        }
      }
@ -298,6 +297,8 @@ case class Gossiper(system: ActorSystemImpl, remote: RemoteActorRefProvider) {
  final def joining(node: Address) {
    log.info("Node [{}] - Node [{}] is joining", remoteAddress, node)

+    failureDetector heartbeat node // update heartbeat in failure detector
+
    val localState = state.get
    val localGossip = localState.latestGossip
    val localMembers = localGossip.members
@ -475,7 +476,7 @@ case class Gossiper(system: ActorSystemImpl, remote: RemoteActorRefProvider) {
   */
  private def gossipTo(address: Address) {
    setUpConnectionTo(address) foreach { connection ⇒
-      log.debug("Node [{}] - Gossiping to [{}]", remoteAddress, address)
+      log.debug("Node [{}] - Gossiping to [{}]", remoteAddress, connection)
      connection ! GossipEnvelope(self, latestGossip)
    }
  }
@ -496,7 +497,7 @@ case class Gossiper(system: ActorSystemImpl, remote: RemoteActorRefProvider) {
  }

  /**
-   * Scrutinizes the cluster; marks members detected by the failure detector as unavailable.
+   * Scrutinizes the cluster; marks members detected by the failure detector as unreachable.
   */
  @tailrec
  final private def scrutinize() {
@ -517,6 +518,8 @@ case class Gossiper(system: ActorSystemImpl, remote: RemoteActorRefProvider) {
        val newMembers = localMembers diff newlyDetectedUnreachableMembers
        val newUnreachableAddresses: Set[Address] = localUnreachableAddresses ++ newlyDetectedUnreachableAddresses

+        log.info("Node [{}] - Marking node(s) an unreachable [{}]", remoteAddress, newlyDetectedUnreachableAddresses.mkString(", "))
+
        val newOverview = localOverview copy (unreachable = newUnreachableAddresses)
        val newGossip = localGossip copy (overview = newOverview, members = newMembers)

--- a/akka-cluster/src/test/scala/akka/cluster/AccrualFailureDetectorSpec.scala
+++ b/akka-cluster/src/test/scala/akka/cluster/AccrualFailureDetectorSpec.scala
@ -13,13 +13,13 @@ class AccrualFailureDetectorSpec extends AkkaSpec("""
    val conn2 = Address("akka", "", "localhost", 2553)

    "return phi value of 0.0D on startup for each address" in {
-      val fd = new AccrualFailureDetector(system)
+      val fd = new AccrualFailureDetector(system, conn)
      fd.phi(conn) must be(0.0D)
      fd.phi(conn2) must be(0.0D)
    }

    "mark node as available after a series of successful heartbeats" in {
-      val fd = new AccrualFailureDetector(system)
+      val fd = new AccrualFailureDetector(system, conn)

      fd.heartbeat(conn)

@ -34,7 +34,7 @@ class AccrualFailureDetectorSpec extends AkkaSpec("""

    // FIXME how should we deal with explicit removal of connection? - if triggered as failure then we have a problem in boostrap - see line 142 in AccrualFailureDetector
    "mark node as dead after explicit removal of connection" ignore {
-      val fd = new AccrualFailureDetector(system)
+      val fd = new AccrualFailureDetector(system, conn)

      fd.heartbeat(conn)

@ -52,7 +52,7 @@ class AccrualFailureDetectorSpec extends AkkaSpec("""
    }

    "mark node as dead if heartbeat are missed" in {
-      val fd = new AccrualFailureDetector(system, threshold = 3)
+      val fd = new AccrualFailureDetector(system, conn, threshold = 3)

      fd.heartbeat(conn)

@ -70,7 +70,7 @@ class AccrualFailureDetectorSpec extends AkkaSpec("""
    }

    "mark node as available if it starts heartbeat again after being marked dead due to detection of failure" in {
-      val fd = new AccrualFailureDetector(system, threshold = 3)
+      val fd = new AccrualFailureDetector(system, conn, threshold = 3)

      fd.heartbeat(conn)

--- a/akka-cluster/src/test/scala/akka/cluster/GossipingAccrualFailureDetectorSpec.scala
+++ b/akka-cluster/src/test/scala/akka/cluster/GossipingAccrualFailureDetectorSpec.scala
@ -1,95 +1,128 @@
-// /**
-//  *  Copyright (C) 2009-2011 Typesafe Inc. <http://www.typesafe.com>
-//  */
-// package akka.cluster
+/**
+ *  Copyright (C) 2009-2012 Typesafe Inc. <http://www.typesafe.com>
+ */
+package akka.cluster

-// import java.net.InetSocketAddress
+import akka.testkit._
+import akka.dispatch._
+import akka.actor._
+import akka.remote._
+import akka.util.duration._

-// import akka.testkit._
-// import akka.dispatch._
-// import akka.actor._
-// import com.typesafe.config._
+import com.typesafe.config._

-// class GossipingAccrualFailureDetectorSpec extends AkkaSpec("""
-//   akka {
-//     loglevel = "INFO"
-//     actor.provider = "akka.remote.RemoteActorRefProvider"
+import java.net.InetSocketAddress

-//     remote.server.hostname = localhost
-//     remote.server.port = 5550
-//     remote.failure-detector.threshold = 3
-//     cluster.seed-nodes = ["akka://localhost:5551"]
-//   }
-//   """) with ImplicitSender {
+class GossipingAccrualFailureDetectorSpec extends AkkaSpec("""
+  akka {
+    loglevel = "INFO"
+    cluster.failure-detector.threshold = 3
+    actor.debug.lifecycle = on
+    actor.debug.autoreceive = on
+  }
+  """) with ImplicitSender {

-//   val conn1 = Address("akka", system.systemName, Some("localhost"), Some(5551))
-//   val node1 = ActorSystem("GossiperSpec", ConfigFactory
-//     .parseString("akka { remote.server.port=5551, cluster.use-cluster = on }")
-//     .withFallback(system.settings.config))
-//   val remote1 =
-//     node1.asInstanceOf[ActorSystemImpl]
-//       .provider.asInstanceOf[RemoteActorRefProvider]
-//       .remote
-//   val gossiper1 = remote1.gossiper
-//   val fd1 = remote1.failureDetector
-//   gossiper1 must be('defined)
+  var gossiper1: Gossiper = _
+  var gossiper2: Gossiper = _
+  var gossiper3: Gossiper = _

-//   val conn2 = RemoteNettyAddress("localhost", 5552)
-//   val node2 = ActorSystem("GossiperSpec", ConfigFactory
-//     .parseString("akka { remote.server.port=5552, cluster.use-cluster = on }")
-//     .withFallback(system.settings.config))
-//   val remote2 =
-//     node2.asInstanceOf[ActorSystemImpl]
-//       .provider.asInstanceOf[RemoteActorRefProvider]
-//       .remote
-//   val gossiper2 = remote2.gossiper
-//   val fd2 = remote2.failureDetector
-//   gossiper2 must be('defined)
+  var node1: ActorSystemImpl = _
+  var node2: ActorSystemImpl = _
+  var node3: ActorSystemImpl = _

-//   val conn3 = RemoteNettyAddress("localhost", 5553)
-//   val node3 = ActorSystem("GossiperSpec", ConfigFactory
-//     .parseString("akka { remote.server.port=5553, cluster.use-cluster = on }")
-//     .withFallback(system.settings.config))
-//   val remote3 =
-//     node3.asInstanceOf[ActorSystemImpl]
-//       .provider.asInstanceOf[RemoteActorRefProvider]
-//       .remote
-//   val gossiper3 = remote3.gossiper
-//   val fd3 = remote3.failureDetector
-//   gossiper3 must be('defined)
+  try {
+    "A Gossip-driven Failure Detector" must {

-//   "A Gossip-driven Failure Detector" must {
+      // ======= NODE 1 ========
+      node1 = ActorSystem("node1", ConfigFactory
+        .parseString("""
+          akka {
+            actor.provider = "akka.remote.RemoteActorRefProvider"
+            remote.netty {
+              hostname = localhost
+              port=5550
+            }
+          }""")
+        .withFallback(system.settings.config))
+        .asInstanceOf[ActorSystemImpl]
+      val remote1 = node1.provider.asInstanceOf[RemoteActorRefProvider]
+      gossiper1 = Gossiper(node1, remote1)
+      val fd1 = gossiper1.failureDetector
+      val address1 = gossiper1.self.address

-//     "receive gossip heartbeats so that all healthy nodes in the cluster are marked 'available'" ignore {
-//       Thread.sleep(5000) // let them gossip for 10 seconds
-//       fd1.isAvailable(conn2) must be(true)
-//       fd1.isAvailable(conn3) must be(true)
-//       fd2.isAvailable(conn1) must be(true)
-//       fd2.isAvailable(conn3) must be(true)
-//       fd3.isAvailable(conn1) must be(true)
-//       fd3.isAvailable(conn2) must be(true)
-//     }
+      // ======= NODE 2 ========
+      node2 = ActorSystem("node2", ConfigFactory
+        .parseString("""
+          akka {
+            actor.provider = "akka.remote.RemoteActorRefProvider"
+            remote.netty {
+              hostname = localhost
+              port = 5551
+            }
+            cluster.node-to-join = "akka://node1@localhost:5550"
+          }""")
+        .withFallback(system.settings.config))
+        .asInstanceOf[ActorSystemImpl]
+      val remote2 = node2.provider.asInstanceOf[RemoteActorRefProvider]
+      gossiper2 = Gossiper(node2, remote2)
+      val fd2 = gossiper2.failureDetector
+      val address2 = gossiper2.self.address

-//     "mark node as 'unavailable' if a node in the cluster is shut down and its heartbeats stops" ignore {
-//       // kill node 3
-//       gossiper3.get.shutdown()
-//       node3.shutdown()
-//       Thread.sleep(5000) // let them gossip for 10 seconds
+      // ======= NODE 3 ========
+      node3 = ActorSystem("node3", ConfigFactory
+        .parseString("""
+          akka {
+            actor.provider = "akka.remote.RemoteActorRefProvider"
+            remote.netty {
+              hostname = localhost
+              port=5552
+            }
+            cluster.node-to-join = "akka://node1@localhost:5550"
+          }""")
+        .withFallback(system.settings.config))
+        .asInstanceOf[ActorSystemImpl]
+      val remote3 = node3.provider.asInstanceOf[RemoteActorRefProvider]
+      gossiper3 = Gossiper(node3, remote3)
+      val fd3 = gossiper3.failureDetector
+      val address3 = gossiper3.self.address

-//       fd1.isAvailable(conn2) must be(true)
-//       fd1.isAvailable(conn3) must be(false)
-//       fd2.isAvailable(conn1) must be(true)
-//       fd2.isAvailable(conn3) must be(false)
-//     }
-//   }
+      "receive gossip heartbeats so that all healthy nodes in the cluster are marked 'available'" in {
+        println("Let the nodes gossip for a while...")
+        Thread.sleep(30.seconds.dilated.toMillis) // let them gossip for 30 seconds
+        fd1.isAvailable(address2) must be(true)
+        fd1.isAvailable(address3) must be(true)
+        fd2.isAvailable(address1) must be(true)
+        fd2.isAvailable(address3) must be(true)
+        fd3.isAvailable(address1) must be(true)
+        fd3.isAvailable(address2) must be(true)
+      }

-//   override def atTermination() {
-//     gossiper1.get.shutdown()
-//     gossiper2.get.shutdown()
-//     gossiper3.get.shutdown()
-//     node1.shutdown()
-//     node2.shutdown()
-//     node3.shutdown()
-//     // FIXME Ordering problem - If we shut down the ActorSystem before the Gossiper then we get an IllegalStateException
-//   }
-// }
+      "mark node as 'unavailable' if a node in the cluster is shut down (and its heartbeats stops)" in {
+        // shut down node3
+        gossiper3.shutdown()
+        node3.shutdown()
+        println("Give the remaning nodes time to detect failure...")
+        Thread.sleep(30.seconds.dilated.toMillis) // give them 30 seconds to detect failure of node3
+        fd1.isAvailable(address2) must be(true)
+        fd1.isAvailable(address3) must be(false)
+        fd2.isAvailable(address1) must be(true)
+        fd2.isAvailable(address3) must be(false)
+      }
+    }
+  } catch {
+    case e: Exception ⇒
+      e.printStackTrace
+      fail(e.toString)
+  }
+
+  override def atTermination() {
+    gossiper1.shutdown()
+    node1.shutdown()
+
+    gossiper2.shutdown()
+    node2.shutdown()
+
+    gossiper3.shutdown()
+    node3.shutdown()
+  }
+}