2013-11-20 13:47:42 +01:00
|
|
|
/**
|
2014-02-02 19:05:45 -06:00
|
|
|
* Copyright (C) 2009-2014 Typesafe Inc. <http://www.typesafe.com>
|
2013-11-20 13:47:42 +01:00
|
|
|
*/
|
|
|
|
|
|
|
|
|
|
package akka.persistence
|
|
|
|
|
|
|
|
|
|
import scala.concurrent.duration._
|
|
|
|
|
import scala.concurrent.forkjoin.ThreadLocalRandom
|
|
|
|
|
import scala.language.postfixOps
|
|
|
|
|
|
|
|
|
|
import com.typesafe.config.ConfigFactory
|
|
|
|
|
|
|
|
|
|
import akka.actor._
|
|
|
|
|
import akka.testkit._
|
|
|
|
|
|
|
|
|
|
/**
 * Fixtures for [[FailureSpec]]: the chaos configuration, the message protocol and the
 * actors (processor, destination and driving application) used by the test.
 */
object FailureSpec {
  /**
   * Test configuration. Selects `akka.persistence.journal.chaos` as the journal plugin and
   * defines the failure rates looked up by the chaos processor and the chaos destination
   * below, plus the rates declared under the chaos journal's own config section.
   */
  val config = ConfigFactory.parseString(
    // Plain (non-interpolated) literal: nothing is substituted into this text.
    """
      akka.persistence.processor.chaos.live-processing-failure-rate = 0.3
      akka.persistence.processor.chaos.replay-processing-failure-rate = 0.1
      akka.persistence.destination.chaos.confirm-failure-rate = 0.3
      akka.persistence.journal.plugin = "akka.persistence.journal.chaos"
      akka.persistence.journal.chaos.write-failure-rate = 0.3
      akka.persistence.journal.chaos.confirm-failure-rate = 0.2
      akka.persistence.journal.chaos.delete-failure-rate = 0.3
      akka.persistence.journal.chaos.replay-failure-rate = 0.25
      akka.persistence.journal.chaos.read-highest-failure-rate = 0.1
      akka.persistence.journal.chaos.class = akka.persistence.journal.chaos.ChaosJournal
      akka.persistence.snapshot-store.local.dir = "target/snapshots-failure-spec/"
    """)

  /** Number of distinct payloads (1 to numMessages) sent by the application and expected back. */
  val numMessages = 10

  /** Tells a [[ChaosProcessorApp]] to start sending payloads to its processor. */
  case object Start

  /** Emitted by a chaos actor once it has accepted `numMessages` payloads. */
  final case class Done(ints: Vector[Int])

  /** Sent to the original sender when processing payload `i` failed and the processor restarts. */
  final case class ProcessingFailure(i: Int)

  /** Sent to the original sender when the journal reported a persistence failure for payload `i`. */
  final case class JournalingFailure(i: Int)

  /**
   * Helpers shared by the chaos actors: a record of the `Int` payloads accepted so far and
   * a random failure decision.
   */
  trait ChaosSupport { this: Actor ⇒
    /** Random source used for failure decisions. */
    def random: ThreadLocalRandom = ThreadLocalRandom.current

    /** Payloads accepted so far, in order of acceptance. */
    var state: Vector[Int] = Vector.empty

    /** Whether payload `i` has already been accepted. */
    def contains(i: Int): Boolean = state.contains(i)

    /**
     * Records payload `i`; once `numMessages` payloads have been recorded, replies to the
     * current sender with [[Done]] carrying the full state.
     */
    def add(i: Int): Unit = {
      state :+= i
      if (state.length == numMessages) sender() ! Done(state)
    }

    /** Returns `true` with probability `rate` (compared against a uniform draw from [0, 1)). */
    def shouldFail(rate: Double): Boolean = random.nextDouble() < rate
  }

  /**
   * Processor that randomly rejects payloads (by throwing) and reports journaling and
   * processing failures back to the sender so that the payload can be resent.
   *
   * @param destination actor whose path is used as the delivery target of the channel
   */
  class ChaosProcessor(destination: ActorRef) extends Processor with ChaosSupport with ActorLogging {
    val config = context.system.settings.config.getConfig("akka.persistence.processor.chaos")
    val liveProcessingFailureRate: Double = config.getDouble("live-processing-failure-rate")
    val replayProcessingFailureRate: Double = config.getDouble("replay-processing-failure-rate")

    val channel: ActorRef = context.actorOf(
      Channel.props("channel", ChannelSettings(redeliverMax = 10, redeliverInterval = 500.milliseconds)),
      "channel")

    override def persistenceId: String = "chaos"

    def receive: Receive = {
      case p @ Persistent(i: Int, _) ⇒
        // replayed and live messages are rejected with different probabilities
        val failureRate = if (recoveryRunning) replayProcessingFailureRate else liveProcessingFailureRate
        if (contains(i)) {
          log.debug(debugMessage(s"ignored duplicate ${i}"))
        } else if (shouldFail(failureRate)) {
          throw new TestException(debugMessage(s"rejected payload ${i}"))
        } else {
          add(i)
          // hand the accepted message to the channel for delivery to the destination
          channel forward Deliver(p, destination.path)
          log.debug(debugMessage(s"processed payload ${i}"))
        }
      case PersistenceFailure(i: Int, _, _) ⇒
        // inform sender about journaling failure so that it can resend
        sender() ! JournalingFailure(i)
      case RecoveryFailure(_) ⇒
        // journal failed during recovery, throw exception to re-recover processor
        throw new TestException(debugMessage("recovery failed"))
    }

    /**
     * If the restart was caused by a live (non-replayed) `Int` payload, requests deletion of
     * that message and notifies the sender before delegating to the default `preRestart`.
     */
    override def preRestart(reason: Throwable, message: Option[Any]): Unit = {
      message match {
        case Some(p @ Persistent(i: Int, _)) if !recoveryRunning ⇒
          deleteMessage(p.sequenceNr)
          log.debug(debugMessage(s"requested deletion of payload ${i}"))
          // inform sender about processing failure so that it can resend
          sender() ! ProcessingFailure(i)
        case _ ⇒ // failures during replay or on other messages need no extra handling here
      }
      super.preRestart(reason, message)
    }

    /** Formats `msg` together with the current mode, last sequence number and sorted state. */
    private def debugMessage(msg: String): String = {
      val mode = if (recoveryRunning) "replay" else "live"
      s"[processor] ${msg} (mode = ${mode} snr = ${lastSequenceNr} state = ${state.sorted})"
    }
  }

  /**
   * Channel destination that randomly declines to confirm messages and ignores payloads it
   * has already accepted.
   */
  class ChaosDestination extends Actor with ChaosSupport with ActorLogging {
    val config = context.system.settings.config.getConfig("akka.persistence.destination.chaos")
    val confirmFailureRate: Double = config.getDouble("confirm-failure-rate")

    def receive: Receive = {
      case cp @ ConfirmablePersistent(i: Int, _, _) ⇒
        if (shouldFail(confirmFailureRate)) {
          // simulated failure: the message is neither recorded nor confirmed
          log.error(debugMessage("confirm message failed", cp))
        } else if (contains(i)) {
          // NOTE(review): duplicates are dropped without calling confirm() — verify that this
          // is intended for redelivered messages whose earlier confirmation was lost.
          log.debug(debugMessage("ignored duplicate", cp))
        } else {
          add(i)
          cp.confirm()
          log.debug(debugMessage("received and confirmed message", cp))
        }
    }

    /** Formats `msg` together with the message's payload, sequence number, redeliveries and the sorted state. */
    private def debugMessage(msg: String, cp: ConfirmablePersistent): String =
      s"[destination] ${msg} (message = ConfirmablePersistent(${cp.payload}, ${cp.sequenceNr}, ${cp.redeliveries}), state = ${state.sorted})"
  }

  /**
   * Drives the test: creates a destination and a processor, sends payloads 1 to `numMessages`
   * on [[Start]], resends payloads after reported failures and relays [[Done]] to `probe`.
   *
   * @param probe receiver of the relayed [[Done]] messages
   */
  class ChaosProcessorApp(probe: ActorRef) extends Actor with ActorLogging {
    val destination: ActorRef = context.actorOf(Props[ChaosDestination], "destination")
    val processor: ActorRef = context.actorOf(Props(classOf[ChaosProcessor], destination), "processor")

    def receive: Receive = {
      case Start                ⇒ (1 to numMessages).foreach(i ⇒ processor ! Persistent(i))
      case Done(ints)           ⇒ probe ! Done(ints)
      case ProcessingFailure(i) ⇒ resend(i, "processing")
      case JournalingFailure(i) ⇒ resend(i, "journaling")
    }

    /** Sends payload `i` to the processor again and logs which kind of failure triggered it. */
    private def resend(i: Int, cause: String): Unit = {
      processor ! Persistent(i)
      log.debug(s"resent ${i} after ${cause} failure")
    }
  }
}
|
|
|
|
|
|
|
|
|
|
/**
 * Runs a [[FailureSpec.ChaosProcessorApp]] against the chaos configuration and checks that
 * every payload is eventually accepted despite the injected failures.
 */
class FailureSpec extends AkkaSpec(FailureSpec.config) with Cleanup with ImplicitSender {
  import FailureSpec._

  "The journaling protocol (= conversation between a processor and a journal)" must {
    "tolerate and recover from random failures" in {
      system.actorOf(Props(classOf[ChaosProcessorApp], testActor)) ! Start
      expectDone() // by processor
      expectDone() // by destination

      system.actorOf(Props(classOf[ChaosProcessorApp], testActor)) // recovery of new instance should have same outcome
      expectDone() // by processor
      // destination doesn't receive messages again because all have been confirmed already
    }
  }

  /**
   * Waits up to `numMessages` seconds for a [[FailureSpec.Done]] message and checks that,
   * once sorted, it contains exactly the payloads 1 to `numMessages`.
   */
  def expectDone(): Unit =
    expectMsgPF(numMessages.seconds) {
      case Done(ints) ⇒ ints.sorted should be((1 to numMessages).toVector)
    }
}
|