add LoadSnapshotFailed in snapshot protocol, #21842
* treat snapshot load failure in the same way as other recovery failures
* if loading of the snapshot fails the persistent actor will be stopped, since we can't assume that a consistent state would be recovered just by replaying all events, because events may have been deleted
* additional recovery docs
* improve log message
This commit is contained in:
parent
a5e94dd3ed
commit
ea84b4bfdd
13 changed files with 153 additions and 19 deletions
|
|
@ -215,7 +215,8 @@ akka.persistence.snapshot-store.local {
|
|||
# Number of load attempts when recovering from the latest snapshot fails
|
||||
# yet older snapshot files are available. Each recovery attempt will try
|
||||
# to recover using a snapshot file older than the one that previously failed
|
||||
# (if any are present).
|
||||
# (if any are present). If all attempts fail the recovery will fail and
|
||||
# the persistent actor will be stopped.
|
||||
max-load-attempts = 3
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -45,6 +45,7 @@ private[persistence] object Eventsourced {
|
|||
private[persistence] trait Eventsourced extends Snapshotter with PersistenceStash with PersistenceIdentity with PersistenceRecovery {
|
||||
import JournalProtocol._
|
||||
import SnapshotProtocol.LoadSnapshotResult
|
||||
import SnapshotProtocol.LoadSnapshotFailed
|
||||
import Eventsourced._
|
||||
|
||||
private val extension = Persistence(context.system)
|
||||
|
|
@ -502,6 +503,10 @@ private[persistence] trait Eventsourced extends Snapshotter with PersistenceStas
|
|||
changeState(recovering(recoveryBehavior, timeout))
|
||||
journal ! ReplayMessages(lastSequenceNr + 1L, toSnr, replayMax, persistenceId, self)
|
||||
|
||||
case LoadSnapshotFailed(cause) ⇒
|
||||
timeoutCancellable.cancel()
|
||||
try onRecoveryFailure(cause, event = None) finally context.stop(self)
|
||||
|
||||
case RecoveryTick(true) ⇒
|
||||
try onRecoveryFailure(
|
||||
new RecoveryTimedOut(s"Recovery timed out, didn't get snapshot within $timeout"),
|
||||
|
|
|
|||
|
|
@ -208,6 +208,12 @@ private[persistence] object SnapshotProtocol {
|
|||
final case class LoadSnapshotResult(snapshot: Option[SelectedSnapshot], toSequenceNr: Long)
|
||||
extends Response
|
||||
|
||||
/**
|
||||
* Reply message to a failed [[LoadSnapshot]] request.
|
||||
* @param cause failure cause.
|
||||
*/
|
||||
final case class LoadSnapshotFailed(cause: Throwable) extends Response
|
||||
|
||||
/**
|
||||
* Instructs snapshot store to save a snapshot.
|
||||
*
|
||||
|
|
|
|||
|
|
@ -183,7 +183,7 @@ final class PersistencePluginProxy(config: Config) extends Actor with Stash with
|
|||
|
||||
case req: SnapshotProtocol.Request ⇒ req match { // exhaustive match
|
||||
case LoadSnapshot(persistenceId, criteria, toSequenceNr) ⇒
|
||||
sender() ! LoadSnapshotResult(None, toSequenceNr)
|
||||
sender() ! LoadSnapshotFailed(timeoutException)
|
||||
case SaveSnapshot(metadata, snapshot) ⇒
|
||||
sender() ! SaveSnapshotFailure(metadata, timeoutException)
|
||||
case DeleteSnapshot(metadata) ⇒
|
||||
|
|
|
|||
|
|
@ -37,7 +37,7 @@ trait SnapshotStore extends Actor with ActorLogging {
|
|||
breaker.withCircuitBreaker(loadAsync(persistenceId, criteria.limit(toSequenceNr))) map {
|
||||
sso ⇒ LoadSnapshotResult(sso, toSequenceNr)
|
||||
} recover {
|
||||
case e ⇒ LoadSnapshotResult(None, toSequenceNr)
|
||||
case e ⇒ LoadSnapshotFailed(e)
|
||||
} pipeTo senderPersistentActor()
|
||||
|
||||
case SaveSnapshot(metadata, snapshot) ⇒
|
||||
|
|
@ -96,6 +96,12 @@ trait SnapshotStore extends Actor with ActorLogging {
|
|||
/**
|
||||
* Plugin API: asynchronously loads a snapshot.
|
||||
*
|
||||
* If the future `Option` is `None` then all events will be replayed,
|
||||
* i.e. there was no snapshot. If the snapshot could not be loaded the `Future`
|
||||
* should be completed with failure. That is important because events may
|
||||
* have been deleted and just replaying the events might not result in a valid
|
||||
* state.
|
||||
*
|
||||
* This call is protected with a circuit-breaker.
|
||||
*
|
||||
* @param persistenceId id of the persistent actor.
|
||||
|
|
|
|||
|
|
@ -48,7 +48,12 @@ private[persistence] class LocalSnapshotStore extends SnapshotStore with ActorLo
|
|||
// Hence, an attempt to load that snapshot will fail but loading an older snapshot may succeed.
|
||||
//
|
||||
val metadata = snapshotMetadatas(persistenceId, criteria).sorted.takeRight(maxLoadAttempts)
|
||||
Future(load(metadata))(streamDispatcher)
|
||||
Future {
|
||||
load(metadata) match {
|
||||
case Success(s) ⇒ s
|
||||
case Failure(e) ⇒ throw e // all attempts failed, fail the future
|
||||
}
|
||||
}(streamDispatcher)
|
||||
}
|
||||
|
||||
override def saveAsync(metadata: SnapshotMetadata, snapshot: Any): Future[Unit] = {
|
||||
|
|
@ -86,14 +91,19 @@ private[persistence] class LocalSnapshotStore extends SnapshotStore with ActorLo
|
|||
}
|
||||
|
||||
@scala.annotation.tailrec
|
||||
private def load(metadata: immutable.Seq[SnapshotMetadata]): Option[SelectedSnapshot] = metadata.lastOption match {
|
||||
case None ⇒ None
|
||||
private def load(metadata: immutable.Seq[SnapshotMetadata]): Try[Option[SelectedSnapshot]] = metadata.lastOption match {
|
||||
case None ⇒ Success(None) // no snapshots stored
|
||||
case Some(md) ⇒
|
||||
Try(withInputStream(md)(deserialize)) match {
|
||||
case Success(s) ⇒ Some(SelectedSnapshot(md, s.data))
|
||||
case Success(s) ⇒
|
||||
Success(Some(SelectedSnapshot(md, s.data)))
|
||||
case Failure(e) ⇒
|
||||
log.error(e, s"Error loading snapshot [${md}]")
|
||||
load(metadata.init) // try older snapshot
|
||||
val remaining = metadata.init
|
||||
log.error(e, s"Error loading snapshot [{}], remaining attempts: [{}]", md, remaining.size)
|
||||
if (remaining.isEmpty)
|
||||
Failure(e) // all attempts failed
|
||||
else
|
||||
load(remaining) // try older snapshot
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -121,7 +131,7 @@ private[persistence] class LocalSnapshotStore extends SnapshotStore with ActorLo
|
|||
try { p(stream) } finally { stream.close() }
|
||||
|
||||
/** Only by persistenceId and sequenceNr, timestamp is informational - accommodates for 2.13.x series files */
|
||||
private def snapshotFileForWrite(metadata: SnapshotMetadata, extension: String = ""): File =
|
||||
protected def snapshotFileForWrite(metadata: SnapshotMetadata, extension: String = ""): File =
|
||||
new File(snapshotDir, s"snapshot-${URLEncoder.encode(metadata.persistenceId, UTF_8)}-${metadata.sequenceNr}-${metadata.timestamp}${extension}")
|
||||
|
||||
private def snapshotMetadatas(persistenceId: String, criteria: SnapshotSelectionCriteria): immutable.Seq[SnapshotMetadata] = {
|
||||
|
|
|
|||
|
|
@ -73,9 +73,10 @@ object SnapshotFailureRobustnessSpec {
|
|||
|
||||
class FailingLocalSnapshotStore extends LocalSnapshotStore {
|
||||
override def save(metadata: SnapshotMetadata, snapshot: Any): Unit = {
|
||||
if (metadata.sequenceNr == 2) {
|
||||
if (metadata.sequenceNr == 2 || snapshot == "boom") {
|
||||
val bytes = "b0rk".getBytes("UTF-8")
|
||||
withOutputStream(metadata)(_.write(bytes))
|
||||
val tmpFile = withOutputStream(metadata)(_.write(bytes))
|
||||
tmpFile.renameTo(snapshotFileForWrite(metadata))
|
||||
} else super.save(metadata, snapshot)
|
||||
}
|
||||
}
|
||||
|
|
@ -112,10 +113,11 @@ class SnapshotFailureRobustnessSpec extends PersistenceSpec(PersistenceSpec.conf
|
|||
sPersistentActor ! Cmd("kablama")
|
||||
expectMsg(2)
|
||||
system.eventStream.publish(TestEvent.Mute(
|
||||
EventFilter.error(start = "Error loading snapshot [")))
|
||||
EventFilter[java.io.NotSerializableException](start = "Error loading snapshot")))
|
||||
system.eventStream.subscribe(testActor, classOf[Logging.Error])
|
||||
try {
|
||||
val lPersistentActor = system.actorOf(Props(classOf[LoadSnapshotTestPersistentActor], name, testActor))
|
||||
expectMsgType[Logging.Error].message.toString should startWith("Error loading snapshot")
|
||||
expectMsgPF() {
|
||||
case (SnapshotMetadata(`persistenceId`, 1, timestamp), state) ⇒
|
||||
state should ===("blahonga")
|
||||
|
|
@ -131,6 +133,40 @@ class SnapshotFailureRobustnessSpec extends PersistenceSpec(PersistenceSpec.conf
|
|||
}
|
||||
}
|
||||
|
||||
"fail recovery and stop actor when no snapshot could be loaded" in {
|
||||
val sPersistentActor = system.actorOf(Props(classOf[SaveSnapshotTestPersistentActor], name, testActor))
|
||||
val persistenceId = name
|
||||
|
||||
expectMsg(RecoveryCompleted)
|
||||
sPersistentActor ! Cmd("ok")
|
||||
expectMsg(1)
|
||||
// max-load-attempts = 3
|
||||
sPersistentActor ! Cmd("boom")
|
||||
expectMsg(2)
|
||||
sPersistentActor ! Cmd("boom")
|
||||
expectMsg(3)
|
||||
sPersistentActor ! Cmd("boom")
|
||||
expectMsg(4)
|
||||
system.eventStream.publish(TestEvent.Mute(
|
||||
EventFilter[java.io.NotSerializableException](start = "Error loading snapshot")))
|
||||
system.eventStream.publish(TestEvent.Mute(
|
||||
EventFilter[java.io.NotSerializableException](start = "Persistence failure")))
|
||||
system.eventStream.subscribe(testActor, classOf[Logging.Error])
|
||||
try {
|
||||
val lPersistentActor = system.actorOf(Props(classOf[LoadSnapshotTestPersistentActor], name, testActor))
|
||||
(1 to 3).foreach { _ ⇒
|
||||
expectMsgType[Logging.Error].message.toString should startWith("Error loading snapshot")
|
||||
}
|
||||
expectMsgType[Logging.Error].message.toString should startWith("Persistence failure")
|
||||
watch(lPersistentActor)
|
||||
expectTerminated(lPersistentActor)
|
||||
} finally {
|
||||
system.eventStream.unsubscribe(testActor, classOf[Logging.Error])
|
||||
system.eventStream.publish(TestEvent.UnMute(
|
||||
EventFilter.error(start = "Error loading snapshot [")))
|
||||
}
|
||||
}
|
||||
|
||||
"receive failure message when deleting a single snapshot fails" in {
|
||||
val p = system.actorOf(Props(classOf[DeleteSnapshotTestPersistentActor], name, testActor))
|
||||
val persistenceId = name
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue