Merge pull request #29219 from akka/feature-remember-entities-rework

Rework of Remember Entities and sharding internals #27763
Johan Andrén 2020-06-17 09:07:49 +02:00 committed by GitHub
commit 4811b53917
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
82 changed files with 7232 additions and 1662 deletions

View file

@ -14,7 +14,7 @@ import akka.annotation.InternalApi
* INTERNAL API
*/
@InternalApi
private[akka] final class JFRActorFlightRecorder(val system: ActorSystem[_]) extends ActorFlightRecorder {
private[akka] final class JFRActorFlightRecorder() extends ActorFlightRecorder {
override val delivery: DeliveryFlightRecorder = new JFRDeliveryFlightRecorder
}

View file

@ -4,15 +4,10 @@
package akka.actor.typed.internal
import scala.util.Failure
import scala.util.Success
import akka.actor.ActorPath
import akka.actor.typed.ActorSystem
import akka.actor.typed.Extension
import akka.actor.typed.ExtensionId
import akka.actor.typed.{ ActorSystem, Extension, ExtensionId }
import akka.annotation.InternalApi
import akka.util.JavaVersion
import akka.util.FlightRecorderLoader
/**
* INTERNAL API
@ -21,20 +16,10 @@ import akka.util.JavaVersion
object ActorFlightRecorder extends ExtensionId[ActorFlightRecorder] {
override def createExtension(system: ActorSystem[_]): ActorFlightRecorder =
if (JavaVersion.majorVersion >= 11 && system.settings.config.getBoolean("akka.java-flight-recorder.enabled")) {
// Dynamic instantiation to not trigger class load on earlier JDKs
import scala.language.existentials
system.dynamicAccess.createInstanceFor[ActorFlightRecorder](
"akka.actor.typed.internal.jfr.JFRActorFlightRecorder",
(classOf[ActorSystem[_]], system) :: Nil) match {
case Success(jfr) => jfr
case Failure(ex) =>
system.log.warn("Failed to load JFR Actor flight recorder, falling back to noop. Exception: {}", ex.toString)
NoOpActorFlightRecorder
} // fallback if not possible to dynamically load for some reason
} else
// JFR not available on Java 8
NoOpActorFlightRecorder
FlightRecorderLoader.load[ActorFlightRecorder](
system,
"akka.actor.typed.internal.jfr.JFRActorFlightRecorder",
NoOpActorFlightRecorder)
}
/**
@ -43,7 +28,6 @@ object ActorFlightRecorder extends ExtensionId[ActorFlightRecorder] {
@InternalApi
private[akka] trait ActorFlightRecorder extends Extension {
val delivery: DeliveryFlightRecorder
}
/**

View file

@ -4,8 +4,10 @@
package akka.pattern.internal
import akka.actor.SupervisorStrategy._
import scala.concurrent.duration._
import akka.actor.{ OneForOneStrategy, _ }
import akka.actor.SupervisorStrategy._
import akka.annotation.InternalApi
import akka.pattern.{
BackoffReset,
@ -17,8 +19,6 @@ import akka.pattern.{
ReplyWith
}
import scala.concurrent.duration._
/**
* INTERNAL API
*

View file

@ -4,8 +4,10 @@
package akka.pattern.internal
import akka.actor.SupervisorStrategy.{ Directive, Escalate }
import scala.concurrent.duration.FiniteDuration
import akka.actor.{ Actor, ActorLogging, OneForOneStrategy, Props, SupervisorStrategy, Terminated }
import akka.actor.SupervisorStrategy.{ Directive, Escalate }
import akka.annotation.InternalApi
import akka.pattern.{
BackoffReset,
@ -17,8 +19,6 @@ import akka.pattern.{
ReplyWith
}
import scala.concurrent.duration.FiniteDuration
/**
* INTERNAL API
*

View file

@ -0,0 +1,32 @@
/*
* Copyright (C) 2020 Lightbend Inc. <https://www.lightbend.com>
*/
package akka.util
import akka.actor.{ ClassicActorSystemProvider, ExtendedActorSystem }
import akka.annotation.InternalApi
import scala.reflect.ClassTag
import scala.util.{ Failure, Success }
/**
* INTERNAL API
*/
@InternalApi
private[akka] object FlightRecorderLoader {
def load[T: ClassTag](casp: ClassicActorSystemProvider, fqcn: String, fallback: T): T = {
val system = casp.classicSystem.asInstanceOf[ExtendedActorSystem]
if (JavaVersion.majorVersion >= 11 && system.settings.config.getBoolean("akka.java-flight-recorder.enabled")) {
// Dynamic instantiation to not trigger class load on earlier JDKs
system.dynamicAccess.createInstanceFor[T](fqcn, Nil) match {
case Success(jfr) =>
jfr
case Failure(ex) =>
system.log.warning("Failed to load JFR flight recorder, falling back to noop. Exception: {}", ex.toString)
fallback
} // fallback if not possible to dynamically load for some reason
} else
// JFR not available on Java 8
fallback
}
}
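
FlightRecorderLoader is internal API; for context, here is a minimal sketch (not part of this diff) of how an extension can delegate to it, mirroring the ActorFlightRecorder change above. The My*/NoOpMy* names and the FQCN are illustrative assumptions.

import akka.actor.typed.{ ActorSystem, Extension, ExtensionId }
import akka.util.FlightRecorderLoader

// Illustrative recorder trait and no-op fallback (hypothetical names)
trait MyFlightRecorder extends Extension
object NoOpMyFlightRecorder extends MyFlightRecorder

object MyFlightRecorder extends ExtensionId[MyFlightRecorder] {
  override def createExtension(system: ActorSystem[_]): MyFlightRecorder =
    FlightRecorderLoader.load[MyFlightRecorder](
      system, // any ClassicActorSystemProvider
      "com.example.jfr.JfrMyFlightRecorder", // JFR-backed implementation, loaded reflectively on JDK 11+
      NoOpMyFlightRecorder) // fallback when JFR is unavailable or loading fails
}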

View file

@ -4,7 +4,7 @@
package akka.util
import akka.actor.ActorRef
import akka.actor.{ ActorRef, Dropped }
import akka.annotation.InternalApi
import akka.japi.function.Procedure2
@ -247,6 +247,19 @@ final class MessageBufferMap[I] {
bufferMap.remove(id)
}
/**
* Remove the buffer for an id, but publish a [[akka.actor.Dropped]] for each dropped buffered message
* @return how many buffered messages were dropped
*/
def drop(id: I, reason: String, deadLetters: ActorRef): Int = {
val entries = bufferMap.get(id)
if (entries.nonEmpty) {
entries.foreach((msg, ref) => deadLetters ! Dropped(msg, reason, ref, ActorRef.noSender))
}
remove(id)
entries.size
}
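
A rough usage sketch for the new drop method (not part of this diff; MessageBufferMap is internal API, and the ids and reason text below are illustrative):

import akka.actor.{ ActorRef, ActorSystem }
import akka.util.MessageBufferMap

object MessageBufferDropExample extends App {
  val system = ActorSystem("example")
  val buffers = new MessageBufferMap[String]
  buffers.append("entity-1", "hello", ActorRef.noSender)

  // Publishes akka.actor.Dropped to dead letters for each buffered message and returns the count
  val droppedCount = buffers.drop("entity-1", "Entity stopped without restart", system.deadLetters)
  println(s"dropped $droppedCount buffered messages")
  system.terminate()
}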
/**
* Check if the buffer map contains an id.
*

View file

@ -11,6 +11,7 @@ import com.typesafe.config.Config
import akka.actor.typed.ActorSystem
import akka.annotation.InternalApi
import akka.cluster.ClusterSettings.DataCenter
import akka.cluster.sharding.typed.ClusterShardingSettings.RememberEntitiesStoreModeDData
import akka.cluster.sharding.{ ClusterShardingSettings => ClassicShardingSettings }
import akka.cluster.singleton.{ ClusterSingletonManagerSettings => ClassicClusterSingletonManagerSettings }
import akka.cluster.typed.Cluster
@ -47,6 +48,7 @@ object ClusterShardingSettings {
passivateIdleEntityAfter = classicSettings.passivateIdleEntityAfter,
shardRegionQueryTimeout = classicSettings.shardRegionQueryTimeout,
stateStoreMode = StateStoreMode.byName(classicSettings.stateStoreMode),
rememberEntitiesStoreMode = RememberEntitiesStoreMode.byName(classicSettings.rememberEntitiesStore),
new TuningParameters(classicSettings.tuningParameters),
new ClusterSingletonManagerSettings(
classicSettings.coordinatorSingletonSettings.singletonName,
@ -63,6 +65,7 @@ object ClusterShardingSettings {
journalPluginId = settings.journalPluginId,
snapshotPluginId = settings.snapshotPluginId,
stateStoreMode = settings.stateStoreMode.name,
rememberEntitiesStore = settings.rememberEntitiesStoreMode.name,
passivateIdleEntityAfter = settings.passivateIdleEntityAfter,
shardRegionQueryTimeout = settings.shardRegionQueryTimeout,
new ClassicShardingSettings.TuningParameters(
@ -101,16 +104,57 @@ object ClusterShardingSettings {
if (role == "" || role == null) None else Option(role)
sealed trait StateStoreMode { def name: String }
/**
* Java API
*/
def stateStoreModePersistence(): StateStoreMode = StateStoreModePersistence
/**
* Java API
*/
def stateStoreModeDdata(): StateStoreMode = StateStoreModeDData
object StateStoreMode {
def byName(name: String): StateStoreMode =
if (name == StateStoreModePersistence.name) StateStoreModePersistence
else if (name == StateStoreModeDData.name) StateStoreModeDData
else
throw new IllegalArgumentException("Not recognized StateStoreMode, only 'ddata' is supported.")
throw new IllegalArgumentException(
s"Not recognized StateStoreMode, only '${StateStoreModePersistence.name}' and '${StateStoreModeDData.name}' are supported.")
}
case object StateStoreModePersistence extends StateStoreMode { override def name = "persistence" }
case object StateStoreModeDData extends StateStoreMode { override def name = "ddata" }
/**
* Java API
*/
def rememberEntitiesStoreModeEventSourced(): RememberEntitiesStoreMode = RememberEntitiesStoreModeEventSourced
/**
* Java API
*/
def rememberEntitiesStoreModeDdata(): RememberEntitiesStoreMode = RememberEntitiesStoreModeDData
sealed trait RememberEntitiesStoreMode { def name: String }
object RememberEntitiesStoreMode {
def byName(name: String): RememberEntitiesStoreMode =
if (name == RememberEntitiesStoreModeEventSourced.name) RememberEntitiesStoreModeEventSourced
else if (name == RememberEntitiesStoreModeDData.name) RememberEntitiesStoreModeDData
else
throw new IllegalArgumentException(
s"Not recognized RememberEntitiesStore, only '${RememberEntitiesStoreModeDData.name}' and '${RememberEntitiesStoreModeEventSourced.name}' are supported.")
}
final case object RememberEntitiesStoreModeEventSourced extends RememberEntitiesStoreMode {
override def name = "eventsourced"
}
final case object RememberEntitiesStoreModeDData extends RememberEntitiesStoreMode { override def name = "ddata" }
// generated using kaze-class
final class TuningParameters private (
val bufferSize: Int,
@ -279,15 +323,36 @@ final class ClusterShardingSettings(
val passivateIdleEntityAfter: FiniteDuration,
val shardRegionQueryTimeout: FiniteDuration,
val stateStoreMode: ClusterShardingSettings.StateStoreMode,
val rememberEntitiesStoreMode: ClusterShardingSettings.RememberEntitiesStoreMode,
val tuningParameters: ClusterShardingSettings.TuningParameters,
val coordinatorSingletonSettings: ClusterSingletonManagerSettings) {
import akka.cluster.sharding.typed.ClusterShardingSettings.StateStoreModeDData
import akka.cluster.sharding.typed.ClusterShardingSettings.StateStoreModePersistence
require(
stateStoreMode == StateStoreModePersistence || stateStoreMode == StateStoreModeDData,
s"Unknown 'state-store-mode' [$stateStoreMode], " +
s"valid values are '${StateStoreModeDData.name}' or '${StateStoreModePersistence.name}'")
@deprecated("Use constructor with rememberEntitiesStoreMode", "2.6.6") // FIXME update version once merged
def this(
numberOfShards: Int,
role: Option[String],
dataCenter: Option[DataCenter],
rememberEntities: Boolean,
journalPluginId: String,
snapshotPluginId: String,
passivateIdleEntityAfter: FiniteDuration,
shardRegionQueryTimeout: FiniteDuration,
stateStoreMode: ClusterShardingSettings.StateStoreMode,
tuningParameters: ClusterShardingSettings.TuningParameters,
coordinatorSingletonSettings: ClusterSingletonManagerSettings) =
this(
numberOfShards,
role,
dataCenter,
rememberEntities,
journalPluginId,
snapshotPluginId,
passivateIdleEntityAfter,
shardRegionQueryTimeout,
stateStoreMode,
RememberEntitiesStoreModeDData,
tuningParameters,
coordinatorSingletonSettings)
/**
* INTERNAL API
@ -322,6 +387,10 @@ final class ClusterShardingSettings(
def withStateStoreMode(stateStoreMode: ClusterShardingSettings.StateStoreMode): ClusterShardingSettings =
copy(stateStoreMode = stateStoreMode)
def withRememberEntitiesStoreMode(
rememberEntitiesStoreMode: ClusterShardingSettings.RememberEntitiesStoreMode): ClusterShardingSettings =
copy(rememberEntitiesStoreMode = rememberEntitiesStoreMode)
def withPassivateIdleEntityAfter(duration: FiniteDuration): ClusterShardingSettings =
copy(passivateIdleEntityAfter = duration)
@ -349,6 +418,7 @@ final class ClusterShardingSettings(
journalPluginId: String = journalPluginId,
snapshotPluginId: String = snapshotPluginId,
stateStoreMode: ClusterShardingSettings.StateStoreMode = stateStoreMode,
rememberEntitiesStoreMode: ClusterShardingSettings.RememberEntitiesStoreMode = rememberEntitiesStoreMode,
tuningParameters: ClusterShardingSettings.TuningParameters = tuningParameters,
coordinatorSingletonSettings: ClusterSingletonManagerSettings = coordinatorSingletonSettings,
passivateIdleEntityAfter: FiniteDuration = passivateIdleEntityAfter,
@ -363,6 +433,7 @@ final class ClusterShardingSettings(
passivateIdleEntityAfter,
shardRegionQueryTimeout,
stateStoreMode,
rememberEntitiesStoreMode,
tuningParameters,
coordinatorSingletonSettings)
}

View file

@ -9,7 +9,6 @@ import java.util.Optional
import scala.compat.java8.OptionConverters._
import scala.concurrent.duration.Duration
import scala.reflect.ClassTag
import akka.actor.typed.ActorRef
import akka.actor.typed.ActorSystem
import akka.actor.typed.Behavior
@ -18,7 +17,7 @@ import akka.actor.typed.scaladsl.LoggerOps
import akka.annotation.InternalApi
import akka.cluster.sharding.ShardRegion.EntityId
import akka.cluster.sharding.typed.ClusterShardingSettings
import akka.cluster.sharding.typed.ClusterShardingSettings.StateStoreModeDData
import akka.cluster.sharding.typed.ClusterShardingSettings.{ RememberEntitiesStoreModeDData, StateStoreModeDData }
import akka.cluster.sharding.typed.ShardedDaemonProcessSettings
import akka.cluster.sharding.typed.ShardingEnvelope
import akka.cluster.sharding.typed.ShardingMessageExtractor
@ -133,6 +132,7 @@ private[akka] final class ShardedDaemonProcessImpl(system: ActorSystem[_])
Duration.Zero, // passivation disabled
shardingBaseSettings.shardRegionQueryTimeout,
StateStoreModeDData,
RememberEntitiesStoreModeDData, // not used as remembered entities is off
shardingBaseSettings.tuningParameters,
shardingBaseSettings.coordinatorSingletonSettings)
}

View file

@ -8,7 +8,7 @@
<pattern>%date{ISO8601} %-5level %logger %marker - %msg MDC: {%mdc}%n</pattern>
</encoder>
</appender>
<root level="INFO">
<appender-ref ref="STDOUT"/>
</root>

View file

@ -0,0 +1,308 @@
/*
* Copyright (C) 2019-2020 Lightbend Inc. <https://www.lightbend.com>
*/
package akka.cluster.sharding
import java.nio.file.Paths
import java.util.concurrent.TimeUnit.NANOSECONDS
import java.util.concurrent.atomic.AtomicInteger
import akka.actor._
import akka.cluster.MemberStatus
import akka.cluster.sharding.ShardRegion.{ CurrentShardRegionState, GetShardRegionState, Passivate }
import akka.testkit._
import akka.util.ccompat._
import com.typesafe.config.ConfigFactory
import org.HdrHistogram.Histogram
import scala.concurrent.duration._
@ccompatUsedUntil213
object ClusterShardingRememberEntitiesPerfSpec {
val NrRegions = 6
// use 5 for "real" testing
val NrIterations = 2
// use 5 for "real" testing
val NrOfMessagesFactor = 1
case class In(id: Long, created: Long = System.currentTimeMillis())
case class Out(latency: Long)
class LatencyEntity extends Actor with ActorLogging {
override def receive: Receive = {
case In(_, created) =>
sender() ! Out(System.currentTimeMillis() - created)
case _: Stop =>
// log.debug("Stop received {}", self.path.name)
context.parent ! Passivate("stop")
case "stop" =>
// log.debug("Final Stop received {}", self.path.name)
context.stop(self)
case msg => throw new RuntimeException("unexpected msg " + msg)
}
}
object LatencyEntity {
val extractEntityId: ShardRegion.ExtractEntityId = {
case in: In => (in.id.toString, in)
case msg @ Stop(id) => (id.toString, msg)
}
val extractShardId: ShardRegion.ExtractShardId = _ => "0"
}
case class Stop(id: Int)
}
object ClusterShardingRememberEntitiesPerfSpecConfig
extends MultiNodeClusterShardingConfig(
rememberEntities = true,
additionalConfig = s"""
akka.loglevel = DEBUG
akka.testconductor.barrier-timeout = 3 minutes
akka.remote.artery.advanced.outbound-message-queue-size = 10000
akka.remote.artery.advanced.maximum-frame-size = 512 KiB
# comment next line to enable durable lmdb storage
akka.cluster.sharding.distributed-data.durable.keys = []
akka.cluster.sharding {
remember-entities = on
}
""") {
val first = role("first")
val second = role("second")
val third = role("third")
nodeConfig(third)(ConfigFactory.parseString(s"""
akka.cluster.sharding.distributed-data.durable.lmdb {
# use same directory when starting new node on third (not used at same time)
dir = "$targetDir/sharding-third"
}
"""))
}
class ClusterShardingRememberEntitiesPerfSpecMultiJvmNode1 extends ClusterShardingRememberEntitiesPerfSpec
class ClusterShardingRememberEntitiesPerfSpecMultiJvmNode2 extends ClusterShardingRememberEntitiesPerfSpec
class ClusterShardingRememberEntitiesPerfSpecMultiJvmNode3 extends ClusterShardingRememberEntitiesPerfSpec
abstract class ClusterShardingRememberEntitiesPerfSpec
extends MultiNodeClusterShardingSpec(ClusterShardingRememberEntitiesPerfSpecConfig)
with ImplicitSender {
import ClusterShardingRememberEntitiesPerfSpec._
import ClusterShardingRememberEntitiesPerfSpecConfig._
def startSharding(): Unit = {
(1 to NrRegions).foreach { n =>
startSharding(
system,
typeName = s"EntityLatency$n",
entityProps = Props(new LatencyEntity()),
extractEntityId = LatencyEntity.extractEntityId,
extractShardId = LatencyEntity.extractShardId)
}
}
var latencyRegions = Vector.empty[ActorRef]
val latencyCount = new AtomicInteger(0)
override protected def atStartup(): Unit = {
super.atStartup()
join(first, first)
startSharding()
// this will make it run on first
runOn(first) {
latencyRegions = (1 to NrRegions).map { n =>
val region = ClusterSharding(system).shardRegion(s"EntityLatency$n")
region ! In(0)
expectMsgType[Out]
region
}.toVector
}
enterBarrier("allocated-on-first")
join(second, first)
join(third, first)
within(20.seconds) {
awaitAssert {
cluster.state.members.size should ===(3)
cluster.state.members.unsorted.map(_.status) should ===(Set(MemberStatus.Up))
}
}
enterBarrier("all-up")
}
"Cluster sharding with remember entities performance" must {
val percentiles = List(99.9, 99.0, 95.0, 50.0)
def runBench(name: String)(logic: (Int, ActorRef, Histogram) => Long): Unit = {
val testRun = latencyCount.getAndIncrement()
runOn(first) {
val recording = new FlightRecording(system)
recording.start()
val region = latencyRegions(testRun)
val fullHistogram = new Histogram(10L * 1000L, 2)
val throughputs = (1 to NrIterations).map { iteration =>
val histogram: Histogram = new Histogram(10L * 1000L, 2)
val startTime = System.nanoTime()
val numberOfMessages = logic(iteration, region, histogram)
val took = NANOSECONDS.toMillis(System.nanoTime - startTime)
val throughput = numberOfMessages * 1000.0 / took
println(
s"### Test [${name}] stop with $numberOfMessages took $took ms, " +
f"throughput $throughput%,.0f msg/s")
// println("Iteration Latencies: ")
// histogram.outputPercentileDistribution(System.out, 1.0)
fullHistogram.add(histogram)
throughput
}
println(f"Average throughput: ${throughputs.sum / NrIterations}%,.0f msg/s")
println("Combined latency figures:")
println(s"total ${fullHistogram.getTotalCount} max ${fullHistogram.getMaxValue} ${percentiles
.map(p => s"$p% ${fullHistogram.getValueAtPercentile(p)}ms")
.mkString(" ")}")
recording.endAndDump(Paths.get("target", s"${name.replace(" ", "-")}.jfr"))
}
enterBarrier(s"after-start-stop-${testRun}")
}
"test when starting new entity" in {
val numberOfMessages = 200 * NrOfMessagesFactor
runBench("start new entities") { (iteration, region, histogram) =>
(1 to numberOfMessages).foreach { n =>
region ! In(iteration * 100000 + n)
}
for (_ <- 1 to numberOfMessages) {
histogram.recordValue(expectMsgType[Out].latency)
}
numberOfMessages
}
}
"test latency when starting new entity and sending a few messages" in {
val numberOfMessages = 800 * NrOfMessagesFactor
runBench("start, few messages") { (iteration, region, histogram) =>
for (n <- 1 to numberOfMessages / 5; _ <- 1 to 5) {
region ! In(iteration * 100000 + n)
}
for (_ <- 1 to numberOfMessages) {
histogram.recordValue(expectMsgType[Out].latency)
}
numberOfMessages
}
}
"test latency when starting new entity and sending a few messages to it and stopping" in {
val numberOfMessages = 800 * NrOfMessagesFactor
// 160 entities, and an extra one for the initialization
// all but the first one are not removed
runBench("start, few messages, stop") { (iteration, region, histogram) =>
for (n <- 1 to numberOfMessages / 5; m <- 1 to 5) {
val id = iteration * 100000 + n
region ! In(id)
if (m == 5) {
region ! Stop(id)
}
}
for (_ <- 1 to numberOfMessages) {
try {
histogram.recordValue(expectMsgType[Out].latency)
} catch {
case e: AssertionError =>
log.error(s"Received ${histogram.getTotalCount} out of $numberOfMessages")
throw e
}
}
awaitAssert({
val probe = TestProbe()
region.tell(GetShardRegionState, probe.ref)
val stats = probe.expectMsgType[CurrentShardRegionState]
stats.shards.head.shardId shouldEqual "0"
stats.shards.head.entityIds.toList.sorted shouldEqual List("0") // the init entity
}, 2.seconds)
numberOfMessages
}
}
"test latency when starting, few messages, stopping, few messages" in {
val numberOfMessages = 800 * NrOfMessagesFactor
runBench("start, few messages, stop, few messages") { (iteration, region, histogram) =>
for (n <- 1 to numberOfMessages / 5; m <- 1 to 5) {
val id = iteration * 100000 + n
region ! In(id)
if (m == 2) {
region ! Stop(id)
}
}
for (_ <- 1 to numberOfMessages) {
try {
histogram.recordValue(expectMsgType[Out].latency)
} catch {
case e: AssertionError =>
log.error(s"Received ${histogram.getTotalCount} out of $numberOfMessages")
throw e
}
}
numberOfMessages
}
}
"test when starting some new entities mixed with sending to started" in {
runBench("starting mixed with sending to started") { (iteration, region, histogram) =>
val numberOfMessages = 1600 * NrOfMessagesFactor
(1 to numberOfMessages).foreach { n =>
val msg =
if (n % 20 == 0)
-(iteration * 100000 + n) // unique, will start new entity
else
iteration * 100000 + (n % 10) // these will go to same 10 started entities
region ! In(msg)
if (n == 10) {
for (_ <- 1 to 10) {
histogram.recordValue(expectMsgType[Out].latency)
}
}
}
for (_ <- 1 to numberOfMessages - 10) {
histogram.recordValue(expectMsgType[Out].latency)
}
numberOfMessages
}
}
"test sending to started" in {
runBench("sending to started") { (iteration, region, histogram) =>
val numberOfMessages = 1600 * NrOfMessagesFactor
(1 to numberOfMessages).foreach { n =>
region ! In(iteration * 100000 + (n % 10)) // these will go to same 10 started entities
if (n == 10) {
for (_ <- 1 to 10) {
histogram.recordValue(expectMsgType[Out].latency)
}
}
}
for (_ <- 1 to numberOfMessages - 10) {
histogram.recordValue(expectMsgType[Out].latency)
}
numberOfMessages
}
}
}
}

View file

@ -0,0 +1,19 @@
/*
* Copyright (C) 2020 Lightbend Inc. <https://www.lightbend.com>
*/
package akka.cluster.sharding.typed.javadsl;
import akka.actor.typed.ActorSystem;
import akka.cluster.sharding.typed.ClusterShardingSettings;
public class ClusterShardingSettingsCompileOnly {
static void shouldBeUsableFromJava() {
ActorSystem<?> system = null;
ClusterShardingSettings.StateStoreMode mode = ClusterShardingSettings.stateStoreModeDdata();
ClusterShardingSettings.create(system)
.withStateStoreMode(mode)
.withRememberEntitiesStoreMode(ClusterShardingSettings.rememberEntitiesStoreModeDdata());
}
}
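
A Scala counterpart sketch of the same settings calls (not part of the diff), selecting the new event sourced remember entities store:

import akka.actor.typed.ActorSystem
import akka.cluster.sharding.typed.ClusterShardingSettings
import akka.cluster.sharding.typed.ClusterShardingSettings.RememberEntitiesStoreModeEventSourced

object ClusterShardingSettingsScalaExample {
  // Assumes an already started typed ActorSystem
  def settingsFor(system: ActorSystem[_]): ClusterShardingSettings =
    ClusterShardingSettings(system)
      .withRememberEntitiesStoreMode(RememberEntitiesStoreModeEventSourced)
}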

View file

@ -0,0 +1,43 @@
/*
* Copyright (C) 2020 Lightbend Inc. <https://www.lightbend.com>
*/
package akka.cluster.sharding
import java.nio.file.Path
import akka.actor.{ ActorSystem, ExtendedActorSystem }
/**
* This will work on JDK11 and JDK8 built with the enable-jfr flag (8u262+).
*
* For Akka JFR recordings you may need to run a publish for multi jvm tests
* to get the CompileJdk9 things compiled.
*/
class FlightRecording(system: ActorSystem) {
private val dynamic = system.asInstanceOf[ExtendedActorSystem].dynamicAccess
private val recording =
dynamic.createInstanceFor[AnyRef]("jdk.jfr.Recording", Nil).toOption
private val clazz = recording.map(_.getClass)
private val startMethod = clazz.map(_.getDeclaredMethod("start"))
private val stopMethod = clazz.map(_.getDeclaredMethod("stop"))
private val dumpMethod = clazz.map(_.getDeclaredMethod("dump", classOf[Path]))
def start() = {
for {
r <- recording
m <- startMethod
} yield m.invoke(r)
}
def endAndDump(location: Path) = {
for {
r <- recording
stop <- stopMethod
dump <- dumpMethod
} yield {
stop.invoke(r)
dump.invoke(r, location)
}
}
}
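
For reference, a small sketch of how this helper is driven; the perf spec above uses the same start/endAndDump pattern, and the system name and output path here are illustrative:

import java.nio.file.Paths
import akka.actor.ActorSystem
import akka.cluster.sharding.FlightRecording

object FlightRecordingExample extends App {
  val system = ActorSystem("bench")
  val recording = new FlightRecording(system)
  recording.start()
  // ... run the workload to be profiled ...
  recording.endAndDump(Paths.get("target", "my-benchmark.jfr"))
  system.terminate()
}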

View file

@ -0,0 +1,66 @@
# all these are internals
ProblemFilters.exclude[MissingClassProblem]("akka.cluster.sharding.RememberEntityStarter")
ProblemFilters.exclude[MissingClassProblem]("akka.cluster.sharding.DDataShard")
ProblemFilters.exclude[MissingClassProblem]("akka.cluster.sharding.Shard$StateChange")
ProblemFilters.exclude[MissingClassProblem]("akka.cluster.sharding.PersistentShard")
ProblemFilters.exclude[MissingClassProblem]("akka.cluster.sharding.Shard$State$")
ProblemFilters.exclude[MissingClassProblem]("akka.cluster.sharding.Shard$EntityStarted$")
ProblemFilters.exclude[MissingClassProblem]("akka.cluster.sharding.Shard$EntityStopped")
ProblemFilters.exclude[MissingClassProblem]("akka.cluster.sharding.Shard$RestartEntity")
ProblemFilters.exclude[MissingClassProblem]("akka.cluster.sharding.Shard$RestartEntity$")
ProblemFilters.exclude[MissingClassProblem]("akka.cluster.sharding.Shard$RestartEntities")
ProblemFilters.exclude[MissingClassProblem]("akka.cluster.sharding.Shard$RestartEntities$")
ProblemFilters.exclude[MissingClassProblem]("akka.cluster.sharding.RememberEntityStarter$Tick$")
ProblemFilters.exclude[MissingClassProblem]("akka.cluster.sharding.Shard$EntityStarted")
ProblemFilters.exclude[MissingClassProblem]("akka.cluster.sharding.RememberingShard")
ProblemFilters.exclude[MissingClassProblem]("akka.cluster.sharding.RememberEntityStarter$")
ProblemFilters.exclude[MissingClassProblem]("akka.cluster.sharding.Shard$State")
ProblemFilters.exclude[MissingClassProblem]("akka.cluster.sharding.Shard$EntityStopped$")
ProblemFilters.exclude[MissingClassProblem]("akka.cluster.sharding.Shard$ShardCommand")
ProblemFilters.exclude[DirectMissingMethodProblem]("akka.cluster.sharding.ShardRegion.props")
ProblemFilters.exclude[DirectMissingMethodProblem]("akka.cluster.sharding.ShardRegion.proxyProps")
ProblemFilters.exclude[DirectMissingMethodProblem]("akka.cluster.sharding.Shard.props")
ProblemFilters.exclude[DirectMissingMethodProblem]("akka.cluster.sharding.ShardRegion.this")
ProblemFilters.exclude[DirectMissingMethodProblem]("akka.cluster.sharding.Shard.receiveCommand")
ProblemFilters.exclude[DirectMissingMethodProblem]("akka.cluster.sharding.Shard.deliverTo")
ProblemFilters.exclude[DirectMissingMethodProblem]("akka.cluster.sharding.Shard.state")
ProblemFilters.exclude[DirectMissingMethodProblem]("akka.cluster.sharding.Shard.state_=")
ProblemFilters.exclude[DirectMissingMethodProblem]("akka.cluster.sharding.Shard.idByRef")
ProblemFilters.exclude[DirectMissingMethodProblem]("akka.cluster.sharding.Shard.idByRef_=")
ProblemFilters.exclude[DirectMissingMethodProblem]("akka.cluster.sharding.Shard.refById")
ProblemFilters.exclude[DirectMissingMethodProblem]("akka.cluster.sharding.Shard.refById_=")
ProblemFilters.exclude[DirectMissingMethodProblem]("akka.cluster.sharding.Shard.lastMessageTimestamp")
ProblemFilters.exclude[DirectMissingMethodProblem]("akka.cluster.sharding.Shard.lastMessageTimestamp_=")
ProblemFilters.exclude[DirectMissingMethodProblem]("akka.cluster.sharding.Shard.passivating")
ProblemFilters.exclude[DirectMissingMethodProblem]("akka.cluster.sharding.Shard.passivating_=")
ProblemFilters.exclude[DirectMissingMethodProblem]("akka.cluster.sharding.Shard.messageBuffers")
ProblemFilters.exclude[DirectMissingMethodProblem]("akka.cluster.sharding.Shard.passivateIdleTask")
ProblemFilters.exclude[DirectMissingMethodProblem]("akka.cluster.sharding.Shard.onLeaseAcquired")
ProblemFilters.exclude[DirectMissingMethodProblem]("akka.cluster.sharding.Shard.processChange")
ProblemFilters.exclude[IncompatibleMethTypeProblem]("akka.cluster.sharding.Shard.passivateCompleted")
ProblemFilters.exclude[IncompatibleMethTypeProblem]("akka.cluster.sharding.Shard.sendMsgBuffer")
ProblemFilters.exclude[DirectMissingMethodProblem]("akka.cluster.sharding.Shard.deliverMessage")
ProblemFilters.exclude[DirectMissingMethodProblem]("akka.cluster.sharding.Shard.this")
ProblemFilters.exclude[DirectMissingMethodProblem]("akka.cluster.sharding.ShardCoordinator.props")
ProblemFilters.exclude[DirectMissingMethodProblem]("akka.cluster.sharding.DDataShardCoordinator.CoordinatorStateKey")
ProblemFilters.exclude[DirectMissingMethodProblem]("akka.cluster.sharding.DDataShardCoordinator.initEmptyState")
ProblemFilters.exclude[DirectMissingMethodProblem]("akka.cluster.sharding.DDataShardCoordinator.AllShardsKey")
ProblemFilters.exclude[DirectMissingMethodProblem]("akka.cluster.sharding.DDataShardCoordinator.allKeys")
ProblemFilters.exclude[DirectMissingMethodProblem]("akka.cluster.sharding.DDataShardCoordinator.shards")
ProblemFilters.exclude[DirectMissingMethodProblem]("akka.cluster.sharding.DDataShardCoordinator.shards_=")
ProblemFilters.exclude[DirectMissingMethodProblem]("akka.cluster.sharding.DDataShardCoordinator.terminating")
ProblemFilters.exclude[DirectMissingMethodProblem]("akka.cluster.sharding.DDataShardCoordinator.terminating_=")
ProblemFilters.exclude[DirectMissingMethodProblem]("akka.cluster.sharding.DDataShardCoordinator.getShardHomeRequests")
ProblemFilters.exclude[DirectMissingMethodProblem]("akka.cluster.sharding.DDataShardCoordinator.getShardHomeRequests_=")
ProblemFilters.exclude[DirectMissingMethodProblem]("akka.cluster.sharding.DDataShardCoordinator.waitingForState")
ProblemFilters.exclude[DirectMissingMethodProblem]("akka.cluster.sharding.DDataShardCoordinator.waitingForUpdate")
ProblemFilters.exclude[DirectMissingMethodProblem]("akka.cluster.sharding.DDataShardCoordinator.getAllShards")
ProblemFilters.exclude[DirectMissingMethodProblem]("akka.cluster.sharding.DDataShardCoordinator.sendAllShardsUpdate")
ProblemFilters.exclude[IncompatibleMethTypeProblem]("akka.cluster.sharding.ShardRegion.this")
ProblemFilters.exclude[IncompatibleMethTypeProblem]("akka.cluster.sharding.DDataShardCoordinator.this")
# not marked internal but for not intended as public (no public API use case)
ProblemFilters.exclude[MissingClassProblem]("akka.cluster.sharding.EntityRecoveryStrategy")
ProblemFilters.exclude[MissingClassProblem]("akka.cluster.sharding.EntityRecoveryStrategy$")
ProblemFilters.exclude[MissingClassProblem]("akka.cluster.sharding.AllAtOnceEntityRecoveryStrategy")
ProblemFilters.exclude[MissingClassProblem]("akka.cluster.sharding.ConstantRateEntityRecoveryStrategy")

View file

@ -42,14 +42,24 @@ message EntityState {
repeated string entities = 1;
}
// not written anymore but kept for backwards compatibility
message EntityStarted {
required string entityId = 1;
}
message EntitiesStarted {
repeated string entityId = 1;
}
// not written anymore but kept for backwards compatibility
message EntityStopped {
required string entityId = 1;
}
message EntitiesStopped {
repeated string entityId = 1;
}
message ShardStats {
required string shard = 1;
required int32 entityCount = 2;
@ -117,3 +127,8 @@ message CurrentShardRegionState {
repeated ShardState shards = 1;
repeated string failed = 2;
}
message RememberedShardState {
repeated string shardId = 1;
optional bool marker = 2;
}

View file

@ -23,6 +23,11 @@ akka.cluster.sharding {
# due to rebalance or crash.
remember-entities = off
# When 'remember-entities' is enabled and the state store mode is ddata, this controls
# how the remembered entities and shards are stored. Possible values are "eventsourced" and "ddata".
# Default is "ddata" for backwards compatibility.
remember-entities-store = "ddata"
# Set this to a time duration to have sharding passivate entities when they have not
# received any message in this length of time. Set to 'off' to disable.
# It is always disabled if `remember-entities` is enabled.
@ -77,7 +82,8 @@ akka.cluster.sharding {
# Defines how the coordinator stores its state. Same is also used by the
# shards for rememberEntities.
# Valid values are "ddata" or "persistence".
# Valid values are "ddata" or "persistence".
# "persistence" mode is deprecated
state-store-mode = "ddata"
# The shard saves persistent snapshots after this number of persistent
@ -119,11 +125,10 @@ akka.cluster.sharding {
# and for a shard to get its state when remembered entities is enabled
# The read from ddata is a ReadMajority, for small clusters (< majority-min-cap) every node needs to respond
# so is more likely to time out if there are nodes restarting e.g. when there is a rolling re-deploy happening
# Only used when state-store-mode=ddata
waiting-for-state-timeout = 2 s
# Timeout of waiting for update the distributed state (update will be retried if the timeout happened)
# Only used when state-store-mode=ddata
# Also used as timeout for writes of remember entities when that is enabled
updating-state-timeout = 5 s
# Timeout to wait for querying all shards for a given `ShardRegion`.
@ -143,6 +148,13 @@ akka.cluster.sharding {
number-of-entities = 5
}
event-sourced-remember-entities-store {
# When using remember entities with the event sourced remember entities store, the batches
# written to the store are limited to this number of entries to avoid creating events that
# are too large for the journal to handle. If you use long persistence ids you may have to increase this.
max-updates-per-write = 100
}
# Settings for the coordinator singleton. Same layout as akka.cluster.singleton.
# The "role" of the singleton configuration is not used. The singleton role will
# be the same as "akka.cluster.sharding.role".
@ -201,6 +213,10 @@ akka.cluster.sharding {
# The interval between retries for acquiring the lease
lease-retry-interval = 5s
# Provide a higher level of detail in the debug logs, often per routed message. Be careful about enabling
# this in production systems.
verbose-debug-logging = off
}
# //#sharding-ext-config
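
As a concrete illustration of the settings above, a minimal sketch (not from this diff) of an application config that switches remember entities to the new event sourced store, expressed the same way the perf spec builds its config; the specific values are illustrative:

import com.typesafe.config.ConfigFactory

object RememberEntitiesConfigExample {
  // ddata coordinator state, remembered entities via the event sourced store,
  // a smaller write batch and verbose debug logging
  val shardingConfig = ConfigFactory.parseString("""
    akka.cluster.sharding {
      remember-entities = on
      remember-entities-store = "eventsourced"
      state-store-mode = "ddata"
      event-sourced-remember-entities-store.max-updates-per-write = 50
      verbose-debug-logging = on
    }
    """)
}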

View file

@ -0,0 +1,37 @@
/*
* Copyright (C) 2020 Lightbend Inc. <https://www.lightbend.com>
*/
package akka.cluster.sharding.internal.jfr
import akka.annotation.InternalApi
import jdk.jfr.{ Category, Enabled, Event, Label, StackTrace, Timespan }
// requires jdk9+ to compile
// for editing these in IntelliJ, open module settings, change JDK dependency to 11 for only this module
/** INTERNAL API */
@InternalApi
@StackTrace(false)
@Category(Array("Akka", "Sharding", "Shard")) @Label("Remember Entity Operation")
final class RememberEntityWrite(@Timespan(Timespan.NANOSECONDS) val timeTaken: Long) extends Event
@InternalApi
@StackTrace(false)
@Category(Array("Akka", "Sharding", "Shard")) @Label("Remember Entity Add")
final class RememberEntityAdd(val entityId: String) extends Event
@InternalApi
@StackTrace(false)
@Category(Array("Akka", "Sharding", "Shard")) @Label("Remember Entity Remove")
final class RememberEntityRemove(val entityId: String) extends Event
@InternalApi
@StackTrace(false)
@Category(Array("Akka", "Sharding", "Shard")) @Label("Passivate")
final class Passivate(val entityId: String) extends Event
@InternalApi
@StackTrace(false)
@Category(Array("Akka", "Sharding", "Shard")) @Label("Passivate Restart")
final class PassivateRestart(val entityId: String) extends Event

View file

@ -0,0 +1,20 @@
/*
* Copyright (C) 2020 Lightbend Inc. <https://www.lightbend.com>
*/
package akka.cluster.sharding.internal.jfr
import akka.cluster.sharding.ShardingFlightRecorder
class JFRShardingFlightRecorder extends ShardingFlightRecorder {
override def rememberEntityOperation(duration: Long): Unit =
new RememberEntityWrite(duration).commit()
override def rememberEntityAdd(entityId: String): Unit =
new RememberEntityAdd(entityId).commit()
override def rememberEntityRemove(entityId: String): Unit =
new RememberEntityRemove(entityId).commit()
override def entityPassivate(entityId: String): Unit =
new Passivate(entityId).commit()
override def entityPassivateRestart(entityId: String): Unit =
new PassivateRestart(entityId).commit()
}

View file

@ -31,6 +31,10 @@ import akka.cluster.ClusterSettings
import akka.cluster.ClusterSettings.DataCenter
import akka.cluster.ddata.Replicator
import akka.cluster.ddata.ReplicatorSettings
import akka.cluster.sharding.internal.CustomStateStoreModeProvider
import akka.cluster.sharding.internal.DDataRememberEntitiesProvider
import akka.cluster.sharding.internal.EventSourcedRememberEntitiesProvider
import akka.cluster.sharding.internal.RememberEntitiesProvider
import akka.cluster.singleton.ClusterSingletonManager
import akka.event.Logging
import akka.pattern.BackoffOpts
@ -717,7 +721,8 @@ private[akka] class ClusterShardingGuardian extends Actor {
}
private def replicator(settings: ClusterShardingSettings): ActorRef = {
if (settings.stateStoreMode == ClusterShardingSettings.StateStoreModeDData) {
if (settings.stateStoreMode == ClusterShardingSettings.StateStoreModeDData ||
settings.stateStoreMode == ClusterShardingSettings.RememberEntitiesStoreCustom) {
// one Replicator per role
replicatorByRole.get(settings.role) match {
case Some(ref) => ref
@ -748,16 +753,39 @@ private[akka] class ClusterShardingGuardian extends Actor {
import settings.tuningParameters.coordinatorFailureBackoff
val rep = replicator(settings)
val rememberEntitiesStoreProvider: Option[RememberEntitiesProvider] =
if (!settings.rememberEntities) None
else {
// with the deprecated persistence state store mode we always use the event sourced provider for shard regions
// and no store for the coordinator (the coordinator is a PersistentActor in that case)
val rememberEntitiesProvider =
if (settings.stateStoreMode == ClusterShardingSettings.StateStoreModePersistence) {
ClusterShardingSettings.RememberEntitiesStoreEventsourced
} else {
settings.rememberEntitiesStore
}
Some(rememberEntitiesProvider match {
case ClusterShardingSettings.RememberEntitiesStoreDData =>
new DDataRememberEntitiesProvider(typeName, settings, majorityMinCap, rep)
case ClusterShardingSettings.RememberEntitiesStoreEventsourced =>
new EventSourcedRememberEntitiesProvider(typeName, settings)
case ClusterShardingSettings.RememberEntitiesStoreCustom =>
new CustomStateStoreModeProvider(typeName, context.system, settings)
})
}
val encName = URLEncoder.encode(typeName, ByteString.UTF_8)
val cName = coordinatorSingletonManagerName(encName)
val cPath = coordinatorPath(encName)
val shardRegion = context.child(encName).getOrElse {
if (context.child(cName).isEmpty) {
val coordinatorProps =
if (settings.stateStoreMode == ClusterShardingSettings.StateStoreModePersistence)
if (settings.stateStoreMode == ClusterShardingSettings.StateStoreModePersistence) {
ShardCoordinator.props(typeName, settings, allocationStrategy)
else
ShardCoordinator.props(typeName, settings, allocationStrategy, rep, majorityMinCap)
} else {
ShardCoordinator
.props(typeName, settings, allocationStrategy, rep, majorityMinCap, rememberEntitiesStoreProvider)
}
val singletonProps =
BackoffOpts
.onStop(
@ -787,8 +815,7 @@ private[akka] class ClusterShardingGuardian extends Actor {
extractEntityId = extractEntityId,
extractShardId = extractShardId,
handOffStopMessage = handOffStopMessage,
replicator = rep,
majorityMinCap)
rememberEntitiesStoreProvider)
.withDispatcher(context.props.dispatcher),
name = encName)
}
@ -819,9 +846,7 @@ private[akka] class ClusterShardingGuardian extends Actor {
settings = settings,
coordinatorPath = cPath,
extractEntityId = extractEntityId,
extractShardId = extractShardId,
replicator = context.system.deadLetters,
majorityMinCap)
extractShardId = extractShardId)
.withDispatcher(context.props.dispatcher),
name = actorName)
}

View file

@ -22,6 +22,25 @@ object ClusterShardingSettings {
val StateStoreModePersistence = "persistence"
val StateStoreModeDData = "ddata"
/**
* Only for testing
* INTERNAL API
*/
@InternalApi
private[akka] val RememberEntitiesStoreCustom = "custom"
/**
* INTERNAL API
*/
@InternalApi
private[akka] val RememberEntitiesStoreDData = "ddata"
/**
* INTERNAL API
*/
@InternalApi
private[akka] val RememberEntitiesStoreEventsourced = "eventsourced"
/**
* Create settings from the default configuration
* `akka.cluster.sharding`.
@ -84,6 +103,7 @@ object ClusterShardingSettings {
journalPluginId = config.getString("journal-plugin-id"),
snapshotPluginId = config.getString("snapshot-plugin-id"),
stateStoreMode = config.getString("state-store-mode"),
rememberEntitiesStore = config.getString("remember-entities-store"),
passivateIdleEntityAfter = passivateIdleAfter,
shardRegionQueryTimeout = config.getDuration("shard-region-query-timeout", MILLISECONDS).millis,
tuningParameters,
@ -280,6 +300,7 @@ final class ClusterShardingSettings(
val journalPluginId: String,
val snapshotPluginId: String,
val stateStoreMode: String,
val rememberEntitiesStore: String,
val passivateIdleEntityAfter: FiniteDuration,
val shardRegionQueryTimeout: FiniteDuration,
val tuningParameters: ClusterShardingSettings.TuningParameters,
@ -287,6 +308,33 @@ final class ClusterShardingSettings(
val leaseSettings: Option[LeaseUsageSettings])
extends NoSerializationVerificationNeeded {
@deprecated(
"Use the ClusterShardingSettings factory methods or the constructor including rememberedEntitiesStore instead",
"2.6.7")
def this(
role: Option[String],
rememberEntities: Boolean,
journalPluginId: String,
snapshotPluginId: String,
stateStoreMode: String,
passivateIdleEntityAfter: FiniteDuration,
shardRegionQueryTimeout: FiniteDuration,
tuningParameters: ClusterShardingSettings.TuningParameters,
coordinatorSingletonSettings: ClusterSingletonManagerSettings,
leaseSettings: Option[LeaseUsageSettings]) =
this(
role,
rememberEntities,
journalPluginId,
snapshotPluginId,
stateStoreMode,
"ddata",
passivateIdleEntityAfter,
shardRegionQueryTimeout,
tuningParameters,
coordinatorSingletonSettings,
leaseSettings)
// bin compat for 2.5.23
@deprecated(
"Use the ClusterShardingSettings factory methods or the constructor including shardRegionQueryTimeout instead",
@ -311,7 +359,7 @@ final class ClusterShardingSettings(
3.seconds,
tuningParameters,
coordinatorSingletonSettings,
None)
leaseSettings)
// bin compat for 2.5.21
@deprecated(
@ -360,10 +408,9 @@ final class ClusterShardingSettings(
tuningParameters,
coordinatorSingletonSettings)
import ClusterShardingSettings.StateStoreModeDData
import ClusterShardingSettings.StateStoreModePersistence
import ClusterShardingSettings.{ RememberEntitiesStoreCustom, StateStoreModeDData, StateStoreModePersistence }
require(
stateStoreMode == StateStoreModePersistence || stateStoreMode == StateStoreModeDData,
stateStoreMode == StateStoreModePersistence || stateStoreMode == StateStoreModeDData || stateStoreMode == RememberEntitiesStoreCustom,
s"Unknown 'state-store-mode' [$stateStoreMode], valid values are '$StateStoreModeDData' or '$StateStoreModePersistence'")
/** If true, this node should run the shard region, otherwise just a shard proxy should started on this node. */
@ -435,6 +482,7 @@ final class ClusterShardingSettings(
journalPluginId,
snapshotPluginId,
stateStoreMode,
rememberEntitiesStore,
passivateIdleAfter,
shardRegionQueryTimeout,
tuningParameters,

View file

@ -0,0 +1,34 @@
/*
* Copyright (C) 2020 Lightbend Inc. <https://www.lightbend.com>
*/
package akka.cluster.sharding
import akka.annotation.InternalApi
import akka.cluster.sharding.ShardCoordinator.Internal.ShardHomeAllocated
import akka.persistence.journal.EventAdapter
import akka.persistence.journal.EventSeq
/**
* Used for migrating from persistent state store mode to the new event sourced remember entities. No user API,
* used through configuration. See reference docs for details.
*
* INTERNAL API
*/
@InternalApi
private[akka] final class OldCoordinatorStateMigrationEventAdapter extends EventAdapter {
override def manifest(event: Any): String =
""
override def toJournal(event: Any): Any =
event
override def fromJournal(event: Any, manifest: String): EventSeq = {
event match {
case ShardHomeAllocated(shardId, _) =>
EventSeq.single(shardId)
case _ => EventSeq.empty
}
}
}
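
The adapter is wired in through the standard Akka Persistence event adapter configuration of the journal that sharding uses. The sketch below is an assumption for illustration only: the journal plugin path and the event-adapter-bindings entry are placeholders and the reference docs should be consulted for the exact keys; only the adapter FQCN comes from this file.

import com.typesafe.config.ConfigFactory

object CoordinatorMigrationConfigSketch {
  // Hypothetical wiring; adjust the journal plugin path to the one sharding actually uses
  val migrationConfig = ConfigFactory.parseString("""
    akka.persistence.journal.leveldb {
      event-adapters {
        coordinator-migration = "akka.cluster.sharding.OldCoordinatorStateMigrationEventAdapter"
      }
      event-adapter-bindings {
        # the binding class below is an assumed placeholder
        "akka.cluster.sharding.ShardCoordinator$Internal$DomainEvent" = coordinator-migration
      }
    }
    """)
}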

View file

@ -8,23 +8,24 @@ import scala.collection.immutable
import scala.concurrent.Future
import scala.concurrent.duration._
import scala.util.Success
import com.github.ghik.silencer.silent
import akka.actor._
import akka.actor.DeadLetterSuppression
import akka.annotation.InternalApi
import akka.annotation.{ InternalApi, InternalStableApi }
import akka.cluster.Cluster
import akka.cluster.ClusterEvent
import akka.cluster.ClusterEvent._
import akka.cluster.ddata.GSet
import akka.cluster.ddata.GSetKey
import akka.cluster.ddata.Key
import akka.cluster.ddata.LWWRegister
import akka.cluster.ddata.LWWRegisterKey
import akka.cluster.ddata.ReplicatedData
import akka.cluster.ddata.Replicator._
import akka.cluster.ddata.SelfUniqueAddress
import akka.cluster.sharding.ShardRegion.ShardId
import akka.cluster.sharding.internal.EventSourcedRememberEntitiesCoordinatorStore.MigrationMarker
import akka.cluster.sharding.internal.{
EventSourcedRememberEntitiesCoordinatorStore,
RememberEntitiesCoordinatorStore,
RememberEntitiesProvider
}
import akka.dispatch.ExecutionContexts
import akka.event.BusLogging
import akka.event.Logging
@ -55,12 +56,14 @@ object ShardCoordinator {
* INTERNAL API
* Factory method for the [[akka.actor.Props]] of the [[ShardCoordinator]] actor with state based on ddata.
*/
@InternalStableApi
private[akka] def props(
typeName: String,
settings: ClusterShardingSettings,
allocationStrategy: ShardAllocationStrategy,
replicator: ActorRef,
majorityMinCap: Int): Props =
majorityMinCap: Int,
rememberEntitiesStoreProvider: Option[RememberEntitiesProvider]): Props =
Props(
new DDataShardCoordinator(
typeName: String,
@ -68,7 +71,7 @@ object ShardCoordinator {
allocationStrategy,
replicator,
majorityMinCap,
settings.rememberEntities)).withDeploy(Deploy.local)
rememberEntitiesStoreProvider)).withDeploy(Deploy.local)
/**
* Interface of the pluggable shard allocation and rebalancing logic used by the [[ShardCoordinator]].
@ -346,7 +349,8 @@ object ShardCoordinator {
}
/**
* Persistent state of the event sourced ShardCoordinator.
* State of the shard coordinator.
* Was also used as the persistent state in the old persistent coordinator.
*/
@SerialVersionUID(1L) final case class State private[akka] (
// region for each shard
@ -354,6 +358,7 @@ object ShardCoordinator {
// shards for each region
regions: Map[ActorRef, Vector[ShardId]] = Map.empty,
regionProxies: Set[ActorRef] = Set.empty,
// Only used if remembered entities is enabled
unallocatedShards: Set[ShardId] = Set.empty,
rememberEntities: Boolean = false)
extends ClusterShardingSerializable {
@ -795,7 +800,10 @@ abstract class ShardCoordinator(
deferGetShardHomeRequest(shard, sender())
true
} else if (!hasAllRegionsRegistered()) {
log.debug("GetShardHome [{}] request ignored, because not all regions have registered yet.", shard)
log.debug(
"GetShardHome [{}] request from [{}] ignored, because not all regions have registered yet.",
shard,
sender())
true
} else {
state.shards.get(shard) match {
@ -983,6 +991,9 @@ abstract class ShardCoordinator(
/**
* Singleton coordinator that decides where to allocate shards.
*
* Users can migrate to using DData to store the coordinator state, and then either event sourcing or ddata
* to store the remembered entities.
*
* @see [[ClusterSharding$ ClusterSharding extension]]
*/
@deprecated("Use `ddata` mode, persistence mode is deprecated.", "2.6.0")
@ -1002,6 +1013,9 @@ class PersistentShardCoordinator(
override def snapshotPluginId: String = settings.snapshotPluginId
override def receiveRecover: Receive = {
case MigrationMarker | SnapshotOffer(_, _: EventSourcedRememberEntitiesCoordinatorStore.State) =>
throw new IllegalStateException(
"state-store is set to persistence but a migration has taken place to remember-entities-store=eventsourced. You can not downgrade.")
case evt: DomainEvent =>
log.debug("receiveRecover {}", evt)
evt match {
@ -1093,19 +1107,38 @@ class PersistentShardCoordinator(
}
/**
* INTERNAL API
*/
@InternalApi
private[akka] object DDataShardCoordinator {
private case object RememberEntitiesStoreStopped
private case class RememberEntitiesTimeout(shardId: ShardId)
private case object RememberEntitiesLoadTimeout
private val RememberEntitiesTimeoutKey = "RememberEntityTimeout"
}
/**
* INTERNAL API
* Singleton coordinator (with state based on ddata) that decides where to allocate shards.
*
* The plan is for this to be the only type of ShardCoordinator. A full cluster shutdown will rely
* on remembered entities to re-initialize and reallocate the existing shards.
*
* @see [[ClusterSharding$ ClusterSharding extension]]
*/
class DDataShardCoordinator(
@InternalApi
private[akka] class DDataShardCoordinator(
override val typeName: String,
settings: ClusterShardingSettings,
allocationStrategy: ShardCoordinator.ShardAllocationStrategy,
replicator: ActorRef,
majorityMinCap: Int,
rememberEntities: Boolean)
rememberEntitiesStoreProvider: Option[RememberEntitiesProvider])
extends ShardCoordinator(settings, allocationStrategy)
with Stash {
with Stash
with Timers {
import DDataShardCoordinator._
import ShardCoordinator.Internal._
import akka.cluster.ddata.Replicator.Update
@ -1118,45 +1151,42 @@ class DDataShardCoordinator(
case Int.MaxValue => WriteAll(settings.tuningParameters.waitingForStateTimeout)
case additional => WriteMajorityPlus(settings.tuningParameters.waitingForStateTimeout, majorityMinCap, additional)
}
private val allShardsReadConsistency = ReadMajority(settings.tuningParameters.waitingForStateTimeout, majorityMinCap)
private val allShardsWriteConsistency = WriteMajority(settings.tuningParameters.updatingStateTimeout, majorityMinCap)
implicit val node: Cluster = Cluster(context.system)
private implicit val selfUniqueAddress: SelfUniqueAddress = SelfUniqueAddress(node.selfUniqueAddress)
val CoordinatorStateKey = LWWRegisterKey[State](s"${typeName}CoordinatorState")
val initEmptyState = State.empty.withRememberEntities(settings.rememberEntities)
private val CoordinatorStateKey = LWWRegisterKey[State](s"${typeName}CoordinatorState")
private val initEmptyState = State.empty.withRememberEntities(settings.rememberEntities)
val AllShardsKey = GSetKey[String](s"shard-${typeName}-all")
val allKeys: Set[Key[ReplicatedData]] =
if (rememberEntities) Set(CoordinatorStateKey, AllShardsKey) else Set(CoordinatorStateKey)
var shards = Set.empty[String]
var terminating = false
var getShardHomeRequests: Set[(ActorRef, GetShardHome)] = Set.empty
if (rememberEntities)
replicator ! Subscribe(AllShardsKey, self)
private var terminating = false
private var getShardHomeRequests: Set[(ActorRef, GetShardHome)] = Set.empty
private val rememberEntitiesStore =
rememberEntitiesStoreProvider.map { provider =>
log.debug("Starting remember entities store from provider {}", provider)
context.watchWith(
context.actorOf(provider.coordinatorStoreProps(), "RememberEntitiesStore"),
RememberEntitiesStoreStopped)
}
private val rememberEntities = rememberEntitiesStore.isDefined
node.subscribe(self, ClusterEvent.InitialStateAsEvents, ClusterShuttingDown.getClass)
// get state from ddata replicator, repeat until GetSuccess
getCoordinatorState()
getAllShards()
if (settings.rememberEntities)
getAllRememberedShards()
override def receive: Receive = waitingForState(allKeys)
override def receive: Receive =
waitingForInitialState(Set.empty)
// This state will drop all other messages since they will be retried
def waitingForState(remainingKeys: Set[Key[ReplicatedData]]): Receive =
// Note that the initial set of remembered shards can arrive here or later; it does not keep us in this state
def waitingForInitialState(rememberedShards: Set[ShardId]): Receive =
({
case g @ GetSuccess(CoordinatorStateKey, _) =>
val reg = g.get(CoordinatorStateKey)
state = reg.value.withRememberEntities(settings.rememberEntities)
log.debug("Received initial coordinator state [{}]", state)
val newRemainingKeys = remainingKeys - CoordinatorStateKey
if (newRemainingKeys.isEmpty)
becomeWaitingForStateInitialized()
else
context.become(waitingForState(newRemainingKeys))
val existingState = g.get(CoordinatorStateKey).value.withRememberEntities(settings.rememberEntities)
log.debug("Received initial coordinator state [{}]", existingState)
onInitialState(existingState, rememberedShards)
case GetFailure(CoordinatorStateKey, _) =>
log.error(
@ -1166,35 +1196,18 @@ class DDataShardCoordinator(
getCoordinatorState()
case NotFound(CoordinatorStateKey, _) =>
val newRemainingKeys = remainingKeys - CoordinatorStateKey
if (newRemainingKeys.isEmpty)
becomeWaitingForStateInitialized()
else
context.become(waitingForState(newRemainingKeys))
log.debug("Initial coordinator unknown using empty state")
// this.state is empty initially
onInitialState(this.state, rememberedShards)
case g @ GetSuccess(AllShardsKey, _) =>
shards = g.get(AllShardsKey).elements
val newUnallocatedShards = state.unallocatedShards.union(shards.diff(state.shards.keySet))
state = state.copy(unallocatedShards = newUnallocatedShards)
val newRemainingKeys = remainingKeys - AllShardsKey
if (newRemainingKeys.isEmpty)
becomeWaitingForStateInitialized()
else
context.become(waitingForState(newRemainingKeys))
case RememberEntitiesCoordinatorStore.RememberedShards(shardIds) =>
log.debug("Received [{}] remembered shard ids (when waitingForInitialState)", shardIds.size)
context.become(waitingForInitialState(shardIds))
timers.cancel(RememberEntitiesTimeoutKey)
case GetFailure(AllShardsKey, _) =>
log.error(
"The ShardCoordinator was unable to get all shards state within 'waiting-for-state-timeout': {} millis (retrying)",
allShardsReadConsistency.timeout.toMillis)
// repeat until GetSuccess
getAllShards()
case NotFound(AllShardsKey, _) =>
val newRemainingKeys = remainingKeys - AllShardsKey
if (newRemainingKeys.isEmpty)
becomeWaitingForStateInitialized()
else
context.become(waitingForState(newRemainingKeys))
case RememberEntitiesLoadTimeout =>
// repeat until successful
getAllRememberedShards()
case ShardCoordinator.Internal.Terminate =>
log.debug("Received termination message while waiting for state")
@ -1208,7 +1221,12 @@ class DDataShardCoordinator(
}: Receive).orElse[Any, Unit](receiveTerminated)
private def becomeWaitingForStateInitialized(): Unit = {
private def onInitialState(loadedState: State, rememberedShards: Set[ShardId]): Unit = {
state = if (settings.rememberEntities && rememberedShards.nonEmpty) {
// Note that we don't wait for shards from store so they could also arrive later
val newUnallocatedShards = state.unallocatedShards.union(rememberedShards.diff(state.shards.keySet))
loadedState.copy(unallocatedShards = newUnallocatedShards)
} else loadedState
if (state.isEmpty) {
// empty state, activate immediately
activate()
@ -1235,21 +1253,46 @@ class DDataShardCoordinator(
log.debug("Received termination message while waiting for state initialized")
context.stop(self)
case RememberEntitiesCoordinatorStore.RememberedShards(rememberedShards) =>
log.debug("Received [{}] remembered shard ids (when waitingForStateInitialized)", rememberedShards.size)
val newUnallocatedShards = state.unallocatedShards.union(rememberedShards.diff(state.shards.keySet))
state = state.copy(unallocatedShards = newUnallocatedShards)
timers.cancel(RememberEntitiesTimeoutKey)
case RememberEntitiesLoadTimeout =>
// repeat until successful
getAllRememberedShards()
case RememberEntitiesStoreStopped =>
onRememberEntitiesStoreStopped()
case _ => stash()
}
// this state will stash all messages until it receives UpdateSuccess
// this state will stash all messages until it receives UpdateSuccess and a successful remember shard started
// if remember entities is enabled
def waitingForUpdate[E <: DomainEvent](
evt: E,
afterUpdateCallback: E => Unit,
remainingKeys: Set[Key[ReplicatedData]]): Receive = {
shardId: Option[ShardId],
waitingForStateWrite: Boolean,
waitingForRememberShard: Boolean,
afterUpdateCallback: E => Unit): Receive = {
case UpdateSuccess(CoordinatorStateKey, Some(`evt`)) =>
log.debug("The coordinator state was successfully updated with {}", evt)
val newRemainingKeys = remainingKeys - CoordinatorStateKey
if (newRemainingKeys.isEmpty)
if (!waitingForRememberShard) {
log.debug("The coordinator state was successfully updated with {}", evt)
if (shardId.isDefined) timers.cancel(RememberEntitiesTimeoutKey)
unbecomeAfterUpdate(evt, afterUpdateCallback)
else
context.become(waitingForUpdate(evt, afterUpdateCallback, newRemainingKeys))
} else {
log.debug("The coordinator state was successfully updated with {}, waiting for remember shard update", evt)
context.become(
waitingForUpdate(
evt,
shardId,
waitingForStateWrite = false,
waitingForRememberShard = true,
afterUpdateCallback = afterUpdateCallback))
}
case UpdateTimeout(CoordinatorStateKey, Some(`evt`)) =>
log.error(
@ -1265,27 +1308,6 @@ class DDataShardCoordinator(
sendCoordinatorStateUpdate(evt)
}
case UpdateSuccess(AllShardsKey, Some(newShard: String)) =>
log.debug("The coordinator shards state was successfully updated with {}", newShard)
val newRemainingKeys = remainingKeys - AllShardsKey
if (newRemainingKeys.isEmpty)
unbecomeAfterUpdate(evt, afterUpdateCallback)
else
context.become(waitingForUpdate(evt, afterUpdateCallback, newRemainingKeys))
case UpdateTimeout(AllShardsKey, Some(newShard: String)) =>
log.error(
"The ShardCoordinator was unable to update shards distributed state within 'updating-state-timeout': {} millis ({}), event={}",
allShardsWriteConsistency.timeout.toMillis,
if (terminating) "terminating" else "retrying",
evt)
if (terminating) {
context.stop(self)
} else {
// repeat until UpdateSuccess
sendAllShardsUpdate(newShard)
}
case ModifyFailure(key, error, cause, _) =>
log.error(
cause,
@ -1306,10 +1328,62 @@ class DDataShardCoordinator(
stashGetShardHomeRequest(sender(), g) // must wait for update that is in progress
case ShardCoordinator.Internal.Terminate =>
log.debug("Received termination message while waiting for update")
log.debug("The ShardCoordinator received termination message while waiting for update")
terminating = true
stash()
case RememberEntitiesCoordinatorStore.UpdateDone(shard) =>
if (!shardId.contains(shard)) {
log.warning(
"Saw remember entities update complete for shard id [{}], while waiting for [{}]",
shard,
shardId.getOrElse(""))
} else {
if (!waitingForStateWrite) {
log.debug("The ShardCoordinator saw remember shard start successfully written {}", evt)
if (shardId.isDefined) timers.cancel(RememberEntitiesTimeoutKey)
unbecomeAfterUpdate(evt, afterUpdateCallback)
} else {
log.debug(
"The ShardCoordinator saw remember shard start successfully written {}, waiting for state update",
evt)
context.become(
waitingForUpdate(
evt,
shardId,
waitingForStateWrite = true,
waitingForRememberShard = false,
afterUpdateCallback = afterUpdateCallback))
}
}
case RememberEntitiesCoordinatorStore.UpdateFailed(shard) =>
if (shardId.contains(shard)) {
onRememberEntitiesUpdateFailed(shard)
} else {
log.warning(
"Got an remember entities update failed for [{}] while waiting for [{}], ignoring",
shard,
shardId.getOrElse(""))
}
case RememberEntitiesTimeout(shard) =>
if (shardId.contains(shard)) {
onRememberEntitiesUpdateFailed(shard)
} else {
log.warning(
"Got an remember entities update timeout for [{}] while waiting for [{}], ignoring",
shard,
shardId.getOrElse(""))
}
case RememberEntitiesStoreStopped =>
onRememberEntitiesStoreStopped()
case _: RememberEntitiesCoordinatorStore.RememberedShards =>
log.debug("Late arrival of remembered shards while waiting for update, stashing")
stash()
case _ => stash()
}
@ -1338,39 +1412,61 @@ class DDataShardCoordinator(
}
def activate() = {
context.become(active)
context.become(active.orElse(receiveLateRememberedEntities))
log.info("ShardCoordinator was moved to the active state {}", state)
}
override def active: Receive =
if (rememberEntities) {
({
case chg @ Changed(AllShardsKey) =>
shards = chg.get(AllShardsKey).elements
}: Receive).orElse[Any, Unit](super.active)
} else
super.active
// only used once the coordinator is initialized
def receiveLateRememberedEntities: Receive = {
case RememberEntitiesCoordinatorStore.RememberedShards(shardIds) =>
log.debug("Received [{}] remembered shard ids (after state initialized)", shardIds.size)
if (shardIds.nonEmpty) {
val newUnallocatedShards = state.unallocatedShards.union(shardIds.diff(state.shards.keySet))
state = state.copy(unallocatedShards = newUnallocatedShards)
allocateShardHomesForRememberEntities()
}
timers.cancel(RememberEntitiesTimeoutKey)
case RememberEntitiesLoadTimeout =>
// repeat until successful
getAllRememberedShards()
}
def update[E <: DomainEvent](evt: E)(f: E => Unit): Unit = {
sendCoordinatorStateUpdate(evt)
evt match {
case s: ShardHomeAllocated if rememberEntities && !shards(s.shard) =>
sendAllShardsUpdate(s.shard)
context.become(waitingForUpdate(evt, f, allKeys), discardOld = false)
case _ =>
// no update of shards, already known
context.become(waitingForUpdate(evt, f, Set(CoordinatorStateKey)), discardOld = false)
}
val waitingReceive =
evt match {
case s: ShardHomeAllocated if rememberEntities && !state.shards.contains(s.shard) =>
rememberShardAllocated(s.shard)
waitingForUpdate(
evt,
shardId = Some(s.shard),
waitingForStateWrite = true,
waitingForRememberShard = true,
afterUpdateCallback = f)
case _ =>
// no update of shards, already known
waitingForUpdate(
evt,
shardId = None,
waitingForStateWrite = true,
waitingForRememberShard = false,
afterUpdateCallback = f)
}
context.become(waitingReceive, discardOld = false)
}
def getCoordinatorState(): Unit = {
replicator ! Get(CoordinatorStateKey, stateReadConsistency)
}
def getAllShards(): Unit = {
if (rememberEntities)
replicator ! Get(AllShardsKey, allShardsReadConsistency)
def getAllRememberedShards(): Unit = {
timers.startSingleTimer(
RememberEntitiesTimeoutKey,
RememberEntitiesLoadTimeout,
settings.tuningParameters.waitingForStateTimeout)
rememberEntitiesStore.foreach(_ ! RememberEntitiesCoordinatorStore.GetShards)
}
def sendCoordinatorStateUpdate(evt: DomainEvent) = {
@ -1385,8 +1481,38 @@ class DDataShardCoordinator(
}
}
def sendAllShardsUpdate(newShard: String) = {
replicator ! Update(AllShardsKey, GSet.empty[String], allShardsWriteConsistency, Some(newShard))(_ + newShard)
def rememberShardAllocated(newShard: String) = {
log.debug("Remembering shard allocation [{}]", newShard)
rememberEntitiesStore.foreach(_ ! RememberEntitiesCoordinatorStore.AddShard(newShard))
timers.startSingleTimer(
RememberEntitiesTimeoutKey,
RememberEntitiesTimeout(newShard),
settings.tuningParameters.updatingStateTimeout)
}
override def receiveTerminated: Receive =
super.receiveTerminated.orElse {
case RememberEntitiesStoreStopped =>
onRememberEntitiesStoreStopped()
}
def onRememberEntitiesUpdateFailed(shardId: ShardId): Unit = {
log.error(
"The ShardCoordinator was unable to update remembered shard [{}] within 'updating-state-timeout': {} millis, {}",
shardId,
settings.tuningParameters.updatingStateTimeout.toMillis,
if (terminating) "terminating" else "retrying")
if (terminating) context.stop(self)
else {
// retry until successful
rememberShardAllocated(shardId)
}
}
def onRememberEntitiesStoreStopped(): Unit = {
// rely on backoff supervision of coordinator
log.error("The ShardCoordinator stopping because the remember entities store stopped")
context.stop(self)
}
}

View file

@ -13,17 +13,23 @@ import scala.concurrent.duration._
import scala.reflect.ClassTag
import scala.runtime.AbstractFunction1
import scala.util.{ Failure, Success }
import akka.Done
import akka.actor._
import akka.annotation.InternalApi
import akka.cluster.{ Cluster, ClusterSettings, Member, MemberStatus }
import akka.annotation.{ InternalApi, InternalStableApi }
import akka.cluster.Cluster
import akka.cluster.ClusterEvent._
import akka.cluster.ClusterSettings
import akka.cluster.ClusterSettings.DataCenter
import akka.cluster.Member
import akka.cluster.MemberStatus
import akka.cluster.sharding.Shard.ShardStats
import akka.cluster.sharding.internal.RememberEntitiesProvider
import akka.event.Logging
import akka.pattern.{ ask, pipe }
import akka.util.{ MessageBufferMap, PrettyDuration, Timeout }
import akka.pattern.ask
import akka.pattern.pipe
import akka.util.MessageBufferMap
import akka.util.PrettyDuration
import akka.util.Timeout
/**
* @see [[ClusterSharding$ ClusterSharding extension]]
@ -42,8 +48,7 @@ object ShardRegion {
extractEntityId: ShardRegion.ExtractEntityId,
extractShardId: ShardRegion.ExtractShardId,
handOffStopMessage: Any,
replicator: ActorRef,
majorityMinCap: Int): Props =
rememberEntitiesProvider: Option[RememberEntitiesProvider]): Props =
Props(
new ShardRegion(
typeName,
@ -54,8 +59,7 @@ object ShardRegion {
extractEntityId,
extractShardId,
handOffStopMessage,
replicator,
majorityMinCap)).withDeploy(Deploy.local)
rememberEntitiesProvider)).withDeploy(Deploy.local)
/**
* INTERNAL API
@ -68,9 +72,7 @@ object ShardRegion {
settings: ClusterShardingSettings,
coordinatorPath: String,
extractEntityId: ShardRegion.ExtractEntityId,
extractShardId: ShardRegion.ExtractShardId,
replicator: ActorRef,
majorityMinCap: Int): Props =
extractShardId: ShardRegion.ExtractShardId): Props =
Props(
new ShardRegion(
typeName,
@ -81,8 +83,7 @@ object ShardRegion {
extractEntityId,
extractShardId,
PoisonPill,
replicator,
majorityMinCap)).withDeploy(Deploy.local)
None)).withDeploy(Deploy.local)
/**
* Marker type of entity identifier (`String`).
@ -501,6 +502,7 @@ object ShardRegion {
stopMessage: Any,
handoffTimeout: FiniteDuration): Props =
Props(new HandOffStopper(shard, replyTo, entities, stopMessage, handoffTimeout)).withDeploy(Deploy.local)
}
/**
@ -513,6 +515,7 @@ object ShardRegion {
*
* @see [[ClusterSharding$ ClusterSharding extension]]
*/
@InternalStableApi
private[akka] class ShardRegion(
typeName: String,
entityProps: Option[String => Props],
@ -522,8 +525,7 @@ private[akka] class ShardRegion(
extractEntityId: ShardRegion.ExtractEntityId,
extractShardId: ShardRegion.ExtractShardId,
handOffStopMessage: Any,
replicator: ActorRef,
majorityMinCap: Int)
rememberEntitiesProvider: Option[RememberEntitiesProvider])
extends Actor
with Timers {
@ -753,7 +755,13 @@ private[akka] class ShardRegion(
// because they might be forwarded from other regions and there
// is a risk or message re-ordering otherwise
if (shardBuffers.contains(shard)) {
shardBuffers.remove(shard)
val dropped = shardBuffers
.drop(shard, "Avoiding reordering of buffered messages at shard handoff", context.system.deadLetters)
if (dropped > 0)
log.warning(
"Dropping [{}] buffered messages to shard [{}] during hand off to avoid re-ordering",
dropped,
shard)
loggedFullBufferWarning = false
}
@ -971,15 +979,16 @@ private[akka] class ShardRegion(
if (retryCount >= 5 && retryCount % 5 == 0 && log.isWarningEnabled) {
log.warning(
"{}: Retry request for shards [{}] homes from coordinator. [{}] total buffered messages.",
"{}: Retry request for shards [{}] homes from coordinator. [{}] total buffered messages. Coordinator [{}]",
typeName,
shards.sorted.mkString(","),
totalBuffered)
totalBuffered,
coordinator)
}
}
def initializeShard(id: ShardId, shard: ActorRef): Unit = {
log.debug("{}: Shard was initialized {}", typeName, id)
log.debug("{}: Shard was initialized [{}]", typeName, id)
startingShards -= id
deliverBufferedMessages(id, shard)
}
@ -1103,6 +1112,7 @@ private[akka] class ShardRegion(
log.debug(ShardingLogMarker.shardStarted(typeName, id), "{}: Starting shard [{}] in region", typeName, id)
val name = URLEncoder.encode(id, "utf-8")
val shard = context.watch(
context.actorOf(
Shard
@ -1114,8 +1124,7 @@ private[akka] class ShardRegion(
extractEntityId,
extractShardId,
handOffStopMessage,
replicator,
majorityMinCap)
rememberEntitiesProvider)
.withDispatcher(context.props.dispatcher),
name))
shardsByRef = shardsByRef.updated(shard, id)

View file

@ -0,0 +1,46 @@
/*
* Copyright (C) 2020 Lightbend Inc. <https://www.lightbend.com>
*/
package akka.cluster.sharding
import akka.actor.{ ExtendedActorSystem, Extension, ExtensionId, ExtensionIdProvider }
import akka.annotation.InternalApi
import akka.util.FlightRecorderLoader
/**
* INTERNAL API
*/
@InternalApi
object ShardingFlightRecorder extends ExtensionId[ShardingFlightRecorder] with ExtensionIdProvider {
override def lookup(): ExtensionId[_ <: Extension] = this
override def createExtension(system: ExtendedActorSystem): ShardingFlightRecorder =
FlightRecorderLoader.load[ShardingFlightRecorder](
system,
"akka.cluster.sharding.internal.jfr.JFRShardingFlightRecorder",
NoOpShardingFlightRecorder)
}
/**
* INTERNAL API
*/
@InternalApi private[akka] trait ShardingFlightRecorder extends Extension {
def rememberEntityOperation(duration: Long): Unit
def rememberEntityAdd(entityId: String): Unit
def rememberEntityRemove(entityId: String): Unit
def entityPassivate(entityId: String): Unit
def entityPassivateRestart(entityId: String): Unit
}
/**
* INTERNAL API
*/
@InternalApi
private[akka] case object NoOpShardingFlightRecorder extends ShardingFlightRecorder {
override def rememberEntityOperation(duration: Long): Unit = ()
override def rememberEntityAdd(entityId: String): Unit = ()
override def rememberEntityRemove(entityId: String): Unit = ()
override def entityPassivate(entityId: String): Unit = ()
override def entityPassivateRestart(entityId: String): Unit = ()
}

View file

@ -0,0 +1,48 @@
/*
* Copyright (C) 2009-2020 Lightbend Inc. <https://www.lightbend.com>
*/
package akka.cluster.sharding.internal
import akka.actor.ActorSystem
import akka.actor.ExtendedActorSystem
import akka.actor.Props
import akka.annotation.InternalApi
import akka.cluster.sharding.ClusterShardingSettings
import akka.cluster.sharding.ShardRegion.ShardId
import akka.event.Logging
/**
* INTERNAL API
*
* Only intended for testing, not an extension point.
*/
@InternalApi
private[akka] final class CustomStateStoreModeProvider(
typeName: String,
system: ActorSystem,
settings: ClusterShardingSettings)
extends RememberEntitiesProvider {
private val log = Logging(system, getClass)
log.warning("Using custom remember entities store for [{}], not intended for production use.", typeName)
val customStore = if (system.settings.config.hasPath("akka.cluster.sharding.remember-entities-custom-store")) {
val customClassName = system.settings.config.getString("akka.cluster.sharding.remember-entities-custom-store")
val store = system
.asInstanceOf[ExtendedActorSystem]
.dynamicAccess
.createInstanceFor[RememberEntitiesProvider](
customClassName,
Vector((classOf[ClusterShardingSettings], settings), (classOf[String], typeName)))
log.debug("Will use custom remember entities store provider [{}]", store)
store.get
} else {
log.error("Missing custom store class configuration for CustomStateStoreModeProvider")
throw new RuntimeException("Missing custom store class configuration")
}
override def shardStoreProps(shardId: ShardId): Props = customStore.shardStoreProps(shardId)
override def coordinatorStoreProps(): Props = customStore.coordinatorStoreProps()
}
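A minimal configuration sketch for selecting a custom store through the key read above; the class name is hypothetical and must expose the (ClusterShardingSettings, String) constructor that the reflective createInstanceFor call expects (how the custom state store mode itself is enabled is not shown in this hunk):
import com.typesafe.config.ConfigFactory
object CustomRememberEntitiesStoreConfig {
  // key verbatim from CustomStateStoreModeProvider above; the class is an invented example
  val config = ConfigFactory.parseString(
    "akka.cluster.sharding.remember-entities-custom-store = com.example.MyRememberEntitiesProvider")
}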

View file

@ -0,0 +1,121 @@
/*
* Copyright (C) 2009-2020 Lightbend Inc. <https://www.lightbend.com>
*/
package akka.cluster.sharding.internal
import akka.actor.Actor
import akka.actor.ActorLogging
import akka.actor.ActorRef
import akka.actor.Props
import akka.annotation.InternalApi
import akka.cluster.Cluster
import akka.cluster.ddata.GSet
import akka.cluster.ddata.GSetKey
import akka.cluster.ddata.Replicator
import akka.cluster.ddata.Replicator.ReadMajority
import akka.cluster.ddata.Replicator.WriteMajority
import akka.cluster.ddata.SelfUniqueAddress
import akka.cluster.sharding.ClusterShardingSettings
import akka.cluster.sharding.ShardRegion.ShardId
/**
* INTERNAL API
*/
@InternalApi
private[akka] object DDataRememberEntitiesCoordinatorStore {
def props(typeName: String, settings: ClusterShardingSettings, replicator: ActorRef, majorityMinCap: Int): Props =
Props(new DDataRememberEntitiesCoordinatorStore(typeName, settings, replicator, majorityMinCap))
}
/**
* INTERNAL API
*/
@InternalApi
private[akka] final class DDataRememberEntitiesCoordinatorStore(
typeName: String,
settings: ClusterShardingSettings,
replicator: ActorRef,
majorityMinCap: Int)
extends Actor
with ActorLogging {
implicit val node: Cluster = Cluster(context.system)
implicit val selfUniqueAddress: SelfUniqueAddress = SelfUniqueAddress(node.selfUniqueAddress)
private val readMajority = ReadMajority(settings.tuningParameters.waitingForStateTimeout, majorityMinCap)
private val writeMajority = WriteMajority(settings.tuningParameters.updatingStateTimeout, majorityMinCap)
private val AllShardsKey = GSetKey[String](s"shard-${typeName}-all")
private var allShards: Option[Set[ShardId]] = None
private var coordinatorWaitingForShards: Option[ActorRef] = None
// eager load of remembered shard ids
def getAllShards(): Unit = {
replicator ! Replicator.Get(AllShardsKey, readMajority)
}
getAllShards()
override def receive: Receive = {
case RememberEntitiesCoordinatorStore.GetShards =>
allShards match {
case Some(shardIds) =>
coordinatorWaitingForShards = Some(sender())
onGotAllShards(shardIds)
case None =>
// reply when we get them, since there is only ever one coordinator communicating with us
// and it may retry, we can just keep the latest sender
coordinatorWaitingForShards = Some(sender())
}
case g @ Replicator.GetSuccess(AllShardsKey, _) =>
onGotAllShards(g.get(AllShardsKey).elements)
case Replicator.NotFound(AllShardsKey, _) =>
onGotAllShards(Set.empty)
case Replicator.GetFailure(AllShardsKey, _) =>
log.error(
"The ShardCoordinator was unable to get all shards state within 'waiting-for-state-timeout': {} millis (retrying)",
readMajority.timeout.toMillis)
// repeat until GetSuccess
getAllShards()
case RememberEntitiesCoordinatorStore.AddShard(shardId) =>
replicator ! Replicator.Update(AllShardsKey, GSet.empty[String], writeMajority, Some((sender(), shardId)))(
_ + shardId)
case Replicator.UpdateSuccess(AllShardsKey, Some((replyTo: ActorRef, shardId: ShardId))) =>
log.debug("The coordinator shards state was successfully updated with {}", shardId)
replyTo ! RememberEntitiesCoordinatorStore.UpdateDone(shardId)
case Replicator.UpdateTimeout(AllShardsKey, Some((replyTo: ActorRef, shardId: ShardId))) =>
log.error(
"The ShardCoordinator was unable to update shards distributed state within 'updating-state-timeout': {} millis (retrying), adding shard={}",
writeMajority.timeout.toMillis,
shardId)
replyTo ! RememberEntitiesCoordinatorStore.UpdateFailed(shardId)
case Replicator.ModifyFailure(key, error, cause, Some((replyTo: ActorRef, shardId: ShardId))) =>
log.error(
cause,
"The remember entities store was unable to add shard [{}] (key [{}], failed with error: {})",
shardId,
key,
error)
replyTo ! RememberEntitiesCoordinatorStore.UpdateFailed(shardId)
}
def onGotAllShards(shardIds: Set[ShardId]): Unit = {
coordinatorWaitingForShards match {
case Some(coordinator) =>
coordinator ! RememberEntitiesCoordinatorStore.RememberedShards(shardIds)
coordinatorWaitingForShards = None
// clear the shards out now that we have sent them to coordinator, to save some memory
allShards = None
case None =>
// wait for coordinator to ask
allShards = Some(shardIds)
}
}
}

View file

@ -0,0 +1,29 @@
/*
* Copyright (C) 2009-2020 Lightbend Inc. <https://www.lightbend.com>
*/
package akka.cluster.sharding.internal
import akka.actor.ActorRef
import akka.actor.Props
import akka.annotation.InternalApi
import akka.cluster.sharding.ClusterShardingSettings
import akka.cluster.sharding.ShardRegion.ShardId
/**
* INTERNAL API
*/
@InternalApi
private[akka] final class DDataRememberEntitiesProvider(
typeName: String,
settings: ClusterShardingSettings,
majorityMinCap: Int,
replicator: ActorRef)
extends RememberEntitiesProvider {
override def coordinatorStoreProps(): Props =
DDataRememberEntitiesCoordinatorStore.props(typeName, settings, replicator, majorityMinCap)
override def shardStoreProps(shardId: ShardId): Props =
DDataRememberEntitiesShardStore.props(shardId, typeName, settings, replicator, majorityMinCap)
}

View file

@ -0,0 +1,259 @@
/*
* Copyright (C) 2009-2020 Lightbend Inc. <https://www.lightbend.com>
*/
package akka.cluster.sharding.internal
import akka.actor.Actor
import akka.actor.ActorLogging
import akka.actor.ActorRef
import akka.actor.Props
import akka.actor.Stash
import akka.annotation.InternalApi
import akka.cluster.Cluster
import akka.cluster.ddata.ORSet
import akka.cluster.ddata.ORSetKey
import akka.cluster.ddata.Replicator.Get
import akka.cluster.ddata.Replicator.GetDataDeleted
import akka.cluster.ddata.Replicator.GetFailure
import akka.cluster.ddata.Replicator.GetSuccess
import akka.cluster.ddata.Replicator.ModifyFailure
import akka.cluster.ddata.Replicator.NotFound
import akka.cluster.ddata.Replicator.ReadMajority
import akka.cluster.ddata.Replicator.StoreFailure
import akka.cluster.ddata.Replicator.Update
import akka.cluster.ddata.Replicator.UpdateDataDeleted
import akka.cluster.ddata.Replicator.UpdateSuccess
import akka.cluster.ddata.Replicator.UpdateTimeout
import akka.cluster.ddata.Replicator.WriteMajority
import akka.cluster.ddata.SelfUniqueAddress
import akka.cluster.sharding.ClusterShardingSettings
import akka.cluster.sharding.ShardRegion.EntityId
import akka.cluster.sharding.ShardRegion.ShardId
import akka.util.PrettyDuration._
import scala.concurrent.ExecutionContext
/**
* INTERNAL API
*/
@InternalApi
private[akka] object DDataRememberEntitiesShardStore {
def props(
shardId: ShardId,
typeName: String,
settings: ClusterShardingSettings,
replicator: ActorRef,
majorityMinCap: Int): Props =
Props(new DDataRememberEntitiesShardStore(shardId, typeName, settings, replicator, majorityMinCap))
// The default maximum-frame-size is 256 KiB with Artery.
// When using entity identifiers that are 36 character strings (e.g. UUID.randomUUID),
// splitting the elements over 5 keys means we can support 10000 entities per shard.
// The gossip message size of 5 ORSets with 2000 ids is around 200 KiB.
// This is intentionally not configurable because it's important to have the same
// configuration on each node.
private val numberOfKeys = 5
private def stateKeys(typeName: String, shardId: ShardId): Array[ORSetKey[EntityId]] =
Array.tabulate(numberOfKeys)(i => ORSetKey[EntityId](s"shard-$typeName-$shardId-$i"))
private sealed trait Evt {
def id: EntityId
}
private case class Started(id: EntityId) extends Evt
private case class Stopped(id: EntityId) extends Evt
}
/**
* INTERNAL API
*/
@InternalApi
private[akka] final class DDataRememberEntitiesShardStore(
shardId: ShardId,
typeName: String,
settings: ClusterShardingSettings,
replicator: ActorRef,
majorityMinCap: Int)
extends Actor
with Stash
with ActorLogging {
import DDataRememberEntitiesShardStore._
implicit val ec: ExecutionContext = context.dispatcher
implicit val node: Cluster = Cluster(context.system)
implicit val selfUniqueAddress: SelfUniqueAddress = SelfUniqueAddress(node.selfUniqueAddress)
private val readMajority = ReadMajority(settings.tuningParameters.waitingForStateTimeout, majorityMinCap)
// Note that the timeout is actually updatingStateTimeout / 4 so that we fit 3 retries and a response in the timeout before the shard sees it as a failure
private val writeMajority = WriteMajority(settings.tuningParameters.updatingStateTimeout / 4, majorityMinCap)
private val maxUpdateAttempts = 3
private val keys = stateKeys(typeName, shardId)
if (log.isDebugEnabled) {
log.debug(
"Starting up DDataRememberEntitiesStore, write timeout: [{}], read timeout: [{}], majority min cap: [{}]",
settings.tuningParameters.waitingForStateTimeout.pretty,
settings.tuningParameters.updatingStateTimeout.pretty,
majorityMinCap)
}
loadAllEntities()
private def key(entityId: EntityId): ORSetKey[EntityId] = {
val i = math.abs(entityId.hashCode % numberOfKeys)
keys(i)
}
override def receive: Receive = {
waitingForAllEntityIds(Set.empty, Set.empty, None)
}
def idle: Receive = {
case RememberEntitiesShardStore.GetEntities =>
// not supported, but we may get several if the shard timed out and retried
log.debug("Another get entities request after responding to one, not expected/supported, ignoring")
case update: RememberEntitiesShardStore.Update => onUpdate(update)
}
def waitingForAllEntityIds(gotKeys: Set[Int], ids: Set[EntityId], shardWaiting: Option[ActorRef]): Receive = {
def receiveOne(i: Int, idsForKey: Set[EntityId]): Unit = {
val newGotKeys = gotKeys + i
val newIds = ids.union(idsForKey)
if (newGotKeys.size == numberOfKeys) {
shardWaiting match {
case Some(shard) =>
log.debug("Shard waiting for remembered entities, sending remembered and going idle")
shard ! RememberEntitiesShardStore.RememberedEntities(newIds)
context.become(idle)
unstashAll()
case None =>
// we haven't seen request yet
log.debug("Got remembered entities, waiting for shard to request them")
context.become(waitingForAllEntityIds(newGotKeys, newIds, None))
}
} else {
context.become(waitingForAllEntityIds(newGotKeys, newIds, shardWaiting))
}
}
{
case g @ GetSuccess(_, Some(i: Int)) =>
val key = keys(i)
val ids = g.get(key).elements
receiveOne(i, ids)
case NotFound(_, Some(i: Int)) =>
receiveOne(i, Set.empty)
case GetFailure(key, _) =>
log.error(
"Unable to get an initial state within 'waiting-for-state-timeout': [{}] using [{}] (key [{}])",
readMajority.timeout.pretty,
readMajority,
key)
context.stop(self)
case GetDataDeleted(_, _) =>
log.error("Unable to get an initial state because it was deleted")
context.stop(self)
case update: RememberEntitiesShardStore.Update =>
log.warning("Got an update before load of initial entities completed, dropping update: [{}]", update)
case RememberEntitiesShardStore.GetEntities =>
if (gotKeys.size == numberOfKeys) {
// we already got all ids and were waiting for a request
log.debug("Got request from shard, sending remembered entities")
sender() ! RememberEntitiesShardStore.RememberedEntities(ids)
context.become(idle)
unstashAll()
} else {
// we haven't seen all ids yet
log.debug("Got request from shard, waiting for all remembered entities to arrive")
context.become(waitingForAllEntityIds(gotKeys, ids, Some(sender())))
}
case _ =>
// if we get a write while waiting for the listing, defer it until we have seen the listing, otherwise we can get
// a mismatch between the remembered entities and what the shard thinks it just wrote
stash()
}
}
private def onUpdate(update: RememberEntitiesShardStore.Update): Unit = {
val allEvts: Set[Evt] = (update.started.map(Started(_): Evt).union(update.stopped.map(Stopped)))
// map from set of evts (for same ddata key) to one update that applies each of them
val ddataUpdates: Map[Set[Evt], (Update[ORSet[EntityId]], Int)] =
allEvts.groupBy(evt => key(evt.id)).map {
case (key, evts) =>
(evts, (Update(key, ORSet.empty[EntityId], writeMajority, Some(evts)) { existing =>
evts.foldLeft(existing) {
case (acc, Started(id)) => acc :+ id
case (acc, Stopped(id)) => acc.remove(id)
}
}, maxUpdateAttempts))
}
ddataUpdates.foreach {
case (_, (update, _)) =>
replicator ! update
}
context.become(waitingForUpdates(sender(), update, ddataUpdates))
}
private def waitingForUpdates(
requestor: ActorRef,
update: RememberEntitiesShardStore.Update,
allUpdates: Map[Set[Evt], (Update[ORSet[EntityId]], Int)]): Receive = {
// updatesLeft used both to keep track of what work remains and for retrying on timeout up to a limit
def next(updatesLeft: Map[Set[Evt], (Update[ORSet[EntityId]], Int)]): Receive = {
case UpdateSuccess(_, Some(evts: Set[Evt] @unchecked)) =>
log.debug("The DDataShard state was successfully updated for [{}]", evts)
val remainingAfterThis = updatesLeft - evts
if (remainingAfterThis.isEmpty) {
requestor ! RememberEntitiesShardStore.UpdateDone(update.started, update.stopped)
context.become(idle)
} else {
context.become(next(remainingAfterThis))
}
case UpdateTimeout(_, Some(evts: Set[Evt] @unchecked)) =>
val (updateForEvts, retriesLeft) = updatesLeft(evts)
if (retriesLeft > 0) {
log.debug("Retrying update because of write timeout, tries left [{}]", retriesLeft)
replicator ! updateForEvts
context.become(next(updatesLeft.updated(evts, (updateForEvts, retriesLeft - 1))))
} else {
log.error(
"Unable to update state, within 'updating-state-timeout'= [{}], gave up after [{}] retries",
writeMajority.timeout.pretty,
maxUpdateAttempts)
// will trigger shard restart
context.stop(self)
}
case StoreFailure(_, _) =>
log.error("Unable to update state, due to store failure")
// will trigger shard restart
context.stop(self)
case ModifyFailure(_, error, cause, _) =>
log.error(cause, "Unable to update state, due to modify failure: {}", error)
// will trigger shard restart
context.stop(self)
case UpdateDataDeleted(_, _) =>
log.error("Unable to update state, due to delete")
// will trigger shard restart
context.stop(self)
case update: RememberEntitiesShardStore.Update =>
log.warning("Got a new update before write of previous completed, dropping update: [{}]", update)
}
next(allUpdates)
}
private def loadAllEntities(): Unit = {
(0 until numberOfKeys).toSet[Int].foreach { i =>
val key = keys(i)
replicator ! Get(key, readMajority, Some(i))
}
}
}

View file

@ -0,0 +1,124 @@
/*
* Copyright (C) 2020 Lightbend Inc. <https://www.lightbend.com>
*/
package akka.cluster.sharding.internal
import akka.actor.ActorLogging
import akka.actor.Props
import akka.annotation.InternalApi
import akka.cluster.sharding.ShardCoordinator.Internal
import akka.cluster.sharding.ShardRegion.ShardId
import akka.cluster.sharding.ClusterShardingSerializable
import akka.cluster.sharding.ClusterShardingSettings
import akka.persistence._
import scala.collection.mutable
/**
* INTERNAL API
*/
@InternalApi
private[akka] object EventSourcedRememberEntitiesCoordinatorStore {
def props(typeName: String, settings: ClusterShardingSettings): Props =
Props(new EventSourcedRememberEntitiesCoordinatorStore(typeName, settings))
case class State(shards: Set[ShardId], writtenMigrationMarker: Boolean = false) extends ClusterShardingSerializable
case object MigrationMarker extends ClusterShardingSerializable
}
/**
* INTERNAL API
*/
@InternalApi
private[akka] final class EventSourcedRememberEntitiesCoordinatorStore(
typeName: String,
settings: ClusterShardingSettings)
extends PersistentActor
with ActorLogging {
import EventSourcedRememberEntitiesCoordinatorStore._
// Uses the same persistence id as the old persistent coordinator so that the old data can be migrated
// without any user action
override def persistenceId = s"/sharding/${typeName}Coordinator"
override def journalPluginId: String = settings.journalPluginId
override def snapshotPluginId: String = settings.snapshotPluginId
private val shards = mutable.Set.empty[ShardId]
private var writtenMarker = false
override def receiveRecover: Receive = {
case shardId: ShardId =>
shards.add(shardId)
case SnapshotOffer(_, state: Internal.State) =>
shards ++= (state.shards.keys ++ state.unallocatedShards)
case SnapshotOffer(_, State(shardIds, marker)) =>
shards ++= shardIds
writtenMarker = marker
case RecoveryCompleted =>
log.debug("Recovery complete. Current shards {}. Written Marker {}", shards, writtenMarker)
if (!writtenMarker) {
persist(MigrationMarker) { _ =>
log.debug("Written migration marker")
writtenMarker = true
}
}
case MigrationMarker =>
writtenMarker = true
case other =>
log.error(
"Unexpected message type [{}]. Are you migrating from persistent coordinator state store? If so you must add the migration event adapter. Shards will not be restarted.",
other.getClass)
}
override def receiveCommand: Receive = {
case RememberEntitiesCoordinatorStore.GetShards =>
sender() ! RememberEntitiesCoordinatorStore.RememberedShards(shards.toSet)
case RememberEntitiesCoordinatorStore.AddShard(shardId: ShardId) =>
persistAsync(shardId) { shardId =>
shards.add(shardId)
sender() ! RememberEntitiesCoordinatorStore.UpdateDone(shardId)
saveSnapshotWhenNeeded()
}
case e: SaveSnapshotSuccess =>
log.debug("Snapshot saved successfully")
internalDeleteMessagesBeforeSnapshot(
e,
settings.tuningParameters.keepNrOfBatches,
settings.tuningParameters.snapshotAfter)
case SaveSnapshotFailure(_, reason) =>
log.warning("Snapshot failure: [{}]", reason.getMessage)
case DeleteMessagesSuccess(toSequenceNr) =>
val deleteTo = toSequenceNr - 1
val deleteFrom =
math.max(0, deleteTo - (settings.tuningParameters.keepNrOfBatches * settings.tuningParameters.snapshotAfter))
log.debug(
"Messages to [{}] deleted successfully. Deleting snapshots from [{}] to [{}]",
toSequenceNr,
deleteFrom,
deleteTo)
deleteSnapshots(SnapshotSelectionCriteria(minSequenceNr = deleteFrom, maxSequenceNr = deleteTo))
case DeleteMessagesFailure(reason, toSequenceNr) =>
log.warning("Messages to [{}] deletion failure: [{}]", toSequenceNr, reason.getMessage)
case DeleteSnapshotsSuccess(m) =>
log.debug("Snapshots matching [{}] deleted successfully", m)
case DeleteSnapshotsFailure(m, reason) =>
log.warning("Snapshots matching [{}] deletion failure: [{}]", m, reason.getMessage)
}
def saveSnapshotWhenNeeded(): Unit = {
if (lastSequenceNr % settings.tuningParameters.snapshotAfter == 0 && lastSequenceNr != 0) {
log.debug("Saving snapshot, sequence number [{}]", snapshotSequenceNr)
saveSnapshot(State(shards.toSet, writtenMarker))
}
}
}

View file

@ -0,0 +1,28 @@
/*
* Copyright (C) 2020 Lightbend Inc. <https://www.lightbend.com>
*/
package akka.cluster.sharding.internal
import akka.actor.Props
import akka.annotation.InternalApi
import akka.cluster.sharding.ClusterShardingSettings
import akka.cluster.sharding.ShardRegion.ShardId
/**
* INTERNAL API
*/
@InternalApi
private[akka] final class EventSourcedRememberEntitiesProvider(typeName: String, settings: ClusterShardingSettings)
extends RememberEntitiesProvider {
// this is backed by an actor using the same events, at the serialization level, as the now removed PersistentShard when state-store-mode=persistence
// new events can be added but the old events should continue to be handled
override def shardStoreProps(shardId: ShardId): Props =
EventSourcedRememberEntitiesShardStore.props(typeName, shardId, settings)
// Note that this one is never used for the deprecated persistent state store mode, only when state store is ddata
// combined with eventsourced remember entities storage
override def coordinatorStoreProps(): Props =
EventSourcedRememberEntitiesCoordinatorStore.props(typeName, settings)
}
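A configuration sketch of the combination described in the comment above, assuming the remember-entities-store setting introduced by this rework (ddata for coordinator state, event sourcing for remembered entities):
import com.typesafe.config.ConfigFactory
object EventSourcedRememberEntitiesConfig {
  // remember-entities-store is assumed to accept "eventsourced" alongside the ddata default
  val config = ConfigFactory.parseString("""
    akka.cluster.sharding {
      remember-entities = on
      state-store-mode = ddata
      remember-entities-store = eventsourced
    }
    """)
}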

View file

@ -0,0 +1,160 @@
/*
* Copyright (C) 2009-2020 Lightbend Inc. <https://www.lightbend.com>
*/
package akka.cluster.sharding.internal
import akka.actor.ActorLogging
import akka.actor.Props
import akka.annotation.InternalApi
import akka.cluster.sharding.ClusterShardingSerializable
import akka.cluster.sharding.ClusterShardingSettings
import akka.cluster.sharding.ShardRegion
import akka.cluster.sharding.ShardRegion.EntityId
import akka.persistence.DeleteMessagesFailure
import akka.persistence.DeleteMessagesSuccess
import akka.persistence.DeleteSnapshotsFailure
import akka.persistence.DeleteSnapshotsSuccess
import akka.persistence.PersistentActor
import akka.persistence.RecoveryCompleted
import akka.persistence.SaveSnapshotFailure
import akka.persistence.SaveSnapshotSuccess
import akka.persistence.SnapshotOffer
import akka.persistence.SnapshotSelectionCriteria
/**
* INTERNAL API
*/
@InternalApi
private[akka] object EventSourcedRememberEntitiesShardStore {
/**
* Represents a state change for the Shard
*/
sealed trait StateChange extends ClusterShardingSerializable
/**
* Persistent state of the Shard.
*/
final case class State private[akka] (entities: Set[EntityId] = Set.empty) extends ClusterShardingSerializable
/**
* `State` change for starting a set of entities in this `Shard`
*/
final case class EntitiesStarted(entities: Set[EntityId]) extends StateChange
case object StartedAck
/**
* `State` change for an entity which has terminated.
*/
final case class EntitiesStopped(entities: Set[EntityId]) extends StateChange
def props(typeName: String, shardId: ShardRegion.ShardId, settings: ClusterShardingSettings): Props =
Props(new EventSourcedRememberEntitiesShardStore(typeName, shardId, settings))
}
/**
* INTERNAL API
*
* Persistent actor keeping the state for Akka Persistence backed remember entities (enabled through `state-store-mode=persistence`).
*
* @see [[ClusterSharding$ ClusterSharding extension]]
*/
@InternalApi
private[akka] final class EventSourcedRememberEntitiesShardStore(
typeName: String,
shardId: ShardRegion.ShardId,
settings: ClusterShardingSettings)
extends PersistentActor
with ActorLogging {
import EventSourcedRememberEntitiesShardStore._
import settings.tuningParameters._
private val maxUpdatesPerWrite = context.system.settings.config
.getInt("akka.cluster.sharding.event-sourced-remember-entities-store.max-updates-per-write")
log.debug("Starting up EventSourcedRememberEntitiesStore")
private var state = State()
override def persistenceId = s"/sharding/${typeName}Shard/$shardId"
override def journalPluginId: String = settings.journalPluginId
override def snapshotPluginId: String = settings.snapshotPluginId
override def receiveRecover: Receive = {
case EntitiesStarted(ids) => state = state.copy(state.entities.union(ids))
case EntitiesStopped(ids) => state = state.copy(state.entities.diff(ids))
case SnapshotOffer(_, snapshot: State) => state = snapshot
case RecoveryCompleted =>
log.debug("Recovery completed for shard [{}] with [{}] entities", shardId, state.entities.size)
}
override def receiveCommand: Receive = {
case RememberEntitiesShardStore.Update(started, stopped) =>
val events =
(if (started.nonEmpty) EntitiesStarted(started) :: Nil else Nil) :::
(if (stopped.nonEmpty) EntitiesStopped(stopped) :: Nil else Nil)
var left = events.size
def persistEventsAndHandleComplete(evts: List[StateChange]): Unit = {
persistAll(evts) { _ =>
left -= 1
if (left == 0) {
sender() ! RememberEntitiesShardStore.UpdateDone(started, stopped)
state = state.copy(state.entities.union(started).diff(stopped))
saveSnapshotWhenNeeded()
}
}
}
if (left <= maxUpdatesPerWrite) {
// optimized when batches are small
persistEventsAndHandleComplete(events)
} else {
// split up into several writes so we don't hit the journal limit
events.grouped(maxUpdatesPerWrite).foreach(persistEventsAndHandleComplete)
}
case RememberEntitiesShardStore.GetEntities =>
sender() ! RememberEntitiesShardStore.RememberedEntities(state.entities)
case e: SaveSnapshotSuccess =>
log.debug("Snapshot saved successfully")
internalDeleteMessagesBeforeSnapshot(e, keepNrOfBatches, snapshotAfter)
case SaveSnapshotFailure(_, reason) =>
log.warning("Snapshot failure: [{}]", reason.getMessage)
case DeleteMessagesSuccess(toSequenceNr) =>
val deleteTo = toSequenceNr - 1
val deleteFrom = math.max(0, deleteTo - (keepNrOfBatches * snapshotAfter))
log.debug(
"Messages to [{}] deleted successfully. Deleting snapshots from [{}] to [{}]",
toSequenceNr,
deleteFrom,
deleteTo)
deleteSnapshots(SnapshotSelectionCriteria(minSequenceNr = deleteFrom, maxSequenceNr = deleteTo))
case DeleteMessagesFailure(reason, toSequenceNr) =>
log.warning("Messages to [{}] deletion failure: [{}]", toSequenceNr, reason.getMessage)
case DeleteSnapshotsSuccess(m) =>
log.debug("Snapshots matching [{}] deleted successfully", m)
case DeleteSnapshotsFailure(m, reason) =>
log.warning("Snapshots matching [{}] deletion failure: [{}]", m, reason.getMessage)
}
override def postStop(): Unit = {
super.postStop()
log.debug("Store stopping")
}
def saveSnapshotWhenNeeded(): Unit = {
if (lastSequenceNr % snapshotAfter == 0 && lastSequenceNr != 0) {
log.debug("Saving snapshot, sequence number [{}]", snapshotSequenceNr)
saveSnapshot(state)
}
}
}
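The write batch size above is plain configuration; a sketch with the key taken verbatim from the getInt lookup (the value is just an illustrative choice):
import com.typesafe.config.ConfigFactory
object EventSourcedShardStoreTuning {
  // 100 is an arbitrary example value for the maximum number of updates per write
  val config = ConfigFactory.parseString(
    "akka.cluster.sharding.event-sourced-remember-entities-store.max-updates-per-write = 100")
}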

View file

@ -0,0 +1,84 @@
/*
* Copyright (C) 2009-2020 Lightbend Inc. <https://www.lightbend.com>
*/
package akka.cluster.sharding.internal
import akka.actor.Props
import akka.annotation.InternalApi
import akka.cluster.sharding.ShardRegion.EntityId
import akka.cluster.sharding.ShardRegion.ShardId
/**
* INTERNAL API
*
* Created once for the shard guardian
*/
@InternalApi
private[akka] trait RememberEntitiesProvider {
/**
* Called once per started shard coordinator to create the remember entities coordinator store.
*
* Note that this is not used for the deprecated persistent coordinator which has its own impl for keeping track of
* remembered shards.
*
* @return an actor that handles the protocol defined in [[RememberEntitiesCoordinatorStore]]
*/
def coordinatorStoreProps(): Props
/**
* Called once per started shard to create the remember entities shard store
* @return an actor that handles the protocol defined in [[RememberEntitiesShardStore]]
*/
def shardStoreProps(shardId: ShardId): Props
}
/**
* INTERNAL API
*
* Could potentially become an open SPI in the future.
*
* Implementations are responsible for each of the methods failing the returned future after a timeout.
*/
@InternalApi
private[akka] object RememberEntitiesShardStore {
// SPI protocol for a remember entities shard store
sealed trait Command
// Note: the store is not expected to receive and handle a new update before it has responded to the previous one
final case class Update(started: Set[EntityId], stopped: Set[EntityId]) extends Command
// responses for Update
final case class UpdateDone(started: Set[EntityId], stopped: Set[EntityId])
case object GetEntities extends Command
final case class RememberedEntities(entities: Set[EntityId])
}
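A minimal in-memory sketch of an actor speaking this shard store protocol; the names are invented, nothing survives a restart, and it would have to live in the akka.cluster.sharding.internal package since the SPI is private[akka], so it only illustrates the expected request/response pairs:
import akka.actor.Actor
import akka.actor.Props
import akka.cluster.sharding.ShardRegion.EntityId
private[akka] object InMemoryRememberEntitiesShardStore {
  def props(): Props = Props(new InMemoryRememberEntitiesShardStore)
}
private[akka] final class InMemoryRememberEntitiesShardStore extends Actor {
  private var entities = Set.empty[EntityId]
  override def receive: Receive = {
    case RememberEntitiesShardStore.Update(started, stopped) =>
      // apply the delta and confirm with the same sets, which is what the shard waits for
      entities = entities.union(started).diff(stopped)
      sender() ! RememberEntitiesShardStore.UpdateDone(started, stopped)
    case RememberEntitiesShardStore.GetEntities =>
      // reply with everything currently remembered
      sender() ! RememberEntitiesShardStore.RememberedEntities(entities)
  }
}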
/**
* INTERNAL API
*
* Could potentially become an open SPI in the future.
*/
@InternalApi
private[akka] object RememberEntitiesCoordinatorStore {
// SPI protocol for a remember entities coordinator store
sealed trait Command
/**
* Sent once for every started shard (but could be retried), should result in a response of either
* UpdateDone or UpdateFailed
*/
final case class AddShard(entityId: ShardId) extends Command
final case class UpdateDone(entityId: ShardId)
final case class UpdateFailed(entityId: ShardId)
/**
* Sent once when the coordinator starts (but could be retried), should result in a response of
* RememberedShards
*/
case object GetShards extends Command
final case class RememberedShards(entities: Set[ShardId])
// No message for failed load since we eagerly load the set of shards, may need to change in the future
}
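The coordinator side of the SPI in the same invented in-memory style, plus a provider wiring both sketch stores together to satisfy the RememberEntitiesProvider trait above:
import akka.actor.Actor
import akka.actor.Props
import akka.cluster.sharding.ShardRegion.ShardId
private[akka] object InMemoryRememberEntitiesCoordinatorStore {
  def props(): Props = Props(new InMemoryRememberEntitiesCoordinatorStore)
}
private[akka] final class InMemoryRememberEntitiesCoordinatorStore extends Actor {
  private var shards = Set.empty[ShardId]
  override def receive: Receive = {
    case RememberEntitiesCoordinatorStore.AddShard(shardId) =>
      // remember the shard and acknowledge it to the coordinator
      shards += shardId
      sender() ! RememberEntitiesCoordinatorStore.UpdateDone(shardId)
    case RememberEntitiesCoordinatorStore.GetShards =>
      // reply with all shards remembered so far
      sender() ! RememberEntitiesCoordinatorStore.RememberedShards(shards)
  }
}
private[akka] final class InMemoryRememberEntitiesProvider extends RememberEntitiesProvider {
  override def coordinatorStoreProps(): Props = InMemoryRememberEntitiesCoordinatorStore.props()
  override def shardStoreProps(shardId: ShardId): Props = InMemoryRememberEntitiesShardStore.props()
}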

View file

@ -0,0 +1,127 @@
/*
* Copyright (C) 2009-2020 Lightbend Inc. <https://www.lightbend.com>
*/
package akka.cluster.sharding.internal
import akka.actor.Actor
import akka.actor.ActorLogging
import akka.actor.ActorRef
import akka.actor.NoSerializationVerificationNeeded
import akka.actor.Props
import akka.actor.Timers
import akka.annotation.InternalApi
import akka.cluster.sharding.ClusterShardingSettings
import akka.cluster.sharding.Shard
import akka.cluster.sharding.ShardRegion
import akka.cluster.sharding.ShardRegion.EntityId
import akka.cluster.sharding.ShardRegion.ShardId
import scala.collection.immutable.Set
import scala.concurrent.ExecutionContext
/**
* INTERNAL API
*/
@InternalApi
private[akka] object RememberEntityStarter {
def props(
region: ActorRef,
shard: ActorRef,
shardId: ShardRegion.ShardId,
ids: Set[ShardRegion.EntityId],
settings: ClusterShardingSettings) =
Props(new RememberEntityStarter(region, shard, shardId, ids, settings))
private final case class StartBatch(batchSize: Int) extends NoSerializationVerificationNeeded
private case object ResendUnAcked extends NoSerializationVerificationNeeded
}
/**
* INTERNAL API: Actor responsible for starting entities when rememberEntities is enabled
*/
@InternalApi
private[akka] final class RememberEntityStarter(
region: ActorRef,
shard: ActorRef,
shardId: ShardRegion.ShardId,
ids: Set[ShardRegion.EntityId],
settings: ClusterShardingSettings)
extends Actor
with ActorLogging
with Timers {
implicit val ec: ExecutionContext = context.dispatcher
import RememberEntityStarter._
require(ids.nonEmpty)
private var idsLeftToStart = Set.empty[EntityId]
private var waitingForAck = Set.empty[EntityId]
private var entitiesMoved = Set.empty[EntityId]
log.debug(
"Shard starting [{}] remembered entities using strategy [{}]",
ids.size,
settings.tuningParameters.entityRecoveryStrategy)
settings.tuningParameters.entityRecoveryStrategy match {
case "all" =>
idsLeftToStart = Set.empty
startBatch(ids)
case "constant" =>
import settings.tuningParameters
idsLeftToStart = ids
timers.startTimerWithFixedDelay(
"constant",
StartBatch(tuningParameters.entityRecoveryConstantRateStrategyNumberOfEntities),
tuningParameters.entityRecoveryConstantRateStrategyFrequency)
startBatch(tuningParameters.entityRecoveryConstantRateStrategyNumberOfEntities)
}
timers.startTimerWithFixedDelay("retry", ResendUnAcked, settings.tuningParameters.retryInterval)
override def receive: Receive = {
case StartBatch(batchSize) => startBatch(batchSize)
case ShardRegion.StartEntityAck(entityId, ackFromShardId) => onAck(entityId, ackFromShardId)
case ResendUnAcked => retryUnacked()
}
private def onAck(entityId: EntityId, ackFromShardId: ShardId): Unit = {
idsLeftToStart -= entityId
waitingForAck -= entityId
if (shardId != ackFromShardId) entitiesMoved += entityId
if (waitingForAck.isEmpty && idsLeftToStart.isEmpty) {
if (entitiesMoved.nonEmpty) {
log.info("Found [{}] entities moved to new shard(s)", entitiesMoved.size)
shard ! Shard.EntitiesMovedToOtherShard(entitiesMoved)
}
context.stop(self)
}
}
private def startBatch(batchSize: Int): Unit = {
log.debug("Starting batch of [{}] remembered entities", batchSize)
val (batch, newIdsLeftToStart) = idsLeftToStart.splitAt(batchSize)
idsLeftToStart = newIdsLeftToStart
startBatch(batch)
}
private def startBatch(entityIds: Set[EntityId]): Unit = {
// these go through the region rather than directly to the shard
// so that shard id extractor changes make them start on the right shard
waitingForAck = waitingForAck.union(entityIds)
entityIds.foreach(entityId => region ! ShardRegion.StartEntity(entityId))
}
private def retryUnacked(): Unit = {
if (waitingForAck.nonEmpty) {
log.debug("Found [{}] remembered entities waiting for StartEntityAck, retrying", waitingForAck.size)
waitingForAck.foreach { id =>
// for now we just retry all (as that was the existing behavior spread out over starter and shard)
// but in the future it could perhaps make sense to also batch the retries to avoid a thundering herd
region ! ShardRegion.StartEntity(id)
}
}
}
}
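A configuration sketch of the constant-rate variant matched on above, assuming the entity-recovery-* keys behind the tuningParameters accessors used in this actor:
import com.typesafe.config.ConfigFactory
object ConstantRateEntityRecoveryConfig {
  // assumed keys for entityRecoveryStrategy, ...ConstantRateStrategyFrequency and ...NumberOfEntities
  val config = ConfigFactory.parseString("""
    akka.cluster.sharding {
      entity-recovery-strategy = constant
      entity-recovery-constant-rate-strategy {
        frequency = 100 ms
        number-of-entities = 5
      }
    }
    """)
}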

View file

@ -10,6 +10,8 @@ import java.util.zip.GZIPInputStream
import java.util.zip.GZIPOutputStream
import scala.annotation.tailrec
import scala.concurrent.duration._
import akka.util.ccompat.JavaConverters._
import scala.collection.immutable
import scala.concurrent.duration._
@ -21,11 +23,18 @@ import akka.cluster.sharding.ShardCoordinator
import akka.cluster.sharding.ShardRegion._
import akka.cluster.sharding.protobuf.msg.{ ClusterShardingMessages => sm }
import akka.cluster.sharding.protobuf.msg.ClusterShardingMessages
import akka.cluster.sharding.internal.EventSourcedRememberEntitiesCoordinatorStore.{
MigrationMarker,
State => RememberShardsState
}
import akka.cluster.sharding.internal.EventSourcedRememberEntitiesShardStore.{ State => EntityState }
import akka.cluster.sharding.internal.EventSourcedRememberEntitiesShardStore.{ EntitiesStarted, EntitiesStopped }
import akka.protobufv3.internal.MessageLite
import akka.serialization.BaseSerializer
import akka.serialization.Serialization
import akka.serialization.SerializerWithStringManifest
import akka.util.ccompat._
import akka.util.ccompat.JavaConverters._
/**
@ -37,7 +46,6 @@ private[akka] class ClusterShardingMessageSerializer(val system: ExtendedActorSy
with BaseSerializer {
import Shard.{ CurrentShardState, GetCurrentShardState }
import Shard.{ GetShardStats, ShardStats }
import Shard.{ State => EntityState, EntityStarted, EntityStopped }
import ShardCoordinator.Internal._
private final val BufferSize = 1024 * 4
@ -66,6 +74,8 @@ private[akka] class ClusterShardingMessageSerializer(val system: ExtendedActorSy
private val EntityStateManifest = "CA"
private val EntityStartedManifest = "CB"
private val EntityStoppedManifest = "CD"
private val EntitiesStartedManifest = "CE"
private val EntitiesStoppedManifest = "CF"
private val StartEntityManifest = "EA"
private val StartEntityAckManifest = "EB"
@ -85,10 +95,15 @@ private[akka] class ClusterShardingMessageSerializer(val system: ExtendedActorSy
private val ShardStateManifest = "FD"
private val CurrentShardRegionStateManifest = "FE"
private val EventSourcedRememberShardsMigrationMarkerManifest = "SM"
private val EventSourcedRememberShardsState = "SS"
private val fromBinaryMap = collection.immutable.HashMap[String, Array[Byte] => AnyRef](
EntityStateManifest -> entityStateFromBinary,
EntityStartedManifest -> entityStartedFromBinary,
EntitiesStartedManifest -> entitiesStartedFromBinary,
EntityStoppedManifest -> entityStoppedFromBinary,
EntitiesStoppedManifest -> entitiesStoppedFromBinary,
CoordinatorStateManifest -> coordinatorStateFromBinary,
ShardRegionRegisteredManifest -> { bytes =>
ShardRegionRegistered(actorRefMessageFromBinary(bytes))
@ -164,8 +179,8 @@ private[akka] class ClusterShardingMessageSerializer(val system: ExtendedActorSy
CurrentRegionsManifest -> { bytes =>
currentRegionsFromBinary(bytes)
},
StartEntityManifest -> { startEntityFromBinary(_) },
StartEntityAckManifest -> { startEntityAckFromBinary(_) },
StartEntityManifest -> { startEntityFromBinary },
StartEntityAckManifest -> { startEntityAckFromBinary },
GetCurrentShardStateManifest -> { _ =>
GetCurrentShardState
},
@ -180,12 +195,18 @@ private[akka] class ClusterShardingMessageSerializer(val system: ExtendedActorSy
},
CurrentShardRegionStateManifest -> { bytes =>
currentShardRegionStateFromBinary(bytes)
},
EventSourcedRememberShardsMigrationMarkerManifest -> { _ =>
MigrationMarker
},
EventSourcedRememberShardsState -> { bytes =>
rememberShardsStateFromBinary(bytes)
})
override def manifest(obj: AnyRef): String = obj match {
case _: EntityState => EntityStateManifest
case _: EntityStarted => EntityStartedManifest
case _: EntityStopped => EntityStoppedManifest
case _: EntityState => EntityStateManifest
case _: EntitiesStarted => EntitiesStartedManifest
case _: EntitiesStopped => EntitiesStoppedManifest
case _: State => CoordinatorStateManifest
case _: ShardRegionRegistered => ShardRegionRegisteredManifest
@ -226,6 +247,9 @@ private[akka] class ClusterShardingMessageSerializer(val system: ExtendedActorSy
case _: ShardState => ShardStateManifest
case _: CurrentShardRegionState => CurrentShardRegionStateManifest
case MigrationMarker => EventSourcedRememberShardsMigrationMarkerManifest
case _: RememberShardsState => EventSourcedRememberShardsState
case _ =>
throw new IllegalArgumentException(s"Can't serialize object of type ${obj.getClass} in [${getClass.getName}]")
}
@ -253,9 +277,9 @@ private[akka] class ClusterShardingMessageSerializer(val system: ExtendedActorSy
case GracefulShutdownReq(ref) =>
actorRefMessageToProto(ref).toByteArray
case m: EntityState => entityStateToProto(m).toByteArray
case m: EntityStarted => entityStartedToProto(m).toByteArray
case m: EntityStopped => entityStoppedToProto(m).toByteArray
case m: EntityState => entityStateToProto(m).toByteArray
case m: EntitiesStarted => entitiesStartedToProto(m).toByteArray
case m: EntitiesStopped => entitiesStoppedToProto(m).toByteArray
case s: StartEntity => startEntityToByteArray(s)
case s: StartEntityAck => startEntityAckToByteArray(s)
@ -275,6 +299,9 @@ private[akka] class ClusterShardingMessageSerializer(val system: ExtendedActorSy
case m: ShardState => shardStateToProto(m).toByteArray
case m: CurrentShardRegionState => currentShardRegionStateToProto(m).toByteArray
case MigrationMarker => Array.emptyByteArray
case m: RememberShardsState => rememberShardsStateToProto(m).toByteArray
case _ =>
throw new IllegalArgumentException(s"Can't serialize object of type ${obj.getClass} in [${getClass.getName}]")
}
@ -287,6 +314,18 @@ private[akka] class ClusterShardingMessageSerializer(val system: ExtendedActorSy
s"Unimplemented deserialization of message with manifest [$manifest] in [${getClass.getName}]")
}
private def rememberShardsStateToProto(state: RememberShardsState): sm.RememberedShardState = {
val builder = sm.RememberedShardState.newBuilder()
builder.addAllShardId(state.shards.toList.asJava)
builder.setMarker(state.writtenMigrationMarker)
builder.build()
}
private def rememberShardsStateFromBinary(bytes: Array[Byte]): RememberShardsState = {
val proto = sm.RememberedShardState.parseFrom(bytes)
RememberShardsState(proto.getShardIdList.asScala.toSet, proto.getMarker)
}
private def coordinatorStateToProto(state: State): sm.CoordinatorState = {
val builder = sm.CoordinatorState.newBuilder()
@ -372,17 +411,23 @@ private[akka] class ClusterShardingMessageSerializer(val system: ExtendedActorSy
private def entityStateFromBinary(bytes: Array[Byte]): EntityState =
EntityState(sm.EntityState.parseFrom(bytes).getEntitiesList.asScala.toSet)
private def entityStartedToProto(evt: EntityStarted): sm.EntityStarted =
sm.EntityStarted.newBuilder().setEntityId(evt.entityId).build()
private def entityStartedFromBinary(bytes: Array[Byte]): EntitiesStarted =
EntitiesStarted(Set(sm.EntityStarted.parseFrom(bytes).getEntityId))
private def entityStartedFromBinary(bytes: Array[Byte]): EntityStarted =
EntityStarted(sm.EntityStarted.parseFrom(bytes).getEntityId)
private def entitiesStartedToProto(evt: EntitiesStarted): sm.EntitiesStarted =
sm.EntitiesStarted.newBuilder().addAllEntityId(evt.entities.asJava).build()
private def entityStoppedToProto(evt: EntityStopped): sm.EntityStopped =
sm.EntityStopped.newBuilder().setEntityId(evt.entityId).build()
private def entitiesStartedFromBinary(bytes: Array[Byte]): EntitiesStarted =
EntitiesStarted(sm.EntitiesStarted.parseFrom(bytes).getEntityIdList.asScala.toSet)
private def entityStoppedFromBinary(bytes: Array[Byte]): EntityStopped =
EntityStopped(sm.EntityStopped.parseFrom(bytes).getEntityId)
private def entitiesStoppedToProto(evt: EntitiesStopped): sm.EntitiesStopped =
sm.EntitiesStopped.newBuilder().addAllEntityId(evt.entities.asJava).build()
private def entityStoppedFromBinary(bytes: Array[Byte]): EntitiesStopped =
EntitiesStopped(Set(sm.EntityStopped.parseFrom(bytes).getEntityId))
private def entitiesStoppedFromBinary(bytes: Array[Byte]): EntitiesStopped =
EntitiesStopped(sm.EntitiesStopped.parseFrom(bytes).getEntityIdList.asScala.toSet)
private def shardStatsToProto(evt: ShardStats): sm.ShardStats =
sm.ShardStats.newBuilder().setShard(evt.shardId).setEntityCount(evt.entityCount).build()

View file

@ -99,7 +99,7 @@ abstract class ClusterShardCoordinatorDowning2Spec(multiNodeConfig: ClusterShard
s"Cluster sharding ($mode) with down member, scenario 2" must {
"join cluster" in within(20.seconds) {
startPersistenceIfNotDdataMode(startOn = first, setStoreOn = Seq(first, second))
startPersistenceIfNeeded(startOn = first, setStoreOn = Seq(first, second))
join(first, first, onJoinedRunOnFrom = startSharding())
join(second, first, onJoinedRunOnFrom = startSharding(), assertNodeUp = false)

View file

@ -102,7 +102,7 @@ abstract class ClusterShardCoordinatorDowningSpec(multiNodeConfig: ClusterShardC
s"Cluster sharding ($mode) with down member, scenario 1" must {
"join cluster" in within(20.seconds) {
startPersistenceIfNotDdataMode(startOn = controller, setStoreOn = Seq(first, second))
startPersistenceIfNeeded(startOn = controller, setStoreOn = Seq(first, second))
join(first, first, onJoinedRunOnFrom = startSharding())
join(second, first, onJoinedRunOnFrom = startSharding(), assertNodeUp = false)

View file

@ -118,7 +118,7 @@ abstract class ClusterShardingCustomShardAllocationSpec(multiNodeConfig: Cluster
s"Cluster sharding ($mode) with custom allocation strategy" must {
"use specified region" in within(30.seconds) {
startPersistenceIfNotDdataMode(startOn = first, setStoreOn = Seq(first, second))
startPersistenceIfNeeded(startOn = first, setStoreOn = Seq(first, second))
join(first, first)

View file

@ -8,6 +8,7 @@ import scala.concurrent.duration._
import akka.actor._
import akka.cluster.sharding.ShardRegion.Passivate
import akka.cluster.sharding.ShardRegion.StartEntity
import akka.remote.testconductor.RoleName
import akka.remote.transport.ThrottlerTransportAdapter.Direction
import akka.serialization.jackson.CborSerializable
@ -20,13 +21,20 @@ object ClusterShardingFailureSpec {
case class Add(id: String, i: Int) extends CborSerializable
case class Value(id: String, n: Int) extends CborSerializable
class Entity extends Actor {
class Entity extends Actor with ActorLogging {
log.debug("Starting")
var n = 0
def receive = {
case Get(id) => sender() ! Value(id, n)
case Add(_, i) => n += i
case Get(id) =>
log.debug("Got get request from {}", sender())
sender() ! Value(id, n)
case Add(_, i) =>
n += i
log.debug("Got add request from {}", sender())
}
override def postStop(): Unit = log.debug("Stopping")
}
val extractEntityId: ShardRegion.ExtractEntityId = {
@ -35,8 +43,9 @@ object ClusterShardingFailureSpec {
}
val extractShardId: ShardRegion.ExtractShardId = {
case Get(id) => id.charAt(0).toString
case Add(id, _) => id.charAt(0).toString
case Get(id) => id.charAt(0).toString
case Add(id, _) => id.charAt(0).toString
case StartEntity(id) => id
}
}
@ -44,11 +53,14 @@ abstract class ClusterShardingFailureSpecConfig(override val mode: String)
extends MultiNodeClusterShardingConfig(
mode,
additionalConfig = s"""
akka.loglevel=DEBUG
akka.cluster.roles = ["backend"]
akka.cluster.sharding {
coordinator-failure-backoff = 3s
shard-failure-backoff = 3s
}
# don't leak ddata state across runs
akka.cluster.sharding.distributed-data.durable.keys = []
akka.persistence.journal.leveldb-shared.store.native = off
# using Java serialization for these messages because test is sending them
# to other nodes, which isn't normal usage.
@ -105,7 +117,7 @@ abstract class ClusterShardingFailureSpec(multiNodeConfig: ClusterShardingFailur
s"Cluster sharding ($mode) with flaky journal/network" must {
"join cluster" in within(20.seconds) {
startPersistenceIfNotDdataMode(startOn = controller, setStoreOn = Seq(first, second))
startPersistenceIfNeeded(startOn = controller, setStoreOn = Seq(first, second))
join(first, first)
join(second, first)
@ -127,11 +139,11 @@ abstract class ClusterShardingFailureSpec(multiNodeConfig: ClusterShardingFailur
"recover after journal/network failure" in within(20.seconds) {
runOn(controller) {
if (isDdataMode)
testConductor.blackhole(first, second, Direction.Both).await
else {
if (persistenceIsNeeded) {
testConductor.blackhole(controller, first, Direction.Both).await
testConductor.blackhole(controller, second, Direction.Both).await
} else {
testConductor.blackhole(first, second, Direction.Both).await
}
}
enterBarrier("journal-blackholed")
@ -147,11 +159,11 @@ abstract class ClusterShardingFailureSpec(multiNodeConfig: ClusterShardingFailur
enterBarrier("first-delayed")
runOn(controller) {
if (isDdataMode)
testConductor.passThrough(first, second, Direction.Both).await
else {
if (persistenceIsNeeded) {
testConductor.passThrough(controller, first, Direction.Both).await
testConductor.passThrough(controller, second, Direction.Both).await
} else {
testConductor.passThrough(first, second, Direction.Both).await
}
}
enterBarrier("journal-ok")

View file

@ -68,7 +68,7 @@ abstract class ClusterShardingGracefulShutdownSpec(multiNodeConfig: ClusterShard
s"Cluster sharding ($mode)" must {
"start some shards in both regions" in within(30.seconds) {
startPersistenceIfNotDdataMode(startOn = first, setStoreOn = Seq(first, second))
startPersistenceIfNeeded(startOn = first, setStoreOn = Seq(first, second))
join(first, first, typeName)
join(second, first, typeName)

View file

@ -45,8 +45,9 @@ object ClusterShardingLeavingSpec {
abstract class ClusterShardingLeavingSpecConfig(mode: String)
extends MultiNodeClusterShardingConfig(
mode,
loglevel = "INFO",
loglevel = "DEBUG",
additionalConfig = """
akka.cluster.sharding.verbose-debug-logging = on
akka.cluster.sharding.rebalance-interval = 120 s
akka.cluster.sharding.distributed-data.majority-min-cap = 1
akka.cluster.sharding.coordinator-state.write-majority-plus = 1
@ -101,7 +102,7 @@ abstract class ClusterShardingLeavingSpec(multiNodeConfig: ClusterShardingLeavin
s"Cluster sharding ($mode) with leaving member" must {
"join cluster" in within(20.seconds) {
startPersistenceIfNotDdataMode(startOn = first, setStoreOn = roles)
startPersistenceIfNeeded(startOn = first, setStoreOn = roles)
join(first, first, onJoinedRunOnFrom = startSharding())
join(second, first, onJoinedRunOnFrom = startSharding(), assertNodeUp = false)
@ -173,6 +174,5 @@ abstract class ClusterShardingLeavingSpec(multiNodeConfig: ClusterShardingLeavin
enterBarrier("after-4")
}
}
}

View file

@ -67,7 +67,7 @@ abstract class ClusterShardingMinMembersSpec(multiNodeConfig: ClusterShardingMin
s"Cluster with min-nr-of-members using sharding ($mode)" must {
"use all nodes" in within(30.seconds) {
startPersistenceIfNotDdataMode(startOn = first, setStoreOn = Seq(first, second, third))
startPersistenceIfNeeded(startOn = first, setStoreOn = Seq(first, second, third))
// the only test not asserting join status before starting to shard
join(first, first, onJoinedRunOnFrom = startSharding(), assertNodeUp = false)

View file

@ -137,7 +137,7 @@ abstract class ClusterShardingRememberEntitiesNewExtractorSpec(
s"Cluster with min-nr-of-members using sharding ($mode)" must {
"start up first cluster and sharding" in within(15.seconds) {
startPersistenceIfNotDdataMode(startOn = first, setStoreOn = Seq(second, third))
startPersistenceIfNeeded(startOn = first, setStoreOn = Seq(second, third))
join(first, first)
join(second, first)
@ -200,7 +200,7 @@ abstract class ClusterShardingRememberEntitiesNewExtractorSpec(
val sys2 = ActorSystem(system.name, system.settings.config)
val probe2 = TestProbe()(sys2)
if (!isDdataMode) {
if (persistenceIsNeeded) {
sys2.actorSelection(node(first) / "user" / "store").tell(Identify(None), probe2.ref)
val sharedStore = probe2.expectMsgType[ActorIdentity](10.seconds).ref.get
SharedLeveldbJournal.setStore(sharedStore, sys2)

View file

@ -1,218 +0,0 @@
/*
* Copyright (C) 2019-2020 Lightbend Inc. <https://www.lightbend.com>
*/
package akka.cluster.sharding
import java.util.concurrent.TimeUnit.NANOSECONDS
import scala.concurrent.duration._
import com.typesafe.config.ConfigFactory
import akka.actor._
import akka.cluster.MemberStatus
import akka.testkit._
import akka.util.ccompat._
@ccompatUsedUntil213
object ClusterShardingRememberEntitiesPerfSpec {
def props(): Props = Props(new TestEntity)
class TestEntity extends Actor with ActorLogging {
log.debug("Started TestEntity: {}", self)
def receive = {
case m => sender() ! m
}
}
val extractEntityId: ShardRegion.ExtractEntityId = {
case id: Int => (id.toString, id)
}
val extractShardId: ShardRegion.ExtractShardId = msg =>
msg match {
case _: Int => "0" // only one shard
case ShardRegion.StartEntity(_) => "0"
}
}
object ClusterShardingRememberEntitiesPerfSpecConfig extends MultiNodeClusterShardingConfig(additionalConfig = s"""
akka.testconductor.barrier-timeout = 3 minutes
akka.remote.artery.advanced.outbound-message-queue-size = 10000
akka.remote.artery.advanced.maximum-frame-size = 512 KiB
# comment next line to enable durable lmdb storage
akka.cluster.sharding.distributed-data.durable.keys = []
""") {
val first = role("first")
val second = role("second")
val third = role("third")
nodeConfig(third)(ConfigFactory.parseString(s"""
akka.cluster.sharding.distributed-data.durable.lmdb {
# use same directory when starting new node on third (not used at same time)
dir = "$targetDir/sharding-third"
}
"""))
}
class ClusterShardingRememberEntitiesPerfSpecMultiJvmNode1 extends ClusterShardingRememberEntitiesPerfSpec
class ClusterShardingRememberEntitiesPerfSpecMultiJvmNode2 extends ClusterShardingRememberEntitiesPerfSpec
class ClusterShardingRememberEntitiesPerfSpecMultiJvmNode3 extends ClusterShardingRememberEntitiesPerfSpec
abstract class ClusterShardingRememberEntitiesPerfSpec
extends MultiNodeClusterShardingSpec(ClusterShardingRememberEntitiesPerfSpecConfig)
with ImplicitSender {
import ClusterShardingRememberEntitiesPerfSpec._
import ClusterShardingRememberEntitiesPerfSpecConfig._
def startSharding(): Unit = {
(1 to 3).foreach { n =>
startSharding(
system,
typeName = s"Entity$n",
entityProps = ClusterShardingRememberEntitiesPerfSpec.props(),
extractEntityId = extractEntityId,
extractShardId = extractShardId)
}
}
lazy val region1 = ClusterSharding(system).shardRegion("Entity1")
lazy val region2 = ClusterSharding(system).shardRegion("Entity2")
lazy val region3 = ClusterSharding(system).shardRegion("Entity3")
// use 5 for "real" testing
private val nrIterations = 2
// use 5 for "real" testing
private val numberOfMessagesFactor = 1
s"Cluster sharding with remember entities performance" must {
"form cluster" in within(20.seconds) {
join(first, first)
startSharding()
// this will make it run on first
runOn(first) {
region1 ! 0
expectMsg(0)
region2 ! 0
expectMsg(0)
region3 ! 0
expectMsg(0)
}
enterBarrier("allocated-on-first")
join(second, first)
join(third, first)
within(remaining) {
awaitAssert {
cluster.state.members.size should ===(3)
cluster.state.members.unsorted.map(_.status) should ===(Set(MemberStatus.Up))
}
}
enterBarrier("all-up")
}
"test when starting new entity" in {
runOn(first) {
val numberOfMessages = 200 * numberOfMessagesFactor
(1 to nrIterations).foreach { iteration =>
val startTime = System.nanoTime()
(1 to numberOfMessages).foreach { n =>
region1 ! (iteration * 100000 + n)
}
receiveN(numberOfMessages, 20.seconds)
val took = NANOSECONDS.toMillis(System.nanoTime - startTime)
val throughput = numberOfMessages * 1000.0 / took
println(
s"### Test1 with $numberOfMessages took ${(System.nanoTime() - startTime) / 1000 / 1000} ms, " +
f"throughput $throughput%,.0f msg/s")
}
}
enterBarrier("after-1")
}
"test when starting new entity and sending a few messages to it" in {
runOn(first) {
val numberOfMessages = 800 * numberOfMessagesFactor
(1 to nrIterations).foreach { iteration =>
val startTime = System.nanoTime()
for (n <- 1 to numberOfMessages / 5; _ <- 1 to 5) {
region2 ! (iteration * 100000 + n)
}
receiveN(numberOfMessages, 20.seconds)
val took = NANOSECONDS.toMillis(System.nanoTime - startTime)
val throughput = numberOfMessages * 1000.0 / took
println(
s"### Test2 with $numberOfMessages took ${(System.nanoTime() - startTime) / 1000 / 1000} ms, " +
f"throughput $throughput%,.0f msg/s")
}
}
enterBarrier("after-2")
}
"test when starting some new entities mixed with sending to started" in {
runOn(first) {
val numberOfMessages = 1600 * numberOfMessagesFactor
(1 to nrIterations).foreach { iteration =>
val startTime = System.nanoTime()
(1 to numberOfMessages).foreach { n =>
val msg =
if (n % 20 == 0)
-(iteration * 100000 + n) // unique, will start new entity
else
iteration * 100000 + (n % 10) // these will go to same 10 started entities
region3 ! msg
if (n == 10) {
// wait for the first 10 to avoid filling up stash
receiveN(10, 5.seconds)
}
}
receiveN(numberOfMessages - 10, 20.seconds)
val took = NANOSECONDS.toMillis(System.nanoTime - startTime)
val throughput = numberOfMessages * 1000.0 / took
println(
s"### Test3 with $numberOfMessages took ${(System.nanoTime() - startTime) / 1000 / 1000} ms, " +
f"throughput $throughput%,.0f msg/s")
}
}
enterBarrier("after-3")
}
"test sending to started" in {
runOn(first) {
val numberOfMessages = 1600 * numberOfMessagesFactor
(1 to nrIterations).foreach { iteration =>
var startTime = System.nanoTime()
(1 to numberOfMessages).foreach { n =>
region3 ! (iteration * 100000 + (n % 10)) // these will go to same 10 started entities
if (n == 10) {
// wait for the first 10 and then start the clock
receiveN(10, 5.seconds)
startTime = System.nanoTime()
}
}
receiveN(numberOfMessages - 10, 20.seconds)
val took = NANOSECONDS.toMillis(System.nanoTime - startTime)
val throughput = numberOfMessages * 1000.0 / took
println(
s"### Test4 with $numberOfMessages took ${(System.nanoTime() - startTime) / 1000 / 1000} ms, " +
f"throughput $throughput%,.0f msg/s")
}
}
enterBarrier("after-4")
}
}
}

View file

@ -20,18 +20,21 @@ object ClusterShardingRememberEntitiesSpec {
case id: Int => (id.toString, id)
}
val extractShardId: ShardRegion.ExtractShardId = msg =>
msg match {
case id: Int => id.toString
case ShardRegion.StartEntity(id) => id
}
val extractShardId: ShardRegion.ExtractShardId = {
case id: Int => id.toString
case ShardRegion.StartEntity(id) => id
}
}
abstract class ClusterShardingRememberEntitiesSpecConfig(mode: String, rememberEntities: Boolean)
abstract class ClusterShardingRememberEntitiesSpecConfig(
mode: String,
rememberEntities: Boolean,
rememberEntitiesStore: String = ClusterShardingSettings.RememberEntitiesStoreDData)
extends MultiNodeClusterShardingConfig(
mode,
rememberEntities,
rememberEntitiesStore = rememberEntitiesStore,
additionalConfig = s"""
akka.testconductor.barrier-timeout = 60 s
akka.test.single-expect-default = 60 s
@ -58,12 +61,23 @@ class PersistentClusterShardingRememberEntitiesSpecConfig(rememberEntities: Bool
class DDataClusterShardingRememberEntitiesSpecConfig(rememberEntities: Boolean)
extends ClusterShardingRememberEntitiesSpecConfig(ClusterShardingSettings.StateStoreModeDData, rememberEntities)
class DDataClusterShardingEventSourcedRememberEntitiesSpecConfig(rememberEntities: Boolean)
extends ClusterShardingRememberEntitiesSpecConfig(
ClusterShardingSettings.StateStoreModeDData,
rememberEntities,
ClusterShardingSettings.RememberEntitiesStoreEventsourced)
abstract class PersistentClusterShardingRememberEntitiesSpec(rememberEntities: Boolean)
extends ClusterShardingRememberEntitiesSpec(
new PersistentClusterShardingRememberEntitiesSpecConfig(rememberEntities))
abstract class DDataClusterShardingRememberEntitiesSpec(rememberEntities: Boolean)
extends ClusterShardingRememberEntitiesSpec(new DDataClusterShardingRememberEntitiesSpecConfig(rememberEntities))
abstract class DDataClusterShardingEventSourcedRememberEntitiesSpec(rememberEntities: Boolean)
extends ClusterShardingRememberEntitiesSpec(
new DDataClusterShardingEventSourcedRememberEntitiesSpecConfig(rememberEntities))
class PersistentClusterShardingRememberEntitiesEnabledMultiJvmNode1
extends PersistentClusterShardingRememberEntitiesSpec(true)
class PersistentClusterShardingRememberEntitiesEnabledMultiJvmNode2
@ -86,6 +100,13 @@ class DDataClusterShardingRememberEntitiesDefaultMultiJvmNode1 extends DDataClus
class DDataClusterShardingRememberEntitiesDefaultMultiJvmNode2 extends DDataClusterShardingRememberEntitiesSpec(false)
class DDataClusterShardingRememberEntitiesDefaultMultiJvmNode3 extends DDataClusterShardingRememberEntitiesSpec(false)
class DDataClusterShardingEventSourcedRememberEntitiesEnabledMultiJvmNode1
extends DDataClusterShardingEventSourcedRememberEntitiesSpec(true)
class DDataClusterShardingEventSourcedRememberEntitiesEnabledMultiJvmNode2
extends DDataClusterShardingEventSourcedRememberEntitiesSpec(true)
class DDataClusterShardingEventSourcedRememberEntitiesEnabledMultiJvmNode3
extends DDataClusterShardingEventSourcedRememberEntitiesSpec(true)
abstract class ClusterShardingRememberEntitiesSpec(multiNodeConfig: ClusterShardingRememberEntitiesSpecConfig)
extends MultiNodeClusterShardingSpec(multiNodeConfig)
with ImplicitSender {
@ -123,7 +144,7 @@ abstract class ClusterShardingRememberEntitiesSpec(multiNodeConfig: ClusterShard
s"Cluster sharding with remember entities ($mode)" must {
"start remembered entities when coordinator fail over" in within(30.seconds) {
startPersistenceIfNotDdataMode(startOn = first, setStoreOn = Seq(first, second, third))
startPersistenceIfNeeded(startOn = first, setStoreOn = Seq(first, second, third))
val entityProbe = TestProbe()
val probe = TestProbe()
@ -182,7 +203,7 @@ abstract class ClusterShardingRememberEntitiesSpec(multiNodeConfig: ClusterShard
val entityProbe2 = TestProbe()(sys2)
val probe2 = TestProbe()(sys2)
if (!isDdataMode) setStore(sys2, storeOn = first)
if (persistenceIsNeeded) setStore(sys2, storeOn = first)
Cluster(sys2).join(Cluster(sys2).selfAddress)

View file

@ -14,10 +14,11 @@ import akka.cluster.Cluster
import akka.cluster.ddata.{ Replicator, ReplicatorSettings }
import akka.cluster.sharding.ShardCoordinator.Internal.{ HandOff, ShardStopped }
import akka.cluster.sharding.ShardRegion.{ CurrentRegions, GetCurrentRegions, Passivate }
import akka.cluster.sharding.internal.{ DDataRememberEntitiesProvider, EventSourcedRememberEntitiesProvider }
import akka.cluster.singleton.{ ClusterSingletonManager, ClusterSingletonManagerSettings }
import akka.pattern.BackoffOpts
import akka.persistence.{ Persistence, PersistentActor }
import akka.persistence.journal.leveldb.{ SharedLeveldbJournal, SharedLeveldbStore }
import akka.persistence.{ Persistence, PersistentActor }
import akka.remote.testconductor.RoleName
import akka.testkit._
import akka.testkit.TestEvent.Mute
@ -108,8 +109,11 @@ object ClusterShardingSpec {
}
abstract class ClusterShardingSpecConfig(mode: String, val entityRecoveryStrategy: String = "all")
extends MultiNodeClusterShardingConfig(mode) {
abstract class ClusterShardingSpecConfig(
mode: String,
rememberEntitiesStore: String,
val entityRecoveryStrategy: String = "all")
extends MultiNodeClusterShardingConfig(mode = mode, rememberEntitiesStore = rememberEntitiesStore) {
val controller = role("controller")
val first = role("first")
@ -204,12 +208,24 @@ object ClusterShardingDocCode {
}
object PersistentClusterShardingSpecConfig
extends ClusterShardingSpecConfig(ClusterShardingSettings.StateStoreModePersistence)
object DDataClusterShardingSpecConfig extends ClusterShardingSpecConfig(ClusterShardingSettings.StateStoreModeDData)
extends ClusterShardingSpecConfig(
ClusterShardingSettings.StateStoreModePersistence,
ClusterShardingSettings.RememberEntitiesStoreEventsourced)
object DDataClusterShardingSpecConfig
extends ClusterShardingSpecConfig(
ClusterShardingSettings.StateStoreModeDData,
ClusterShardingSettings.RememberEntitiesStoreDData)
object PersistentClusterShardingWithEntityRecoverySpecConfig
extends ClusterShardingSpecConfig(ClusterShardingSettings.StateStoreModePersistence, "constant")
extends ClusterShardingSpecConfig(
ClusterShardingSettings.StateStoreModePersistence,
ClusterShardingSettings.RememberEntitiesStoreEventsourced,
"constant")
object DDataClusterShardingWithEntityRecoverySpecConfig
extends ClusterShardingSpecConfig(ClusterShardingSettings.StateStoreModeDData, "constant")
extends ClusterShardingSpecConfig(
ClusterShardingSettings.StateStoreModeDData,
ClusterShardingSettings.RememberEntitiesStoreDData,
"constant")
class PersistentClusterShardingSpec extends ClusterShardingSpec(PersistentClusterShardingSpecConfig)
class DDataClusterShardingSpec extends ClusterShardingSpec(DDataClusterShardingSpecConfig)
@ -264,9 +280,18 @@ abstract class ClusterShardingSpec(multiNodeConfig: ClusterShardingSpecConfig)
Replicator.props(ReplicatorSettings(system).withGossipInterval(1.second).withMaxDeltaElements(10)),
"replicator")
def ddataRememberEntitiesProvider(typeName: String) = {
val majorityMinCap = system.settings.config.getInt("akka.cluster.sharding.distributed-data.majority-min-cap")
new DDataRememberEntitiesProvider(typeName, settings, majorityMinCap, replicator)
}
def eventSourcedRememberEntitiesProvider(typeName: String, settings: ClusterShardingSettings) = {
new EventSourcedRememberEntitiesProvider(typeName, settings)
}
def createCoordinator(): Unit = {
def coordinatorProps(typeName: String, rebalanceEnabled: Boolean, rememberEntities: Boolean) = {
def coordinatorProps(typeName: String, rebalanceEnabled: Boolean, rememberEntities: Boolean): Props = {
val allocationStrategy =
new ShardCoordinator.LeastShardAllocationStrategy(rebalanceThreshold = 2, maxSimultaneousRebalance = 1)
val cfg = ConfigFactory.parseString(s"""
@ -275,11 +300,23 @@ abstract class ClusterShardingSpec(multiNodeConfig: ClusterShardingSpecConfig)
rebalance-interval = ${if (rebalanceEnabled) "2s" else "3600s"}
""").withFallback(system.settings.config.getConfig("akka.cluster.sharding"))
val settings = ClusterShardingSettings(cfg).withRememberEntities(rememberEntities)
val majorityMinCap = system.settings.config.getInt("akka.cluster.sharding.distributed-data.majority-min-cap")
if (settings.stateStoreMode == "persistence")
ShardCoordinator.props(typeName, settings, allocationStrategy)
else
ShardCoordinator.props(typeName, settings, allocationStrategy, replicator, majorityMinCap)
else {
val majorityMinCap = system.settings.config.getInt("akka.cluster.sharding.distributed-data.majority-min-cap")
val rememberEntitiesStore =
// only pass a remember entities store provider for ddata for now, persistence uses the all-in-one coordinator
if (settings.rememberEntities) Some(ddataRememberEntitiesProvider(typeName))
else None
ShardCoordinator.props(
typeName,
settings,
allocationStrategy,
replicator,
majorityMinCap,
rememberEntitiesStore)
}
}
List(
@ -319,6 +356,15 @@ abstract class ClusterShardingSpec(multiNodeConfig: ClusterShardingSpecConfig)
buffer-size = 1000
""").withFallback(system.settings.config.getConfig("akka.cluster.sharding"))
val settings = ClusterShardingSettings(cfg).withRememberEntities(rememberEntities)
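// pick the remember entities store implementation matching the configured remember-entities-store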
val rememberEntitiesProvider =
if (!rememberEntities) None
else
settings.rememberEntitiesStore match {
case ClusterShardingSettings.RememberEntitiesStoreDData => Some(ddataRememberEntitiesProvider(typeName))
case ClusterShardingSettings.RememberEntitiesStoreEventsourced =>
Some(eventSourcedRememberEntitiesProvider(typeName, settings))
}
system.actorOf(
ShardRegion.props(
typeName = typeName,
@ -328,8 +374,7 @@ abstract class ClusterShardingSpec(multiNodeConfig: ClusterShardingSpecConfig)
extractEntityId = extractEntityId,
extractShardId = extractShardId,
handOffStopMessage = PoisonPill,
replicator,
majorityMinCap = 3),
rememberEntitiesProvider = rememberEntitiesProvider),
name = typeName + "Region")
}
@ -455,9 +500,7 @@ abstract class ClusterShardingSpec(multiNodeConfig: ClusterShardingSpecConfig)
settings,
coordinatorPath = "/user/counterCoordinator/singleton/coordinator",
extractEntityId = extractEntityId,
extractShardId = extractShardId,
system.deadLetters,
majorityMinCap = 0),
extractShardId = extractShardId),
name = "regionProxy")
proxy ! Get(1)

View file

@ -86,6 +86,7 @@ object MultiNodeClusterShardingConfig {
abstract class MultiNodeClusterShardingConfig(
val mode: String = ClusterShardingSettings.StateStoreModeDData,
val rememberEntities: Boolean = false,
val rememberEntitiesStore: String = ClusterShardingSettings.RememberEntitiesStoreDData,
additionalConfig: String = "",
loglevel: String = "INFO")
extends MultiNodeConfig {
@ -96,7 +97,8 @@ abstract class MultiNodeClusterShardingConfig(
s"target/ClusterSharding${testNameFromCallStack(classOf[MultiNodeClusterShardingConfig]).replace("Config", "").replace("_", "")}"
val persistenceConfig: Config =
if (mode == ClusterShardingSettings.StateStoreModeDData) ConfigFactory.empty
if (mode == ClusterShardingSettings.StateStoreModeDData && rememberEntitiesStore != ClusterShardingSettings.RememberEntitiesStoreEventsourced)
ConfigFactory.empty
else MultiNodeClusterShardingConfig.persistenceConfig(targetDir)
val common: Config =
@ -106,6 +108,8 @@ abstract class MultiNodeClusterShardingConfig(
akka.cluster.downing-provider-class = akka.cluster.testkit.AutoDowning
akka.cluster.testkit.auto-down-unreachable-after = 0s
akka.cluster.sharding.state-store-mode = "$mode"
akka.cluster.sharding.remember-entities = $rememberEntities
akka.cluster.sharding.remember-entities-store = "$rememberEntitiesStore"
akka.cluster.sharding.distributed-data.durable.lmdb {
dir = $targetDir/sharding-ddata
map-size = 10 MiB

View file

@ -95,6 +95,8 @@ abstract class MultiNodeClusterShardingSpec(val config: MultiNodeClusterSharding
protected lazy val storageLocations = List(
new File(system.settings.config.getString("akka.cluster.sharding.distributed-data.durable.lmdb.dir")).getParentFile)
override def expectedTestDuration = 120.seconds
override protected def atStartup(): Unit = {
storageLocations.foreach(dir => if (dir.exists) FileUtils.deleteQuietly(dir))
enterBarrier("startup")
@ -169,10 +171,14 @@ abstract class MultiNodeClusterShardingSpec(val config: MultiNodeClusterSharding
ClusterSharding(sys).startProxy(typeName, role, extractEntityId, extractShardId)
}
protected def isDdataMode: Boolean = mode == ClusterShardingSettings.StateStoreModeDData
protected def isDdataMode = mode == ClusterShardingSettings.StateStoreModeDData
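// a shared journal is needed when the state store mode is persistence or the
// remember entities store is eventsourced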
protected def persistenceIsNeeded: Boolean =
mode == ClusterShardingSettings.StateStoreModePersistence ||
system.settings.config
.getString("akka.cluster.sharding.remember-entities-store") == ClusterShardingSettings.RememberEntitiesStoreEventsourced
protected def setStoreIfNotDdataMode(sys: ActorSystem, storeOn: RoleName): Unit =
if (!isDdataMode) setStore(sys, storeOn)
protected def setStoreIfNeeded(sys: ActorSystem, storeOn: RoleName): Unit =
if (persistenceIsNeeded) setStore(sys, storeOn)
protected def setStore(sys: ActorSystem, storeOn: RoleName): Unit = {
val probe = TestProbe()(sys)
@ -189,8 +195,8 @@ abstract class MultiNodeClusterShardingSpec(val config: MultiNodeClusterSharding
* @param startOn the node to start the `SharedLeveldbStore` store on
* @param setStoreOn the nodes to `SharedLeveldbJournal.setStore` on
*/
protected def startPersistenceIfNotDdataMode(startOn: RoleName, setStoreOn: Seq[RoleName]): Unit =
if (!isDdataMode) startPersistence(startOn, setStoreOn)
protected def startPersistenceIfNeeded(startOn: RoleName, setStoreOn: Seq[RoleName]): Unit =
if (persistenceIsNeeded) startPersistence(startOn, setStoreOn)
/**
* {{{

View file

@ -1,27 +0,0 @@
/*
* Copyright (C) 2018-2020 Lightbend Inc. <https://www.lightbend.com>
*/
package akka.cluster.sharding
import akka.cluster.sharding.ShardRegion.EntityId
import akka.testkit.AkkaSpec
class AllAtOnceEntityRecoveryStrategySpec extends AkkaSpec {
val strategy = EntityRecoveryStrategy.allStrategy()
"AllAtOnceEntityRecoveryStrategy" must {
"recover entities" in {
val entities = Set[EntityId]("1", "2", "3", "4", "5")
val result = strategy.recoverEntities(entities)
result.size should ===(1)
// the Future is completed immediately for allStrategy
result.head.value.get.get should ===(entities)
}
"not recover when no entities to recover" in {
val result = strategy.recoverEntities(Set[EntityId]())
result.size should ===(0)
}
}
}

View file

@ -11,6 +11,7 @@ import akka.cluster.ClusterSettings.DataCenter
import akka.cluster.sharding.ShardCoordinator.Internal.ShardStopped
import akka.cluster.sharding.ShardCoordinator.LeastShardAllocationStrategy
import akka.cluster.sharding.ShardRegion.{ ExtractEntityId, ExtractShardId, HandOffStopper, Msg }
import akka.testkit.WithLogCapturing
import akka.testkit.{ AkkaSpec, TestProbe }
object ClusterShardingInternalsSpec {
@ -30,7 +31,10 @@ class ClusterShardingInternalsSpec extends AkkaSpec("""
|akka.actor.provider = cluster
|akka.remote.classic.netty.tcp.port = 0
|akka.remote.artery.canonical.port = 0
|""".stripMargin) {
|akka.loglevel = DEBUG
|akka.cluster.sharding.verbose-debug-logging = on
|akka.loggers = ["akka.testkit.SilenceAllTestEventListener"]
|""".stripMargin) with WithLogCapturing {
import ClusterShardingInternalsSpec._
case class StartingProxy(

View file

@ -7,20 +7,18 @@ import scala.concurrent.Future
import scala.concurrent.duration._
import scala.util.Success
import scala.util.control.NoStackTrace
import com.typesafe.config.{ Config, ConfigFactory }
import akka.actor.Props
import akka.cluster.{ Cluster, MemberStatus }
import akka.coordination.lease.TestLease
import akka.coordination.lease.TestLeaseExt
import akka.testkit.{ AkkaSpec, ImplicitSender }
import akka.testkit.{ AkkaSpec, ImplicitSender, WithLogCapturing }
import akka.testkit.TestActors.EchoActor
object ClusterShardingLeaseSpec {
val config = ConfigFactory.parseString("""
akka.loglevel = DEBUG
#akka.loggers = ["akka.testkit.SilenceAllTestEventListener"]
akka.loggers = ["akka.testkit.SilenceAllTestEventListener"]
akka.actor.provider = "cluster"
akka.remote.classic.netty.tcp.port = 0
akka.remote.artery.canonical.port = 0
@ -30,6 +28,7 @@ object ClusterShardingLeaseSpec {
distributed-data.durable {
keys = []
}
verbose-debug-logging = on
}
""").withFallback(TestLease.config)
@ -62,7 +61,8 @@ class DDataClusterShardingLeaseSpec extends ClusterShardingLeaseSpec(ClusterShar
class ClusterShardingLeaseSpec(config: Config, rememberEntities: Boolean)
extends AkkaSpec(config.withFallback(ClusterShardingLeaseSpec.config))
with ImplicitSender {
with ImplicitSender
with WithLogCapturing {
import ClusterShardingLeaseSpec._
def this() = this(ConfigFactory.empty(), false)
@ -132,7 +132,7 @@ class ClusterShardingLeaseSpec(config: Config, rememberEntities: Boolean)
awaitAssert({
region ! 4
expectMsg(4)
}, max = 5.seconds)
}, max = 10.seconds)
}
}
}

View file

@ -5,7 +5,6 @@
package akka.cluster.sharding
import scala.concurrent.duration._
import akka.actor.Actor
import akka.actor.ActorRef
import akka.actor.Props
@ -14,17 +13,20 @@ import akka.cluster.MemberStatus
import akka.testkit.AkkaSpec
import akka.testkit.DeadLettersFilter
import akka.testkit.TestEvent.Mute
import akka.testkit.WithLogCapturing
object ConcurrentStartupShardingSpec {
val config =
"""
akka.actor.provider = "cluster"
akka.loglevel = DEBUG
akka.loggers = ["akka.testkit.SilenceAllTestEventListener"]
akka.remote.classic.netty.tcp.port = 0
akka.remote.artery.canonical.port = 0
akka.log-dead-letters = off
akka.log-dead-letters-during-shutdown = off
akka.cluster.sharding.verbose-debug-logging = on
akka.actor {
default-dispatcher {
executor = "fork-join-executor"
@ -57,7 +59,7 @@ object ConcurrentStartupShardingSpec {
}
}
class ConcurrentStartupShardingSpec extends AkkaSpec(ConcurrentStartupShardingSpec.config) {
class ConcurrentStartupShardingSpec extends AkkaSpec(ConcurrentStartupShardingSpec.config) with WithLogCapturing {
import ConcurrentStartupShardingSpec._
// mute logging of deadLetters

View file

@ -1,47 +0,0 @@
/*
* Copyright (C) 2018-2020 Lightbend Inc. <https://www.lightbend.com>
*/
package akka.cluster.sharding
import scala.concurrent.{ Await, Future }
import scala.concurrent.duration._
import akka.cluster.sharding.ShardRegion.EntityId
import akka.testkit.{ AkkaSpec, TimingTest }
class ConstantRateEntityRecoveryStrategySpec extends AkkaSpec {
val strategy = EntityRecoveryStrategy.constantStrategy(system, 1.second, 2)
"ConstantRateEntityRecoveryStrategy" must {
"recover entities" taggedAs TimingTest in {
import system.dispatcher
val entities = Set[EntityId]("1", "2", "3", "4", "5")
val startTime = System.nanoTime()
val resultWithTimes =
strategy.recoverEntities(entities).map(_.map(entityIds => entityIds -> (System.nanoTime() - startTime).nanos))
val result =
Await.result(Future.sequence(resultWithTimes), 6.seconds).toVector.sortBy { case (_, duration) => duration }
result.size should ===(3)
val scheduledEntities = result.map(_._1)
scheduledEntities(0).size should ===(2)
scheduledEntities(1).size should ===(2)
scheduledEntities(2).size should ===(1)
scheduledEntities.flatten.toSet should ===(entities)
val timesMillis = result.map(_._2.toMillis)
// scheduling will not happen too early
timesMillis(0) should ===(1400L +- 500)
timesMillis(1) should ===(2400L +- 500L)
timesMillis(2) should ===(3400L +- 500L)
}
"not recover when no entities to recover" in {
val result = strategy.recoverEntities(Set[EntityId]())
result.size should ===(0)
}
}
}

View file

@ -22,10 +22,12 @@ import akka.util.ccompat._
object CoordinatedShutdownShardingSpec {
val config =
"""
akka.loglevel = DEBUG
akka.loggers = ["akka.testkit.SilenceAllTestEventListener"]
akka.actor.provider = "cluster"
akka.remote.classic.netty.tcp.port = 0
akka.remote.artery.canonical.port = 0
akka.cluster.sharding.verbose-debug-logging = on
"""
val extractEntityId: ShardRegion.ExtractEntityId = {

View file

@ -0,0 +1,104 @@
/*
* Copyright (C) 2020 Lightbend Inc. <https://www.lightbend.com>
*/
package akka.cluster.sharding
import akka.actor.ActorRef
import akka.cluster.sharding
import akka.cluster.sharding.Shard.Active
import akka.cluster.sharding.Shard.NoState
import akka.cluster.sharding.Shard.Passivating
import akka.cluster.sharding.Shard.RememberedButNotCreated
import akka.cluster.sharding.Shard.RememberingStart
import akka.cluster.sharding.Shard.RememberingStop
import akka.event.NoLogging
import akka.util.OptionVal
import org.scalatest.matchers.should.Matchers
import org.scalatest.wordspec.AnyWordSpec
class EntitiesSpec extends AnyWordSpec with Matchers {
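// fresh entity state bookkeeping instance for each test case (NoLogging keeps the tests quiet)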
private def newEntities(rememberingEntities: Boolean) =
new sharding.Shard.Entities(NoLogging, rememberingEntities = rememberingEntities, false)
"Entities" should {
"start empty" in {
val entities = newEntities(rememberingEntities = false)
entities.activeEntityIds() shouldEqual Set.empty
entities.size shouldEqual 0
entities.activeEntities() shouldEqual Set.empty
}
"set already remembered entities to state RememberedButNotStarted" in {
val entities = newEntities(rememberingEntities = true)
val ids = Set("a", "b", "c")
entities.alreadyRemembered(ids)
entities.activeEntities() shouldEqual Set.empty
entities.size shouldEqual 3
ids.foreach { id =>
entities.entityState(id) shouldEqual RememberedButNotCreated
}
}
"set state to remembering start" in {
val entities = newEntities(rememberingEntities = true)
entities.rememberingStart("a", None)
entities.entityState("a") shouldEqual RememberingStart(None)
entities.pendingRememberedEntitiesExist() should ===(true)
val (starts, stops) = entities.pendingRememberEntities()
starts.keySet should contain("a")
stops should be(empty)
// also verify removal from pending once it starts
entities.addEntity("a", ActorRef.noSender)
entities.pendingRememberedEntitiesExist() should ===(false)
entities.pendingRememberEntities()._1 should be(empty)
}
"set state to remembering stop" in {
val entities = newEntities(rememberingEntities = true)
entities.addEntity("a", ActorRef.noSender) // need to go through active to passivate
entities.entityPassivating("a") // need to go through passivate to remember stop
entities.rememberingStop("a")
entities.entityState("a") shouldEqual RememberingStop
entities.pendingRememberedEntitiesExist() should ===(true)
val (starts, stops) = entities.pendingRememberEntities()
stops should contain("a")
starts should be(empty)
// also verify removal from pending once it stops
entities.removeEntity("a")
entities.pendingRememberedEntitiesExist() should ===(false)
entities.pendingRememberEntities()._2 should be(empty)
}
"fully remove an entity" in {
val entities = newEntities(rememberingEntities = true)
val ref = ActorRef.noSender
entities.addEntity("a", ref)
entities.entityPassivating("a") // needs to go through passivating to be removed
entities.removeEntity("a")
entities.entityState("a") shouldEqual NoState
entities.activeEntities() should be(empty)
entities.activeEntityIds() should be(empty)
}
"add an entity as active" in {
val entities = newEntities(rememberingEntities = false)
val ref = ActorRef.noSender
entities.addEntity("a", ref)
entities.entityState("a") shouldEqual Active(ref)
}
"look up actor ref by id" in {
val entities = newEntities(rememberingEntities = false)
val ref = ActorRef.noSender
entities.addEntity("a", ref)
entities.entityId(ref) shouldEqual OptionVal.Some("a")
}
"set state to passivating" in {
val entities = newEntities(rememberingEntities = false)
val ref = ActorRef.noSender
entities.addEntity("a", ref)
entities.entityPassivating("a")
entities.entityState("a") shouldEqual Passivating(ref)
}
}
}

View file

@ -0,0 +1,147 @@
/*
* Copyright (C) 2020 Lightbend Inc. <https://www.lightbend.com>
*/
package akka.cluster.sharding
import akka.actor.Actor
import akka.actor.Props
import akka.cluster.Cluster
import akka.cluster.MemberStatus
import akka.testkit.AkkaSpec
import akka.testkit.ImplicitSender
import akka.testkit.WithLogCapturing
import com.typesafe.config.ConfigFactory
import scala.concurrent.duration._
/**
* Verifies that the automatic restart on terminate/crash that is in place for remember entities does not apply
* when remember entities is not enabled
*/
object EntityTerminationSpec {
final case class EntityEnvelope(id: String, msg: Any)
def config = ConfigFactory.parseString("""
akka.loglevel=DEBUG
akka.loggers = ["akka.testkit.SilenceAllTestEventListener"]
akka.actor.provider = cluster
akka.remote.artery.canonical.port = 0
akka.remote.classic.netty.tcp.port = 0
akka.cluster.sharding.state-store-mode = ddata
# no leaks between test runs thank you
akka.cluster.sharding.distributed-data.durable.keys = []
akka.cluster.sharding.verbose-debug-logging = on
akka.cluster.sharding.entity-restart-backoff = 250ms
""".stripMargin)
object StoppingActor {
def props(): Props = Props(new StoppingActor)
}
class StoppingActor extends Actor {
def receive = {
case "stop" => context.stop(self)
case "ping" => sender() ! "pong"
case "passivate" => context.parent ! ShardRegion.Passivate("stop")
}
}
}
class EntityTerminationSpec extends AkkaSpec(EntityTerminationSpec.config) with ImplicitSender with WithLogCapturing {
import EntityTerminationSpec._
val extractEntityId: ShardRegion.ExtractEntityId = {
case EntityEnvelope(id, payload) => (id.toString, payload)
}
val extractShardId: ShardRegion.ExtractShardId = {
case EntityEnvelope(_, _) => "1" // single shard for all entities
case ShardRegion.StartEntity(_) => "1"
}
override def atStartup(): Unit = {
// Form a one node cluster
val cluster = Cluster(system)
cluster.join(cluster.selfAddress)
awaitAssert(cluster.readView.members.count(_.status == MemberStatus.Up) should ===(1))
}
"Sharding, when an entity terminates" must {
"allow stop without passivation if not remembering entities" in {
val sharding = ClusterSharding(system).start(
"regular",
StoppingActor.props(),
ClusterShardingSettings(system),
extractEntityId,
extractShardId)
sharding ! EntityEnvelope("1", "ping")
expectMsg("pong")
val entity = lastSender
watch(entity)
sharding ! EntityEnvelope("1", "stop")
expectTerminated(entity)
Thread.sleep(400) // restart backoff is 250 ms
sharding ! ShardRegion.GetShardRegionState
val regionState = expectMsgType[ShardRegion.CurrentShardRegionState]
regionState.shards should have size (1)
regionState.shards.head.entityIds should be(empty)
}
"automatically restart a terminating entity (not passivating) if remembering entities" in {
val sharding = ClusterSharding(system).start(
"remembering",
StoppingActor.props(),
ClusterShardingSettings(system).withRememberEntities(true),
extractEntityId,
extractShardId)
sharding ! EntityEnvelope("1", "ping")
expectMsg("pong")
val entity = lastSender
watch(entity)
sharding ! EntityEnvelope("1", "stop")
expectTerminated(entity)
Thread.sleep(400) // restart backoff is 250 ms
awaitAssert({
sharding ! ShardRegion.GetShardRegionState
val regionState = expectMsgType[ShardRegion.CurrentShardRegionState]
regionState.shards should have size (1)
regionState.shards.head.entityIds should have size (1)
}, 2.seconds)
}
"allow terminating entity to passivate if remembering entities" in {
val sharding = ClusterSharding(system).start(
"remembering",
StoppingActor.props(),
ClusterShardingSettings(system).withRememberEntities(true),
extractEntityId,
extractShardId)
sharding ! EntityEnvelope("1", "ping")
expectMsg("pong")
val entity = lastSender
watch(entity)
sharding ! EntityEnvelope("1", "passivate")
expectTerminated(entity)
Thread.sleep(400) // restart backoff is 250 ms
sharding ! ShardRegion.GetShardRegionState
val regionState = expectMsgType[ShardRegion.CurrentShardRegionState]
regionState.shards should have size (1)
regionState.shards.head.entityIds should have size (0)
}
}
}

View file

@ -8,11 +8,13 @@ import akka.actor.Props
import akka.cluster.Cluster
import akka.testkit.AkkaSpec
import akka.testkit.TestActors.EchoActor
import akka.testkit.WithLogCapturing
object GetShardTypeNamesSpec {
val config =
"""
akka.loglevel = INFO
akka.loglevel = DEBUG
akka.loggers = ["akka.testkit.SilenceAllTestEventListener"]
akka.actor.provider = "cluster"
akka.remote.classic.netty.tcp.port = 0
akka.remote.artery.canonical.port = 0
@ -27,7 +29,7 @@ object GetShardTypeNamesSpec {
}
}
class GetShardTypeNamesSpec extends AkkaSpec(GetShardTypeNamesSpec.config) {
class GetShardTypeNamesSpec extends AkkaSpec(GetShardTypeNamesSpec.config) with WithLogCapturing {
import GetShardTypeNamesSpec._
"GetShardTypeNames" must {

View file

@ -12,15 +12,18 @@ import com.typesafe.config.ConfigFactory
import akka.actor.{ Actor, ActorRef, Props }
import akka.cluster.Cluster
import akka.cluster.sharding.InactiveEntityPassivationSpec.Entity.GotIt
import akka.testkit.WithLogCapturing
import akka.testkit.{ AkkaSpec, TestProbe }
object InactiveEntityPassivationSpec {
val config = ConfigFactory.parseString("""
akka.loglevel = INFO
akka.loglevel = DEBUG
akka.loggers = ["akka.testkit.SilenceAllTestEventListener"]
akka.actor.provider = "cluster"
akka.remote.classic.netty.tcp.port = 0
akka.remote.artery.canonical.port = 0
akka.cluster.sharding.verbose-debug-logging = on
""")
val enabledConfig = ConfigFactory.parseString("""
@ -55,7 +58,7 @@ object InactiveEntityPassivationSpec {
}
}
abstract class AbstractInactiveEntityPassivationSpec(c: Config) extends AkkaSpec(c) {
abstract class AbstractInactiveEntityPassivationSpec(c: Config) extends AkkaSpec(c) with WithLogCapturing {
import InactiveEntityPassivationSpec._
private val smallTolerance = 300.millis

View file

@ -11,9 +11,10 @@ import com.typesafe.config.{ Config, ConfigFactory }
import akka.actor.ActorSystem
import akka.cluster.{ Cluster, ClusterReadView }
import akka.testkit.WithLogCapturing
import akka.testkit.{ AkkaSpec, LongRunningTest }
class JoinConfigCompatCheckShardingSpec extends AkkaSpec() {
class JoinConfigCompatCheckShardingSpec extends AkkaSpec() with WithLogCapturing {
def initCluster(system: ActorSystem): ClusterReadView = {
val cluster = Cluster(system)
@ -26,9 +27,12 @@ class JoinConfigCompatCheckShardingSpec extends AkkaSpec() {
val baseConfig: Config =
ConfigFactory.parseString("""
akka.actor.provider = "cluster"
akka.loglevel = DEBUG
akka.loggers = ["akka.testkit.SilenceAllTestEventListener"]
akka.coordinated-shutdown.terminate-actor-system = on
akka.remote.classic.netty.tcp.port = 0
akka.remote.artery.canonical.port = 0
akka.cluster.sharding.verbose-debug-logging = on
""")
"A Joining Node" must {

View file

@ -1,56 +0,0 @@
/*
* Copyright (C) 2018-2020 Lightbend Inc. <https://www.lightbend.com>
*/
package akka.cluster.sharding
import com.typesafe.config.ConfigFactory
import org.scalatest.wordspec.AnyWordSpecLike
import akka.actor.{ Actor, PoisonPill, Props }
import akka.cluster.sharding.PersistentShardSpec.EntityActor
import akka.cluster.sharding.Shard.{ GetShardStats, ShardStats }
import akka.cluster.sharding.ShardRegion.{ StartEntity, StartEntityAck }
import akka.testkit.{ AkkaSpec, ImplicitSender }
object PersistentShardSpec {
class EntityActor extends Actor {
override def receive: Receive = {
case _ =>
}
}
val config = ConfigFactory.parseString("""
akka.persistence.journal.plugin = "akka.persistence.journal.inmem"
""".stripMargin)
}
class PersistentShardSpec extends AkkaSpec(PersistentShardSpec.config) with AnyWordSpecLike with ImplicitSender {
"Persistent Shard" must {
"remember entities started with StartEntity" in {
val props =
Props(new PersistentShard("cats", "shard-1", _ => Props(new EntityActor), ClusterShardingSettings(system), {
case _ => ("entity-1", "msg")
}, { _ =>
"shard-1"
}, PoisonPill))
val persistentShard = system.actorOf(props)
watch(persistentShard)
persistentShard ! StartEntity("entity-1")
expectMsg(StartEntityAck("entity-1", "shard-1"))
persistentShard ! PoisonPill
expectTerminated(persistentShard)
system.log.info("Starting shard again")
val secondIncarnation = system.actorOf(props)
secondIncarnation ! GetShardStats
awaitAssert(expectMsg(ShardStats("shard-1", 1)))
}
}
}

View file

@ -0,0 +1,174 @@
/*
* Copyright (C) 2020 Lightbend Inc. <https://www.lightbend.com>
*/
package akka.cluster.sharding
import java.util.UUID
import akka.actor.{ ActorRef, ActorSystem, Props }
import akka.cluster.{ Cluster, MemberStatus }
import akka.persistence.PersistentActor
import akka.testkit.{ AkkaSpec, ImplicitSender, TestProbe }
import com.typesafe.config.{ Config, ConfigFactory }
import scala.concurrent.Await
import scala.concurrent.duration._
/**
* Test migration from the old persistent shard coordinator with remembered
* entities to a ddata-backed shard coordinator with an event sourced
* remember entities store.
*/
object PersistentShardingMigrationSpec {
val config = ConfigFactory.parseString(s"""
akka.loglevel = INFO
akka.actor.provider = "cluster"
akka.remote.artery.canonical.port = 0
akka.cluster.sharding {
remember-entities = on
remember-entities-store = "eventsourced"
# this forces the remember entities store to use persistence
# which is deprecated
state-store-mode = "persistence"
# make sure we test snapshots
snapshot-after = 5
verbose-debug-logging = on
# Lots of sharding setup, make it quicker
retry-interval = 500ms
}
akka.persistence.journal.plugin = "akka.persistence.journal.leveldb"
akka.persistence.snapshot-store.plugin = "akka.persistence.snapshot-store.local"
akka.persistence.snapshot-store.local.dir = "target/PersistentShardingMigrationSpec-${UUID
.randomUUID()
.toString}"
akka.persistence.journal.leveldb {
native = off
dir = "target/journal-PersistentShardingMigrationSpec-${UUID.randomUUID()}"
}
""")
val configForNewMode = ConfigFactory
.parseString("""
akka.cluster.sharding {
remember-entities = on
remember-entities-store = "eventsourced"
state-store-mode = "ddata"
}
akka.persistence.journal.leveldb {
event-adapters {
coordinator-migration = "akka.cluster.sharding.OldCoordinatorStateMigrationEventAdapter"
}
event-adapter-bindings {
"akka.cluster.sharding.ShardCoordinator$Internal$DomainEvent" = coordinator-migration
}
}
""")
.withFallback(config)
case class Message(id: Long)
class PA extends PersistentActor {
override def persistenceId: String = "pa-" + self.path.name
override def receiveRecover: Receive = {
case _ =>
}
override def receiveCommand: Receive = {
case _ =>
sender() ! "ack"
}
}
val extractEntityId: ShardRegion.ExtractEntityId = {
case msg @ Message(id) => (id.toString, msg)
}
def extractShardId(probe: ActorRef): ShardRegion.ExtractShardId = {
case Message(id) => id.toString
case ShardRegion.StartEntity(id) =>
// StartEntity is used by the remember entities feature
probe ! id
id
}
}
class PersistentShardingMigrationSpec extends AkkaSpec(PersistentShardingMigrationSpec.config) with ImplicitSender {
import PersistentShardingMigrationSpec._
"Migration" should {
"allow migration of remembered shards and now allow going back" in {
val typeName = "Migration"
withSystem(config, typeName, "OldMode") { (_, region, _) =>
region ! Message(1)
expectMsg("ack")
region ! Message(2)
expectMsg("ack")
region ! Message(3)
expectMsg("ack")
}
withSystem(configForNewMode, typeName, "NewMode") { (system, region, rememberedEntitiesProbe) =>
val probe = TestProbe()(system)
region.tell(Message(1), probe.ref)
probe.expectMsg("ack")
Set(
rememberedEntitiesProbe.expectMsgType[String],
rememberedEntitiesProbe.expectMsgType[String],
rememberedEntitiesProbe
.expectMsgType[String]) shouldEqual Set("1", "2", "3") // 1-2 from the snapshot, 3 from a replayed message
rememberedEntitiesProbe.expectNoMessage()
}
withSystem(config, typeName, "OldModeAfterMigration") { (system, region, _) =>
val probe = TestProbe()(system)
region.tell(Message(1), probe.ref)
import scala.concurrent.duration._
probe.expectNoMessage(5.seconds) // sharding should have failed to start
}
}
"not allow going back to persistence mode based on a snapshot" in {
val typeName = "Snapshots"
withSystem(configForNewMode, typeName, "NewMode") { (system, region, _) =>
val probe = TestProbe()(system)
for (i <- 1 to 5) {
region.tell(Message(i), probe.ref)
probe.expectMsg("ack")
}
}
withSystem(config, typeName, "OldModeShouldNotWork") { (system, region, _) =>
val probe = TestProbe()(system)
region.tell(Message(1), probe.ref)
probe.expectNoMessage(1.seconds)
}
}
def withSystem(config: Config, typeName: String, systemName: String)(
f: (ActorSystem, ActorRef, TestProbe) => Unit) = {
val system = ActorSystem(systemName, config)
val cluster = Cluster(system)
cluster.join(cluster.selfAddress)
awaitAssert(cluster.selfMember.status shouldEqual MemberStatus.Up)
try {
val rememberedEntitiesProbe = TestProbe()(system)
val region = ClusterSharding(system).start(
typeName,
Props(new PA()),
extractEntityId,
extractShardId(rememberedEntitiesProbe.ref))
f(system, region, rememberedEntitiesProbe)
} finally {
Await.ready(system.terminate(), 20.seconds)
}
}
}
}

View file

@ -0,0 +1,102 @@
/*
* Copyright (C) 2018-2020 Lightbend Inc. <https://www.lightbend.com>
*/
package akka.cluster.sharding
import akka.actor.Actor
import akka.actor.ActorRef
import akka.actor.PoisonPill
import akka.actor.Props
import akka.cluster.Cluster
import akka.cluster.MemberStatus
import akka.cluster.sharding.Shard.GetShardStats
import akka.cluster.sharding.Shard.ShardStats
import akka.cluster.sharding.ShardRegion.StartEntity
import akka.cluster.sharding.ShardRegion.StartEntityAck
import akka.testkit.AkkaSpec
import akka.testkit.ImplicitSender
import akka.testkit.WithLogCapturing
import com.typesafe.config.ConfigFactory
import org.scalatest.wordspec.AnyWordSpecLike
object PersistentStartEntitySpec {
class EntityActor extends Actor {
override def receive: Receive = {
case "give-me-shard" => sender() ! context.parent
case msg => sender() ! msg
}
}
case class EntityEnvelope(entityId: Int, msg: Any)
val extractEntityId: ShardRegion.ExtractEntityId = {
case EntityEnvelope(id, payload) => (id.toString, payload)
}
val extractShardId: ShardRegion.ExtractShardId = {
case EntityEnvelope(id, _) => (id % 10).toString
case StartEntity(id) => (id.toInt % 10).toString
}
val config = ConfigFactory.parseString("""
akka.loglevel=DEBUG
akka.loggers = ["akka.testkit.SilenceAllTestEventListener"]
akka.actor.provider = cluster
akka.remote.artery.canonical.port = 0
akka.remote.classic.netty.tcp.port = 0
akka.persistence.journal.plugin = "akka.persistence.journal.inmem"
akka.cluster.sharding.verbose-debug-logging = on
""".stripMargin)
}
// this test covers remember entities + StartEntity for the deprecated persistent state store
class PersistentStartEntitySpec
extends AkkaSpec(PersistentStartEntitySpec.config)
with AnyWordSpecLike
with ImplicitSender
with WithLogCapturing {
import PersistentStartEntitySpec._
override def atStartup(): Unit = {
// Form a one node cluster
val cluster = Cluster(system)
cluster.join(cluster.selfAddress)
awaitAssert(cluster.readView.members.count(_.status == MemberStatus.Up) should ===(1))
}
"Persistent Shard" must {
"remember entities started with StartEntity" in {
val sharding = ClusterSharding(system).start(
s"startEntity",
Props[EntityActor],
ClusterShardingSettings(system)
.withRememberEntities(true)
.withStateStoreMode(ClusterShardingSettings.StateStoreModePersistence),
extractEntityId,
extractShardId)
sharding ! StartEntity("1")
expectMsg(StartEntityAck("1", "1"))
val shard = lastSender
watch(shard)
shard ! PoisonPill
expectTerminated(shard)
// trigger shard start by messaging other actor in it
system.log.info("Starting shard again")
sharding ! EntityEnvelope(11, "give-me-shard")
val secondShardIncarnation = expectMsgType[ActorRef]
awaitAssert {
secondShardIncarnation ! GetShardStats
// the remembered entity 1 plus entity 11 whose start we just triggered
expectMsg(ShardStats("1", 2))
}
}
}
}

View file

@ -11,16 +11,20 @@ import scala.concurrent.duration.FiniteDuration
import akka.actor.ActorRef
import akka.testkit.AkkaSpec
import akka.testkit.TestActors
import akka.testkit.WithLogCapturing
object ProxyShardingSpec {
val config = """
akka.actor.provider = "cluster"
akka.actor.provider = cluster
akka.loglevel = DEBUG
akka.loggers = ["akka.testkit.SilenceAllTestEventListener"]
akka.remote.classic.netty.tcp.port = 0
akka.remote.artery.canonical.port = 0
akka.cluster.sharding.verbose-debug-logging = on
"""
}
class ProxyShardingSpec extends AkkaSpec(ProxyShardingSpec.config) {
class ProxyShardingSpec extends AkkaSpec(ProxyShardingSpec.config) with WithLogCapturing {
val role = "Shard"
val clusterSharding: ClusterSharding = ClusterSharding(system)

View file

@ -0,0 +1,115 @@
/*
* Copyright (C) 2020 Lightbend Inc. <https://www.lightbend.com>
*/
package akka.cluster.sharding
import akka.actor.{ Actor, ActorLogging, ActorRef, Props }
import akka.cluster.{ Cluster, MemberStatus }
import akka.testkit.{ AkkaSpec, ImplicitSender, TestProbe }
import com.typesafe.config.ConfigFactory
import org.scalatest.wordspec.AnyWordSpecLike
object RememberEntitiesBatchedUpdatesSpec {
case class EntityEnvelope(id: Int, msg: Any)
object EntityActor {
case class Started(id: Int)
case class Stopped(id: Int)
def props(probe: ActorRef) = Props(new EntityActor(probe))
}
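// entity reporting Started/Stopped to the probe so the test can observe entity lifecycles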
class EntityActor(probe: ActorRef) extends Actor with ActorLogging {
import EntityActor._
probe ! Started(self.path.name.toInt)
override def receive: Receive = {
case "stop" =>
log.debug("Got stop message, stopping")
context.stop(self)
case "graceful-stop" =>
log.debug("Got a graceful stop, requesting passivation")
context.parent ! ShardRegion.Passivate("stop")
case "start" =>
log.debug("Got a start")
case "ping" =>
}
override def postStop(): Unit = {
probe ! Stopped(self.path.name.toInt)
}
}
def config = ConfigFactory.parseString("""
akka.loglevel=DEBUG
# akka.loggers = ["akka.testkit.SilenceAllTestEventListener"]
akka.actor.provider = cluster
akka.remote.artery.canonical.port = 0
akka.remote.classic.netty.tcp.port = 0
akka.cluster.sharding.state-store-mode = ddata
akka.cluster.sharding.remember-entities = on
# no leaks between test runs thank you
akka.cluster.sharding.distributed-data.durable.keys = []
akka.cluster.sharding.verbose-debug-logging = on
""".stripMargin)
}
class RememberEntitiesBatchedUpdatesSpec
extends AkkaSpec(RememberEntitiesBatchedUpdatesSpec.config)
with AnyWordSpecLike
with ImplicitSender {
import RememberEntitiesBatchedUpdatesSpec._
val extractEntityId: ShardRegion.ExtractEntityId = {
case EntityEnvelope(id, payload) => (id.toString, payload)
}
val extractShardId: ShardRegion.ExtractShardId = {
case EntityEnvelope(_, _) => "1" // single shard for all entities
case ShardRegion.StartEntity(_) => "1"
}
override def atStartup(): Unit = {
// Form a one node cluster
val cluster = Cluster(system)
cluster.join(cluster.selfAddress)
awaitAssert(cluster.readView.members.count(_.status == MemberStatus.Up) should ===(1))
}
"Batching of starts and stops" must {
"work" in {
val probe = TestProbe()
val sharding = ClusterSharding(system).start(
"batching",
EntityActor.props(probe.ref),
ClusterShardingSettings(system),
extractEntityId,
extractShardId)
// make sure that sharding is up and running
sharding.tell(EntityEnvelope(0, "ping"), probe.ref)
probe.expectMsg(EntityActor.Started(0))
// start 20, should write first and batch the rest
(1 to 20).foreach { i =>
sharding ! EntityEnvelope(i, "start")
}
probe.receiveN(20)
// start 20 more, and stop the previous ones that are already running,
// should create a mixed batch of start + stops
(21 to 40).foreach { i =>
sharding ! EntityEnvelope(i, "start")
sharding ! EntityEnvelope(i - 20, "graceful-stop")
}
probe.receiveN(40)
// stop the last 20, should batch stops only
(21 to 40).foreach { i =>
sharding ! EntityEnvelope(i, "graceful-stop")
}
probe.receiveN(20)
}
}
}

View file

@ -0,0 +1,395 @@
/*
* Copyright (C) 2009-2020 Lightbend Inc. <https://www.lightbend.com>
*/
package akka.cluster.sharding
import akka.Done
import akka.actor.{ Actor, ActorLogging, ActorRef, Props, Timers }
import akka.cluster.Cluster
import akka.cluster.MemberStatus
import akka.cluster.sharding.ShardRegion.ShardId
import akka.cluster.sharding.internal.RememberEntitiesCoordinatorStore
import akka.cluster.sharding.internal.RememberEntitiesShardStore
import akka.cluster.sharding.internal.RememberEntitiesProvider
import akka.testkit.AkkaSpec
import akka.testkit.TestException
import akka.testkit.TestProbe
import akka.testkit.WithLogCapturing
import com.github.ghik.silencer.silent
import com.typesafe.config.ConfigFactory
import org.scalatest.wordspec.AnyWordSpecLike
import scala.concurrent.duration._
object RememberEntitiesFailureSpec {
val config = ConfigFactory.parseString(s"""
akka.loglevel = DEBUG
akka.loggers = ["akka.testkit.SilenceAllTestEventListener"]
akka.actor.provider = cluster
akka.remote.artery.canonical.port = 0
akka.remote.classic.netty.tcp.port = 0
akka.cluster.sharding.distributed-data.durable.keys = []
# must be ddata or else remember entities store is ignored
akka.cluster.sharding.state-store-mode = ddata
akka.cluster.sharding.remember-entities = on
akka.cluster.sharding.remember-entities-store = custom
akka.cluster.sharding.remember-entities-custom-store = "akka.cluster.sharding.RememberEntitiesFailureSpec$$FakeStore"
# quick backoffs
akka.cluster.sharding.entity-restart-backoff = 1s
akka.cluster.sharding.shard-failure-backoff = 1s
akka.cluster.sharding.coordinator-failure-backoff = 1s
akka.cluster.sharding.updating-state-timeout = 1s
akka.cluster.sharding.verbose-debug-logging = on
""")
class EntityActor extends Actor with ActorLogging {
log.info("Entity actor [{}] starting up", context.self.path.name)
override def receive: Receive = {
case "stop" =>
log.info("Stopping myself!")
context.stop(self)
case "graceful-stop" =>
context.parent ! ShardRegion.Passivate("stop")
case msg => sender() ! msg
}
}
case class EntityEnvelope(entityId: Int, msg: Any)
val extractEntityId: ShardRegion.ExtractEntityId = {
case EntityEnvelope(id, payload) => (id.toString, payload)
}
val extractShardId: ShardRegion.ExtractShardId = {
case EntityEnvelope(id, _) => (id % 10).toString
}
sealed trait Fail
case object NoResponse extends Fail
case object CrashStore extends Fail
case object StopStore extends Fail
// not really a failure but close enough
case class Delay(howLong: FiniteDuration) extends Fail
// outside store since we need to be able to set them before sharding initializes
@volatile var failShardGetEntities = Map.empty[ShardId, Fail]
@volatile var failCoordinatorGetShards: Option[Fail] = None
case class ShardStoreCreated(store: ActorRef, shardId: ShardId)
case class CoordinatorStoreCreated(store: ActorRef)
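// fake store wired in via remember-entities-custom-store above, producing shard and
// coordinator store actors whose failure behavior the tests control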
@silent("never used")
class FakeStore(settings: ClusterShardingSettings, typeName: String) extends RememberEntitiesProvider {
override def shardStoreProps(shardId: ShardId): Props = FakeShardStoreActor.props(shardId)
override def coordinatorStoreProps(): Props = FakeCoordinatorStoreActor.props()
}
object FakeShardStoreActor {
def props(shardId: ShardId): Props = Props(new FakeShardStoreActor(shardId))
case class FailUpdateEntity(whichWay: Fail)
case object ClearFail
case class Delayed(replyTo: ActorRef, msg: Any)
}
class FakeShardStoreActor(shardId: ShardId) extends Actor with ActorLogging with Timers {
import FakeShardStoreActor._
implicit val ec = context.system.dispatcher
private var failUpdate: Option[Fail] = None
context.system.eventStream.publish(ShardStoreCreated(self, shardId))
override def receive: Receive = {
case RememberEntitiesShardStore.GetEntities =>
failShardGetEntities.get(shardId) match {
case None => sender ! RememberEntitiesShardStore.RememberedEntities(Set.empty)
case Some(NoResponse) => log.debug("Sending no response for GetEntities")
case Some(CrashStore) => throw TestException("store crash on GetEntities")
case Some(StopStore) => context.stop(self)
case Some(Delay(howLong)) =>
log.debug("Delaying initial entities listing with {}", howLong)
timers.startSingleTimer("get-entities-delay", Delayed(sender(), Set.empty), howLong)
}
case RememberEntitiesShardStore.Update(started, stopped) =>
failUpdate match {
case None => sender ! RememberEntitiesShardStore.UpdateDone(started, stopped)
case Some(NoResponse) => log.debug("Sending no response for Update")
case Some(CrashStore) => throw TestException("store crash on Update")
case Some(StopStore) => context.stop(self)
case Some(Delay(howLong)) =>
log.debug("Delaying response for Update with {}", howLong)
timers.startSingleTimer("add-entity-delay", Delayed(sender(), Set.empty), howLong)
}
case FailUpdateEntity(whichWay) =>
failUpdate = Some(whichWay)
sender() ! Done
case ClearFail =>
failUpdate = None
sender() ! Done
case Delayed(to, msg) =>
to ! msg
}
}
object FakeCoordinatorStoreActor {
def props(): Props = Props(new FakeCoordinatorStoreActor)
case class FailAddShard(shardId: ShardId, wayToFail: Fail)
case class ClearFailShard(shardId: ShardId)
}
class FakeCoordinatorStoreActor extends Actor with ActorLogging with Timers {
import FakeCoordinatorStoreActor._
import FakeShardStoreActor.Delayed
context.system.eventStream.publish(CoordinatorStoreCreated(context.self))
private var failAddShard = Map.empty[ShardId, Fail]
override def receive: Receive = {
case RememberEntitiesCoordinatorStore.GetShards =>
failCoordinatorGetShards match {
case None => sender() ! RememberEntitiesCoordinatorStore.RememberedShards(Set.empty)
case Some(NoResponse) =>
case Some(CrashStore) => throw TestException("store crash on load")
case Some(StopStore) => context.stop(self)
case Some(Delay(howLong)) =>
log.debug("Delaying initial shard listing with {}", howLong)
timers.startSingleTimer("list-shards-delay", Delayed(sender(), Set.empty), howLong)
}
case RememberEntitiesCoordinatorStore.AddShard(shardId) =>
failAddShard.get(shardId) match {
case None => sender() ! RememberEntitiesCoordinatorStore.UpdateDone(shardId)
case Some(NoResponse) =>
case Some(CrashStore) => throw TestException("store crash on add")
case Some(StopStore) => context.stop(self)
case Some(Delay(howLong)) =>
log.debug("Delaying adding shard with {}", howLong)
timers.startSingleTimer("add-shard-delay", Delayed(sender(), Set.empty), howLong)
}
case FailAddShard(shardId, wayToFail) =>
log.debug("Failing store of {} with {}", shardId, wayToFail)
failAddShard = failAddShard.updated(shardId, wayToFail)
sender() ! Done
case ClearFailShard(shardId) =>
log.debug("No longer failing store of {}", shardId)
failAddShard = failAddShard - shardId
sender() ! Done
case Delayed(to, msg) =>
to ! msg
}
}
}
class RememberEntitiesFailureSpec
extends AkkaSpec(RememberEntitiesFailureSpec.config)
with AnyWordSpecLike
with WithLogCapturing {
import RememberEntitiesFailureSpec._
override def atStartup(): Unit = {
// Form a one node cluster
val cluster = Cluster(system)
cluster.join(cluster.selfAddress)
awaitAssert(cluster.readView.members.count(_.status == MemberStatus.Up) should ===(1))
}
"Remember entities handling in sharding" must {
List(NoResponse, CrashStore, StopStore, Delay(500.millis), Delay(1.second)).foreach { wayToFail: Fail =>
s"recover when initial remember entities load fails $wayToFail" in {
log.debug("Getting entities for shard 1 will fail")
failShardGetEntities = Map("1" -> wayToFail)
try {
val probe = TestProbe()
val sharding = ClusterSharding(system).start(
s"initial-$wayToFail",
Props[EntityActor],
ClusterShardingSettings(system).withRememberEntities(true),
extractEntityId,
extractShardId)
sharding.tell(EntityEnvelope(1, "hello-1"), probe.ref)
probe.expectNoMessage() // message is lost because shard crashes
log.debug("Resetting initial fail")
failShardGetEntities = Map.empty
// shard should be restarted and eventually succeed
awaitAssert {
sharding.tell(EntityEnvelope(1, "hello-1"), probe.ref)
probe.expectMsg("hello-1")
}
system.stop(sharding)
} finally {
failShardGetEntities = Map.empty
}
}
s"recover when shard storing a start event fails $wayToFail" in {
val storeProbe = TestProbe()
system.eventStream.subscribe(storeProbe.ref, classOf[ShardStoreCreated])
val sharding = ClusterSharding(system).start(
s"shardStoreStart-$wayToFail",
Props[EntityActor],
ClusterShardingSettings(system).withRememberEntities(true),
extractEntityId,
extractShardId)
// trigger shard start and store creation
val probe = TestProbe()
sharding.tell(EntityEnvelope(1, "hello-1"), probe.ref)
var shardStore = storeProbe.expectMsgType[ShardStoreCreated].store
probe.expectMsg("hello-1")
// hit shard with other entity that will fail
shardStore.tell(FakeShardStoreActor.FailUpdateEntity(wayToFail), storeProbe.ref)
storeProbe.expectMsg(Done)
sharding.tell(EntityEnvelope(11, "hello-11"), probe.ref)
// no answer expected here, the shard crashes
probe.expectNoMessage()
if (wayToFail == StopStore || wayToFail == CrashStore) {
// a new store should be started
shardStore = storeProbe.expectMsgType[ShardStoreCreated].store
}
val stopFailingProbe = TestProbe()
shardStore.tell(FakeShardStoreActor.ClearFail, stopFailingProbe.ref)
stopFailingProbe.expectMsg(Done)
// it takes a while - timeout hits and then backoff
awaitAssert({
sharding.tell(EntityEnvelope(11, "hello-11-2"), probe.ref)
probe.expectMsg("hello-11-2")
}, 10.seconds)
system.stop(sharding)
}
s"recover on abrupt entity stop when storing a stop event fails $wayToFail" in {
val storeProbe = TestProbe()
system.eventStream.subscribe(storeProbe.ref, classOf[ShardStoreCreated])
val sharding = ClusterSharding(system).start(
s"shardStoreStopAbrupt-$wayToFail",
Props[EntityActor],
ClusterShardingSettings(system).withRememberEntities(true),
extractEntityId,
extractShardId)
val probe = TestProbe()
// trigger shard start and store creation
sharding.tell(EntityEnvelope(1, "hello-1"), probe.ref)
val shard1Store = storeProbe.expectMsgType[ShardStoreCreated].store
probe.expectMsg("hello-1")
// fail it when stopping
shard1Store.tell(FakeShardStoreActor.FailUpdateEntity(wayToFail), storeProbe.ref)
storeProbe.expectMsg(Done)
// FIXME an abrupt stop without passivation is not stored; the entity is re-started again without storing the stop, so this isn't testing anything
sharding ! EntityEnvelope(1, "stop")
shard1Store.tell(FakeShardStoreActor.ClearFail, storeProbe.ref)
storeProbe.expectMsg(Done)
// it takes a while - timeout hits and then backoff
awaitAssert({
sharding.tell(EntityEnvelope(1, "hello-2"), probe.ref)
probe.expectMsg("hello-2")
}, 10.seconds)
system.stop(sharding)
}
s"recover on graceful entity stop when storing a stop event fails $wayToFail" in {
val storeProbe = TestProbe()
system.eventStream.subscribe(storeProbe.ref, classOf[ShardStoreCreated])
val sharding = ClusterSharding(system).start(
s"shardStoreStopGraceful-$wayToFail",
Props[EntityActor],
ClusterShardingSettings(system).withRememberEntities(true),
extractEntityId,
extractShardId,
new ShardCoordinator.LeastShardAllocationStrategy(rebalanceThreshold = 1, maxSimultaneousRebalance = 3),
"graceful-stop")
val probe = TestProbe()
// trigger shard start and store creation
sharding.tell(EntityEnvelope(1, "hello-1"), probe.ref)
val shard1Store = storeProbe.expectMsgType[ShardStoreCreated].store
probe.expectMsg("hello-1")
// fail it when stopping
shard1Store.tell(FakeShardStoreActor.FailUpdateEntity(wayToFail), storeProbe.ref)
storeProbe.expectMsg(Done)
sharding ! EntityEnvelope(1, "graceful-stop")
if (wayToFail != CrashStore && wayToFail != StopStore) {
// race, give the shard some time to see the passivation before restoring the fake shard store
Thread.sleep(250)
shard1Store.tell(FakeShardStoreActor.ClearFail, probe.ref)
probe.expectMsg(Done)
}
// it takes a while?
awaitAssert({
sharding.tell(EntityEnvelope(1, "hello-2"), probe.ref)
probe.expectMsg("hello-2")
}, 5.seconds)
system.stop(sharding)
}
s"recover when coordinator storing shard start fails $wayToFail" in {
val storeProbe = TestProbe()
system.eventStream.subscribe(storeProbe.ref, classOf[CoordinatorStoreCreated])
val sharding = ClusterSharding(system).start(
s"coordinatorStoreStopGraceful-$wayToFail",
Props[EntityActor],
ClusterShardingSettings(system).withRememberEntities(true),
extractEntityId,
extractShardId,
new ShardCoordinator.LeastShardAllocationStrategy(rebalanceThreshold = 1, maxSimultaneousRebalance = 3),
"graceful-stop")
val probe = TestProbe()
// coordinator store is triggered by coordinator starting up
var coordinatorStore = storeProbe.expectMsgType[CoordinatorStoreCreated].store
coordinatorStore.tell(FakeCoordinatorStoreActor.FailAddShard("1", wayToFail), probe.ref)
probe.expectMsg(Done)
sharding.tell(EntityEnvelope(1, "hello-1"), probe.ref)
probe.expectNoMessage(1.second) // because shard cannot start while store failing
if (wayToFail == StopStore || wayToFail == CrashStore) {
// a new store should be started
coordinatorStore = storeProbe.expectMsgType[CoordinatorStoreCreated].store
}
// fail it when stopping
coordinatorStore.tell(FakeCoordinatorStoreActor.ClearFailShard("1"), storeProbe.ref)
storeProbe.expectMsg(Done)
probe.awaitAssert({
sharding.tell(EntityEnvelope(1, "hello-2"), probe.ref)
probe.expectMsg("hello-2") // should now work again
}, 5.seconds)
system.stop(sharding)
}
}
}
}

View file

@ -0,0 +1,135 @@
/*
* Copyright (C) 2020 Lightbend Inc. <https://www.lightbend.com>
*/
package akka.cluster.sharding
import java.util.UUID
import akka.actor.ActorRef
import akka.actor.ActorSystem
import akka.actor.Props
import akka.cluster.Cluster
import akka.persistence.PersistentActor
import akka.testkit.AkkaSpec
import akka.testkit.ImplicitSender
import akka.testkit.TestProbe
import com.typesafe.config.ConfigFactory
import scala.concurrent.Await
import scala.concurrent.duration._
/**
* Covers that remembered entities is correctly migrated when used and the shard id extractor
* is changed so that entities should live on other shards after a full restart of the cluster.
*/
object RememberEntitiesShardIdExtractorChangeSpec {
val config = ConfigFactory.parseString(s"""
akka.loglevel = INFO
akka.actor.provider = "cluster"
akka.remote.artery.canonical.port = 0
akka.cluster.sharding {
remember-entities = on
remember-entities-store = "eventsourced"
state-store-mode = "ddata"
}
akka.persistence.journal.plugin = "akka.persistence.journal.leveldb"
akka.persistence.snapshot-store.plugin = "akka.persistence.snapshot-store.local"
akka.persistence.snapshot-store.local.dir = "target/RememberEntitiesShardIdExtractorChangeSpec-${UUID
.randomUUID()
.toString}"
akka.persistence.journal.leveldb {
native = off
dir = "target/journal-PersistentShardingMigrationSpec-${UUID.randomUUID()}"
}
""")
case class Message(id: Long)
class PA extends PersistentActor {
override def persistenceId: String = "pa-" + self.path.name
override def receiveRecover: Receive = {
case _ =>
}
override def receiveCommand: Receive = {
case _ =>
sender() ! "ack"
}
}
val extractEntityId: ShardRegion.ExtractEntityId = {
case msg @ Message(id) => (id.toString, msg)
}
val firstExtractShardId: ShardRegion.ExtractShardId = {
case Message(id) => (id % 10).toString
case ShardRegion.StartEntity(id) => (id.toInt % 10).toString
}
val secondExtractShardId: ShardRegion.ExtractShardId = {
case Message(id) => (id % 10 + 1L).toString
case ShardRegion.StartEntity(id) => (id.toInt % 10 + 1L).toString
}
val TypeName = "ShardIdExtractorChange"
}
class RememberEntitiesShardIdExtractorChangeSpec
extends AkkaSpec(PersistentShardingMigrationSpec.config)
with ImplicitSender {
import RememberEntitiesShardIdExtractorChangeSpec._
"Sharding with remember entities enabled" should {
"allow a change to the shard id extractor" in {
withSystem("FirstShardIdExtractor", firstExtractShardId) { (_, region) =>
region ! Message(1)
expectMsg("ack")
region ! Message(11)
expectMsg("ack")
region ! Message(21)
expectMsg("ack")
}
withSystem("SecondShardIdExtractor", secondExtractShardId) { (system, region) =>
val probe = TestProbe()(system)
awaitAssert {
region.tell(ShardRegion.GetShardRegionState, probe.ref)
val state = probe.expectMsgType[ShardRegion.CurrentShardRegionState]
// shards should have been remembered but migrated over to shard 2
state.shards.collect { case ShardRegion.ShardState("1", entities) => entities } shouldEqual Set(Set.empty)
state.shards.collect { case ShardRegion.ShardState("2", entities) => entities } shouldEqual Set(
Set("1", "11", "21"))
}
}
withSystem("ThirdIncarnation", secondExtractShardId) { (system, region) =>
val probe = TestProbe()(system)
// Only way to verify that they were "normal"-remember-started here is to look at debug logs, will show
// [akka://ThirdIncarnation@127.0.0.1:51533/system/sharding/ShardIdExtractorChange/1/RememberEntitiesStore] Recovery completed for shard [1] with [0] entities
// [akka://ThirdIncarnation@127.0.0.1:51533/system/sharding/ShardIdExtractorChange/2/RememberEntitiesStore] Recovery completed for shard [2] with [3] entities
awaitAssert {
region.tell(ShardRegion.GetShardRegionState, probe.ref)
val state = probe.expectMsgType[ShardRegion.CurrentShardRegionState]
state.shards.collect { case ShardRegion.ShardState("1", entities) => entities } shouldEqual Set(Set.empty)
state.shards.collect { case ShardRegion.ShardState("2", entities) => entities } shouldEqual Set(
Set("1", "11", "21"))
}
}
}
def withSystem(systemName: String, extractShardId: ShardRegion.ExtractShardId)(
f: (ActorSystem, ActorRef) => Unit): Unit = {
val system = ActorSystem(systemName, config)
Cluster(system).join(Cluster(system).selfAddress)
try {
val region = ClusterSharding(system).start(TypeName, Props(new PA()), extractEntityId, extractShardId)
f(system, region)
} finally {
Await.ready(system.terminate(), 20.seconds)
}
}
}
}

View file

@ -25,10 +25,12 @@ import akka.persistence.SnapshotSelectionCriteria
import akka.testkit.AkkaSpec
import akka.testkit.ImplicitSender
import akka.testkit.TestActors.EchoActor
import akka.testkit.WithLogCapturing
object RemoveInternalClusterShardingDataSpec {
val config = """
akka.loglevel = INFO
akka.loglevel = DEBUG
akka.loggers = ["akka.testkit.SilenceAllTestEventListener"]
akka.actor.provider = "cluster"
akka.remote.classic.netty.tcp.port = 0
akka.remote.artery.canonical.port = 0
@ -42,6 +44,7 @@ object RemoveInternalClusterShardingDataSpec {
akka.cluster.sharding.snapshot-after = 5
akka.cluster.sharding.state-store-mode = persistence
akka.cluster.sharding.keep-nr-of-batches = 0
akka.cluster.sharding.verbose-debug-logging = on
"""
val extractEntityId: ShardRegion.ExtractEntityId = {
@ -94,7 +97,8 @@ object RemoveInternalClusterShardingDataSpec {
class RemoveInternalClusterShardingDataSpec
extends AkkaSpec(RemoveInternalClusterShardingDataSpec.config)
with ImplicitSender {
with ImplicitSender
with WithLogCapturing {
import RemoveInternalClusterShardingDataSpec._
val storageLocations =

View file

@ -8,11 +8,10 @@ import java.io.File
import com.typesafe.config.ConfigFactory
import org.apache.commons.io.FileUtils
import akka.actor.{ Actor, ActorLogging, ActorRef, ActorSystem, PoisonPill, Props }
import akka.cluster.{ Cluster, MemberStatus }
import akka.cluster.ClusterEvent.CurrentClusterState
import akka.testkit.{ AkkaSpec, DeadLettersFilter, TestProbe }
import akka.testkit.{ AkkaSpec, DeadLettersFilter, TestProbe, WithLogCapturing }
import akka.testkit.TestEvent.Mute
object ShardRegionSpec {
@ -25,7 +24,8 @@ object ShardRegionSpec {
val config =
ConfigFactory.parseString(tempConfig).withFallback(ConfigFactory.parseString(s"""
akka.loglevel = INFO
akka.loglevel = DEBUG
akka.loggers = ["akka.testkit.SilenceAllTestEventListener"]
akka.actor.provider = "cluster"
akka.remote.classic.netty.tcp.port = 0
akka.remote.artery.canonical.port = 0
@ -37,6 +37,7 @@ object ShardRegionSpec {
}
akka.cluster.downing-provider-class = akka.cluster.testkit.AutoDowning
akka.cluster.jmx.enabled = off
akka.cluster.sharding.verbose-debug-logging = on
"""))
val shardTypeName = "Caat"
@ -58,7 +59,7 @@ object ShardRegionSpec {
}
}
}
class ShardRegionSpec extends AkkaSpec(ShardRegionSpec.config) {
class ShardRegionSpec extends AkkaSpec(ShardRegionSpec.config) with WithLogCapturing {
import scala.concurrent.duration._

View file

@ -1,124 +0,0 @@
/*
* Copyright (C) 2019-2020 Lightbend Inc. <https://www.lightbend.com>
*/
package akka.cluster.sharding
import java.util.concurrent.atomic.AtomicInteger
import scala.concurrent.Future
import scala.concurrent.duration._
import scala.util.Success
import scala.util.control.NoStackTrace
import akka.actor.{ Actor, ActorLogging, PoisonPill, Props }
import akka.cluster.sharding.ShardRegion.ShardInitialized
import akka.coordination.lease.LeaseUsageSettings
import akka.coordination.lease.TestLease
import akka.coordination.lease.TestLeaseExt
import akka.testkit.{ AkkaSpec, ImplicitSender, TestProbe }
object ShardSpec {
val config =
s"""
akka.loglevel = INFO
akka.actor.provider = "cluster"
akka.remote.classic.netty.tcp.port = 0
akka.remote.artery.canonical.port = 0
test-lease {
lease-class = ${classOf[TestLease].getName}
heartbeat-interval = 1s
heartbeat-timeout = 120s
lease-operation-timeout = 3s
}
"""
class EntityActor extends Actor with ActorLogging {
override def receive: Receive = {
case msg =>
log.info("Msg {}", msg)
sender() ! s"ack ${msg}"
}
}
val numberOfShards = 5
case class EntityEnvelope(entityId: Int, msg: Any)
val extractEntityId: ShardRegion.ExtractEntityId = {
case EntityEnvelope(id, payload) => (id.toString, payload)
}
val extractShardId: ShardRegion.ExtractShardId = {
case EntityEnvelope(id, _) => (id % numberOfShards).toString
}
case class BadLease(msg: String) extends RuntimeException(msg) with NoStackTrace
}
class ShardSpec extends AkkaSpec(ShardSpec.config) with ImplicitSender {
import ShardSpec._
val shortDuration = 100.millis
val testLeaseExt = TestLeaseExt(system)
def leaseNameForShard(typeName: String, shardId: String) = s"${system.name}-shard-${typeName}-${shardId}"
"A Cluster Shard" should {
"not initialize the shard until the lease is acquired" in new Setup {
parent.expectNoMessage(shortDuration)
lease.initialPromise.complete(Success(true))
parent.expectMsg(ShardInitialized(shardId))
}
"retry if lease acquire returns false" in new Setup {
lease.initialPromise.complete(Success(false))
parent.expectNoMessage(shortDuration)
lease.setNextAcquireResult(Future.successful(true))
parent.expectMsg(ShardInitialized(shardId))
}
"retry if the lease acquire fails" in new Setup {
lease.initialPromise.failure(BadLease("no lease for you"))
parent.expectNoMessage(shortDuration)
lease.setNextAcquireResult(Future.successful(true))
parent.expectMsg(ShardInitialized(shardId))
}
"shutdown if lease is lost" in new Setup {
val probe = TestProbe()
probe.watch(shard)
lease.initialPromise.complete(Success(true))
parent.expectMsg(ShardInitialized(shardId))
lease.getCurrentCallback().apply(Some(BadLease("bye bye lease")))
probe.expectTerminated(shard)
}
}
val shardIds = new AtomicInteger(0)
def nextShardId = s"${shardIds.getAndIncrement()}"
trait Setup {
val shardId = nextShardId
val parent = TestProbe()
val settings = ClusterShardingSettings(system).withLeaseSettings(new LeaseUsageSettings("test-lease", 2.seconds))
def lease = awaitAssert {
testLeaseExt.getTestLease(leaseNameForShard(typeName, shardId))
}
val typeName = "type1"
val shard = parent.childActorOf(
Shard.props(
typeName,
shardId,
_ => Props(new EntityActor()),
settings,
extractEntityId,
extractShardId,
PoisonPill,
system.deadLetters,
1))
}
}

View file

@ -0,0 +1,158 @@
/*
* Copyright (C) 2019-2020 Lightbend Inc. <https://www.lightbend.com>
*/
package akka.cluster.sharding
import akka.actor.Actor
import akka.actor.ActorLogging
import akka.actor.Props
import akka.cluster.Cluster
import akka.cluster.MemberStatus
import akka.cluster.sharding.ShardRegion.ShardId
import akka.coordination.lease.{ LeaseUsageSettings, TestLeaseExt }
import akka.testkit.AkkaSpec
import akka.testkit.EventFilter
import akka.testkit.TestProbe
import akka.testkit.WithLogCapturing
import scala.concurrent.Future
import scala.concurrent.duration._
import scala.util.Success
import scala.util.control.NoStackTrace
// FIXME this looks like it is the same test as ClusterShardingLeaseSpec is there any difference?
object ShardWithLeaseSpec {
val config =
"""
akka.loglevel = DEBUG
akka.loggers = ["akka.testkit.SilenceAllTestEventListener"]
akka.actor.provider = "cluster"
akka.remote.classic.netty.tcp.port = 0
akka.remote.artery.canonical.port = 0
test-lease {
lease-class = akka.coordination.lease.TestLease
heartbeat-interval = 1s
heartbeat-timeout = 120s
lease-operation-timeout = 3s
}
akka.cluster.sharding.verbose-debug-logging = on
"""
class EntityActor extends Actor with ActorLogging {
override def receive: Receive = {
case msg =>
log.info("Msg {}", msg)
sender() ! s"ack ${msg}"
}
}
val numberOfShards = 5
case class EntityEnvelope(entityId: Int, msg: Any)
val extractEntityId: ShardRegion.ExtractEntityId = {
case EntityEnvelope(id, payload) => (id.toString, payload)
}
val extractShardId: ShardRegion.ExtractShardId = {
case EntityEnvelope(id, _) => (id % numberOfShards).toString
}
case class BadLease(msg: String) extends RuntimeException(msg) with NoStackTrace
}
class ShardWithLeaseSpec extends AkkaSpec(ShardWithLeaseSpec.config) with WithLogCapturing {
import ShardWithLeaseSpec._
val shortDuration = 100.millis
val testLeaseExt = TestLeaseExt(system)
override def atStartup(): Unit = {
// Form a one node cluster
val cluster = Cluster(system)
cluster.join(cluster.selfAddress)
awaitAssert(cluster.readView.members.count(_.status == MemberStatus.Up) should ===(1))
}
"Lease handling in sharding" must {
"not initialize the shard until the lease is acquired" in new Setup {
val probe = TestProbe()
sharding.tell(EntityEnvelope(1, "hello"), probe.ref)
probe.expectNoMessage(shortDuration)
leaseFor("1").initialPromise.complete(Success(true))
probe.expectMsg("ack hello")
}
"retry if lease acquire returns false" in new Setup {
val probe = TestProbe()
val lease =
EventFilter.error(start = s"Failed to get lease for shard type [$typeName] id [1]", occurrences = 1).intercept {
sharding.tell(EntityEnvelope(1, "hello"), probe.ref)
val lease = leaseFor("1")
lease.initialPromise.complete(Success(false))
probe.expectNoMessage(shortDuration)
lease
}
lease.setNextAcquireResult(Future.successful(true))
probe.expectMsg("ack hello")
}
"retry if the lease acquire fails" in new Setup {
val probe = TestProbe()
val lease =
EventFilter.error(start = s"Failed to get lease for shard type [$typeName] id [1]", occurrences = 1).intercept {
sharding.tell(EntityEnvelope(1, "hello"), probe.ref)
val lease = leaseFor("1")
lease.initialPromise.failure(BadLease("no lease for you"))
probe.expectNoMessage(shortDuration)
lease
}
lease.setNextAcquireResult(Future.successful(true))
probe.expectMsg("ack hello")
}
"shutdown if lease is lost" in new Setup {
val probe = TestProbe()
sharding.tell(EntityEnvelope(1, "hello"), probe.ref)
val lease = leaseFor("1")
lease.initialPromise.complete(Success(true))
probe.expectMsg("ack hello")
EventFilter
.error(
start =
s"Shard type [$typeName] id [1] lease lost, stopping shard and killing [1] entities. Reason for losing lease: ${classOf[
BadLease].getName}: bye bye lease",
occurrences = 1)
.intercept {
lease.getCurrentCallback().apply(Some(BadLease("bye bye lease")))
sharding.tell(EntityEnvelope(1, "hello"), probe.ref)
probe.expectNoMessage(shortDuration)
}
}
}
var typeIdx = 0
trait Setup {
val settings = ClusterShardingSettings(system).withLeaseSettings(new LeaseUsageSettings("test-lease", 2.seconds))
// unique type name for each test
val typeName = {
typeIdx += 1
s"type$typeIdx"
}
val sharding =
ClusterSharding(system).start(typeName, Props(new EntityActor()), settings, extractEntityId, extractShardId)
def leaseFor(shardId: ShardId) = awaitAssert {
val leaseName = s"${system.name}-shard-${typeName}-${shardId}"
testLeaseExt.getTestLease(leaseName)
}
}
}

View file

@ -0,0 +1,182 @@
/*
* Copyright (C) 2020 Lightbend Inc. <https://www.lightbend.com>
*/
package akka.cluster.sharding
import akka.actor.Actor
import akka.actor.ActorRef
import akka.actor.Props
import akka.cluster.Cluster
import akka.cluster.MemberStatus
import akka.testkit.AkkaSpec
import akka.testkit.ImplicitSender
import akka.testkit.WithLogCapturing
import com.typesafe.config.ConfigFactory
import scala.concurrent.duration._
/**
 * Covers some corner cases around triggering an entity with StartEntity
*/
object StartEntitySpec {
final case class EntityEnvelope(id: String, msg: Any)
def config = ConfigFactory.parseString("""
akka.loglevel=DEBUG
akka.loggers = ["akka.testkit.SilenceAllTestEventListener"]
akka.actor.provider = cluster
akka.remote.artery.canonical.port = 0
akka.remote.classic.netty.tcp.port = 0
akka.cluster.sharding.state-store-mode = ddata
akka.cluster.sharding.remember-entities = on
# no leaks between test runs thank you
akka.cluster.sharding.distributed-data.durable.keys = []
akka.cluster.sharding.verbose-debug-logging = on
""".stripMargin)
object EntityActor {
def props(): Props = Props(new EntityActor)
}
class EntityActor extends Actor {
private var waitingForPassivateAck: Option[ActorRef] = None
override def receive: Receive = {
case "ping" =>
sender() ! "pong"
case "passivate" =>
context.parent ! ShardRegion.Passivate("complete-passivation")
waitingForPassivateAck = Some(sender())
case "simulate-slow-passivate" =>
context.parent ! ShardRegion.Passivate("slow-passivate-stop")
waitingForPassivateAck = Some(sender())
case "slow-passivate-stop" =>
// actually, we just don't stop, keeping the passivation state forever for this test
waitingForPassivateAck.foreach(_ ! "slow-passivate-ack")
waitingForPassivateAck = None
case "complete-passivation" | "just-stop" =>
context.stop(self)
}
}
}
class StartEntitySpec extends AkkaSpec(StartEntitySpec.config) with ImplicitSender with WithLogCapturing {
import StartEntitySpec._
val extractEntityId: ShardRegion.ExtractEntityId = {
case EntityEnvelope(id, payload) => (id.toString, payload)
}
val extractShardId: ShardRegion.ExtractShardId = {
case EntityEnvelope(_, _) => "1" // single shard for all entities
case ShardRegion.StartEntity(_) => "1"
}
override def atStartup(): Unit = {
// Form a one node cluster
val cluster = Cluster(system)
cluster.join(cluster.selfAddress)
awaitAssert(cluster.readView.members.count(_.status == MemberStatus.Up) should ===(1))
}
"StartEntity while entity is passivating" should {
"start it again when the entity terminates" in {
val sharding = ClusterSharding(system).start(
"start-entity-1",
EntityActor.props(),
ClusterShardingSettings(system),
extractEntityId,
extractShardId)
sharding ! EntityEnvelope("1", "ping")
expectMsg("pong")
val entity = lastSender
sharding ! EntityEnvelope("1", "simulate-slow-passivate")
expectMsg("slow-passivate-ack")
// entity is now in passivating state in shard
// bypass region and send start entity directly to shard
system.actorSelection(entity.path.parent) ! ShardRegion.StartEntity("1")
// bypass sharding and tell entity to complete passivation
entity ! "complete-passivation"
// should trigger start of entity again, and an ack
expectMsg(ShardRegion.StartEntityAck("1", "1"))
awaitAssert({
sharding ! ShardRegion.GetShardRegionState
val state = expectMsgType[ShardRegion.CurrentShardRegionState]
state.shards should have size (1)
state.shards.head.entityIds should ===(Set("1"))
})
}
}
// entity crashed and before restart-backoff hit we sent it a StartEntity
"StartEntity while the entity is waiting for restart" should {
"restart it immediately" in {
val sharding = ClusterSharding(system).start(
"start-entity-2",
EntityActor.props(),
ClusterShardingSettings(system),
extractEntityId,
extractShardId)
sharding ! EntityEnvelope("1", "ping")
expectMsg("pong")
val entity = lastSender
watch(entity)
// stop without passivation
entity ! "just-stop"
expectTerminated(entity)
// the backoff is 10s by default, so plenty of time to
// bypass region and send start entity directly to shard
system.actorSelection(entity.path.parent) ! ShardRegion.StartEntity("1")
expectMsg(ShardRegion.StartEntityAck("1", "1"))
awaitAssert({
sharding ! ShardRegion.GetShardRegionState
val state = expectMsgType[ShardRegion.CurrentShardRegionState]
state.shards should have size (1)
state.shards.head.entityIds should ===(Set("1"))
})
}
}
"StartEntity while the entity is queued remember stop" should {
"start it again when that is done" in {
// this is hard to do deterministically
val sharding = ClusterSharding(system).start(
"start-entity-3",
EntityActor.props(),
ClusterShardingSettings(system),
extractEntityId,
extractShardId)
sharding ! EntityEnvelope("1", "ping")
expectMsg("pong")
val entity = lastSender
watch(entity)
// resolve before passivation to save some time
val shard = system.actorSelection(entity.path.parent).resolveOne(3.seconds).futureValue
// stop via passivation
entity ! "passivate"
// store of stop happens after passivation when entity has terminated
expectTerminated(entity)
shard ! ShardRegion.StartEntity("1") // if we are lucky this happens while remember stop is in progress
// regardless we should get an ack and the entity should be alive
expectMsg(ShardRegion.StartEntityAck("1", "1"))
awaitAssert({
sharding ! ShardRegion.GetShardRegionState
val state = expectMsgType[ShardRegion.CurrentShardRegionState]
state.shards should have size (1)
state.shards.head.entityIds should ===(Set("1"))
})
}
}
}

View file

@ -12,13 +12,18 @@ import akka.actor.{ Actor, ActorLogging, ActorRef, PoisonPill, Props }
import akka.cluster.Cluster
import akka.cluster.sharding.ShardRegion.Passivate
import akka.pattern.{ BackoffOpts, BackoffSupervisor }
import akka.testkit.WithLogCapturing
import akka.testkit.{ AkkaSpec, ImplicitSender }
object SupervisionSpec {
val config =
ConfigFactory.parseString("""
akka.actor.provider = "cluster"
akka.loglevel = INFO
akka.remote.artery.canonical.port = 0
akka.remote.classic.netty.tcp.port = 0
akka.loggers = ["akka.testkit.SilenceAllTestEventListener"]
akka.loglevel = DEBUG
akka.cluster.sharding.verbose-debug-logging = on
""")
case class Msg(id: Long, msg: Any)
@ -49,6 +54,7 @@ object SupervisionSpec {
context.parent ! Passivate(StopMessage)
// simulate another message causing a stop before the region sends the stop message
// e.g. a persistent actor having a persist failure while processing the next message
// note that this means the StopMessage will go to dead letters
context.stop(self)
case "hello" =>
sender() ! Response(self)
@ -60,8 +66,7 @@ object SupervisionSpec {
}
class SupervisionSpec extends AkkaSpec(SupervisionSpec.config) with ImplicitSender {
class DeprecatedSupervisionSpec extends AkkaSpec(SupervisionSpec.config) with ImplicitSender with WithLogCapturing {
import SupervisionSpec._
"Supervision for a sharded actor (deprecated)" must {
@ -99,6 +104,11 @@ class SupervisionSpec extends AkkaSpec(SupervisionSpec.config) with ImplicitSend
expectMsgType[Response](20.seconds)
}
}
}
class SupervisionSpec extends AkkaSpec(SupervisionSpec.config) with ImplicitSender {
import SupervisionSpec._
"Supervision for a sharded actor" must {
@ -126,10 +136,16 @@ class SupervisionSpec extends AkkaSpec(SupervisionSpec.config) with ImplicitSend
val response = expectMsgType[Response](5.seconds)
watch(response.self)
// 1. passivation message is passed on from supervisor to shard (which starts buffering messages for the entity id)
// 2. child stops
// 3. the supervisor may or may not yet have gotten the stop message back from the shard
//    a. if it has, it will stop immediately, and the next message will trigger the shard to restart it
//    b. if it hasn't, the supervisor will back off before restarting the child; when the
//       final stop message `StopMessage` comes in from the shard it will stop itself
// 4. when the supervisor stops the shard should start it anew and deliver the buffered messages
region ! Msg(10, "passivate")
expectTerminated(response.self)
// This would fail before as sharded actor would be stuck passivating
region ! Msg(10, "hello")
expectMsgType[Response](20.seconds)
}

View file

@ -16,6 +16,7 @@ import akka.util.Timeout
class ExternalShardAllocationStrategySpec extends AkkaSpec("""
akka.actor.provider = cluster
akka.loglevel = INFO
akka.remote.artery.canonical.port = 0
""") {
val requester = TestProbe()

View file

@ -0,0 +1,129 @@
/*
* Copyright (C) 2020 Lightbend Inc. <https://www.lightbend.com>
*/
package akka.cluster.sharding.internal
import akka.actor.Props
import akka.cluster.ddata.{ Replicator, ReplicatorSettings }
import akka.cluster.sharding.ClusterShardingSettings
import akka.cluster.sharding.ShardRegion.ShardId
import akka.cluster.{ Cluster, MemberStatus }
import akka.testkit.{ AkkaSpec, ImplicitSender, WithLogCapturing }
import com.typesafe.config.ConfigFactory
import org.scalatest.wordspec.AnyWordSpecLike
/**
* Covers the interaction between the shard and the remember entities store
*/
object RememberEntitiesShardStoreSpec {
def config = ConfigFactory.parseString("""
akka.loglevel=DEBUG
akka.loggers = ["akka.testkit.SilenceAllTestEventListener"]
akka.actor.provider = cluster
akka.remote.artery.canonical.port = 0
akka.remote.classic.netty.tcp.port = 0
akka.cluster.sharding.state-store-mode = ddata
akka.cluster.sharding.remember-entities = on
# no leaks between test runs thank you
akka.cluster.sharding.distributed-data.durable.keys = []
akka.persistence.journal.plugin = "akka.persistence.journal.inmem"
""".stripMargin)
}
// shared base class for both persistence and ddata specs
abstract class RememberEntitiesShardStoreSpec
extends AkkaSpec(RememberEntitiesShardStoreSpec.config)
with AnyWordSpecLike
with ImplicitSender
with WithLogCapturing {
def storeName: String
def storeProps(shardId: ShardId, typeName: String, settings: ClusterShardingSettings): Props
override def atStartup(): Unit = {
// Form a one node cluster
val cluster = Cluster(system)
cluster.join(cluster.selfAddress)
awaitAssert(cluster.readView.members.count(_.status == MemberStatus.Up) should ===(1))
}
s"The $storeName" must {
val shardingSettings = ClusterShardingSettings(system)
"store starts and stops and list remembered entity ids" in {
val store = system.actorOf(storeProps("FakeShardId", "FakeTypeName", shardingSettings))
store ! RememberEntitiesShardStore.GetEntities
expectMsgType[RememberEntitiesShardStore.RememberedEntities].entities should be(empty)
store ! RememberEntitiesShardStore.Update(Set("1", "2", "3"), Set.empty)
expectMsg(RememberEntitiesShardStore.UpdateDone(Set("1", "2", "3"), Set.empty))
store ! RememberEntitiesShardStore.Update(Set("4", "5", "6"), Set("2", "3"))
expectMsg(RememberEntitiesShardStore.UpdateDone(Set("4", "5", "6"), Set("2", "3")))
store ! RememberEntitiesShardStore.Update(Set.empty, Set("6"))
expectMsg(RememberEntitiesShardStore.UpdateDone(Set.empty, Set("6")))
store ! RememberEntitiesShardStore.Update(Set("2"), Set.empty)
expectMsg(RememberEntitiesShardStore.UpdateDone(Set("2"), Set.empty))
// the store does not support get after update
val storeIncarnation2 = system.actorOf(storeProps("FakeShardId", "FakeTypeName", shardingSettings))
storeIncarnation2 ! RememberEntitiesShardStore.GetEntities
expectMsgType[RememberEntitiesShardStore.RememberedEntities].entities should ===(Set("1", "2", "4", "5"))
}
"handle a late request" in {
// the store does not support get after update
val storeIncarnation3 = system.actorOf(storeProps("FakeShardId", "FakeTypeName", shardingSettings))
Thread.sleep(500)
storeIncarnation3 ! RememberEntitiesShardStore.GetEntities
expectMsgType[RememberEntitiesShardStore.RememberedEntities].entities should ===(Set("1", "2", "4", "5")) // from previous test
}
"handle a large batch" in {
var store = system.actorOf(storeProps("FakeShardIdLarge", "FakeTypeNameLarge", shardingSettings))
store ! RememberEntitiesShardStore.GetEntities
expectMsgType[RememberEntitiesShardStore.RememberedEntities].entities should be(empty)
store ! RememberEntitiesShardStore.Update((1 to 1000).map(_.toString).toSet, (1001 to 2000).map(_.toString).toSet)
val response = expectMsgType[RememberEntitiesShardStore.UpdateDone]
response.started should have size (1000)
response.stopped should have size (1000)
watch(store)
system.stop(store)
expectTerminated(store)
store = system.actorOf(storeProps("FakeShardIdLarge", "FakeTypeNameLarge", shardingSettings))
store ! RememberEntitiesShardStore.GetEntities
expectMsgType[RememberEntitiesShardStore.RememberedEntities].entities should have size (1000)
}
}
}
class DDataRememberEntitiesShardStoreSpec extends RememberEntitiesShardStoreSpec {
val replicatorSettings = ReplicatorSettings(system)
val replicator = system.actorOf(Replicator.props(replicatorSettings))
override def storeName: String = "DDataRememberEntitiesShardStore"
override def storeProps(shardId: ShardId, typeName: String, settings: ClusterShardingSettings): Props =
DDataRememberEntitiesShardStore.props(shardId, typeName, settings, replicator, majorityMinCap = 1)
}
class EventSourcedRememberEntitiesShardStoreSpec extends RememberEntitiesShardStoreSpec {
override def storeName: String = "EventSourcedRememberEntitiesShardStore"
override def storeProps(shardId: ShardId, typeName: String, settings: ClusterShardingSettings): Props =
EventSourcedRememberEntitiesShardStore.props(typeName, shardId, settings)
}

View file

@ -0,0 +1,165 @@
/*
* Copyright (C) 2018-2020 Lightbend Inc. <https://www.lightbend.com>
*/
package akka.cluster.sharding.internal
import akka.cluster.sharding.ClusterShardingSettings
import akka.cluster.sharding.Shard
import akka.cluster.sharding.ShardRegion
import akka.cluster.sharding.ShardRegion.ShardId
import akka.testkit.AkkaSpec
import akka.testkit.TestProbe
import com.typesafe.config.ConfigFactory
import scala.concurrent.duration._
class RememberEntitiesStarterSpec extends AkkaSpec {
var shardIdCounter = 1
def nextShardId(): ShardId = {
val id = s"ShardId$shardIdCounter"
shardIdCounter += 1
id
}
"The RememberEntitiesStarter" must {
"try start all entities directly with entity-recovery-strategy = all (default)" in {
val regionProbe = TestProbe()
val shardProbe = TestProbe()
val shardId = nextShardId()
val defaultSettings = ClusterShardingSettings(system)
val rememberEntityStarter = system.actorOf(
RememberEntityStarter.props(regionProbe.ref, shardProbe.ref, shardId, Set("1", "2", "3"), defaultSettings))
watch(rememberEntityStarter)
val startedEntityIds = (1 to 3).map { _ =>
val start = regionProbe.expectMsgType[ShardRegion.StartEntity]
regionProbe.lastSender ! ShardRegion.StartEntityAck(start.entityId, shardId)
start.entityId
}.toSet
startedEntityIds should ===(Set("1", "2", "3"))
// the starter should then stop itself, not sending anything more to the shard or region
expectTerminated(rememberEntityStarter)
shardProbe.expectNoMessage()
regionProbe.expectNoMessage()
}
"retry start all entities with no ack with entity-recovery-strategy = all (default)" in {
val regionProbe = TestProbe()
val shardProbe = TestProbe()
val shardId = nextShardId()
val customSettings = ClusterShardingSettings(
ConfigFactory
.parseString(
// the restarter somewhat surprisingly uses this for no-ack-retry. Tune it down to speed up test
"""
retry-interval = 1 second
""")
.withFallback(system.settings.config.getConfig("akka.cluster.sharding")))
val rememberEntityStarter = system.actorOf(
RememberEntityStarter.props(regionProbe.ref, shardProbe.ref, shardId, Set("1", "2", "3"), customSettings))
watch(rememberEntityStarter)
(1 to 3).foreach { _ =>
regionProbe.expectMsgType[ShardRegion.StartEntity]
}
val startedOnSecondTry = (1 to 3).map { _ =>
val start = regionProbe.expectMsgType[ShardRegion.StartEntity]
regionProbe.lastSender ! ShardRegion.StartEntityAck(start.entityId, shardId)
start.entityId
}.toSet
startedOnSecondTry should ===(Set("1", "2", "3"))
// should stop itself, not sending anything to the shard
expectTerminated(rememberEntityStarter)
shardProbe.expectNoMessage()
}
"inform the shard when entities has been reallocated to different shard id" in {
val regionProbe = TestProbe()
val shardProbe = TestProbe()
val shardId = nextShardId()
val customSettings = ClusterShardingSettings(
ConfigFactory
.parseString(
// the restarter somewhat surprisingly uses this for no-ack-retry. Tune it down to speed up test
"""
retry-interval = 1 second
""")
.withFallback(system.settings.config.getConfig("akka.cluster.sharding")))
val rememberEntityStarter = system.actorOf(
RememberEntityStarter.props(regionProbe.ref, shardProbe.ref, shardId, Set("1", "2", "3"), customSettings))
watch(rememberEntityStarter)
val start1 = regionProbe.expectMsgType[ShardRegion.StartEntity]
regionProbe.lastSender ! ShardRegion.StartEntityAck(start1.entityId, shardId) // keep on current shard
val start2 = regionProbe.expectMsgType[ShardRegion.StartEntity]
regionProbe.lastSender ! ShardRegion.StartEntityAck(start2.entityId, shardId = "Relocated1")
val start3 = regionProbe.expectMsgType[ShardRegion.StartEntity]
regionProbe.lastSender ! ShardRegion.StartEntityAck(start3.entityId, shardId = "Relocated2")
shardProbe.expectMsg(Shard.EntitiesMovedToOtherShard(Set("2", "3")))
expectTerminated(rememberEntityStarter)
}
"try start all entities in a throttled way with entity-recovery-strategy = constant" in {
val regionProbe = TestProbe()
val shardProbe = TestProbe()
val shardId = nextShardId()
val customSettings = ClusterShardingSettings(
ConfigFactory
.parseString(
// slow constant restart
"""
entity-recovery-strategy = constant
entity-recovery-constant-rate-strategy {
frequency = 2 s
number-of-entities = 2
}
retry-interval = 1 second
""")
.withFallback(system.settings.config.getConfig("akka.cluster.sharding")))
val rememberEntityStarter = system.actorOf(
RememberEntityStarter
.props(regionProbe.ref, shardProbe.ref, shardId, Set("1", "2", "3", "4", "5"), customSettings))
def receiveStartAndAck() = {
val start = regionProbe.expectMsgType[ShardRegion.StartEntity]
regionProbe.lastSender ! ShardRegion.StartEntityAck(start.entityId, shardId)
}
watch(rememberEntityStarter)
// first batch should be immediate
receiveStartAndAck()
receiveStartAndAck()
// second batch holding off (with some room for unstable test env)
regionProbe.expectNoMessage(600.millis)
// second batch arrives once the throttle interval has passed
receiveStartAndAck()
receiveStartAndAck()
// third batch holding off
regionProbe.expectNoMessage(600.millis)
receiveStartAndAck()
// the starter should then stop itself, not sending anything more to the shard or region
expectTerminated(rememberEntityStarter)
shardProbe.expectNoMessage()
regionProbe.expectNoMessage()
}
}
}

View file

@ -5,7 +5,6 @@
package akka.cluster.sharding.protobuf
import scala.concurrent.duration._
import akka.actor.Address
import akka.actor.ExtendedActorSystem
import akka.actor.Props
@ -13,6 +12,8 @@ import akka.cluster.sharding.Shard
import akka.cluster.sharding.ShardCoordinator
import akka.cluster.sharding.ShardRegion
import akka.cluster.sharding.ShardRegion.ShardId
import akka.cluster.sharding.internal.EventSourcedRememberEntitiesShardStore
import akka.cluster.sharding.internal.EventSourcedRememberEntitiesShardStore.EntitiesStarted
import akka.serialization.SerializationExtension
import akka.testkit.AkkaSpec
@ -70,12 +71,19 @@ class ClusterShardingMessageSerializerSpec extends AkkaSpec {
}
"be able to serialize PersistentShard snapshot state" in {
checkSerialization(Shard.State(Set("e1", "e2", "e3")))
checkSerialization(EventSourcedRememberEntitiesShardStore.State(Set("e1", "e2", "e3")))
}
"be able to serialize PersistentShard domain events" in {
checkSerialization(Shard.EntityStarted("e1"))
checkSerialization(Shard.EntityStopped("e1"))
checkSerialization(EventSourcedRememberEntitiesShardStore.EntitiesStarted(Set("e1", "e2")))
checkSerialization(EventSourcedRememberEntitiesShardStore.EntitiesStopped(Set("e1", "e2")))
}
"be able to deserialize old entity started event into entities started" in {
import akka.cluster.sharding.protobuf.msg.{ ClusterShardingMessages => sm }
val asBytes = sm.EntityStarted.newBuilder().setEntityId("e1").build().toByteArray
SerializationExtension(system).deserialize(asBytes, 13, "CB").get shouldEqual EntitiesStarted(Set("e1"))
}
"be able to serialize GetShardStats" in {

View file

@ -59,7 +59,12 @@ Reliable delivery and flow control of messages between actors in the Cluster.
<!--- #sharding-persistence-mode-deprecated --->
@@@ warning
Persistence for state store mode is deprecated.
Persistence for state store mode is deprecated. It is recommended to migrate to `ddata` for the coordinator state and if using remembered entities
migrate to `eventsourced` for the remembered entities state.
The data written by the deprecated `persistence` state store mode for remembered entities can be read by the new remember entities `eventsourced` mode.
Once you've migrated you cannot go back to `persistence` mode.
@@@
<!--- #sharding-persistence-mode-deprecated --->

View file

@ -210,66 +210,6 @@ See the API documentation of @scala[`akka.cluster.sharding.ShardAllocationStrate
See @ref:[Cluster Sharding concepts](cluster-sharding-concepts.md).
## Sharding State Store Mode
There are two cluster sharding states managed:
1. @ref:[ShardCoordinator State](cluster-sharding-concepts.md#shardcoordinator-state) - the `Shard` locations
1. @ref:[Remembering Entities](#remembering-entities) - the entities in each `Shard`, which is optional, and disabled by default
For these, there are currently two modes which define how these states are stored:
* @ref:[Distributed Data Mode](#distributed-data-mode) - uses Akka @ref:[Distributed Data](distributed-data.md) (CRDTs) (the default)
* @ref:[Persistence Mode](#persistence-mode) - (deprecated) uses Akka @ref:[Persistence](persistence.md) (Event Sourcing)
@@include[cluster.md](../includes/cluster.md) { #sharding-persistence-mode-deprecated }
Changing the mode requires @ref:[a full cluster restart](../additional/rolling-updates.md#cluster-sharding-configuration-change).
### Distributed Data Mode
This mode is enabled with configuration (enabled by default):
```
akka.cluster.sharding.state-store-mode = ddata
```
The state of the `ShardCoordinator` is replicated across the cluster but is not durable, not stored to disk.
The `ShardCoordinator` state replication is handled by @ref:[Distributed Data](distributed-data.md) with `WriteMajority`/`ReadMajority` consistency.
When all nodes in the cluster have been stopped, the state is no longer needed and dropped.
The state of @ref:[Remembering Entities](#remembering-entities) is durable and stored to
disk. This means remembered entities are restarted even after a complete (non-rolling) cluster restart when the disk is still available.
Cluster Sharding uses its own Distributed Data `Replicator` per node.
If using roles with sharding there is one `Replicator` per role, which enables a subset of
all nodes for some entity types and another subset for other entity types. Each such replicator has a name
that contains the node role and therefore the role configuration must be the same on all nodes in the
cluster, for example you can't change the roles when performing a rolling upgrade.
Changing roles requires @ref:[a full cluster restart](../additional/rolling-updates.md#cluster-sharding-configuration-change).
The settings for Distributed Data are configured in the section
`akka.cluster.sharding.distributed-data`. It's not possible to have different
`distributed-data` settings for different sharding entity types.
### Persistence Mode
This mode is enabled with configuration:
```
akka.cluster.sharding.state-store-mode = persistence
```
Since it is running in a cluster @ref:[Persistence](persistence.md) must be configured with a distributed journal.
@@@ note
Persistence mode for @ref:[Remembering Entities](#remembering-entities) will be replaced by a pluggable data access API with storage implementations,
see @github[#27763](#27763).
New sharding applications should no longer choose persistence mode. Existing users of persistence mode
[can eventually migrate to the replacement options](https://github.com/akka/akka/issues/26177).
@@@
## Passivation
@ -315,32 +255,163 @@ to the `ActorRef` or messages that the actor sends to itself are not counted in
Passivation can be disabled by setting `akka.cluster.sharding.passivate-idle-entity-after = off`.
It is disabled automatically if @ref:[Remembering Entities](#remembering-entities) is enabled.
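For illustration, a minimal sketch of setting the passivation timeout (or disabling it) through configuration when the `ActorSystem` is created; the two-minute value is only an example:

```scala
import akka.actor.ActorSystem
import com.typesafe.config.ConfigFactory

object PassivationConfigSketch extends App {
  // Example value only: pick an idle timeout that fits your workload,
  // or set the value to `off` to disable automatic passivation entirely.
  val config = ConfigFactory.parseString("""
    akka.cluster.sharding.passivate-idle-entity-after = 2 minutes
    """).withFallback(ConfigFactory.load())

  val system = ActorSystem("Sharding", config)
}
```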
## Sharding State
There are two types of state managed:
1. @ref:[ShardCoordinator State](cluster-sharding-concepts.md#shardcoordinator-state) - the `Shard` locations. This is stored in the `State Store`.
1. @ref:[Remembering Entities](#remembering-entities) - the active shards and the entities in each `Shard`, which is optional, and disabled by default. This is stored in the `Remember Entities Store`.
### State Store
A state store is mandatory for sharding, it contains the location of shards. The `ShardCoordinator` needs to load this state after
it moves between nodes.
There are two options for the state store:
* @ref:[Distributed Data Mode](#distributed-data-mode) - uses Akka @ref:[Distributed Data](distributed-data.md) (CRDTs) (the default)
* @ref:[Persistence Mode](#persistence-mode) - (deprecated) uses Akka @ref:[Persistence](persistence.md) (Event Sourcing)
@@include[cluster.md](../includes/cluster.md) { #sharding-persistence-mode-deprecated }
#### Distributed Data Mode
To enable distributed data store mode (the default):
```
akka.cluster.sharding.state-store-mode = ddata
```
The state of the `ShardCoordinator` is replicated across the cluster but is not stored to disk.
@ref:[Distributed Data](distributed-data.md) handles the `ShardCoordinator`'s state with `WriteMajorityPlus`/`ReadMajorityPlus` consistency.
When all nodes in the cluster have been stopped, the state is no longer needed and dropped.
Cluster Sharding uses its own Distributed Data `Replicator` per node.
If using roles with sharding there is one `Replicator` per role, which enables a subset of
all nodes for some entity types and another subset for other entity types. Each replicator has a name
that contains the node role and therefore the role configuration must be the same on all nodes in the
cluster, for example you can't change the roles when performing a rolling upgrade.
Changing roles requires @ref:[a full cluster restart](../additional/rolling-updates.md#cluster-sharding-configuration-change).
The `akka.cluster.sharding.distributed-data` config section configures the settings for Distributed Data.
It's not possible to have different `distributed-data` settings for different sharding entity types.
#### Persistence mode
To enable persistence store mode:
```
akka.cluster.sharding.state-store-mode = persistence
```
Since it is running in a cluster @ref:[Persistence](persistence.md) must be configured with a distributed journal.
@@@ warning
Persistence mode for @ref:[Remembering Entities](#remembering-entities) has been replaced by a remember entities state mode. It should not be
used for new projects and existing projects should migrate as soon as possible.
@@@
## Remembering Entities
Remembering entities pertains to restarting entities after a rebalance or recovering from a crash.
Enabling or disabling (the default) this feature drives the behavior of the restarts:
Remembering entities automatically restarts entities after a rebalance or entity crash.
Without remembered entities restarts happen on the arrival of a message.
* enabled: entities are restarted, even though no new messages are sent to them. This will also disable @ref:[Automatic Passivation](#passivation).
* disabled: entities are restarted, on demand when a new message arrives.
Enabling remembered entities disables @ref:[Automatic Passivation](#passivation).
Note that the state of the entities themselves will not be restored unless they have been made persistent,
The state of the entities themselves is not restored unless they have been made persistent,
for example with @ref:[Event Sourcing](persistence.md).
To make the list of entities in each `Shard` persistent (durable) set the `rememberEntities` flag to true in
To enable remember entities set `rememberEntities` flag to true in
`ClusterShardingSettings` when starting a shard region (or its proxy) for a given `entity` type or configure
`akka.cluster.sharding.remember-entities = on`.
The performance cost of `rememberEntities` is rather high when starting/stopping entities and when
shards are rebalanced. This cost increases with number of entities per shard, thus it is not
recommended with more than 10000 active entities per shard.
Starting and stopping entities has an overhead but this is limited by batching operations to the
underlying remember entities store.
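As a rough sketch of the programmatic variant (the entity type name, envelope and extractors below are placeholders, mirroring the extractors used in the specs above):

```scala
import akka.actor.{ ActorRef, ActorSystem, Props }
import akka.cluster.sharding.{ ClusterSharding, ClusterShardingSettings, ShardRegion }

object RememberEntitiesSketch {
  // Placeholder message envelope and extractors.
  final case class EntityEnvelope(entityId: Int, msg: Any)

  val extractEntityId: ShardRegion.ExtractEntityId = {
    case EntityEnvelope(id, payload) => (id.toString, payload)
  }
  val extractShardId: ShardRegion.ExtractShardId = {
    case EntityEnvelope(id, _) => (id % 10).toString
  }

  // Start a shard region with remember entities enabled programmatically,
  // equivalent to setting akka.cluster.sharding.remember-entities = on.
  def startRegion(system: ActorSystem, entityProps: Props): ActorRef =
    ClusterSharding(system).start(
      "my-entity-type",
      entityProps,
      ClusterShardingSettings(system).withRememberEntities(true),
      extractEntityId,
      extractShardId)
}
```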
### Behavior When Enabled
When `rememberEntities` is enabled, whenever a `Shard` is rebalanced onto another
node or recovers after a crash it will recreate all the entities which were previously
running in that `Shard`. To permanently stop entities, a `Passivate` message must be
sent to the parent of the entity actor, otherwise the entity will be automatically
restarted after the entity restart backoff specified in the configuration.
node or recovers after a crash, it will recreate all the entities which were previously
running in that `Shard`.
To permanently stop entities send a `ClusterSharding.Passivate` to the
@scala[`ActorRef[ShardCommand]`]@java[`ActorRef<ShardCommand>`] that was passed in to
the factory method when creating the entity.
Otherwise, the entity will be automatically restarted after the entity restart backoff specified in the configuration.
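With the classic API (as exercised by the specs above) the equivalent is the entity asking its parent shard to passivate it; the message names here are only placeholders:

```scala
import akka.actor.Actor
import akka.cluster.sharding.ShardRegion

// An entity that stops permanently via passivation: the shard buffers incoming
// messages for it, sends back the stop message, and only then does the entity stop.
class PassivatingEntity extends Actor {
  override def receive: Receive = {
    case "passivate" =>
      context.parent ! ShardRegion.Passivate(stopMessage = "stop")
    case "stop" =>
      context.stop(self)
    case msg =>
      sender() ! msg
  }
}
```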
### Remember entities store
There are two options for the remember entities store:
1. `ddata`
1. `eventsourced`
#### Remember entities distributed data mode
Enable ddata mode with (enabled by default):
```
akka.cluster.sharding.remember-entities-store = ddata
```
To support restarting entities after a full cluster restart (non-rolling) the remember entities store is persisted to disk by distributed data.
This can be disabled if not needed:
```
akka.cluster.sharding.distributed-data.durable.keys = []
```
Reasons for disabling:
* No requirement for remembering entities after a full cluster shutdown
* Running in an environment without access to disk between restarts e.g. Kubernetes without persistent volumes
For supporting remembered entities in an environment without disk storage use `eventsourced` mode instead.
#### Event sourced mode
Enable `eventsourced` mode with:
```
akka.cluster.sharding.remember-entities-store = eventsourced
```
This mode uses @ref:[Event Sourcing](./persistence.md) to store the active shards and active entities for each shard
so a persistence and snapshot plugin must be configured.
```
akka.cluster.sharding.journal-plugin-id = <plugin>
akka.cluster.sharding.snapshot-plugin-id = <plugin>
```
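For example, a configuration sketch that points the `eventsourced` store at explicit plugins (the in-memory journal and local snapshot store are testing-only placeholders; use your distributed plugins in production):

```scala
import com.typesafe.config.ConfigFactory

object EventSourcedRememberEntitiesSketch {
  // Enable the eventsourced remember entities store with explicit plugin ids.
  val config = ConfigFactory.parseString("""
    akka.cluster.sharding {
      remember-entities = on
      remember-entities-store = eventsourced
      journal-plugin-id = "akka.persistence.journal.inmem"
      snapshot-plugin-id = "akka.persistence.snapshot-store.local"
    }
    """)
}
```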
### Migrating from deprecated persistence mode
If not using remembered entities you can migrate to `ddata` with a full cluster restart.
If using remembered entities there are two migration options:
* `ddata` for the state store and `ddata` for remembering entities. All remembered entities will be lost after a full cluster restart.
* `ddata` for the state store and `eventsourced` for remembering entities. The new `eventsourced` remembering entities store
reads the data written by the old `persistence` mode. Your remembered entities will be remembered after a full cluster restart.
For migrating existing remembered entities an event adapter needs to be configured in the config for the journal you use in your `application.conf`.
In this example `cassandra` is the journal being used:
```
akka.persistence.cassandra.journal {
event-adapters {
coordinator-migration = "akka.cluster.sharding.OldCoordinatorStateMigrationEventAdapter"
}
event-adapter-bindings {
"akka.cluster.sharding.ShardCoordinator$Internal$DomainEvent" = coordinator-migration
}
}
```
Once you have migrated you cannot go back to the old persistence store, a rolling upgrade is therefore not possible.
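Putting the pieces together, a sketch of the second migration option (`ddata` state store plus `eventsourced` remember entities, with the event adapter from the example above; substitute the journal you actually use for `cassandra`):

```scala
import com.typesafe.config.ConfigFactory

object RememberEntitiesMigrationSketch {
  // Coordinator state in ddata, remembered entities in the eventsourced store,
  // reading the data previously written by the deprecated persistence mode.
  val config = ConfigFactory.parseString("""
    akka.cluster.sharding {
      state-store-mode = ddata
      remember-entities = on
      remember-entities-store = eventsourced
    }
    akka.persistence.cassandra.journal {
      event-adapters {
        coordinator-migration = "akka.cluster.sharding.OldCoordinatorStateMigrationEventAdapter"
      }
      event-adapter-bindings {
        "akka.cluster.sharding.ShardCoordinator$Internal$DomainEvent" = coordinator-migration
      }
    }
    """)
}
```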
When @ref:[Distributed Data mode](#distributed-data-mode) is used the identifiers of the entities are
stored in @ref:[Durable Storage](distributed-data.md#durable-storage) of Distributed Data. You may want to change the
@ -356,14 +427,7 @@ you can disable durable storage and benefit from better performance by using the
```
akka.cluster.sharding.distributed-data.durable.keys = []
```
### Behavior When Not Enabled
When `rememberEntities` is disabled (the default), a `Shard` will not automatically restart any entities
after a rebalance or recovering from a crash. Instead, entities are started once the first message
for that entity has been received in the `Shard`.
### Startup after minimum number of members
## Startup after minimum number of members
It's recommended to use Cluster Sharding with the Cluster setting `akka.cluster.min-nr-of-members` or
`akka.cluster.role.<role-name>.min-nr-of-members`. `min-nr-of-members` will defer the allocation of the shards

View file

@ -16,7 +16,7 @@ import akka.remote.artery.RemotingFlightRecorder
* INTERNAL API
*/
@InternalApi
private[akka] final class JFRRemotingFlightRecorder(system: ExtendedActorSystem) extends RemotingFlightRecorder {
private[akka] final class JFRRemotingFlightRecorder() extends RemotingFlightRecorder {
override def transportMediaDriverStarted(directoryName: String): Unit =
new TransportMediaDriverStarted(directoryName).commit()

View file

@ -6,9 +6,6 @@ package akka.remote.artery
import java.net.InetSocketAddress
import scala.util.Failure
import scala.util.Success
import akka.actor.Address
import akka.actor.ExtendedActorSystem
import akka.actor.Extension
@ -16,7 +13,7 @@ import akka.actor.ExtensionId
import akka.actor.ExtensionIdProvider
import akka.annotation.InternalApi
import akka.remote.UniqueAddress
import akka.util.JavaVersion
import akka.util.FlightRecorderLoader
/**
* INTERNAL API
@ -25,19 +22,10 @@ import akka.util.JavaVersion
object RemotingFlightRecorder extends ExtensionId[RemotingFlightRecorder] with ExtensionIdProvider {
override def createExtension(system: ExtendedActorSystem): RemotingFlightRecorder =
if (JavaVersion.majorVersion >= 11 && system.settings.config.getBoolean("akka.java-flight-recorder.enabled")) {
// Dynamic instantiation to not trigger class load on earlier JDKs
system.dynamicAccess.createInstanceFor[RemotingFlightRecorder](
"akka.remote.artery.jfr.JFRRemotingFlightRecorder",
(classOf[ExtendedActorSystem], system) :: Nil) match {
case Success(jfr) => jfr
case Failure(ex) =>
system.log.warning("Failed to load JFR remoting flight recorder, falling back to noop. Exception: {}", ex)
NoOpRemotingFlightRecorder
} // fallback if not possible to dynamically load for some reason
} else
// JFR not available on Java 8
NoOpRemotingFlightRecorder
FlightRecorderLoader.load[RemotingFlightRecorder](
system,
"akka.remote.artery.jfr.JFRRemotingFlightRecorder",
NoOpRemotingFlightRecorder)
override def lookup(): ExtensionId[_ <: Extension] = this
}

View file

@ -164,6 +164,7 @@ lazy val clusterSharding = akkaModule("akka-cluster-sharding")
.settings(Protobuf.settings)
.configs(MultiJvm)
.enablePlugins(MultiNode, ScaladocNoVerificationOfDiagrams)
.enablePlugins(Jdk9)
lazy val clusterTools = akkaModule("akka-cluster-tools")
.dependsOn(
@ -467,7 +468,7 @@ lazy val clusterShardingTyped = akkaModule("akka-cluster-sharding-typed")
.dependsOn(
actorTyped % "compile->CompileJdk9",
clusterTyped % "compile->compile;test->test;multi-jvm->multi-jvm",
clusterSharding,
clusterSharding % "compile->compile;compile->CompileJdk9;multi-jvm->multi-jvm",
actorTestkitTyped % "test->test",
actorTypedTests % "test->test",
persistenceTyped % "test->test",

View file

@ -30,6 +30,7 @@ object AkkaDisciplinePlugin extends AutoPlugin {
"akka-cluster",
"akka-cluster-metrics",
"akka-cluster-sharding",
"akka-cluster-sharding-typed",
"akka-distributed-data",
"akka-persistence",
"akka-persistence-tck",