Improve performance of DDataShard stashing, #26877

* While waiting for an update to complete, it now delivers messages to other already started entities immediately, instead of stashing
* Unstash one message at a time, instead of unstashAll
* Append messages for the entity that we are waiting for to its messageBuffer, instead of stashing
* Test to confirm the improvements
* Fixing a few other missing things:
  * receiveStartEntity should process the change before starting the entity
  * lastMessageTimestamp should be touched from the overridden deliverTo
  * handle StoreFailure
parent 35e7e07488
commit ce438637bb

4 changed files with 435 additions and 80 deletions
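To show the idea at a glance, here is a minimal, self-contained sketch of the deliver-or-buffer-or-stash strategy described above. It is not the real Shard code: `Envelope`, `UpdateCompleted` and the `ShardLike` actor are invented stand-ins, and the actual DDataShard drives the single `unstash()` from `aroundReceive` and replicates state through the Replicator (see the diff below). `MessageBufferMap` is the same `akka.util` helper the Shard uses.

```scala
import akka.actor.{ Actor, Props, Stash }
import akka.util.MessageBufferMap

// Simplified stand-ins for this sketch only (not the real Shard protocol).
object DeliverOrBufferSketch {
  final case class Envelope(entityId: String, payload: Any)
  private final case class UpdateCompleted(entityId: String)

  class ShardLike(entityProps: String => Props) extends Actor with Stash {
    private val messageBuffers = new MessageBufferMap[String]

    def receive: Receive = idle

    private def idle: Receive = {
      case Envelope(id, payload) =>
        context.child(id) match {
          case Some(entity) =>
            entity.tell(payload, sender()) // already started: deliver directly
          case None =>
            // pretend we must replicate "entity started" before creating the child;
            // buffer this first message instead of stashing it
            messageBuffers.append(id, payload, sender())
            context.become(waitingForUpdate(id), discardOld = false)
            self ! UpdateCompleted(id) // stands in for the Replicator's UpdateSuccess
        }
        unstash() // keep draining the stash, one message per processed message
    }

    private def waitingForUpdate(updatingId: String): Receive = {
      case Envelope(id, payload) if id == updatingId =>
        messageBuffers.append(id, payload, sender()) // same entity: buffer, don't stash
      case Envelope(id, payload) if context.child(id).nonEmpty =>
        context.child(id).foreach(_.tell(payload, sender())) // other started entity: deliver now
      case UpdateCompleted(id) if id == updatingId =>
        context.unbecome()
        val entity = context.actorOf(entityProps(id), id)
        messageBuffers.getOrEmpty(id).foreach((m, snd) => entity.tell(m, snd))
        messageBuffers.remove(id)
        unstash() // unstash one message at a time, not unstashAll
      case _ =>
        stash() // everything else waits for the update to complete
    }
  }
}
```

In the real change this decision lives in `deliverOrBufferMessage` inside `DDataShard.waitingForUpdate`, and the one-at-a-time `unstash()` is wired into `aroundReceive`, so the stash drains steadily without flooding the shard after each update.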
@@ -0,0 +1,3 @@
+# #26877 Performance improvements of DDataShard
+ProblemFilters.exclude[Problem]("akka.cluster.sharding.Shard.*")
@@ -6,34 +6,34 @@ package akka.cluster.sharding
 import java.net.URLEncoder
-import akka.actor.{
-  Actor,
-  ActorLogging,
-  ActorRef,
-  ActorSystem,
-  DeadLetterSuppression,
-  Deploy,
-  NoSerializationVerificationNeeded,
-  Props,
-  Stash,
-  Terminated,
-  Timers
-}
-import akka.util.{ ConstantFun, MessageBufferMap }
 import scala.concurrent.Future
+import scala.concurrent.duration._
+
+import akka.actor.Actor
+import akka.actor.ActorLogging
+import akka.actor.ActorRef
+import akka.actor.ActorSystem
+import akka.actor.DeadLetterSuppression
+import akka.actor.Deploy
+import akka.actor.NoSerializationVerificationNeeded
+import akka.actor.Props
+import akka.actor.Stash
+import akka.actor.Terminated
+import akka.actor.Timers
 import akka.cluster.Cluster
 import akka.cluster.ddata.ORSet
 import akka.cluster.ddata.ORSetKey
 import akka.cluster.ddata.Replicator._
 import akka.cluster.ddata.SelfUniqueAddress
-import akka.persistence._
-import akka.util.PrettyDuration._
-import akka.coordination.lease.scaladsl.{ Lease, LeaseProvider }
-import akka.pattern.pipe
-
-import scala.concurrent.duration._
+import akka.cluster.sharding.ShardCoordinator.Internal.CoordinatorMessage
 import akka.cluster.sharding.ShardRegion.ShardInitialized
+import akka.cluster.sharding.ShardRegion.ShardRegionCommand
+import akka.coordination.lease.scaladsl.Lease
+import akka.coordination.lease.scaladsl.LeaseProvider
+import akka.pattern.pipe
+import akka.persistence._
+import akka.util.MessageBufferMap
+import akka.util.PrettyDuration._
 import akka.util.unused
 
 /**
@@ -150,7 +150,7 @@ private[akka] object Shard {
       .withDeploy(Deploy.local)
   }
 
-  private case object PassivateIdleTick extends NoSerializationVerificationNeeded
+  case object PassivateIdleTick extends NoSerializationVerificationNeeded
 
 }
@@ -174,9 +174,14 @@ private[akka] class Shard(
     with ActorLogging
     with Timers {
 
-  import ShardRegion.{ handOffStopperProps, EntityId, Msg, Passivate, ShardInitialized }
-  import ShardCoordinator.Internal.{ HandOff, ShardStopped }
   import Shard._
+  import ShardCoordinator.Internal.HandOff
+  import ShardCoordinator.Internal.ShardStopped
+  import ShardRegion.EntityId
+  import ShardRegion.Msg
+  import ShardRegion.Passivate
+  import ShardRegion.ShardInitialized
+  import ShardRegion.handOffStopperProps
   import akka.cluster.sharding.ShardCoordinator.Internal.CoordinatorMessage
   import akka.cluster.sharding.ShardRegion.ShardRegionCommand
   import settings.tuningParameters._
@@ -188,7 +193,7 @@ private[akka] class Shard(
   var passivating = Set.empty[ActorRef]
   val messageBuffers = new MessageBufferMap[EntityId]
 
-  var handOffStopper: Option[ActorRef] = None
+  private var handOffStopper: Option[ActorRef] = None
 
   import context.dispatcher
   val passivateIdleTask = if (settings.passivateIdleEntityAfter > Duration.Zero) {
@@ -198,14 +203,14 @@ private[akka] class Shard(
     None
   }
 
-  val lease = settings.leaseSettings.map(
+  private val lease = settings.leaseSettings.map(
    ls =>
      LeaseProvider(context.system).getLease(
        s"${context.system.name}-shard-$typeName-$shardId",
        ls.leaseImplementation,
        Cluster(context.system).selfAddress.hostPort))
 
-  val leaseRetryInterval = settings.leaseSettings match {
+  private val leaseRetryInterval = settings.leaseSettings match {
    case Some(l) => l.leaseRetryInterval
    case None    => 5.seconds // not used
  }
@@ -249,7 +254,7 @@ private[akka] class Shard(
 
   // Don't send back ShardInitialized so that messages are buffered in the ShardRegion
   // while awaiting the lease
-  def awaitingLease(): Receive = {
+  private def awaitingLease(): Receive = {
    case LeaseAcquireResult(true, _) =>
      log.debug("Acquired lease")
      onLeaseAcquired()
@@ -292,27 +297,32 @@ private[akka] class Shard(
     log.error("Shard type [{}] id [{}] lease lost. Reason: {}", typeName, shardId, msg.reason)
     // Stop entities ASAP rather than send termination message
     context.stop(self)
 
   }
-  def receiveShardCommand(msg: ShardCommand): Unit = msg match {
+
+  private def receiveShardCommand(msg: ShardCommand): Unit = msg match {
+    // those are only used with remembering entities
    case RestartEntity(id)    => getOrCreateEntity(id)
    case RestartEntities(ids) => restartEntities(ids)
  }
 
-  def receiveStartEntity(start: ShardRegion.StartEntity): Unit = {
+  private def receiveStartEntity(start: ShardRegion.StartEntity): Unit = {
    val requester = sender()
    log.debug("Got a request from [{}] to start entity [{}] in shard [{}]", requester, start.entityId, shardId)
-    if (passivateIdleTask.isDefined) {
-      lastMessageTimestamp = lastMessageTimestamp.updated(start.entityId, System.nanoTime())
+    touchLastMessageTimestamp(start.entityId)
+
+    if (state.entities(start.entityId)) {
+      getOrCreateEntity(start.entityId)
+      requester ! ShardRegion.StartEntityAck(start.entityId, shardId)
+    } else {
+      processChange(EntityStarted(start.entityId)) { evt =>
+        getOrCreateEntity(start.entityId)
+        sendMsgBuffer(evt)
+        requester ! ShardRegion.StartEntityAck(start.entityId, shardId)
+      }
    }
-    getOrCreateEntity(
-      start.entityId,
-      _ =>
-        processChange(EntityStarted(start.entityId))(_ =>
-          requester ! ShardRegion.StartEntityAck(start.entityId, shardId)))
  }
 
-  def receiveStartEntityAck(ack: ShardRegion.StartEntityAck): Unit = {
+  private def receiveStartEntityAck(ack: ShardRegion.StartEntityAck): Unit = {
    if (ack.shardId != shardId && state.entities.contains(ack.entityId)) {
      log.debug("Entity [{}] previously owned by shard [{}] started in shard [{}]", ack.entityId, shardId, ack.shardId)
      processChange(EntityStopped(ack.entityId)) { _ =>
@@ -322,16 +332,16 @@ private[akka] class Shard(
      }
    }
 
-  def restartEntities(ids: Set[EntityId]): Unit = {
+  private def restartEntities(ids: Set[EntityId]): Unit = {
    context.actorOf(RememberEntityStarter.props(context.parent, ids, settings, sender()))
  }
 
-  def receiveShardRegionCommand(msg: ShardRegionCommand): Unit = msg match {
+  private def receiveShardRegionCommand(msg: ShardRegionCommand): Unit = msg match {
    case Passivate(stopMessage) => passivate(sender(), stopMessage)
    case _                      => unhandled(msg)
  }
 
-  def receiveCoordinatorMessage(msg: CoordinatorMessage): Unit = msg match {
+  private def receiveCoordinatorMessage(msg: CoordinatorMessage): Unit = msg match {
    case HandOff(`shardId`) => handOff(sender())
    case HandOff(shard)     => log.warning("Shard [{}] can not hand off for another Shard [{}]", shardId, shard)
    case _                  => unhandled(msg)
@@ -342,7 +352,7 @@ private[akka] class Shard(
    case GetShardStats => sender() ! ShardStats(shardId, state.entities.size)
  }
 
-  def handOff(replyTo: ActorRef): Unit = handOffStopper match {
+  private def handOff(replyTo: ActorRef): Unit = handOffStopper match {
    case Some(_) => log.warning("HandOff shard [{}] received during existing handOff", shardId)
    case None =>
      log.debug("HandOff shard [{}]", shardId)
@@ -363,7 +373,7 @@ private[akka] class Shard(
    }
  }
 
-  def receiveTerminated(ref: ActorRef): Unit = {
+  private def receiveTerminated(ref: ActorRef): Unit = {
    if (handOffStopper.contains(ref))
      context.stop(self)
    else if (idByRef.contains(ref) && handOffStopper.isEmpty)
@@ -387,7 +397,7 @@ private[akka] class Shard(
    passivating = passivating - ref
  }
 
-  def passivate(entity: ActorRef, stopMessage: Any): Unit = {
+  private def passivate(entity: ActorRef, stopMessage: Any): Unit = {
    idByRef.get(entity) match {
      case Some(id) =>
        if (!messageBuffers.contains(id)) {
@@ -401,7 +411,13 @@ private[akka] class Shard(
      }
    }
 
-  def passivateIdleEntities(): Unit = {
+  def touchLastMessageTimestamp(id: EntityId): Unit = {
+    if (passivateIdleTask.isDefined) {
+      lastMessageTimestamp = lastMessageTimestamp.updated(id, System.nanoTime())
+    }
+  }
+
+  private def passivateIdleEntities(): Unit = {
    val deadline = System.nanoTime() - settings.passivateIdleEntityAfter.toNanos
    val refsToPassivate = lastMessageTimestamp.collect {
      case (entityId, lastMessageTimestamp) if lastMessageTimestamp < deadline => refById(entityId)
@@ -447,29 +463,30 @@ private[akka] class Shard(
        // in case it was wrapped, used in Typed
        receiveStartEntity(start)
      case _ =>
-        messageBuffers.contains(id) match {
-          case false => deliverTo(id, msg, payload, snd)
-
-          case true if messageBuffers.totalSize >= bufferSize =>
-            log.debug("Buffer is full, dropping message for entity [{}]", id)
-            context.system.deadLetters ! msg
-
-          case true =>
-            log.debug("Message for entity [{}] buffered", id)
-            messageBuffers.append(id, msg, snd)
-        }
+        if (messageBuffers.contains(id))
+          appendToMessageBuffer(id, msg, snd)
+        else
+          deliverTo(id, msg, payload, snd)
      }
    }
  }
 
-  def deliverTo(id: EntityId, @unused msg: Any, payload: Msg, snd: ActorRef): Unit = {
-    if (passivateIdleTask.isDefined) {
-      lastMessageTimestamp = lastMessageTimestamp.updated(id, System.nanoTime())
+  def appendToMessageBuffer(id: EntityId, msg: Any, snd: ActorRef): Unit = {
+    if (messageBuffers.totalSize >= bufferSize) {
+      log.debug("Buffer is full, dropping message for entity [{}]", id)
+      context.system.deadLetters ! msg
+    } else {
+      log.debug("Message for entity [{}] buffered", id)
+      messageBuffers.append(id, msg, snd)
    }
+  }
+
+  def deliverTo(id: EntityId, @unused msg: Any, payload: Msg, snd: ActorRef): Unit = {
+    touchLastMessageTimestamp(id)
    getOrCreateEntity(id).tell(payload, snd)
  }
 
-  def getOrCreateEntity(id: EntityId, onCreate: ActorRef => Unit = ConstantFun.scalaAnyToUnit): ActorRef = {
+  def getOrCreateEntity(id: EntityId): ActorRef = {
    val name = URLEncoder.encode(id, "utf-8")
    context.child(name) match {
      case Some(child) => child
@@ -478,11 +495,8 @@ private[akka] class Shard(
        val a = context.watch(context.actorOf(entityProps(id), name))
        idByRef = idByRef.updated(a, id)
        refById = refById.updated(id, a)
-        if (passivateIdleTask.isDefined) {
-          lastMessageTimestamp += (id -> System.nanoTime())
-        }
        state = state.copy(state.entities + id)
-        onCreate(a)
+        touchLastMessageTimestamp(id)
        a
    }
  }
@@ -510,8 +524,8 @@ private[akka] class RememberEntityStarter(
    extends Actor
    with ActorLogging {
 
-  import context.dispatcher
  import RememberEntityStarter.Tick
+  import context.dispatcher
 
  var waitingForAck = ids
@@ -551,8 +565,9 @@ private[akka] class RememberEntityStarter(
 private[akka] trait RememberingShard {
  selfType: Shard =>
 
-  import ShardRegion.{ EntityId, Msg }
  import Shard._
+  import ShardRegion.EntityId
+  import ShardRegion.Msg
  import akka.pattern.pipe
 
  protected val settings: ClusterShardingSettings
@@ -592,6 +607,7 @@ private[akka] trait RememberingShard {
    } else {
      if (!passivating.contains(ref)) {
        log.debug("Entity [{}] stopped without passivating, will restart after backoff", id)
+        // note that it's not removed from state here, will be started again via RestartEntity
        import context.dispatcher
        context.system.scheduler.scheduleOnce(entityRestartBackoff, self, RestartEntity(id))
      } else processChange(EntityStopped(id))(passivateCompleted)
@@ -604,9 +620,11 @@ private[akka] trait RememberingShard {
    val name = URLEncoder.encode(id, "utf-8")
    context.child(name) match {
      case Some(actor) =>
+        touchLastMessageTimestamp(id)
        actor.tell(payload, snd)
      case None =>
        if (state.entities.contains(id)) {
+          // this may happen when entity is stopped without passivation
          require(!messageBuffers.contains(id), s"Message buffers contains id [$id].")
          getOrCreateEntity(id).tell(payload, snd)
        } else {
@@ -740,8 +758,8 @@ private[akka] class DDataShard(
    with Stash
    with ActorLogging {
 
-  import ShardRegion.EntityId
  import Shard._
+  import ShardRegion.EntityId
  import settings.tuningParameters._
 
  private val readMajority = ReadMajority(settings.tuningParameters.waitingForStateTimeout, majorityMinCap)
@@ -759,10 +777,12 @@ private[akka] class DDataShard(
  // configuration on each node.
  private val numberOfKeys = 5
  private val stateKeys: Array[ORSetKey[EntityId]] =
-    Array.tabulate(numberOfKeys)(i => ORSetKey[EntityId](s"shard-${typeName}-${shardId}-$i"))
+    Array.tabulate(numberOfKeys)(i => ORSetKey[EntityId](s"shard-$typeName-$shardId-$i"))
 
+  private var waiting = true
+
  private def key(entityId: EntityId): ORSetKey[EntityId] = {
-    val i = (math.abs(entityId.hashCode % numberOfKeys))
+    val i = math.abs(entityId.hashCode % numberOfKeys)
    stateKeys(i)
  }
@@ -773,11 +793,17 @@ private[akka] class DDataShard(
  }
 
  private def getState(): Unit = {
-    (0 until numberOfKeys).map { i =>
+    (0 until numberOfKeys).foreach { i =>
      replicator ! Get(stateKeys(i), readMajority, Some(i))
    }
  }
 
+  override protected[akka] def aroundReceive(rcv: Receive, msg: Any): Unit = {
+    super.aroundReceive(rcv, msg)
+    if (!waiting)
+      unstash() // unstash one message
+  }
+
  override def receive = waitingForState(Set.empty)
 
  // This state will stash all commands
@@ -807,24 +833,26 @@ private[akka] class DDataShard(
      receiveOne(i)
 
    case _ =>
+      log.debug("Stashing while waiting for DDataShard initial state")
      stash()
  }
  }
 
  private def recoveryCompleted(): Unit = {
    log.debug("DDataShard recovery completed shard [{}] with [{}] entities", shardId, state.entities.size)
+    waiting = false
    context.parent ! ShardInitialized(shardId)
    context.become(receiveCommand)
    restartRememberedEntities()
-    unstashAll()
  }
 
  override def processChange[E <: StateChange](event: E)(handler: E => Unit): Unit = {
+    waiting = true
    context.become(waitingForUpdate(event, handler), discardOld = false)
    sendUpdate(event, retryCount = 1)
  }
 
-  private def sendUpdate(evt: StateChange, retryCount: Int) = {
+  private def sendUpdate(evt: StateChange, retryCount: Int): Unit = {
    replicator ! Update(key(evt.entityId), ORSet.empty[EntityId], writeMajority, Some((evt, retryCount))) { existing =>
      evt match {
        case EntityStarted(id) => existing :+ id
@@ -837,9 +865,9 @@ private[akka] class DDataShard(
  private def waitingForUpdate[E <: StateChange](evt: E, afterUpdateCallback: E => Unit): Receive = {
    case UpdateSuccess(_, Some((`evt`, _))) =>
      log.debug("The DDataShard state was successfully updated with {}", evt)
+      waiting = false
      context.unbecome()
      afterUpdateCallback(evt)
-      unstashAll()
 
    case UpdateTimeout(_, Some((`evt`, retryCount: Int))) =>
      if (retryCount == maxUpdateAttempts) {
@@ -861,16 +889,73 @@ private[akka] class DDataShard(
        sendUpdate(evt, retryCount + 1)
      }
 
+    case StoreFailure(_, Some((`evt`, _))) =>
+      log.error(
+        "The DDataShard was unable to update state with event {} due to StoreFailure. " +
+        "Shard will be restarted after backoff.",
+        evt)
+      context.stop(self)
+
    case ModifyFailure(_, error, cause, Some((`evt`, _))) =>
      log.error(
        cause,
-        "The DDataShard was unable to update state with error {} and event {}. Shard will be restarted",
-        error,
-        evt)
+        "The DDataShard was unable to update state with event {} due to ModifyFailure. " +
+        "Shard will be restarted. {}",
+        evt,
+        error)
      throw cause
 
-    // TODO what can this actually be? We're unitialized in the ShardRegion
-    case _ => stash()
+    // below cases should handle same messages as in Shard.receiveCommand
+    case _: Terminated                           => stash()
+    case _: CoordinatorMessage                   => stash()
+    case _: ShardCommand                         => stash()
+    case _: ShardRegion.StartEntity              => stash()
+    case _: ShardRegion.StartEntityAck           => stash()
+    case _: ShardRegionCommand                   => stash()
+    case msg: ShardQuery                         => receiveShardQuery(msg)
+    case PassivateIdleTick                       => stash()
+    case msg: LeaseLost                          => receiveLeaseLost(msg)
+    case msg if extractEntityId.isDefinedAt(msg) => deliverOrBufferMessage(msg, evt)
+    case msg =>
+      // shouldn't be any other message types, but just in case
+      log.debug("Stashing unexpected message [{}] while waiting for DDataShard update of {}", msg.getClass, evt)
+      stash()
+  }
+
+  /**
+   * If the message is for the same entity as we are waiting for the update it will be added to
+   * its messageBuffer, which will be sent after the update has completed.
+   *
+   * If the message is for another entity that is already started (and not in progress of passivating)
+   * it will be delivered immediately.
+   *
+   * Otherwise it will be stashed, and processed after the update has been completed.
+   */
+  private def deliverOrBufferMessage(msg: Any, waitingForUpdateEvent: StateChange): Unit = {
+    val (id, payload) = extractEntityId(msg)
+    if (id == null || id == "") {
+      log.warning("Id must not be empty, dropping message [{}]", msg.getClass.getName)
+      context.system.deadLetters ! msg
+    } else {
+      payload match {
+        case _: ShardRegion.StartEntity =>
+          // in case it was wrapped, used in Typed
+          stash()
+        case _ =>
+          if (id == waitingForUpdateEvent.entityId) {
+            appendToMessageBuffer(id, msg, sender())
+          } else {
+            val name = URLEncoder.encode(id, "utf-8")
+            // messageBuffers.contains(id) when passivation is in progress
+            if (!messageBuffers.contains(id) && context.child(name).nonEmpty) {
+              deliverTo(id, msg, payload, sender())
+            } else {
+              log.debug("Stashing to [{}] while waiting for DDataShard update of {}", id, waitingForUpdateEvent)
+              stash()
+            }
+          }
+      }
+    }
  }
 
 }
@@ -887,9 +972,10 @@ object EntityRecoveryStrategy
 
 trait EntityRecoveryStrategy {
 
-  import ShardRegion.EntityId
  import scala.concurrent.Future
 
+  import ShardRegion.EntityId
+
  def recoverEntities(entities: Set[EntityId]): Set[Future[Set[EntityId]]]
 }
@@ -0,0 +1,258 @@
+/*
+ * Copyright (C) 2019 Lightbend Inc. <https://www.lightbend.com>
+ */
+
+package akka.cluster.sharding
+
+import java.io.File
+import java.util.concurrent.TimeUnit.NANOSECONDS
+
+import scala.concurrent.duration._
+
+import akka.actor._
+import akka.cluster.Cluster
+import akka.cluster.MemberStatus
+import akka.cluster.MultiNodeClusterSpec
+import akka.remote.testconductor.RoleName
+import akka.remote.testkit.MultiNodeConfig
+import akka.remote.testkit.MultiNodeSpec
+import akka.remote.testkit.STMultiNodeSpec
+import akka.testkit._
+import akka.util.ccompat._
+import com.typesafe.config.ConfigFactory
+import org.apache.commons.io.FileUtils
+
+@ccompatUsedUntil213
+object ClusterShardingRememberEntitiesPerfSpec {
+
+  def props(): Props = Props(new TestEntity)
+
+  class TestEntity extends Actor with ActorLogging {
+
+    log.debug("Started TestEntity: {}", self)
+
+    def receive = {
+      case m => sender() ! m
+    }
+  }
+
+  val extractEntityId: ShardRegion.ExtractEntityId = {
+    case id: Int => (id.toString, id)
+  }
+
+  val extractShardId: ShardRegion.ExtractShardId = msg =>
+    msg match {
+      case _: Int                     => "0" // only one shard
+      case ShardRegion.StartEntity(_) => "0"
+    }
+
+}
+
+object ClusterShardingRememberEntitiesPerfSpecConfig extends MultiNodeConfig {
+  val first = role("first")
+  val second = role("second")
+  val third = role("third")
+
+  commonConfig(ConfigFactory.parseString(s"""
+    akka.loglevel = INFO
+    akka.actor.provider = "cluster"
+    akka.cluster.auto-down-unreachable-after = 0s
+    akka.remote.log-remote-lifecycle-events = off
+    akka.testconductor.barrier-timeout = 3 minutes
+    akka.remote.artery.advanced.outbound-message-queue-size = 10000
+    akka.remote.artery.advanced.maximum-frame-size = 512 KiB
+    akka.cluster.sharding.state-store-mode = "ddata"
+    akka.cluster.sharding.distributed-data.durable.lmdb {
+      dir = target/ShardingRememberEntitiesPerfSpec/sharding-ddata
+    }
+    # comment next line to enable durable lmdb storage
+    akka.cluster.sharding.distributed-data.durable.keys = []
+    """).withFallback(MultiNodeClusterSpec.clusterConfig))
+
+  nodeConfig(third)(ConfigFactory.parseString(s"""
+    akka.cluster.sharding.distributed-data.durable.lmdb {
+      # use same directory when starting new node on third (not used at same time)
+      dir = target/ShardingRememberEntitiesSpec/sharding-third
+    }
+    """))
+}
+
+class ClusterShardingRememberEntitiesPerfSpecMultiJvmNode1 extends ClusterShardingRememberEntitiesPerfSpec
+class ClusterShardingRememberEntitiesPerfSpecMultiJvmNode2 extends ClusterShardingRememberEntitiesPerfSpec
+class ClusterShardingRememberEntitiesPerfSpecMultiJvmNode3 extends ClusterShardingRememberEntitiesPerfSpec
+
+abstract class ClusterShardingRememberEntitiesPerfSpec
+    extends MultiNodeSpec(ClusterShardingRememberEntitiesPerfSpecConfig)
+    with STMultiNodeSpec
+    with ImplicitSender {
+  import ClusterShardingRememberEntitiesPerfSpec._
+  import ClusterShardingRememberEntitiesPerfSpecConfig._
+
+  override def initialParticipants = roles.size
+
+  val storageLocations = List(
+    new File(system.settings.config.getString("akka.cluster.sharding.distributed-data.durable.lmdb.dir")).getParentFile)
+
+  override protected def atStartup(): Unit = {
+    storageLocations.foreach(dir => if (dir.exists) FileUtils.deleteQuietly(dir))
+    enterBarrier("startup")
+  }
+
+  override protected def afterTermination(): Unit = {
+    storageLocations.foreach(dir => if (dir.exists) FileUtils.deleteQuietly(dir))
+  }
+
+  def join(from: RoleName, to: RoleName): Unit = {
+    runOn(from) {
+      Cluster(system).join(node(to).address)
+    }
+    enterBarrier(from.name + "-joined")
+  }
+
+  val cluster = Cluster(system)
+
+  def startSharding(): Unit = {
+    (1 to 3).foreach { n =>
+      ClusterSharding(system).start(
+        typeName = s"Entity$n",
+        entityProps = ClusterShardingRememberEntitiesPerfSpec.props(),
+        settings = ClusterShardingSettings(system).withRememberEntities(true),
+        extractEntityId = extractEntityId,
+        extractShardId = extractShardId)
+    }
+  }
+
+  lazy val region1 = ClusterSharding(system).shardRegion("Entity1")
+  lazy val region2 = ClusterSharding(system).shardRegion("Entity2")
+  lazy val region3 = ClusterSharding(system).shardRegion("Entity3")
+
+  // use 5 for "real" testing
+  private val nrIterations = 2
+  // use 5 for "real" testing
+  private val numberOfMessagesFactor = 1
+
+  s"Cluster sharding with remember entities performance" must {
+
+    "form cluster" in within(20.seconds) {
+      join(first, first)
+
+      startSharding()
+
+      // this will make it run on first
+      runOn(first) {
+        region1 ! 0
+        expectMsg(0)
+        region2 ! 0
+        expectMsg(0)
+        region3 ! 0
+        expectMsg(0)
+      }
+      enterBarrier("allocated-on-first")
+
+      join(second, first)
+      join(third, first)
+
+      within(remaining) {
+        awaitAssert {
+          cluster.state.members.size should ===(3)
+          cluster.state.members.unsorted.map(_.status) should ===(Set(MemberStatus.Up))
+        }
+      }
+
+      enterBarrier("all-up")
+    }
+
+    "test when starting new entity" in {
+      runOn(first) {
+        val numberOfMessages = 200 * numberOfMessagesFactor
+        (1 to nrIterations).foreach { iteration =>
+          val startTime = System.nanoTime()
+          (1 to numberOfMessages).foreach { n =>
+            region1 ! (iteration * 100000 + n)
+          }
+          receiveN(numberOfMessages, 20.seconds)
+          val took = NANOSECONDS.toMillis(System.nanoTime - startTime)
+          val throughput = numberOfMessages * 1000.0 / took
+          println(
+            s"### Test1 with $numberOfMessages took ${(System.nanoTime() - startTime) / 1000 / 1000} ms, " +
+            f"throughput $throughput%,.0f msg/s")
+        }
+      }
+      enterBarrier("after-1")
+    }
+
+    "test when starting new entity and sending a few messages to it" in {
+      runOn(first) {
+        val numberOfMessages = 800 * numberOfMessagesFactor
+        (1 to nrIterations).foreach { iteration =>
+          val startTime = System.nanoTime()
+          for (n <- 1 to numberOfMessages / 5; _ <- 1 to 5) {
+            region2 ! (iteration * 100000 + n)
+          }
+          receiveN(numberOfMessages, 20.seconds)
+          val took = NANOSECONDS.toMillis(System.nanoTime - startTime)
+          val throughput = numberOfMessages * 1000.0 / took
+          println(
+            s"### Test2 with $numberOfMessages took ${(System.nanoTime() - startTime) / 1000 / 1000} ms, " +
+            f"throughput $throughput%,.0f msg/s")
+        }
+      }
+      enterBarrier("after-2")
+    }
+
+    "test when starting some new entities mixed with sending to started" in {
+      runOn(first) {
+        val numberOfMessages = 1600 * numberOfMessagesFactor
+        (1 to nrIterations).foreach { iteration =>
+          val startTime = System.nanoTime()
+          (1 to numberOfMessages).foreach { n =>
+            val msg =
+              if (n % 20 == 0)
+                -(iteration * 100000 + n) // unique, will start new entity
+              else
+                iteration * 100000 + (n % 10) // these will go to same 10 started entities
+            region3 ! msg
+
+            if (n == 10) {
+              // wait for the first 10 to avoid filling up stash
+              receiveN(10, 5.seconds)
+            }
+          }
+          receiveN(numberOfMessages - 10, 20.seconds)
+          val took = NANOSECONDS.toMillis(System.nanoTime - startTime)
+          val throughput = numberOfMessages * 1000.0 / took
+          println(
+            s"### Test3 with $numberOfMessages took ${(System.nanoTime() - startTime) / 1000 / 1000} ms, " +
+            f"throughput $throughput%,.0f msg/s")
+        }
+      }
+      enterBarrier("after-3")
+    }
+
+    "test sending to started" in {
+      runOn(first) {
+        val numberOfMessages = 1600 * numberOfMessagesFactor
+        (1 to nrIterations).foreach { iteration =>
+          var startTime = System.nanoTime()
+          (1 to numberOfMessages).foreach { n =>
+            region3 ! (iteration * 100000 + (n % 10)) // these will go to same 10 started entities
+
+            if (n == 10) {
+              // wait for the first 10 and then start the clock
+              receiveN(10, 5.seconds)
+              startTime = System.nanoTime()
+            }
+          }
+          receiveN(numberOfMessages - 10, 20.seconds)
+          val took = NANOSECONDS.toMillis(System.nanoTime - startTime)
+          val throughput = numberOfMessages * 1000.0 / took
+          println(
+            s"### Test4 with $numberOfMessages took ${(System.nanoTime() - startTime) / 1000 / 1000} ms, " +
+            f"throughput $throughput%,.0f msg/s")
+        }
+      }
+      enterBarrier("after-4")
+    }
+  }
+
+}
@@ -379,6 +379,14 @@ the default directory contains the remote port of the actor system. If using a d
 assigned port (0) it will be different each time and the previously stored data will not
 be loaded.
 
+The reason for storing the identifiers of the active entities in durable storage, i.e. stored to
+disk, is that the same entities should be started also after a complete cluster restart. If this is not needed
+you can disable durable storage and benefit from better performance by using the following configuration:
+
+```
+akka.cluster.sharding.distributed-data.durable.keys = []
+```
+
 When `rememberEntities` is set to false, a `Shard` will not automatically restart any entities
 after a rebalance or recovering from a crash. Entities will only be started once the first message
 for that entity has been received in the `Shard`. Entities will not be restarted if they stop without
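As a companion to the documentation change above, a minimal sketch of turning remember entities on programmatically, mirroring the settings used by the new ClusterShardingRememberEntitiesPerfSpec in this commit. The `Echo` entity, the extractors, and the single-node bootstrap are illustrative placeholders, not part of the commit.

```scala
import akka.actor.{ Actor, ActorSystem, Props }
import akka.cluster.Cluster
import akka.cluster.sharding.{ ClusterSharding, ClusterShardingSettings, ShardRegion }
import com.typesafe.config.ConfigFactory

object RememberEntitiesExample {

  // trivial entity that echoes every message back, like the TestEntity in the perf spec
  class Echo extends Actor {
    def receive: Receive = { case m => sender() ! m }
  }

  val extractEntityId: ShardRegion.ExtractEntityId = {
    case id: Int => (id.toString, id)
  }

  val extractShardId: ShardRegion.ExtractShardId = {
    case id: Int                     => (id % 10).toString
    case ShardRegion.StartEntity(id) => (id.toInt % 10).toString // needed when entities are remembered and restarted
  }

  def main(args: Array[String]): Unit = {
    // single-node cluster with the ddata state store; remoting port left at its default
    val config = ConfigFactory.parseString("""
      akka.actor.provider = cluster
      akka.cluster.sharding.state-store-mode = ddata
      """)
    val system = ActorSystem("Sharding", config)
    val cluster = Cluster(system)
    cluster.join(cluster.selfAddress)

    val region = ClusterSharding(system).start(
      typeName = "Echo",
      entityProps = Props[Echo](),
      settings = ClusterShardingSettings(system).withRememberEntities(true),
      extractEntityId = extractEntityId,
      extractShardId = extractShardId)

    region ! 42 // starts entity "42"; with rememberEntities it is restarted after rebalance or crash
  }
}
```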