=cls #16090 Add more logging for buffering and unavailable coordinator
This commit is contained in:
parent
1d5f3726c9
commit
c9f0021297
1 changed files with 39 additions and 5 deletions
|
|
@ -255,6 +255,7 @@ class ShardRegion(
|
||||||
var regions = Map.empty[ActorRef, Set[ShardId]]
|
var regions = Map.empty[ActorRef, Set[ShardId]]
|
||||||
var regionByShard = Map.empty[ShardId, ActorRef]
|
var regionByShard = Map.empty[ShardId, ActorRef]
|
||||||
var shardBuffers = Map.empty[ShardId, Vector[(Msg, ActorRef)]]
|
var shardBuffers = Map.empty[ShardId, Vector[(Msg, ActorRef)]]
|
||||||
|
var loggedFullBufferWarning = false
|
||||||
var shards = Map.empty[ShardId, ActorRef]
|
var shards = Map.empty[ShardId, ActorRef]
|
||||||
var shardsByRef = Map.empty[ActorRef, ShardId]
|
var shardsByRef = Map.empty[ActorRef, ShardId]
|
||||||
var handingOff = Set.empty[ActorRef]
|
var handingOff = Set.empty[ActorRef]
|
||||||
|
|
@ -264,6 +265,7 @@ class ShardRegion(
|
||||||
|
|
||||||
import context.dispatcher
|
import context.dispatcher
|
||||||
val retryTask = context.system.scheduler.schedule(retryInterval, retryInterval, self, Retry)
|
val retryTask = context.system.scheduler.schedule(retryInterval, retryInterval, self, Retry)
|
||||||
|
var retryCount = 0
|
||||||
|
|
||||||
// subscribe to MemberEvent, re-subscribe when restart
|
// subscribe to MemberEvent, re-subscribe when restart
|
||||||
override def preStart(): Unit = {
|
override def preStart(): Unit = {
|
||||||
|
|
@ -376,8 +378,10 @@ class ShardRegion(
|
||||||
// must drop requests that came in between the BeginHandOff and now,
|
// must drop requests that came in between the BeginHandOff and now,
|
||||||
// because they might be forwarded from other regions and there
|
// because they might be forwarded from other regions and there
|
||||||
// is a risk or message re-ordering otherwise
|
// is a risk or message re-ordering otherwise
|
||||||
if (shardBuffers.contains(shard))
|
if (shardBuffers.contains(shard)) {
|
||||||
shardBuffers -= shard
|
shardBuffers -= shard
|
||||||
|
loggedFullBufferWarning = false
|
||||||
|
}
|
||||||
|
|
||||||
if (shards.contains(shard)) {
|
if (shards.contains(shard)) {
|
||||||
handingOff += shards(shard)
|
handingOff += shards(shard)
|
||||||
|
|
@ -391,6 +395,8 @@ class ShardRegion(
|
||||||
|
|
||||||
def receiveCommand(cmd: ShardRegionCommand): Unit = cmd match {
|
def receiveCommand(cmd: ShardRegionCommand): Unit = cmd match {
|
||||||
case Retry ⇒
|
case Retry ⇒
|
||||||
|
if (shardBuffers.nonEmpty)
|
||||||
|
retryCount += 1
|
||||||
if (coordinator.isEmpty)
|
if (coordinator.isEmpty)
|
||||||
register()
|
register()
|
||||||
else {
|
else {
|
||||||
|
|
@ -444,6 +450,9 @@ class ShardRegion(
|
||||||
|
|
||||||
def register(): Unit = {
|
def register(): Unit = {
|
||||||
coordinatorSelection.foreach(_ ! registrationMessage)
|
coordinatorSelection.foreach(_ ! registrationMessage)
|
||||||
|
if (shardBuffers.nonEmpty && retryCount >= 5)
|
||||||
|
log.warning("Trying to register to coordinator at [{}], but no acknoledgment. Total [{}] buffered messages.",
|
||||||
|
coordinatorSelection, totalBufferSize)
|
||||||
}
|
}
|
||||||
|
|
||||||
def registrationMessage: Any =
|
def registrationMessage: Any =
|
||||||
|
|
@ -451,8 +460,13 @@ class ShardRegion(
|
||||||
|
|
||||||
def requestShardBufferHomes(): Unit = {
|
def requestShardBufferHomes(): Unit = {
|
||||||
shardBuffers.foreach {
|
shardBuffers.foreach {
|
||||||
case (shard, _) ⇒ coordinator.foreach { c ⇒
|
case (shard, buf) ⇒ coordinator.foreach { c ⇒
|
||||||
log.debug("Retry request for shard [{}] homes", shard)
|
val logMsg = "Retry request for shard [{}] homes from coordinator at [{}]. [{}] buffered messages."
|
||||||
|
if (retryCount >= 5)
|
||||||
|
log.warning(logMsg, shard, c, buf.size)
|
||||||
|
else
|
||||||
|
log.debug(logMsg, shard, c, buf.size)
|
||||||
|
|
||||||
c ! GetShardHome(shard)
|
c ! GetShardHome(shard)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
@ -461,10 +475,13 @@ class ShardRegion(
|
||||||
def deliverBufferedMessages(shard: String): Unit = {
|
def deliverBufferedMessages(shard: String): Unit = {
|
||||||
shardBuffers.get(shard) match {
|
shardBuffers.get(shard) match {
|
||||||
case Some(buf) ⇒
|
case Some(buf) ⇒
|
||||||
|
log.debug("Deliver [{}] buffered messages for shard [{}]", buf.size, shard)
|
||||||
buf.foreach { case (msg, snd) ⇒ deliverMessage(msg, snd) }
|
buf.foreach { case (msg, snd) ⇒ deliverMessage(msg, snd) }
|
||||||
shardBuffers -= shard
|
shardBuffers -= shard
|
||||||
case None ⇒
|
case None ⇒
|
||||||
}
|
}
|
||||||
|
loggedFullBufferWarning = false
|
||||||
|
retryCount = 0
|
||||||
}
|
}
|
||||||
|
|
||||||
def deliverMessage(msg: Any, snd: ActorRef): Unit =
|
def deliverMessage(msg: Any, snd: ActorRef): Unit =
|
||||||
|
|
@ -480,6 +497,7 @@ class ShardRegion(
|
||||||
coordinator.foreach(_ ! GetShardHome(shardId))
|
coordinator.foreach(_ ! GetShardHome(shardId))
|
||||||
}
|
}
|
||||||
val buf = shardBuffers.getOrElse(shardId, Vector.empty)
|
val buf = shardBuffers.getOrElse(shardId, Vector.empty)
|
||||||
|
log.debug("Buffer message for shard [{}]. Total [{}] buffered messages.", shardId, buf.size + 1)
|
||||||
shardBuffers = shardBuffers.updated(shardId, buf :+ ((msg, snd)))
|
shardBuffers = shardBuffers.updated(shardId, buf :+ ((msg, snd)))
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -499,12 +517,28 @@ class ShardRegion(
|
||||||
log.debug("Request shard [{}] home", shardId)
|
log.debug("Request shard [{}] home", shardId)
|
||||||
coordinator.foreach(_ ! GetShardHome(shardId))
|
coordinator.foreach(_ ! GetShardHome(shardId))
|
||||||
}
|
}
|
||||||
if (totalBufferSize >= bufferSize) {
|
val totBufSize = totalBufferSize
|
||||||
log.debug("Buffer is full, dropping message for shard [{}]", shardId)
|
if (totBufSize >= bufferSize) {
|
||||||
|
if (loggedFullBufferWarning)
|
||||||
|
log.debug("Buffer is full, dropping message for shard [{}]", shardId)
|
||||||
|
else {
|
||||||
|
log.warning("Buffer is full, dropping message for shard [{}]", shardId)
|
||||||
|
loggedFullBufferWarning = true
|
||||||
|
}
|
||||||
context.system.deadLetters ! msg
|
context.system.deadLetters ! msg
|
||||||
} else {
|
} else {
|
||||||
val buf = shardBuffers.getOrElse(shardId, Vector.empty)
|
val buf = shardBuffers.getOrElse(shardId, Vector.empty)
|
||||||
shardBuffers = shardBuffers.updated(shardId, buf :+ ((msg, snd)))
|
shardBuffers = shardBuffers.updated(shardId, buf :+ ((msg, snd)))
|
||||||
|
|
||||||
|
// log some insight to how buffers are filled up every 10% of the buffer capacity
|
||||||
|
val tot = totBufSize + 1
|
||||||
|
if (tot % (bufferSize / 10) == 0) {
|
||||||
|
val logMsg = s"ShardRegion for [$typeName] is using [${100.0 * tot / bufferSize} %] of its buffer capacity."
|
||||||
|
if (tot <= bufferSize / 2)
|
||||||
|
log.info(logMsg)
|
||||||
|
else
|
||||||
|
log.warning(logMsg + " The coordinator might not be available. You might want to check cluster membership status.")
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue