Quarantine and cleanup idle associations, #24972

* fix NPE in shutdownTransport
  * perhaps because shutdown before started
  * system.dispatcher is used in other places of the shutdown
* improve logging of compression advertisement progress
* adjust RestartFlow.withBackoff parameters
* quarantine after ActorSystemTerminating signal
  (will cleanup compressions)
* Quarantine idle associations
  * liveness checks by sending extra HandshakeReq and updating
    lastUsed when the reply is received
  * conservative default value to survive network partition, in
    case no other messages are sent
* Adjust logging and QuarantinedEvent for harmless quarantine
  * Harmless if it was via the shutdown signal or cluster leaving
This commit is contained in:
Patrik Nordwall 2018-04-25 08:38:27 +02:00
parent f976f8d793
commit 7fc7744049
15 changed files with 297 additions and 176 deletions

View file

@ -108,7 +108,8 @@ private[cluster] class ClusterRemoteWatcher(
clusterNodes -= m.address
if (previousStatus == MemberStatus.Down) {
quarantine(m.address, Some(m.uniqueAddress.longUid), s"Cluster member removed, previous status [$previousStatus]")
quarantine(m.address, Some(m.uniqueAddress.longUid),
s"Cluster member removed, previous status [$previousStatus]", harmless = false)
} else if (arteryEnabled) {
// Don't quarantine gracefully removed members (leaving) directly,
// give Cluster Singleton some time to exchange TakeOver/HandOver messages.
@ -128,14 +129,15 @@ private[cluster] class ClusterRemoteWatcher(
pendingDelayedQuarantine.find(_.address == newIncarnation.address).foreach { oldIncarnation
pendingDelayedQuarantine -= oldIncarnation
quarantine(oldIncarnation.address, Some(oldIncarnation.longUid),
s"Cluster member removed, new incarnation joined")
s"Cluster member removed, new incarnation joined", harmless = true)
}
}
def delayedQuarantine(m: Member, previousStatus: MemberStatus): Unit = {
if (pendingDelayedQuarantine(m.uniqueAddress)) {
pendingDelayedQuarantine -= m.uniqueAddress
quarantine(m.address, Some(m.uniqueAddress.longUid), s"Cluster member removed, previous status [$previousStatus]")
quarantine(m.address, Some(m.uniqueAddress.longUid), s"Cluster member removed, previous status [$previousStatus]",
harmless = true)
}
}