Harden ShardCoordinator state replication, #28856 (#28895) (#29094)

* Possibility to prefer the oldest nodes in ddata writes and reads
  * enabled for Cluster Sharding
* New ReadMajorityPlus and WriteMajorityPlus consistency levels
  * used by Cluster Sharding, with configuration (see the sketch below)
  * also possible to define ReadAll in config
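
Below is a minimal sketch of how the new consistency levels can be used
directly with the classic Replicator API (illustrative only, not the
ShardCoordinator's actual code; the system name, the counter key and the
additional = 2 values are made up):

    import scala.concurrent.duration._
    import akka.actor.ActorSystem
    import akka.cluster.ddata.{ DistributedData, GCounter, GCounterKey, SelfUniqueAddress }
    import akka.cluster.ddata.Replicator.{ Get, ReadMajorityPlus, Update, WriteMajorityPlus }

    object MajorityPlusSketch extends App {
      // Hypothetical system; a real run needs the cluster actor provider configured.
      val system = ActorSystem("sketch")
      implicit val node: SelfUniqueAddress = DistributedData(system).selfUniqueAddress
      val replicator = DistributedData(system).replicator
      val key = GCounterKey("hits") // made-up key for illustration

      // WriteMajorityPlus(timeout, additional): write to a majority of nodes
      // plus `additional` more (capped at all nodes), reducing the risk that
      // a later majority read misses the latest value.
      replicator ! Update(key, GCounter.empty, WriteMajorityPlus(5.seconds, additional = 2))(_ :+ 1)

      // ReadMajorityPlus is the corresponding read-side level.
      replicator ! Get(key, ReadMajorityPlus(5.seconds, additional = 2))
    }

In Cluster Sharding the additional counts come from the
akka.cluster.sharding.coordinator-state.write-majority-plus and
read-majority-plus settings exercised by the test below; per the note
above, read-majority-plus can also be configured to yield ReadAll.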

(cherry picked from commit 4ba835d328)
Patrik Nordwall 2020-05-20 08:34:22 +02:00 committed by GitHub
parent 9299f387dd
commit 3deffc8de2
16 changed files with 758 additions and 190 deletions

akka-cluster-sharding/src/multi-jvm/scala/akka/cluster/sharding/ClusterShardingLeavingSpec.scala

@@ -7,9 +7,12 @@ package akka.cluster.sharding
 import scala.concurrent.duration._
 
 import akka.actor.{ Actor, ActorRef, Props }
+import akka.cluster.MemberStatus
 import akka.serialization.jackson.CborSerializable
 import akka.testkit._
 import akka.util.ccompat._
 
 @ccompatUsedUntil213
 object ClusterShardingLeavingSpec {
   case class Ping(id: String) extends CborSerializable
@@ -39,11 +42,21 @@ object ClusterShardingLeavingSpec {
   }
 }
 
-abstract class ClusterShardingLeavingSpecConfig(mode: String) extends MultiNodeClusterShardingConfig(mode) {
+abstract class ClusterShardingLeavingSpecConfig(mode: String)
+    extends MultiNodeClusterShardingConfig(
+      mode,
+      loglevel = "INFO",
+      additionalConfig = """
+      akka.cluster.sharding.rebalance-interval = 120 s
+      akka.cluster.sharding.distributed-data.majority-min-cap = 1
+      akka.cluster.sharding.coordinator-state.write-majority-plus = 1
+      akka.cluster.sharding.coordinator-state.read-majority-plus = 1
+      """) {
   val first = role("first")
   val second = role("second")
   val third = role("third")
   val fourth = role("fourth")
+  val fifth = role("fifth")
 }
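
The values above are aggressive on purpose. A back-of-the-envelope sketch
of the assumed quorum arithmetic (majorityPlus here is a hypothetical
helper mirroring the documented majority-plus semantics, not an Akka API):

    // Majority is n/2 + 1, raised by `additional`, floored at `min-cap`,
    // capped at the total number of replicas n (assumed semantics).
    def majorityPlus(n: Int, additional: Int, minCap: Int): Int =
      math.min(n, math.max(n / 2 + 1 + additional, minCap))

    // With this spec's 5 nodes, majority-min-cap = 1 and
    // write/read-majority-plus = 1: writes and reads each involve
    // min(5, 5/2 + 1 + 1) = 4 replicas, so write and read quorums
    // overlap in at least 3 nodes even while 2 nodes are leaving.
    assert(majorityPlus(5, additional = 1, minCap = 1) == 4)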
@@ -60,11 +73,13 @@ class PersistentClusterShardingLeavingMultiJvmNode1 extends PersistentClusterSha
 class PersistentClusterShardingLeavingMultiJvmNode2 extends PersistentClusterShardingLeavingSpec
 class PersistentClusterShardingLeavingMultiJvmNode3 extends PersistentClusterShardingLeavingSpec
 class PersistentClusterShardingLeavingMultiJvmNode4 extends PersistentClusterShardingLeavingSpec
+class PersistentClusterShardingLeavingMultiJvmNode5 extends PersistentClusterShardingLeavingSpec
 
 class DDataClusterShardingLeavingMultiJvmNode1 extends DDataClusterShardingLeavingSpec
 class DDataClusterShardingLeavingMultiJvmNode2 extends DDataClusterShardingLeavingSpec
 class DDataClusterShardingLeavingMultiJvmNode3 extends DDataClusterShardingLeavingSpec
 class DDataClusterShardingLeavingMultiJvmNode4 extends DDataClusterShardingLeavingSpec
+class DDataClusterShardingLeavingMultiJvmNode5 extends DDataClusterShardingLeavingSpec
 
 abstract class ClusterShardingLeavingSpec(multiNodeConfig: ClusterShardingLeavingSpecConfig)
     extends MultiNodeClusterShardingSpec(multiNodeConfig)
@@ -89,9 +104,16 @@ abstract class ClusterShardingLeavingSpec(multiNodeConfig: ClusterShardingLeavin
       startPersistenceIfNeeded(startOn = first, setStoreOn = roles)
 
       join(first, first, onJoinedRunOnFrom = startSharding())
-      join(second, first, onJoinedRunOnFrom = startSharding())
-      join(third, first, onJoinedRunOnFrom = startSharding())
-      join(fourth, first, onJoinedRunOnFrom = startSharding())
+      join(second, first, onJoinedRunOnFrom = startSharding(), assertNodeUp = false)
+      join(third, first, onJoinedRunOnFrom = startSharding(), assertNodeUp = false)
+      join(fourth, first, onJoinedRunOnFrom = startSharding(), assertNodeUp = false)
+      join(fifth, first, onJoinedRunOnFrom = startSharding(), assertNodeUp = false)
+
+      // all Up, everywhere before continuing
+      awaitAssert {
+        cluster.state.members.size should ===(roles.size)
+        cluster.state.members.unsorted.map(_.status) should ===(Set(MemberStatus.Up))
+      }
 
       enterBarrier("after-2")
     }
@@ -105,6 +127,7 @@ abstract class ClusterShardingLeavingSpec(multiNodeConfig: ClusterShardingLeavin
           id -> expectMsgType[ActorRef]
         }).toMap
         shardLocations ! Locations(locations)
+        system.log.debug("Original locations: {}", locations)
       }
       enterBarrier("after-3")
     }
@@ -112,28 +135,36 @@ abstract class ClusterShardingLeavingSpec(multiNodeConfig: ClusterShardingLeavin
     "recover after leaving coordinator node" in {
       system.actorSelection(node(first) / "user" / "shardLocations") ! GetLocations
       val Locations(originalLocations) = expectMsgType[Locations]
-      val firstAddress = node(first).address
-      runOn(third) {
-        cluster.leave(node(first).address)
+      val numberOfNodesLeaving = 2
+      val leavingRoles = roles.take(numberOfNodesLeaving)
+      val leavingNodes = leavingRoles.map(address)
+      val remainingRoles = roles.drop(numberOfNodesLeaving)
+
+      runOn(roles.last) {
+        leavingNodes.foreach { a =>
+          cluster.leave(a)
+        }
       }
 
-      runOn(first) {
+      runOn(leavingRoles: _*) {
         watch(region)
         expectTerminated(region, 15.seconds)
       }
-      enterBarrier("stopped")
+      // more stress by not having the barrier here
 
-      runOn(second, third, fourth) {
+      runOn(remainingRoles: _*) {
         within(15.seconds) {
           awaitAssert {
             val probe = TestProbe()
             originalLocations.foreach {
               case (id, ref) =>
                 region.tell(Ping(id), probe.ref)
-                if (ref.path.address == firstAddress)
-                  probe.expectMsgType[ActorRef](1.second) should not be (ref)
-                else
+                if (leavingNodes.contains(ref.path.address)) {
+                  val newRef = probe.expectMsgType[ActorRef](1.second)
+                  newRef should not be (ref)
+                  system.log.debug("Moved [{}] from [{}] to [{}]", id, ref, newRef)
+                } else
                   probe.expectMsg(1.second, ref) // should not move
             }
           }