2018-10-29 17:19:37 +08:00
|
|
|
/*
|
2019-01-02 18:55:26 +08:00
|
|
|
* Copyright (C) 2009-2019 Lightbend Inc. <https://www.lightbend.com>
|
2014-03-23 20:09:46 +01:00
|
|
|
*/
|
2018-04-24 16:03:55 +01:00
|
|
|
|
2015-04-27 14:48:28 +02:00
|
|
|
package akka.cluster.sharding
|
2014-03-23 20:09:46 +01:00
|
|
|
|
|
|
|
|
import java.io.File
|
2018-06-05 06:58:17 +01:00
|
|
|
|
2015-04-27 14:48:28 +02:00
|
|
|
import akka.cluster.sharding.ShardRegion.Passivate
|
2014-07-08 17:51:18 +01:00
|
|
|
|
2014-03-23 20:09:46 +01:00
|
|
|
import scala.concurrent.duration._
|
|
|
|
|
import org.apache.commons.io.FileUtils
|
|
|
|
|
import com.typesafe.config.ConfigFactory
|
2014-07-08 17:51:18 +01:00
|
|
|
import akka.actor._
|
2018-06-05 06:58:17 +01:00
|
|
|
import akka.cluster.{ Cluster, MemberStatus, MultiNodeClusterSpec }
|
2014-03-23 20:09:46 +01:00
|
|
|
import akka.persistence.Persistence
|
|
|
|
|
import akka.persistence.journal.leveldb.SharedLeveldbJournal
|
|
|
|
|
import akka.persistence.journal.leveldb.SharedLeveldbStore
|
|
|
|
|
import akka.remote.testconductor.RoleName
|
|
|
|
|
import akka.remote.testkit.MultiNodeConfig
|
|
|
|
|
import akka.remote.testkit.MultiNodeSpec
|
|
|
|
|
import akka.remote.testkit.STMultiNodeSpec
|
|
|
|
|
import akka.remote.transport.ThrottlerTransportAdapter.Direction
|
|
|
|
|
import akka.testkit._
|
2019-03-26 14:41:29 +01:00
|
|
|
import akka.util.ccompat._
|
2014-03-23 20:09:46 +01:00
|
|
|
|
2015-08-20 13:24:39 +03:00
|
|
|
/**
 * Message protocol, entity actor and shard-region extractors shared by the
 * cluster sharding failure specs.
 */
object ClusterShardingFailureSpec {

  /** Asks entity `id` for its current counter; answered with a [[Value]]. */
  case class Get(id: String)

  /** Adds `i` to the counter of entity `id`; no reply is sent. */
  case class Add(id: String, i: Int)

  /** Reply to [[Get]]: the counter `n` currently held by entity `id`. */
  case class Value(id: String, n: Int)

  /**
   * Sharded entity holding a plain in-memory counter. The counter lives only
   * in the actor instance, so a newly started instance begins at 0 again.
   */
  class Entity extends Actor {
    var n = 0

    def receive = {
      case Get(id)   => sender() ! Value(id, n)
      case Add(_, i) => n += i
    }
  }

  /** Routes both protocol messages to the entity named by their `id` field. */
  val extractEntityId: ShardRegion.ExtractEntityId = {
    case m @ Get(id)    => (id, m)
    case m @ Add(id, _) => (id, m)
  }

  /**
   * The shard id is the first character of the entity id, so e.g. "20" and
   * "21" share shard "2".
   *
   * `ShardRegion.StartEntity` must be handled as well because the region is
   * started with remember-entities enabled: remembered entities are restarted
   * through that message, and an extractor that does not match it fails with
   * a `MatchError` instead of resolving the shard.
   */
  val extractShardId: ShardRegion.ExtractShardId = {
    case Get(id)                     => id.charAt(0).toString
    case Add(id, _)                  => id.charAt(0).toString
    case ShardRegion.StartEntity(id) => id.charAt(0).toString
  }

}
|
|
|
|
|
|
|
|
|
|
/**
 * Multi-node configuration for the sharding failure specs.
 *
 * Declares the three roles (`controller`, `first`, `second`), a shared LevelDB
 * journal plus local snapshot store, and short sharding backoffs.
 *
 * @param mode value written into `akka.cluster.sharding.state-store-mode`
 */
abstract class ClusterShardingFailureSpecConfig(val mode: String) extends MultiNodeConfig {
  val controller = role("controller")
  val first = role("first")
  val second = role("second")

  // Spec-specific settings; everything not listed here comes from the
  // MultiNodeClusterSpec fallback applied below.
  private val specificConfig = ConfigFactory.parseString(s"""
    akka.loglevel = INFO
    akka.actor.provider = "cluster"
    akka.remote.log-remote-lifecycle-events = off
    akka.cluster.auto-down-unreachable-after = 0s
    akka.cluster.roles = ["backend"]
    akka.persistence.journal.plugin = "akka.persistence.journal.leveldb-shared"
    akka.persistence.journal.leveldb-shared {
      timeout = 5s
      store {
        native = off
        dir = "target/ClusterShardingFailureSpec/journal"
      }
    }
    akka.persistence.snapshot-store.plugin = "akka.persistence.snapshot-store.local"
    akka.persistence.snapshot-store.local.dir = "target/ClusterShardingFailureSpec/snapshots"
    akka.cluster.sharding {
      coordinator-failure-backoff = 3s
      shard-failure-backoff = 3s
      state-store-mode = "$mode"
    }
    akka.cluster.sharding.distributed-data.durable.lmdb {
      dir = target/ClusterShardingFailureSpec/sharding-ddata
      map-size = 10 MiB
    }
    """)

  commonConfig(specificConfig.withFallback(MultiNodeClusterSpec.clusterConfig))

  // The spec blackholes links through the test conductor, which needs the test transport.
  testTransport(on = true)
}
|
2014-03-23 20:09:46 +01:00
|
|
|
|
2015-08-20 13:24:39 +03:00
|
|
|
// One configuration per sharding state-store mode.
object PersistentClusterShardingFailureSpecConfig
    extends ClusterShardingFailureSpecConfig(ClusterShardingSettings.StateStoreModePersistence)

object DDataClusterShardingFailureSpecConfig
    extends ClusterShardingFailureSpecConfig(ClusterShardingSettings.StateStoreModeDData)

// Concrete specs binding the shared test body to one of the configurations above.
class PersistentClusterShardingFailureSpec
    extends ClusterShardingFailureSpec(PersistentClusterShardingFailureSpecConfig)

class DDataClusterShardingFailureSpec
    extends ClusterShardingFailureSpec(DDataClusterShardingFailureSpecConfig)

// Three node classes per mode, matching the three declared roles
// (presumably one class per JVM, following the multi-jvm naming convention).
class PersistentClusterShardingFailureMultiJvmNode1 extends PersistentClusterShardingFailureSpec
class PersistentClusterShardingFailureMultiJvmNode2 extends PersistentClusterShardingFailureSpec
class PersistentClusterShardingFailureMultiJvmNode3 extends PersistentClusterShardingFailureSpec

class DDataClusterShardingFailureMultiJvmNode1 extends DDataClusterShardingFailureSpec
class DDataClusterShardingFailureMultiJvmNode2 extends DDataClusterShardingFailureSpec
class DDataClusterShardingFailureMultiJvmNode3 extends DDataClusterShardingFailureSpec
|
2014-03-23 20:09:46 +01:00
|
|
|
|
2019-03-11 10:38:24 +01:00
|
|
|
/**
 * Multi-node spec that starts a sharded `Entity` region (with remembered
 * entities) on `first` and `second`, blackholes network links, and verifies
 * that the region answers again once the links are restored.
 *
 * In persistence mode the links between `controller` (which hosts the shared
 * journal store) and the two sharding nodes are cut; in ddata mode the link
 * between `first` and `second` is cut.
 */
abstract class ClusterShardingFailureSpec(config: ClusterShardingFailureSpecConfig)
    extends MultiNodeSpec(config)
    with STMultiNodeSpec
    with ImplicitSender {
  import ClusterShardingFailureSpec._
  import config._

  override def initialParticipants = roles.size

  // Parent directory of the durable ddata store; removed before and after the run.
  val storageLocations = {
    val lmdbDir = system.settings.config.getString("akka.cluster.sharding.distributed-data.durable.lmdb.dir")
    List(new File(lmdbDir).getParentFile)
  }

  /** Deletes every storage location that currently exists. */
  private def purgeStorageLocations(): Unit =
    for (location <- storageLocations if location.exists) FileUtils.deleteQuietly(location)

  override protected def atStartup(): Unit = {
    purgeStorageLocations()
    enterBarrier("startup")
  }

  override protected def afterTermination(): Unit = {
    purgeStorageLocations()
  }

  val cluster = Cluster(system)

  /**
   * On node `from`: joins the cluster via node `to`, starts sharding and waits
   * until this node is a member and all members are `Up`. All nodes then meet
   * at the "<from>-joined" barrier.
   */
  def join(from: RoleName, to: RoleName): Unit = {
    runOn(from) {
      cluster.join(node(to).address)
      startSharding()

      within(remaining) {
        awaitAssert {
          cluster.state.members.unsorted.map(_.uniqueAddress) should contain(cluster.selfUniqueAddress)
          cluster.state.members.unsorted.map(_.status) should ===(Set(MemberStatus.Up))
        }
      }
    }
    enterBarrier(s"${from.name}-joined")
  }

  /** Starts the "Entity" shard region on this node with remember-entities enabled. */
  def startSharding(): Unit = {
    val sharding = ClusterSharding(system)
    sharding.start(
      typeName = "Entity",
      entityProps = Props[Entity],
      settings = ClusterShardingSettings(system).withRememberEntities(true),
      extractEntityId = extractEntityId,
      extractShardId = extractShardId)
  }

  lazy val region = ClusterSharding(system).shardRegion("Entity")

  def isDdataMode: Boolean = mode == ClusterShardingSettings.StateStoreModeDData

  /** Sends `Get(id)` to the region and expects `Value(id, expected)` as the next message. */
  private def expectCounter(id: String, expected: Int): Unit = {
    region ! Get(id)
    expectMsg(Value(id, expected))
  }

  /** The node pairs whose connection is blackholed, in the order they are processed. */
  private def linksToFail: List[(RoleName, RoleName)] =
    if (isDdataMode) List(first -> second)
    else List(controller -> first, controller -> second)

  s"Cluster sharding ($mode) with flaky journal/network" must {

    if (!isDdataMode) {
      "setup shared journal" in {
        // start the Persistence extension
        Persistence(system)
        runOn(controller) {
          system.actorOf(Props[SharedLeveldbStore], "store")
        }
        enterBarrier("persistence-started")

        runOn(first, second) {
          // look up the store actor on the controller and register it as the shared journal
          system.actorSelection(node(controller) / "user" / "store") ! Identify(None)
          val sharedStore = expectMsgType[ActorIdentity](10.seconds).ref.get
          SharedLeveldbJournal.setStore(sharedStore, system)
        }

        enterBarrier("after-1")
      }
    }

    "join cluster" in within(20.seconds) {
      join(first, first)
      join(second, first)

      runOn(first) {
        region ! Add("10", 1)
        region ! Add("20", 2)
        region ! Add("21", 3)
        expectCounter("10", 1)
        expectCounter("20", 2)
        expectCounter("21", 3)
      }

      enterBarrier("after-2")
    }

    "recover after journal/network failure" in within(20.seconds) {
      runOn(controller) {
        linksToFail.foreach {
          case (a, b) => testConductor.blackhole(a, b, Direction.Both).await
        }
      }
      enterBarrier("journal-blackholed")

      runOn(first) {
        // try with a new shard, will not reply until journal/network is available again
        region ! Add("40", 4)
        val silentProbe = TestProbe()
        region.tell(Get("40"), silentProbe.ref)
        silentProbe.expectNoMessage(1.second)
      }

      enterBarrier("first-delayed")

      runOn(controller) {
        linksToFail.foreach {
          case (a, b) => testConductor.passThrough(a, b, Direction.Both).await
        }
      }
      enterBarrier("journal-ok")

      runOn(first) {
        expectCounter("21", 3)
        // the reply came from the entity itself; its parent is the shard actor
        val entity21 = lastSender
        val shard2 = system.actorSelection(entity21.path.parent)

        //Test the ShardCoordinator allocating shards after a journal/network failure
        region ! Add("30", 3)

        //Test the Shard starting entities and persisting after a journal/network failure
        region ! Add("11", 1)

        //Test the Shard passivate works after a journal failure
        shard2.tell(Passivate(PoisonPill), entity21)

        // a fresh Entity instance starts at 0, so seeing 0 shows the old one was stopped
        awaitCond({
          region ! Get("21")
          expectMsgType[Value] == Value("21", 0)
        }, message = "Passivating did not reset Value down to 0")

        region ! Add("21", 1)

        expectCounter("21", 1)
        expectCounter("30", 3)
        expectCounter("11", 1)
        expectCounter("40", 4)
      }

      enterBarrier("verified-first")

      runOn(second) {
        region ! Add("10", 1)
        region ! Add("20", 2)
        region ! Add("30", 3)
        region ! Add("11", 4)
        expectCounter("10", 2)
        expectCounter("11", 5)
        expectCounter("20", 4)
        expectCounter("30", 6)
      }

      enterBarrier("after-3")

    }

  }
}
|