make pruning of CRDT garbage work, #21647

* fix merge issues of DataEnvelope and its pruning
* simplify by removing the tombstones, which didn't work in all cases anyway
* keep the PruningPerformed markers in the DataEnvelope until configured
  TTL has elapsed (wall clock)
* simplify PruningState structure
* also store the pruning markers in durable data
* collect removed nodes from the data, listening on MemberRemoved is not enough
* possibility to disable pruning altogether
* documented caveat for durable data
Patrik Nordwall 2017-01-11 13:19:45 +01:00
parent c5d18c30d6
commit 952be31a7d
28 changed files with 951 additions and 229 deletions
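
The settings involved in these changes can be sketched in HOCON. This is a minimal sketch, not this commit's reference.conf: the names pruning-interval, max-pruning-dissemination, and pruning-marker-time-to-live (plus the durable variant) are the akka.cluster.distributed-data settings as I understand them around this change, and the values shown are illustrative defaults.

akka.cluster.distributed-data {
  # How often the Replicator checks for pruning of data associated
  # with removed cluster nodes. Setting this to off disables
  # pruning altogether.
  pruning-interval = 120 s

  # How long the pruned data is disseminated before the pruning
  # round is considered spread to all replicas.
  max-pruning-dissemination = 300 s

  # Keep the PruningPerformed markers in the DataEnvelope for this
  # long (wall clock) before removing them.
  pruning-marker-time-to-live = 6 h

  durable {
    # Durable data may be offline for long stretches (the documented
    # caveat), so its pruning markers are kept longer.
    pruning-marker-time-to-live = 10 d
  }
}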


@@ -23,7 +23,7 @@ final case class DurableDataSpecConfig(writeBehind: Boolean) extends MultiNodeConfig
val second = role("second")
commonConfig(ConfigFactory.parseString(s"""
-akka.loglevel = DEBUG
+akka.loglevel = INFO
akka.actor.provider = "akka.cluster.ClusterActorRefProvider"
akka.log-dead-letters-during-shutdown = off
akka.cluster.distributed-data.durable.keys = ["durable*"]


@@ -126,18 +126,19 @@ class DurablePruningSpec extends MultiNodeSpec(DurablePruningSpec) with STMultiNodeSpec
}
within(15.seconds) {
+var values = Set.empty[Int]
awaitAssert {
replicator ! Get(KeyA, ReadLocal)
val counter3 = expectMsgType[GetSuccess[GCounter]].dataValue
-counter3.value should be(10)
+val value = counter3.value.intValue
+values += value
+value should be(10)
counter3.state.size should be(3)
}
+values should ===(Set(10))
}
enterBarrier("pruned")
-// let it become tombstone
-Thread.sleep(5000)
runOn(first) {
val addr = cluster2.selfAddress
val sys3 = ActorSystem(system.name, ConfigFactory.parseString(s"""
@@ -150,15 +151,31 @@ class DurablePruningSpec extends MultiNodeSpec(DurablePruningSpec) with STMultiNodeSpec
Cluster(sys3).join(node(first).address)
within(10.seconds) {
+var values = Set.empty[Int]
awaitAssert {
replicator3.tell(Get(KeyA, ReadLocal), probe3.ref)
val counter4 = probe3.expectMsgType[GetSuccess[GCounter]].dataValue
-counter4.value should be(10)
+val value = counter4.value.intValue
+values += value
+value should be(10)
counter4.state.size should be(3)
}
+values should ===(Set(10))
}
+// after merging with others
+replicator3 ! Get(KeyA, ReadAll(remainingOrDefault))
+val counter5 = expectMsgType[GetSuccess[GCounter]].dataValue
+counter5.value should be(10)
+counter5.state.size should be(3)
}
enterBarrier("sys3-started")
replicator ! Get(KeyA, ReadAll(remainingOrDefault))
val counter6 = expectMsgType[GetSuccess[GCounter]].dataValue
counter6.value should be(10)
counter6.state.size should be(3)
enterBarrier("after-1")
}
}
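
A note on the recurring change in this spec: an assertion placed only inside awaitAssert passes as long as the final poll sees the expected value, even if an interim poll briefly observed a resurrected counter. Collecting every observed value in a set and asserting on the whole set afterwards also fails the test in that case. A minimal sketch of the pattern, assuming it runs inside a TestKit-based multi-node spec like the ones above (replicator, KeyA, and the expected value 10 come from the surrounding code):

import akka.cluster.ddata.GCounter
import akka.cluster.ddata.Replicator.{ Get, GetSuccess, ReadLocal }

// record every value observed while polling ...
var values = Set.empty[Int]
awaitAssert {
  replicator ! Get(KeyA, ReadLocal)
  val value = expectMsgType[GetSuccess[GCounter]].dataValue.value.intValue
  values += value
  value should be(10)
}
// ... then require that no other value was ever observed
values should ===(Set(10))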


@@ -122,14 +122,18 @@ class ReplicatorPruningSpec extends MultiNodeSpec(ReplicatorPruningSpec) with STMultiNodeSpec
runOn(first, second) {
within(15.seconds) {
+var values = Set.empty[Int]
awaitAssert {
replicator ! Get(KeyA, ReadLocal)
expectMsgPF() {
case g @ GetSuccess(KeyA, _) ⇒
-g.get(KeyA).value should be(9)
+val value = g.get(KeyA).value.toInt
+values += value
+value should be(9)
g.get(KeyA).needPruningFrom(thirdUniqueAddress) should be(false)
}
}
+values should ===(Set(9))
}
within(5.seconds) {
awaitAssert {
@@ -154,10 +158,12 @@ class ReplicatorPruningSpec extends MultiNodeSpec(ReplicatorPruningSpec) with STMultiNodeSpec
}
enterBarrier("pruning-done")
-// on one of the nodes the data has been updated by the pruning,
-// client can update anyway
+// after pruning performed we should not be able to update with data from removed node
def updateAfterPruning(expectedValue: Int): Unit = {
-replicator ! Update(KeyA, GCounter(), WriteAll(timeout), None)(_ + 1)
+replicator ! Update(KeyA, GCounter(), WriteAll(timeout), None) { existing ⇒
+  // inject data from removed node to simulate bad data
+  existing.merge(oldCounter) + 1
+}
expectMsgPF() {
case UpdateSuccess(KeyA, _) ⇒
replicator ! Get(KeyA, ReadLocal)
@@ -165,6 +171,7 @@ class ReplicatorPruningSpec extends MultiNodeSpec(ReplicatorPruningSpec) with STMultiNodeSpec
retrieved.value should be(expectedValue)
}
}
runOn(first) {
updateAfterPruning(expectedValue = 10)
}
@@ -175,19 +182,19 @@ class ReplicatorPruningSpec extends MultiNodeSpec(ReplicatorPruningSpec) with STMultiNodeSpec
}
enterBarrier("update-second-after-pruning")
-// after pruning performed and maxDissemination it is tombstoned
-// and we should still not be able to update with data from removed node
+// after full replication should still not be able to update with data from removed node
+// but it would not work after removal of the PruningPerformed markers
expectNoMsg(maxPruningDissemination + 3.seconds)
runOn(first) {
updateAfterPruning(expectedValue = 12)
}
enterBarrier("update-first-after-tombstone")
enterBarrier("update-first-after-dissemination")
runOn(second) {
updateAfterPruning(expectedValue = 13)
}
enterBarrier("update-second-after-tombstone")
enterBarrier("update-second-after-dissemination")
enterBarrier("after-1")
}
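
The guarantee the last hunks exercise can also be illustrated outside the multi-node harness. A hypothetical sketch (PruningGuardSketch, incrementWithStale, and staleFromRemovedNode are names I introduce here; staleFromRemovedNode plays the role of the test's oldCounter), using the Replicator Update API as in the spec above and assuming a started cluster node:

import akka.actor.ActorSystem
import akka.cluster.Cluster
import akka.cluster.ddata.{ DistributedData, GCounter, GCounterKey }
import akka.cluster.ddata.Replicator.{ Update, WriteLocal }

object PruningGuardSketch extends App {
  val system = ActorSystem("sketch")
  implicit val cluster = Cluster(system) // GCounter's + needs the local cluster node
  val replicator = DistributedData(system).replicator
  val KeyA = GCounterKey("A")

  // staleFromRemovedNode carries contributions from a node that has been
  // removed and pruned; as long as the PruningPerformed marker is kept in
  // the DataEnvelope (wall-clock TTL), the merged-in contributions are
  // discarded and only the +1 takes effect
  def incrementWithStale(staleFromRemovedNode: GCounter): Unit =
    replicator ! Update(KeyA, GCounter(), WriteLocal) { existing ⇒
      existing.merge(staleFromRemovedNode) + 1
    }
}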