ByteString.indexOf optimized to speed up framing stage #21530

This commit is contained in:
Johan Andrén 2016-10-19 11:26:50 +02:00 committed by GitHub
parent 3c8edee26a
commit 50370c69a3
3 changed files with 184 additions and 1 deletions

View file

@ -458,6 +458,82 @@ class ByteStringSpec extends WordSpec with Matchers with Checkers {
ByteStrings(ByteString1.fromString("a"), ByteString1.fromString("bc")).take(100) should ===(ByteString("abc"))
ByteStrings(ByteString1.fromString("a"), ByteString1.fromString("bc")).drop(1).take(100) should ===(ByteString("bc"))
}
"indexOf" in {
  // Nothing can be found in an empty ByteString.
  ByteString.empty.indexOf(5) should ===(-1)

  // Single-segment ByteString1: every element plus one that is absent.
  val single = ByteString1.fromString("abc")
  Seq('a' -> 0, 'b' -> 1, 'c' -> 2, 'd' -> -1).foreach {
    case (elem, expected) =>
      single.indexOf(elem) should ===(expected)
  }

  // The same expectations must hold for the two-segment ByteStrings and for its compacted form;
  // 'd' is deliberately missing from "abc" + "efg".
  val expectations = Seq('a' -> 0, 'c' -> 2, 'd' -> -1, 'e' -> 3, 'f' -> 4, 'g' -> 5)

  val byteStrings = ByteStrings(ByteString1.fromString("abc"), ByteString1.fromString("efg"))
  expectations.foreach {
    case (elem, expected) =>
      byteStrings.indexOf(elem) should ===(expected)
  }

  val compact = byteStrings.compact
  expectations.foreach {
    case (elem, expected) =>
      compact.indexOf(elem) should ===(expected)
  }
}
"indexOf from offset" in {
  // Empty ByteString: no start offset (negative, zero or past the end) can produce a hit.
  Seq(-1, 0, 1).foreach { from =>
    ByteString.empty.indexOf(5, from) should ===(-1)
  }

  // Each row is (element, start offset, expected index).
  val byteString1 = ByteString1.fromString("abc")
  Seq(
    ('d', -1, -1),
    ('d', 0, -1),
    ('d', 1, -1),
    ('d', 4, -1),
    ('a', -1, 0),
    ('a', 0, 0),
    ('a', 1, -1)).foreach {
      case (elem, from, expected) =>
        byteString1.indexOf(elem, from) should ===(expected)
    }

  // Shared by the two-segment ByteStrings ("abc" + "efg") and its compacted form.
  // Covers negative offsets, offsets inside either segment, and offsets at or past the end.
  val expectations = Seq(
    ('c', -1, 2),
    ('c', 0, 2),
    ('c', 2, 2),
    ('c', 3, -1),
    ('e', -1, 3),
    ('e', 0, 3),
    ('e', 1, 3),
    ('e', 4, -1),
    ('e', 6, -1),
    ('g', -1, 5),
    ('g', 0, 5),
    ('g', 1, 5),
    ('g', 4, 5),
    ('g', 5, 5),
    ('g', 6, -1))

  val byteStrings = ByteStrings(ByteString1.fromString("abc"), ByteString1.fromString("efg"))
  expectations.foreach {
    case (elem, from, expected) =>
      byteStrings.indexOf(elem, from) should ===(expected)
  }

  val compact = byteStrings.compact
  expectations.foreach {
    case (elem, from, expected) =>
      compact.indexOf(elem, from) should ===(expected)
  }
}
}
"A ByteString" must {
@ -551,6 +627,10 @@ class ByteStringSpec extends WordSpec with Matchers with Checkers {
// Property checks over generated ByteStrings. `likeVector` is defined elsewhere in this spec;
// presumably it compares the result with the same call on an equivalent Vector -- confirm.
"calling dropWhile" in { check { (a: ByteString, b: Byte) => likeVector(a) { _.dropWhile(_ != b) } } }
"calling indexWhere" in { check { (a: ByteString, b: Byte) => likeVector(a) { _.indexWhere(_ == b) } } }
"calling indexOf" in { check { (a: ByteString, b: Byte) => likeVector(a) { _.indexOf(b) } } }
// Vector behaves oddly for negative start indexes (SI-9936, fixed in Scala 2.12),
// so negative indexes are clamped to 0 here; they do not make much sense anyway.
"calling indexOf(elem, idx)" in { check { (a: ByteString, b: Byte, idx: Int) => likeVector(a) { _.indexOf(b, math.max(0, idx)) } } }
"calling foreach" in { check { a: ByteString => likeVector(a) { it => var acc = 0; it foreach { acc += _ }; acc } } }
"calling foldLeft" in { check { a: ByteString => likeVector(a) { _.foldLeft(0) { _ + _ } } } }
"calling toArray" in { check { a: ByteString => likeVector(a) { _.toArray.toSeq } } }

View file

@ -162,6 +162,20 @@ object ByteString {
if (n <= 0) this
else toByteString1.drop(n)
/** Index of the first occurrence of `elem`, searching from the beginning; -1 if there is none. */
override def indexOf[B >: Byte](elem: B): Int = indexOf(elem, 0)

/**
 * Index of the first occurrence of `elem` at or after `from`, or -1 if there is none.
 *
 * A negative `from` is treated as 0; a `from` at or beyond `length` yields -1 without scanning.
 */
override def indexOf[B >: Byte](elem: B, from: Int): Int =
  if (from >= length) -1
  else {
    // Plain while loop directly over the backing array; advance until a match or the end.
    var pos = math.max(from, 0)
    while (pos < length && bytes(pos) != elem) pos += 1
    if (pos < length) pos else -1
  }
override def slice(from: Int, until: Int): ByteString =
if (from <= 0 && until >= length) this
else if (from >= length || until <= 0 || from >= until) ByteString.empty
@ -305,6 +319,20 @@ object ByteString {
}
}
/** Index of the first occurrence of `elem`, searching from the beginning; -1 if there is none. */
override def indexOf[B >: Byte](elem: B): Int = indexOf(elem, 0)

/**
 * Index of the first occurrence of `elem` at or after `from`, or -1 if there is none.
 *
 * Returned indexes are relative to this ByteString; `startIndex` is added only when
 * reading from the backing array. A negative `from` is treated as 0, and a `from`
 * at or beyond `length` yields -1 without scanning.
 */
override def indexOf[B >: Byte](elem: B, from: Int): Int =
  if (from >= length) -1
  else {
    // Advance until a match or the end of this ByteString's window.
    var pos = math.max(from, 0)
    while (pos < length && bytes(startIndex + pos) != elem) pos += 1
    if (pos < length) pos else -1
  }
// Returns a SerializationProxy wrapping this instance.
// NOTE(review): presumably the Java serialization `writeReplace` hook -- confirm.
protected def writeReplace(): AnyRef = new SerializationProxy(this)
}
@ -505,6 +533,34 @@ object ByteString {
new ByteStrings(bytestrings(fullDrops).drop1(remainingToDrop) +: bytestrings.drop(fullDrops + 1), length - n)
}
/** Index of the first occurrence of `elem`, searching from the beginning; -1 if there is none. */
override def indexOf[B >: Byte](elem: B): Int = indexOf(elem, 0)

/**
 * Index of the first occurrence of `elem` at or after `from`, or -1 if there is none.
 *
 * Walks the segments in `bytestrings` in order, delegating the scan to each segment's own
 * `indexOf` and translating a hit back into an index relative to this whole ByteString.
 * A negative `from` is treated as 0; a `from` at or beyond `length` yields -1.
 */
override def indexOf[B >: Byte](elem: B, from: Int): Int =
  if (from >= length) -1
  else {
    val segmentCount = bytestrings.size

    // segment:         index into `bytestrings` of the segment being examined
    // offsetInSegment: where to start within that segment; it goes negative once the search
    //                  has moved past `from`
    //                  NOTE(review): relies on the segment's indexOf treating a negative start as 0
    // skipped:         total length of the segments already passed
    @tailrec
    def search(segment: Int, offsetInSegment: Int, skipped: Int): Int =
      if (segment >= segmentCount) -1
      else {
        val current = bytestrings(segment)
        // A segment that lies entirely before the start offset is never scanned.
        val hit =
          if (current.length <= offsetInSegment) -1
          else current.indexOf(elem, offsetInSegment)

        if (hit >= 0) hit + skipped
        else search(segment + 1, offsetInSegment - current.length, skipped + current.length)
      }

    search(0, math.max(from, 0), 0)
  }
// Returns a SerializationProxy wrapping this instance.
// NOTE(review): presumably the Java serialization `writeReplace` hook -- confirm.
protected def writeReplace(): AnyRef = new SerializationProxy(this)
}
@ -586,7 +642,9 @@ sealed abstract class ByteString extends IndexedSeq[Byte] with IndexedSeqOptimiz
// Splits at position n: the pair is built from take(n) and drop(n).
override def splitAt(n: Int): (ByteString, ByteString) = (take(n), drop(n))
// Index of the first element satisfying `p`, as reported by the iterator's indexWhere.
override def indexWhere(p: Byte => Boolean): Int = iterator.indexWhere(p)
override def indexOf[B >: Byte](elem: B): Int = iterator.indexOf(elem)
// optimized in subclasses
override def indexOf[B >: Byte](elem: B): Int = indexOf(elem, 0)
override def toString(): String = {
val maxSize = 100

View file

@ -0,0 +1,45 @@
/**
* Copyright (C) 2014-2016 Lightbend Inc. <http://www.lightbend.com>
*/
package akka.util
import java.util.concurrent.TimeUnit
import org.openjdk.jmh.annotations._
/**
 * JMH benchmark for `ByteString.indexOf(elem, from)`, run against a concatenated
 * ByteString and against its compacted form.
 */
@State(Scope.Benchmark)
@Measurement(timeUnit = TimeUnit.MILLISECONDS)
class ByteString_indexOf_Benchmark {

  // 22 bytes ('a' to 'v') assembled from three separately created pieces.
  val start = ByteString("abcdefg") ++ ByteString("hijklmno") ++ ByteString("pqrstuv")

  // Five copies of `start` appended left to right, followed by "xyz": 113 bytes in total,
  // with 'z' occurring only as the very last byte (index 112).
  val bss = Seq.fill(4)(start).foldLeft(start)(_ ++ _) ++ ByteString("xyz")

  // The same content, compacted.
  val bs = bss.compact

  /*
   Recorded results (throughput, 20 iterations each)

   original
     ByteString_indexOf_Benchmark.bs1_indexOf_from                 thrpt  20     999335.124 ±  234047.176  ops/s
     ByteString_indexOf_Benchmark.bss_indexOf_from_best_case       thrpt  20   42735542.833 ± 1082874.815  ops/s
     ByteString_indexOf_Benchmark.bss_indexOf_from_far_index_case  thrpt  20    4941422.104 ±  109132.224  ops/s
     ByteString_indexOf_Benchmark.bss_indexOf_from_worst_case      thrpt  20     328123.207 ±   16550.271  ops/s

   optimized
     ByteString_indexOf_Benchmark.bs1_indexOf_from                 thrpt  20  339488707.553 ± 9680274.621  ops/s
     ByteString_indexOf_Benchmark.bss_indexOf_from_best_case       thrpt  20  126385479.889 ± 3644024.423  ops/s
     ByteString_indexOf_Benchmark.bss_indexOf_from_far_index_case  thrpt  20   14282036.963 ±  529652.214  ops/s
     ByteString_indexOf_Benchmark.bss_indexOf_from_worst_case      thrpt  20    7815676.051 ±  323031.073  ops/s
   */

  // Worst case: the search starts near the front and the only match is the last byte.
  @Benchmark
  def bss_indexOf_from_worst_case: Int = bss.indexOf('z', 1)

  // Far index: the start offset is already close to the end, so most of the data is skipped.
  @Benchmark
  def bss_indexOf_from_far_index_case: Int = bss.indexOf('z', 109)

  // Best case: the match is the very first byte.
  @Benchmark
  def bss_indexOf_from_best_case: Int = bss.indexOf('a', 0)

  // Compacted ByteString searched for a character that is not among the letters it was
  // built from, so the scan runs from offset 5 to the end.
  @Benchmark
  def bs1_indexOf_from: Int = bs.indexOf('ö', 5)
}