ByteString performance regression toArray on 2.13 (#28419)

* Benchmark with numbers
* Better optimization, now on par with 2.12
* Simplify
* Review do'hs and unused errors fixed
* Move build up of composite out to setup
* A little thing to make MiMa happy
* Apply suggestions from code review
Co-authored-by: Arnout Engelen <github@bzzt.net>
This commit is contained in:
Johan Andrén 2020-01-08 14:14:45 +01:00 committed by Patrik Nordwall
parent 56c9f7d54e
commit 0f4e146623
3 changed files with 100 additions and 16 deletions

View file

@ -0,0 +1,3 @@
# Fix for 2.13 performance regression #28419
ProblemFilters.exclude[FinalMethodProblem]("akka.util.ByteString.copyToArray")
ProblemFilters.exclude[FinalMethodProblem]("akka.util.ByteString.toArray")

View file

@ -14,7 +14,6 @@ import scala.collection.mutable.{ Builder, WrappedArray }
import scala.collection.{ immutable, mutable }
import scala.collection.immutable.{ IndexedSeq, IndexedSeqOps, StrictOptimizedSeqOps, VectorBuilder }
import scala.reflect.ClassTag
import com.github.ghik.silencer.silent
object ByteString {
@ -253,6 +252,14 @@ object ByteString {
buffer.putByteArrayUnsafe(bytes)
}
override def copyToArray[B >: Byte](dest: Array[B], start: Int, len: Int): Int = {
val copied = math.min(math.min(len, bytes.length), dest.length - start)
if (copied > 0) {
System.arraycopy(bytes, 0, dest, start, copied)
}
copied
}
}
/** INTERNAL API: ByteString backed by exactly one array, with start / end markers */
@ -386,6 +393,15 @@ object ByteString {
}
}
override def copyToArray[B >: Byte](dest: Array[B], start: Int, len: Int): Int = {
// min of the bytes available to copy, bytes there is room for in dest and the requested number of bytes
val copied = math.max(math.min(math.min(len, length), dest.length - start), 0)
if (copied > 0) {
System.arraycopy(bytes, 0, dest, start, copied)
}
copied
}
protected def writeReplace(): AnyRef = new SerializationProxy(this)
}
@ -628,6 +644,24 @@ object ByteString {
}
}
override def copyToArray[B >: Byte](dest: Array[B], start: Int, len: Int): Int = {
if (isCompact) bytestrings.head.copyToArray(dest, start, len)
else {
// min of the bytes available top copy, bytes there is room for in dest and the requested number of bytes
val copied = math.min(math.min(len, length), dest.length - start)
if (copied > 0) {
val bsIterator = bytestrings.iterator
var pos = 0
while (pos < copied) {
val current = bsIterator.next()
val copied = current.copyToArray(dest, pos, len - pos)
pos += copied
}
}
copied
}
}
protected def writeReplace(): AnyRef = new SerializationProxy(this)
}
@ -754,10 +788,21 @@ sealed abstract class ByteString
*/
protected[ByteString] def toArray: Array[Byte] = toArray[Byte]
override def toArray[B >: Byte](implicit arg0: ClassTag[B]): Array[B] = iterator.toArray
final override def toArray[B >: Byte](implicit arg0: ClassTag[B]): Array[B] = {
// super uses byteiterator
val array = new Array[B](size)
copyToArray(array, 0, size)
array
}
final override def copyToArray[B >: Byte](xs: Array[B], start: Int): Int = {
// super uses byteiterator
copyToArray(xs, start, size.min(xs.size))
}
// optimized in all subclasses, avoiding usage of the iterator to save allocations/transformations
override def copyToArray[B >: Byte](xs: Array[B], start: Int, len: Int): Int =
iterator.copyToArray(xs, start, len)
throw new UnsupportedOperationException("Method copyToArray is not implemented in ByteString")
override def foreach[@specialized U](f: Byte => U): Unit = iterator.foreach(f)

View file

@ -13,25 +13,61 @@ import org.openjdk.jmh.infra.Blackhole
@Measurement(timeUnit = TimeUnit.MILLISECONDS)
class ByteString_toArray_Benchmark {
val b = Array.ofDim[Byte](1024 * 10244)
val bb = ByteString(b)
var bs: ByteString = _
var composed: ByteString = _
@Param(Array("10", "100", "1000"))
var kb = 0
/*
Benchmark Mode Cnt Score Error Units
akka-bench-jmh/jmh:run -f 1 -wi 5 -i 5 .*ByteString_toArray_Benchmark.*
Benchmark (kb) Mode Cnt Score Error Units
2.12
ByteString_toArray_Benchmark.to_array thrpt 3 537,116 ± 525,663 ops/s
2.13 before fix
ByteString_toArray_Benchmark.to_array thrpt 3 165,869 ± 243,524 ops/s
2.13 with fix #28114
ByteString_toArray_Benchmark.to_array thrpt 3 525,521 ± 346,830 ops/s
composed_bs_to_array 10 thrpt 5 5625.395 ± 145.999 ops/s
composed_bs_to_array 100 thrpt 5 464.893 ± 33.579 ops/s
composed_bs_to_array 1000 thrpt 5 45.482 ± 4.217 ops/s
single_bs_to_array 10 thrpt 5 1450077.491 ± 27964.993 ops/s
single_bs_to_array 100 thrpt 5 72201.806 ± 637.800 ops/s
single_bs_to_array 1000 thrpt 5 5491.724 ± 178.696 ops/s
2.13 before (second) fix
composed_bs_to_array 10 thrpt 5 128.706 ± 4.590 ops/s
composed_bs_to_array 100 thrpt 5 13.147 ± 0.435 ops/s
composed_bs_to_array 1000 thrpt 5 1.255 ± 0.057 ops/s
single_bs_to_array 10 thrpt 5 1336977.040 ± 719295.197 ops/s
single_bs_to_array 100 thrpt 5 70202.111 ± 522.435 ops/s
single_bs_to_array 1000 thrpt 5 5444.186 ± 224.677 ops/s
2.13 with (second) fix
composed_bs_to_array 10 thrpt 5 5970.395 ± 348.356 ops/s
composed_bs_to_array 100 thrpt 5 479.762 ± 15.819 ops/s
composed_bs_to_array 1000 thrpt 5 45.940 ± 1.306 ops/s
single_bs_to_array 10 thrpt 5 1468430.822 ± 38730.696 ops/s
single_bs_to_array 100 thrpt 5 71313.855 ± 983.643 ops/s
single_bs_to_array 1000 thrpt 5 5526.564 ± 143.654 ops/s
*/
@Setup
def setup(): Unit = {
val bytes = Array.ofDim[Byte](1024 * kb)
bs = ByteString(bytes)
composed = ByteString.empty
for (_ <- 0 to 100) {
composed = composed ++ bs
}
}
@Benchmark
@OperationsPerInvocation(1000)
def to_array(blackhole: Blackhole) = {
for (_ <- 0 to 1000)
blackhole.consume(bb.toArray[Byte])
def single_bs_to_array(blackhole: Blackhole): Unit = {
blackhole.consume(bs.toArray[Byte])
}
@Benchmark
def composed_bs_to_array(blackhole: Blackhole): Unit = {
blackhole.consume(composed.toArray[Byte])
}
}