From 0f4e1466235ce37c5a94fca99cb4998049861445 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Johan=20Andr=C3=A9n?= Date: Wed, 8 Jan 2020 14:14:45 +0100 Subject: [PATCH] ByteString performance regression toArray on 2.13 (#28419) * Benchmark with numbers * Better optimization, now on par with 2.12 * Simplify * Review do'hs and unused errors fixed * Move build up of composite out to setup * A little thing to make MiMa happy * Apply suggestions from code review Co-authored-by: Arnout Engelen --- .../pr-28419-byte-string-perf-2.13.excludes | 3 + .../scala-2.13+/akka/util/ByteString.scala | 51 ++++++++++++++- .../util/ByteString_toArray_Benchmark.scala | 62 +++++++++++++++---- 3 files changed, 100 insertions(+), 16 deletions(-) create mode 100644 akka-actor/src/main/mima-filters/2.6.1.backwards.excludes/pr-28419-byte-string-perf-2.13.excludes diff --git a/akka-actor/src/main/mima-filters/2.6.1.backwards.excludes/pr-28419-byte-string-perf-2.13.excludes b/akka-actor/src/main/mima-filters/2.6.1.backwards.excludes/pr-28419-byte-string-perf-2.13.excludes new file mode 100644 index 0000000000..7ee03bbae1 --- /dev/null +++ b/akka-actor/src/main/mima-filters/2.6.1.backwards.excludes/pr-28419-byte-string-perf-2.13.excludes @@ -0,0 +1,3 @@ +# Fix for 2.13 performance regression #28419 +ProblemFilters.exclude[FinalMethodProblem]("akka.util.ByteString.copyToArray") +ProblemFilters.exclude[FinalMethodProblem]("akka.util.ByteString.toArray") \ No newline at end of file diff --git a/akka-actor/src/main/scala-2.13+/akka/util/ByteString.scala b/akka-actor/src/main/scala-2.13+/akka/util/ByteString.scala index af5e5cf266..938c1eb428 100644 --- a/akka-actor/src/main/scala-2.13+/akka/util/ByteString.scala +++ b/akka-actor/src/main/scala-2.13+/akka/util/ByteString.scala @@ -14,7 +14,6 @@ import scala.collection.mutable.{ Builder, WrappedArray } import scala.collection.{ immutable, mutable } import scala.collection.immutable.{ IndexedSeq, IndexedSeqOps, StrictOptimizedSeqOps, VectorBuilder } import scala.reflect.ClassTag - import com.github.ghik.silencer.silent object ByteString { @@ -253,6 +252,14 @@ object ByteString { buffer.putByteArrayUnsafe(bytes) } + override def copyToArray[B >: Byte](dest: Array[B], start: Int, len: Int): Int = { + val copied = math.min(math.min(len, bytes.length), dest.length - start) + if (copied > 0) { + System.arraycopy(bytes, 0, dest, start, copied) + } + copied + } + } /** INTERNAL API: ByteString backed by exactly one array, with start / end markers */ @@ -386,6 +393,15 @@ object ByteString { } } + override def copyToArray[B >: Byte](dest: Array[B], start: Int, len: Int): Int = { + // min of the bytes available to copy, bytes there is room for in dest and the requested number of bytes + val copied = math.max(math.min(math.min(len, length), dest.length - start), 0) + if (copied > 0) { + System.arraycopy(bytes, 0, dest, start, copied) + } + copied + } + protected def writeReplace(): AnyRef = new SerializationProxy(this) } @@ -628,6 +644,24 @@ object ByteString { } } + override def copyToArray[B >: Byte](dest: Array[B], start: Int, len: Int): Int = { + if (isCompact) bytestrings.head.copyToArray(dest, start, len) + else { + // min of the bytes available top copy, bytes there is room for in dest and the requested number of bytes + val copied = math.min(math.min(len, length), dest.length - start) + if (copied > 0) { + val bsIterator = bytestrings.iterator + var pos = 0 + while (pos < copied) { + val current = bsIterator.next() + val copied = current.copyToArray(dest, pos, len - pos) + pos += copied + } + } + copied + } + } + protected def writeReplace(): AnyRef = new SerializationProxy(this) } @@ -754,10 +788,21 @@ sealed abstract class ByteString */ protected[ByteString] def toArray: Array[Byte] = toArray[Byte] - override def toArray[B >: Byte](implicit arg0: ClassTag[B]): Array[B] = iterator.toArray + final override def toArray[B >: Byte](implicit arg0: ClassTag[B]): Array[B] = { + // super uses byteiterator + val array = new Array[B](size) + copyToArray(array, 0, size) + array + } + final override def copyToArray[B >: Byte](xs: Array[B], start: Int): Int = { + // super uses byteiterator + copyToArray(xs, start, size.min(xs.size)) + } + + // optimized in all subclasses, avoiding usage of the iterator to save allocations/transformations override def copyToArray[B >: Byte](xs: Array[B], start: Int, len: Int): Int = - iterator.copyToArray(xs, start, len) + throw new UnsupportedOperationException("Method copyToArray is not implemented in ByteString") override def foreach[@specialized U](f: Byte => U): Unit = iterator.foreach(f) diff --git a/akka-bench-jmh/src/main/scala/akka/util/ByteString_toArray_Benchmark.scala b/akka-bench-jmh/src/main/scala/akka/util/ByteString_toArray_Benchmark.scala index 9c45c000db..5c4e006f14 100644 --- a/akka-bench-jmh/src/main/scala/akka/util/ByteString_toArray_Benchmark.scala +++ b/akka-bench-jmh/src/main/scala/akka/util/ByteString_toArray_Benchmark.scala @@ -13,25 +13,61 @@ import org.openjdk.jmh.infra.Blackhole @Measurement(timeUnit = TimeUnit.MILLISECONDS) class ByteString_toArray_Benchmark { - val b = Array.ofDim[Byte](1024 * 10244) - val bb = ByteString(b) + var bs: ByteString = _ + + var composed: ByteString = _ + + @Param(Array("10", "100", "1000")) + var kb = 0 + /* - Benchmark Mode Cnt Score Error Units + akka-bench-jmh/jmh:run -f 1 -wi 5 -i 5 .*ByteString_toArray_Benchmark.* + + + Benchmark (kb) Mode Cnt Score Error Units 2.12 - ByteString_toArray_Benchmark.to_array thrpt 3 537,116 ± 525,663 ops/s - 2.13 before fix - ByteString_toArray_Benchmark.to_array thrpt 3 165,869 ± 243,524 ops/s - 2.13 with fix #28114 - ByteString_toArray_Benchmark.to_array thrpt 3 525,521 ± 346,830 ops/s + composed_bs_to_array 10 thrpt 5 5625.395 ± 145.999 ops/s + composed_bs_to_array 100 thrpt 5 464.893 ± 33.579 ops/s + composed_bs_to_array 1000 thrpt 5 45.482 ± 4.217 ops/s + single_bs_to_array 10 thrpt 5 1450077.491 ± 27964.993 ops/s + single_bs_to_array 100 thrpt 5 72201.806 ± 637.800 ops/s + single_bs_to_array 1000 thrpt 5 5491.724 ± 178.696 ops/s + + 2.13 before (second) fix + composed_bs_to_array 10 thrpt 5 128.706 ± 4.590 ops/s + composed_bs_to_array 100 thrpt 5 13.147 ± 0.435 ops/s + composed_bs_to_array 1000 thrpt 5 1.255 ± 0.057 ops/s + single_bs_to_array 10 thrpt 5 1336977.040 ± 719295.197 ops/s + single_bs_to_array 100 thrpt 5 70202.111 ± 522.435 ops/s + single_bs_to_array 1000 thrpt 5 5444.186 ± 224.677 ops/s + + 2.13 with (second) fix + composed_bs_to_array 10 thrpt 5 5970.395 ± 348.356 ops/s + composed_bs_to_array 100 thrpt 5 479.762 ± 15.819 ops/s + composed_bs_to_array 1000 thrpt 5 45.940 ± 1.306 ops/s + single_bs_to_array 10 thrpt 5 1468430.822 ± 38730.696 ops/s + single_bs_to_array 100 thrpt 5 71313.855 ± 983.643 ops/s + single_bs_to_array 1000 thrpt 5 5526.564 ± 143.654 ops/s */ + @Setup + def setup(): Unit = { + val bytes = Array.ofDim[Byte](1024 * kb) + bs = ByteString(bytes) + composed = ByteString.empty + for (_ <- 0 to 100) { + composed = composed ++ bs + } + } + @Benchmark - @OperationsPerInvocation(1000) - def to_array(blackhole: Blackhole) = { - - for (_ <- 0 to 1000) - blackhole.consume(bb.toArray[Byte]) + def single_bs_to_array(blackhole: Blackhole): Unit = { + blackhole.consume(bs.toArray[Byte]) + } + @Benchmark + def composed_bs_to_array(blackhole: Blackhole): Unit = { + blackhole.consume(composed.toArray[Byte]) } }