+str #19020 reduce combinator

2016-01-15 22:51:26 -05:00 · 2016-01-15 22:51:26 -05:00 · a2ab7f29e1
commit a2ab7f29e1
parent 55425e5ef3
15 changed files with 247 additions and 37 deletions
--- a/akka-docs/rst/java/code/docs/stream/javadsl/cookbook/RecipeReduceByKeyTest.java
+++ b/akka-docs/rst/java/code/docs/stream/javadsl/cookbook/RecipeReduceByKeyTest.java
@ -55,8 +55,10 @@ public class RecipeReduceByKeyTest extends RecipeTest {
        final Source<Pair<String, Integer>, NotUsed> counts = words
            // split the words into separate streams first
          .groupBy(MAXIMUM_DISTINCT_WORDS, i -> i)
+          // transform each word into a pair of the word and an initial count of 1
+          .map(i -> new Pair<>(i, 1))
          // add counting logic to the streams
-          .fold(new Pair<>("", 0), (pair, elem) -> new Pair<>(elem, pair.second() + 1))
+          .reduce((left, right) -> new Pair<>(left.first(), left.second() + right.second()))
          // get a stream of word counts
          .mergeSubstreams();
        //#word-count
@ -77,17 +79,13 @@ public class RecipeReduceByKeyTest extends RecipeTest {
  static public <In, K, Out> Flow<In, Pair<K, Out>, NotUsed> reduceByKey(
      int maximumGroupSize,
      Function<In, K> groupKey,
-      Function<K, Out> foldZero,
-      Function2<Out, In, Out> fold,
-      Materializer mat) {
+      Function<In, Out> map,
+      Function2<Out, Out, Out> reduce) {

    return Flow.<In> create()
-      .groupBy(maximumGroupSize, i -> i)
-      .fold((Pair<K, Out>) null, (pair, elem) -> {
-        final K key = groupKey.apply(elem);
-        if (pair == null) return new Pair<>(key, fold.apply(foldZero.apply(key), elem));
-        else return new Pair<>(key, fold.apply(pair.second(), elem));
-      })
+      .groupBy(maximumGroupSize, groupKey)
+      .map(i -> new Pair<>(groupKey.apply(i), map.apply(i)))
+      .reduce((left, right) -> new Pair<>(left.first(), reduce.apply(left.second(), right.second())))
      .mergeSubstreams();
  }
  //#reduce-by-key-general
@ -104,9 +102,8 @@ public class RecipeReduceByKeyTest extends RecipeTest {
        Source<Pair<String, Integer>, NotUsed> counts = words.via(reduceByKey(
          MAXIMUM_DISTINCT_WORDS,
          word -> word,
-          key -> 0,
-          (count, elem) -> count + 1,
-          mat));
+          word -> 1,
+          (left, right) -> left + right));

        //#reduce-by-key-general2
        final Future<List<Pair<String, Integer>>> f = counts.grouped(10).runWith(Sink.head(), mat);
--- a/akka-docs/rst/java/stream/stream-cookbook.rst
+++ b/akka-docs/rst/java/stream/stream-cookbook.rst
@ -113,7 +113,7 @@ we have a stream of streams, where every substream will serve identical words.
 To count the words, we need to process the stream of streams (the actual groups
 containing identical words). ``groupBy`` returns a :class:`SubSource`, which
 means that we transform the resulting substreams directly. In this case we use
-the ``fold`` combinator to aggregate the word itself and the number of its
+the ``reduce`` combinator to aggregate the word itself and the number of its
 occurrences within a :class:`Pair<String, Integer>`. Each substream will then
 emit one final value—precisely such a pair—when the overall input completes. As
 a last step we merge back these values from the substreams into one single
@ -133,8 +133,8 @@ stream cannot continue without violating its resource bound, in this case
 By extracting the parts specific to *wordcount* into

 * a ``groupKey`` function that defines the groups
-* a ``foldZero`` that defines the zero element used by the fold on the substream given the group key
-* a ``fold`` function that does the actual reduction
+* a ``map`` function that maps each element to the value that is used by the reduce on the substream
+* a ``reduce`` function that does the actual reduction

 we get a generalized version below:

--- a/akka-docs/rst/scala/code/docs/stream/cookbook/RecipeReduceByKey.scala
+++ b/akka-docs/rst/scala/code/docs/stream/cookbook/RecipeReduceByKey.scala
@ -21,10 +21,10 @@ class RecipeReduceByKey extends RecipeSpec {
      val counts: Source[(String, Int), NotUsed] = words
        // split the words into separate streams first
        .groupBy(MaximumDistinctWords, identity)
+        // transform each word into a pair of the word and an initial count of 1
+        .map(_ -> 1)
        // add counting logic to the streams
-        .fold(("", 0)) {
-          case ((_, count), word) => (word, count + 1)
-        }
+        .reduce((l, r) => (l._1, l._2 + r._2))
        // get a stream of word counts
        .mergeSubstreams
      //#word-count
@ -46,26 +46,19 @@ class RecipeReduceByKey extends RecipeSpec {
      def reduceByKey[In, K, Out](
        maximumGroupSize: Int,
        groupKey: (In) => K,
-        foldZero: (K) => Out)(fold: (Out, In) => Out): Flow[In, (K, Out), NotUsed] = {
+        map: (In) => Out)(reduce: (Out, Out) => Out): Flow[In, (K, Out), NotUsed] = {

        Flow[In]
-          .groupBy(maximumGroupSize, groupKey)
-          .fold(Option.empty[(K, Out)]) {
-            case (None, elem) =>
-              val key = groupKey(elem)
-              Some((key, fold(foldZero(key), elem)))
-            case (Some((key, out)), elem) =>
-              Some((key, fold(out, elem)))
-          }
-          .map(_.get)
+          .groupBy[K](maximumGroupSize, groupKey)
+          .map(e => groupKey(e) -> map(e))
+          .reduce((l, r) => l._1 -> reduce(l._2, r._2))
          .mergeSubstreams
      }

-      val wordCounts = words.via(reduceByKey(
-        MaximumDistinctWords,
-        groupKey = (word: String) => word,
-        foldZero = (key: String) => 0)(fold = (count: Int, elem: String) => count + 1))
-
+      val wordCounts = words.via(
+        reduceByKey(MaximumDistinctWords,
+          groupKey = (word: String) => word,
+          map = (word: String) => 1)((left: Int, right: Int) => left + right))
      //#reduce-by-key-general

      Await.result(wordCounts.grouped(10).runWith(Sink.head), 3.seconds).toSet should be(Set(
--- a/akka-docs/rst/scala/stream/stream-cookbook.rst
+++ b/akka-docs/rst/scala/stream/stream-cookbook.rst
@ -111,7 +111,7 @@ we have a stream of streams, where every substream will serve identical words.
 To count the words, we need to process the stream of streams (the actual groups
 containing identical words). ``groupBy`` returns a :class:`SubFlow`, which
 means that we transform the resulting substreams directly. In this case we use
-the ``fold`` combinator to aggregate the word itself and the number of its
+the ``reduce`` combinator to aggregate the word itself and the number of its
 occurrences within a tuple :class:`(String, Integer)`. Each substream will then
 emit one final value—precisely such a pair—when the overall input completes. As
 a last step we merge back these values from the substreams into one single
@ -131,8 +131,8 @@ this case ``groupBy`` will terminate with a failure.
 By extracting the parts specific to *wordcount* into

 * a ``groupKey`` function that defines the groups
-* a ``foldZero`` that defines the zero element used by the fold on the substream given the group key
-* a ``fold`` function that does the actual reduction
+* a ``map`` function that maps each element to the value that is used by the reduce on the substream
+* a ``reduce`` function that does the actual reduction

 we get a generalized version below: