// pekko/akka-docs/rst/scala/code/docs/stream/cookbook/RecipeReduceByKey.scala
package docs.stream.cookbook
import akka.NotUsed
import akka.stream.{ Graph, FlowShape, Inlet, Outlet, Attributes, OverflowStrategy }
import akka.stream.scaladsl._
import scala.concurrent.{ Await, Future }
import scala.concurrent.duration._
import akka.stream.stage.{ GraphStage, GraphStageLogic }
/**
 * Cookbook recipe spec: counting occurrences of elements by key with
 * `groupBy` + per-substream `reduce` + `mergeSubstreams`.
 *
 * The `//#word-count` / `//#reduce-by-key-general` markers delimit snippets
 * that are extracted into the documentation — keep code between them exactly
 * as it should appear in the docs.
 */
class RecipeReduceByKey extends RecipeSpec {

  "Reduce by key recipe" must {

    // Upper bound on the number of distinct keys groupBy will keep substreams
    // for; exceeding it fails the stream, so it must be >= the number of
    // distinct words used in the tests below.
    val MaximumDistinctWords = 1000

    "work with simple word count" in {

      // 6 distinct words; "hello" occurs twice and "rocks!" 1000 times.
      def words = Source(List("hello", "world", "and", "hello", "universe", "akka") ++ List.fill(1000)("rocks!"))

      //#word-count
      val counts: Source[(String, Int), NotUsed] = words
        // split the words into separate streams first
        .groupBy(MaximumDistinctWords, identity)
        //transform each element to pair with number of words in it
        .map(_ -> 1)
        // add counting logic to the streams
        .reduce((l, r) => (l._1, l._2 + r._2))
        // get a stream of word counts
        .mergeSubstreams
      //#word-count

      // limit(10) guards against an unbounded result; order of merged
      // substreams is nondeterministic, hence the comparison as a Set.
      Await.result(counts.limit(10).runWith(Sink.seq), 3.seconds).toSet should be(Set(
        ("hello", 2),
        ("world", 1),
        ("and", 1),
        ("universe", 1),
        ("akka", 1),
        ("rocks!", 1000)))
    }

    "work generalized" in {

      def words = Source(List("hello", "world", "and", "hello", "universe", "akka") ++ List.fill(1000)("rocks!"))

      // Generic form of the recipe above: group by an arbitrary key, project
      // each element to a value, and fold values per key.
      //#reduce-by-key-general
      def reduceByKey[In, K, Out](
        maximumGroupSize: Int,
        groupKey: (In) => K,
        map: (In) => Out)(reduce: (Out, Out) => Out): Flow[In, (K, Out), NotUsed] = {

        Flow[In]
          .groupBy[K](maximumGroupSize, groupKey)
          .map(e => groupKey(e) -> map(e))
          .reduce((l, r) => l._1 -> reduce(l._2, r._2))
          .mergeSubstreams
      }

      val wordCounts = words.via(
        reduceByKey(MaximumDistinctWords,
          groupKey = (word: String) => word,
          map = (word: String) => 1)((left: Int, right: Int) => left + right))

      //#reduce-by-key-general

      Await.result(wordCounts.limit(10).runWith(Sink.seq), 3.seconds).toSet should be(Set(
        ("hello", 2),
        ("world", 1),
        ("and", 1),
        ("universe", 1),
        ("akka", 1),
        ("rocks!", 1000)))
    }
  }
}