// File: pekko/akka-docs/rst/scala/code/docs/stream/cookbook/RecipeReduceByKey.scala
// (83 lines, 2.4 KiB, Scala; revision dated 2014-12-08 17:29:40 +01:00)

package docs.stream.cookbook
import akka.NotUsed
import akka.stream.{ Graph, FlowShape, Inlet, Outlet, Attributes, OverflowStrategy }
// (revision 2014-12-08 17:29:40 +01:00)
import akka.stream.scaladsl._
import scala.concurrent.{ Await, Future }
import scala.concurrent.duration._
import akka.stream.stage.{ GraphStage, GraphStageLogic }
// (revision 2014-12-08 17:29:40 +01:00)
/**
 * Cookbook recipe: "reduce by key".
 *
 * Demonstrates how to split a stream into one substream per key with `groupBy`,
 * fold every substream down to a single value, and merge the per-key results
 * back into one stream. The first test hard-codes a word count; the second
 * extracts the same pattern into a reusable, generic `reduceByKey` flow.
 *
 * The `//#word-count` and `//#reduce-by-key-general` markers delimit the snippets
 * and are kept exactly around the same code.
 */
class RecipeReduceByKey extends RecipeSpec {

  "Reduce by key recipe" must {

    // Passed to `groupBy` as the maximum number of substreams (one per distinct word).
    val MaximumDistinctWords = 1000

    // Shared input for both tests: a handful of words ("hello" appears twice)
    // followed by 1000 repetitions of "rocks!".
    def words = Source(
      List("hello", "world", "and", "hello", "universe", "akka") ++ List.fill(1000)("rocks!"))

    // Expected (word, occurrences) pairs for `words`. Compared as a Set because
    // the order in which merged substreams emit their result is not asserted.
    val expectedCounts = Set(
      ("hello", 2),
      ("world", 1),
      ("and", 1),
      ("universe", 1),
      ("akka", 1),
      ("rocks!", 1000))

    "work with simple word count" in {
      //#word-count
      val counts: Source[(String, Int), NotUsed] = words
        // split the words into separate streams first
        .groupBy(MaximumDistinctWords, identity)
        // add counting logic to the streams
        // (the "" key is only a placeholder; the first word of the substream replaces it)
        .fold(("", 0)) {
          case ((_, count), word) => (word, count + 1)
        }
        // get a stream of word counts
        .mergeSubstreams
      //#word-count

      // There are fewer than 10 distinct words, so the first group holds every result.
      Await.result(counts.grouped(10).runWith(Sink.head), 3.seconds).toSet should be(expectedCounts)
    }

    "work generalized" in {
      //#reduce-by-key-general
      /**
       * Builds a flow that groups incoming elements by key and folds each group
       * into a single `(key, result)` pair.
       *
       * @param maximumGroupSize passed to `groupBy` as the maximum number of substreams
       * @param groupKey         extracts the grouping key from an element
       * @param foldZero         produces the initial accumulator for a given key
       * @param fold             combines the accumulator with the next element of the group
       * @return a flow emitting one `(key, folded value)` pair per distinct key
       */
      def reduceByKey[In, K, Out](
        maximumGroupSize: Int,
        groupKey:         (In) => K,
        foldZero:         (K) => Out)(fold: (Out, In) => Out): Flow[In, (K, Out), NotUsed] = {

        Flow[In]
          .groupBy(maximumGroupSize, groupKey)
          // The accumulator starts as None because the key is only known once the
          // first element of the substream has been seen.
          .fold(Option.empty[(K, Out)]) {
            case (None, elem) =>
              val key = groupKey(elem)
              Some((key, fold(foldZero(key), elem)))
            case (Some((key, out)), elem) =>
              Some((key, fold(out, elem)))
          }
          // Unwrap without a partial `.get`. NOTE(review): a substream is expected
          // to carry at least one element, so None should never reach this stage.
          .collect { case Some(entry) => entry }
          .mergeSubstreams
      }

      val wordCounts = words.via(reduceByKey(
        MaximumDistinctWords,
        groupKey = (word: String) => word,
        foldZero = (key: String) => 0)(fold = (count: Int, elem: String) => count + 1))
      //#reduce-by-key-general

      // There are fewer than 10 distinct words, so the first group holds every result.
      Await.result(wordCounts.grouped(10).runWith(Sink.head), 3.seconds).toSet should be(expectedCounts)
    }
  }
}