+str #19020 reduce combinator
parent 55425e5ef3
commit a2ab7f29e1
15 changed files with 247 additions and 37 deletions
@@ -21,10 +21,10 @@ class RecipeReduceByKey extends RecipeSpec {
       val counts: Source[(String, Int), NotUsed] = words
         // split the words into separate streams first
         .groupBy(MaximumDistinctWords, identity)
+        // transform each element to pair with number of words in it
+        .map(_ -> 1)
         // add counting logic to the streams
-        .fold(("", 0)) {
-          case ((_, count), word) => (word, count + 1)
-        }
+        .reduce((l, r) => (l._1, l._2 + r._2))
         // get a stream of word counts
         .mergeSubstreams
       //#word-count
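For reference, here is a self-contained sketch of the word-count stream as it reads after this change. Only the pipeline itself comes from the hunk above; the ActorSystem setup, the sample `words` source and the `MaximumDistinctWords` value are assumptions added so the snippet runs on its own.

```scala
import akka.NotUsed
import akka.actor.ActorSystem
import akka.stream.ActorMaterializer
import akka.stream.scaladsl.{ Sink, Source }

import scala.concurrent.Await
import scala.concurrent.duration._

object WordCountSketch extends App {
  implicit val system = ActorSystem("word-count")
  implicit val materializer = ActorMaterializer()

  // assumed sample input; the recipe only requires some Source[String, _]
  val MaximumDistinctWords = 1000
  val words: Source[String, NotUsed] =
    Source(List("hello", "world", "and", "hello", "akka"))

  val counts: Source[(String, Int), NotUsed] = words
    // split the words into separate substreams, one per distinct word
    .groupBy(MaximumDistinctWords, identity)
    // pair every word with an initial count of 1
    .map(_ -> 1)
    // sum the counts within each substream; the key stays the same
    .reduce((l, r) => (l._1, l._2 + r._2))
    // merge the per-word results back into one stream
    .mergeSubstreams

  println(Await.result(counts.grouped(10).runWith(Sink.head), 3.seconds).toSet)
  // expected: Set((hello,2), (world,1), (and,1), (akka,1))

  system.terminate()
}
```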
@@ -46,26 +46,19 @@ class RecipeReduceByKey extends RecipeSpec {
       def reduceByKey[In, K, Out](
         maximumGroupSize: Int,
         groupKey: (In) => K,
-        foldZero: (K) => Out)(fold: (Out, In) => Out): Flow[In, (K, Out), NotUsed] = {
+        map: (In) => Out)(reduce: (Out, Out) => Out): Flow[In, (K, Out), NotUsed] = {

         Flow[In]
-          .groupBy(maximumGroupSize, groupKey)
-          .fold(Option.empty[(K, Out)]) {
-            case (None, elem) =>
-              val key = groupKey(elem)
-              Some((key, fold(foldZero(key), elem)))
-            case (Some((key, out)), elem) =>
-              Some((key, fold(out, elem)))
-          }
-          .map(_.get)
+          .groupBy[K](maximumGroupSize, groupKey)
+          .map(e => groupKey(e) -> map(e))
+          .reduce((l, r) => l._1 -> reduce(l._2, r._2))
           .mergeSubstreams
       }

-      val wordCounts = words.via(reduceByKey(
-        MaximumDistinctWords,
-        groupKey = (word: String) => word,
-        foldZero = (key: String) => 0)(fold = (count: Int, elem: String) => count + 1))
-
+      val wordCounts = words.via(
+        reduceByKey(MaximumDistinctWords,
+          groupKey = (word: String) => word,
+          map = (word: String) => 1)((left: Int, right: Int) => left + right))
       //#reduce-by-key-general

       Await.result(wordCounts.grouped(10).runWith(Sink.head), 3.seconds).toSet should be(Set(
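The following sketch reproduces the new `reduceByKey` from the hunk above and runs the word-count usage end to end. Everything outside the function and the `wordCounts` expression (system setup, sample data, the `MaximumDistinctWords` value) is an assumption added for the sake of a runnable example.

```scala
import akka.NotUsed
import akka.actor.ActorSystem
import akka.stream.ActorMaterializer
import akka.stream.scaladsl.{ Flow, Sink, Source }

import scala.concurrent.Await
import scala.concurrent.duration._

object ReduceByKeySketch extends App {
  implicit val system = ActorSystem("reduce-by-key")
  implicit val materializer = ActorMaterializer()

  val MaximumDistinctWords = 1000
  val words: Source[String, NotUsed] =
    Source(List("hello", "world", "and", "hello", "akka"))

  // the generalized combinator from the new side of the hunk above:
  // key every element, map it to a value, and combine the values of a
  // group pairwise with reduce
  def reduceByKey[In, K, Out](
      maximumGroupSize: Int,
      groupKey: (In) => K,
      map: (In) => Out)(reduce: (Out, Out) => Out): Flow[In, (K, Out), NotUsed] =
    Flow[In]
      .groupBy[K](maximumGroupSize, groupKey)
      .map(e => groupKey(e) -> map(e))
      .reduce((l, r) => l._1 -> reduce(l._2, r._2))
      .mergeSubstreams

  // word count expressed through the generalized version, as in the diff
  val wordCounts = words.via(
    reduceByKey(MaximumDistinctWords,
      groupKey = (word: String) => word,
      map = (word: String) => 1)((left: Int, right: Int) => left + right))

  println(Await.result(wordCounts.grouped(10).runWith(Sink.head), 3.seconds).toSet)
  // expected: Set((hello,2), (world,1), (and,1), (akka,1))

  system.terminate()
}
```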
@@ -111,7 +111,7 @@ we have a stream of streams, where every substream will serve identical words.

 To count the words, we need to process the stream of streams (the actual groups
 containing identical words). ``groupBy`` returns a :class:`SubFlow`, which
 means that we transform the resulting substreams directly. In this case we use
-the ``fold`` combinator to aggregate the word itself and the number of its
+the ``reduce`` combinator to aggregate the word itself and the number of its
 occurrences within a tuple :class:`(String, Integer)`. Each substream will then
 emit one final value—precisely such a pair—when the overall input completes. As
 a last step we merge back these values from the substreams into one single
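To make that paragraph concrete, here is a small illustrative sketch (not taken from the documentation) of the same groupBy, reduce, mergeSubstreams shape on numbers grouped by parity: each substream emits exactly one final value once the upstream completes, and those values are merged back into a single stream.

```scala
import akka.actor.ActorSystem
import akka.stream.ActorMaterializer
import akka.stream.scaladsl.{ Sink, Source }

import scala.concurrent.Await
import scala.concurrent.duration._

object GroupByReduceSketch extends App {
  implicit val system = ActorSystem("group-by-reduce")
  implicit val materializer = ActorMaterializer()

  val sums = Source(1 to 10)
    .groupBy(2, _ % 2) // at most two substreams: one for even, one for odd numbers
    .reduce(_ + _)     // each substream reduces to a single sum
    .mergeSubstreams   // collapse the substreams back into one stream

  // one element per substream is emitted once the upstream completes
  println(Await.result(sums.grouped(10).runWith(Sink.head), 3.seconds).sorted)
  // expected values (order before sorting is not fixed): 25 (odd) and 30 (even)

  system.terminate()
}
```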
@@ -131,8 +131,8 @@ this case ``groupBy`` will terminate with a failure.
 By extracting the parts specific to *wordcount* into

 * a ``groupKey`` function that defines the groups
-* a ``foldZero`` that defines the zero element used by the fold on the substream given the group key
-* a ``fold`` function that does the actual reduction
+* a ``map`` function that maps each element to the value used by the reduce on the substream
+* a ``reduce`` function that does the actual reduction

 we get a generalized version below:

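As a usage note on the bullets above: once the generalized `reduceByKey` is available, other aggregations fall out by swapping the three parts. The snippet below is a hypothetical example, not part of this diff; it assumes the `words`, `MaximumDistinctWords` and `reduceByKey` definitions from the runnable sketch earlier are in scope, and it sums characters per first letter instead of counting words.

```scala
// Hypothetical reuse of reduceByKey (assumed in scope, see the sketch above):
// total number of characters contributed by the words sharing a first letter.
val charsPerInitial: Source[(Char, Int), NotUsed] =
  words.via(reduceByKey(
    MaximumDistinctWords,
    groupKey = (word: String) => word.head,   // group by first letter
    map = (word: String) => word.length)(     // per-element value: word length
    (left: Int, right: Int) => left + right)) // sum the lengths within a group
```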