// Alternative WordCount implementations
/** WordCount via groupBy: group identical words, then count each group's size. */
def wordcount1(sc: SparkContext): Unit = {
  val words = sc.makeRDD(List("Hello Scala", "Hello Spark"))
    .flatMap(line => line.split(" "))
  val counts = words.groupBy(identity).mapValues(_.size)
  counts.collect().foreach(println)
}
/** WordCount via groupByKey: pair every word with 1, group by key, count the group. */
def wordcount2(sc: SparkContext): Unit = {
  val pairs = sc.makeRDD(List("Hello Scala", "Hello Spark"))
    .flatMap(line => line.split(" "))
    .map(word => (word, 1))
  pairs
    .groupByKey()
    .mapValues(_.size)
    .collect()
    .foreach(println)
}
/** WordCount via reduceByKey: the canonical Spark solution. */
def wordcount3(sc: SparkContext): Unit = {
  val result = sc.makeRDD(List("Hello Scala", "Hello Spark"))
    .flatMap(line => line.split(" "))
    .map(word => (word, 1))
    .reduceByKey((a, b) => a + b)
    .collect()
  result.foreach(println)
}
/** WordCount via aggregateByKey: zero value 0, same add for within and across partitions. */
def wordcount4(sc: SparkContext): Unit = {
  val pairs = sc.makeRDD(List("Hello Scala", "Hello Spark"))
    .flatMap(line => line.split(" "))
    .map(word => (word, 1))
  pairs
    .aggregateByKey(0)((acc, v) => acc + v, (a, b) => a + b)
    .collect()
    .foreach(println)
}
/** WordCount via foldByKey: like aggregateByKey but a single merge function. */
def wordcount5(sc: SparkContext): Unit = {
  val result = sc.makeRDD(List("Hello Scala", "Hello Spark"))
    .flatMap(line => line.split(" "))
    .map(word => (word, 1))
    .foldByKey(0)((a, b) => a + b)
    .collect()
  result.foreach(println)
}
/**
 * WordCount via combineByKey.
 * The accumulator type must be annotated on the merge functions because
 * combineByKey cannot infer it from the identity createCombiner alone.
 */
def wordcount6(sc: SparkContext): Unit = {
  val pairs = sc.makeRDD(List("Hello Scala", "Hello Spark"))
    .flatMap(line => line.split(" "))
    .map(word => (word, 1))
  pairs
    .combineByKey(
      v => v,                          // createCombiner: first value becomes the accumulator
      (acc: Int, v) => acc + v,        // mergeValue: fold a value into a partition-local accumulator
      (a: Int, b: Int) => a + b        // mergeCombiners: merge accumulators across partitions
    )
    .collect()
    .foreach(println)
}
/** WordCount via countByKey: an action that returns the counts to the driver as a Map. */
def wordcount7(sc: SparkContext): Unit = {
  val pairs = sc.makeRDD(List("Hello Scala", "Hello Spark"))
    .flatMap(line => line.split(" "))
    .map(word => (word, 1))
  val counts: collection.Map[String, Long] = pairs.countByKey()
  println(counts)
}
/** WordCount via countByValue: no (word, 1) pairing needed at all. */
def wordcount8(sc: SparkContext): Unit = {
  val words = sc.makeRDD(List("Hello Scala", "Hello Spark"))
    .flatMap(line => line.split(" "))
  val counts: collection.Map[String, Long] = words.countByValue()
  println(counts)
}
/**
 * WordCount via reduce: each word becomes a one-entry mutable Map,
 * and reduce merges all the maps by summing counts per word.
 */
def wordcount9(sc: SparkContext): Unit = {
  // Fold the right map into the left accumulator, summing counts per word.
  def merge(acc: mutable.Map[String, Long],
            other: mutable.Map[String, Long]): mutable.Map[String, Long] = {
    for ((word, count) <- other) {
      acc.update(word, acc.getOrElse(word, 0L) + count)
    }
    acc
  }
  val result: mutable.Map[String, Long] = sc.makeRDD(List("Hello Scala", "Hello Spark"))
    .flatMap(_.split(" "))
    .map(word => mutable.Map[String, Long]((word, 1)))
    .reduce(merge)
  println(result)
}
/**
 * WordCount via aggregate: each word becomes a one-entry immutable Map,
 * and aggregate folds them into a mutable accumulator Map on each partition,
 * then merges the per-partition accumulators.
 */
def wordcount10(sc: SparkContext): Unit = {
  val result: mutable.Map[String, Long] = sc.makeRDD(List("Hello Scala", "Hello Spark"))
    .flatMap(_.split(" "))
    .map(word => Map((word, 1)))
    .aggregate(mutable.Map[String, Long]())(
      // seqOp: fold one single-word map into the partition-local accumulator
      (acc, single) => {
        for ((word, count) <- single) {
          acc.update(word, acc.getOrElse(word, 0L) + count)
        }
        acc
      },
      // combOp: merge two partition accumulators on the driver
      (acc, other) => {
        for ((word, count) <- other) {
          acc.update(word, acc.getOrElse(word, 0L) + count)
        }
        acc
      }
    )
  println(result)
}
/**
 * WordCount via fold: like aggregate, but fold requires the element type and
 * the accumulator type to match, so each word is mapped to a mutable Map[String, Long].
 */
def wordcount11(sc: SparkContext): Unit = {
  val result: mutable.Map[String, Long] = sc.makeRDD(List("Hello Scala", "Hello Spark"))
    .flatMap(_.split(" "))
    .map(word => mutable.Map((word, 1L)))
    .fold(mutable.Map[String, Long]())(
      // Single merge function used both within and across partitions.
      (acc, other) => {
        for ((word, count) <- other) {
          acc.update(word, acc.getOrElse(word, 0L) + count)
        }
        acc
      }
    )
  println(result)
}