package lambda.sql
import org.apache.spark.SparkContext
import org.apache.spark.rdd.RDD
import org.apache.spark.sql.{DataFrame, SparkSession}
/**
 * Demo: Spark SQL, converting an RDD to a DataFrame by specifying element types and column names
 *
 * @Author wangyijie
 * @Date 2021/7/15 20:42
 * @Version 1.0
 */
object Demo03 {
  def main(args: Array[String]): Unit = {
    // TODO 0. Prepare the environment
    val spark: SparkSession = SparkSession.builder().appName("Demo03").master("local[*]").getOrCreate()
    val sc: SparkContext = spark.sparkContext
    sc.setLogLevel("WARN")
    // TODO 1. Load the data
    // Note: the input path was left empty in the original; point it at a text file of
    // space-separated "id name age" records (e.g. "data/input/person.txt", an illustrative path).
    val lines: RDD[String] = sc.textFile("")
    // TODO 2. Process the data: parse each line into an (id, name, age) tuple
    val tupleRDD: RDD[(Int, String, Int)] = lines.map(line => {
      val arr: Array[String] = line.split(" ")
      (arr(0).toInt, arr(1), arr(2).toInt)
    }) // we now have an RDD of person tuples
    // RDD -> DataFrame
    import spark.implicits._ // spark is the SparkSession defined above; its implicits enable toDF
    val personDF: DataFrame = tupleRDD.toDF("id", "name", "age")
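    // Hedged alternative sketch (not part of the original demo): the same typed, named
    // DataFrame can be built with an explicit StructType schema via spark.createDataFrame.
    // The nullability settings below are illustrative assumptions.
    import org.apache.spark.sql.Row
    import org.apache.spark.sql.types.{IntegerType, StringType, StructField, StructType}
    val rowRDD: RDD[Row] = tupleRDD.map { case (id, name, age) => Row(id, name, age) }
    val schema: StructType = StructType(Seq(
      StructField("id", IntegerType, nullable = false),
      StructField("name", StringType, nullable = true),
      StructField("age", IntegerType, nullable = false)
    ))
    val personDF2: DataFrame = spark.createDataFrame(rowRDD, schema)
    personDF2.printSchema() // should match the schema of personDF above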
    // TODO 3. Output the result
    personDF.printSchema()
    personDF.show()
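    // Illustrative sketch (not in the original demo): with the schema attached, the
    // DataFrame can also be queried with SQL through a temporary view. The view name
    // "person" and the age filter are assumptions for illustration.
    personDF.createOrReplaceTempView("person")
    spark.sql("SELECT id, name, age FROM person WHERE age >= 18").show()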
    spark.stop() // stopping the session also stops the underlying SparkContext
  }
}