// Local Spark session named after the enclosing class.
val spark = SparkSession
  .builder()
  .master("local[*]")
  .appName(this.getClass.getSimpleName)
  .getOrCreate()
val sc = spark.sparkContext
// Hadoop FileSystem handle built from the job's Hadoop configuration.
val fs = FileSystem.get(sc.hadoopConfiguration)
// Quiet the console: only error-level log output.
sc.setLogLevel("error")

import spark.implicits._

// Sample data: (imei, content_id, (flag, count)).
// The nested Scala tuple is encoded as a struct column named "tuple"
// with fields _1 (string) and _2 (int).
val value = spark
  .createDataset(
    Seq(
      ("imei1", "content_id1", ("0", 1)),
      ("imei1", "content_id2", ("0", 1)),
      ("imei2", "content_id2", ("1", 1)),
      ("imei2", "content_id1", ("1", 1)),
      ("imei3", "content_id1", ("1", 0)),
      ("imei4", "content_id1", ("1", 1))
    )
  )
  .toDF("imei", "content_id", "tuple")
Schema of `value`: root |-- imei: string (nullable = true) |-- content_id: string (nullable = true) |-- tuple: struct (nullable = true) | |-- _1: string (nullable = true) | |-- _2: integer (nullable = false)
Extracting the fields of the struct column into top-level columns:
// Pull the struct's fields out into flat columns:
//   a <- tuple._1 (string), b <- tuple._2 (int).
// getField is the named-field accessor on Column, equivalent to $"tuple"("_1").
value
  .withColumn("a", $"tuple".getField("_1"))
  .withColumn("b", $"tuple".getField("_2"))
  .show(false)
(table output of `show(false)` truncated)