Intro
Hive explode 操作
import pyspark
from pyspark.sql import SparkSession
# Obtain the SparkSession used by every snippet below; getOrCreate() reuses
# an already-running session instead of starting a second one.
session_builder = SparkSession.builder.appName("pysaprk")
spark = session_builder.getOrCreate()
import pyspark.sql.functions
# Sample data: each row carries an id and a comma-separated string that the
# later query splits into one row per element.
sample_rows = [
    (1, "A,B"),
    (2, "C,D"),
    (3, "E"),
]
column_names = ["id", "split_str"]
df = spark.createDataFrame(sample_rows, column_names)

# Print up to 20 rows without truncating long cell values.
df.show(20, truncate=False)
+---+---------+
|id |split_str|
+---+---------+
|1  |A,B      |
|2  |C,D      |
|3  |E        |
+---+---------+
# Register the DataFrame as the temporary view "temp" so that it can be
# queried by name through spark.sql().
df.createOrReplaceTempView(name="temp")
将 split_str 按逗号拆分为多行
# split() turns the comma-separated string into an array and explode() emits
# one output row per array element, aliased as "letter".
sql = """
select id,split_str,explode(split(split_str,',')) as letter
from temp
"""
exploded_df = spark.sql(sql)
exploded_df.show()
+---+---------+------+
| id|split_str|letter|
+---+---------+------+
|  1|      A,B|     A|
|  1|      A,B|     B|
|  2|      C,D|     C|
|  2|      C,D|     D|
|  3|        E|     E|
+---+---------+------+
2022-03-29 于南京市江宁区九龙湖
|