行转列
数据源:
select *
from test.tmp_row_to_col
;
568810800017635002 210100026066367206
568810800017635002 210100026070369019
568810800017635002 210100026038823395
568810800017635002 210100026038921817
568811800170738001 210100016048730468
568811800170738001 210100016034424946
568811800170738001 210100016034424947
568811800170738001 210100016034424948
568817000105010000 210100006034670622
568817000105010000 210100006045431694
最终结果是这样 hive语法 collect_set 转为数组并去重,concat_ws 将数组用逗号间隔连接成字符串
select user_id, concat_ws(',', collect_set(order_id)) as order_ids
from test.tmp_row_to_col
group by user_id
;
+
| user_id | order_ids |
+
| 568811800170738001 | 210100016034424946,210100016048730468,210100016034424947,210100016034424948 |
| 568810800017635002 | 210100026066367206,210100026070369019,210100026038823395,210100026038921817 |
| 568817000105010000 | 210100006045431694,210100006034670622 |
+
presto语法 array_agg 转为数组,array_distinct 去重,array_join 将数组用逗号间隔连接成字符串
select user_id, array_join(array_distinct(array_agg(order_id)), ',') as order_ids
from test.tmp_row_to_col
group by user_id
;
列转行
即将上面的两张图顺序调换一下
hive语法 split 将````order_ids拆分成数组, lateral view explode```将数组炸裂开
select user_id, order_id
from test.tmp_col_to_row
lateral view explode(split(order_ids, ',')) b as order_id
;
+
| user_id | order_id |
+
| 568811800170738001 | 210100016048730468 |
| 568811800170738001 | 210100016034424947 |
| 568811800170738001 | 210100016034424948 |
| 568811800170738001 | 210100016034424946 |
| 568817000105010000 | 210100006034670622 |
| 568817000105010000 | 210100006045431694 |
| 568810800017635002 | 210100026066367206 |
| 568810800017635002 | 210100026070369019 |
| 568810800017635002 | 210100026038823395 |
| 568810800017635002 | 210100026038921817 |
+
presto语法 split 将order_ids 拆分成数组,````cross join unnest```将数组炸裂开,要注意一下两种语法的表名缩写位置
select a.user_id
, b.order_id
from tmp.tmp_col_to_row a
cross join unnest(split(order_ids, ',')) as b(order_id) ;
|