求top3英雄及其pick率(pick率为每个英雄出场数/全部英雄总的出场数) 数据准备:
1 亚索,挖掘机,艾瑞,洛,卡沙
2 亚索,盖伦,奥巴马,牛头,皇子
3 亚索,盖伦,艾瑞,宝石,琴女
4 亚索,盖伦,赵信,老鼠,锤石
建表
create table table16(
id int,
names array<string>
)
row format delimited fields terminated by '\t'
collection items terminated by ',';
映射数据:
load data local inpath '/home/offcn/test01/hive/0720/test15.txt' into table table16;
查询数据:
select * from heros;
+-------------+------------------------------+
| table16.id | table16.names |
+-------------+------------------------------+
| 1 | ["亚索","挖掘机","艾瑞","洛","卡沙"] |
| 2 | ["亚索","盖伦","奥巴马","牛头","皇子"] |
| 3 | ["亚索","盖伦","艾瑞","宝石","琴女"] |
| 4 | ["亚索","盖伦","赵信","老鼠","锤石"] |
+-------------+------------------------------+
分析:
用lateral view exploe(names) as name 将集合炸开成一列
select
name,count(name)
from table16
lateral view explode(names) as name; --报错
select
name,count(name)
from table16
lateral view explode(names) n as name; --报错
ERRO: FAILED: SemanticException [Error 10025]: Line 2:0 Expression not in GROUP BY key 'name' (state=42000,code=10025)
用开窗函数试试:
select
name,count(name) over() as countsum
from table16
lateral view explode(names) n as name;
结果:成功炸开了
+-------+-----------+
| name | countsum |
+-------+-----------+
| 亚索 | 20 |
| 挖掘机 | 20 |
| 艾瑞 | 20 |
| 洛 | 20 |
| 卡沙 | 20 |
| 亚索 | 20 |
| 盖伦 | 20 |
| 奥巴马 | 20 |
| 牛头 | 20 |
| 皇子 | 20 |
| 亚索 | 20 |
| 盖伦 | 20 |
| 艾瑞 | 20 |
| 宝石 | 20 |
| 琴女 | 20 |
| 亚索 | 20 |
| 盖伦 | 20 |
| 赵信 | 20 |
| 老鼠 | 20 |
| 锤石 | 20 |
+-------+-----------+
num越大pick越高,按照num排序即可
select t.name,
count(name) as num,
round(count(name)/t.countsum,2)
from (
select
name,count(name) over() as countsum
from table16
lateral view explode(names) n as name) t
group by t.name
order by num desc
limit 3;
--报错 这里误以为round是聚合函数可以直接使用在select 后面没有考虑里面的参数
最终语句
--rand() 进行小数精度舍弃,2代表精确到十分位
--count()统计英雄个数
--t,countsum所有英雄出场场次
select t.name,count(name) as num,
round(count(name)/t.countsum,2) --rand(double数据,精确位数)
from (select
name,count(name) over() as countsum
from table16
lateral view explode(names) n as name) t
group by t.name,t.countsum
order by num desc
limit 3;
结果:
+---------+------+-------+
| t.name | num | _c2 |
+---------+------+-------+
| 亚索 | 4 | 0.2 |
| 盖伦 | 3 | 0.15 |
| 艾瑞 | 2 | 0.1 |
+---------+------+-------+
|