Hive中自定义变量的用法
承接上篇Hive窗口帧文章,具体主页有
两个窗口帧方式:rows、range
range格式:如果当前值在80,求前2后2所有值的平均值,
取值就会在80-2=78和80+2=82之内的所有行
avg(score) over (partition by clazz order by score desc range between 2 perceding and 2 following)
select *,
row_number() over(partition by clazz order by score desc) as num,
rank() over(partition by clazz order by score desc) as num2,
avg(score) over(partition by clazz order by score desc rows between 1 preceding and 1 following) as avg1,
max(score) over(partition by clazz order by score desc rows between 2 preceding
and current row) as max1,
avg(score) over (partition by clazz order by score desc range between 2 preceding and 2 following) as avg2
from testwins;
优化sql语句
WINDOW w AS (partition by clazz order by score desc)
等同于
window w = (partition by clazz order by score desc)
变量值类型 变量名 = 赋值
select *,
row_number() over w as num,
rank() over w as num2,
avg(score) over(partition by clazz order by score desc rows between 1 preceding and 1 following) as avg1,
max(score) over(partition by clazz order by score desc rows between 2 preceding
and current row) as max1,
avg(score) over (partition by clazz order by score desc range between 2 preceding and 2 following) as avg2
from testwins
WINDOW w AS (partition by clazz order by score desc);
联表联查中用的最多
create table students(
id bigint,
name string,
age int,
gender string,
clazz string
)
ROW FORMAT DELIMITED FIELDS TERMINATED BY ',';
create table score(
id bigint,
course_id string,
score int
)
ROW FORMAT DELIMITED FIELDS TERMINATED BY ',';
在学生表后面显示成绩
select students.*,score.score from students left join score on students.id=score.id;
select * from (select id,sum(score) as score from (select students.*,score.score from students left join score on students.id=score.id) as stu group by id) as s1
left join (select students.*,score.score from students left join score on students.id=score.id) as s2
on s1.id=s2.id;
with as使用
例子:
with stu as (select students.*,score.score from students left join score on students.id=score.id)
with s1 as ((select id,sum(score) as score from stu group by id))
select * from s1
left join stu
on s1.id=stu.id;
|