在电商、物流和银行可能经常会遇到这样的需求:统计用户连续交易的总额、连续登陆天数、连续登陆开始和结束时间、间隔天数等
建表:数据在最后
create table deal_tb(
id string
,datestr string
,amount string
)row format delimited fields terminated by ',';
求客户连续消费的金额总额:就是8号9号10号时间是连续的,就求amount的金额总和 注意:每个用户每天可能会有多条记录
计算逻辑:
对数据中时间和id进行分组排序,聚合amount,使得聚合后数据没人每天只有一条数据
select id,datestr,sum(amount) as amount from deal_tb group by id,datestr
排序完按id求row_number
select *,row_number()over(partition by id order by datestr) as rank from (select id,datestr,sum(amount) as amount from deal_tb group by id,datestr) as t
然后用时间减去row_number,一样就是连续的(时间加了1,row_number也加了1,所以可以判断)
select *,date_sub(datestr,rank)as time from (select *,row_number()over(partition by id order by datestr) as rank from (select id,datestr,sum(amount) as amount from deal_tb group by id,datestr) as t ) as tt
最后用id和减的时间进行分组,求sum(amount)
select id,time,sum(amount) as amount from (select *,date_sub(datestr,rank)as time from (select *,row_number()over(partition by id order by datestr) as rank from (select id,datestr,sum(amount) as amount from deal_tb group by id,datestr) as t ) as tt ) as ttt group by id,time
统计用户连续交易的总额、连续登陆天数、连续登陆开始和结束时间、间隔天数
select ttt1.id
,ttt1.grp
,round(sum(ttt1.sum_amount),2) as sc_sum_amount
,count(1) as sc_days
,min(ttt1.datestr) as sc_start_date
,max(ttt1.datestr) as sc_end_date
,datediff(ttt1.grp,lag(ttt1.grp,1) over(partition by ttt1.id order by ttt1.grp)) as iv_days
from(
select tt1.id
,tt1.datestr
,tt1.sum_amount
,date_sub(tt1.datestr,rn) as grp
from(
select t1.id
,t1.datestr
,t1.sum_amount
,row_number() over(partition by id order by datestr) as rn
from(
select id
,datestr
,sum(amount) as sum_amount
from deal_tb
group by id,datestr
) t1
) tt1
) ttt1
group by ttt1.id,ttt1.grp;
数据:
id datestr amount
1,2019-02-08,6214.23
1,2019-02-08,6247.32
1,2019-02-09,85.63
1,2019-02-09,967.36
1,2019-02-10,85.69
1,2019-02-12,769.85
1,2019-02-13,943.86
1,2019-02-14,538.42
1,2019-02-15,369.76
1,2019-02-16,369.76
1,2019-02-18,795.15
1,2019-02-19,715.65
1,2019-02-21,537.71
2,2019-02-08,6214.23
2,2019-02-08,6247.32
2,2019-02-09,85.63
2,2019-02-09,967.36
2,2019-02-10,85.69
2,2019-02-12,769.85
2,2019-02-13,943.86
2,2019-02-14,943.18
2,2019-02-15,369.76
2,2019-02-18,795.15
2,2019-02-19,715.65
2,2019-02-21,537.71
3,2019-02-08,6214.23
3,2019-02-08,6247.32
3,2019-02-09,85.63
3,2019-02-09,967.36
3,2019-02-10,85.69
3,2019-02-12,769.85
3,2019-02-13,943.86
3,2019-02-14,276.81
3,2019-02-15,369.76
3,2019-02-16,369.76
3,2019-02-18,795.15
3,2019-02-19,715.65
3,2019-02-21,537.71
|