拼多多面试题:如何找出连续出现N次的内容?
视频这么火,你会这么分析吗? - 知乎
一个select中不能写两个开窗函数
在一个select中开窗函数的结果不能作为筛选条件
表名:user_online_record
字段名:user_id,online_date
用到的函数
窗口函数 row_number,lead,lag 日期函数 month,date_sub 特殊函数 coalesce
1.每个月每个用户的每个日期的登陆顺序
select *,
month(online_date) as 月,
row_number() over (partition by month(online_date), user_id order by online_date) as 每个月每个日期的登陆顺序
from user_online_record
?2.每个月每个用户的每个日期的下一次登陆日期
select *,
month(online_date) as 月,
lead(online_date,1,'当月最后登陆日期') over (partition by month(online_date), user_id order by online_date) as 当月下一次登陆日期
from user_online_record
?2.1带上登陆顺序
select *,
lead(online_date,1,'当月最后登陆日期') over (partition by month(online_date), user_id order by online_date) as 当月下一次登陆日期
from (
select *,month(online_date) as 月,
row_number() over (partition by month(online_date), user_id order by online_date) as 每个月每个日期的登陆顺序
from user_online_record
) as t1
?2.2.筛选出中止登陆的日期
select * from(
select *,lead(online_date,1,'当月最后登陆日期') over (partition by month(online_date), user_id order by online_date) as 当月下一次登陆日期
from (
select *,month(online_date) as 月,row_number() over (partition by month(online_date), user_id order by online_date) as 每个月每个日期的登陆顺序
from user_online_record
) as t1
) as t2
where date_sub(当月下一次登陆日期,interval 1 day) <> online_date or 当月下一次登陆日期='当月最后登陆日期';
?3.上次登陆日期的登陆顺序
select *,
lag(每个月每个日期的登陆顺序,1) over (partition by month(online_date),user_id order by '每个月每个日期的登陆顺序') as '上次登陆顺序'
from(
select *,lead(online_date,1,'当月最后登陆日期') over (partition by month(online_date), user_id order by online_date) as 当月下一次登陆日期
from (
select *,month(online_date) as 月,row_number() over (partition by month(online_date), user_id order by online_date) as 每个月每个日期的登陆顺序
from user_online_record
) as t1
) as t2
where date_sub(当月下一次登陆日期,interval 1 day) <> online_date or 当月下一次登陆日期='当月最后登陆日期';
4.最后一次登陆日期的登陆顺序减去最后一次的前一次的登陆日期的登陆顺序即为连续登陆的天数?
select *,
每个月每个日期的登陆顺序-coalesce(lag(每个月每个日期的登陆顺序,1) over (partition by month(online_date),user_id order by '每个月每个日期的登陆顺序'),0) as '连续登陆天数'
from(
select *,lead(online_date,1,'当月最后登陆日期') over (partition by month(online_date), user_id order by online_date) as 当月下一次登陆日期
from (
select *,month(online_date) as 月,row_number() over (partition by month(online_date), user_id order by online_date) as 每个月每个日期的登陆顺序
from user_online_record
) as t1
) as t2
where date_sub(当月下一次登陆日期,interval 1 day) <> online_date or 当月下一次登陆日期='当月最后登陆日期';
?5.再套一层,以连续登陆天数为条件删选
select distinct 月,user_id
from (
select *,
每个月每个日期的登陆顺序-coalesce(lag(每个月每个日期的登陆顺序,1) over (partition by month(online_date),user_id order by '每个月每个日期的登陆顺序'),0) as '连续登陆天数'
from(
select *,lead(online_date,1,'当月最后登陆日期') over (partition by month(online_date), user_id order by online_date) as 当月下一次登陆日期
from (
select *,month(online_date) as 月,row_number() over (partition by month(online_date), user_id order by online_date) as 每个月每个日期的登陆顺序
from user_online_record
) as t1
) as t2
where date_sub(当月下一次登陆日期,interval 1 day) <> online_date or 当月下一次登陆日期='当月最后登陆日期'
) as t3 where 连续登陆天数>=2;
综上:每个月连续登陆超过2天的用户
|