SQL7 2021年11月每天的人均浏览文章时长
DROP TABLE IF EXISTS tb_user_log;
CREATE TABLE tb_user_log (
id INT PRIMARY KEY AUTO_INCREMENT COMMENT '自增ID',
uid INT NOT NULL COMMENT '用户ID',
artical_id INT NOT NULL COMMENT '视频ID',
in_time datetime COMMENT '进入时间',
out_time datetime COMMENT '离开时间',
sign_in TINYINT DEFAULT 0 COMMENT '是否签到'
) CHARACTER SET utf8 COLLATE utf8_bin;
INSERT INTO tb_user_log(uid, artical_id, in_time, out_time, sign_in) VALUES
(101, 9001, '2021-11-01 10:00:00', '2021-11-01 10:00:31', 0),
(102, 9001, '2021-11-01 10:00:00', '2021-11-01 10:00:24', 0),
(102, 9002, '2021-11-01 11:00:00', '2021-11-01 11:00:11', 0),
(101, 9001, '2021-11-02 10:00:00', '2021-11-02 10:00:50', 0),
(102, 9002, '2021-11-02 11:00:01', '2021-11-02 11:00:24', 0);
select date(in_time) dt,
round(sum((timestampdiff(second, in_time, out_time)))/ count(distinct uid), 1) used_time
from tb_user_log
where date_format(in_time, '%Y%m')= '202111'
and artical_id != 0
and timestampdiff(second, in_time, out_time) > 0
group by dt
order by used_time asc;
SQL8 每篇文章同一时刻最大在看人数
DROP TABLE IF EXISTS tb_user_log;
CREATE TABLE tb_user_log (
id INT PRIMARY KEY AUTO_INCREMENT COMMENT '自增ID',
uid INT NOT NULL COMMENT '用户ID',
artical_id INT NOT NULL COMMENT '视频ID',
in_time datetime COMMENT '进入时间',
out_time datetime COMMENT '离开时间',
sign_in TINYINT DEFAULT 0 COMMENT '是否签到'
) CHARACTER SET utf8 COLLATE utf8_bin;
INSERT INTO tb_user_log(uid, artical_id, in_time, out_time, sign_in) VALUES
(101, 9001, '2021-11-01 10:00:00', '2021-11-01 10:00:11', 0),
(102, 9001, '2021-11-01 10:00:09', '2021-11-01 10:00:38', 0),
(103, 9001, '2021-11-01 10:00:28', '2021-11-01 10:00:58', 0),
(104, 9002, '2021-11-01 11:00:45', '2021-11-01 11:01:11', 0),
(105, 9001, '2021-11-01 10:00:51', '2021-11-01 10:00:59', 0),
(106, 9002, '2021-11-01 11:00:55', '2021-11-01 11:01:24', 0),
(107, 9001, '2021-11-01 10:00:01', '2021-11-01 10:01:50', 0);
select artical_id,max(cnt) cnt from
(
select artical_id,in_time,
sum(cnt) over (partition by artical_id order by in_time,cnt desc) cnt
-- 按照时间倒序是否可以???!!!
from
(
select
artical_id,in_time, 1 cnt
from tb_user_log
where 1=1
and artical_id != 0
union all
select
artical_id,out_time, -1 cnt
from tb_user_log
where 1=1
and artical_id != 0
order by artical_id,in_time asc
) t
) t
group by artical_id
order by cnt desc ;
SQL9 2021年11月每天新用户的次日留存率
DROP TABLE IF EXISTS tb_user_log;
CREATE TABLE tb_user_log (
id INT PRIMARY KEY AUTO_INCREMENT COMMENT '自增ID',
uid INT NOT NULL COMMENT '用户ID',
artical_id INT NOT NULL COMMENT '视频ID',
in_time datetime COMMENT '进入时间',
out_time datetime COMMENT '离开时间',
sign_in TINYINT DEFAULT 0 COMMENT '是否签到'
) CHARACTER SET utf8 COLLATE utf8_bin;
INSERT INTO tb_user_log(uid, artical_id, in_time, out_time, sign_in) VALUES
(101, 0, '2021-11-01 10:00:00', '2021-11-01 10:00:42', 1),
(102, 9001, '2021-11-01 10:00:00', '2021-11-01 10:00:09', 0),
(103, 9001, '2021-11-01 10:00:01', '2021-11-01 10:01:50', 0),
(101, 9002, '2021-11-02 10:00:09', '2021-11-02 10:00:28', 0),
(103, 9002, '2021-11-02 10:00:51', '2021-11-02 10:00:59', 0),
(104, 9001, '2021-11-02 10:00:28', '2021-11-02 10:00:50', 0),
(101, 9003, '2021-11-03 11:00:55', '2021-11-03 11:01:24', 0),
(104, 9003, '2021-11-03 11:00:45', '2021-11-03 11:00:55', 0),
(105, 9003, '2021-11-03 11:00:53', '2021-11-03 11:00:59', 0),
(101, 9002, '2021-11-04 11:00:55', '2021-11-04 11:00:59', 0);
with user as
( select uid,date(in_time) login_dt from tb_user_log
union -- 多次登录需要去重
select uid,date(out_time) login_dt from tb_user_log
) -- 将所有日期垒起来,都算是活跃的日期
select first_login_date,
round(sum(if(is_con=1,1,0))/count(distinct uid),2) is_con
-- 登录+活跃的人数/唯一总人数=留存率
from (
select u1.uid,u1.first_login_date ,u2.uid u2id,u2.login_dt,datediff(login_dt,first_login_date) is_con
from(
select uid,min(login_dt) first_login_date from user group by uid
) u1
left join
(select uid,login_dt from user) u2 on u1.uid=u2.uid
where date_format(u1.first_login_date,'%Y-%m')='2021-11'
-- 最小日期左外连所有日期,日期之差为1则为登录后第二天又活跃了
)t
group by first_login_date
order by first_login_date asc;
with user as
( select uid,date(in_time) login_dt from tb_user_log
union -- 多次登录需要去重
select uid,date(out_time) login_dt from tb_user_log
) -- 将所有日期垒起来,都算是活跃的日期
select first_login_date,
round(count(u2id)/count(uid) ,2)
-- 第二天登录旧用户数/第一天新用户数
from
(
select u1.uid,u1.first_login_date ,u2.uid u2id,u2.login_dt,datediff(login_dt,first_login_date) is_con
from(
select uid,min(login_dt) first_login_date from user group by uid
) u1
left join
(select uid,login_dt from user) u2 on u1.uid=u2.uid and u1.first_login_date=date_sub(u2.login_dt,interval 1 day)
-- 增加连接匹配条件,第二天没登录的就为null了
where date_format(u1.first_login_date,'%Y-%m')='2021-11'
-- 最小日期左外连所有日期,日期之差为1则为登录后第二天又活跃了
)t
group by first_login_date
order by first_login_date;
with user as
( select uid,date(in_time) login_dt from tb_user_log
union -- 多次登录需要去重
select uid,date(out_time) login_dt from tb_user_log
) -- 将所有日期垒起来,都算是活跃的日期
select register_dt,
sum(if(datediff(next_dt,register_dt)=1,1,0))/count(distinct uid)
from (
select
uid,
min(login_dt) over (partition by uid order by uid asc,login_dt asc) register_dt,
lead(login_dt,1) over (partition by uid order by uid asc,login_dt asc) next_dt
from user
where date_format(login_dt,'%Y-%m')='2021-11'
)t
group by register_dt
SQL10 统计活跃间隔对用户分级结果
DROP TABLE IF EXISTS tb_user_log;
CREATE TABLE tb_user_log (
id INT PRIMARY KEY AUTO_INCREMENT COMMENT '自增ID',
uid INT NOT NULL COMMENT '用户ID',
artical_id INT NOT NULL COMMENT '视频ID',
in_time datetime COMMENT '进入时间',
out_time datetime COMMENT '离开时间',
sign_in TINYINT DEFAULT 0 COMMENT '是否签到'
) CHARACTER SET utf8 COLLATE utf8_bin;
INSERT INTO tb_user_log(uid, artical_id, in_time, out_time, sign_in) VALUES
(109, 9001, '2021-08-31 10:00:00', '2021-08-31 10:00:09', 0),
(109, 9002, '2021-11-04 11:00:55', '2021-11-04 11:00:59', 0),
(108, 9001, '2021-09-01 10:00:01', '2021-09-01 10:01:50', 0),
(108, 9001, '2021-11-03 10:00:01', '2021-11-03 10:01:50', 0),
(104, 9001, '2021-11-02 10:00:28', '2021-11-02 10:00:50', 0),
(104, 9003, '2021-09-03 11:00:45', '2021-09-03 11:00:55', 0),
(105, 9003, '2021-11-03 11:00:53', '2021-11-03 11:00:59', 0),
(102, 9001, '2021-10-30 10:00:00', '2021-10-30 10:00:09', 0),
(103, 9001, '2021-10-21 10:00:00', '2021-10-21 10:00:09', 0),
(101, 0, '2021-10-01 10:00:00', '2021-10-01 10:00:42', 1);
忠实用户(近7天活跃过且非新晋用户) 登录时间在近7天且注册时间在7天之前
新晋用户(近7天新增) 注册时间在7天之内
沉睡用户(近7天未活跃但更早前活跃过) 近7天没有登录,注册时间在7天之前
流失用户(近30天未活跃但更早前活跃过) 近30天没有登录,注册时间在30天之前
构造列表:当前所有用户id、注册时间、最后登录时间
with user as
(
select uid,date(min(in_time)) register_dt,date(max(out_time)) latest_dt
from tb_user_log
group by uid
)
select u.uid,u.register_dt,
l1.in_time '忠实用户',
case
when l1.in_time is null and l3.in_time is null and l4.in_time is null then l2.in_time else null
end '新晋用户',
case
when l1.in_time is null and l2.in_time is null and l4.in_time is null then l3.in_time else null
end '沉睡用户',
case
when l1.in_time is null and l2.in_time is null then l4.in_time else null
end '流失用户'
from user u
left join tb_user_log l1 on u.uid=l1.uid and datediff('2021-11-04',register_dt) >=6 and l1.in_time >=date_sub('2021-11-04',interval 6 day)
left join tb_user_log l2 on u.uid=l2.uid and datediff('2021-11-04',register_dt) <=6
left join tb_user_log l3 on u.uid=l3.uid and datediff('2021-11-04',register_dt) >=6 and l3.in_time < date_sub('2021-11-04',interval 6 day)
left join tb_user_log l4 on u.uid=l4.uid and datediff('2021-11-04',register_dt) >=29 and l4.in_time < date_sub('2021-11-04',interval 29 day)
;
忠实用户(近7天活跃过且非新晋用户) 登录时间在近7天且注册时间在7天之前
新晋用户(近7天新增) 注册时间在7天之内
沉睡用户(近7天未活跃但更早前活跃过) 近7天没有登录,注册时间在7天之前
流失用户(近30天未活跃但更早前活跃过) 近30天没有登录,注册时间在30天之前
-- left 连接可能会存在重复登录的数据
with user as
(
select uid,date(min(in_time)) register_dt,date(max(out_time)) latest_dt
from tb_user_log
group by uid
)
select u.uid,u.register_dt,
l1.in_time '7day_old',
l2.in_time '7day_new',
l3.in_time '7day_login',
l4.in_time '30_login'
from user u
left join tb_user_log l1 on u.uid=l1.uid
and u.register_dt < date_sub('2021-11-04',interval 6 day) -- 注册时间在7天之前
and l1.in_time >=date_sub('2021-11-04',interval 6 day) -- 近7天有登录
left join tb_user_log l2 on u.uid=l2.uid
and datediff('2021-11-04',register_dt) <=6 -- 近7天新进用户
left join tb_user_log l3 on u.uid=l3.uid
and u.latest_dt < date_sub('2021-11-04',interval 6 day) -- 最后登录日期在7天之内
and u.latest_dt >= date_sub('2021-11-04',interval 29 day) -- 30天前没有登录过
left join tb_user_log l4 on u.uid=l4.uid
and u.latest_dt <= date_sub('2021-11-04',interval 29 day); -- 最后登录日期在30天之前
with user as
(
select uid,date(min(in_time)) register_dt,date(max(out_time)) latest_dt
from tb_user_log
group by uid
)
select grade,round(count(grade)/(select count(*) from user limit 1),2) ratio
from (
select
uid,
case
when 7day_old is not null then '忠实用户'
when 7day_new is not null then '新晋用户'
when 7day_login is not null then '沉睡用户'
when 30_login is not null then '流失用户'
end grade
from (
select uid,max(7day_old) 7day_old,max(7day_new) 7day_new,max(7day_login) 7day_login,max(30_login) 30_login
-- group by 保留唯一数据
from (
select u.uid,u.register_dt,
l1.in_time '7day_old',
l2.in_time '7day_new',
l3.in_time '7day_login',
l4.in_time '30_login'
from user u
left join tb_user_log l1 on u.uid=l1.uid
and u.register_dt < date_sub('2021-11-04',interval 6 day) -- 注册时间在7天之前
and l1.in_time >=date_sub('2021-11-04',interval 6 day) -- 近7天有登录
left join tb_user_log l2 on u.uid=l2.uid
and datediff('2021-11-04',register_dt) <=6 -- 近7天新进用户
left join tb_user_log l3 on u.uid=l3.uid
and u.latest_dt < date_sub('2021-11-04',interval 6 day) -- 最后登录日期在7天之内
and u.latest_dt >= date_sub('2021-11-04',interval 29 day) -- 30天前没有登录过
left join tb_user_log l4 on u.uid=l4.uid
and u.latest_dt <= date_sub('2021-11-04',interval 29 day) -- 最后登录日期在30天之前)
) t group by uid
) t
)t group by grade
order by ratio desc
with user as
(
select uid,date(min(in_time)) register_dt,date(max(out_time)) latest_dt
from tb_user_log
group by uid
),
current_dt as
(
select max(out_time) cur_dt from tb_user_log
)
select user_grade,round(count(user_grade)/amount,2) ratio from (
select
u.uid,
case
when datediff(cur_dt,register_dt)>6 and datediff(cur_dt,latest_dt)<=6 then '忠实用户'
when datediff(cur_dt,register_dt)<=6 then '新晋用户'
when datediff(cur_dt,latest_dt)<=29 and datediff(cur_dt,latest_dt)> 6 then '沉睡用户'
when datediff(cur_dt,latest_dt)>29 then '流失用户'
-- else '流失用户'
end user_grade,
count(*) over() amount
from user u
left join current_dt c on 1=1 -- 每一行都附加上当前最大日期
) t
group by user_grade,amount
order by ratio desc
-- 大佬的做法
SELECT user_grade, round(count(uid)/(select count(distinct uid) from tb_user_log),2) ratio
FROM (SELECT uid,
(CASE WHEN DATEDIFF(DATE((SELECT MAX(in_time) FROM tb_user_log)),date(max(in_time)))<=6
AND DATEDIFF(DATE((SELECT MAX(in_time) FROM tb_user_log)),date(min(in_time)))>6
THEN '忠实用户'
WHEN DATEDIFF(DATE((SELECT MAX(in_time) FROM tb_user_log)),date(min(in_time)))<=6
THEN '新晋用户'
WHEN DATEDIFF(DATE((SELECT MAX(in_time) FROM tb_user_log)),date(max(in_time))) BETWEEN 7 AND 29
THEN '沉睡用户'
WHEN DATEDIFF(DATE((SELECT MAX(in_time) FROM tb_user_log)),date(max(in_time)))>29
THEN '流失用户' END) AS user_grade
FROM tb_user_log
GROUP BY uid) a
GROUP BY user_grade
ORDER BY ratio DESC;
SQL11 每天的日活数及新用户占比
DROP TABLE IF EXISTS tb_user_log;
CREATE TABLE tb_user_log (
id INT PRIMARY KEY AUTO_INCREMENT COMMENT '自增ID',
uid INT NOT NULL COMMENT '用户ID',
artical_id INT NOT NULL COMMENT '视频ID',
in_time datetime COMMENT '进入时间',
out_time datetime COMMENT '离开时间',
sign_in TINYINT DEFAULT 0 COMMENT '是否签到'
) CHARACTER SET utf8 COLLATE utf8_bin;
INSERT INTO tb_user_log(uid, artical_id, in_time, out_time, sign_in) VALUES
(101, 9001, '2021-10-31 10:00:00', '2021-10-31 10:00:09', 0),
(102, 9001, '2021-10-31 10:00:00', '2021-10-31 10:00:09', 0),
(101, 0, '2021-11-01 10:00:00', '2021-11-01 10:00:42', 1),
(102, 9001, '2021-11-01 10:00:00', '2021-11-01 10:00:09', 0),
(108, 9001, '2021-11-01 10:00:01', '2021-11-01 10:01:50', 0),
(108, 9001, '2021-11-02 10:00:01', '2021-11-02 10:01:50', 0),
(104, 9001, '2021-11-02 10:00:28', '2021-11-02 10:00:50', 0),
(106, 9001, '2021-11-02 10:00:28', '2021-11-02 10:00:50', 0),
(108, 9001, '2021-11-03 10:00:01', '2021-11-03 10:01:50', 0),
(109, 9002, '2021-11-03 11:00:55', '2021-11-03 11:00:59', 0),
(104, 9003, '2021-11-03 11:00:45', '2021-11-03 11:00:55', 0),
(105, 9003, '2021-11-03 11:00:53', '2021-11-03 11:00:59', 0),
(106, 9003, '2021-11-03 11:00:45', '2021-11-03 11:00:55', 0);
-
题目 问题:统计每天的日活数及新用户占比 注:
-
SQL
select login_dt,count(distinct uid) dau,round(sum(is_con)/count(distinct uid) ,2) ratio
from (
select u.uid,u.dt login_dt,ur.re_dt,
if(u.dt=ur.re_dt,1,0) is_con
from (
select uid,date(in_time) dt from tb_user_log
union
select uid,date(out_time) dt from tb_user_log
) u
left join (
select uid,min(date(in_time)) re_dt from tb_user_log
group by uid
) ur on u.uid=ur.uid )t
group by login_dt
order by login_dt asc
SQL12 连续签到领金币
DROP TABLE IF EXISTS tb_user_log;
CREATE TABLE tb_user_log (
id INT PRIMARY KEY AUTO_INCREMENT COMMENT '自增ID',
uid INT NOT NULL COMMENT '用户ID',
artical_id INT NOT NULL COMMENT '视频ID',
in_time datetime COMMENT '进入时间',
out_time datetime COMMENT '离开时间',
sign_in TINYINT DEFAULT 0 COMMENT '是否签到'
) CHARACTER SET utf8 COLLATE utf8_bin;
INSERT INTO tb_user_log(uid, artical_id, in_time, out_time, sign_in) VALUES
(101, 0, '2021-07-07 10:00:00', '2021-07-07 10:00:09', 1),
(101, 0, '2021-07-08 10:00:00', '2021-07-08 10:00:09', 1),
(101, 0, '2021-07-09 10:00:00', '2021-07-09 10:00:42', 1),
(101, 0, '2021-07-10 10:00:00', '2021-07-10 10:00:09', 1),
(101, 0, '2021-07-11 23:59:55', '2021-07-11 23:59:59', 1),
(101, 0, '2021-07-12 10:00:28', '2021-07-12 10:00:50', 1),
(101, 0, '2021-07-13 10:00:28', '2021-07-13 10:00:50', 1),
(102, 0, '2021-10-01 10:00:28', '2021-10-01 10:00:50', 1),
(102, 0, '2021-10-02 10:00:01', '2021-10-02 10:01:50', 1),
(102, 0, '2021-10-03 11:00:55', '2021-10-03 11:00:59', 1),
(102, 0, '2021-10-04 11:00:45', '2021-10-04 11:00:55', 0),
(102, 0, '2021-10-05 11:00:53', '2021-10-05 11:00:59', 1),
(102, 0, '2021-10-06 11:00:45', '2021-10-06 11:00:55', 1);
-
题目 场景逻辑说明:
-
artical_id-文章ID代表用户浏览的文章的ID,特殊情况artical_id-文章ID为0表示用户在非文章内容页(比如App内的列表页、活动页等)。注意:只有artical_id为0时sign_in值才有效。 -
从2021年7月7日0点开始,用户每天签到可以领1金币,并可以开始累积签到天数,连续签到的第3、7天分别可额外领2、6金币。 -
每连续签到7天后重新累积签到天数(即重置签到天数:连续第8天签到时记为新的一轮签到的第一天,领1金币) 问题:计算每个用户2021年7月以来每月获得的金币数(该活动到10月底结束,11月1日开始的签到不再获得金币)。结果按月份、ID升序排序。 注:如果签到记录的in_time-进入时间和out_time-离开时间跨天了,也只记作in_time对应的日期签到了。
-
SQL
select uid,mon,sum(coins) conns from
(
select uid,in_time,mon,rk1 ,
case
when rk1%7=3 then 3
when rk1%7=0 then 7
else 1
end coins
from (
select uid,in_time,date_format(in_time,'%Y%m') mon,
rank() over(partition by uid,flag order by in_time) rk1
from(
select
uid,
in_time,
date_sub(in_time, interval rk day) flag
from
(
select
distinct uid,date(in_time) in_time,
rank() over(partition by uid order by in_time) rk
from tb_user_log l
where artical_id=0 and sign_in=1 and date(in_time) between '2021-07-07' and '2021-10-31'
) t
) t
) t
) t group by uid,mon order by mon,uid asc
|