每月涨粉量和当前总粉丝数
题目链接
1. 数据准备
DROP TABLE IF EXISTS tb_user_video_log, tb_video_info;
CREATE TABLE tb_user_video_log (
id INT PRIMARY KEY AUTO_INCREMENT COMMENT '自增ID',
uid INT NOT NULL COMMENT '用户ID',
video_id INT NOT NULL COMMENT '视频ID',
start_time datetime COMMENT '开始观看时间',
end_time datetime COMMENT '结束观看时间',
if_follow TINYINT COMMENT '是否关注',
if_like TINYINT COMMENT '是否点赞',
if_retweet TINYINT COMMENT '是否转发',
comment_id INT COMMENT '评论ID'
) CHARACTER SET utf8 COLLATE utf8_bin;
CREATE TABLE tb_video_info (
id INT PRIMARY KEY AUTO_INCREMENT COMMENT '自增ID',
video_id INT UNIQUE NOT NULL COMMENT '视频ID',
author INT NOT NULL COMMENT '创作者ID',
tag VARCHAR(16) NOT NULL COMMENT '类别标签',
duration INT NOT NULL COMMENT '视频时长(秒数)',
release_time datetime NOT NULL COMMENT '发布时间'
)CHARACTER SET utf8 COLLATE utf8_bin;
INSERT INTO tb_user_video_log(uid, video_id, start_time, end_time, if_follow, if_like, if_retweet, comment_id) VALUES
(101, 2001, '2020-09-01 10:00:00', '2020-09-01 10:00:20', 0, 1, 1, null)
,(105, 2002, '2021-09-10 11:00:00', '2021-09-10 11:00:30', 1, 0, 1, null)
,(101, 2001, '2021-10-01 10:00:00', '2021-10-01 10:00:20', 1, 1, 1, null)
,(102, 2001, '2021-10-01 10:00:00', '2021-10-01 10:00:15', 0, 0, 1, null)
,(103, 2001, '2021-10-01 11:00:50', '2021-10-01 11:01:15', 1, 1, 0, 1732526)
,(106, 2002, '2021-10-01 10:59:05', '2021-10-01 11:00:05', 2, 0, 0, null)
,(107, 2002, '2021-11-01 10:59:05', '2021-11-01 11:00:05', 1, 0, 0, null)
,(108, 2002, '2021-11-01 10:59:05', '2021-11-01 11:00:05', 1, 0, 0, null)
,(109, 2002, '2021-11-01 10:59:05', '2021-11-01 11:00:05', 0, 0, 0, null)
,(104, 2004, '2021-11-02 11:00:50', '2021-11-02 11:01:15', 2, 1, 1, 2932521)
,(107, 2004, '2021-10-01 11:00:00', '2021-10-01 11:00:30', 1, 0, 1, null)
,(108, 2004, '2021-10-01 11:59:05', '2021-10-01 12:00:05', 0, 0, 0, null)
,(101, 2004, '2021-10-01 13:00:00', '2021-10-01 13:00:30', 1, 0, 1, null)
,(102, 2004, '2021-10-01 13:59:05', '2021-10-01 14:00:05', 1, 0, 1, null)
,(103, 2004, '2021-10-02 13:59:05', '2021-10-02 14:00:05', 1, 0, 1, null)
,(104, 2003, '2021-09-03 10:00:00', '2021-09-03 10:00:42', 1, 0, 0, null)
,(105, 2003, '2021-09-01 10:00:00', '2021-09-01 10:01:07', 1, 0, 1, null);
INSERT INTO tb_video_info(video_id, author, tag, duration, release_time) VALUES
(2001, 901, '影视', 30, '2020-01-01 7:00:00')
,(2002, 901, '影视', 60, '2021-01-01 7:00:00')
,(2003, 902, '旅游', 90, '2020-01-01 7:00:00')
,(2004, 902, '美女', 90, '2020-01-01 8:00:00');
2. 查询
SELECT * FROM tb_user_video_log; SELECT * FROM tb_video_info;
3.问题
- 求2021年每个创作者每月的涨粉率及截止当前的总粉丝量
**注:**每月的涨粉率= (每月的加粉量-每月掉粉量)/每月的视频总播放量
? 截止当前的总粉丝量 : (每月的加粉量-每月掉粉量) 的累加
难点:这里的截止当前的总粉丝量怎么求解?
4.解法:分组函数
WITH t AS (
SELECT
A.author,
LEFT (DATE(B.start_time), 7) AS DATE,
SUM(
CASE B.if_follow
WHEN 2 THEN
- 1
WHEN 1 THEN
1
WHEN 0 THEN
0
END
) fans_growth,
COUNT(START_TIME) total_video_count
FROM
tb_video_info A,
tb_user_video_log B
WHERE
A.video_id = B.video_id
AND YEAR (start_time) = 2021
GROUP BY
A.author,
LEFT (DATE(B.start_time), 7)
ORDER BY
A.author,
LEFT (DATE(B.start_time), 7)
) SELECT
author,
date AS MONTH,
ROUND(
fans_growth / total_video_count,
3
) fans_growth_rate,
(
SELECT
SUM(fans_growth)
FROM
t t1
WHERE
t1.author = t.author
AND t1.date <= t.date
) total_fans
FROM
t
ORDER BY
author,
total_fans ASC;
这种解法是使用查询语句实现 求解 截止目前的粉丝量:实现了对月粉丝量的累加(常规解法,比较慢)。
(
SELECT
SUM(fans_growth)
FROM
t t1
WHERE
t1.author = t.author
AND t1.date <= t.date
) total_fans
5.解法:窗口函数
这两种解法显著的区别就是在求解 截止当前总粉丝量时的不同。
SELECT
author,
left(start_time, 7) month,
ROUND(SUM(CASE if_follow
WHEN 0 THEN 0
WHEN 1 THEN 1
WHEN 2 THEN -1
END)/count(start_time), 3) fans_growth_rate,
SUM(CASE if_follow
WHEN 0 THEN 0
WHEN 1 THEN 1
WHEN 2 THEN -1
END) month_fans,
SUM(SUM(CASE if_follow
WHEN 0 THEN 0
WHEN 1 THEN 1
WHEN 2 THEN -1
END)) over(partition by author ORDER BY left(start_time, 7)) total_fans
FROM
tb_video_info A,
tb_user_video_log B
WHERE
A.video_id = B.video_id
AND YEAR(start_time) = '2021'
GROUP BY author, left(start_time, 7)
ORDER BY
A.author,
total_fans;
实现代码如下:
SUM(SUM(CASE if_follow
WHEN 0 THEN 0
WHEN 1 THEN 1
WHEN 2 THEN -1
END)) over(partition by author ORDER BY left(start_time, 7)) total_fans
里面的 case when 语句求的是月涨粉量month_fans:
SUM(CASE if_follow
WHEN 0 THEN 0
WHEN 1 THEN 1
WHEN 2 THEN -1
END)
另外从 字段 为 datetime 类型的日期里面,即含有年月日时分秒的字段中提取年和月,可以使用截取,也可以使用提取年和月的日期函数,如下:
SELECT DATE_FORMAT('2021-09-01','%Y-%m') AS ym
总结:对于 问题:计算2021年里每个创作者每月的涨粉率及截止当月的总粉丝量。我们首先应进行分解,分析条件:年份:2021,其次是涨粉率的求法和截止当月的总粉丝量的求法,根据 作者id、年月 分组,注意 窗口函数里 使用 sum() 求解 **截止当月的总粉丝量 **的使用。
|