一、分组topk
(1)建测试表
-- 员工表
create table member (
?? ?`id` INT UNSIGNED NOT NULL auto_increment PRIMARY KEY,
?? ?`mgr_id` INT UNSIGNED NOT NULL DEFAULT 0,
?? ?`num` INT UNSIGNED NOT NULL DEFAULT 0
)ENGINE=INNODB;
INSERT INTO member (mgr_id, num) VALUES?
(1, 10),
(1, 20),
(5, 30),
(2, 30),
(3, 60),
(2, 30),
(1, 11),
(1, 21),
(5, 31),
(2, 31),
(3, 61),
(2, 11);
(2)借助聚合函数,每组最大值的员工,大于每组平均值的员工
-- 每组最大值
SELECT m.mgr_id, max(m.num) as max_num from member as m GROUP BY m.mgr_id
-- 大于每组平均值的员工(子查询)
SELECT * from member as m
where num > (select avg(num) from member where mgr_id = m.mgr_id);
-- 大于每组平均值的员工(关联查询)
SELECT a.*, b.avg from member as a
left join (
SELECT m.mgr_id, avg(m.num) as avg from member as m GROUP BY m.mgr_id
) as b
on a.mgr_id = b.mgr_id
where a.num > b.avg;
-- 每组最大值的员工
SELECT a.*, b.max_num from member as a
left join (
SELECT m.mgr_id, max(m.num) as max_num from member as m GROUP BY m.mgr_id
) as b
on a.mgr_id = b.mgr_id
where a.num = b.max_num;
(3)每组topk
-- 方法一:给表增加组内排行字段,然后选择排行小于3的员工
-- 增加组内排行字段
select b.*, (select count(1)+1 from member a where a.mgr_id = b.mgr_id and a.num > b.num) as rk
from member b
order by mgr_id, rk;
-- 组内排行小于3的员工(有重复排行)(给表增加组内排行字段,然后选择排行小于3的员工)
select b.*, (select count(1)+1 from member a where a.mgr_id = b.mgr_id and a.num > b.num) as rk
from member b
having rk < 3
order by mgr_id, rk;
-- 扫描行,过滤:值 >= 本组内排名第二的值
-- 在每组前两个id的员工
select * from member a
where num >= (
select num from member where mgr_id = a.mgr_id order by num desc limit 1,1
)
order by mgr_id, num desc;
-- 扫描行,过滤:id 在 本组内排名第二名的id
-- 在每组前两个id的员工,不支持 in (select ... limit) 子查询
select * from member a
where id = (
select id from member where mgr_id = a.mgr_id order by num desc limit 0,1
) or id = (
select id from member where mgr_id = a.mgr_id order by num desc limit 1,1
)
order by mgr_id, num desc;
-- 扫描行,过滤:本组内排行 < 2
-- 每组比当前大的个数少于2个
select * from member a
where (
select count(*) from member b
where a.mgr_id = b.mgr_id and b.num > a.num
) < 2
order by mgr_id, num;
二、日志统计,日活topk
? ? ? ?与上面的不同是,在日志中,以 login_at 分组的 uid 是重复的。
(1)建测试表
create table login_log (
`id` INT UNSIGNED NOT NULL auto_increment PRIMARY KEY,
`uid` INT UNSIGNED NOT NULL DEFAULT 0,
`login_at` datetime NOT NULL DEFAULT CURRENT_TIMESTAMP,
`ip` varchar(32) DEFAULT NULL
)ENGINE=INNODB;
INSERT INTO `test`.`login_log`(`uid`, `login_at`, `ip`) VALUES
(1, '2021-10-15 00:11:34', '1.1.1.1'),
(1, '2021-10-15 00:13:19', '1.1.1.2'),
(2, '2021-10-15 00:13:23', '1.1.1.2'),
(7, '2021-10-15 00:13:26', '1.1.1.1'),
(3, '2021-10-15 00:13:34', '1.1.1.3'),
(5, '2021-10-15 00:11:34', '1.1.1.4'),
(1, '2021-10-16 00:13:19', '1.1.1.5'),
(2, '2021-10-16 00:13:19', '1.1.1.1'),
(4, '2021-10-16 00:13:19', '1.1.1.2'),
(1, '2021-10-16 00:13:19', '1.1.1.3'),
(1, '2021-10-16 00:13:19', '1.1.1.6'),
(3, '2021-10-16 00:13:19', '1.1.1.1'),
(2, '2021-10-17 00:13:19', '1.1.1.1'),
(2, '2021-10-17 00:13:19', '1.1.1.1'),
(1, '2021-10-17 00:13:23', '1.1.1.1'),
(1, '2021-10-17 00:13:23', '1.1.1.1'),
(6, '2021-10-17 00:13:26', '1.1.1.1'),
(2, '2021-10-17 00:13:34', '1.1.1.5');
(2)统计每天登录次数前2的用户
-- (1)创建表, 每天-每个用户-登录次数 -- 两级分组,插入列
CREATE TABLE uid_cnt_log AS
select date(login_at) as date, uid, count(*) as cnt from login_log group by date(login_at), uid;
-- 查看表
select * from uid_cnt_log;
-- (2)每天,登录次数前2的用户
select b.*, (select count(1)+1 from uid_cnt_log as a where date(a.date) = date(b.date) and a.cnt > b.cnt) as rk
from uid_cnt_log as b
having rk < 3
order by date, rk;
-- (3)删除临时表
DROP TABLE uid_cnt_log;
-- 每天,每个用户,每个ip,登录次数
select date(login_at) as date, uid, ip, count(*) as cnt from login_log
group by date(login_at), uid, ip
order by date(login_at), cnt desc;
-- 每天,每个用户,登录ip数,登录次数
select date(login_at) as date, uid, count(distinct(ip)) as ip_cnt, count(*) as cnt from login_log
group by date(login_at), uid
order by date(login_at), ip_cnt desc, cnt desc;
-- 每天,每个ip,登录uid数,登录次数
select date(login_at) as date, ip, count(distinct(uid)) as uid_cnt, count(*) as cnt from login_log
group by date(login_at), ip
order by date(login_at), uid_cnt desc, cnt desc;
|