IT数码 购物 网址 头条 软件 日历 阅读 图书馆
TxT小说阅读器
↓语音阅读,小说下载,古典文学↓
图片批量下载器
↓批量下载图片,美女图库↓
图片自动播放器
↓图片自动播放器↓
一键清除垃圾
↓轻轻一点,清除系统垃圾↓
开发: C++知识库 Java知识库 JavaScript Python PHP知识库 人工智能 区块链 大数据 移动开发 嵌入式 开发工具 数据结构与算法 开发测试 游戏开发 网络协议 系统运维
教程: HTML教程 CSS教程 JavaScript教程 Go语言教程 JQuery教程 VUE教程 VUE3教程 Bootstrap教程 SQL数据库教程 C语言教程 C++教程 Java教程 Python教程 Python3教程 C#教程
数码: 电脑 笔记本 显卡 显示器 固态硬盘 硬盘 耳机 手机 iphone vivo oppo 小米 华为 单反 装机 图拉丁
 
   -> 大数据 -> mysql 对null 的三种处理方式 -> 正文阅读

[大数据]mysql 对null 的三种处理方式




# Full contents of the sample table `girl`; the rows with id 6 and 7 have a NULL boy_id.
select g.id,
       g.name,
       g.boy_id
from girl as g;

# +--+----+------+
# |id|name|boy_id|
# +--+----+------+
# |1 |A   |1     |
# |2 |B   |1     |
# |3 |C   |3     |
# |4 |C   |3     |
# |5 |C   |3     |
# |6 |D   |NULL  |
# |7 |D   |NULL  |
# |8 |C   |2     |
# +--+----+------+


# NULL never satisfies a comparison: for the rows whose boy_id is NULL the predicate
# below is UNKNOWN rather than TRUE, so those rows (id 6 and 7) are missing from the result.
select g.id,
       g.name,
       g.boy_id
from girl as g
where g.boy_id <> 3;

# +--+----+------+
# |id|name|boy_id|
# +--+----+------+
# |1 |A   |1     |
# |2 |B   |1     |
# |8 |C   |2     |
# +--+----+------+


# To keep the rows with a NULL boy_id they must be requested explicitly with IS NULL,
# because `boy_id != 3` alone filters them out.
select id,
       name,
       boy_id
from girl
where boy_id != 3
   or boy_id is null;

# Result (kept as comments so that the script stays executable):
# +--+----+------+
# |id|name|boy_id|
# +--+----+------+
# |1 |A   |1     |
# |2 |B   |1     |
# |6 |D   |NULL  |
# |7 |D   |NULL  |
# |8 |C   |2     |
# +--+----+------+


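# Common ways to handle NULL
# Method 1: COALESCE(A, B) returns B when A is NULL, otherwise A.
# Method 2: IF(A IS NULL, B, C) returns B when A is NULL, otherwise C.
# Method 3: CASE WHEN ... THEN ... ELSE ... END
#
#  Key points:
#  COUNT(*) counts every row (comparable to COUNT(primary key)), while COUNT(score) skips the rows whose score is NULL,
#  so COUNT(score) / COUNT(*) yields the share of rows that have a score.
#  Reason: COUNT(score) behaves as if a filter had been added: select count(primary key) from A where score is not null ;
#  In short: COUNT(*) is the total number of rows, COUNT(score) is the number of rows with a non-NULL score.

# FORMAT(number, decimals) and ROUND(number, decimals) both round to the given number of decimal places, but they are
# not equivalent: FORMAT returns a string with thousands separators (e.g. '1,234.57'), ROUND returns a number.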



#
# Raw score column of exam_record; two of the seven rows hold NULL.
select er.score
from exam_record as er;
# +-----+
# |score|
# +-----+
# |80   |
# |NULL |
# |87   |
# |20   |
# |89   |
# |NULL |
# |90   |
# +-----+

# COUNT(*) counts every row, including the rows whose score is NULL.
select count(*) from exam_record;

# +--------+
# |count(*)|
# +--------+
# |7       |
# +--------+

# COUNT(score) only counts the rows whose score is not NULL.
select count(score) from exam_record;

# +------------+
# |count(score)|
# +------------+
# |5           |
# +------------+


# Does AVG() include NULL rows in its divisor? It does not:
# avg(score) = sum(score) / count(score), not sum(score) / count(*).
select avg(score) from exam_record;

# +----------+
# |avg(score)|
# +----------+
# |73.2000   |
# +----------+


# Dividing by COUNT(*) also counts the NULL rows, which gives a lower value than AVG().
select sum(score)/count(*) from exam_record;

# +-------------------+
# |sum(score)/count(*)|
# +-------------------+
# |52.2857            |
# +-------------------+



# Dividing by COUNT(score) reproduces AVG(score).
select sum(score)/count(score) from exam_record;

# +-----------------------+
# |sum(score)/count(score)|
# +-----------------------+
# |73.2000                |
# +-----------------------+




# Recreate the exam_record sample table from scratch.
# One row per exam attempt; submit_time and score are nullable, every other column is NOT NULL.
DROP TABLE IF EXISTS exam_record;
CREATE TABLE exam_record (
    id          INT      NOT NULL AUTO_INCREMENT PRIMARY KEY COMMENT '自增ID',
    uid         INT      NOT NULL COMMENT '用户ID',
    exam_id     INT      NOT NULL COMMENT '试卷ID',
    start_time  DATETIME NOT NULL COMMENT '开始时间',
    submit_time DATETIME NULL COMMENT '提交时间',
    score       TINYINT  NULL COMMENT '得分'
) DEFAULT CHARACTER SET = utf8 COLLATE = utf8_general_ci;

# Three attempts by user 1001 on paper 9001; the last one has neither a submit_time nor a score.
INSERT INTO exam_record (uid, exam_id, start_time, submit_time, score)
VALUES (1001, 9001, '2020-01-02 09:01:01', '2020-01-02 09:21:01', 80),
       (1001, 9001, '2021-05-02 10:01:01', '2021-05-02 10:30:01', 81),
       (1001, 9001, '2021-09-02 12:01:01', NULL, NULL);

# Show the seeded rows.
select er.id,
       er.uid,
       er.exam_id,
       er.start_time,
       er.submit_time,
       er.score
from exam_record as er;

# +--+----+-------+-------------------+-------------------+-----+
# |id|uid |exam_id|start_time         |submit_time        |score|
# +--+----+-------+-------------------+-------------------+-----+
# |1 |1001|9001   |2020-01-02 09:01:01|2020-01-02 09:21:01|80   |
# |2 |1001|9001   |2021-05-02 10:01:01|2021-05-02 10:30:01|81   |
# |3 |1001|9001   |2021-09-02 12:01:01|NULL               |NULL |
# +--+----+-------+-------------------+-------------------+-----+

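# Task: for every exam paper that has unfinished attempts, report the number of unfinished attempts (incomplete_cnt) and the unfinished rate (incomplete_rate). Expected output for the sample data:
# exam_id	incomplete_cnt	incomplete_rate
# 9001	       1	             0.333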


#
# Approach 1: keep only the unfinished attempts (submit_time is NULL), group them by paper,
# and divide by the total number of attempts on the same paper.
# The rate is rounded with ROUND(x, 3), which returns a number. FORMAT(x, 3) would also keep three
# decimals (e.g. FORMAT(100.7654, 3) gives '100.765') but returns a formatted string.
# The result column is named incomplete_rate, as the task requires.
# start_time is declared NOT NULL, so no filter on it is needed.
select er.exam_id,
       count(*) as incomplete_cnt,
       round(count(*) / (select count(*)
                         from exam_record as er_all
                         where er_all.exam_id = er.exam_id), 3) as incomplete_rate
from exam_record as er
where er.submit_time is null
group by er.exam_id
order by er.exam_id;


# Approach 2: conditional aggregation. SUM over a 1/0 flag (rather than over a table column)
# works as a conditional COUNT; HAVING keeps only the papers with at least one unfinished attempt.

select exam_id,
       sum(case when score is null then 1 else 0 end) as incomplete_cnt,
       round(sum(case when score is null then 1 else 0 end) / count(start_time), 3) as incomplete_rate
from exam_record
group by exam_id
having incomplete_cnt > 0;

# +-------+--------------+---------------+
# |exam_id|incomplete_cnt|incomplete_rate|
# +-------+--------------+---------------+
# |9001   |1             |0.333          |
# +-------+--------------+---------------+

# Approach 3: total - finished = unfinished,
#             i.e. count(*) - count(score) = number of unfinished attempts.
# Only papers that have at least one attempt with a NULL score are reported.

select er.exam_id,
       count(*) - count(er.score) as incomplete_cnt,
       round((count(*) - count(er.score)) / count(*), 3) as incomplete_rate
from exam_record as er
where exists (select 1
              from exam_record as pending
              where pending.exam_id = er.exam_id
                and pending.score is null)
group by er.exam_id;

# COUNT(*) is the total number of rows (comparable to COUNT(primary key));
# COUNT(score) leaves out the rows whose score is NULL.
select count(*) from exam_record;

# +--------+
# |count(*)|
# +--------+
# |3       |
# +--------+

select count(score) from exam_record;

# +------------+
# |count(score)|
# +------------+
# |2           |
# +------------+




#  满级人类

# Drop all three sample tables, then recreate them one by one.
DROP TABLE IF EXISTS examination_info, user_info, exam_record;

# examination_info: one row per exam paper, identified by the unique exam_id.
# NOTE(review): duration is presumably in minutes, since later queries compare it with
# TIMESTAMPDIFF(MINUTE, ...) results; confirm.
CREATE TABLE examination_info (
    id           INT         NOT NULL AUTO_INCREMENT PRIMARY KEY COMMENT '自增ID',
    exam_id      INT         NOT NULL UNIQUE COMMENT '试卷ID',
    tag          VARCHAR(32) NULL COMMENT '类别标签',
    difficulty   VARCHAR(8)  NULL COMMENT '难度',
    duration     INT         NOT NULL COMMENT '时长',
    release_time DATETIME    NULL COMMENT '发布时间'
) DEFAULT CHARACTER SET = utf8 COLLATE = utf8_general_ci;

# user_info: one row per user, identified by the unique uid; level is the user level
# that later queries filter on.
CREATE TABLE user_info (
    id            INT         NOT NULL AUTO_INCREMENT PRIMARY KEY COMMENT '自增ID',
    uid           INT         NOT NULL UNIQUE COMMENT '用户ID',
    nick_name     VARCHAR(64) NULL COMMENT '昵称',
    achievement   INT         NULL COMMENT '成就值',
    level         INT         NULL COMMENT '用户等级',
    job           VARCHAR(32) NULL COMMENT '职业方向',
    register_time DATETIME    NULL COMMENT '注册时间'
) DEFAULT CHARACTER SET = utf8 COLLATE = utf8_general_ci;

# exam_record: one row per exam attempt; submit_time and score are nullable,
# every other column is NOT NULL.
CREATE TABLE exam_record (
    id          INT      NOT NULL AUTO_INCREMENT PRIMARY KEY COMMENT '自增ID',
    uid         INT      NOT NULL COMMENT '用户ID',
    exam_id     INT      NOT NULL COMMENT '试卷ID',
    start_time  DATETIME NOT NULL COMMENT '开始时间',
    submit_time DATETIME NULL COMMENT '提交时间',
    score       TINYINT  NULL COMMENT '得分'
) DEFAULT CHARACTER SET = utf8 COLLATE = utf8_general_ci;

# Two users: 1001 is level 0, 1002 is level 6.
INSERT INTO user_info (uid, nick_name, achievement, level, job, register_time)
VALUES (1001, '牛客1号', 10, 0, '算法', '2020-01-01 10:00:00'),
       (1002, '牛客2号', 2100, 6, '算法', '2020-01-01 10:00:00');

# Three papers: 9001 is hard, 9002 is easy, 9004 is medium.
INSERT INTO examination_info (exam_id, tag, difficulty, duration, release_time)
VALUES (9001, 'SQL', 'hard', 60, '2020-01-01 10:00:00'),
       (9002, 'SQL', 'easy', 60, '2020-01-01 10:00:00'),
       (9004, '算法', 'medium', 80, '2020-01-01 10:00:00');

# Seven attempts; the two rows with NULL submit_time and NULL score are unfinished attempts.
INSERT INTO exam_record (uid, exam_id, start_time, submit_time, score)
VALUES (1001, 9001, '2020-01-02 09:01:01', '2020-01-02 09:21:59', 80),
       (1001, 9001, '2021-05-02 10:01:01', NULL, NULL),
       (1001, 9002, '2021-02-02 19:01:01', '2021-02-02 19:30:01', 87),
       (1001, 9001, '2021-06-02 19:01:01', '2021-06-02 19:32:00', 20),
       (1001, 9002, '2021-09-05 19:01:01', '2021-09-05 19:40:01', 89),
       (1001, 9002, '2021-09-01 12:01:01', NULL, NULL),
       (1002, 9002, '2021-05-05 18:01:01', '2021-05-05 18:59:02', 90);


# Show the seeded exam papers.
select ei.id,
       ei.exam_id,
       ei.tag,
       ei.difficulty,
       ei.duration,
       ei.release_time
from examination_info as ei;

# +--+-------+---+----------+--------+-------------------+
# |id|exam_id|tag|difficulty|duration|release_time       |
# +--+-------+---+----------+--------+-------------------+
# |1 |9001   |SQL|hard      |60      |2020-01-01 10:00:00|
# |2 |9002   |SQL|easy      |60      |2020-01-01 10:00:00|
# |3 |9004   |算法 |medium    |80      |2020-01-01 10:00:00|
# +--+-------+---+----------+--------+-------------------+

# Show the seeded users.
select ui.id,
       ui.uid,
       ui.nick_name,
       ui.achievement,
       ui.level,
       ui.job,
       ui.register_time
from user_info as ui;

# +--+----+---------+-----------+-----+---+-------------------+
# |id|uid |nick_name|achievement|level|job|register_time      |
# +--+----+---------+-----------+-----+---+-------------------+
# |1 |1001|牛客1号     |10         |0    |算法 |2020-01-01 10:00:00|
# |2 |1002|牛客2号     |2100       |6    |算法 |2020-01-01 10:00:00|
# +--+----+---------+-----------+-----+---+-------------------+

# Show the seeded exam attempts.
select er.id,
       er.uid,
       er.exam_id,
       er.start_time,
       er.submit_time,
       er.score
from exam_record as er;

# +--+----+-------+-------------------+-------------------+-----+
# |id|uid |exam_id|start_time         |submit_time        |score|
# +--+----+-------+-------------------+-------------------+-----+
# |1 |1001|9001   |2020-01-02 09:01:01|2020-01-02 09:21:59|80   |
# |2 |1001|9001   |2021-05-02 10:01:01|NULL               |NULL |
# |3 |1001|9002   |2021-02-02 19:01:01|2021-02-02 19:30:01|87   |
# |4 |1001|9001   |2021-06-02 19:01:01|2021-06-02 19:32:00|20   |
# |5 |1001|9002   |2021-09-05 19:01:01|2021-09-05 19:40:01|89   |
# |6 |1001|9002   |2021-09-01 12:01:01|NULL               |NULL |
# |7 |1002|9002   |2021-05-05 18:01:01|2021-05-05 18:59:02|90   |
# +--+----+-------+-------------------+-------------------+-----+


# Requirement: for each level-0 user, report the average time taken and the average score over all
# attempts on hard exam papers; an unfinished attempt counts as the paper's full duration and a score of 0.

# Approach 1: first select the attempts of level-0 users on hard papers, then fill the NULLs with the
# required defaults, and finally group by user to compute the averages.

# The time taken is TIMESTAMPDIFF(MINUTE, start_time, submit_time), e.g.
# select TIMESTAMPDIFF(MINUTE,'2008-08-01 22:00:00','2008-08-01 23:01:00') ;   returns 61

# Step 1: list the matching attempts together with the paper's duration.
# There is deliberately no GROUP BY here: grouping by uid while selecting non-aggregated columns is
# rejected under ONLY_FULL_GROUP_BY and would otherwise return one arbitrary attempt per user.
# duration is read from the already joined examination_info row.
select er.uid,
       er.start_time,
       er.submit_time,
       ei.duration,
       coalesce(er.score, 0) as score
from user_info as ui
         inner join exam_record as er on er.uid = ui.uid
         inner join examination_info as ei on ei.exam_id = er.exam_id
where ui.level = 0
  and ei.difficulty = 'hard';

# Step 2: fill the NULLs. A missing score becomes 0 and a missing submit_time is replaced by the
# paper's duration. As the result below shows, the third column then mixes timestamps and a duration.
select uid,
       start_time,
       if(submit_time is null, duration, submit_time),
       score
from (select er.uid,
             er.start_time,
             er.submit_time,
             ei.duration,
             coalesce(er.score, 0) as score
      from exam_record as er
               inner join user_info as ui on ui.uid = er.uid
               inner join examination_info as ei on ei.exam_id = er.exam_id
      where ui.level = 0
        and ei.difficulty = 'hard') as zero_level_hard;

# +----+-------------------+----------------------------------------------+-----+
# |uid |start_time         |if(submit_time is null, duration, submit_time)|score|
# +----+-------------------+----------------------------------------------+-----+
# |1001|2020-01-02 09:01:01|2020-01-02 09:21:59                           |80   |
# |1001|2021-05-02 10:01:01|60                                            |0    |
# |1001|2021-06-02 19:01:01|2021-06-02 19:32:00                           |20   |
# +----+-------------------+----------------------------------------------+-----+


# Same listing, but finished attempts now show the minutes between start_time and submit_time,
# while unfinished attempts show the paper's duration.
select uid,
       start_time,
       if(submit_time is null,duration,TIMESTAMPDIFF(MINUTE,start_time,submit_time)),
       score
from (select er.uid,
             er.start_time,
             er.submit_time,
             ei.duration,
             coalesce(er.score, 0) as score
      from exam_record as er
               inner join user_info as ui on ui.uid = er.uid
               inner join examination_info as ei on ei.exam_id = er.exam_id
      where ui.level = 0
        and ei.difficulty = 'hard') as zero_level_hard;

# +----+-------------------+-----------------------------------------------------------------------------+-----+
# |uid |start_time         |if(submit_time is null,duration,TIMESTAMPDIFF(MINUTE,start_time,submit_time))|score|
# +----+-------------------+-----------------------------------------------------------------------------+-----+
# |1001|2020-01-02 09:01:01|20                                                                           |80   |
# |1001|2021-05-02 10:01:01|60                                                                           |0    |
# |1001|2021-06-02 19:01:01|30                                                                           |20   |
# +----+-------------------+-----------------------------------------------------------------------------+-----+


# Final step: group the filled rows by user and average them.
# The result columns carry the names avg_score and avg_time_took, and ROUND is used instead of
# FORMAT so that both averages are returned as numbers rather than formatted strings.
# The derived table `filled` holds one row per attempt of a level-0 user on a hard paper, with a
# NULL score replaced by 0 and the time taken computed in minutes (the paper's duration is used
# when the attempt was never submitted).
select filled.uid,
       round(avg(filled.score), 0)     as avg_score,
       round(avg(filled.cost_time), 1) as avg_time_took
from (select er.uid,
             coalesce(er.score, 0) as score,
             if(er.submit_time is null,
                ei.duration,
                timestampdiff(minute, er.start_time, er.submit_time)) as cost_time
      from user_info as ui
               inner join exam_record as er on er.uid = ui.uid
               inner join examination_info as ei on ei.exam_id = er.exam_id
      where ui.level = 0
        and ei.difficulty = 'hard') as filled
group by filled.uid;



# Alternative: COALESCE(A, B) returns A when A is not NULL, otherwise B
# ("coalesce" is pronounced /ˌkəʊəˈles/ in British and /ˌkoʊəˈles/ in American English).
# The joins are written as INNER JOINs because the filters on difficulty and level discard any
# attempt without a matching paper or user anyway, and level is compared with the integer 0
# because the column is an INT.
SELECT t1.uid,
       ROUND(AVG(t1.score_new), 0) AS avg_score,
       ROUND(AVG(t1.cost_time), 1) AS avg_time_took
FROM (SELECT a.uid,
             a.start_time,
             # Method 1: COALESCE(A, B) returns B when A is NULL, otherwise A.
             COALESCE(a.score, 0) AS score_new,
             # Method 2: IF(A IS NULL, B, C) returns B when A is NULL, otherwise C.
             IF(a.submit_time IS NULL,
                b.duration,
                TIMESTAMPDIFF(MINUTE, a.start_time, a.submit_time)) AS cost_time
      FROM exam_record AS a
               INNER JOIN examination_info AS b ON b.exam_id = a.exam_id
               INNER JOIN user_info AS c ON c.uid = a.uid
      WHERE b.difficulty = 'hard'
        AND c.level = 0) AS t1
GROUP BY t1.uid;


  大数据 最新文章
实现Kafka至少消费一次
亚马逊云科技:还在苦于ETL?Zero ETL的时代
初探MapReduce
【SpringBoot框架篇】32.基于注解+redis实现
Elasticsearch:如何减少 Elasticsearch 集
Go redis操作
Redis面试题
专题五 Redis高并发场景
基于GBase8s和Calcite的多数据源查询
Redis——底层数据结构原理
上一篇文章      下一篇文章      查看所有文章
加:2022-04-29 12:12:55  更:2022-04-29 12:14:22 
 
开发: C++知识库 Java知识库 JavaScript Python PHP知识库 人工智能 区块链 大数据 移动开发 嵌入式 开发工具 数据结构与算法 开发测试 游戏开发 网络协议 系统运维
教程: HTML教程 CSS教程 JavaScript教程 Go语言教程 JQuery教程 VUE教程 VUE3教程 Bootstrap教程 SQL数据库教程 C语言教程 C++教程 Java教程 Python教程 Python3教程 C#教程
数码: 电脑 笔记本 显卡 显示器 固态硬盘 硬盘 耳机 手机 iphone vivo oppo 小米 华为 单反 装机 图拉丁

360图书馆 购物 三丰科技 阅读网 日历 万年历 2024年11日历 -2024/11/24 0:57:00-

图片自动播放器
↓图片自动播放器↓
TxT小说阅读器
↓语音阅读,小说下载,古典文学↓
一键清除垃圾
↓轻轻一点,清除系统垃圾↓
图片批量下载器
↓批量下载图片,美女图库↓
  网站联系: qq:121756557 email:121756557@qq.com  IT数码