IT数码 购物 网址 头条 软件 日历 阅读 图书馆
TxT小说阅读器
↓语音阅读,小说下载,古典文学↓
图片批量下载器
↓批量下载图片,美女图库↓
图片自动播放器
↓图片自动播放器↓
一键清除垃圾
↓轻轻一点,清除系统垃圾↓
开发: C++知识库 Java知识库 JavaScript Python PHP知识库 人工智能 区块链 大数据 移动开发 嵌入式 开发工具 数据结构与算法 开发测试 游戏开发 网络协议 系统运维
教程: HTML教程 CSS教程 JavaScript教程 Go语言教程 JQuery教程 VUE教程 VUE3教程 Bootstrap教程 SQL数据库教程 C语言教程 C++教程 Java教程 Python教程 Python3教程 C#教程
数码: 电脑 笔记本 显卡 显示器 固态硬盘 硬盘 耳机 手机 iphone vivo oppo 小米 华为 单反 装机 图拉丁
 
   -> 大数据 -> 基于mysql的电商行业数据分析 -> 正文阅读

[大数据]基于mysql的电商行业数据分析

文章目录

目录

前言

指标体系

数据处理

数据分析

总结


前言

trade1表

user_id:用户id

auction_id:订单id

cat1:商品品类

buy_amount:购买数量

day:购买日期

baby表

user_id:用户id

gender:性别

birthday:婴儿出生日期

数据主要反映出电商行业某母婴产品的销售情况


一、指标体系

粘性指标

传播性指标

销量指标

二、数据分析

1.数据处理

#转换字符串类型的日期
update trade1
set day=str_to_date(day,'%Y%m%d');
#提取月份
alter table trade1 add  month int;
update trade1
set month=month(day);
#提取年份
alter table trade1 add  year int;
update trade1
set year=year(day);
#基本了解数据
select count(*) from trade1;
select count(distinct auction_id) from trade1;
select count(distinct user_id) from trade1;

2.具体分析

1.粘性分析

 #1.活跃用户:有购买行为的用户
  #1.1 不同年份月活跃用户
select year,month,count(distinct  user_id) as num
from trade1
group by year,month;
  #1.2 年活跃用户及差值
select *,last_num-num as 差值
from
(select year,count(distinct user_id) as num,
       lead(count(distinct user_id),1)over(order by year asc) as last_num
from trade1
group by year) as a  #窗口函数在group by后执行,因此窗口函数可以配合聚合函数
 #2.购买频率
 #注:仅计算有两次及以上购买行为的人的购买频率
select *,avg(cha)over() as mean
from
(select *,datediff(date1,day) as cha
from
   (select user_id,day,
           lead(day,1)over(partition by user_id order by day asc) as date1
    from trade1) as a
) as b
where cha is not null
 #3.复购率
  #3.1四年的的复购率
select concat(round(count(user_id)*100/(select count(distinct user_id) from trade1),2),'%') as rate
from
(select user_id,count(distinct auction_id) as num
from trade1
group by user_id
having count(distinct auction_id)>=2) as a
  #3.2每一年的复购率
select year,
       count(user_id)*100/(select count(distinct user_id) from trade1 where year=a.year) as rate
from
(select year,user_id,count(distinct auction_id) as num
from trade1
group by year,user_id
having count(distinct auction_id)>=2) as a
group by year
 #4.留存率
 #4.1每年的月留存率
  #方法一
select
year,month,
count(distinct(case when cha=1 then user_id else null end))/count(distinct(case when cha=0 then user_id else null end))as rate
from
    (select t.year,a.user_id,month,(month-mm) as cha
     from trade1 t
     inner join
              (select year,user_id,min(month) as mm
               from trade1
               group by year,user_id) as a
     on a.user_id=t.user_id
     and a.year=t.year) as b
group by year,month
  #方法二
select
year,month,
count(distinct user_id)/
(select count(distinct user_id) from trade1 where year=b.year and month=b.month) as rate
from
    (select t.year,a.user_id,month,(month-mm) as cha
     from trade1 t
     inner join
              (select year,user_id,min(month) as mm
               from trade1
               group by year,user_id) as a
     on a.user_id=t.user_id
     and a.year=t.year) as b
where b.cha=1
group by year,month

2.传播性分析

 #1.用户画像
  #1.1不同性别用户数量占比
select *,num/(select count(distinct user_id) from baby where gender in (1,0)) as rate
       from (
                select gender, count(distinct b.user_id) as num
                from trade1 t
                inner join baby b
                on t.user_id = b.user_id
                where gender in (1,0)
                group by gender
            ) as a

  #1.2不同年龄用户占比
update baby
set birthday=str_to_date(birthday,'%Y%m%d')

alter table baby add age int;

select age,count(distinct user_id) as num,
       count(distinct user_id)/sum(count(distinct user_id))over() as rate
from
     (
select t.user_id,t.year-year(b.birthday) as age
from baby b inner join trade1 t on b.user_id = t.user_id
     ) as a
group by age
 #2.不同性别的用户趋势及增长率
  #2.1男性
select * ,concat(round((late_num-num)*100/num,2),'%') as rate
from (
      select year,
      gender,
      count(distinct t.user_id) as num,
      lead(count(distinct b.user_id)) over (order by year asc) as late_num
      from baby b
      join trade1 t on b.user_id = t.user_id
      where gender = 0
      group by t.year, gender
    ) as a
  #2.2女性
select * ,concat(round((late_num-num)*100/num,2),'%') as rate
from (
      select year,
      gender,
      count(distinct t.user_id) as num,
      lead(count(distinct b.user_id)) over (order by year asc) as late_num
      from baby b
      join trade1 t on b.user_id = t.user_id
      where gender = 1
      group by t.year, gender
    ) as a
#3.每年的高价值用户数量
  #高价值用户数量:购买数量在本年占前10%
 select year,count(distinct user_id) as num1,
        lead(count(distinct user_id)) over (order by year asc) as late_num1,
        lead(count(distinct user_id)) over (order by year asc)/count(distinct user_id)-1 as cha
from
(select year,user_id,
       sum(buy_mount) as num,
       cume_dist() over (partition by year order by sum(buy_mount) desc ) as rank1
from trade1 t
group by year, user_id) as a
where rank1<=0.1
group by year
#4.高价值用户的特征
   #4.1选出高价值用户,即消费数量占比前10%
create view  high_value
as
select a.user_id,b.gender,b.birthday
from
baby b inner join
     (select user_id,
             cume_dist() over (order by sum(buy_mount) desc) as rate
         from trade1
         group by user_id
     )
as a
on b.user_id=a.user_id
where a.rate<=0.1
  #4.2高价值用户的性别分布
select gender,count(distinct user_id) as num,
       count(distinct user_id)/sum(count(distinct user_id))over() as rate
from high_value
where gender in (1,0)
group by gender
  #4.3高价值用户的年龄分布
select age,count(distinct user_id) as num,
       count(distinct user_id)/sum(count(distinct user_id))over() as rate
from(
     select h.*,year-year(birthday) as age
     from high_value h inner join trade1 t on h.user_id = t.user_id
    ) as a
group by age
order by rate desc
#5.不同类型用户的购买数量占比
  #5.1不同性别的购买数量占比
select gender,sum(buy_mount) as num,
       sum(buy_mount)/sum(sum(buy_mount))over() as rate
from trade1 inner join baby b on trade1.user_id = b.user_id
where gender in (1,0)
group by gender
order by rate desc
  #5.2不同年龄的购买数量占比
select age,sum(buy_mount) as num,
       sum(buy_mount)/sum(sum(buy_mount))over() as rate
from
(select trade1.*,year-year(b.birthday) as age
from trade1 inner join baby b on trade1.user_id = b.user_id) as a
group by age
order by rate desc

3.销量

#1.销量趋势
select year,sum(buy_mount) as num,
       lead(year,1)over(order by year asc) as 下一年,
       lead(sum(buy_mount))over(order by year asc) as 下一年的销量,
       lead(sum(buy_mount))over(order by year asc)/sum(buy_mount)-1 as 增长率,
       sum(sum(buy_mount))over(order by year asc) as 累计销量
from trade1
group by year
  #1.1不同性别用户销量趋势、占比以及增长率
   #方法一:窗口函数
    #男性
select year,gender,sum(buy_mount) as num,
       sum(buy_mount)/sum(sum(buy_mount))over() as rate,
       lead(sum(buy_mount))over(order by year asc) as late_num,
       lead(sum(buy_mount))over(order by year asc)/sum(buy_mount)-1 as growth
from trade1 t inner join baby b on t.user_id = b.user_id
where gender=0
group by year,gender
    #女性
select year,gender,sum(buy_mount) as num,
       sum(buy_mount)/sum(sum(buy_mount))over() as rate,
       lead(sum(buy_mount))over(order by year asc) as late_num,
       lead(sum(buy_mount))over(order by year asc)/sum(buy_mount)-1 as growth
from trade1 t inner join baby b on t.user_id = b.user_id
where gender=1
group by year,gender
   #方法二:用子查询的方法
 select a.*,
        (select sum(buy_mount) from trade1 t inner join baby b
        on t.user_id = b.user_id
        where gender=a.gender and year-a.year=1
        ) as late_num,
        (select sum(buy_mount)
        from trade1 t inner join baby b on t.user_id = b.user_id
        where gender=a.gender and year-a.year=1
        )/a.num-1 as growth
from
(select year,gender,sum(buy_mount) as num
from
trade1 t inner join baby b on t.user_id = b.user_id
where gender in (1,0)
group by year,gender) as a
order by year asc,gender asc
 #1.2不同年龄的销量趋势以及增长率
select year,age,sum(buy_mount) as num,
        (select sum(buy_mount)from
                 (select trade1.*,year-year(birthday) as age
                  from trade1 inner join baby b on trade1.user_id = b.user_id
                 )as b
        where age=a.age and year-a.year=1) as late_num, #用子查询的方法计算同一年龄在次年的总销售量
       (select sum(buy_mount)from
                 (select trade1.*,year-year(birthday) as age
                  from trade1 inner join baby b on trade1.user_id = b.user_id
                 )as b
        where age=a.age and year-a.year=1)/sum(buy_mount)-1 as growth #同上,然后计算增长率
from
(select trade1.*,year-year(birthday) as age
from trade1 inner join baby b on trade1.user_id = b.user_id) as a
group by year,age
 #2.订单数量
select count(distinct auction_id) as num from trade1
  #2.1每一年的订单量以及增长率
select year,count(distinct auction_id) as num,
       count(distinct auction_id)/ sum(count(distinct auction_id))over() as 占比,
       lead(count(distinct auction_id))over(order by year asc) as late_num,
       lead(count(distinct auction_id))over(order by year asc)/count(distinct auction_id) as growth,
       round(cume_dist() over (order by count(distinct auction_id) desc),2) as 名次百分比,
       sum(count(distinct auction_id))over(order by year asc) as 累计和
from trade1
group by year
 #3.每年每个季度的销量,增长率,排名
  #计算日期属于哪个季度
  #判断日期是一年中的第几周
select week(day)
from trade1
  #判断日期是一年中的第几个季度
select quarter(day)
from trade1
  #添加季度列
alter table trade1 add season int;
update trade1
set season=quarter(day);

select year,season,sum(buy_mount) as num,
       sum(sum(buy_mount))over(partition by year) as year_sum,
       sum(buy_mount)/ sum(sum(buy_mount))over(partition by year) as rate,
       rank()over(partition by year order by sum(buy_mount) desc) as rank1,
       sum(sum(buy_mount))over(partition by year order by season asc) as 累计和,
       lead(sum(buy_mount),1)over(partition by year order by season asc)/sum(buy_mount)-1 as growth
from trade1
group by year,season
#4.不同品类的销量
  #4.1四年中不同品类的销量
select cat1,sum(buy_mount) as num
from trade1
group by cat1
order by num desc
  #4.2每年不同品类的销量、占比、下一年该品类的销量、增长率
select year,cat1,sum(buy_mount) as num,
       sum(buy_mount)/sum(sum(buy_mount))over() as rate,
       (select sum(buy_mount) from trade1 t where t.cat1=trade1.cat1 and year-trade1.year=1) as late_num,
       (select sum(buy_mount) from trade1 t where t.cat1=trade1.cat1 and year-trade1.year=1)/sum(buy_mount)-1 as growth
from trade1
group by year,cat1
  #4.3每年销量前三名的品类
select *
from
     (   select year,
                cat1,
                sum(buy_mount) as num,
                rank() over (partition by year order by sum(buy_mount) desc) as rank1
         from trade1
         group by year, cat1
     ) as a
where rank1<=3

总结

1.用户粘性较差,考虑两个原因

(1)产品并不能在较大程度上满足客户需求,不具有吸引力

(2)在营销上具有劣势,其他商家的营销手段更容易吸引客户

2.产品受众性别分布较为均衡

3.产品受众当中儿童年龄普遍较低,集中在0-3岁,产品可以该群体为重点进行迭代

.在分析时,注意各方面的趋势变化,即将时间维度纳入分析

4.该商家的产品销量增长水平逐年下降,在2014年出现负增长

  大数据 最新文章
实现Kafka至少消费一次
亚马逊云科技:还在苦于ETL?Zero ETL的时代
初探MapReduce
【SpringBoot框架篇】32.基于注解+redis实现
Elasticsearch:如何减少 Elasticsearch 集
Go redis操作
Redis面试题
专题五 Redis高并发场景
基于GBase8s和Calcite的多数据源查询
Redis——底层数据结构原理
上一篇文章      下一篇文章      查看所有文章
加:2021-09-09 11:50:45  更:2021-09-09 11:51:56 
 
开发: C++知识库 Java知识库 JavaScript Python PHP知识库 人工智能 区块链 大数据 移动开发 嵌入式 开发工具 数据结构与算法 开发测试 游戏开发 网络协议 系统运维
教程: HTML教程 CSS教程 JavaScript教程 Go语言教程 JQuery教程 VUE教程 VUE3教程 Bootstrap教程 SQL数据库教程 C语言教程 C++教程 Java教程 Python教程 Python3教程 C#教程
数码: 电脑 笔记本 显卡 显示器 固态硬盘 硬盘 耳机 手机 iphone vivo oppo 小米 华为 单反 装机 图拉丁

360图书馆 购物 三丰科技 阅读网 日历 万年历 2024年11日历 -2024/11/23 20:03:10-

图片自动播放器
↓图片自动播放器↓
TxT小说阅读器
↓语音阅读,小说下载,古典文学↓
一键清除垃圾
↓轻轻一点,清除系统垃圾↓
图片批量下载器
↓批量下载图片,美女图库↓
  网站联系: qq:121756557 email:121756557@qq.com  IT数码