文章目录
前言
trade1表
user_id:用户id
auction_id:订单id
cat1:商品品类
buy_amount:购买数量
day:购买日期
baby表
user_id:用户id
gender:性别
birthday:婴儿出生日期
数据主要反映出电商行业某母婴产品的销售情况
一、指标体系
粘性指标
传播性指标
销量指标
二、数据分析
1.数据处理
#转换字符串类型的日期
update trade1
set day=str_to_date(day,'%Y%m%d');
#提取月份
alter table trade1 add month int;
update trade1
set month=month(day);
#提取年份
alter table trade1 add year int;
update trade1
set year=year(day);
#基本了解数据
select count(*) from trade1;
select count(distinct auction_id) from trade1;
select count(distinct user_id) from trade1;
2.具体分析
1.粘性分析
#1.活跃用户:有购买行为的用户
#1.1 不同年份月活跃用户
select year,month,count(distinct user_id) as num
from trade1
group by year,month;
#1.2 年活跃用户及差值
select *,last_num-num as 差值
from
(select year,count(distinct user_id) as num,
lead(count(distinct user_id),1)over(order by year asc) as last_num
from trade1
group by year) as a #窗口函数在group by后执行,因此窗口函数可以配合聚合函数
#2.购买频率
#注:仅计算有两次及以上购买行为的人的购买频率
select *,avg(cha)over() as mean
from
(select *,datediff(date1,day) as cha
from
(select user_id,day,
lead(day,1)over(partition by user_id order by day asc) as date1
from trade1) as a
) as b
where cha is not null
#3.复购率
#3.1四年的的复购率
select concat(round(count(user_id)*100/(select count(distinct user_id) from trade1),2),'%') as rate
from
(select user_id,count(distinct auction_id) as num
from trade1
group by user_id
having count(distinct auction_id)>=2) as a
#3.2每一年的复购率
select year,
count(user_id)*100/(select count(distinct user_id) from trade1 where year=a.year) as rate
from
(select year,user_id,count(distinct auction_id) as num
from trade1
group by year,user_id
having count(distinct auction_id)>=2) as a
group by year
#4.留存率
#4.1每年的月留存率
#方法一
select
year,month,
count(distinct(case when cha=1 then user_id else null end))/count(distinct(case when cha=0 then user_id else null end))as rate
from
(select t.year,a.user_id,month,(month-mm) as cha
from trade1 t
inner join
(select year,user_id,min(month) as mm
from trade1
group by year,user_id) as a
on a.user_id=t.user_id
and a.year=t.year) as b
group by year,month
#方法二
select
year,month,
count(distinct user_id)/
(select count(distinct user_id) from trade1 where year=b.year and month=b.month) as rate
from
(select t.year,a.user_id,month,(month-mm) as cha
from trade1 t
inner join
(select year,user_id,min(month) as mm
from trade1
group by year,user_id) as a
on a.user_id=t.user_id
and a.year=t.year) as b
where b.cha=1
group by year,month
2.传播性分析
#1.用户画像
#1.1不同性别用户数量占比
select *,num/(select count(distinct user_id) from baby where gender in (1,0)) as rate
from (
select gender, count(distinct b.user_id) as num
from trade1 t
inner join baby b
on t.user_id = b.user_id
where gender in (1,0)
group by gender
) as a
#1.2不同年龄用户占比
update baby
set birthday=str_to_date(birthday,'%Y%m%d')
alter table baby add age int;
select age,count(distinct user_id) as num,
count(distinct user_id)/sum(count(distinct user_id))over() as rate
from
(
select t.user_id,t.year-year(b.birthday) as age
from baby b inner join trade1 t on b.user_id = t.user_id
) as a
group by age
#2.不同性别的用户趋势及增长率
#2.1男性
select * ,concat(round((late_num-num)*100/num,2),'%') as rate
from (
select year,
gender,
count(distinct t.user_id) as num,
lead(count(distinct b.user_id)) over (order by year asc) as late_num
from baby b
join trade1 t on b.user_id = t.user_id
where gender = 0
group by t.year, gender
) as a
#2.2女性
select * ,concat(round((late_num-num)*100/num,2),'%') as rate
from (
select year,
gender,
count(distinct t.user_id) as num,
lead(count(distinct b.user_id)) over (order by year asc) as late_num
from baby b
join trade1 t on b.user_id = t.user_id
where gender = 1
group by t.year, gender
) as a
#3.每年的高价值用户数量
#高价值用户数量:购买数量在本年占前10%
select year,count(distinct user_id) as num1,
lead(count(distinct user_id)) over (order by year asc) as late_num1,
lead(count(distinct user_id)) over (order by year asc)/count(distinct user_id)-1 as cha
from
(select year,user_id,
sum(buy_mount) as num,
cume_dist() over (partition by year order by sum(buy_mount) desc ) as rank1
from trade1 t
group by year, user_id) as a
where rank1<=0.1
group by year
#4.高价值用户的特征
#4.1选出高价值用户,即消费数量占比前10%
create view high_value
as
select a.user_id,b.gender,b.birthday
from
baby b inner join
(select user_id,
cume_dist() over (order by sum(buy_mount) desc) as rate
from trade1
group by user_id
)
as a
on b.user_id=a.user_id
where a.rate<=0.1
#4.2高价值用户的性别分布
select gender,count(distinct user_id) as num,
count(distinct user_id)/sum(count(distinct user_id))over() as rate
from high_value
where gender in (1,0)
group by gender
#4.3高价值用户的年龄分布
select age,count(distinct user_id) as num,
count(distinct user_id)/sum(count(distinct user_id))over() as rate
from(
select h.*,year-year(birthday) as age
from high_value h inner join trade1 t on h.user_id = t.user_id
) as a
group by age
order by rate desc
#5.不同类型用户的购买数量占比
#5.1不同性别的购买数量占比
select gender,sum(buy_mount) as num,
sum(buy_mount)/sum(sum(buy_mount))over() as rate
from trade1 inner join baby b on trade1.user_id = b.user_id
where gender in (1,0)
group by gender
order by rate desc
#5.2不同年龄的购买数量占比
select age,sum(buy_mount) as num,
sum(buy_mount)/sum(sum(buy_mount))over() as rate
from
(select trade1.*,year-year(b.birthday) as age
from trade1 inner join baby b on trade1.user_id = b.user_id) as a
group by age
order by rate desc
3.销量
#1.销量趋势
select year,sum(buy_mount) as num,
lead(year,1)over(order by year asc) as 下一年,
lead(sum(buy_mount))over(order by year asc) as 下一年的销量,
lead(sum(buy_mount))over(order by year asc)/sum(buy_mount)-1 as 增长率,
sum(sum(buy_mount))over(order by year asc) as 累计销量
from trade1
group by year
#1.1不同性别用户销量趋势、占比以及增长率
#方法一:窗口函数
#男性
select year,gender,sum(buy_mount) as num,
sum(buy_mount)/sum(sum(buy_mount))over() as rate,
lead(sum(buy_mount))over(order by year asc) as late_num,
lead(sum(buy_mount))over(order by year asc)/sum(buy_mount)-1 as growth
from trade1 t inner join baby b on t.user_id = b.user_id
where gender=0
group by year,gender
#女性
select year,gender,sum(buy_mount) as num,
sum(buy_mount)/sum(sum(buy_mount))over() as rate,
lead(sum(buy_mount))over(order by year asc) as late_num,
lead(sum(buy_mount))over(order by year asc)/sum(buy_mount)-1 as growth
from trade1 t inner join baby b on t.user_id = b.user_id
where gender=1
group by year,gender
#方法二:用子查询的方法
select a.*,
(select sum(buy_mount) from trade1 t inner join baby b
on t.user_id = b.user_id
where gender=a.gender and year-a.year=1
) as late_num,
(select sum(buy_mount)
from trade1 t inner join baby b on t.user_id = b.user_id
where gender=a.gender and year-a.year=1
)/a.num-1 as growth
from
(select year,gender,sum(buy_mount) as num
from
trade1 t inner join baby b on t.user_id = b.user_id
where gender in (1,0)
group by year,gender) as a
order by year asc,gender asc
#1.2不同年龄的销量趋势以及增长率
select year,age,sum(buy_mount) as num,
(select sum(buy_mount)from
(select trade1.*,year-year(birthday) as age
from trade1 inner join baby b on trade1.user_id = b.user_id
)as b
where age=a.age and year-a.year=1) as late_num, #用子查询的方法计算同一年龄在次年的总销售量
(select sum(buy_mount)from
(select trade1.*,year-year(birthday) as age
from trade1 inner join baby b on trade1.user_id = b.user_id
)as b
where age=a.age and year-a.year=1)/sum(buy_mount)-1 as growth #同上,然后计算增长率
from
(select trade1.*,year-year(birthday) as age
from trade1 inner join baby b on trade1.user_id = b.user_id) as a
group by year,age
#2.订单数量
select count(distinct auction_id) as num from trade1
#2.1每一年的订单量以及增长率
select year,count(distinct auction_id) as num,
count(distinct auction_id)/ sum(count(distinct auction_id))over() as 占比,
lead(count(distinct auction_id))over(order by year asc) as late_num,
lead(count(distinct auction_id))over(order by year asc)/count(distinct auction_id) as growth,
round(cume_dist() over (order by count(distinct auction_id) desc),2) as 名次百分比,
sum(count(distinct auction_id))over(order by year asc) as 累计和
from trade1
group by year
#3.每年每个季度的销量,增长率,排名
#计算日期属于哪个季度
#判断日期是一年中的第几周
select week(day)
from trade1
#判断日期是一年中的第几个季度
select quarter(day)
from trade1
#添加季度列
alter table trade1 add season int;
update trade1
set season=quarter(day);
select year,season,sum(buy_mount) as num,
sum(sum(buy_mount))over(partition by year) as year_sum,
sum(buy_mount)/ sum(sum(buy_mount))over(partition by year) as rate,
rank()over(partition by year order by sum(buy_mount) desc) as rank1,
sum(sum(buy_mount))over(partition by year order by season asc) as 累计和,
lead(sum(buy_mount),1)over(partition by year order by season asc)/sum(buy_mount)-1 as growth
from trade1
group by year,season
#4.不同品类的销量
#4.1四年中不同品类的销量
select cat1,sum(buy_mount) as num
from trade1
group by cat1
order by num desc
#4.2每年不同品类的销量、占比、下一年该品类的销量、增长率
select year,cat1,sum(buy_mount) as num,
sum(buy_mount)/sum(sum(buy_mount))over() as rate,
(select sum(buy_mount) from trade1 t where t.cat1=trade1.cat1 and year-trade1.year=1) as late_num,
(select sum(buy_mount) from trade1 t where t.cat1=trade1.cat1 and year-trade1.year=1)/sum(buy_mount)-1 as growth
from trade1
group by year,cat1
#4.3每年销量前三名的品类
select *
from
( select year,
cat1,
sum(buy_mount) as num,
rank() over (partition by year order by sum(buy_mount) desc) as rank1
from trade1
group by year, cat1
) as a
where rank1<=3
总结
1.用户粘性较差,考虑两个原因
(1)产品并不能在较大程度上满足客户需求,不具有吸引力
(2)在营销上具有劣势,其他商家的营销手段更容易吸引客户
2.产品受众性别分布较为均衡
3.产品受众当中儿童年龄普遍较低,集中在0-3岁,产品可以该群体为重点进行迭代
.在分析时,注意各方面的趋势变化,即将时间维度纳入分析
4.该商家的产品销量增长水平逐年下降,在2014年出现负增长
|