一、dim层
1、商品信息表
(1)建表语句
drop table if EXISTS dim_sku_info_df;
CREATE TABLE if not EXISTS dim_sku_info_df(
sku_id string comment '库存单元id',
price double comment '价格',
sku_name string comment '库存单元名称',
sku_desc string comment '商品规格描述',
weight double comment '重量',
is_sale string comment '是否销售(1:是 0:否)',
spu_id string comment '商品id',
spu_name string comment '商品名称',
brand_id string comment '品牌id',
brand_name string comment '品牌名称',
is_used string comment '是否有效 1有效 0失效' ,
banned_time string comment '封禁时间',
is_domestic string comment '是否国内品牌 1是 0否',
logo_url string comment '品牌logo的图片路径',
class_name_3 string comment '三级分类名称',
class_id_3 string comment '三级分类ID',
class_name_2 string comment '二级分类名称',
class_id_2 string comment '二级分类ID',
class_name_1 string comment '一级分类名称',
class_id_1 string comment '一级分类ID',
create_time string comment '创建时间',
update_time string comment '修改时间'
)
comment '商品信息表'
PARTITIONED BY (
pt string comment '日期'
)
ROW FORMAT DELIMITED FIELDS TERMINATED BY ','
STORED AS ORC tblproperties("orc.compress"="SNAPPY")
;
(2)导数语句
with sku AS
(SELECT
sku_id
,spu_id
,price
,sku_name
,sku_desc
,weight
,brand_id
,category3_id
,sku_img
,is_sale
,create_time
,update_time
,pt
from project.ods_sku_info_full where pt = '2022-01-01'),
spu as
(select
spu_id
,spu_name
from project.ods_spu_info_full where pt = '2022-01-01'),
brand as
(select
brand_id
,brand_name
,is_used
,banned_time
,is_domestic
,logo_url
from project.ods_pub_brand_full where pt = '2022-01-01'),
class as
(select
l3.class_id as class_id_3
,l3.class_name as class_name_3
,l2.class_id as class_id_2
,l2.class_name as class_name_2
,l1.class_id as class_id_1
,l1.class_name as class_name_1
from (select * from project.ods_pub_class_full where pt='2022-01-01' and `level`= '1') l1
left join (select * from project.ods_pub_class_full where pt='2022-01-01' and `level`='2') l2
on l1.class_id = l2.parent_id
left join (select * from project.ods_pub_class_full where pt='2022-01-01' and `level`='3') l3
on l2.class_id = l3.parent_id )
insert overwrite table project.dim_sku_info_df partition(pt='2022-01-01')
select
sku.sku_id,
sku.price,
sku.sku_name,
sku.sku_desc,
sku.weight,
sku.is_sale,
sku.spu_id,
spu.spu_name,
brand.brand_id,
brand.brand_name,
brand.is_used,
brand.banned_time,
brand.is_domestic,
brand.logo_url,
class.class_name_3,
class.class_id_3,
class.class_name_2,
class.class_id_2,
class.class_name_1,
class.class_id_1,
sku.create_time,
sku.update_time
from sku
left join spu
on sku.spu_id = spu.spu_id
left join brand
on sku.brand_id = brand.brand_id
left join class
on sku.category3_id = class.class_id_3 ;
2、商品详细属性表
(1)建表语句
drop table if EXISTS dim_sku_para_info_df;
CREATE TABLE if not EXISTS dim_sku_para_info_df(
id string comment 'id'
,sku_id string comment '库存单元id'
,spu_id string comment '商品id'
,para_id string comment 'sku参数id'
,attr_id string comment '属性id'
,attr_name string comment '属性名称'
,value string comment '属性值'
)
comment '商品详细属性表'
PARTITIONED BY (
pt string comment '日期'
)
ROW FORMAT DELIMITED FIELDS TERMINATED BY ','
STORED AS ORC tblproperties("orc.compress"="SNAPPY")
;
(2)导数语句
with sp AS
(select
id
,sku_id
,spu_id
,para_id
from project.ods_sku_para_full where pt = '2022-01-01'),
spi as
(select
para_id
,attr_id
,attr_name
,value
from project.ods_sku_para_info_full where pt = '2022-01-01')
insert overwrite table dim_sku_para_info_df partition(pt='2022-01-01')
select
sp.id
,sp.sku_id
,sp.spu_id
,sp.para_id
,spi.attr_id
,spi.attr_name
,spi.value
from sp
left join spi
on sp.para_id = spi.para_id
;
3、活动信息表
(1)建表语句
drop table if exists dim_activity_info_df;
create external table dim_activity_info_df
( id string comment 'id',
activity_id string comment '活动id',
activity_name string comment '活动名称',
activity_type_code string comment '活动类型编码',
activity_type_name string comment '活动类型名称',
activity_desc string comment '活动描述',
activity_rules string comment '活动规则',
content_amount double comment '满足的金额',
reduce_amount int comment '活动减免的金额',
content_cnt double comment '满足的件数',
reduce_rate double comment '活动减免的折扣',
activity_level string comment '优惠等级',
start_time string comment '开始时间',
end_time string comment '结束时间',
create_time string comment '创建时间',
update_time string comment '修改时间'
) comment '活动信息表'
partitioned by (pt string)
stored as orc
tblproperties ('orc.compress' = 'snappy')
;
(2)导数语句
with act as
(
select
id
,activity_id
,activity_name
,activity_type
,activity_desc
,case activity_type
when '3101' then concat('满',content_amount,'元减',reduce_amount,'元')
when '3102' then concat('满',content_cnt,'件打',10*(1-reduce_rate),'折')
end activity_rules
,content_amount
,reduce_amount
,content_cnt
,reduce_rate
,activity_level
,start_time
,end_time
,create_time
,update_time
from project.ods_activity_info_full where pt='2022-01-01'
),
code as
(
select
code_id
,code_name
from project.ods_pub_code_full where pt='2022-01-01'
)
insert overwrite table dim_activity_info_df partition(pt='2022-01-01')
select
id
,activity_id
,activity_name
,activity_type
,code.code_name
,activity_desc
,case activity_type
when '3101' then concat('满',content_amount,'元减',reduce_amount,'元')
when '3102' then concat('满',content_cnt,'件打',10*(1-reduce_rate),'折')
end activity_rules
,content_amount
,reduce_amount
,content_cnt
,reduce_rate
,activity_level
,start_time
,end_time
,create_time
,update_time
from act
left join code
on act.activity_type = code.code_id
;
4、优惠券信息表
(1)建表语句
drop table if exists dim_coupon_info_df;
create table dim_coupon_info_df
(
coupon_id string comment '购物券id'
,coupon_name string comment '购物券名称'
,coupon_type string comment '购物券类型'
,coupon_type_name string comment '购物券类型名称'
,content_amount double comment '满足的金额'
,reduce_amount double comment '优惠券减免的金额'
,range_type string comment '优惠券范围类型'
,class_id_3 string comment '三级分类id'
,start_time string comment '优惠券使用开始日期'
,end_time string comment '优惠券使用结束日期'
,create_time string comment '创建时间'
,update_time string comment '修改时间'
,etl_load_time string comment 'etl加载时间'
) comment '优惠券信息表'
partitioned by (pt string)
stored as orc
tblproperties ('orc.compress' = 'snappy')
;
(2)导数语句
with coupon as
(
select
coupon_id
,coupon_name
,coupon_type
,content_amount
,reduce_amount
,range_type
,category3_id
,start_time
,end_time
,create_time
,update_time
,current_timestamp()
from project.ods_coupon_info_full
where pt='2022-01-01'
)
,
code as
(
select
code_id
,code_name
from project.ods_pub_code_full where pt='2022-01-01'
)
insert overwrite table dim_coupon_info_df partition(pt='2022-01-01')
select
coupon_id
,coupon_name
,coupon_type
,code_name
,content_amount
,reduce_amount
,range_type
,category3_id
,start_time
,end_time
,create_time
,update_time
,current_timestamp()
from coupon
left join code
on coupon.coupon_type = code.code_id
;
5、区域表
(1)建表语句
drop table if exists dim_pub_area_df;
create table dim_pub_area_df
(
province_id string comment 'id'
,province_name string comment '省市名称'
,area_code string comment '地区编码'
,iso_code_old string comment '旧版地区编码'
,iso_code_new string comment '新版地区编码'
,region_id string comment '地区id'
,region_name string comment '地区名称'
,etl_load_time string comment 'etl加载时间'
) comment '区域表'
partitioned by (pt string)
stored as orc
tblproperties ('orc.compress' = 'snappy')
;
(2)导数语句
insert overwrite table dim_pub_area_df partition(pt='2022-01-01')
select
province_id
,province_name
,area_code
,iso_code
,iso_3166_2
,region_id
,region_name
,current_timestamp()
from project.ods_pub_area_full
where pt='2022-01-01'
6、商家信息表
(1)建表语句
drop table if exists dim_business_info_df;
create table dim_business_info_df
(
bus_id string comment '商家id'
,bus_name string comment '商家名称'
,telephone string comment '联系电话'
,pic_url string comment '图片地址URL'
,bus_user_name double comment '商家联系人'
,province double comment '省份'
,city string comment '城市'
,dist string comment '区'
,addr string comment '详细地址'
,zipcode string comment '邮编'
,is_open string comment '是否关店 1关店 0未关'
,open_time string comment '开店时间'
,closed_time string comment '关店时间'
,create_time string comment '创建时间'
,update_time string comment '修改时间'
,etl_load_time string comment 'etl加载时间'
) comment '优惠券信息表'
partitioned by (pt string)
stored as orc
tblproperties ('orc.compress' = 'snappy')
;
(2)导数语句
insert overwrite table dim_business_info_df partition(pt='2022-01-01')
select
bus_id
,bus_name
,md5(telephone)
,pic_url
,md5(bus_user_name)
,province
,city
,dist
,addr
,zipcode
,is_open
,open_time
,closed_time
,create_time
,update_time
,current_timestamp()
from project.ods_business_info_full
where pt ='2022-01-01'
;
7、用户信息表(拉链表)
(1)建表语句
DROP TABLE IF EXISTS dim_user_info_his;
CREATE EXTERNAL TABLE dim_user_info_his
(
user_id STRING COMMENT '用户id'
,login_name STRING COMMENT '用户名称'
,nick_name STRING COMMENT '用户昵称'
,name STRING COMMENT '用户姓名'
,phone_num STRING COMMENT '手机号码'
,email STRING COMMENT '邮箱'
,head_img STRING COMMENT '头像地址'
,user_level STRING COMMENT '用户等级'
,birthday STRING COMMENT '生日'
,gender STRING COMMENT '性别'
,status STRING COMMENT '状态 1-有效,0-无效,2-黑名单'
,create_time STRING COMMENT '创建时间'
,update_time STRING COMMENT '操作时间'
,start_date STRING COMMENT '开始日期'
,end_date STRING COMMENT '结束日期'
) COMMENT '用户表'
PARTITIONED BY (pt STRING)
STORED AS ORC
TBLPROPERTIES ('orc.compress' = 'snappy');
(2)首日装载语句
insert overwrite table dim_user_info_his partition(pt='9999-12-31')
select
user_id
,login_name
,nick_name
,md5(user_name)
,md5(phone_num)
,md5(email)
,head_img
,user_level
,birthday
,gender
,status
,create_time
,update_time
,'2022-01-01' start_date
,'9999-12-31' end_date
from project.ods_user_info_incr_stg
(3)每日装载(x)
set hive.exec.dynamic.partition=true;
set hive.exec.dynamic.partition.mode=nonstrict;
with tmp as
(
select
t1.user_id as user_id1
, t1.login_name as login_name1
, t1.nick_name as nick_name1
, t1.name as name1
, t1.phone_num as phone_num1
, t1.email as email1
, t1.head_img as head_img1
, t1.user_level as user_level1
, t1.birthday as birthday1
, t1.gender as gender1
, t1.create_time as create_time1
, t1.update_time as update_time1
, t1.statuss as statuss1
, t1.start_date as start_date1
, t1.end_date as end_date1
, t2.user_id as user_id2
, t2.login_name as login_name2
, t2.nick_name as nick_name2
, t2.name as name2
, t2.phone_num as phone_num2
, t2.email as email2
, t2.head_img as head_img2
, t2.user_level as user_level2
, t2.birthday as birthday2
, t2.gender as gender2
, t2.create_time as create_time2
, t2.update_time as update_time2
, t2.statuss as statuss2
, t2.start_date as start_date2
, t2.end_date as end_date2
from
(
SELECT user_id, login_name, nick_name, name, phone_num, email, head_img, user_level, birthday, gender, create_time, update_time, `status` as statuss,start_date, end_date
FROM project.dim_user_info_his
where pt ='9999-12-31'
) t1
full join
(
SELECT user_id, login_name, nick_name, user_name as name , phone_num, email, head_img, user_level, birthday, gender, create_time, update_time, `status` as statuss,'2022-01-02' as start_date,'9999-12-31' as end_date
FROM project.ods_user_info_incr_stg
)t2
on t1.user_id = t2.user_id
)
insert overwrite table dim_user_info_his partition(pt)
select
if(user_id2 is not null,user_id2,user_id1)
, if(login_name2 is not null,login_name2,login_name1)
, if(nick_name2 is not null,nick_name2,nick_name1)
, if(name2 is not null,name2,name1)
, if(phone_num2 is not null,phone_num2,phone_num1)
, if(email2 is not null,email2,email1)
, if(head_img2 is not null,head_img2,head_img1)
, if(user_level2 is not null,user_level2,user_level1)
, if(birthday2 is not null,birthday2,birthday1)
, if(gender2 is not null,gender2,gender1)
, if(create_time2 is not null,create_time2,create_time1)
, if(update_time2 is not null,update_time2,update_time1)
, if(statuss2 is not null,statuss2,statuss1)
, if(start_date2 is not null,start_date2,start_date1)
, if(end_date2 is not null,end_date2,end_date1)
, if(end_date2 is not null,end_date2,end_date1) as pt
from tmp
union all
select
user_id1
, login_name1
, nick_name1
, name1
, phone_num1
, email1
, head_img1
, user_level1
, birthday1
, gender1
, create_time1
, update_time1
, statuss1
, start_date1
, '2022-01-01' as end_date1
, '2022-01-01' as pt
from tmp
where user_id1 is not null and user_id1 is not null
(4)每日装载
set hive.exec.dynamic.partition.mode=true;
with user_info_tmp as
(
select
dim.user_id
,dim.login_name
,dim.nick_name
,dim.name
,dim.phone_num
,dim.email
,dim.head_img
,dim.user_level
,dim.birthday
,dim.gender
,dim.status
,dim.create_time
,dim.update_time
,dim.start_date
,case when ods.user_id is not null and dim.end_date='9999-12-31' then date_sub('2022-01-02',1)
else dim.end_date end end_date
from (select
user_id
,login_name
,nick_name
,name
,phone_num
,email
,head_img
,user_level
,birthday
,gender
,status
,create_time
,update_time
,start_date
,end_date
from dim_user_info_his
where pt='9999-12-31') dim
left join ods_user_info_incr_stg ods
on dim.user_id = ods.user_id
union all
select
user_id
,login_name
,nick_name
,md5(user_name)
,md5(phone_num)
,md5(email)
,head_img
,user_level
,birthday
,gender
,status
,create_time
,update_time
,'2022-01-02' start_date
,'9999-12-31' end_date
from ods_user_info_incr_stg
)
insert overwrite table dim_user_info_his partition(pt)
select
user_id
,login_name
,nick_name
,name
,phone_num
,email
,head_img
,user_level
,birthday
,gender
,status
,create_time
,update_time
,start_date
,end_date
,end_date
from user_info_tmp
;
|