IT数码 购物 网址 头条 软件 日历 阅读 图书馆
TxT小说阅读器
↓语音阅读,小说下载,古典文学↓
图片批量下载器
↓批量下载图片,美女图库↓
图片自动播放器
↓图片自动播放器↓
一键清除垃圾
↓轻轻一点,清除系统垃圾↓
开发: C++知识库 Java知识库 JavaScript Python PHP知识库 人工智能 区块链 大数据 移动开发 嵌入式 开发工具 数据结构与算法 开发测试 游戏开发 网络协议 系统运维
教程: HTML教程 CSS教程 JavaScript教程 Go语言教程 JQuery教程 VUE教程 VUE3教程 Bootstrap教程 SQL数据库教程 C语言教程 C++教程 Java教程 Python教程 Python3教程 C#教程
数码: 电脑 笔记本 显卡 显示器 固态硬盘 硬盘 耳机 手机 iphone vivo oppo 小米 华为 单反 装机 图拉丁
 
   -> 大数据 -> 数仓建表111111 -> 正文阅读

[大数据]数仓建表111111

一、dim层

1、商品信息表

(1)建表语句

drop table if EXISTS dim_sku_info_df;
CREATE TABLE if not EXISTS dim_sku_info_df(
sku_id string comment   '库存单元id',
price  double comment   '价格',
sku_name string comment '库存单元名称',
sku_desc string comment '商品规格描述',
weight double  comment '重量',
is_sale string  comment '是否销售(1:是 0:否)',
spu_id string  comment '商品id',
spu_name string comment '商品名称',
brand_id string comment '品牌id',               
brand_name string comment '品牌名称',           
is_used string  comment '是否有效 1有效 0失效' ,                
banned_time string comment '封禁时间',            
is_domestic string comment '是否国内品牌 1是 0否',             
logo_url string  comment '品牌logo的图片路径',                   
class_name_3 string comment '三级分类名称',
class_id_3 string comment '三级分类ID',
class_name_2 string comment '二级分类名称',
class_id_2 string comment '二级分类ID',
class_name_1 string comment '一级分类名称',
class_id_1 string comment '一级分类ID',
create_time string  comment '创建时间',
update_time string comment '修改时间'
)
comment '商品信息表'
PARTITIONED BY (
  pt string comment '日期'
)
ROW FORMAT DELIMITED FIELDS TERMINATED BY ',' 
STORED AS ORC tblproperties("orc.compress"="SNAPPY")
;

(2)导数语句

with sku AS
(SELECT
 sku_id                  
,spu_id                  
,price                   
,sku_name                
,sku_desc                
,weight                  
,brand_id                
,category3_id             
,sku_img                   
,is_sale                     
,create_time             
,update_time             
,pt                      
from  project.ods_sku_info_full where pt = '2022-01-01'),
spu as 
(select 
spu_id                         
,spu_name                         
from  project.ods_spu_info_full where pt = '2022-01-01'),
brand as 
(select 
 brand_id                
,brand_name              
,is_used                    
,banned_time             
,is_domestic                
,logo_url                  
from project.ods_pub_brand_full where pt = '2022-01-01'),
class as 
(select 
 l3.class_id as class_id_3
,l3.class_name as class_name_3
,l2.class_id as class_id_2
,l2.class_name as class_name_2
,l1.class_id as class_id_1
,l1.class_name as class_name_1
from (select * from project.ods_pub_class_full where pt='2022-01-01' and `level`= '1') l1
left join (select * from project.ods_pub_class_full where pt='2022-01-01' and `level`='2') l2 
 on  l1.class_id = l2.parent_id
left join (select * from project.ods_pub_class_full where pt='2022-01-01' and `level`='3') l3
 on l2.class_id = l3.parent_id )
insert overwrite table project.dim_sku_info_df partition(pt='2022-01-01')
select
sku.sku_id,
sku.price,
sku.sku_name,
sku.sku_desc,
sku.weight,
sku.is_sale,
sku.spu_id,
spu.spu_name,
brand.brand_id,               
brand.brand_name,           
brand.is_used,                
brand.banned_time,            
brand.is_domestic,             
brand.logo_url,                   
class.class_name_3,
class.class_id_3,
class.class_name_2,
class.class_id_2,
class.class_name_1,
class.class_id_1,
sku.create_time,
sku.update_time
from  sku 
left join spu 
 on sku.spu_id = spu.spu_id
left join brand
 on  sku.brand_id = brand.brand_id
left join class 
 on sku.category3_id =  class.class_id_3 ;   

2、商品详细属性表

(1)建表语句

drop table if EXISTS dim_sku_para_info_df;
CREATE TABLE if not EXISTS dim_sku_para_info_df(
 id string comment   'id'
,sku_id string comment   '库存单元id'
,spu_id string comment   '商品id'
,para_id string comment   'sku参数id'
,attr_id string comment   '属性id'
,attr_name  string comment   '属性名称'
,value  string comment   '属性值'
)
comment '商品详细属性表'
PARTITIONED BY (
  pt string comment '日期'
)
ROW FORMAT DELIMITED FIELDS TERMINATED BY ',' 
STORED AS ORC tblproperties("orc.compress"="SNAPPY")
;

(2)导数语句

with sp AS
(select 
id
,sku_id
,spu_id
,para_id
from project.ods_sku_para_full   where pt = '2022-01-01'),
spi as 
(select 
para_id
,attr_id
,attr_name
,value
from project.ods_sku_para_info_full   where pt = '2022-01-01')
insert overwrite table dim_sku_para_info_df partition(pt='2022-01-01')
select
 sp.id 
,sp.sku_id
,sp.spu_id
,sp.para_id
,spi.attr_id
,spi.attr_name
,spi.value
from sp
left join spi
on sp.para_id = spi.para_id
;

3、活动信息表

(1)建表语句

drop table if exists dim_activity_info_df;
create external table dim_activity_info_df
(   id     string comment 'id',
    activity_id        string comment '活动id',
    activity_name      string comment '活动名称',
    activity_type_code string comment '活动类型编码',
    activity_type_name string comment '活动类型名称',
    activity_desc      string comment '活动描述',
    activity_rules   string comment '活动规则',
    content_amount   double comment '满足的金额',
    reduce_amount      int comment '活动减免的金额',
    content_cnt      double comment '满足的件数',
    reduce_rate      double comment '活动减免的折扣',
    activity_level     string comment '优惠等级',
    start_time         string comment '开始时间',
    end_time           string comment '结束时间',
    create_time        string comment '创建时间',
    update_time string comment '修改时间'
) comment '活动信息表'
partitioned by (pt string)
stored as orc
tblproperties ('orc.compress' = 'snappy')
;

(2)导数语句

with act as 
(
select 
id
,activity_id
,activity_name
,activity_type
,activity_desc
,case activity_type
        when '3101' then concat('满',content_amount,'元减',reduce_amount,'元')
        when '3102' then concat('满',content_cnt,'件打',10*(1-reduce_rate),'折')
  end activity_rules
,content_amount
,reduce_amount
,content_cnt
,reduce_rate
,activity_level
,start_time
,end_time
,create_time
,update_time
from  project.ods_activity_info_full where pt='2022-01-01'
),
code as
(
select 
 code_id
,code_name
from project.ods_pub_code_full where pt='2022-01-01'
)
insert overwrite table dim_activity_info_df partition(pt='2022-01-01')
select 
id
,activity_id
,activity_name
,activity_type
,code.code_name
,activity_desc
,case activity_type
        when '3101' then concat('满',content_amount,'元减',reduce_amount,'元')
        when '3102' then concat('满',content_cnt,'件打',10*(1-reduce_rate),'折')
  end activity_rules
,content_amount
,reduce_amount
,content_cnt
,reduce_rate
,activity_level
,start_time
,end_time
,create_time
,update_time
from  act
left join  code
on act.activity_type = code.code_id
;

4、优惠券信息表

(1)建表语句

drop table if exists dim_coupon_info_df;
create table dim_coupon_info_df
(
 coupon_id        string comment '购物券id'
,coupon_name      string comment '购物券名称'
,coupon_type      string comment '购物券类型'
,coupon_type_name string comment '购物券类型名称'
,content_amount   double comment '满足的金额'
,reduce_amount    double comment '优惠券减免的金额'
,range_type       string comment '优惠券范围类型'
,class_id_3     string  comment '三级分类id'
,start_time       string comment '优惠券使用开始日期'
,end_time         string comment '优惠券使用结束日期'
,create_time      string comment '创建时间'
,update_time     string comment '修改时间'
,etl_load_time string comment 'etl加载时间'
) comment '优惠券信息表'
partitioned by (pt string)
stored as orc
tblproperties ('orc.compress' = 'snappy')
;

(2)导数语句

with coupon as
(
select 
 coupon_id
,coupon_name
,coupon_type
,content_amount
,reduce_amount
,range_type
,category3_id
,start_time
,end_time
,create_time
,update_time
,current_timestamp()
from project.ods_coupon_info_full
where pt='2022-01-01'
)
,
code as
(
select 
 code_id
,code_name
from project.ods_pub_code_full where pt='2022-01-01'
)
insert overwrite table dim_coupon_info_df partition(pt='2022-01-01')
select 
 coupon_id
,coupon_name
,coupon_type
,code_name 
,content_amount
,reduce_amount
,range_type
,category3_id 
,start_time
,end_time
,create_time
,update_time
,current_timestamp()
from coupon
left join code 
on coupon.coupon_type = code.code_id
;

5、区域表

(1)建表语句

drop table if exists dim_pub_area_df;
create  table dim_pub_area_df
(  
 province_id   string comment 'id'
,province_name string comment '省市名称'
,area_code     string comment '地区编码'
,iso_code_old      string comment '旧版地区编码'
,iso_code_new    string comment '新版地区编码'
,region_id     string comment '地区id'
,region_name   string comment '地区名称'
,etl_load_time string comment 'etl加载时间'
) comment '区域表'
partitioned by (pt string)
stored as orc
tblproperties ('orc.compress' = 'snappy')
;

(2)导数语句

insert overwrite table dim_pub_area_df partition(pt='2022-01-01')
select    
 province_id            
,province_name 
,area_code     
,iso_code      
,iso_3166_2    
,region_id     
,region_name   
,current_timestamp()
from project.ods_pub_area_full
where pt='2022-01-01'

6、商家信息表

(1)建表语句

drop table if exists dim_business_info_df;
create table dim_business_info_df
(
 bus_id          string comment '商家id'
,bus_name        string comment '商家名称'
,telephone       string comment '联系电话'
,pic_url         string comment '图片地址URL'
,bus_user_name   double comment '商家联系人'
,province        double comment '省份'
,city            string comment '城市'
,dist            string  comment '区'
,addr            string comment '详细地址'
,zipcode         string comment '邮编'
,is_open         string comment '是否关店 1关店 0未关'
,open_time       string comment '开店时间'
,closed_time     string comment '关店时间'
,create_time     string comment '创建时间'
,update_time     string comment '修改时间'
,etl_load_time   string comment 'etl加载时间'
) comment '优惠券信息表'
partitioned by (pt string)
stored as orc
tblproperties ('orc.compress' = 'snappy')
;

(2)导数语句

insert overwrite table dim_business_info_df partition(pt='2022-01-01')
select 
bus_id
,bus_name
,md5(telephone)
,pic_url
,md5(bus_user_name)
,province
,city
,dist
,addr
,zipcode
,is_open
,open_time
,closed_time
,create_time
,update_time
,current_timestamp()
from project.ods_business_info_full
where pt ='2022-01-01'
;

7、用户信息表(拉链表)

(1)建表语句

DROP TABLE IF EXISTS dim_user_info_his;
CREATE EXTERNAL TABLE dim_user_info_his
(
user_id       STRING COMMENT '用户id'
,login_name   STRING COMMENT '用户名称'
,nick_name    STRING COMMENT '用户昵称'
,name         STRING COMMENT '用户姓名'
,phone_num    STRING COMMENT '手机号码'
,email        STRING COMMENT '邮箱'
,head_img     STRING COMMENT '头像地址'
,user_level   STRING COMMENT '用户等级'
,birthday     STRING COMMENT '生日'
,gender       STRING COMMENT '性别'
,status       STRING COMMENT '状态 1-有效,0-无效,2-黑名单'
,create_time  STRING COMMENT '创建时间'
,update_time  STRING COMMENT '操作时间'
,start_date   STRING COMMENT '开始日期'
,end_date     STRING COMMENT '结束日期'
) COMMENT '用户表'
PARTITIONED BY (pt STRING)
STORED AS ORC
TBLPROPERTIES ('orc.compress' = 'snappy');

(2)首日装载语句

insert overwrite table  dim_user_info_his partition(pt='9999-12-31')
select
 user_id
,login_name
,nick_name
,md5(user_name)
,md5(phone_num)
,md5(email)
,head_img
,user_level
,birthday
,gender
,status
,create_time
,update_time
,'2022-01-01' start_date
,'9999-12-31' end_date
from project.ods_user_info_incr_stg

(3)每日装载(x)

set hive.exec.dynamic.partition=true;    
set hive.exec.dynamic.partition.mode=nonstrict;
with tmp as 
(
    select 
      t1.user_id       as user_id1
    , t1.login_name    as login_name1
    , t1.nick_name     as nick_name1
    , t1.name          as name1
    , t1.phone_num     as phone_num1
    , t1.email         as email1
    , t1.head_img      as head_img1
    , t1.user_level    as user_level1
    , t1.birthday      as birthday1
    , t1.gender        as gender1
    , t1.create_time   as create_time1
    , t1.update_time   as update_time1
    , t1.statuss       as statuss1
    , t1.start_date    as start_date1
    , t1.end_date      as end_date1

    , t2.user_id       as user_id2
    , t2.login_name    as login_name2
    , t2.nick_name     as nick_name2 
    , t2.name          as name2
    , t2.phone_num     as phone_num2
    , t2.email         as email2
    , t2.head_img      as head_img2
    , t2.user_level    as user_level2
    , t2.birthday      as birthday2
    , t2.gender        as gender2
    , t2.create_time   as create_time2
    , t2.update_time   as update_time2
    , t2.statuss       as statuss2
    , t2.start_date    as start_date2
    , t2.end_date      as end_date2
    from
    (
        SELECT user_id, login_name, nick_name, name, phone_num, email, head_img, user_level, birthday, gender,  create_time, update_time, `status` as statuss,start_date, end_date
        FROM project.dim_user_info_his
        where pt ='9999-12-31'
    ) t1
    full join
    (
        SELECT user_id, login_name, nick_name,  user_name as name , phone_num, email, head_img, user_level, birthday, gender, create_time, update_time, `status` as statuss,'2022-01-02' as start_date,'9999-12-31' as end_date
        FROM project.ods_user_info_incr_stg
    )t2
    on t1.user_id = t2.user_id
)
insert overwrite table dim_user_info_his partition(pt)
select 
      if(user_id2     is not null,user_id2,user_id1)
    , if(login_name2  is not null,login_name2,login_name1)
    , if(nick_name2   is not null,nick_name2,nick_name1)  
    , if(name2        is not null,name2,name1) 
    , if(phone_num2   is not null,phone_num2,phone_num1)     
    , if(email2       is not null,email2,email1)   
    , if(head_img2    is not null,head_img2,head_img1)    
    , if(user_level2  is not null,user_level2,user_level1)       
    , if(birthday2    is not null,birthday2,birthday1)     
    , if(gender2      is not null,gender2,gender1)   
    , if(create_time2 is not null,create_time2,create_time1)       
    , if(update_time2 is not null,update_time2,update_time1)      
    , if(statuss2     is not null,statuss2,statuss1)       
    , if(start_date2  is not null,start_date2,start_date1)         
    , if(end_date2    is not null,end_date2,end_date1)
    , if(end_date2    is not null,end_date2,end_date1) as pt    
from tmp
union all
select 
      user_id1
    , login_name1
    , nick_name1
    , name1
    , phone_num1
    , email1
    , head_img1
    , user_level1
    , birthday1
    , gender1
    , create_time1
    , update_time1
    , statuss1
    , start_date1
    , '2022-01-01' as end_date1
    , '2022-01-01' as pt
from tmp
where user_id1 is not null and user_id1 is not null

(4)每日装载

set hive.exec.dynamic.partition.mode=true; 
with user_info_tmp as 
(
select 
 dim.user_id     
,dim.login_name 
,dim.nick_name  
,dim.name       
,dim.phone_num  
,dim.email      
,dim.head_img   
,dim.user_level 
,dim.birthday   
,dim.gender     
,dim.status     
,dim.create_time
,dim.update_time
,dim.start_date 
,case when ods.user_id is not null and dim.end_date='9999-12-31' then date_sub('2022-01-02',1) 
 else dim.end_date end end_date
from (select 
      user_id     
      ,login_name 
      ,nick_name  
      ,name       
      ,phone_num  
      ,email      
      ,head_img   
      ,user_level 
      ,birthday   
      ,gender     
      ,status     
      ,create_time
      ,update_time
      ,start_date 
      ,end_date   
      from dim_user_info_his 
      where pt='9999-12-31') dim
left join ods_user_info_incr_stg ods
       on dim.user_id = ods.user_id
union all
select 
 user_id     
,login_name 
,nick_name  
,md5(user_name)
,md5(phone_num)
,md5(email)   
,head_img   
,user_level 
,birthday   
,gender     
,status     
,create_time
,update_time
,'2022-01-02' start_date
,'9999-12-31'  end_date
from ods_user_info_incr_stg
)
insert overwrite table dim_user_info_his partition(pt)
select 
user_id     
,login_name 
,nick_name  
,name       
,phone_num  
,email      
,head_img   
,user_level 
,birthday   
,gender     
,status     
,create_time
,update_time
,start_date
,end_date 
,end_date
from user_info_tmp
;

  大数据 最新文章
实现Kafka至少消费一次
亚马逊云科技:还在苦于ETL?Zero ETL的时代
初探MapReduce
【SpringBoot框架篇】32.基于注解+redis实现
Elasticsearch:如何减少 Elasticsearch 集
Go redis操作
Redis面试题
专题五 Redis高并发场景
基于GBase8s和Calcite的多数据源查询
Redis——底层数据结构原理
上一篇文章           查看所有文章
加:2022-04-24 09:31:36  更:2022-04-24 09:34:10 
 
开发: C++知识库 Java知识库 JavaScript Python PHP知识库 人工智能 区块链 大数据 移动开发 嵌入式 开发工具 数据结构与算法 开发测试 游戏开发 网络协议 系统运维
教程: HTML教程 CSS教程 JavaScript教程 Go语言教程 JQuery教程 VUE教程 VUE3教程 Bootstrap教程 SQL数据库教程 C语言教程 C++教程 Java教程 Python教程 Python3教程 C#教程
数码: 电脑 笔记本 显卡 显示器 固态硬盘 硬盘 耳机 手机 iphone vivo oppo 小米 华为 单反 装机 图拉丁

360图书馆 购物 三丰科技 阅读网 日历 万年历 2024年11日历 -2024/11/24 3:42:23-

图片自动播放器
↓图片自动播放器↓
TxT小说阅读器
↓语音阅读,小说下载,古典文学↓
一键清除垃圾
↓轻轻一点,清除系统垃圾↓
图片批量下载器
↓批量下载图片,美女图库↓
  网站联系: qq:121756557 email:121756557@qq.com  IT数码