CREATE EXTERNAL TABLE `ods_a_full` (
`id` STRING COMMENT '主键',
`name` STRING COMMENT '名称'
) COMMENT 'a信息'
PARTITIONED BY (`ymd` STRING);
CREATE EXTERNAL TABLE `ods_b_full` (
`id` STRING COMMENT '主键',
`name` STRING COMMENT '名称',
`a_id` STRING COMMENT '关联a.id'
) COMMENT 'b信息'
PARTITIONED BY (`ymd` STRING);
CREATE EXTERNAL TABLE `ods_c_full` (
`id` STRING COMMENT '主键',
`name` STRING COMMENT '名称',
`b_id` STRING COMMENT '关联b.id'
) COMMENT 'c信息'
PARTITIONED BY (`ymd` STRING);
-
若 a 一对多b 一对多c 则 c 是最细维度 则 c 左联b 左联a -
左联后 命名取最细粒度c 前缀dim ,全量维度表后缀full ,拉链表后缀zip 则全名为dim_c_full -
建表 1、完全复制3个表的字段,不同表要隔开,顺序cba ,c 表主键字段排在字段第一 2、不同维度表的字段名会重名(例如上面都有name ),对此,所有字段前缀加上源表名,如:c_ 、b_ 、a_ 3、注释掉所有外键 4、添加压缩和列式存储
CREATE EXTERNAL TABLE `dim_c_full` (
`c_id` STRING COMMENT '主键',
`c_name` STRING COMMENT '名称',
`b_id` STRING COMMENT '主键',
`b_name` STRING COMMENT '名称',
`a_id` STRING COMMENT '主键',
`a_name` STRING COMMENT '名称'
) COMMENT 'c维度'
PARTITIONED BY (`ymd` STRING)
STORED AS ORC
TBLPROPERTIES('orc.compress'='snappy');
- SQL
1、WITH AS 结构 2、JOIN 结构 3、SELECT FROM WHERE 结构
WITH
c AS (
SELECT
FROM ods_c_full
WHERE ymd='{ymd}'
),
b AS (
SELECT
FROM ods_b_full
WHERE ymd='{ymd}'
),
a AS (
SELECT
FROM ods_a_full
WHERE ymd='{ymd}'
)
SELECT
FROM c
LEFT JOIN b ON b.`id`=c.`b_id`
LEFT JOIN a ON a.`id`=b.`a_id`;
WITH
c AS (
SELECT
`id`
,`name`
,`b_id`
FROM ods_c_full
WHERE ymd='{ymd}'
),
b AS (
SELECT
`id`
,`name`
,`a_id`
FROM ods_b_full
WHERE ymd='{ymd}'
),
a AS (
SELECT
`id`
,`name`
FROM ods_a_full
WHERE ymd='{ymd}'
)
INSERT OVERWRITE TABLE dim_c_full PARTITION(ymd='{ymd}')
SELECT
c.`id` AS c_id
,c.`name` AS c_name
,b.`id` AS b_id
,b.`name` AS b_name
,a.`id` AS a_id
,a.`name` AS a_name
FROM c
LEFT JOIN b ON b.`id`=c.`b_id`
LEFT JOIN a ON a.`id`=b.`a_id`;
- 联表后校验
若 ods_c_full 表为m行,联表后dim_c_full 为n行 则 必须要满足m=n(若n>m,则说明笛卡儿积)
如果维度表只被用1次,可以试试视图?
CREATE OR REPLACE VIEW `view_dim_c_full` (
`c_id` COMMENT '主键',
`c_name` COMMENT '名称',
`b_id` COMMENT '主键',
`b_name` COMMENT '名称',
`a_id` COMMENT '主键',
`a_name` COMMENT '名称'
) COMMENT 'c维度' AS
WITH
c AS (
SELECT
`id`
,`name`
,`b_id`
FROM ods_c_full
WHERE ymd='{ymd}'
),
b AS (
SELECT
`id`
,`name`
,`a_id`
FROM ods_b_full
WHERE ymd='{ymd}'
),
a AS (
SELECT
`id`
,`name`
FROM ods_a_full
WHERE ymd='{ymd}'
)
SELECT
c.`id` AS c_id
,c.`name` AS c_name
,b.`id` AS b_id
,b.`name` AS b_name
,a.`id` AS a_id
,a.`name` AS a_name
FROM c
LEFT JOIN b ON b.`id`=c.`b_id`
LEFT JOIN a ON a.`id`=b.`a_id`;
|