DROP DATABASE IF EXISTS school;
CREATE DATABASE school LOCATION '/school';
分区表
- 分区是对应一个HDFS上的独立文件夹
WHERE 分区字段,查询效率更高
1、创建分区表
DROP TABLE IF EXISTS school.student;
CREATE TABLE school.student (s_id INT)
PARTITIONED BY (grade STRING)
LOCATION '/school/student';
2、插入数据
INSERT INTO TABLE school.student PARTITION(grade='a') VALUES (1),(2);
INSERT INTO TABLE school.student PARTITION(grade='b') VALUES (3);
3、查询分区
SHOW PARTITIONS school.student;
Hue界面结果显示
4、查询时WHERE 分区
SELECT * FROM school.student WHERE grade='a';
Hue界面结果显示
5、查看HDFS
动态分区
Dynamic Partition:根据分区字段的值,自动将数据插入到相应的分区
基础配置
SET hive.exec.dynamic.partition=true;
SET hive.exec.dynamic.partition.mode=nonstrict;
SET hive.exec.max.dynamic.partitions=1000;
SET hive.exec.max.dynamic.partitions.pernode=100;
SET hive.exec.max.created.files=100000;
SET hive.error.on.empty.partition=false;
写法
分区字段写最尾
SET hive.exec.dynamic.partition=true;
SET hive.exec.dynamic.partition.mode=nonstrict;
INSERT INTO TABLE school.student PARTITION(grade)
SELECT
s_id,
grade
FROM school.student;
SELECT * FROM school.student WHERE grade='b'; Hue结果显示(数据续上)
多级分区表
实际对应HDFS的层级文件夹
DROP TABLE IF EXISTS school.student2;
CREATE TABLE school.student2 (name STRING,age INT)
PARTITIONED BY (s_id INT,grade STRING)
LOCATION '/school/student2';
INSERT INTO TABLE school.student2 PARTITION(s_id=5,grade='c') VALUES ('小黄',13);
|