一、概述
分区表指的是将逻辑上的一个大表分成一些物理上的小文件。 一个表何种情况下需要设计成分区表,取决于应用,一个经验法则是当表的尺寸超过了数据库服务器物理内存时,设计为分区表是有利的。
二、优势:
1)查询性能提升,当那些访问频繁的数据在一个分区或者少数几个分区时,只需访问少数几个分区,不需要访问全表。 2)更新性能提升,当更新一个分区的大部分行时,可以通过该分区上的一个顺序扫描来取代分散到整个表上的索引和随机访问。 3)批量插入删除数据性能提升,可通过加载,删除分区表实现,可以避免批量 DELETE 导致的 VACUUM 开销。 4)节约成本,很少使用的数据可以被迁移到便宜且较慢的存储介质上。
三、类型
根据分区表的创建方式不同,可分为声明式创建的分区表和继承式创建的分区表。 声明式分区表仅支持范围划分,列表划分,哈希划分(11版本才支持),继承式分区表允许用户自定义约束条件进行划分。
-
范围划分:表被根据一个关键列或一组列划分为“范围”,不同的分区的范围之间没有重叠。例如,我们可以根据日期范围划分,或者根据特定业务对象的标识符划分。 -
列表划分:通过显式地列出每一个分区中出现的键值来划分表。 -
哈希划分:通过为每个分区指定模数和余数来对表进行分区。每个分区所持有的行都满足:分区键的值除以为其指定的模数将产生为其指定的余数。
四 、示例
1. 声明式范围分区表
mkdir /{tbs1,tbs2,tbs3,tbs4}
create tablespace tbs1 owner sysdba location '/tbs1';
create tablespace tbs2 owner sysdba location '/tbs2';
create tablespace tbs3 owner sysdba location '/tbs3';
create tablespace tbs4 owner sysdba location '/tbs4';
create table order_main (id int not null,datetime date not null,name varchar(50),amount int,price numeric) partition by range (datetime);
create table order_main202201_03 partition of order_main for values from ('2022-01-01') to ('2022-04-01') tablespace tbs1;
create table order_main202204_06 partition of order_main for values from ('2022-04-01') to ('2022-07-01') tablespace tbs2;
create table order_main202207_09 partition of order_main for values from ('2022-07-01') to ('2022-10-01') tablespace tbs3;
create table order_main202210_12 partition of order_main for values from ('2022-10-01') to ('2023-01-01') tablespace tbs4;
create index on order_main (datetime);
show enable_partition_pruning;
alter system set enable_partition_pruning = 'on';
select pg_reload_conf();
alter table order_main detach partition order_main202201_03;
drop table order_main202201_03;
mkdir /tbs5
create tablespace tbs5 owner sysdba location '/tbs5';
create table order_main202301_03 partition of order_main for values from ('2023-01-01') to ('2023-04-01') tablespace tbs5;
alter table order_main attach partition order_main202201_03 for values from ('2022-01-01') to ('2022-04-01');
1)父表创建无效索引
create index order_main_id_idx on only order_main (id);
2)各个分区表使用 concurrently 选项创建索引
create index concurrently order_main202201_03_id_idx on order_main202201_03 (id);
create index concurrently order_main202204_06_id_idx on order_main202204_06 (id);
create index concurrently order_main202207_09_id_idx on order_main202207_09 (id);
create index concurrently order_main202210_12_id_idx on order_main202210_12 (id);
create index concurrently order_main202301_03_id_idx on order_main202301_03 (id);
3)将各个分区索引全部附加到父表索引后,父表索引自动生效
alter index order_main_id_idx attach partition order_main202201_03_id_idx;
alter index order_main_id_idx attach partition order_main202204_06_id_idx;
alter index order_main_id_idx attach partition order_main202207_09_id_idx;
alter index order_main_id_idx attach partition order_main202210_12_id_idx;
alter index order_main_id_idx attach partition order_main202301_03_id_idx;
主键必须包含所有分区字段。
2. 声明式列表分区表
mkdir /{tbs1,tbs2}
create tablespace tbs1 owner sysdba location '/tbs1';
create tablespace tbs2 owner sysdba location '/tbs2';
create table register_all(id int,name varchar(20),sex varchar(5),age int) partition by list(sex);
create table register_01 partition of register_all for values in ('男') TABLESPACE tbs1;
create table register_02 partition of register_all for values in ('女') TABLESPACE tbs2;
create index on register_all (id);
show enable_partition_pruning;
alter system set enable_partition_pruning = 'on';
select pg_reload_conf();
alter table register_all detach partition register_01;
drop table register_01;
mkdir /tbs3
create tablespace tbs5 owner sysdba location '/tbs3';
create table register_03 partition of register_all for values in ('中') TABLESPACE tbs3;
alter table register_all attach partition register_01 for values in ('男');
1)父表创建无效索引
CREATE INDEX register_all_sex_idx ON ONLY register_all (sex);
2)各个分区表使用 CONCURRENTLY 选项创建索引
CREATE INDEX CONCURRENTLY register_01_sex_idx ON register_01 (sex);
CREATE INDEX CONCURRENTLY register_02_sex_idx ON register_02 (sex);
CREATE INDEX CONCURRENTLY register_03_sex_idx ON register_03 (sex);
3)将各个分区索引全部附加到父表索引后,父表索引自动生效
ALTER INDEX register_all_sex_idx ATTACH PARTITION register_01_sex_idx;
ALTER INDEX register_all_sex_idx ATTACH PARTITION register_02_sex_idx;
ALTER INDEX register_all_sex_idx ATTACH PARTITION register_03_sex_idx;
insert into register_all select generate_series(1,10000),repeat(chr(int4(random()*26)+65),3),'男',(random()*(10^2))::integer;
insert into register_all select generate_series(10001,20000),repeat(chr(int4(random()*26)+65),3),'女',(random()*(10^2))::integer;
主键必须包含所有分区字段。
3. 声明式哈希分区表
mkdir /{tbs1,tbs2,tbs3,tbs4}
create tablespace tbs1 owner sysdba location '/tbs1';
create tablespace tbs2 owner sysdba location '/tbs2';
create tablespace tbs3 owner sysdba location '/tbs3';
create tablespace tbs4 owner sysdba location '/tbs4';
create table student(name varchar(20),ctime timestamp(6) without time zone) partition by hash(name);
create table student_01 partition of student for values with(modulus 4, remainder 0) tablespace tbs1;
create table student_02 partition of student for values with(modulus 4, remainder 1) tablespace tbs2;
create table student_03 partition of student for values with(modulus 4, remainder 2) tablespace tbs3;
create table student_04 partition of student for values with(modulus 4, remainder 3) tablespace tbs4;
create index on student (name);
show enable_partition_pruning;
alter system set enable_partition_pruning = 'on';
select pg_reload_conf();
alter table student detach partition student_01;
drop table student_01;
mkdir /tbs5
create tablespace tbs5 owner sysdba location '/tbs5';
create table student_05 partition of student for values with(modulus 4, remainder 0) tablespace tbs5;
alter table student attach partition student_01 for values with(modulus 4, remainder 0);
1)父表创建无效索引
create index student_ctime_idx on only student (ctime);
2)各个分区表使用 concurrently 选项创建索引
create index concurrently student_01_ctime_idx on student_01 (ctime);
create index concurrently student_02_ctime_idx on student_02 (ctime);
create index concurrently student_03_ctime_idx on student_03 (ctime);
create index concurrently student_04_ctime_idx on student_04 (ctime);
3)将各个分区索引全部附加到父表索引后,父表索引自动生效
alter index student_ctime_idx attach partition student_01_ctime_idx;
alter index student_ctime_idx attach partition student_02_ctime_idx;
alter index student_ctime_idx attach partition student_03_ctime_idx;
alter index student_ctime_idx attach partition student_04_ctime_idx;
主键必须包含所有分区字段。
4. 继承式分区表
create table almart(date_key date,hour_key smallint,client_key integer,item_key integer,account integer,expense numeric);
create table almart_2022_02_10 () inherits (almart);
create table almart_2022_02_11 () inherits (almart);
create table almart_2022_02_12 () inherits (almart);
create table almart_2022_02_13 () inherits (almart);
alter table almart_2022_02_10 add constraint almart_2022_02_10_check_date_key check (date_key = '2022-02-10'::date);
alter table almart_2022_02_11 add constraint almart_2022_02_11_check_date_key check (date_key = '2022-02-11'::date);
alter table almart_2022_02_12 add constraint almart_2022_02_12_check_date_key check (date_key = '2022-02-12'::date);
alter table almart_2022_02_13 add constraint almart_2022_02_13_check_date_key check (date_key = '2022-02-13'::date);
create index almart_date_key_2022_02_10 on almart_2022_02_10 (date_key);
create index almart_date_key_2022_02_11 on almart_2022_02_11 (date_key);
create index almart_date_key_2022_02_12 on almart_2022_02_12 (date_key);
create index almart_date_key_2022_02_13 on almart_2022_02_13 (date_key);
create or replace function almart_partition_trigger()
returns trigger as $$
begin
if new.date_key = date '2022-02-10'
then
insert into almart_2022_02_10 values (new.*);
elsif new.date_key = date '2022-02-11'
then
insert into almart_2022_02_11 values (new.*);
elsif new.date_key = date '2022-02-12'
then
insert into almart_2022_02_12 values (new.*);
elsif new.date_key = date '2022-02-13'
then
insert into almart_2022_02_13 values (new.*);
elsif new.date_key = date '2022-02-14'
then
insert into almart_2022_02_14 values (new.*);
end if;
return null;
end;
$$
language plpgsql;
drop trigger insert_almart_partition_trigger on almart;
create trigger insert_almart_partition_trigger
before insert on almart
for each row execute procedure almart_partition_trigger();
控制查询规划器对表约束的使用,以优化查询。
constraint_exclusion 的允许值是on(对所有表检查约束)、off(从不检查约束)和 partition(只对继承的子表和UNION ALL子查询检查约束)。
partition 是默认设置。它通常与传统的继承树一起使用来提高性能。
show constraint_exclusion;
alter system set constraint_exclusion = 'partition';
select pg_reload_conf();
alter table almart_2022_02_13 no inherit almart;
drop table almart_2022_02_13;
create table almart_2022_02_14 () inherits (almart);
alter table almart_2022_02_14 add constraint almart_2022_02_14_check_date_key check (date_key = '2022-02-14::date);
create index almart_date_key_2022_02_14 on almart_2022_02_14 (date_key);
--性能测试对比
1)创建普通表
create table almart_all(date_key date,hour_key smallint,client_key integer,item_key integer,account integer,expense numeric);
2)插入1000万条测试数据
insert into almart_all
select
(select
array_agg(i::date)
from generate_series('2022-02-10'::date,'2022-02-13'::date,'1 day'::interval) as t(i)
)[floor(random()*4)+1] as date_key,
floor(random()*24) as hour_key,
floor(random()*1000000)+1 as client_key,
floor(random()*100000)+1 as item_key,
floor(random()*20)+1 as account,
floor(random()*10000)+1 as expense
from
generate_series(1,10000000,1);
insert into almart select * from almart_all;
3) 查询测试
explain analyze select * from almart where date_key between '2022-02-10 00:00:00.0' and '2022-02-11 00:00:00.0';
explain analyze select * from almart_all where date_key between '2022-02-10 00:00:00.0' and '2022-02-11 00:00:00.0';
|