很多項目現場由於前期規划問題導致很多表未設置成分區表,下面介紹一種在線遷移的方法。
使用繼承表,觸發器,異步遷移,交換表名一系列步驟,在線將非分區表,轉換為分區表(交換表名是需要短暫的堵塞)。
關鍵技術:
1、繼承表(子分區)
對select, update, delete, truncate, drop透明。
2、觸發器
插入,采用before觸發器,數據路由到繼承分區
更新,采用before觸發器,刪除老表記錄,同時將更新后的數據插入新表
3、后台遷移數據:在一個CTE中用 for update skip locked 分批選取父表(only)中的記錄,delete from only 父表后,再 insert 寫回(由觸發器路由到對應的子分區)
4、遷移結束(父表沒有數據后),短暫上鎖,剝離INHERIT關系,切換到原生分區,切換表名。
以下以常用的時間分區進行試驗:
1.創建測試表
-- Source table for the walkthrough: a plain (non-partitioned) heap table
-- keyed by id; create_time is the column later used as the partition key.
create table old (
    id          int,
    info        text,
    create_time timestamp,
    primary key (id)
);
2.插入100萬測試數據
-- Seed 1,000,000 rows: sequential ids, a random md5 string, and a
-- create_time spread over now() .. now() + 100 days (whole-day offsets).
insert into old (id, info, create_time)
select
    g.id,
    md5(random()::text),
    (now()+ ((random()*100)::int ||' day')::interval)
from generate_series(1,1000000) as g(id);
3.創建子分區(模擬項目現場按時間的range分區)
-- Create one inheritance child of "old" per month from 2020-12 through 2021-05.
-- Each child copies the parent's definition (LIKE ... INCLUDING ALL), inherits
-- from it, and gets a CHECK constraint "ck" restricting create_time to its month.
--
-- The CHECK also requires create_time IS NOT NULL: a range partition constraint
-- implies NOT NULL on the key, so a range-only CHECK cannot prove it and a later
-- ALTER TABLE ... ATTACH PARTITION would have to scan the whole child under lock.
do language plpgsql $$
declare
  rec        record;
  child_name text;
begin
  for rec in
    select t as beginmonth,
           t + interval '1 month' as endmonth
    from generate_series('2020-12-01'::timestamp,
                         '2021-05-01'::timestamp,
                         interval '1 month') g(t)
  loop
    child_name := 'old_' || to_char(rec.beginmonth, 'yyyyMM');

    -- %I quotes the identifier, %L quotes the literal: no hand-built quoting.
    execute format(
      'create table %I (like old including all) inherits (old)',
      child_name);

    execute format(
      'alter table %I add constraint ck check (create_time is not null and create_time >= %L::timestamp and create_time < %L::timestamp)',
      child_name, rec.beginmonth, rec.endmonth);
  end loop;
end;
$$;
4.old表繼承關系
postgres=# \d+ old Table "public.old" Column | Type | Collation | Nullable | Default | Storage | Stats target | Description -------------+-----------------------------+-----------+----------+---------+----------+--------------+------------- id | integer | | not null | | plain | | info | text | | | | extended | | create_time | timestamp without time zone | | | | plain | | Indexes: "old_pkey" PRIMARY KEY, btree (id) Child tables: old_202012, old_202101, old_202102, old_202103, old_202104, old_202105 Access method: heap postgres=# \d+ old_202105 Table "public.old_202105" Column | Type | Collation | Nullable | Default | Storage | Stats target | Description -------------+-----------------------------+-----------+----------+---------+----------+--------------+------------- id | integer | | not null | | plain | | info | text | | | | extended | | create_time | timestamp without time zone | | | | plain | | Indexes: "old_202105_pkey" PRIMARY KEY, btree (id) Check constraints: "ck" CHECK (create_time >= '2021-05-01 00:00:00'::timestamp without time zone AND create_time < '2021-06-01 00:00:00'::timestamp without time zone) Inherits: old Access method: heap
5.插入,采用before觸發器,路由到新表(需要根據實際情況修改,測試數據分區只到'2021-05')
-- BEFORE INSERT row trigger function for "old".
-- Routes each new row to the monthly child table whose range contains
-- NEW.create_time. Rows that match no listed month (including a NULL
-- create_time, for which every comparison is NULL) are left to be stored
-- in the parent table itself.
create or replace function ins_tbl() returns trigger as $$
declare
  ts timestamp := new.create_time;
begin
  if ts >= '2020-12-01'::timestamp and ts < '2021-01-01'::timestamp then
    insert into old_202012 values (NEW.*);
  elsif ts >= '2021-01-01'::timestamp and ts < '2021-02-01'::timestamp then
    insert into old_202101 values (NEW.*);
  elsif ts >= '2021-02-01'::timestamp and ts < '2021-03-01'::timestamp then
    insert into old_202102 values (NEW.*);
  elsif ts >= '2021-03-01'::timestamp and ts < '2021-04-01'::timestamp then
    insert into old_202103 values (NEW.*);
  elsif ts >= '2021-04-01'::timestamp and ts < '2021-05-01'::timestamp then
    insert into old_202104 values (NEW.*);
  elsif ts >= '2021-05-01'::timestamp and ts < '2021-06-01'::timestamp then
    insert into old_202105 values (NEW.*);
  else
    -- No matching child (or NULL create_time): write to the parent table.
    return NEW;
  end if;

  -- The row already went to a child; returning NULL skips the parent insert.
  return null;
end;
$$ language plpgsql strict;

create trigger tg1 before insert on old for each row execute procedure ins_tbl();
6.更新,采用before觸發器,路由到新表(分區字段理論上不更新,如果更新會導致報錯)
-- BEFORE UPDATE row trigger function for "old".
-- When a row still stored in the parent is updated, the updated image (NEW)
-- is inserted into the monthly child matching NEW.create_time, the original
-- row is deleted from the parent, and the in-place update is skipped.
-- If NEW.create_time matches no listed month (or is NULL), the row is
-- updated in place in the parent.
create or replace function upd_tbl () returns trigger as $$
begin
  case
    when (new.create_time>='2020-12-01'::timestamp and new.create_time<'2021-01-01'::timestamp) then
      insert into old_202012 values (NEW.*);
    when (new.create_time>='2021-01-01'::timestamp and new.create_time<'2021-02-01'::timestamp) then
      insert into old_202101 values (NEW.*);
    when (new.create_time>='2021-02-01'::timestamp and new.create_time<'2021-03-01'::timestamp) then
      insert into old_202102 values (NEW.*);
    when (new.create_time>='2021-03-01'::timestamp and new.create_time<'2021-04-01'::timestamp) then
      insert into old_202103 values (NEW.*);
    when (new.create_time>='2021-04-01'::timestamp and new.create_time<'2021-05-01'::timestamp) then
      insert into old_202104 values (NEW.*);
    when (new.create_time>='2021-05-01'::timestamp and new.create_time<'2021-06-01'::timestamp) then
      insert into old_202105 values (NEW.*);
    else
      -- No matching child (or NULL create_time): update the row in the parent.
      return NEW;
  end case;

  -- Remove the pre-update row from the parent. Match on OLD.id, not NEW.id:
  -- if the UPDATE changes id, NEW.id would not find the stored row and it
  -- would remain in the parent next to its copy in the child.
  delete from only old where id=OLD.id;

  -- Returning NULL cancels the in-place update of the (now deleted) parent row.
  return null;
end;
$$ language plpgsql strict;

create trigger tg2 before update on old for each row execute procedure upd_tbl();
7.測試delete、insert、update、select是否邏輯正常
--DELETE postgres=# select tableoid::regclass,* from old where id=1; tableoid | id | info | create_time ----------+----+----------------------------------+---------------------------- old | 1 | 9f6bd5bc6e54e549b8380c8d6c70c9b4 | 2021-01-14 15:13:05.442282 (1 row) postgres=# delete from old where id=1; DELETE 1 postgres=# select tableoid::regclass,* from old where id=1; tableoid | id | info | create_time ----------+----+------+------------- (0 rows) --INSERT postgres=# INSERT INTO old values(1,md5(random()::text),(now()+ ((random()*100)::int ||' day')::interval)); INSERT 0 0 postgres=# select tableoid::regclass,* from old where id=1; tableoid | id | info | create_time ------------+----+----------------------------------+---------------------------- old_202101 | 1 | adfcee05df6437fabb21f40b13320ce0 | 2021-01-05 15:17:26.066304 (1 row) --UPDATE postgres=# select tableoid::regclass,* from old where id in(1,2); tableoid | id | info | create_time ------------+----+----------------------------------+---------------------------- old | 2 | ca46fe7d0fe21f33ec46fb07dd669e32 | 2021-03-13 15:13:05.442282 old_202101 | 1 | adfcee05df6437fabb21f40b13320ce0 | 2021-01-05 15:17:26.066304 (2 rows) postgres=# update old set info='test' where id in(1,2) returning tableoid::regclass,*; tableoid | id | info | create_time ------------+----+------+---------------------------- old_202101 | 1 | test | 2021-01-05 15:17:26.066304 (1 row) UPDATE 1 postgres=# select tableoid::regclass,* from old where id in(1,2); tableoid | id | info | create_time ------------+----+------+---------------------------- old_202101 | 1 | test | 2021-01-05 15:17:26.066304 old_202103 | 2 | test | 2021-03-13 15:13:05.442282 (2 rows)
8、開啟壓測,后台對原表數據進行遷移
-- Benchmark helper: insert one row into "old" with the given id, the fixed
-- info value 'test', and a create_time of now() plus 0..100 whole days.
-- Any error raised by the insert is swallowed and the function returns normally.
create or replace function test_ins(int) returns void as $$
begin
  insert into old
  values ($1, 'test', (now()+ ((random()*100)::int ||' day')::interval));
exception
  when others then
    return;
end;
$$ language plpgsql strict;

-- pgbench script: each transaction deletes one row, updates one row,
-- and inserts one row through test_ins().
vi test.sql
\set id1 random(10000001,200000000)
\set id2 random(1,50000)
\set id3 random(50001,100000)
delete from old where id=:id2;
update old set info=md5(random()::text) where id=:id3;
select test_ins(:id1);
開啟壓測
pgbench -M prepared -n -r -P 1 -f ./test.sql -c 4 -j 4 -T 1200
9、在線遷移數據
批量遷移,每一批遷移N條。調用以下SQL
-- Move one batch of up to 10000 rows out of the parent table.
-- The inner query picks rows physically stored in "old" (ONLY) and skips rows
-- locked by other sessions; the DELETE removes them and hands them to the
-- INSERT, which targets "old" again so the BEFORE INSERT trigger can route them.
-- NOTE(review): the ctid = any(array(...)) form is presumably chosen so the
-- planner can use a TID scan; confirm with EXPLAIN before rewriting it.
with moved_rows as (
    delete from only old
    where ctid = any (array (
        select ctid
        from only old
        limit 10000
        for update skip locked
    ))
    returning id, info, create_time
)
insert into old (id, info, create_time)
select id, info, create_time
from moved_rows;
持續調用以上SQL,直到old表已經完全沒數據,則代表數據全部遷移到分區
postgres=# select count(*) from only old; count ------- 0 (1 row) postgres=# select count(*) from old; count --------- 1023111 (1 row)
10.切換到分區表
創建分區表
-- Empty natively partitioned table that will take over the name "old";
-- range-partitioned on create_time.
create table new (
    id          integer,
    info        text,
    create_time timestamp without time zone
)
partition by range (create_time);
切換表名,防止雪崩,使用鎖超時。表名更改本身速度非常快;但要注意 ATTACH PARTITION 在子表沒有能蘊含分區約束(含 create_time IS NOT NULL)的CHECK約束時,會全表掃描校驗子表數據
-- Cut-over: detach the monthly children from the inheritance parent, swap the
-- table names, and attach the children as native range partitions, all in one
-- transaction so other sessions see either the old or the new layout.
begin;

-- SET LOCAL keeps the timeout scoped to this transaction; a plain SET would
-- stay in effect for the rest of the session.
set local lock_timeout ='3s';

-- Lock the parent and all its children up front, parent first. DML on "old"
-- locks the parent before the children, so locking in the same order avoids
-- deadlocking against it; failing here also fails before anything is changed.
lock table old in access exclusive mode;

-- Rows still stored in the parent would be left behind in old_tmp after the
-- rename, so refuse to switch until the batch migration has emptied it.
do $$
begin
  if exists (select 1 from only old) then
    raise exception 'parent table "old" still contains rows; finish the batch migration before switching';
  end if;
end;
$$;

alter table old_202012 no inherit old;
alter table old_202101 no inherit old;
alter table old_202102 no inherit old;
alter table old_202103 no inherit old;
alter table old_202104 no inherit old;
alter table old_202105 no inherit old;

alter table old rename to old_tmp;
alter table new rename to old;

-- NOTE: ATTACH PARTITION validates existing rows with a full scan of the
-- child unless a valid CHECK constraint on it already implies the partition
-- bound, including create_time IS NOT NULL.
alter table old attach partition old_202012 for values from ('2020-12-01'::timestamp) to ('2021-01-01'::timestamp);
alter table old attach partition old_202101 for values from ('2021-01-01'::timestamp) to ('2021-02-01'::timestamp);
alter table old attach partition old_202102 for values from ('2021-02-01'::timestamp) to ('2021-03-01'::timestamp);
alter table old attach partition old_202103 for values from ('2021-03-01'::timestamp) to ('2021-04-01'::timestamp);
alter table old attach partition old_202104 for values from ('2021-04-01'::timestamp) to ('2021-05-01'::timestamp);
alter table old attach partition old_202105 for values from ('2021-05-01'::timestamp) to ('2021-06-01'::timestamp);

commit;
切換后分區如下:
postgres=# \d+ old Partitioned table "public.old" Column | Type | Collation | Nullable | Default | Storage | Stats target | Description -------------+-----------------------------+-----------+----------+---------+----------+--------------+------------- id | integer | | | | plain | | info | text | | | | extended | | create_time | timestamp without time zone | | | | plain | | Partition key: RANGE (create_time) Partitions: old_202012 FOR VALUES FROM ('2020-12-01 00:00:00') TO ('2021-01-01 00:00:00'), old_202101 FOR VALUES FROM ('2021-01-01 00:00:00') TO ('2021-02-01 00:00:00'), old_202102 FOR VALUES FROM ('2021-02-01 00:00:00') TO ('2021-03-01 00:00:00'), old_202103 FOR VALUES FROM ('2021-03-01 00:00:00') TO ('2021-04-01 00:00:00'), old_202104 FOR VALUES FROM ('2021-04-01 00:00:00') TO ('2021-05-01 00:00:00'), old_202105 FOR VALUES FROM ('2021-05-01 00:00:00') TO ('2021-06-01 00:00:00')
查詢測試
postgres=# explain analyze select * from old where id=1; QUERY PLAN ----------------------------------------------------------------------------------------------------------------------------------- Append (cost=0.42..50.26 rows=6 width=40) (actual time=0.265..0.266 rows=0 loops=1) -> Index Scan using old_202012_pkey on old_202012 (cost=0.42..8.44 rows=1 width=40) (actual time=0.077..0.077 rows=0 loops=1) Index Cond: (id = 1) -> Index Scan using old_202101_pkey on old_202101 (cost=0.42..8.44 rows=1 width=40) (actual time=0.046..0.046 rows=0 loops=1) Index Cond: (id = 1) -> Index Scan using old_202102_pkey on old_202102 (cost=0.42..8.44 rows=1 width=39) (actual time=0.052..0.052 rows=0 loops=1) Index Cond: (id = 1) -> Index Scan using old_202103_pkey on old_202103 (cost=0.42..8.44 rows=1 width=40) (actual time=0.037..0.038 rows=0 loops=1) Index Cond: (id = 1) -> Index Scan using old_202104_pkey on old_202104 (cost=0.29..8.30 rows=1 width=39) (actual time=0.036..0.036 rows=0 loops=1) Index Cond: (id = 1) -> Index Scan using old_202105_pkey on old_202105 (cost=0.15..8.17 rows=1 width=44) (actual time=0.011..0.011 rows=0 loops=1) Index Cond: (id = 1) Planning Time: 2.582 ms Execution Time: 0.424 ms (15 rows) postgres=# explain analyze select * from old where id=1 and create_time between '2020-12-01'::timestamp and '2021-03-01'::timestamp; QUERY PLAN --------------------------------------------------------------------------------------------------------------------------------------------------------------- Append (cost=0.42..33.80 rows=4 width=40) (actual time=0.122..0.122 rows=0 loops=1) -> Index Scan using old_202012_pkey on old_202012 (cost=0.42..8.44 rows=1 width=40) (actual time=0.038..0.039 rows=0 loops=1) Index Cond: (id = 1) Filter: ((create_time >= '2020-12-01 00:00:00'::timestamp without time zone) AND (create_time <= '2021-03-01 00:00:00'::timestamp without time zone)) -> Index Scan using old_202101_pkey on old_202101 (cost=0.42..8.45 rows=1 width=40) 
(actual time=0.040..0.040 rows=0 loops=1) Index Cond: (id = 1) Filter: ((create_time >= '2020-12-01 00:00:00'::timestamp without time zone) AND (create_time <= '2021-03-01 00:00:00'::timestamp without time zone)) -> Index Scan using old_202102_pkey on old_202102 (cost=0.42..8.45 rows=1 width=39) (actual time=0.018..0.019 rows=0 loops=1) Index Cond: (id = 1) Filter: ((create_time >= '2020-12-01 00:00:00'::timestamp without time zone) AND (create_time <= '2021-03-01 00:00:00'::timestamp without time zone)) -> Index Scan using old_202103_pkey on old_202103 (cost=0.42..8.45 rows=1 width=40) (actual time=0.022..0.022 rows=0 loops=1) Index Cond: (id = 1) Filter: ((create_time >= '2020-12-01 00:00:00'::timestamp without time zone) AND (create_time <= '2021-03-01 00:00:00'::timestamp without time zone)) Planning Time: 0.773 ms Execution Time: 0.202 ms (15 rows)
數據
postgres=# select count(*) from old; count --------- 1162055 (1 row)
參考資料:
https://github.com/digoal/blog/blob/master/201901/20190131_01.md