postgresql-性能環境宕機排查
問題背景
測試人員在性能測試時,給數據庫加壓數據過程中服務器宕機,啟動數據庫十余分鍾后還是顯示數據庫一直在啟動中。
由於當時周末,我在外,沒有帶筆記本。聽到這個消息后,首先詢問了啟動日志是否有異常,反饋是沒有錯誤日志,日志打印了很多啟動中。
猜測
數據庫一直啟動中,是否由於事務日志需要回滾導致的呢?
讓測試查看了pg_xlog日志文件大小,居然有90GB+
。
pg_xlog簡介
abase3.6.2目錄中有三類日志文件,pg_log
記錄日志文件,pg_xlog
記錄事務日志,pg_clog
記錄事務提交日志
abase5.0以后為了防止誤刪三類日志文件分別對應:pg_log->log,pg_xlog->pg_wal,pg_clog->pg_xact
正常情況下pg_xlog
的大小應該是小於max_wal_size=1GB
如果一直在啟動中說明在回滾日志,只需要等待即可。
宕機問題排查
聽到宕機后,我也火速回家處理問題
按照正常的磁盤讀寫速度,90GB
也用不了多久,但是卻還原了兩個多小時
查看磁盤讀寫速度只有7MB/S
,而磁盤的io
負荷已經100%
最終排查出磁盤讀寫慢的主要問題是,服務器在前幾天擴容時,擴的是走網絡存儲
,而不是物理存儲
,導致讀寫慢。
而生成大量日志的原因是因為數據庫在加壓,加壓函數慢,大量未提交的事務,導致生成大量的wal日志
數據庫宕機異常日志
database system was not properly shut down; automatic recovery in progress
redo starts at 4F1/AB018A90
ending log output to stderr Future log output will go to log destination "csvlog".
database system was interrupted while in recovery at 2019-05-26 13:19:42 CST This probably means that some data is corrupted and you will have to use the last backup for recovery.
MultiXact member wraparound protections are now enabled
database system is ready to accept connections
autovacuum launcher started
網絡存儲
物理存儲為datastore,網絡存儲為target,網絡存儲較慢
加壓函數
大量類似的加壓函數,這里選取一條以作說明
CREATE OR REPLACE FUNCTION "db_scld"."abase2019rycgjl"(cyclecount int4)
RETURNS "pg_catalog"."varchar" AS $BODY$
declare
v_db_scld_t_scld_rycgjl_c_bh bpchar;
v_db_scld_t_scld_rycgjl_c_cprscjlbh bpchar;
v_db_scld_t_scld_rycgjl_c_jdrybm varchar(300);
v_db_scld_t_scld_rycgjl_c_jdryxm varchar(300);
v_db_scld_t_scld_rycgjl_d_cgrq date;
v_db_scld_t_scld_rycgjl_c_cgqk varchar(300);
v_db_scld_t_scld_rycgjl_n_gzgs numeric;
v_db_scld_t_scld_rycgjl_c_sfyx varchar(300);
v_db_scld_t_scld_rycgjl_c_cjr varchar(300);
v_db_scld_t_scld_rycgjl_c_gxr varchar(300);
v_db_scld_t_scld_rycgjl_dt_crsj timestamp;
v_db_scld_t_scld_rycgjl_dt_gxsj timestamp;
v_db_scld_t_scld_rycgjl_c_cjbm varchar(300);
v_db_scld_t_scld_rycgjl_c_cjdw varchar(300);
v_s varchar(300);
v_scdd varchar(300);
BEGIN
for i in 1.. cycleCount loop
v_db_scld_t_scld_rycgjl_c_cprscjlbh :=(select C_BH from db_scld.t_scld_cprscjl order by random() LIMIT 1 );
v_scdd := (select c_cjbm from db_scld.t_scld_cprscjl where c_bh= v_db_scld_t_scld_rycgjl_c_cprscjlbh );
for j in 1 .. 144 loop
v_db_scld_t_scld_rycgjl_c_bh :=get_UUID(32,32);
v_db_scld_t_scld_rycgjl_c_jdrybm :=(select c_jdrybm from db_scld.t_jdry
where c_bmbm = v_scdd and c_sfyx ='1' and c_ryzszt not in ('05','12','11','07','09','13') order by random() limit 1 );
v_db_scld_t_scld_rycgjl_c_jdryxm :=(select c_xm from db_scld.t_jdry where c_jdrybm = v_db_scld_t_scld_rycgjl_c_jdrybm);
v_db_scld_t_scld_rycgjl_d_cgrq :=(select d_scrq from db_scld.T_SCLD_CPRSCJL where C_BH = v_db_scld_t_scld_rycgjl_c_cprscjlbh);
v_db_scld_t_scld_rycgjl_c_cgqk :=get_split_str('01');
v_db_scld_t_scld_rycgjl_n_gzgs :=get_rand_int(8,8);
v_db_scld_t_scld_rycgjl_c_sfyx :=get_split_str('1');
v_db_scld_t_scld_rycgjl_c_cjr :=(select C_CJR from db_scld.T_SCLD_CPRSCJL where C_BH = v_db_scld_t_scld_rycgjl_c_cprscjlbh and d_scrq= v_db_scld_t_scld_rycgjl_d_cgrq);
v_db_scld_t_scld_rycgjl_c_gxr :=v_db_scld_t_scld_rycgjl_c_cjr;
v_db_scld_t_scld_rycgjl_dt_crsj :=v_db_scld_t_scld_rycgjl_d_cgrq;
v_db_scld_t_scld_rycgjl_dt_gxsj :=v_db_scld_t_scld_rycgjl_dt_crsj;
v_db_scld_t_scld_rycgjl_c_cjbm :=(select C_CJBM from db_scld.T_SCLD_CPRSCJL where C_BH = v_db_scld_t_scld_rycgjl_c_cprscjlbh);
v_db_scld_t_scld_rycgjl_c_cjdw :=(select left(v_db_scld_t_scld_rycgjl_c_cjbm,6));
/*==人員出工記錄 (db_scld.t_scld_rycgjl) ==*/
INSERT INTO db_scld.t_scld_rycgjl(c_bh,c_cprscjlbh,c_jdrybm,c_jdryxm,d_cgrq,c_cgqk,n_gzgs,c_sfyx,c_cjr,c_gxr,dt_crsj,dt_gxsj,c_cjbm,c_cjdw)
VALUES (v_db_scld_t_scld_rycgjl_c_bh,v_db_scld_t_scld_rycgjl_c_cprscjlbh,v_db_scld_t_scld_rycgjl_c_jdrybm,v_db_scld_t_scld_rycgjl_c_jdryxm,v_db_scld_t_scld_rycgjl_d_cgrq,v_db_scld_t_scld_rycgjl_c_cgqk,v_db_scld_t_scld_rycgjl_n_gzgs,v_db_scld_t_scld_rycgjl_c_sfyx,v_db_scld_t_scld_rycgjl_c_cjr,v_db_scld_t_scld_rycgjl_c_gxr,v_db_scld_t_scld_rycgjl_dt_crsj,v_db_scld_t_scld_rycgjl_dt_gxsj,v_db_scld_t_scld_rycgjl_c_cjbm,v_db_scld_t_scld_rycgjl_c_cjdw);
end loop;
end loop;
RETURN 'success';
end;
$BODY$
LANGUAGE 'plpgsql' VOLATILE COST 100
;
ALTER FUNCTION "db_scld"."abase2019rycgjl"(cyclecount int4) OWNER TO "test";
查看表信息
、、、
Indexes:
"pk_scld_rycgjl" UNIQUE, btree (c_bh)
"i_t_scld_rycgjl_c_cprscjlbh" btree (c_cprscjlbh)
"i_t_scld_rycgjl_c_gwbh" btree (c_gwbh)
"i_t_scld_rycgjl_d_cgrq" btree (d_cgrq)
建議在加壓過程中去掉索引
加壓函數優化
該函數主要是向db_scld.t_scld_rycgjl
表中構造數據,其中包含了一些查詢
加壓一條數據大約耗時1s
,這個速度已經非常慢了。單條sql執行如果為千分之一秒,那么一秒鍾最多也就插入一千條數據
慢的原因主要是使用random()隨機獲取一條記錄時慢
select C_BH from db_scld.t_scld_cprscjl order by `random()` LIMIT 1;
select c_jdrybm from db_scld.t_jdry
where c_bmbm = v_scdd and c_sfyx ='1' and c_ryzszt not in ('05','12','11','07','09','13') order by `random()` limit 1
db_jdsjpt=# explain analyze select C_BH from db_scld.t_scld_cprscjl order by random() LIMIT 1;
QUERY PLAN
---------------------------------------------------------------------------------------------------
Limit (cost=61029.94..61029.94 rows=1 width=41) (actual time=587.193..587.193 rows=1 loops=1)
-> Sort (cost=61029.94..63172.22 rows=856911 width=41) (actual time=587.185..587.185 rows=1 loops=1)
Sort Key: (random())
Sort Method: top-N heapsort Memory: 25kB
-> Seq Scan on t_scld_cprscjl (cost=0.00..56745.39 rows=856911 width=41) (actual time=0.019..380.139 rows=854682 loop
s=1)
Planning time: 1.179 ms
Execution time: 587.242 ms
(7 rows)
--表總數量
db_jdsjpt=# select count(*) from db_scld.t_scld_cprscjl;
count
--------
854682
(1 row)
隨機獲取一條記錄random()
random()
耗時:Time: 389.818 ms
--隨機獲取一條耗時
db_jdsjpt=# select C_BH from db_scld.t_scld_cprscjl order by random() LIMIT 1;
c_bh
----------------------------------
6d861b011c854040bf5b731f49d40b48
(1 row)
Time: 389.818 ms
改寫1
offset
耗時:Time: 60.022 ms
--offset可以走索引,少了排序操作
db_jdsjpt=# select C_BH from db_scld.t_scld_cprscjl offset floor(random()*854682) LIMIT 1;
c_bh
----------------------------------
f90301bd8ac2485196ffae32ee70345c
(1 row)
Time: 60.022 ms
db_jdsjpt=# explain analyze select C_BH from db_scld.t_scld_cprscjl offset floor(random()*854682) LIMIT 1;
QUERY PLAN
---------------------------------------------------------------------------------------------------
Limit (cost=3747.64..3747.68 rows=1 width=33) (actual time=30.758..30.759 rows=1 loops=1)
-> Index Only Scan using i_corscjl_cprscbh_ on t_scld_cprscjl (cost=0.42..37472.65 rows=854682 width=33) (actual time=0.
047..25.808 rows=81993 loops=1)
Heap Fetches: 0
Planning time: 0.228 ms
Execution time: 30.802 ms
(5 rows)
Time: 31.779 ms
改寫2
pg從9.5開始提供抽樣函數
使用tablesample抽樣的過程中比例不能太低,否則可能獲取不到結果,且不能帶有過濾條件
system
耗時: Time: 0.639 ms
system
:隨機性較差,效率高
--改寫后耗時
db_jdsjpt=# select c_bh from db_scld.t_scld_cprscjl tablesample system(0.1) limit 1;
c_bh
----------------------------------
e2fce25399db42f0bf49faf8e7214d5f
(1 row)
Time: 0.639 ms
--system隨機抽樣以塊為單位所以更快
db_jdsjpt=# explain analyze select c_bh from db_scld.t_scld_cprscjl tablesample system(0.1) limit 1;
QUERY PLAN
---------------------------------------------------------------------------------------------------
Limit (cost=0.00..0.23 rows=1 width=33) (actual time=0.105..0.105 rows=1 loops=1)
-> Sample Scan on t_scld_cprscjl (cost=0.00..192.55 rows=855 width=33) (actual time=0.102..0.102 rows=1 loops=1)
Sampling: system ('0.1'::real)
Planning time: 0.190 ms
Execution time: 0.134 ms
(5 rows)
Time: 1.182 ms
改寫3
bernoulli
:隨機性更好,但效率比system要低
bernoullih
耗時:Time: 0.822 ms
db_jdsjpt=# select c_bh from db_scld.t_scld_cprscjl tablesample bernoulli(0.1) limit 1;
c_bh
----------------------------------
7ec30761ffd04bd9ad77797a33645a84
(1 row)
Time: 0.822 ms
--bernoulli以行為單位進行抽樣,比system效率低點
db_jdsjpt=# explain analyze select c_bh from db_scld.t_scld_cprscjl tablesample bernoulli(0.1) limit 1;
QUERY PLAN
---------------------------------------------------------------------------------------------------
Limit (cost=0.00..53.85 rows=1 width=33) (actual time=1.410..1.411 rows=1 loops=1)
-> Sample Scan on t_scld_cprscjl (cost=0.00..46042.55 rows=855 width=33) (actual time=1.408..1.408 rows=1 loops=1)
Sampling: bernoulli ('0.1'::real)
Planning time: 0.446 ms
Execution time: 1.436 ms
(5 rows)
Time: 25.770 ms
同理另外一條sql也可用同樣的方式,且在c_bmbm
字段上面加上索引
當有條件的時候可以使用offset
獲取,offset
的值也可以通過for
循環傳入
db_jdsjpt=# select count(*) from db_scld.t_jdry;
count
--------
214819
(1 row)
db_jdsjpt=# select c_jdrybm from db_scld.t_jdry where c_bmbm = '4402222804' and c_sfyx ='1' and c_ryzszt not in ('05','12','11','07','09','13') offset floor(random()*214819) limit 1;
c_jdrybm
----------
(0 rows)
Time: 1.924 ms
對比
方法 | 耗時 |
---|---|
order by random() | 389.818 ms |
offset n | 60.022 ms-240ms |
system() | 0.639 ms |
bernoulli() | 0.822 ms |
使用offset
的時候和n的大小有關系,當n越大,掃描的索引塊越多,就越大,但是相對於order by random()
的方式仍然要快。
注意
system(0.1)
等於百分之零點一,也就是抽樣千分之一 854682*0.001=854
,大概每次抽取854
條記錄
--system
db_jdsjpt=# select count(*) from db_scld.t_scld_cprscjl tablesample system(0.1) ;
count
-------
592
(1 row)
Time: 1.499 ms
--bernoulli
db_jdsjpt=# select count(*) from db_scld.t_scld_cprscjl tablesample bernoulli(0.1) ;
count
-------
840
(1 row)
Time: 86.037 ms
這里可以看出bernoulli效率比system要低
優化后
CREATE OR REPLACE FUNCTION "db_scld"."abase2019rycgjl_zfx"(cyclecount int4)
RETURNS "pg_catalog"."varchar" AS $BODY$
declare
v_db_scld_t_scld_rycgjl_c_bh bpchar;
v_db_scld_t_scld_rycgjl_c_cprscjlbh bpchar;
v_db_scld_t_scld_rycgjl_c_jdrybm varchar(300);
v_db_scld_t_scld_rycgjl_c_jdryxm varchar(300);
v_db_scld_t_scld_rycgjl_d_cgrq date;
v_db_scld_t_scld_rycgjl_c_cgqk varchar(300);
v_db_scld_t_scld_rycgjl_n_gzgs numeric;
v_db_scld_t_scld_rycgjl_c_sfyx varchar(300);
v_db_scld_t_scld_rycgjl_c_cjr varchar(300);
v_db_scld_t_scld_rycgjl_c_gxr varchar(300);
v_db_scld_t_scld_rycgjl_dt_crsj timestamp;
v_db_scld_t_scld_rycgjl_dt_gxsj timestamp;
v_db_scld_t_scld_rycgjl_c_cjbm varchar(300);
v_db_scld_t_scld_rycgjl_c_cjdw varchar(300);
v_s varchar(300);
v_scdd varchar(300);
BEGIN
for i in 1.. cycleCount loop
v_db_scld_t_scld_rycgjl_c_cprscjlbh :=(select C_BH from db_scld.t_scld_cprscjl tablesample system(0.1) LIMIT 1);
v_scdd := (select c_cjbm from db_scld.t_scld_cprscjl where c_bh= v_db_scld_t_scld_rycgjl_c_cprscjlbh );
for j in 1 .. 144 loop
v_db_scld_t_scld_rycgjl_c_bh := "public".get_UUID(32,32);
v_db_scld_t_scld_rycgjl_c_jdrybm :=(select c_jdrybm from db_scld.t_jdry
where c_bmbm = v_scdd and c_sfyx ='1' and c_ryzszt not in ('05','12','11','07','09','13') offset j-1 limit 1 );
v_db_scld_t_scld_rycgjl_c_jdryxm :=(select c_xm from db_scld.t_jdry where c_jdrybm = v_db_scld_t_scld_rycgjl_c_jdrybm);
v_db_scld_t_scld_rycgjl_d_cgrq :=(select d_scrq from db_scld.T_SCLD_CPRSCJL where C_BH = v_db_scld_t_scld_rycgjl_c_cprscjlbh);
v_db_scld_t_scld_rycgjl_c_cgqk :=get_split_str('01');
v_db_scld_t_scld_rycgjl_n_gzgs :=get_rand_int(8,8);
v_db_scld_t_scld_rycgjl_c_sfyx :=get_split_str('1');
v_db_scld_t_scld_rycgjl_c_cjr :=(select C_CJR from db_scld.T_SCLD_CPRSCJL where C_BH = v_db_scld_t_scld_rycgjl_c_cprscjlbh and d_scrq= v_db_scld_t_scld_rycgjl_d_cgrq);
v_db_scld_t_scld_rycgjl_c_gxr :=v_db_scld_t_scld_rycgjl_c_cjr;
v_db_scld_t_scld_rycgjl_dt_crsj :=v_db_scld_t_scld_rycgjl_d_cgrq;
v_db_scld_t_scld_rycgjl_dt_gxsj :=v_db_scld_t_scld_rycgjl_dt_crsj;
v_db_scld_t_scld_rycgjl_c_cjbm :=(select C_CJBM from db_scld.T_SCLD_CPRSCJL where C_BH = v_db_scld_t_scld_rycgjl_c_cprscjlbh);
v_db_scld_t_scld_rycgjl_c_cjdw :=(select left(v_db_scld_t_scld_rycgjl_c_cjbm,6));
/*==人員出工記錄 (db_scld.t_scld_rycgjl) ==*/
INSERT INTO db_scld.t_scld_rycgjl_zfx(c_bh,c_cprscjlbh,c_jdrybm,c_jdryxm,d_cgrq,c_cgqk,n_gzgs,c_sfyx,c_cjr,c_gxr,dt_crsj,dt_gxsj,c_cjbm,c_cjdw)
VALUES (v_db_scld_t_scld_rycgjl_c_bh,v_db_scld_t_scld_rycgjl_c_cprscjlbh,v_db_scld_t_scld_rycgjl_c_jdrybm,v_db_scld_t_scld_rycgjl_c_jdryxm,v_db_scld_t_scld_rycgjl_d_cgrq,v_db_scld_t_scld_rycgjl_c_cgqk,v_db_scld_t_scld_rycgjl_n_gzgs,v_db_scld_t_scld_rycgjl_c_sfyx,v_db_scld_t_scld_rycgjl_c_cjr,v_db_scld_t_scld_rycgjl_c_gxr,v_db_scld_t_scld_rycgjl_dt_crsj,v_db_scld_t_scld_rycgjl_dt_gxsj,v_db_scld_t_scld_rycgjl_c_cjbm,v_db_scld_t_scld_rycgjl_c_cjdw);
end loop;
end loop;
RETURN 'success';
end;
$BODY$
LANGUAGE 'plpgsql' VOLATILE COST 100
;
ALTER FUNCTION "db_scld"."abase2019rycgjl_zfx"(cyclecount int4) OWNER TO "test";
--執行1000次
db_jdsjpt=# select "db_scld"."abase2019rycgjl_zfx"(1000);
abase2019rycgjl_zfx
---------------------
success
(1 row)
Time: 88489.060 ms
--88s一共插入14w條數據
db_jdsjpt=# select count(*) from db_scld.t_scld_rycgjl_zfx;
count
--------
144000
(1 row)
Time: 29.704 ms
結語
1.公司許多服務器存儲划分的是網絡存儲
,數據庫禁止使用網絡存儲
2.數據加壓過程中,應刪除索引,待數據構造完畢在建索引
3.隨機獲取表中的一條數據,當表中數據較小時使用random感覺不明顯,當數據量大時random由於每次都需要排序操作,導致隨機獲取一條的成本較高
4.隨機獲取一條記錄可以使用limit 1 offset n-1的方式,或者使用隨機抽樣的方式
5.無論是使用limit 1 offset n還是使用tablesample隨機抽樣方式都需要知道表中的數據量,不能超過表數據量