對於PostgreSQL的 "create index concurrently",我個人認為其中存在一個bug。
我的驗證過程如下:
我有兩個表,tab01和 tab02,這兩個表之間沒有任何關聯。
我認為對 tab02 執行 "create index concurrently" 不會對訪問 tab01 的事務有任何影響,然而事實並非如此。
我第一個程序的表現: 通過ecpg執行事務,再通過 "create index concurrently" 給tab02建立索引,成功。
我第二個程序的表現:通過ecpg執行事務,再通過 "create index concurrently" 給tab02建立索引,被阻塞。
我第三個測試: 通過psql發起事務, 另一個psql客戶端執行 "create index concurrently" 成功。
我第四個測試: 通過psql發起事務, 另一個psql客戶端執行 "create index concurrently",被阻塞。
無論 PostgreSQL9.1.2,還是PostgreSQL9.2.4,結果是一樣的。
數據準備:
[postgres@server bin]$ ./psql -U tester -d tester psql (9.1.2) Type "help" for help. tester=> \d tab01; Table "public.tab01" Column | Type | Modifiers --------+----------------------+----------- id | integer | cd | character varying(4) | tester=> \d tab02; Table "public.tab02" Column | Type | Modifiers --------+---------+----------- id | integer | value | integer | tester=> select * from tab01; id | cd ----+---- 1 | 14 2 | 15 3 | 14 (3 rows) tester=> select * from tab02; id | value ----+------- 1 | 100 2 | 200 3 | 300 (3 rows) tester=>
我的測試方法:
對第一個程序和第二個程序:
當我的ecpg程序正在睡眠的時候,我另外開一個終端,執行:
"create index concurrently idx_tab02_id_new on tab02(id)"
結果是:
第一個程序執行中,我可成功建立索引。
第二個程序執行中,我無法建立索引,會被阻塞。
而我的tab01和tab02之間,沒有任何關聯。而且我也不認為我的ecpg程序會有潛在的可能去使用tab02的索引。
事實上,如果我去看ecpg預編譯後得到的c程序,我可以看到:
{ ECPGdo(__LINE__, 0, 1, "db_conn", 0, ECPGst_normal, "select count ( * ) from tab01 where cd = $1 ", ECPGt_char,(vcd),(long)4 + 1,(long)1,(4 + 1)*sizeof(char), ECPGt_NO_INDICATOR, NULL , 0L, 0L, 0L, ECPGt_EOIT, ECPGt_int,&(vCount),(long)1,(long)1,sizeof(int), ECPGt_NO_INDICATOR, NULL , 0L, 0L, 0L, ECPGt_EORT);}
當我給$1加入引號後,我就可以成功地建立索引了。
{ ECPGdo(__LINE__, 0, 1, "db_conn", 0, ECPGst_normal, "select count ( * ) from tab01 where cd = '$1' ", ECPGt_char,(vcd),(long)4 + 1,(long)1,(4 + 1)*sizeof(char), ECPGt_NO_INDICATOR, NULL , 0L, 0L, 0L, ECPGt_EOIT, ECPGt_int,&(vCount),(long)1,(long)1,sizeof(int), ECPGt_NO_INDICATOR, NULL , 0L, 0L, 0L, ECPGt_EORT);}
下面是我測試程序的源代碼:
第一個:
[root@server soft]# cat ./test01/test01.pc int main() { EXEC SQL BEGIN DECLARE SECTION; int vCount; char vcd[4+1]; EXEC SQL END DECLARE SECTION;
EXEC SQL CONNECT TO 'tester@127.0.0.1:5432' AS db_conn USER tester IDENTIFIED BY tester; EXEC SQL AT db_conn SELECT COUNT(*) INTO :vCount FROM tab01; fprintf(stderr,"count is:%d\n",vCount); fprintf(stderr,"Before disconnect,sleep for 500 seconds\n"); sleep(500); EXEC SQL DISCONNECT db_conn; fprintf(stderr,"After disconnect,sleep for 600 seconds\n"); sleep(600); return 0; } [root@server soft]#
第二個:
[root@server soft]# cat ./test02/test02.pc int main() { EXEC SQL BEGIN DECLARE SECTION; int vCount; char vcd[4+1]; EXEC SQL END DECLARE SECTION; EXEC SQL CONNECT TO 'tester@127.0.0.1:5432' AS db_conn USER tester IDENTIFIED BY tester; char *pCd="14"; memset(vcd,'\0',5); strncpy(vcd, pCd,4); EXEC SQL AT db_conn SELECT COUNT(*) INTO :vCount FROM tab01 WHERE cd = :vcd; fprintf(stderr,"count is:%d\n",vCount);
fprintf(stderr,"Before disconnect,sleep for 500 seconds\n"); sleep(500); EXEC SQL DISCONNECT db_conn; fprintf(stderr,"After disconnect,sleep for 600 seconds\n"); sleep(600); return 0; } [root@server soft]#
而且,通過 psql,還可以發現一個與 create index concurrently 相關的現象:
我的第三個測試:
客戶端1:
[postgres@server pgsql]$ ./bin/psql -d tester -U tester psql (9.1.2) Type "help" for help. tester=> begin; BEGIN tester=> select * from tab01 where cd = '14'; id | cd ----+---- 1 | 14 3 | 14 (2 rows) tester=>
客戶端2:
[postgres@server pgsql]$ ./bin/psql -d tester -U tester psql (9.1.2) Type "help" for help. tester=> create index concurrently idx_tab02_id_new on tab02(id);
可以很快就成功創建索引。
我的第四個測試:
客戶端1:
[postgres@server pgsql]$ ./bin/psql -d tester -U tester psql (9.1.2) Type "help" for help. tester=> begin; BEGIN tester=> select * from tab01 where cd = '14'; id | cd ----+---- 1 | 14 3 | 14 (2 rows) tester=> select pg_sleep(500); pg_sleep ---------- (1 row) tester=>
客戶端2:
[postgres@server pgsql]$ ./bin/psql -d tester -U tester psql (9.1.2) Type "help" for help. tester=> create index concurrently idx_tab02_id_new on tab02(id);
客戶端2的創建索引會被阻塞。
根據我對PostgreSQL的源代碼的跟蹤,可以看到有如下的調用關係:
PortalRunMulti--> PortalRunUtility-->Standard_ProcessUtility-->DefineIndex
而我對DefineIndex作簡化後,可以看到:
{
… old_snapshots = GetCurrentVirtualXIDs(snapshot->xmin, true, false, PROC_IS_AUTOVACUUM | PROC_IN_VACUUM, &n_old_snapshots); for (i = 0; i < n_old_snapshots; i++) { … if (VirtualTransactionIdIsValid(old_snapshots[i])) VirtualXactLockTableWait(old_snapshots[i]); } … }
對於我的第一個測試程序,GetCurrentVirtualXIDs 函數執行後,n_old_snapshots 的值為0 ,
for (i = 0; i < n_old_snapshots; i++) 循環不會被執行,索引的生成不會被阻塞。
對我的第二個測試程序,GetCurrentVirtualXIDs 函數執行後,n_old_snapshots 的值為1,
for (i = 0; i < n_old_snapshots; i++) 循環會被執行。
VirtualXactLockTableWait(old_snapshots[i]) 的執行,導致等待一個鎖,所以索引生成被阻塞。
再往下分析:
VirtualTransactionId * GetCurrentVirtualXIDs(TransactionId limitXmin, bool excludeXmin0, bool allDbs, int excludeVacuum, int *nvxids) { VirtualTransactionId *vxids; ProcArrayStruct *arrayP = procArray; int count = 0; int index; /* allocate what's certainly enough result space */ vxids = (VirtualTransactionId *) palloc(sizeof(VirtualTransactionId) * arrayP->maxProcs); LWLockAcquire(ProcArrayLock, LW_SHARED); for (index = 0; index < arrayP->numProcs; index++) { volatile PGPROC *proc = arrayP->procs[index]; if (proc == MyProc) continue; if (excludeVacuum & proc->vacuumFlags) continue; if (allDbs || proc->databaseId == MyDatabaseId) { /* Fetch xmin just once - might change on us */ TransactionId pxmin = proc->xmin;
if (excludeXmin0 && !TransactionIdIsValid(pxmin)) continue; /* * InvalidTransactionId precedes all other XIDs, so a proc that * hasn't set xmin yet will not be rejected by this test. */ if (!TransactionIdIsValid(limitXmin) || TransactionIdPrecedesOrEquals(pxmin, limitXmin)) { VirtualTransactionId vxid; GET_VXID_FROM_PGPROC(vxid, *proc);
if (VirtualTransactionIdIsValid(vxid)) vxids[count++] = vxid; } } } LWLockRelease(ProcArrayLock); *nvxids = count; return vxids; }
對於我的第一個程序,測試結果顯示:pxmin 為零,TransactionIdIsValid(pxmin) 為假。所以如下代碼導致跳過循環一次。
if (excludeXmin0 && !TransactionIdIsValid(pxmin)) continue;
沒有機會執行 vxids[count++]=vxid 這一行。
那麼pxmin是如何來的?
看這句: TransactionId pxmin = proc->xmin;
而xmin的含義是:當我們執行程序中對數據進行增刪改的時候,會將當前transaction id 賦予給 xmin。
寫記錄的時候,把這個xmin寫入該行記錄頭。
如此,每個進程看來,它只關心 xmin 小於自己的transaction id的。PostgreSQL用這種方式來保證MVCC。
但此處,proc->xmin為零是很不合理的。
此時,
if (allDbs || proc->databaseId == MyDatabaseId) 里的:
if (excludeXmin0 && !TransactionIdIsValid(pxmin))就會成立,所以會直接continue跳回循環開始處,也就沒有機會去執行
vxids[count++] = vxid;
在我的第二個程序里,proc->xmin根本就不為零。故此說,這是一個bug。
另外的佐證:對我的三個測試,運行下列SQL文:
pgsql=# select l.pid, l.mode, sa.procpid, sa.current_query from pg_locks l inner join pg_stat_activity sa on l.pid = sa.procpid where l.mode like '%xclusive%';
一開始在pg_sleep(100)執行期間,可以看到:
pgsql=# select l.pid, l.mode, sa.procpid, sa.current_query from pg_locks l inner join pg_stat_activity sa on l.pid = sa.procpid where l.mode like '%xclusive%'; pid | mode | procpid | current_query ------+---------------+---------+---------------------------------------------------- 5356 | ExclusiveLock | 5356 | select l.pid, l.mode, sa.procpid, sa.current_query+ | | | from pg_locks l + | | | inner join pg_stat_activity sa + | | | on l.pid = sa.procpid + | | | where l.mode like '%xclusive%'; 5517 | ExclusiveLock | 5517 | select pg_sleep(100); (2 rows)
我開另外的終端,執行 "create index concurrently"的時候,再看:
pgsql=# select l.pid, l.mode, sa.procpid, sa.current_query from pg_locks l inner join pg_stat_activity sa on l.pid = sa.procpid where l.mode like '%xclusive%'; pid | mode | procpid | current_query ------+--------------------------+---------+---------------------------------------------------------- 5356 | ExclusiveLock | 5356 | select l.pid, l.mode, sa.procpid, sa.current_query + | | | from pg_locks l + | | | inner join pg_stat_activity sa + | | | on l.pid = sa.procpid + | | | where l.mode like '%xclusive%'; 5517 | ExclusiveLock | 5517 | select pg_sleep(100); 5527 | ExclusiveLock | 5527 | create index concurrently idx_tab02_id_new on tab02(id); 5527 | RowExclusiveLock | 5527 | create index concurrently idx_tab02_id_new on tab02(id); 5527 | ShareUpdateExclusiveLock | 5527 | create index concurrently idx_tab02_id_new on tab02(id); (5 rows)
等到 pg_sleep執行完畢的時候:
pgsql=# select l.pid, l.mode, sa.procpid, sa.current_query from pg_locks l inner join pg_stat_activity sa on l.pid = sa.procpid where l.mode like '%xclusive%'; pid | mode | procpid | current_query ------+--------------------------+---------+---------------------------------------------------------- 5356 | ExclusiveLock | 5356 | select l.pid, l.mode, sa.procpid, sa.current_query + | | | from pg_locks l + | | | inner join pg_stat_activity sa + | | | on l.pid = sa.procpid + | | | where l.mode like '%xclusive%'; 5517 | ExclusiveLock | 5517 | <IDLE> in transaction 5527 | ExclusiveLock | 5527 | create index concurrently idx_tab02_id_new on tab02(id); 5527 | RowExclusiveLock | 5527 | create index concurrently idx_tab02_id_new on tab02(id); 5527 | ShareUpdateExclusiveLock | 5527 | create index concurrently idx_tab02_id_new on tab02(id); (5 rows)