PostgreSQL的 create index concurrently

本文轉載自查看原文 2013-06-25 13:09 3019

對於PostgreSQL的 "create index concurrently". 我個人認為其中存在一個bug。

我的驗證過程如下：

我有兩個表，tab01和 tab02，這兩個表之間沒有任何關聯。

我認為對 tab02執行 "create index concurrently" 不會對訪問tab01的事務有任何影響，然而事實並非盡然。

我第一個程序的表現: 通過ecpg執行事務，再通過 "create index concurrently" 給tab02建立索引，成功。

我第二個程序的表現：通過ecpg執行事務，再通過 "create index concurrently" 給tab02建立索引，被阻塞。

我第三個測試: 通過psql發起事務，另一個psql客戶端執行 "create index concurrently" 成功。

我第四個測試：通過psql發起事務，另一個psql客戶端執行 "create index concurrently"，被阻塞。

無論 PostgreSQL9.1.2，還是PostgreSQL9.2.4，結果是一樣的。

數據準備：

[postgres@server bin]$ ./psql -U tester -d tester
psql (9.1.2)
Type "help" for help.
tester=> \d tab01;
          Table "public.tab01"
 Column |         Type         | Modifiers
--------+----------------------+-----------
 id     | integer              |
 cd     | character varying(4) |


tester=> \d tab02;
    Table "public.tab02"
 Column |  Type   | Modifiers
--------+---------+-----------
 id     | integer |
 value  | integer |

tester=> select * from tab01;
 id | cd
----+----
  1 | 14
  2 | 15
  3 | 14
(3 rows)

tester=> select * from tab02;
 id | value
----+-------
  1 |   100
  2 |   200
  3 |   300
(3 rows)
tester=>

我的測試方法：

對第一個程序和第二個程序：

當我的ecpg程序正在睡眠的時候，我另外開一個終端，執行：

"create index concurrently idx_tab02_id_new on tab02(id)"

結果是：

第一個程序執行中，我可成功建立索引。
第二個程序執行中，我無法建立索引，會被阻塞
而我的tab01和tab02之間，沒有任何關聯。而且我也不認為我的ecpg程序會有潛在的可能去使用tab02的索引。

事實上，如果我去看ecpg預編譯後得到的c程序，我可以看到：

 { ECPGdo(__LINE__, 0, 1, "db_conn", 0, ECPGst_normal, "select count ( * ) from tab01 where cd = $1 ",

        ECPGt_char,(vcd),(long)4 + 1,(long)1,(4 + 1)*sizeof(char),

        ECPGt_NO_INDICATOR, NULL , 0L, 0L, 0L, ECPGt_EOIT,

        ECPGt_int,&(vCount),(long)1,(long)1,sizeof(int),

        ECPGt_NO_INDICATOR, NULL , 0L, 0L, 0L, ECPGt_EORT);}

當我給$1加入引號後，我就可以成功地建立索引了。

 { ECPGdo(__LINE__, 0, 1, "db_conn", 0, ECPGst_normal, "select count ( * ) from tab01 where cd = '$1' ",

        ECPGt_char,(vcd),(long)4 + 1,(long)1,(4 + 1)*sizeof(char),

        ECPGt_NO_INDICATOR, NULL , 0L, 0L, 0L, ECPGt_EOIT,

        ECPGt_int,&(vCount),(long)1,(long)1,sizeof(int),

        ECPGt_NO_INDICATOR, NULL , 0L, 0L, 0L, ECPGt_EORT);}

下面是我測試程序的源代碼:

第一個：

[root@server soft]# cat ./test01/test01.pc
/*
 * test01: ECPG test program whose query takes no host-variable parameter.
 *
 * Connects as db_conn, runs SELECT COUNT(*) on tab01, prints the count,
 * sleeps 500 seconds with the connection still open, disconnects, and then
 * sleeps another 600 seconds before exiting.
 */
int main()
{

   EXEC SQL BEGIN DECLARE SECTION;
         int vCount;
         char vcd[4+1]; 
   EXEC SQL END DECLARE SECTION;
   /* vcd is declared but never used in this program. */

   EXEC SQL CONNECT TO 'tester@127.0.0.1:5432' AS db_conn
     USER tester IDENTIFIED BY tester;                            

   /* The statement text is fixed: no host variable appears in the query. */
   EXEC SQL AT db_conn SELECT COUNT(*) 
        INTO :vCount FROM tab01;                                      

   fprintf(stderr,"count is:%d\n",vCount); 

   /*
    * No COMMIT or ROLLBACK is issued before this sleep.
    * NOTE(review): with ecpg's default (non-autocommit) mode the transaction
    * presumably stays open for the whole sleep -- confirm.
    */
   fprintf(stderr,"Before disconnect,sleep for 500 seconds\n");
   sleep(500);

   EXEC SQL DISCONNECT db_conn;
   fprintf(stderr,"After disconnect,sleep for 600 seconds\n");

   /* Second sleep runs after the connection has been closed. */
   sleep(600);
   return 0;
}

[root@server soft]#

第二個：

[root@server soft]# cat ./test02/test02.pc

/*
 * test02: ECPG test program whose query uses a host variable.
 *
 * Same sequence as test01 (connect, count, sleep 500 s, disconnect,
 * sleep 600 s), except that the SELECT filters tab01 on cd = :vcd,
 * where vcd holds the string "14".
 */
int main()

{
   EXEC SQL BEGIN DECLARE SECTION;
         int vCount;
         char vcd[4+1];                        
   EXEC SQL END DECLARE SECTION;


   EXEC SQL CONNECT TO 'tester@127.0.0.1:5432' AS db_conn
     USER tester IDENTIFIED BY tester;

   /*
    * Zero all 5 bytes of vcd, then copy at most 4 bytes of "14" into it,
    * so the buffer always ends with a NUL terminator.
    */
   char *pCd="14";
   memset(vcd,'\0',5);
   strncpy(vcd, pCd,4);             
                                                               
   /* The only difference from test01: the WHERE clause takes :vcd as input. */
   EXEC SQL AT db_conn SELECT COUNT(*)            
        INTO :vCount FROM tab01 WHERE cd = :vcd;
                                                                       
   fprintf(stderr,"count is:%d\n",vCount);

   /*
    * No COMMIT or ROLLBACK is issued before this sleep.
    * NOTE(review): with ecpg's default (non-autocommit) mode the transaction
    * presumably stays open for the whole sleep -- confirm.
    */
   fprintf(stderr,"Before disconnect,sleep for 500 seconds\n");
   sleep(500);

   EXEC SQL DISCONNECT db_conn;

   /* Second sleep runs after the connection has been closed. */
   fprintf(stderr,"After disconnect,sleep for 600 seconds\n");
   sleep(600);

   return 0;                                                                                               
}

 [root@server soft]#

而且，通過 psql，還可以發現一個與 create index concurrently 相關的現象：

我的第三個測試：

客戶端1：

[postgres@server pgsql]$ ./bin/psql -d tester -U tester
psql (9.1.2)
Type "help" for help.

tester=> begin;
BEGIN
tester=> select * from tab01 where cd = '14';
 id | cd
----+----
  1 | 14
  3 | 14
(2 rows)
tester=>

客戶端2：

[postgres@server pgsql]$ ./bin/psql -d tester -U tester
psql (9.1.2)
Type "help" for help.

tester=> create index concurrently idx_tab02_id_new on tab02(id);

可以很快就成功創建索引。

我的第四個測試：

客戶端1:

[postgres@server pgsql]$ ./bin/psql -d tester -U tester
psql (9.1.2)
Type "help" for help.
tester=> begin;
BEGIN
tester=> select * from tab01 where cd = '14';
 id | cd
----+----
  1 | 14
  3 | 14
(2 rows)
 

tester=> select pg_sleep(500);
 pg_sleep
----------
(1 row)
tester=>

客戶端2：

[postgres@server pgsql]$ ./bin/psql -d tester -U tester
psql (9.1.2)
Type "help" for help.

tester=> create index concurrently idx_tab02_id_new on tab02(id);

客戶端2的創建索引會被阻塞

根據我對PostgreSQL的源代碼的跟蹤，可以看到有如下的調用關係：

PortalRunMulti --> PortalRunUtility --> standard_ProcessUtility --> DefineIndex

而我對DefineIndex作簡化後，可以看到：

{     
      …
      old_snapshots = GetCurrentVirtualXIDs(snapshot->xmin, true, false,
                 PROC_IS_AUTOVACUUM | PROC_IN_VACUUM, &n_old_snapshots);

       for (i = 0; i < n_old_snapshots; i++)
       {     
            …

            if (VirtualTransactionIdIsValid(old_snapshots[i]))
                  VirtualXactLockTableWait(old_snapshots[i]);
       }
       …
}

對於我的第一個測試程序，GetCurrentVirtualXIDs 函數執行后,n_old_snapshots 的值為0 ，
for (i = 0; i < n_old_snapshots; i++)　循環不會被執行，索引的生成不會被阻塞。

對我的第二個測試程序，GetCurrentVirtualXIDs 函數執行后,n_old_snapshots 的值為1，
for (i = 0; i < n_old_snapshots; i++)　循環會被執行。
VirtualXactLockTableWait(old_snapshots[i]) 的執行，導致等待一個鎖，所以索引生成被阻塞。

再往下分析：

/*
 * GetCurrentVirtualXIDs -- collect virtual transaction IDs of other procs
 * in the proc array, filtered by the arguments.
 *
 * limitXmin:     if valid, only procs whose xmin precedes or equals it are
 *                returned; if invalid, no xmin limit is applied.
 * excludeXmin0:  if true, procs whose xmin is not a valid XID are skipped.
 * allDbs:        if true, procs of every database are considered; otherwise
 *                only procs whose databaseId equals MyDatabaseId.
 * excludeVacuum: bit mask; a proc with any of these bits set in its
 *                vacuumFlags is skipped.
 * nvxids:        output; receives the number of entries stored in the result.
 *
 * Returns a palloc'd array sized for arrayP->maxProcs entries, of which the
 * first *nvxids are filled in.  The caller's own proc (MyProc) is never
 * included.
 */
VirtualTransactionId *
GetCurrentVirtualXIDs(TransactionId limitXmin, bool excludeXmin0,
                      bool allDbs, int excludeVacuum,
                      int *nvxids)
{
    VirtualTransactionId *vxids;
    ProcArrayStruct *arrayP = procArray;
    int            count = 0;
    int            index;

    /* allocate what's certainly enough result space */
    vxids = (VirtualTransactionId *)
        palloc(sizeof(VirtualTransactionId) * arrayP->maxProcs);

    /* The proc array is scanned while holding ProcArrayLock in shared mode. */
    LWLockAcquire(ProcArrayLock, LW_SHARED);
for (index = 0; index < arrayP->numProcs; index++)
    {
volatile PGPROC *proc = arrayP->procs[index];

        /* Never report the calling backend itself. */
        if (proc == MyProc)
            continue;

        /* Skip procs flagged with any of the caller-supplied vacuum bits. */
        if (excludeVacuum & proc->vacuumFlags)
            continue;

        if (allDbs || proc->databaseId == MyDatabaseId)
        {
 /* Fetch xmin just once - might change on us */
            TransactionId pxmin = proc->xmin;
            /* On request, ignore procs whose xmin is not a valid XID. */
if (excludeXmin0 && !TransactionIdIsValid(pxmin))
                continue;
/*
             * InvalidTransactionId precedes all other XIDs, so a proc that
             * hasn't set xmin yet will not be rejected by this test.
             */
            if (!TransactionIdIsValid(limitXmin) ||
                TransactionIdPrecedesOrEquals(pxmin, limitXmin))
            {
                VirtualTransactionId vxid;

                GET_VXID_FROM_PGPROC(vxid, *proc);

                /* Only valid virtual transaction IDs are appended. */
if (VirtualTransactionIdIsValid(vxid))
                    vxids[count++] = vxid;
            }
        }
    }

    LWLockRelease(ProcArrayLock);

    /* Hand back the number of collected entries along with the array. */
    *nvxids = count;
    return vxids;
}

對於我的第一個程序，測試結果顯示：pxmin 為零，TransactionIdIsValid(pxmin) 為假。所以如下代碼導致跳過循環一次。

if (excludeXmin0 && !TransactionIdIsValid(pxmin))
                continue;

沒有機會執行 vxids[count++]=vxid 這一行。

那么pxmin是如何來的？

看這句： TransactionId pxmin = proc->xmin;

而xmin的含義是：當我們執行程序中對數據進行增刪改的時候，會將當前transaction id 賦予給 xmin。

寫記錄的時候，把這個xmin寫入該行記錄頭。

如此，每個進程看來，它只關心 xmin 小於自己的transaction id的。PostgreSQL用這種方式來保證MVCC。

但此處，proc->xmin為零是很不合理的。

此時，

if (allDbs || proc->databaseId == MyDatabaseId) 里的：

if (excludeXmin0 && !TransactionIdIsValid(pxmin))就會成立，所以會直接continue跳回循環開始處，也就沒有機會去執行

vxids[count++] = vxid;

在我的第二個程序里，proc->xmin根本就不為零。故此說，這是一個bug。

另外的佐證：對我的三個測試，運行下列SQL文：

pgsql=# select l.pid, l.mode, sa.procpid, sa.current_query
from pg_locks l
inner join pg_stat_activity sa
        on l.pid = sa.procpid
where l.mode like '%xclusive%';

一開始在 pg_sleep(100) 執行期間，可以看到：

pgsql=# select l.pid, l.mode, sa.procpid, sa.current_query
from pg_locks l
inner join pg_stat_activity sa
        on l.pid = sa.procpid
where l.mode like '%xclusive%';
 pid  |     mode      | procpid |                   current_query                    
------+---------------+---------+----------------------------------------------------
 5356 | ExclusiveLock |    5356 | select l.pid, l.mode, sa.procpid, sa.current_query+
      |               |         | from pg_locks l                                   +
      |               |         | inner join pg_stat_activity sa                    +
      |               |         |         on l.pid = sa.procpid                     +
      |               |         | where l.mode like '%xclusive%';
 5517 | ExclusiveLock |    5517 | select pg_sleep(100);
(2 rows)

我開另外的終端，執行 "create index concurrently"的時候，再看：

pgsql=# select l.pid, l.mode, sa.procpid, sa.current_query
from pg_locks l
inner join pg_stat_activity sa
        on l.pid = sa.procpid
where l.mode like '%xclusive%';
 pid  |           mode           | procpid |                      current_query                       
------+--------------------------+---------+----------------------------------------------------------
 5356 | ExclusiveLock            |    5356 | select l.pid, l.mode, sa.procpid, sa.current_query      +
      |                          |         | from pg_locks l                                         +
      |                          |         | inner join pg_stat_activity sa                          +
      |                          |         |         on l.pid = sa.procpid                           +
      |                          |         | where l.mode like '%xclusive%';
 5517 | ExclusiveLock            |    5517 | select pg_sleep(100);
 5527 | ExclusiveLock            |    5527 | create index concurrently idx_tab02_id_new on tab02(id);
 5527 | RowExclusiveLock         |    5527 | create index concurrently idx_tab02_id_new on tab02(id);
 5527 | ShareUpdateExclusiveLock |    5527 | create index concurrently idx_tab02_id_new on tab02(id);
(5 rows)

等到 pg_sleep執行完畢的時候:

pgsql=# select l.pid, l.mode, sa.procpid, sa.current_query
from pg_locks l
inner join pg_stat_activity sa
        on l.pid = sa.procpid
where l.mode like '%xclusive%';
 pid  |           mode           | procpid |                      current_query                       
------+--------------------------+---------+----------------------------------------------------------
 5356 | ExclusiveLock            |    5356 | select l.pid, l.mode, sa.procpid, sa.current_query      +
      |                          |         | from pg_locks l                                         +
      |                          |         | inner join pg_stat_activity sa                          +
      |                          |         |         on l.pid = sa.procpid                           +
      |                          |         | where l.mode like '%xclusive%';
 5517 | ExclusiveLock            |    5517 | <IDLE> in transaction
 5527 | ExclusiveLock            |    5527 | create index concurrently idx_tab02_id_new on tab02(id);
 5527 | RowExclusiveLock         |    5527 | create index concurrently idx_tab02_id_new on tab02(id);
 5527 | ShareUpdateExclusiveLock |    5527 | create index concurrently idx_tab02_id_new on tab02(id);
(5 rows)

免責聲明！

本站轉載的文章為個人學習借鑒使用，本站對版權不負任何法律責任。如果侵犯了您的隱私權益，請聯系本站郵箱yoyou2525@163.com刪除。

猜您在找 elasticsearch index 之 create index（二） elasticsearch index 之 create index（-） PostgreSQL 執行進度監控：VACUUM 、ANALYZE、CREATE INDEX 、CLUSTER、Base Backup PostgreSQL 之 CREATE FUNCTION MySQL創建索引（CREATE INDEX） PostgreSQL create table as與like用法 SQL CREATE INDEX 語句:如何創建索引？如何使用CREATE INDEX語句對表增加索引？ elasticsearch action.auto_create_index CREATE INDEX - 定義一個新索引