Recently I ran into a scenario where a new column was added to a table in one of our production databases, and it had to be backfilled with the corresponding column from a related table. Several million rows were affected.
First, a single big UPDATE statement is off the table: the lock it takes is too broad, which easily causes lock contention and can end in deadlocks. For example:
update B a set new_column=(SELECT other_col from A b where status=9 and a.busi_id=b.busi_id and b.pid=1242343324),modified=now() where pid=21343
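To make the batching idea concrete before moving to the stored procedure, here is a hedged sketch (not part of the original write-up) of splitting the same backfill into bounded primary-key ranges; it assumes B has an auto-increment primary key id, which is not stated above:
# A sketch only: run the join-based backfill over one bounded id range at a time,
# so each UPDATE touches a limited number of rows and releases its locks quickly.
# The `id` column and the range bounds are assumptions.
update B a
join A b on a.busi_id = b.busi_id and b.pid = 1242343324 and b.status = 9
set a.new_column = b.other_col, a.modified = now()
where a.pid = 21343
and a.id between 1 and 10000; # advance the range (10001-20000, ...) batch by batch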
In this case I turned to a stored procedure instead. A procedure that processes one row at a time is too slow and eats local IO, so I wrote one that updates in batches, shown below:
DELIMITER &&
CREATE PROCEDURE updateTimeV1(IN comCount bigint)
BEGIN
    # Routine body goes here...
    declare i int;
    set i = 0;
    # Total number of distinct busi_id values that need backfilling
    set @sum = (SELECT count(DISTINCT busi_id) FROM `B` WHERE `pid` = 1242343324 and status = 9);
    set @log = "log query ....";
    select CONCAT(@log, @sum, " rows");
    while i <= @sum do
        set i = i + 1;
        # Pick the i-th distinct busi_id via a LIMIT offset -- this is the weak point discussed below
        set @busi_id = (SELECT DISTINCT busi_id FROM `A` WHERE `pid` = 1242343324 limit i, 1);
        set @other_col = (SELECT other_col FROM `B` where `pid` = 1242343324 and yn = 1 and `busi_id` = @busi_id limit 1);
        if @busi_id is NULL THEN
            select CONCAT(@log, " busi_id is null");
        elseif @other_col is NULL THEN
            select CONCAT(@log, " other_col is null");
        else
            # START TRANSACTION;
            /** disable autocommit so updates accumulate into one transaction per batch */
            SET autocommit = 0;
            update A set new_column = @other_col, modified = now() where `pid` = 1242343324 and busi_id = @busi_id and status = 15;
            ## if mod(i,comCount)=0 then commit;
            # Commit every 1000 rows
            if mod(i, 1000) = 0 then commit;
            end if;
        end if;
    end while;
    commit; # commit the remaining rows (fewer than 1000) at the end
    SET autocommit = 1;
END&&
delimiter ;
The stored procedure above is still not fast enough. Worse, the limit i,1 lookup means that at some point during execution it stops finding rows, so data gets skipped (a LIMIT offset with no ORDER BY gives no guaranteed row order, so stepping through offsets is not reliable). In my experience, if the affected data is within about 5 million rows, you can simply fetch the rows with a cursor and update them, and nothing gets missed. Also from experience, committing every 3000 to 5000 rows is fine; to be safe, commit every 1000 to 2000 rows.
The revised stored procedure is as follows:
DELIMITER &&
CREATE PROCEDURE updateTimeV2(IN comCount bigint)
BEGIN
    DECLARE c_busi_id BIGINT(20);
    DECLARE i int DEFAULT 0;
    DECLARE done int DEFAULT 0;
    # Cursor over every busi_id that still needs its order_time backfilled
    DECLARE cur_award CURSOR for
        SELECT DISTINCT busi_id FROM `A` WHERE `pid` = 1242343324 and yn = 1 and order_time is null;
    # Flip the exit flag when the cursor runs out of rows
    DECLARE CONTINUE HANDLER FOR NOT FOUND SET done = 1;
    set @log = "log query ....";
    OPEN cur_award;
    read_loop: LOOP
        FETCH cur_award INTO c_busi_id;
        IF done THEN
            LEAVE read_loop;
        END IF;
        set i = i + 1;
        set @other_col = (SELECT cast(other_col as date) FROM `B` where `platform_id` = 1242343324 and yn = 1 and `busi_id` = c_busi_id limit 1);
        if @other_col is NULL THEN
            select CONCAT(@log, " other_col is null");
        else
            SET autocommit = 0;
            select CONCAT("before update c_busi_id is ", c_busi_id, " other_col is ", @other_col);
            update A set order_time = @other_col, modified = now() where `pid` = 1242343324 and busi_id = c_busi_id and yn = 1;
            select CONCAT("after update c_busi_id is ", c_busi_id);
            # Commit every comCount rows
            if mod(i, comCount) = 0 then
                select CONCAT("before commit i is ", i);
                commit;
            end if;
        end if;
    END LOOP;
    CLOSE cur_award;
    commit; # commit the final partial batch
    SET autocommit = 1;
END&&
delimiter ;
Testing showed that the optimized stored procedure performs far better than the version before optimization, and it no longer misses records.
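For reference, the commit batch size is the comCount parameter, so a run would look like the call below; the value 1000 is just the conservative batch size suggested earlier, not a value taken from the original run.
# Kick off the backfill, committing every 1000 rows
call updateTimeV2(1000);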
Notes:
1. A quick test of row-by-row execution updated roughly 200,000 rows per hour.
2. Watch out for the mod(i,1000) check: if set i=i+1; is moved to the end of the loop body, the batching in the stored procedure above will not take effect.
3. Drop the stored procedure with drop procedure updateTimeV1; (a slightly fuller cleanup sketch follows below).
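For completeness, a small cleanup sketch along the lines of note 3 (assuming MySQL 5.5 or later; the procedure names are the ones defined above):
# Check which of the procedures still exist
show procedure status like 'updateTime%';
# Drop them once the backfill has been verified
drop procedure if exists updateTimeV1;
drop procedure if exists updateTimeV2;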