“前兩天在大批量刪除ES數據的時候,出現了部分數據被刪除、部分數據遺留的問題,原因在於分頁查詢所使用的偏移量(offset)”
思路
刪除之前考慮使用 ES 的 “_bulk” 接口,但不能一股腦把幾萬條甚至幾十萬條數據全部塞進同一次請求;而且只有符合條件的數據才能刪除,所以需要先根據條件分批查詢出數據,然後再分批刪除。
出現問題的處理方式
TestController.php (文件名稱)
// Page number handed to getList(); actionDelEsData() adds 1 to it after each deleted batch.
public $page = 0;
... 以上都是類信息
/**
 * Deletes the ES documents matching a fixed query, reading them one page at a time.
 *
 * This is the listing the article presents as the faulty approach: it keeps increasing the page
 * number while deleting the hits it has just read.
 * NOTE(review): deleting a page presumably moves the remaining matches towards the start of the
 * result set, so the next (higher) offset skips documents that were never read — this matches
 * the "loop exits about halfway" symptom described in the article; confirm.
 *
 * @param int $limit      number of hits requested from getList() per iteration
 * @param int $chunkLimit maximum number of ids passed to each batchDelEsData() call
 */
public function actionDelEsData($limit = 1000, $chunkLimit = 100) {
// Resolve the ES model from the container.
$esModel = $this->container->get('EsModel');
$index = $esModel->index; # several indexes may be used here
$type = $esModel->type; # type(s) belonging to the index; several may be used
$query = [ // ES query written in Yii 2.0 array syntax; term / range bodies are left empty in this listing
'bool' => [
'must' => [
'term' => []
],
'must_not' => [
'range' => []
]
]
];
$sort = ['_id' => SORT_ASC];
// Keep fetching pages until getList() returns an empty (falsy) result.
while($list = $esModel->getList($index, $type, $query, $sort, $limit, $this->page)) {
$idList = array_column($list, '_id');
// Split the ids with array_chunk and delete them batch by batch
$chunkIdList = array_chunk($idList, $chunkLimit);
foreach ($chunkIdList as $item) {
$esModel->batchDelEsData($index, $type, $item);
}
// Advance to the next page
$this->page += 1;
}
}
EsModel.php (文件名稱)
/**
 * Fetches one page of documents.
 *
 * Stores $index and $type in the static properties, applies $query when it is non-empty,
 * orders by $sort and returns the result of asArray()->all() for the computed offset/limit.
 *
 * @param string $index index name, assigned to self::$index
 * @param string $type  type name, assigned to self::$type
 * @param array  $query query body; skipped when empty
 * @param array  $sort  sort definition; a fallback is substituted when empty
 * @param mixed  $limit page size
 * @param mixed  $page  page number used to compute the offset
 **/
public function getList(string $index, string $type, array $query = [], array $sort = [], $limit, $page) {
self::$index = $index;
self::$type = $type;
$esModel = self::find();
if ($query) {
$esModel->query($query);
}
if (!$sort) {
// NOTE(review): SORT_ASC/SORT_DESC is a division of the two constants, and 'field' looks like
// a placeholder column name — presumably meant as "your field => ASC or DESC"; confirm.
$sort = ['field' => SORT_ASC/SORT_DESC];
}
// With $page = 0 (the controller's starting value) this evaluates to a negative offset.
$offset = ($page - 1) * $limit;
return $esModel->orderBy($sort)->offset($offset)->limit($limit)->asArray()->all();
}
從上邊可以看出,我是利用 limit、offset 來進行偏移查詢,然後再進行批量刪除。可是刪除過程執行到將近一半時就跳出了 while 循環,ES 數據沒有刪除完整,原因則在於數據的偏移:
每刪除一批數據,剩餘符合條件的數據就會整體向前移動,而 offset 卻仍在不斷增大,於是每次查詢都會跳過一批還沒有被刪除的數據,直到 offset 超出剩餘數據的總數、查詢返回空結果為止。
修改后的處理方式
- 在固定的 query 之外,額外加上一個以 ID 作為游標的範圍條件:每次只查詢 ID "> $minId" 的數據,處理完一批之後,把 $minId 更新為該批最後一條數據的 ID
TestController.php (文件名稱)
// Cursor for the deletion loop: actionDelEsData() stores the `_id` of the last hit of each batch here (0 before the first batch).
public $minId= 0;
... 以上都是類信息
/**
 * Deletes the ES documents matching a fixed query, walking the hits with an id cursor.
 *
 * Each round asks getList() for the next $limit hits after $this->minId, deletes their `_id`
 * values in slices of at most $chunkLimit, then moves the cursor to the `_id` of the last hit
 * of that round. The loop ends as soon as getList() returns an empty (falsy) result.
 *
 * NOTE(review): the sort and the cursor both use `_id`; verify that this is the same value and
 * ordering as the field getList() compares against $minId.
 *
 * @param int $limit      number of hits requested per round
 * @param int $chunkLimit maximum number of ids passed to each batchDelEsData() call
 */
public function actionDelEsData($limit = 1000, $chunkLimit = 100) {
    $esModel = $this->container->get('EsModel');
    $index = $esModel->index; // several indexes may be used here
    $type = $esModel->type;   // type(s) belonging to the index; several may be used

    // ES query in Yii 2.0 array syntax; the term / range bodies are left empty in this listing.
    $query = [
        'bool' => [
            'must' => ['term' => []],
            'must_not' => ['range' => []]
        ]
    ];
    $sort = ['_id' => SORT_ASC];

    while (true) {
        $hits = $esModel->getList($index, $type, $query, $sort, $limit, $this->minId);
        if (!$hits) {
            break;
        }

        // Delete the ids of this round in batches of at most $chunkLimit.
        foreach (array_chunk(array_column($hits, '_id'), $chunkLimit) as $idBatch) {
            $esModel->batchDelEsData($index, $type, $idBatch);
        }

        // The last hit of this round becomes the lower bound of the next one.
        $lastHit = end($hits);
        $this->minId = $lastHit['_id'];
    }
}
EsModel.php (文件名稱)
/**
 * Fetches the next batch of documents whose `id` is greater than $minId.
 *
 * The caller's query is kept intact: it is combined with the `id > $minId` range clause inside
 * a single bool.must list, so both conditions have to match. Assigning the range clause to
 * $query['bool']['must'] instead would replace the caller's own must conditions.
 *
 * @param string $index index name, assigned to self::$index
 * @param string $type  type name, assigned to self::$type
 * @param array  $query complete query body in Yii 2.0 array syntax; may be empty
 * @param array  $sort  sort definition; defaults to ascending `id` when empty
 * @param int    $limit maximum number of hits to return
 * @param mixed  $minId exclusive lower bound for `id`
 * @return mixed result of asArray()->all() on the built query
 */
public function getList(string $index, string $type, array $query = [], array $sort = [], $limit = 100, $minId = 0) {
    self::$index = $index;
    self::$type = $type;

    $cursorClause = ['range' => ['id' => ['gt' => $minId]]];

    // Wrap the whole caller query next to the cursor clause so none of its conditions are lost.
    $mustClauses = $query ? [$query, $cursorClause] : [$cursorClause];

    if (!$sort) {
        // Paging by "id > $minId" only visits every document when hits arrive in ascending id order.
        $sort = ['id' => SORT_ASC];
    }

    $esModel = self::find();
    $esModel->query(['bool' => ['must' => $mustClauses]]);

    return $esModel->orderBy($sort)->limit($limit)->asArray()->all();
}
- 仍然使用偏移查詢,但 offset 始終從 0 開始:因為上一批數據已經被刪除,剩餘符合條件的數據會自動排到結果的最前面,所以不需要翻頁。樣例如下:
TestController.php (文件名稱)
// Page number handed to getList(); this variant of actionDelEsData() never increments it.
public $page = 0;
... 以上都是類信息
/**
 * Deletes the ES documents matching a fixed query by repeatedly reading the same page.
 *
 * $this->page is never changed, so every call to getList() uses the same page number; the hits
 * returned are deleted by `_id` in slices of at most $chunkLimit before the next read.
 *
 * NOTE(review): the loop only ends when getList() returns an empty (falsy) result, so it
 * presumably relies on each batch delete removing those hits from later searches — confirm
 * that a failing batchDelEsData() cannot leave it spinning.
 *
 * @param int $limit      number of hits requested per read
 * @param int $chunkLimit maximum number of ids passed to each batchDelEsData() call
 */
public function actionDelEsData($limit = 1000, $chunkLimit = 100) {
    $esModel = $this->container->get('EsModel');
    $index = $esModel->index; // several indexes may be used here
    $type = $esModel->type;   // type(s) belonging to the index; several may be used

    // ES query in Yii 2.0 array syntax; the term / range bodies are left empty in this listing.
    $mustPart = ['term' => []];
    $mustNotPart = ['range' => []];
    $query = ['bool' => ['must' => $mustPart, 'must_not' => $mustNotPart]];

    $sort = ['_id' => SORT_ASC];

    while ($hits = $esModel->getList($index, $type, $query, $sort, $limit, $this->page)) {
        $ids = array_column($hits, '_id');

        // Delete in batches rather than sending every id in one request.
        foreach (array_chunk($ids, $chunkLimit) as $idBatch) {
            $esModel->batchDelEsData($index, $type, $idBatch);
        }
    }
}
EsModel.php (文件名稱)
/**
 * Fetches one page of documents.
 *
 * @param string $index index name, assigned to self::$index
 * @param string $type  type name, assigned to self::$type
 * @param array  $query query body in Yii 2.0 array syntax; no query is applied when empty
 * @param array  $sort  sort definition; no ordering is applied when empty
 * @param int    $limit maximum number of hits to return
 * @param int    $page  1-based page number; 0 and 1 both read from offset 0
 * @return mixed result of asArray()->all() on the built query
 */
public function getList(string $index, string $type, array $query = [], array $sort = [], $limit = 100, $page = 1) {
    self::$index = $index;
    self::$type = $type;

    $esModel = self::find();
    if ($query) {
        $esModel->query($query);
    }
    // Only order when the caller asked for it; there is no meaningful default sort column here.
    if ($sort) {
        $esModel->orderBy($sort);
    }

    // The delete loop passes page 0; clamp so the offset is never negative and reads start at 0.
    $offset = max(0, ($page - 1) * $limit);

    return $esModel->offset($offset)->limit($limit)->asArray()->all();
}