網上的大部分教程都講到了 elasticsearch 使用 scroll 游標的方法,但使用後往往沒有清除游標,這會造成 scroll 超過最大數量的限制而報錯,應該在任務結束時手動清理 scroll(否則只能等到設定的時間後游標才會自動清理)
from elasticsearch import Elasticsearch
def main():
es = Elasticsearch([***], http_auth = ('***', '****'), port = *** )
query = ***
page = es.search(
index= ** *,
scroll = '2m',
size = 1000,
body = {"query": query})
sid = page['_scroll_id']
sid_list = [sid]
scroll_size_max = page['hits']['total']['value']
cnt = 0
while cnt < scroll_size_max:
for info in page['hits']['hits']:
# do something
cnt += 1
page = es.scroll(scroll_id=sid, scroll='2m')
sid = page['_scroll_id']
sid_list.append(sid)
for sid_del in sid_list:
es.clear_scroll(scroll_id=sid_del)
# Script entry point: run the scroll job only when this file is executed
# directly, not when it is imported as a module.
if __name__ == "__main__":
    main()
