網上的大部分教程都講到了 Elasticsearch 使用 scroll 游標的方法,但使用後往往沒有清除游標。這會造成開啟的 scroll 數量超過最大限制而報錯,因此應該在任務結束時手動清理 scroll(否則只能等到設定的保留時間過後,游標才會自動清理)。
from elasticsearch import Elasticsearch, NotFoundError


def _clear_scrolls(es, scroll_ids):
    """Release each scroll context in *scroll_ids*, skipping ones already gone."""
    for scroll_id in scroll_ids:
        try:
            es.clear_scroll(scroll_id=scroll_id)
        except NotFoundError:
            # The context has already expired or been freed, so there is
            # nothing left to clean up for this id.
            continue


def main():
    """Scroll through every hit of a query and always free the scroll contexts.

    Opens a scroll search with a 2-minute keep-alive and 1000 hits per page,
    walks the pages until one comes back empty, and clears every scroll id
    that was handed out -- even when searching or processing raises -- so
    that open scroll contexts do not pile up until their keep-alive expires.
    """
    # NOTE: hosts, credentials, port, index name and query were redacted
    # ("***") in the original snippet; replace these placeholders before use.
    es = Elasticsearch(["***"], http_auth=("***", "****"), port=...)
    index = "***"
    query = ...

    # A set, because consecutive pages commonly return the same scroll id and
    # each id only needs to be cleared once.
    scroll_ids = set()
    cnt = 0  # number of hits processed so far
    try:
        page = es.search(
            index=index,
            scroll="2m",
            size=1000,
            body={"query": query},
        )
        while True:
            sid = page["_scroll_id"]
            scroll_ids.add(sid)

            hits = page["hits"]["hits"]
            if not hits:
                # An empty page marks the end of the result set; stopping here
                # (rather than comparing a counter with the reported total)
                # cannot loop forever if fewer hits arrive than were announced.
                break

            for info in hits:
                # do something
                cnt += 1

            page = es.scroll(scroll_id=sid, scroll="2m")
    finally:
        # Runs on success and on failure alike, so no context is leaked.
        _clear_scrolls(es, scroll_ids)


if __name__ == "__main__":
    main()