from + size 分頁查詢
from 與 size 之和不能大於 10000(index.max_result_window 的預設值),只適用於小數據量的查詢;結果總量大於 10000 時,這種方法就不適用了。
scroll_id分頁查詢
通過游標(scroll_id)的方式逐批查詢,沒有 from + size 的 10000 條上限,實際上是一種分頁機制。
from elasticsearch import Elasticsearch
class MyElastic:
    """Small wrapper around an Elasticsearch client for exporting query hits.

    The only operation offered is ``query_by_ScrollId``, which pages through
    every hit of a query with the scroll API and writes one field per line to
    a text file.
    """

    def __init__(self, hosts=('192.168.199.32',), http_auth=('elastic', 'passwd'), port=9200):
        """Create the underlying Elasticsearch client.

        Args:
            hosts: Iterable of host names/addresses to connect to.
            http_auth: ``(user, password)`` pair used for HTTP basic auth.
            port: TCP port of the Elasticsearch HTTP endpoint.
        """
        # NOTE(review): the default credentials are kept only for backward
        # compatibility with existing callers; pass real ones explicitly.
        self.es = Elasticsearch(list(hosts), http_auth=http_auth, port=port)

    def query_by_ScrollId(self, index, body, size=1000, out_path='es_query_answer.txt'):
        """Export the ``name`` field of every hit of a query using scroll paging.

        Args:
            index: Name of the index to search.
            body: Search request body (query DSL). Each returned hit must
                expose ``_source['name']``.
            size: Number of hits requested per page.
            out_path: File that receives one ``name`` value per line. It is
                truncated before writing.

        Returns:
            None. Progress is printed to stdout.
        """
        with open(out_path, 'w', encoding='utf-8') as fw:
            res = self.es.search(index=index, doc_type='_doc', scroll='5m', timeout='1m', size=size, body=body)
            scroll_id = res['_scroll_id']
            try:
                # 7.x reports {"value": ..., "relation": ...}; older servers
                # report a bare integer.
                total = res['hits']['total']
                if isinstance(total, dict):
                    total = total['value']
                print(f'符合Query的記錄總數:{total}, 使用scroll分頁查:')

                # The reported total may be only a lower bound (it is capped
                # unless track_total_hits is requested), so it must not drive
                # the loop: keep scrolling until a page comes back empty.
                hits = res['hits']['hits']
                fetched = 0
                while hits:
                    fw.writelines(hit['_source']['name'] + '\n' for hit in hits)
                    fetched += len(hits)
                    print('當前:', fetched)
                    res = self.es.scroll(scroll_id=scroll_id, scroll='5m')
                    # The server may hand back a different id on each call;
                    # always continue from the most recent one.
                    scroll_id = res['_scroll_id']
                    hits = res['hits']['hits']
            finally:
                # Release the server-side search context instead of letting
                # it linger until the 5m keep-alive expires.
                self.es.clear_scroll(scroll_id=scroll_id)
# Driver: connect with MyElastic's built-in connection settings.
es = MyElastic()

# Search request body:
#   _source - restrict each returned hit to these fields
#   query   - match query on the `name` field for the text '.xyz'
body = {
    "_source": ["tld.subdomain", "tld.domain", "name"],
    "query": {"match": {"name": ".xyz"}},
}

# Export every hit of the query from the given index via scroll paging.
es.query_by_ScrollId(index='fdns_a_2020-05', body=body)