python操作elasticsearch

本文轉載自查看原文 2020-04-08 17:46 843 python/ ElasticSearch/ elasticsearch/ 其他/ 數據庫

一、基本查詢

from elasticsearch import Elasticsearch

# Connect to the cluster.
es = Elasticsearch(
    # Node addresses, given as a list: a set literal has no defined order and
    # is not the documented form for `hosts`.
    hosts=['192.168.0.120', '192.168.0.153'],
    timeout=3600,  # request timeout, in seconds
)


es.search(index='pv23')  # index: the index (comparable to a database) to search

此方法是默認查詢，由於沒有任何篩選條件，會默認顯示前10條數據的所有信息

二、filter_path

添加過濾路徑。通過指定字段，只顯示數據的指定字段信息（默認顯示所有字段的信息）。

from elasticsearch import Elasticsearch

# Connect to the cluster.
es = Elasticsearch(
    # Node addresses, given as a list: a set literal has no defined order and
    # is not the documented form for `hosts`.
    hosts=['192.168.0.120', '192.168.0.153'],
    timeout=3600,  # request timeout, in seconds
)

# Response filter: only these fields are kept in the returned data.
filter_path = [
    'hits.hits._source.ziduan1',  # field 1
    'hits.hits._source.ziduan2',  # field 2
]

es.search(index='pv71', filter_path=filter_path)  # filter_path selects the fields to return

三、條件查詢

通過指定body，進行條件查詢。類似於mysql中的where。

1、切片查詢

from elasticsearch import Elasticsearch

# Connect to the cluster.
es = Elasticsearch(
    # Node addresses, given as a list: a set literal has no defined order and
    # is not the documented form for `hosts`.
    hosts=['192.168.0.120', '192.168.0.153'],
    timeout=3600,  # request timeout, in seconds
)

# body carries the query conditions.
body = {
    'from': 0,  # offset of the first hit to return
    # Number of hits to return, like `LIMIT 0, 20` in MySQL. size can also be
    # passed to es.search directly; it defaults to 10.
    'size': 20,
}

# Response filter: only these fields are kept in the returned data.
filter_path = [
    'hits.hits._source.ziduan1',  # field 1
    'hits.hits._source.ziduan2',  # field 2
]

es.search(index='pv23', filter_path=filter_path, body=body)  # body supplies the conditions

2、match，模糊查詢

# Full-text query: the search text is analyzed (split into terms), so
# documents matching any of the resulting terms are returned.
body = {
    'query': {
        'match': {
            'ziduan1': '我愛你中國',
        },
    },
    # Page size: defaults to 10 when omitted and may not exceed 10000 (the
    # limit can be raised, at the cost of extra load on the cluster).
    'size': 20,
}

# Alternative way to set the page size: pass it to es.search as a keyword
# argument (the default is 10).
es.search(
    index='pv23',
    filter_path=filter_path,
    body=body,
    size=200,
)

3、match_phrase，短語匹配查詢

# Phrase query: only documents containing the whole phrase "我愛你中國" match.
phrase_clause = {"ziduan1": "我愛你中國"}
body = {"query": {"match_phrase": phrase_clause}}

# 注：內容中的下划線，等標點符號會被忽略，有與沒有的效果一樣

4、term，精准單值查詢

# Exact single-value query: one field and one value per term clause,
# comparable to `WHERE ziduan = 'a'` in MySQL. A second field (for example
# 'ziduan2') cannot be added to the same clause.
body = {
    'query': {
        'term': {
            # Matches documents whose value equals "我愛你中國". For Chinese
            # text, query the field's `.keyword` sub-field.
            'ziduan1.keyword': '我愛你中國',
        },
    },
}

5、terms，精准多值查詢

# Exact multi-value query on a single field, comparable to
# `WHERE ziduan IN (a, b, c, ...)` in MySQL.
wanted_values = ['我愛你中國', 'I love China']  # ziduan1 may equal any of these
body = {'query': {'terms': {'ziduan1': wanted_values}}}

6、multi_match，多字段查詢

# Search several fields at once with a single query text.
multi_match_clause = {
    'query': '我愛你中國',             # text to look for; note that it is analyzed
    'fields': ['ziduan1', 'ziduan2'],  # fields to search in
}
body = {'query': {'multi_match': multi_match_clause}}

7、prefix，前綴查詢

# Prefix query: documents whose field value starts with the given string.
body = {"query": {"prefix": {"ziduan.keyword": "我愛你"}}}

# 注：英文不需要加keyword

8、wildcard，通配符查詢

# Wildcard query: `?` stands for exactly one character, `*` for zero or more.
pattern = "?愛你中*"
body = {"query": {"wildcard": {"ziduan1.keyword": pattern}}}
# 注：此方法只能查詢單一格式的（都是英文字符串，或者都是漢語字符串）。兩者混合不能查詢出來。

9、regexp，正則查詢

# Regular-expression query on ziduan1.
body = {"query": {"regexp": {"ziduan1": "W[0-9].+"}}}

10、bool，多條件查詢

# The two conditions that the three bool variants below combine.
term_clause = {'term': {'ziduan1.keyword': '我愛你中國'}}
terms_clause = {'terms': {'ziduan2': ['I love', 'China']}}

# must: every condition has to match (AND).
body = {'query': {'bool': {'must': [term_clause, terms_clause]}}}

# should: the conditions are alternatives (OR).
body = {'query': {'bool': {'should': [term_clause, terms_clause]}}}

# must_not: none of the conditions may match.
body = {'query': {'bool': {'must_not': [term_clause, terms_clause]}}}



# bool nested inside bool: the ziduan1 and ziduan2 conditions are both
# required, and on top of that one of ziduan3 / ziduan4 has to match.
either_clause = {
    'bool': {
        'should': [
            {'term': {'ziduan3': 'Love'}},
            {'term': {'ziduan4': 'Like'}},
        ],
    },
}
body = {
    'query': {
        'bool': {
            # `must` takes a list, and every condition in it is wrapped in
            # its own {...}.
            'must': [
                {'term': {'ziduan1': 'China'}},
                {'term': {'ziduan2.keyword': '我愛你中國'}},
                either_clause,
            ],
        },
    },
}

11、exists，存在字段查詢

# Documents in which the field ziduan1 exists.
field_present = {"exists": {"field": "ziduan1"}}
body = {"query": field_present}


# exists combined with bool: ziduan1 has to exist AND ziduan2 has to equal
# the given value.
body = {
    "query": {
        "bool": {
            "must": [
                {"exists": {"field": "ziduan1"}},
                {"term": {"ziduan2.keyword": "我愛你中國"}},
            ],
        },
    },
}

12、大於小於查詢

# Range query: 3 <= ziduan1 < 20.
ziduan1_bounds = {
    'gte': 3,  # greater than or equal to
    'lt': 20,  # strictly less than
}
body = {'query': {'range': {'ziduan1': ziduan1_bounds}}}

13、nested，json數據查詢

# Query on a nested (JSON object) field: `path` names the nested field and the
# inner query addresses its sub-field as ziduan1.ziduan2.
nested_clause = {
    "nested": {
        "path": "ziduan1",
        "query": {"term": {"ziduan1.ziduan2": "我愛你中國"}},
    },
}
body = {"query": nested_clause}


# nested combined with bool: the plain term condition and the nested
# condition both have to match.
body = {
    "query": {
        "bool": {
            "must": [
                {"term": {"ziduan3": "I love China"}},
                nested_clause,
            ],
        },
    },
}

14、排序

# Sort the documents selected by `query`; any of the queries shown earlier
# can be used in place of match_all.
ascending = {'order': 'asc'}  # 'asc' = ascending, 'desc' = descending
body = {
    'query': {'match_all': {}},
    'sort': {'age': ascending},  # sort by the age field
}

# Sorting on several fields: order matters, earlier entries take precedence.
body = {
    'query': {'match_all': {}},
    'sort': [
        {'age': {'order': 'asc'}},   # first by age, ascending
        {'name': {'order': 'asc'}},  # then by name, ascending
    ],
}

15、scroll，翻頁查詢（數據量小可以使用此方法，數據量大推薦使用search_after方法--見下一個方法）

body = {
    'query': {'match_all': {}},
    # NOTE(review): 'zidan6' may be a typo for 'ziduan6' — confirm against the
    # index mapping.
    'sort': {'zidan6': 'asc'},  # order in which the scrolled pages are returned
}

# _scroll_id and hits.total have to survive response filtering: the former is
# needed to continue the scroll, the latter to work out the number of pages.
scroll_filter = [
    '_scroll_id',
    'hits.total',
    'hits.hits._source.ziduan1',
    'hits.hits._source.ziduan2.ziduan2_1',
]

# Open the scroll. size is the page size; this response already carries the
# first page, so no second search is needed to obtain it.
result = es.search(index='pv91', scroll='1m', size=5, body=body, filter_path=scroll_filter)
# Total number of matching documents as reported by the cluster.
total = result['hits']['total']
# Initial scroll id.
scrid = result['_scroll_id']

try:
    for i in range(5):  # read at most 5 pages
        hits = result.get('hits', {}).get('hits', [])
        if not hits:
            # An empty page means the scroll is exhausted.
            break
        print(hits, '\n')
        print('*' * 50, '第{}頁'.format(i), '*' * 50)
        result = es.scroll(scroll_id=scrid, scroll='1m', filter_path=scroll_filter)
        # Always continue with the most recent scroll id returned.
        scrid = result.get('_scroll_id', scrid)
finally:
    # Release the server-side scroll context instead of waiting for it to expire.
    es.clear_scroll(scroll_id=scrid)

16、search_after，翻頁查找（推薦此方法）

content_size = 3000  # page size
size_cont = content_size
# Sort values of the last hit of the previous page; None until a page has been
# read, so the first request carries no search_after at all.
next_id = None
# A page shorter than content_size is the last one.
while size_cont == content_size:
    body = {
        "query": {
            "range": {
                "update_time": {
                    "gte": "2019-10-14"
                }
            }
        },
        # search_after continues from a position in this sort order.
        # NOTE(review): if ziduan2 is not unique, hits sharing a value across a
        # page boundary can be skipped — consider adding a tiebreaker field.
        'sort': {'ziduan2': 'asc'},
        'size': content_size,
    }
    if next_id is not None:
        # Resume strictly after the last hit already seen.
        body['search_after'] = next_id
    filter_path = [
        'hits.hits._source.ziduan1',
        'hits.hits._source.ziduan2',
        'hits.hits.sort',  # cursor values for the next request
    ]
    response = es.search(index='pv1', body=body, filter_path=filter_path)
    # With filter_path, a response without hits has no 'hits' key at all.
    rt = response.get('hits', {}).get('hits', [])
    size_cont = len(rt)
    if not rt:
        break
    # Advance the cursor from the last hit, whatever fields that hit contains.
    next_id = rt[-1]['sort']
    for result in rt:
        source = result.get('_source', {})
        if 'ziduan1' not in source:
            # Hits without ziduan1 are skipped.
            continue
        app_date = source['ziduan1']
        ziduan2 = source.get('ziduan2')  # None when the field is absent

17、聚合查詢

# Count how many documents carry each distinct value of a field.
value_counts = {
    "terms": {
        "field": "ziduan1",  # field whose values are counted
        "size": 10,          # number of buckets to return
    },
}
body = {
    # To aggregate over a subset only, add a "query" clause (for example
    # match_all) next to "size".
    "size": 0,  # number of hits to return: 0 here, 10 by default
    "aggs": {
        "num_ipcr": value_counts,  # user-chosen name for the buckets
    },
}

# Nested aggregation: a terms aggregation run inside a `nested` aggregation
# on the ziduan1 path.
inner_terms = {"field": "ziduan1.ziduan1_1.keyword", "size": 200000}
body = {
    "size": 0,  # number of hits to return: 0 here, 10 by default
    "aggs": {
        "agroup": {
            "nested": {"path": "ziduan1"},
            "aggs": {
                "de_inventor": {"terms": inner_terms},
            },
        },
    },
}


# Distinct-value count.
distinct_count = {
    "field": "ziduan1",
    # Counts up to about this threshold are expected to be close to exact;
    # per the Elasticsearch documentation memory use is roughly
    # precision_threshold * 8 bytes.
    "precision_threshold": 100,
}
body = {
    "size": 0,
    "aggs": {
        "discount_ipcr": {"cardinality": distinct_count},
    },
}
# 結果返回值說明
"""
doc_count_error_upper_bound: 表示沒有在這次聚合中返回、但是可能存在的潛在聚合結果
sum_other_doc_count：表示這次聚合中沒有統計到的文檔數
buckets：聚合結果，默認由高到低排列。key表示聚合元素的值，doc_count表示元素出現的次數。注意，這里的doc_count也是不准確的
"""

四、建立es數據

# Create the index (comparable to a database; it is the `index` passed to
# es.search). ignore=400 is passed so an HTTP 400 response is not raised as
# an error.
es.indices.create(index='my-index', ignore=400)

# Insert a document: id is its unique identifier, body holds the fields and
# their values.
document = {'name': 'jaychou', "age": 30, "sex": 'male'}
es.index(index="my-index", id=0, body=document)

免責聲明！

本站轉載的文章為個人學習借鑒使用，本站對版權不負任何法律責任。如果侵犯了您的隱私權益，請聯系本站郵箱yoyou2525@163.com刪除。

猜您在找 在Python中操作Elasticsearch Python操作Elasticsearch對象 elasticsearch之python操作 python3 操作elasticsearch elasticsearch之python操作(非原生) python對接elasticsearch的進階操作 python操作Elasticsearch7.x 使用Python對ElasticSearch獲取數據及操作 Python Elasticsearch批量操作客戶端 Python3操作Elasticsearch進行增刪改查