Python 操作Elasticsearch之elasticsearch模塊

本文轉載自查看原文 2020-08-21 17:38 1290

官方文檔：https://elasticsearch-py.readthedocs.io/en/master/

mac下的操作

01 基礎

01-01 Elasticsearch 基本操作

啟動 elasticsearch

 brew services start elasticsearch
 # 或者
elasticsearch

停止elasticsearch

brew services stop elasticsearch
或者
control + c

后台瀏覽

http://localhost:9200

01-02 在python中引入

安裝Elasticsearch模塊

pip3 install elasticsearch

簡單基本操作

from elasticsearch import Elasticsearch

ES = ["127.0.0.1:9200", ]
es = Elasticsearch(
    ES,
    # Sniff the cluster for its nodes before the first request.
    sniff_on_start=True,
    # Refresh the node list when a connection to a node fails.
    sniff_on_connection_fail=True,
    # Refresh the node list every 60 seconds.
    sniffer_timeout=60
)

# create() needs an explicit id. Ids must be unique within an index, and the
# id may be a number or a string.
es.create(index="索引", id=1, doc_type="類型", body={})

# index() does not need an id; one is generated when it is omitted.
es.index(index="索引", doc_type="dict", body={})

# delete() removes exactly one document and raises if it does not exist.
# Arguments used here:
#   index     the index name
#   id        the document id
#   doc_type  the document type
es.delete(id="id", index="索引", doc_type="類型")

# delete_by_query() deletes every document matched by the query in `body`.
# The query is defined here so the snippet runs on its own.
body = {
    "query": {
        "term": {
            "name": "java"
        }
    }
}
es.delete_by_query(index='books', body=body)

查詢

# search() with no arguments queries everything.
# Keyword arguments it accepts include: index, doc_type, body.
es.search()

# get() fetches one document; it raises when the document is not found.
# The tutorial passes all three of: index, doc_type, id.
es.get(id=2, index="books", doc_type="dict")

注意：

	get三個參數必填，如果沒找到會報錯

02 操作

02-01 寫入

操作模版

from elasticsearch_dsl import connections
from elasticsearch.client import IndicesClient


# Reusable field-type definitions, keyed by the Elasticsearch type name.
type_dict = {
    "text": {"type": "text"},
    "keyword": {"type": "keyword"},
    "ip": {"type": "ip"},
    "long": {"type": "long"},
    "double": {"type": "double"},
    "boolean": {"type": "boolean"},
    "integer": {"type": "integer"}

}
# Field mappings for the "web_log" type.
# NOTE(review): the mapping is nested under a type name; whether typed
# mappings are accepted depends on the Elasticsearch version — confirm
# against the target cluster.
mappings = {
    "web_log": {
        # Keep the original document in _source.
        "_source": {
            "enabled": True
        },
        "properties": {
            "remote_addr": type_dict["ip"],
            # Date field parsed with an explicit format string.
            "time_local": {
                "type": "date",
                "format": "dd/MMM/yyyy:HH:mm:ss Z"

            },
            "body_bytes_sent": type_dict["long"],
            "request_length": type_dict["long"],
            "bytes_sent": type_dict["long"],
            "request_time": type_dict["double"],
            "idss_action": type_dict["integer"],
        }
    }
}
# Index template: the settings and mappings below are declared for indices
# whose names match the "index_patterns" entries.
template = {
    "index_patterns": ["web_bubble_index*"],
    "settings": {
        "index": {
            "refresh_interval": "30s",
            "number_of_shards": "12",
            "number_of_replicas": "2"
        },
        "translog": {
            "sync_interval": "30s",
            "durability": "async",
            "flush_threshold_size": "1000mb"
        }
    },
    "mappings": mappings

}

es = connections.create_connection(hosts="127.0.0.1:9200", timeout=60, http_compress=True,
                                   sniff_on_connection_fail=True, )

# Register the index template. This must be put_template() with the
# `template` dict built above; put_mapping() only changes the mapping of an
# existing index and does not create a template.
IndicesClient(es).put_template(name="模版名字", body=template)

# Fetch the template by name.
IndicesClient(es).get_template(name="模版名字")

# Delete the template by name.
IndicesClient(es).delete_template(name="模版名字")

寫入es庫

普通寫入

# Write a single document; no id is given here.
es.index(body={}, index="索引", doc_type="dict")

批量寫入

from elasticsearch.helpers import bulk

# Write a list of actions through the bulk helper.
# NOTE(review): `body` is not defined in this snippet; presumably each list
# item is a document/action dict prepared earlier — confirm that it names
# its target index (or that one is supplied to bulk()).
bulk(es, [body,])

02-02 查詢

普通查詢

match_all

查看所有文檔，相當於不做篩選條件

# match_all applies no filter condition: every document is a match.
body = dict(query=dict(match_all={}))
es.search(body=body, index="books")

size from

size 表示顯示幾條；from 表示跳過前幾條（偏移量），默認為 0，即從第一條開始

顯示前幾條

# Option 1: pass size as a keyword argument of search().
body = dict(query=dict(match_all={}))
es.search(index="books", body=body, size=2)

# Option 2: put size in the request body, at the same level as query.
body = dict(query=dict(match_all={}), size=2)
es.search(index="books", body=body)

# 顯示前兩條,search中的參數優先級高

從第幾條開始顯示幾條

# Option 1: "from" lives in the body, size is a keyword argument of search().
body = {"query": {"match_all": {}}}
body["from"] = 5
es.search(index="books", body=body, size=2)

# Option 2: both "from" and "size" live in the body, next to query.
body = {"query": {"match_all": {}}}
body["from"] = 5
body["size"] = 2
es.search(index="books", body=body)

# 跳過前5條（from 從 0 開始計數），顯示接下來的兩條

注意：

	size、from和query同級，都可以用

term

term query會去倒排索引中尋找確切的term，它並不知道分詞器的存在，這種查詢適合keyword、numeric、date等明確值的

查詢某個字段的某個關鍵字

# term looks for the exact value "java" in the name field.
body = dict(query=dict(term=dict(name="java")))
es.search(index="books", body=body)
# Returns documents whose name is "java"; no size is set in this call.

精確匹配，注意query中的格式，value表示一個匹配的數據，注意只支持一個key

terms

查詢某個字段里含有多個關鍵詞的文檔

# terms takes a list of values for one field.
body = dict(query=dict(terms=dict(name=["java", "python"])))
es.search(index="books", body=body)
# Returns documents whose name is python or java.

注意：

	只支持一個key，value必須是列表或者元組

match

match query 知道分詞器的存在，會對field進行分詞操作，然后再查詢

# match runs the query text through the analyzer, then searches for the terms.
body = dict(query=dict(match=dict(name="java python")))
es.search(index="books", body=body)
# Returns documents whose name matches java or python.

注意：

	key只能有一個，value可以是字符串或者數字

	一般情況是字符串，可以以各種類型去分割，空格，逗號，｜等等都可以識別

multi_match

可以指定多個key

# multi_match applies one query string to several fields.
body = dict(
    query=dict(
        multi_match=dict(query="python java", fields=["name", "book_name"]),
    ),
)
es.search(index="books", body=body)
# Returns documents where name or book_name matches python or java.

注意：

	query指定的是要篩選的value值，必須是字符串或者數字，可以是多個，分詞

	fields是指定的key值，必須是列表或者是元組

	只要fields中的key中有query指定的value，都被篩選

復合查詢

bool

有三類查詢關系

must 都滿足

should 只要滿足一個

must_not 都不滿足

基本語法

可以一塊用

# A bool query groups clauses under should / must / must_not.
# Each group is a list and may hold several different query types.
body = {
    "query": {
        "bool": {
            "should": [
                dict(term=dict(name="java")),
                dict(terms=dict(name=["java", "python"])),
                dict(match=dict(name="java python")),
                dict(multi_match=dict(query="python java", fields=["name", "book_name"])),
            ],
            "must": [],
            "must_not": [],
        }
    }
}
es.search(index="books", body=body)

	注意：

		每個查詢類中都能寫多個查詢方法

range

范圍查詢

# Range on price: greater than or equal to 10 and less than 180.
# Operators: lt (<), lte (<=), gt (>), gte (>=).
body = dict(query=dict(range=dict(price=dict(lt=180, gte=10))))
es.search(index="books", body=body)

注意：

	可以只寫一個，例如大於10

prefix

前綴—以什么開頭

# prefix matches values that start with the given string.
body = dict(query=dict(prefix=dict(name="j")))
es.search(index="books", body=body)
# Returns documents whose name starts with "j".

wildcard

通配符—模式匹配（* 匹配任意多個字符，? 匹配單個字符；這不是正則表達式，正則匹配請使用 regexp 查詢）

# "*v*" matches every name that contains "v".
# Likewise "*a" matches names ending in a, and "a*" names starting with a.
body = dict(query=dict(wildcard=dict(name="*v*")))
es.search(index="books", body=body)

排序

sort

desc降序

asc升序

# sort sits at the same level as query; "desc" is descending, "asc" ascending.
body = dict(
    query=dict(term=dict(name="java")),
    sort=dict(age=dict(order="desc")),
)
# Documents whose name is "java", ordered by age descending, first two hits.
es.search(index="books", body=body, size=2)

注意：

	sort和query同級

其他

exists

是否存在數據，結果是布爾值

# exists() reports whether the document is present; the result is a boolean.
# The tutorial passes all three of: index, doc_type, id.
es.exists(id=1, index="books", doc_type="dict")

get_source

直接返回body數據

# get_source() returns the stored document body directly.
es.get_source(index="books", doc_type="dict", id=2)

注意：

	id、index、doc_type必填，沒數據會報錯

ids

篩選id

# ids selects documents by their id values.
body = dict(query=dict(ids=dict(type="dict", values=[1, 2])))
es.search(index="books", body=body)
# Returns the documents whose id is 1 or 2.

filter_path

篩選顯示數據的字段

# Query: name is "java", sorted by age descending.
body = dict(
    query=dict(term=dict(name="java")),
    sort=dict(age=dict(order="desc")),
)
# filter_path limits which response fields are shown: here each hit's _id
# and its _source.name. Several paths may be listed; a path that is absent
# from the response is simply not shown.
es.search(index="books", body=body, filter_path=["hits.hits._id", "hits.hits._source.name"])
# "*" is supported and matches any field name or part of one.
es.search(index="books", body=body, filter_path=["hits.hits._*",])

count

數據數量

# count() returns how many documents match the query.
body = dict(query=dict(term=dict(name="java")))
result = es.count(index="books", body=body)
# result is a dict; read the number with result.get("count").

match_phrase

短語匹配：查詢文本同樣會被分詞，但要求各個詞按相同順序相鄰出現（與 term 的不分詞精確匹配不同）

# match_phrase searches for the query text as one phrase.
body = dict(query=dict(match_phrase=dict(name="java python")))
es.search(index="books", body=body)
# Returns documents whose name contains the phrase "java python".

查詢總結

代碼加解釋

from elasticsearch import Elasticsearch

# No arguments are passed, so the client uses its default connection settings.
es = Elasticsearch()

# One request combining the query types shown earlier inside a bool query,
# plus sorting and paging at the same level as "query".
body = {
    "query": {
        "bool": {
            "should": [
                # exact value on name
                dict(term=dict(name="python")),
                # any of the listed values on name
                dict(terms=dict(name=["java", ])),
                # analyzed match on author
                dict(match=dict(author="lynn")),
                # one query string applied to several fields
                dict(multi_match=dict(query="lynn 田少", fields=["publish", "author"])),
                # name starts with the given string
                dict(prefix=dict(name="金")),
                # selection by document id
                dict(ids=dict(type="dict", values=[1, 2])),
            ],
            "must": [
                # price below 200
                dict(range=dict(price=dict(lt=200))),
                # name contains the given character
                dict(wildcard=dict(name="*梅*")),
            ],
            "must_not": [
                # exclude negative prices
                dict(range=dict(price=dict(lt=0))),
            ],
        }
    },
    # order by price, descending
    "sort": dict(price=dict(order="desc")),
    "from": 2,
    "size": 3,
}
# Only each hit's _source.name and _id are kept in the response.
es.search(index="books", body=body, filter_path=["hits.hits._source.name", "hits.hits._id"])

注意：

	方法都可以放在bool中

免責聲明！

本站轉載的文章為個人學習借鑒使用，本站對版權不負任何法律責任。如果侵犯了您的隱私權益，請聯系本站郵箱yoyou2525@163.com刪除。

猜您在找 python操作elasticsearch7 elasticsearch for Python之操作篇 Python操作elasticsearch python操作Elasticsearch (一、例子) python下的Elasticsearch操作 Python 操作 ElasticSearch python對接elasticsearch的基本操作 Python os模塊之文件操作 python操作Excel模塊openpyxl python 操作 elasticsearch-7.0.2 遇到的問題