# es的倒排索引(擴展閱讀.md)
-把文章進行分詞,對每個詞建立索引
具體操作可以查看官方文檔
https://www.elastic.co/guide/en/elasticsearch/reference/7.5/indices.html
官方中文文檔(基於 2.x 版本,部分內容與新版本不一致)
https://www.elastic.co/guide/cn/elasticsearch/guide/current/index-settings.html
一 索引初始化
#新建一個lqz2的索引,索引分片數量為5,索引副本數量為1 PUT lqz2 { "settings": { "index":{ "number_of_shards":5, "number_of_replicas":1 } } } ''' number_of_shards 每個索引的主分片數,7.0 之前默認值是 5,7.0 及之后默認值是 1 。這個配置在索引創建后不能修改。 number_of_replicas 每個主分片的副本數,默認值是 1 。對於活動的索引庫,這個配置可以隨時修改。 '''
二 查詢索引配置
#獲取lqz2索引的配置信息 GET lqz2/_settings #獲取所有索引的配置信息 GET _all/_settings #同上 GET _settings #獲取lqz和lqz2索引的配置信息 GET lqz,lqz2/_settings
三 更新索引
#修改索引副本數量為2 PUT lqz/_settings { "number_of_replicas": 2 } #如遇到報錯:cluster_block_exception,因為這是由於ES新節點的數據目錄data存儲空間不足,導致從master主節點接收同步數據的時候失敗,此時ES集群為了保護數據,會自動把索引分片index置為只讀read-only
PUT _all/_settings { "index": { "blocks": { "read_only_allow_delete": false } } }
四 刪除索引
#刪除lqz索引 DELETE lqz
一 新增文檔
#新增一個id為1的書籍(POST和PUT都可以) POST lqz/_doc/1/_create #POST lqz/_doc/1 #POST lqz/_doc 會自動創建id,必須用Post { "title":"紅樓夢", "price":12, "publish_addr":{ "province":"黑龍江", "city":"鶴崗" }, "publish_date":"2013-11-11", "read_num":199, "tag":["古典","名著"] }
二 查詢文檔
#查詢lqz索引下id為1的文檔 GET lqz/_doc/1 #查詢lqz索引下id為7的文檔,只要title字段 GET lqz/_doc/7?_source=title #查詢lqz索引下id為7的文檔,只要title和price字段 GET lqz/_doc/7?_source=title,price #查詢lqz索引下id為7的文檔,要全部字段 GET lqz/_doc/7?_source
三 修改文檔
#修改文檔(覆蓋修改,原來的字段就沒有了) PUT lqz/_doc/1 { "title":"xxxx", "price":333, "publish_addr":{ "province":"黑龍江", "city":"福州" } } #修改文檔,增量修改,只修改某個字段(注意是post)(一定要注意包在doc中) POST lqz/_update/1 { "doc":{ "title":"修改" } }
四 刪除文檔
#刪除文檔id為10的 DELETE lqz/_doc/10
五 批量操作之_mget
#批量獲取lqz索引_doc類型下id為2的數據和lqz2索引_doc類型下id為1的數據 GET _mget { "docs":[ { "_index":"lqz", "_type":"_doc", "_id":2 }, { "_index":"lqz2", "_type":"_doc", "_id":1 } ] } #批量獲取lqz索引下id為1和2的數據 GET lqz/_mget { "docs":[ { "_id":2 }, { "_id":1 } ] } #同上 GET lqz/_mget { "ids":[1,2] }
六 批量操作之 bulk
PUT test/_doc/2/_create { "field1" : "value22" } POST _bulk { "index" : { "_index" : "test", "_id" : "1" } } { "field1" : "value1" } { "delete" : { "_index" : "test", "_id" : "2" } } { "create" : { "_index" : "test", "_id" : "3" } } { "field1" : "value3" } { "update" : {"_id" : "1", "_index" : "test"} } { "doc" : {"field2" : "value2"} }
一 前言
elasticsearch提供兩種查詢方式:
- 一種是查詢字符串(query string)搜索:像傳遞URL參數一樣把查詢條件寫在請求的URL中,例如 `GET lqz/doc/_search?q=from:gu`。
-
另外一種是通過DSL語句來進行查詢,被稱為DSL查詢(Query DSL),DSL是Elasticsearch提供的一種豐富且靈活的查詢語言,該語言以json請求體的形式出現,通過restful請求與Elasticsearch進行交互。
二 准備數據

PUT lqz/doc/1 { "name":"顧老二", "age":30, "from": "gu", "desc": "皮膚黑、武器長、性格直", "tags": ["黑", "長", "直"] } PUT lqz/doc/2 { "name":"大娘子", "age":18, "from":"sheng", "desc":"膚白貌美,嬌憨可愛", "tags":["白", "富","美"] } PUT lqz/doc/3 { "name":"龍套偏房", "age":22, "from":"gu", "desc":"mmp,沒怎么看,不知道怎么形容", "tags":["造數據", "真","難"] } PUT lqz/doc/4 { "name":"石頭", "age":29, "from":"gu", "desc":"粗中有細,狐假虎威", "tags":["粗", "大","猛"] } PUT lqz/doc/5 { "name":"魏行首", "age":25, "from":"廣雲台", "desc":"仿佛兮若輕雲之蔽月,飄飄兮若流風之回雪,mmp,最后竟然沒有嫁給顧老二!", "tags":["閉月","羞花"] }
三 查詢字符串
GET lqz/doc/_search?q=from:gu
還是使用`GET`命令,通過`_search`查詢。查詢條件是什么呢?條件是`from`屬性為`gu`,也就是查詢顧家的人都有哪些。

結果如下 { "took" : 1, "timed_out" : false, "_shards" : { "total" : 5, "successful" : 5, "skipped" : 0, "failed" : 0 }, "hits" : { "total" : 3, "max_score" : 0.6931472, "hits" : [ { "_index" : "lqz", "_type" : "doc", "_id" : "4", "_score" : 0.6931472, "_source" : { "name" : "石頭", "age" : 29, "from" : "gu", "desc" : "粗中有細,狐假虎威", "tags" : [ "粗", "大", "猛" ] } }, { "_index" : "lqz", "_type" : "doc", "_id" : "1", "_score" : 0.2876821, "_source" : { "name" : "顧老二", "age" : 30, "from" : "gu", "desc" : "皮膚黑、武器長、性格直", "tags" : [ "黑", "長", "直" ] } }, { "_index" : "lqz", "_type" : "doc", "_id" : "3", "_score" : 0.2876821, "_source" : { "name" : "龍套偏房", "age" : 22, "from" : "gu", "desc" : "mmp,沒怎么看,不知道怎么形容", "tags" : [ "造數據", "真", "難" ] } } ] } }
我們來重點說下`hits`,`hits`是返回的結果集——所有`from`屬性為`gu`的結果集。重點中的重點是`_score`得分,得分是什么呢?根據算法算出跟查詢條件的匹配度,匹配度高得分就高。后面再說這個算法是怎么回事。
四 結構化查詢
我們現在使用DSL方式,來完成剛才的查詢,查看來自顧家的都有哪些人。
GET lqz/doc/_search { "query": { "match": { "from": "gu" } } }
上例,查詢條件是一步步構建出來的,將查詢條件添加到`match`中即可;`match`會返回所有`from`字段的值中含有`gu`的文檔。當然結果沒啥變化:

{ "took" : 0, "timed_out" : false, "_shards" : { "total" : 5, "successful" : 5, "skipped" : 0, "failed" : 0 }, "hits" : { "total" : 3, "max_score" : 0.6931472, "hits" : [ { "_index" : "lqz", "_type" : "doc", "_id" : "4", "_score" : 0.6931472, "_source" : { "name" : "石頭", "age" : 29, "from" : "gu", "desc" : "粗中有細,狐假虎威", "tags" : [ "粗", "大", "猛" ] } }, { "_index" : "lqz", "_type" : "doc", "_id" : "1", "_score" : 0.2876821, "_source" : { "name" : "顧老二", "age" : 30, "from" : "gu", "desc" : "皮膚黑、武器長、性格直", "tags" : [ "黑", "長", "直" ] } }, { "_index" : "lqz", "_type" : "doc", "_id" : "3", "_score" : 0.2876821, "_source" : { "name" : "龍套偏房", "age" : 22, "from" : "gu", "desc" : "mmp,沒怎么看,不知道怎么形容", "tags" : [ "造數據", "真", "難" ] } } ] } }
GET lqz/doc/_search { "query": { "match": { "from": "gu" } }, "sort": [ { "age": { "order": "desc" } } ] }

{ "took" : 0, "timed_out" : false, "_shards" : { "total" : 5, "successful" : 5, "skipped" : 0, "failed" : 0 }, "hits" : { "total" : 3, "max_score" : null, "hits" : [ { "_index" : "lqz", "_type" : "doc", "_id" : "1", "_score" : null, "_source" : { "name" : "顧老二", "age" : 30, "from" : "gu", "desc" : "皮膚黑、武器長、性格直", "tags" : [ "黑", "長", "直" ] }, "sort" : [ 30 ] }, { "_index" : "lqz", "_type" : "doc", "_id" : "4", "_score" : null, "_source" : { "name" : "石頭", "age" : 29, "from" : "gu", "desc" : "粗中有細,狐假虎威", "tags" : [ "粗", "大", "猛" ] }, "sort" : [ 29 ] }, { "_index" : "lqz", "_type" : "doc", "_id" : "3", "_score" : null, "_source" : { "name" : "龍套偏房", "age" : 22, "from" : "gu", "desc" : "mmp,沒怎么看,不知道怎么形容", "tags" : [ "造數據", "真", "難" ] }, "sort" : [ 22 ] } ] } }
上例中,結果是以降序排列方式返回的。
GET lqz/doc/_search { "query": { "match_all": {} }, "sort": [ { "age": { "order": "asc" } } ] }

{ "took" : 0, "timed_out" : false, "_shards" : { "total" : 5, "successful" : 5, "skipped" : 0, "failed" : 0 }, "hits" : { "total" : 5, "max_score" : null, "hits" : [ { "_index" : "lqz", "_type" : "doc", "_id" : "2", "_score" : null, "_source" : { "name" : "大娘子", "age" : 18, "from" : "sheng", "desc" : "膚白貌美,嬌憨可愛", "tags" : [ "白", "富", "美" ] }, "sort" : [ 18 ] }, { "_index" : "lqz", "_type" : "doc", "_id" : "3", "_score" : null, "_source" : { "name" : "龍套偏房", "age" : 22, "from" : "gu", "desc" : "mmp,沒怎么看,不知道怎么形容", "tags" : [ "造數據", "真", "難" ] }, "sort" : [ 22 ] }, { "_index" : "lqz", "_type" : "doc", "_id" : "5", "_score" : null, "_source" : { "name" : "魏行首", "age" : 25, "from" : "廣雲台", "desc" : "仿佛兮若輕雲之蔽月,飄飄兮若流風之回雪,mmp,最后竟然沒有嫁給顧老二!", "tags" : [ "閉月", "羞花" ] }, "sort" : [ 25 ] }, { "_index" : "lqz", "_type" : "doc", "_id" : "4", "_score" : null, "_source" : { "name" : "石頭", "age" : 29, "from" : "gu", "desc" : "粗中有細,狐假虎威", "tags" : [ "粗", "大", "猛" ] }, "sort" : [ 29 ] }, { "_index" : "lqz", "_type" : "doc", "_id" : "1", "_score" : null, "_source" : { "name" : "顧老二", "age" : 30, "from" : "gu", "desc" : "皮膚黑、武器長、性格直", "tags" : [ "黑", "長", "直" ] }, "sort" : [ 30 ] } ] } }
GET lqz/doc/_search { "query": { "match_all": {} }, "sort": [ { "age": { "order": "desc" } } ], "from": 2, "size": 1 } #上例,首先以`age`降序排序,查詢所有。並且在查詢的時候,添加兩個屬性`from`和`size`來控制查詢結果集的數據條數。 - from:從哪開始查 - size:返回幾條結果 # 有了這個查詢,如何分頁? 一頁有10條數據 第一頁: "from": 0, "size": 10 第二頁: "from": 10, "size": 10 第三頁: "from": 20, "size": 10

{ "took" : 0, "timed_out" : false, "_shards" : { "total" : 5, "successful" : 5, "skipped" : 0, "failed" : 0 }, "hits" : { "total" : 5, "max_score" : null, "hits" : [ { "_index" : "lqz", "_type" : "doc", "_id" : "5", "_score" : null, "_source" : { "name" : "魏行首", "age" : 25, "from" : "廣雲台", "desc" : "仿佛兮若輕雲之蔽月,飄飄兮若流風之回雪,mmp,最后竟然沒有嫁給顧老二!", "tags" : [ "閉月", "羞花" ] }, "sort" : [ 25 ] } ] } }
多個條件 - must(and) - should(or) - must_not(not) - filter
組合查詢之must
# 查詢from為gu並且age為30的數據 GET lqz/doc/_search { "query": { "bool": { "must": [ { "match": { "from": "gu" } }, { "match": { "age": "30" } } ] } } }

{ "took" : 8, "timed_out" : false, "_shards" : { "total" : 5, "successful" : 5, "skipped" : 0, "failed" : 0 }, "hits" : { "total" : 1, "max_score" : 1.287682, "hits" : [ { "_index" : "lqz", "_type" : "doc", "_id" : "1", "_score" : 1.287682, "_source" : { "name" : "顧老二", "age" : 30, "from" : "gu", "desc" : "皮膚黑、武器長、性格直", "tags" : [ "黑", "長", "直" ] } } ] } }
#查詢`from`為`gu`或者`tags`為`閉月`的數據 GET lqz/doc/_search { "query": { "bool": { "should": [ { "match": { "from": "gu" } }, { "match": { "tags": "閉月" } } ] } } }

{ "took" : 1, "timed_out" : false, "_shards" : { "total" : 5, "successful" : 5, "skipped" : 0, "failed" : 0 }, "hits" : { "total" : 4, "max_score" : 0.6931472, "hits" : [ { "_index" : "lqz", "_type" : "doc", "_id" : "4", "_score" : 0.6931472, "_source" : { "name" : "石頭", "age" : 29, "from" : "gu", "desc" : "粗中有細,狐假虎威", "tags" : [ "粗", "大", "猛" ] } }, { "_index" : "lqz", "_type" : "doc", "_id" : "5", "_score" : 0.5753642, "_source" : { "name" : "魏行首", "age" : 25, "from" : "廣雲台", "desc" : "仿佛兮若輕雲之蔽月,飄飄兮若流風之回雪,mmp,最后竟然沒有嫁給顧老二!", "tags" : [ "閉月", "羞花" ] } }, { "_index" : "lqz", "_type" : "doc", "_id" : "1", "_score" : 0.2876821, "_source" : { "name" : "顧老二", "age" : 30, "from" : "gu", "desc" : "皮膚黑、武器長、性格直", "tags" : [ "黑", "長", "直" ] } }, { "_index" : "lqz", "_type" : "doc", "_id" : "3", "_score" : 0.2876821, "_source" : { "name" : "龍套偏房", "age" : 22, "from" : "gu", "desc" : "mmp,沒怎么看,不知道怎么形容", "tags" : [ "造數據", "真", "難" ] } } ] } }
#查詢`from`既不是`gu`並且`tags`也不是`可愛`,還有`age`不是`18`的數據 GET lqz/doc/_search { "query": { "bool": { "must_not": [ { "match": { "from": "gu" } }, { "match": { "tags": "可愛" } }, { "match": { "age": 18 } } ] } } }
filter查詢
filter條件過濾查詢,過濾條件的范圍用`range`表示,`gt`表示大於
gt:大於 lt:小於 gte:大於等於 lte:小於等於
#查詢`from`為`gu`,`age`大於`25`的數據 GET lqz/doc/_search { "query": { "bool": { "must": [ { "match": { "from": "gu" } } ], "filter": { "range": { "age": { "gt": 25 } } } } } }

{ "took" : 2, "timed_out" : false, "_shards" : { "total" : 5, "successful" : 5, "skipped" : 0, "failed" : 0 }, "hits" : { "total" : 2, "max_score" : 0.6931472, "hits" : [ { "_index" : "lqz", "_type" : "doc", "_id" : "4", "_score" : 0.6931472, "_source" : { "name" : "石頭", "age" : 29, "from" : "gu", "desc" : "粗中有細,狐假虎威", "tags" : [ "粗", "大", "猛" ] } }, { "_index" : "lqz", "_type" : "doc", "_id" : "1", "_score" : 0.2876821, "_source" : { "name" : "顧老二", "age" : 30, "from" : "gu", "desc" : "皮膚黑、武器長、性格直", "tags" : [ "黑", "長", "直" ] } } ] } }
小結:
- `must`:與關系,相當於關系型數據庫中的`and`。
- `should`:或關系,相當於關系型數據庫中的`or`。
- `must_not`:非關系,相當於關系型數據庫中的`not`。
- `filter`:過濾條件。
- `range`:條件篩選范圍。
- `gt`:大於,相當於關系型數據庫中的`>`。
- `gte`:大於等於,相當於關系型數據庫中的`>=`。
- `lt`:小於,相當於關系型數據庫中的`<`。
- `lte`:小於等於,相當於關系型數據庫中的`<=`。
一 前言
在未來,一篇文檔可能有很多的字段,而每次查詢都默認給我們返回全部字段,在數據量很大的時候,這會降低查詢效率。比如我只想查姑娘的手機號,你一並給我個喜好啊、三圍什么的算什么? 所以,我們要對結果做一些過濾,明明白白地告訴elasticsearch只返回我們需要的字段。
PUT lqz/doc/1 { "name":"顧老二", "age":30, "from": "gu", "desc": "皮膚黑、武器長、性格直", "tags": ["黑", "長", "直"] }
三 結果過濾:_source
現在,在所有的結果中,我只需要查看`name`和`age`兩個屬性,其他的不要怎么辦?
GET lqz/doc/_search { "query": { "match": { "name": "顧老二" } }, "_source": ["name", "age"] }

{ "took" : 8, "timed_out" : false, "_shards" : { "total" : 5, "successful" : 5, "skipped" : 0, "failed" : 0 }, "hits" : { "total" : 1, "max_score" : 0.8630463, "hits" : [ { "_index" : "lqz", "_type" : "doc", "_id" : "1", "_score" : 0.8630463, "_source" : { "name" : "顧老二", "age" : 30 } } ] } }
在數據量很大的時候,我們需要什么字段,就返回什么字段就好了,提高查詢效率