一、創建索引時,自定義拼音分詞和ik分詞
# Create the index and register two custom analyzers that pair an ik
# tokenizer with the "my_pinyin" token filter.
#
# "ik_pinyin_analyzer" / "pinyin_analyzer" are the custom analyzer names
# that field mappings refer to.
#
# The analysis configuration is nested under "settings", the documented
# location for index settings in the create-index body.
#
# Options of the "my_pinyin" filter (type "pinyin"):
#   keep_separate_first_letter  when enabled, keeps each first letter as a
#                               separate term, e.g. 劉德華 > l, d, h.
#                               Default: false. Note: query results may be
#                               too fuzzy because such terms are very frequent.
#   keep_full_pinyin            when enabled, e.g. 劉德華 > [liu, de, hua].
#                               Default: true.
#   keep_original               when enabled, the original input is kept as
#                               well. Default: false.
#   limit_first_letter_length   maximum length of the first_letter result.
#                               Default: 16.
#   lowercase                   lowercase non-Chinese letters. Default: true.
#   remove_duplicated_term      when enabled, duplicated terms are removed to
#                               save index space, e.g. de的 > de.
#                               Default: false. Note: position-related queries
#                               may be affected.
PUT /my_index
{
  "settings": {
    "index": {
      "analysis": {
        "analyzer": {
          "ik_pinyin_analyzer": {
            "type": "custom",
            "tokenizer": "ik_smart",
            "filter": ["my_pinyin", "word_delimiter"]
          },
          "pinyin_analyzer": {
            "type": "custom",
            "tokenizer": "ik_max_word",
            "filter": ["my_pinyin", "word_delimiter"]
          }
        },
        "filter": {
          "my_pinyin": {
            "type": "pinyin",
            "keep_separate_first_letter": false,
            "keep_full_pinyin": true,
            "keep_original": true,
            "limit_first_letter_length": 16,
            "lowercase": true,
            "remove_duplicated_term": true
          }
        }
      }
    }
  }
}
二、創建mapping時,設置字段分詞(注:相同索引下建不同的type時,相同字段名屬性必須設一樣)
# Define the mapping for type "user" and attach the custom analyzer to the
# text fields.
#
#   analyzer   "ik_pinyin_analyzer" is the name of the custom analyzer.
#   fields.raw declared as "keyword": with the keyword type the field value
#              is not analyzed.
#   store      when store is enabled for a field, that field's data is stored
#              separately. If such a field is then requested, Elasticsearch
#              recognizes that it is stored and loads it from the field's own
#              stored block instead of from _source. It is disabled here,
#              written as the boolean false rather than the legacy string "no".
POST /my_index/user/_mapping
{
  "user": {
    "properties": {
      "id": {
        "type": "integer"
      },
      "userName": {
        "type": "text",
        "store": false,
        "term_vector": "with_positions_offsets",
        "analyzer": "ik_pinyin_analyzer",
        "boost": 10,
        "fielddata": true,
        "fields": {
          "raw": {
            "type": "keyword"
          }
        }
      },
      "reason": {
        "type": "text",
        "store": false,
        "term_vector": "with_positions_offsets",
        "analyzer": "ik_pinyin_analyzer",
        "boost": 10
      }
    }
  }
}
測試
# Index two sample documents (ids 1 and 2) into type "user".
PUT /my_index/user/1
{
  "id": 1,
  "userName": "劉德華",
  "reason": "大帥哥"
}

PUT /my_index/user/2
{
  "id": 2,
  "userName": "劉德華",
  "reason": "中華人民"
}
不分詞查詢
# Match query on the "userName.raw" sub-field.
GET /my_index/user/_search
{
  "query": {
    "match": {
      "userName.raw": "劉德華"
    }
  }
}

# Response:
{
  "took": 0,
  "timed_out": false,
  "_shards": {
    "total": 5,
    "successful": 5,
    "skipped": 0,
    "failed": 0
  },
  "hits": {
    "total": 2,
    "max_score": 0.2876821,
    "hits": [
      {
        "_index": "my_index",
        "_type": "user",
        "_id": "2",
        "_score": 0.2876821,
        "_source": {
          "id": 2,
          "userName": "劉德華",
          "reason": "中華人民"
        }
      },
      {
        "_index": "my_index",
        "_type": "user",
        "_id": "1",
        "_score": 0.2876821,
        "_source": {
          "id": 1,
          "userName": "劉德華",
          "reason": "大帥哥"
        }
      }
    ]
  }
}
分詞查詢
# Match query on the analyzed "userName" field using a single character.
GET /my_index/user/_search
{
  "query": {
    "match": {
      "userName": "劉"
    }
  }
}

# Response:
{
  "took": 0,
  "timed_out": false,
  "_shards": {
    "total": 5,
    "successful": 5,
    "skipped": 0,
    "failed": 0
  },
  "hits": {
    "total": 2,
    "max_score": 0.31331712,
    "hits": [
      {
        "_index": "my_index",
        "_type": "user",
        "_id": "2",
        "_score": 0.31331712,
        "_source": {
          "id": 2,
          "userName": "劉德華",
          "reason": "中華人民"
        }
      },
      {
        "_index": "my_index",
        "_type": "user",
        "_id": "1",
        "_score": 0.31331712,
        "_source": {
          "id": 1,
          "userName": "劉德華",
          "reason": "大帥哥"
        }
      }
    ]
  }
}
拼音分詞
# Match query on the "reason" field using a pinyin term.
GET /my_index/user/_search
{
  "query": {
    "match": {
      "reason": "shuai"
    }
  }
}

# Response:
{
  "took": 0,
  "timed_out": false,
  "_shards": {
    "total": 5,
    "successful": 5,
    "skipped": 0,
    "failed": 0
  },
  "hits": {
    "total": 1,
    "max_score": 3.4884284,
    "hits": [
      {
        "_index": "my_index",
        "_type": "user",
        "_id": "1",
        "_score": 3.4884284,
        "_source": {
          "id": 1,
          "userName": "劉德華",
          "reason": "大帥哥"
        }
      }
    ]
  }
}
分組聚合
# Pinyin match on "userName" combined with a terms aggregation that buckets
# the matching documents by "userName.raw".
GET /my_index/user/_search
{
  "size": 2,
  "query": {
    "match": {
      "userName": "liu"
    }
  },
  "aggs": {
    "group_by_meetingType": {
      "terms": {
        "field": "userName.raw"
      }
    }
  }
}

# Response:
{
  "took": 1,
  "timed_out": false,
  "_shards": {
    "total": 5,
    "successful": 5,
    "skipped": 0,
    "failed": 0
  },
  "hits": {
    "total": 2,
    "max_score": 3.133171,
    "hits": [
      {
        "_index": "my_index",
        "_type": "user",
        "_id": "2",
        "_score": 3.133171,
        "_source": {
          "id": 2,
          "userName": "劉德華",
          "reason": "中華人民"
        }
      },
      {
        "_index": "my_index",
        "_type": "user",
        "_id": "1",
        "_score": 3.133171,
        "_source": {
          "id": 1,
          "userName": "劉德華",
          "reason": "大帥哥"
        }
      }
    ]
  },
  "aggregations": {
    "group_by_meetingType": {
      "doc_count_error_upper_bound": 0,
      "sum_other_doc_count": 0,
      "buckets": [
        {
          "key": "劉德華",
          "doc_count": 2
        }
      ]
    }
  }
}