官方文檔:https://www.elastic.co/guide/en/elasticsearch/reference/5.0/suggester-context.html
下面所有演示基於elasticsearch5.x和Python3.x
最近項目在使用elasticsearch的補全功能時,需要為所有文章(article)的作者名字(author)搜索提供補全,文章的mapping大致如下:
# Field mapping for the `article` document type.
ARTICLE = {
    'properties': {
        'id': {
            'type': 'integer',
            # Elasticsearch 5.x expects a boolean here; the legacy
            # 'not_analyzed' string is only tolerated with a deprecation
            # warning and means the same as True (the field is indexed).
            'index': True,
        },
        'author': {
            'type': 'text',
        },
        # Author name as served by the completion suggester.
        'author_completion': {
            'type': 'completion',
        },
        # Take-down flag: True marks an article that has been removed.
        'removed': {
            'type': 'boolean',
        },
    },
}

# Index body that registers ARTICLE under the 'article' mapping type.
MAPPINGS = {
    'mappings': {
        'article': ARTICLE,
    },
}
現在的需求是:下架狀態(removed為True)的文章不出現在補全提示中。
作為演示先插入部分數據,代碼如下
#!/usr/bin/env python
# -*- coding: utf-8 -*-
from elasticsearch.helpers import bulk
from elasticsearch import Elasticsearch
# Connection settings for the Elasticsearch node used by the demo.
ES_HOSTS = [{'host': 'localhost', 'port': 9200}]
ES = Elasticsearch(hosts=ES_HOSTS)

# Index name and mapping type that hold the article documents.
INDEX = 'test_article'
TYPE = 'article'
# Field mapping for the `article` document type.
ARTICLE = {
    'properties': {
        'id': {
            'type': 'integer',
            # Elasticsearch 5.x expects a boolean here; the legacy
            # 'not_analyzed' string is only tolerated with a deprecation
            # warning and means the same as True (the field is indexed).
            'index': True,
        },
        'author': {
            'type': 'text',
        },
        # Author name as served by the completion suggester.
        'author_completion': {
            'type': 'completion',
        },
        # Take-down flag: True marks an article that has been removed.
        'removed': {
            'type': 'boolean',
        },
    },
}

# Index body that registers ARTICLE under the 'article' mapping type.
MAPPINGS = {
    'mappings': {
        'article': ARTICLE,
    },
}
def create_index():
    """Recreate the demo index before any data is inserted.

    An existing index named ``INDEX`` is deleted first (a 404 response for a
    missing index is ignored), then the index is created with ``MAPPINGS``
    as its body.
    """
    indices = ES.indices
    indices.delete(index=INDEX, ignore=404)
    indices.create(index=INDEX, body=MAPPINGS)
def insert_data():
    """Bulk-index the four demo articles and print the success/failed counts.

    Every document uses its own ``id`` as the Elasticsearch ``_id`` and
    stores the author name in both ``author`` and ``author_completion``.
    """
    # (id, author name, removed flag) for each demo article.
    rows = (
        (1, 'tom', False),
        (2, 'tom_cat', True),
        (3, 'kitty', False),
        (4, 'tomato', False),
    )
    test_datas = [
        {
            'id': doc_id,
            'author': name,
            'author_completion': name,
            'removed': removed,
        }
        for doc_id, name, removed in rows
    ]
    bulk_data = [
        {
            '_index': INDEX,
            '_type': TYPE,
            '_id': doc.get('id'),
            '_source': doc,
        }
        for doc in test_datas
    ]
    success, failed = bulk(client=ES, actions=bulk_data, stats_only=True)
    print('success', success, 'failed', failed)
if __name__ == '__main__':
    # Rebuild the index first, then load the demo documents into it.
    create_index()
    insert_data()
成功插入4條測試數據,下面測試獲取作者名稱補全建議,代碼如下
def get_suggestions(keywords):
    """Request author-name completions for the prefix ``keywords``.

    This first attempt places a plain ``term`` query on ``removed`` next to
    the suggester, hoping it will hide removed articles, and returns the raw
    search response.
    """
    completion_suggester = {
        'prefix': keywords,
        'completion': {
            'field': 'author_completion',
            'size': 10,
        },
    }
    # The author's first, naive idea: an ordinary filter on `removed` should
    # be enough to keep removed articles out of the suggestions.
    not_removed = {
        'term': {
            'removed': False,
        },
    }
    body = {
        # 'size': 0 would leave the regular hit fields (author, id, ...) out
        # of the response; it stays disabled so this test returns them too.
        '_source': 'suggest',
        'suggest': {
            'author_prefix_suggest': completion_suggester,
        },
        'query': not_removed,
    }
    return ES.search(index=INDEX, doc_type=TYPE, body=body)
if __name__ == '__main__':
    # The index and data were loaded by the previous script; uncomment the
    # two calls below to rebuild them.
    # create_index()
    # insert_data()
    suggestions = get_suggestions('t')
    print(suggestions)

# Sample response printed by the run above, kept as a bare string literal.
"""
suggestions = {
'took': 0,
'timed_out': False,
'_shards': {
'total': 5,
'successful': 5,
'skipped': 0,
'failed': 0
},
'hits': {
'total': 3,
'max_score': 0.6931472,
'hits': [
{'_index': 'test_article', '_type': 'article', '_id': '4', '_score': 0.6931472,
'_source': {}},
{'_index': 'test_article', '_type': 'article', '_id': '1', '_score': 0.2876821,
'_source': {}},
{'_index': 'test_article', '_type': 'article', '_id': '3', '_score': 0.2876821,
'_source': {}}]},
'suggest': {
'author_prefix_suggest': [{'text': 't', 'offset': 0, 'length': 1, 'options': [
{'text': 'tom', '_index': 'test_article', '_type': 'article', '_id': '1', '_score': 1.0,
'_source': {}},
{'text': 'tom_cat', '_index': 'test_article', '_type': 'article', '_id': '2', '_score': 1.0,
'_source': {}},
{'text': 'tomato', '_index': 'test_article', '_type': 'article', '_id': '4', '_score': 1.0,
'_source': {}}]}]
}
}
"""
發現,removed為True的tom_cat赫然在列,明明加了
'query': {
'term': {
'removed': False
}
}
卻沒有起作用,難道elasticsearch不支持這種需求!?怎麼可能……
查閱文檔發現解決方法在這裡:https://www.elastic.co/guide/en/elasticsearch/reference/5.0/suggester-context.html
找到問題所在,首先改造mapping,並重新錄入測試數據如下
#!/usr/bin/env python
# -*- coding: utf-8 -*-
from elasticsearch.helpers import bulk
from elasticsearch import Elasticsearch
# Connection settings for the Elasticsearch node used by the demo.
ES_HOSTS = [{'host': 'localhost', 'port': 9200}]
ES = Elasticsearch(hosts=ES_HOSTS)

# Index name and mapping type that hold the article documents.
INDEX = 'test_article'
TYPE = 'article'
# Field mapping for the `article` document type, now with a suggester context.
ARTICLE = {
    'properties': {
        'id': {
            'type': 'integer',
            # Elasticsearch 5.x expects a boolean here; the legacy
            # 'not_analyzed' string is only tolerated with a deprecation
            # warning and means the same as True (the field is indexed).
            'index': True,
        },
        'author': {
            'type': 'text',
        },
        'author_completion': {
            'type': 'completion',
            # This is the key part: a category context named 'removed_tab'
            # whose value is read from the document's `removed` field.
            'contexts': [
                {
                    'name': 'removed_tab',
                    'type': 'category',
                    'path': 'removed',
                },
            ],
        },
        # NOTE: still boolean in this revision. Indexing documents against
        # this mapping is rejected, because a context path only accepts
        # keyword and text fields (see the BulkIndexError quoted below the
        # script).
        'removed': {
            'type': 'boolean',
        },
    },
}

# Index body that registers ARTICLE under the 'article' mapping type.
MAPPINGS = {
    'mappings': {
        'article': ARTICLE,
    },
}
def create_index():
    """Recreate the demo index before any data is inserted.

    An existing index named ``INDEX`` is deleted first (a 404 response for a
    missing index is ignored), then the index is created with ``MAPPINGS``
    as its body.
    """
    indices = ES.indices
    indices.delete(index=INDEX, ignore=404)
    indices.create(index=INDEX, body=MAPPINGS)
def insert_data():
    """Bulk-index the four demo articles and print the success/failed counts.

    Every document uses its own ``id`` as the Elasticsearch ``_id`` and
    stores the author name in both ``author`` and ``author_completion``.
    ``removed`` is sent as a Python boolean here.
    """
    # (id, author name, removed flag) for each demo article.
    rows = (
        (1, 'tom', False),
        (2, 'tom_cat', True),
        (3, 'kitty', False),
        (4, 'tomato', False),
    )
    test_datas = [
        {
            'id': doc_id,
            'author': name,
            'author_completion': name,
            'removed': removed,
        }
        for doc_id, name, removed in rows
    ]
    bulk_data = [
        {
            '_index': INDEX,
            '_type': TYPE,
            '_id': doc.get('id'),
            '_source': doc,
        }
        for doc in test_datas
    ]
    success, failed = bulk(client=ES, actions=bulk_data, stats_only=True)
    print('success', success, 'failed', failed)
if __name__ == '__main__':
    # Rebuild the index with the context-enabled mapping, then reload data.
    create_index()
    insert_data()
Duang!意想不到的問題出現了
elasticsearch.helpers.BulkIndexError: ('4 document(s) failed to index.', [{'index': {'_index': 'test_article', '_type': 'article', '_id': '1', 'status': 400, 'error': {'type': 'illegal_argument_exception', 'reason': 'Failed to parse context field [removed], only keyword and text fields are accepted'}, 'data': {'id': 1, 'author': 'tom', 'author_completion': 'tom', 'removed': False}}}, {'index': {'_index': 'test_article', '_type': 'article', '_id': '2', 'status': 400, 'error': {'type': 'illegal_argument_exception', 'reason': 'Failed to parse context field [removed], only keyword and text fields are accepted'}, 'data': {'id': 2, 'author': 'tom_cat', 'author_completion': 'tom_cat', 'removed': True}}}, {'index': {'_index': 'test_article', '_type': 'article', '_id': '3', 'status': 400, 'error': {'type': 'illegal_argument_exception', 'reason': 'Failed to parse context field [removed], only keyword and text fields are accepted'}, 'data': {'id': 3, 'author': 'kitty', 'author_completion': 'kitty', 'removed': False}}}, {'index': {'_index': 'test_article', '_type': 'article', '_id': '4', 'status': 400, 'error': {'type': 'illegal_argument_exception', 'reason': 'Failed to parse context field [removed], only keyword and text fields are accepted'}, 'data': {'id': 4, 'author': 'tomato', 'author_completion': 'tomato', 'removed': False}}}])
意思是context只支持keyword和text類型,而上面removed類型為boolean,好吧,再改造mapping,將mapping的removed改為keyword類型……
#!/usr/bin/env python
# -*- coding: utf-8 -*-
from elasticsearch.helpers import bulk
from elasticsearch import Elasticsearch
# Connection settings for the Elasticsearch node used by the demo.
ES_HOSTS = [{'host': 'localhost', 'port': 9200}]
ES = Elasticsearch(hosts=ES_HOSTS)

# Index name and mapping type that hold the article documents.
INDEX = 'test_article'
TYPE = 'article'
# Field mapping for the `article` document type, with a keyword-backed
# suggester context.
ARTICLE = {
    'properties': {
        'id': {
            'type': 'integer',
            # Elasticsearch 5.x expects a boolean here; the legacy
            # 'not_analyzed' string is only tolerated with a deprecation
            # warning and means the same as True (the field is indexed).
            'index': True,
        },
        'author': {
            'type': 'text',
        },
        'author_completion': {
            'type': 'completion',
            # This is the key part: a category context named 'removed_tab'
            # whose value is read from the document's `removed` field.
            'contexts': [
                {
                    'name': 'removed_tab',
                    'type': 'category',
                    'path': 'removed',
                },
            ],
        },
        # Stored as keyword (not boolean) because a context path only
        # accepts keyword and text fields.
        'removed': {
            'type': 'keyword',
        },
    },
}

# Index body that registers ARTICLE under the 'article' mapping type.
MAPPINGS = {
    'mappings': {
        'article': ARTICLE,
    },
}
def create_index():
    """Recreate the demo index before any data is inserted.

    An existing index named ``INDEX`` is deleted first (a 404 response for a
    missing index is ignored), then the index is created with ``MAPPINGS``
    as its body.
    """
    indices = ES.indices
    indices.delete(index=INDEX, ignore=404)
    indices.create(index=INDEX, body=MAPPINGS)
def insert_data():
    """Bulk-index the four demo articles and print the success/failed counts.

    Every document uses its own ``id`` as the Elasticsearch ``_id`` and
    stores the author name in both ``author`` and ``author_completion``.
    ``removed`` is sent as the string 'True' or 'False' to match the keyword
    mapping of that field.
    """
    # (id, author name, removed flag as a string) for each demo article.
    rows = (
        (1, 'tom', 'False'),
        (2, 'tom_cat', 'True'),
        (3, 'kitty', 'False'),
        (4, 'tomato', 'False'),
    )
    test_datas = [
        {
            'id': doc_id,
            'author': name,
            'author_completion': name,
            'removed': removed,
        }
        for doc_id, name, removed in rows
    ]
    bulk_data = [
        {
            '_index': INDEX,
            '_type': TYPE,
            '_id': doc.get('id'),
            '_source': doc,
        }
        for doc in test_datas
    ]
    success, failed = bulk(client=ES, actions=bulk_data, stats_only=True)
    print('success', success, 'failed', failed)
if __name__ == '__main__':
    # Rebuild the index with the keyword-based mapping, then reload data.
    create_index()
    insert_data()
mission success。看看表結構ok

接下來就是獲取補全建議
def get_suggestions(keywords):
    """Request author-name completions for the prefix ``keywords``.

    The suggester is restricted through the ``removed_tab`` context to
    entries whose ``removed`` value is the string 'False'. Returns the raw
    search response.
    """
    # Keep only completions whose `removed` value is 'False'.
    # NOTE (author's observation): contexts must not list more than one tab;
    # adding e.g. 'state_tab': ['1', ] makes the contexts filter stop working.
    context_filter = {
        'removed_tab': ['False'],
    }
    completion_suggester = {
        'prefix': keywords,
        'completion': {
            'field': 'author_completion',
            'size': 10,
            'contexts': context_filter,
        },
    }
    body = {
        'size': 0,
        '_source': 'suggest',
        'suggest': {
            'author_prefix_suggest': completion_suggester,
        },
    }
    return ES.search(index=INDEX, doc_type=TYPE, body=body)
if __name__ == '__main__':
    # The index and data were loaded by the previous script; uncomment the
    # two calls below to rebuild them.
    # create_index()
    # insert_data()
    suggestions = get_suggestions('t')
    print(suggestions)

# Sample response printed by the run above, kept as a bare string literal.
"""
suggestions = {
'took': 0,
'timed_out': False,
'_shards': {
'total': 5,
'successful': 5,
'skipped': 0, 'failed': 0
},
'hits': {
'total': 0,
'max_score': 0.0,
'hits': []
},
'suggest': {
'author_prefix_suggest': [
{'text': 't', 'offset': 0, 'length': 1, 'options': [
{'text': 'tom', '_index': 'test_article', '_type': 'article', '_id': '1', '_score': 1.0,
'_source': {},
'contexts': {'removed_tab': ['False']}},
{'text': 'tomato', '_index': 'test_article', '_type': 'article', '_id': '4', '_score': 1.0,
'_source': {},
'contexts': {'removed_tab': ['False']}}]}]}}
"""
發現,removed為'True'的tom_cat被篩選掉了,大功告成!
