elasticsearch搜索提示

本文轉載自查看原文 2017-06-03 14:27 1942 scrapy-redis-elasticsearch

elasticsearch搜索提示（補全）接口需要新增suggest字段並設type為:completion,結合到scrapy,修改es_types.py文件:

from datetime import datetime
from elasticsearch_dsl import DocType, Date, Nested, Boolean, analyzer, InnerObjectWrapper, Completion, Keyword, Text, Integer
from elasticsearch_dsl.connections import connections
# Open the elasticsearch_dsl connection to 'localhost'; this runs as soon as
# the module is imported.
connections.create_connection(hosts=['localhost'])
class ArticleType(DocType):
    """Elasticsearch document type for a crawled article.

    Documents go to index 'jobbole' with doc type 'article' (see ``Meta``).
    This is the first attempt shown in the article: ``suggest`` receives the
    analyzer as a plain name, which is reported to raise an error.
    """
    # Completion field backing the search-suggestion (autocomplete) feature.
    # NOTE: passing the analyzer as a plain string like this raises an error
    # here because of an issue in the library source code.
    suggest = Completion(analyzer="ik_max_word")
    # Full-text fields are analyzed with the "ik_max_word" analyzer.
    title = Text(analyzer="ik_max_word")
    create_date = Date()
    # Integer counters stored with the article.
    praise_nums = Integer()
    fav_nums = Integer()
    comment_nums = Integer()
    tags = Text(analyzer="ik_max_word")
    # Keyword fields are declared without an analyzer.
    front_image_url = Keyword()
    url_object_id = Keyword()
    front_image_path = Keyword()
    url = Keyword()
    content = Text(analyzer="ik_max_word")

    class Meta:
        # Target index name and document type name.
        index = 'jobbole'
        doc_type = 'article'
if __name__ == '__main__':
    # Only when this file is executed directly: call ArticleType.init()
    # (presumably the elasticsearch_dsl call that sets up the mapping for
    # the class in Elasticsearch -- confirm against the library docs).
    ArticleType.init()

解決辦法：自定義CustomAnalyzer類，繼承自elasticsearch_dsl.analysis下的CustomAnalyzer類，並重寫get_analysis_definition方法：

from datetime import datetime
from elasticsearch_dsl import DocType, Date, Nested, Boolean, \
    analyzer, InnerObjectWrapper, Completion, Keyword, Text, Integer

from elasticsearch_dsl.analysis import CustomAnalyzer as _CustomAnalyzer

from elasticsearch_dsl.connections import connections
# Open the elasticsearch_dsl connection to "localhost"; this runs as soon as
# the module is imported.
connections.create_connection(hosts=["localhost"])

class CustomAnalyzer(_CustomAnalyzer):
    """Analyzer wrapper whose analysis definition is always empty.

    Subclasses elasticsearch_dsl's ``CustomAnalyzer`` and overrides
    ``get_analysis_definition`` to return ``{}``.
    NOTE(review): presumably this stops elasticsearch_dsl from emitting an
    index-level definition for the analyzer, which appears to be what made
    the plain-string ``Completion(analyzer="ik_max_word")`` fail -- confirm
    against the library source.
    """

    def get_analysis_definition(self):
        """Return an empty dict instead of the inherited definition."""
        return {}

# "ik_max_word" analyzer combined with the lowercase filter: case conversion,
# so that letter case does not affect searching.
ik_analyzer = CustomAnalyzer("ik_max_word", filter=["lowercase"])
class ArticleType(DocType):
    """Elasticsearch document type for a Jobbole article.

    Documents go to index "jobbole" with doc type "article" (see ``Meta``).
    """
    # Completion field backing the search-suggestion feature; it is given the
    # module-level ik_analyzer object rather than a plain analyzer name.
    suggest = Completion(analyzer=ik_analyzer)
    # Full-text fields are analyzed with the "ik_max_word" analyzer.
    title = Text(analyzer="ik_max_word")
    create_date = Date()
    # Keyword fields are declared without an analyzer.
    url = Keyword()
    url_object_id = Keyword()
    front_image_url = Keyword()
    front_image_path = Keyword()
    # Integer counters stored with the article.
    praise_nums = Integer()
    comment_nums = Integer()
    fav_nums = Integer()
    tags = Text(analyzer="ik_max_word")
    content = Text(analyzer="ik_max_word")

    class Meta:
        # Target index name and document type name.
        index = "jobbole"
        doc_type = "article"

if __name__ == "__main__":
    # Only when this file is executed directly: call ArticleType.init()
    # (presumably the elasticsearch_dsl call that sets up the mapping for
    # the class in Elasticsearch -- confirm against the library docs).
    ArticleType.init()

在item中生成搜索建議詞：

from spider.models.es_types import ArticleType
from elasticsearch_dsl.connections import connections
# Module-level client object used by gen_suggests(). It is created by passing
# ArticleType._doc_type.using as the first argument of create_connection
# (presumably the connection alias -- confirm against elasticsearch_dsl).
es = connections.create_connection(ArticleType._doc_type.using)
def gen_suggests(index, info_tuple):
    """Build the list of search-suggestion entries from weighted strings.

    Every non-empty text is sent to the Elasticsearch analyze API through
    the module-level ``es`` client (``ik_max_word`` analyzer, ``lowercase``
    filter), which handles tokenization and case conversion. Tokens longer
    than one character that were not already emitted for an earlier pair
    become the ``input`` of one suggestion entry.

    Args:
        index: Name of the index passed to the analyze call.
        info_tuple: Iterable of ``(text, weight)`` pairs. When a token occurs
            in several texts, the earliest pair keeps it.

    Returns:
        A list of ``{'input': [tokens...], 'weight': weight}`` dicts. Pairs
        with empty text, or with no new tokens, contribute nothing.
    """
    used_words = set()  # tokens already assigned to an earlier entry
    suggests = []
    for text, weight in info_tuple:
        if not text:
            continue

        words = es.indices.analyze(index=index, analyzer="ik_max_word", params={'filter': ["lowercase"]}, body=text)
        analyzed_words = {r["token"] for r in words["tokens"] if len(r["token"]) > 1}
        new_words = analyzed_words - used_words
        if not new_words:
            continue

        # Remember what was emitted; without this the subtraction above is
        # always against an empty set and later texts repeat earlier tokens.
        used_words |= new_words
        suggests.append({'input': list(new_words), 'weight': weight})
    return suggests


class JobboleArticleItem(scrapy.Item):
    """Scrapy item for one Jobbole article, with an Elasticsearch save hook.

    The field declarations attach the input/output processors used when the
    item is loaded; ``save_to_elasticsearch`` copies the populated values
    into an ``ArticleType`` document and saves it.
    """
    title = scrapy.Field()
    create_date = scrapy.Field(input_processor=MapCompose(date_convert))
    praise_nums = scrapy.Field(input_processor=MapCompose(number_convert))
    fav_nums = scrapy.Field(input_processor=MapCompose(number_convert))
    comment_nums = scrapy.Field(input_processor=MapCompose(number_convert))
    tags = scrapy.Field(input_processor=MapCompose(remove_comment_tags), output_processor=Join(','))
    front_image_url = scrapy.Field(output_processor=MapCompose(returnValue))
    url_object_id = scrapy.Field(input_processor=MapCompose(get_md5))
    front_image_path = scrapy.Field()
    url = scrapy.Field()
    content = scrapy.Field()

    def save_to_elasticsearch(self):
        """Save this item to Elasticsearch as an ``ArticleType`` document.

        ``url_object_id`` is used as the document id. ``front_image_path``
        is copied only when the item contains it; every other field is read
        unconditionally, so a missing one raises ``KeyError``.

        Returns:
            None.
        """
        article = ArticleType()
        article.title = self['title']
        article.create_date = self['create_date']
        article.content = remove_tags(self['content'])  # remove_tags() strips HTML tags
        article.front_image_url = self['front_image_url']
        if 'front_image_path' in self:
            article.front_image_path = self['front_image_path']
        article.praise_nums = self['praise_nums']
        article.fav_nums = self['fav_nums']
        article.comment_nums = self['comment_nums']
        article.url = self['url']
        article.tags = self['tags']
        article.meta.id = self['url_object_id']

        # Generate the search-suggestion terms; title is weighted 10, tags 7.
        article.suggest = gen_suggests(ArticleType._doc_type.index, ((article.title, 10), (article.tags, 7)))
        article.save()


# Module-level alias so that existing ``save_to_elasticsearch(item)`` calls
# keep working now that the function is a method of the item class.
save_to_elasticsearch = JobboleArticleItem.save_to_elasticsearch

免責聲明！

本站轉載的文章為個人學習借鑒使用，本站對版權不負任何法律責任。如果侵犯了您的隱私權益，請聯系本站郵箱yoyou2525@163.com刪除。

猜您在找 使用ElasticSearch實現搜索時即時提示與全文搜索功能【Elasticsearch 7 搜索之路】（一）什么是 Elasticsearch？ ElasticSearch（五）：簡單的ElasticSearch搜索功能 ElasticSearch SearchApi 高亮搜索 Elasticsearch 多字段搜索 Elasticsearch系列---初識搜索 ElasticSearch搜索解析 elasticsearch http 搜索測試 Elasticsearch 教程--搜索 elasticsearch多種搜索方式