Elasticsearch拼音和ik分詞器的結合應用

本文轉載自查看原文 2018-10-31 16:35 1988

一、創建索引時，自定義拼音分詞和ik分詞

# Create the index with two custom analyzers that chain an IK tokenizer with the
# pinyin token filter "my_pinyin" and the built-in "word_delimiter" filter.
# The analysis configuration sits under the top-level "settings" key of the
# create-index request body.
#
# The notes below were inline annotations in the original snippet; they are kept
# here as comment lines so that the request body itself is valid JSON.
#   ik_pinyin_analyzer         - name of the custom analyzer (tokenizer: ik_smart)
#   pinyin_analyzer            - second custom analyzer (tokenizer: ik_max_word)
#   keep_separate_first_letter - when enabled, the first letters are kept as separate
#                                terms, e.g. 劉德華 > l, d, h; default: false.
#                                Note: results may become too fuzzy because such
#                                terms are very frequent.
#   keep_full_pinyin           - when enabled, e.g. 劉德華 > [liu, de, hua]; default: true
#   keep_original              - when enabled, the original input is kept as well;
#                                default: false
#   limit_first_letter_length  - maximum length of the first_letter result; default: 16
#   lowercase                  - lowercase non-Chinese letters; default: true
#   remove_duplicated_term     - when enabled, duplicated terms are removed to save
#                                index space, e.g. de的 > de; default: false.
#                                Note: position-related queries may be affected.
PUT /my_index
{
    "settings": {
        "index": {
            "analysis": {
                "analyzer": {
                    "ik_pinyin_analyzer": {
                        "type": "custom",
                        "tokenizer": "ik_smart",
                        "filter": ["my_pinyin", "word_delimiter"]
                    },
                    "pinyin_analyzer": {
                        "type": "custom",
                        "tokenizer": "ik_max_word",
                        "filter": ["my_pinyin", "word_delimiter"]
                    }
                },
                "filter": {
                    "my_pinyin": {
                        "type": "pinyin",
                        "keep_separate_first_letter": false,
                        "keep_full_pinyin": true,
                        "keep_original": true,
                        "limit_first_letter_length": 16,
                        "lowercase": true,
                        "remove_duplicated_term": true
                    }
                }
            }
        }
    }
}

二、創建mapping時，設置字段的分詞器（注：在同一索引下創建不同的type時，同名字段的屬性必須設置一致）

# Define the mapping of type "user": an integer "id" and two text fields,
# "userName" and "reason", both analyzed with the custom analyzer
# "ik_pinyin_analyzer".
#
# The notes below were inline annotations in the original snippet; they are kept
# here as comment lines so that the request body itself is valid JSON.
#   analyzer     - name of the custom analyzer to apply to the field
#   userName.raw - sub-field of type "keyword"; a keyword field is not analyzed
#   store        - when a field's store is true, its data is stored separately.
#                  If such a stored field is requested, Elasticsearch recognises
#                  that it is already stored and loads it from the field's own
#                  storage instead of from _source. Both fields here disable it.
#                  Written as the JSON boolean false rather than the string "no".
POST /my_index/user/_mapping
{
    "user": {
        "properties": {
            "id": {
                "type": "integer"
            },
            "userName": {
                "type": "text",
                "store": false,
                "term_vector": "with_positions_offsets",
                "analyzer": "ik_pinyin_analyzer",
                "boost": 10,
                "fielddata": true,
                "fields": {
                    "raw": {
                        "type": "keyword"
                    }
                }
            },
            "reason": {
                "type": "text",
                "store": false,
                "term_vector": "with_positions_offsets",
                "analyzer": "ik_pinyin_analyzer",
                "boost": 10
            }
        }
    }
}

測試

PUT /my_index/user/1
{
  "id":1,
  "userName":"劉德華",
  "reason":"大帥哥"
}

PUT /my_index/user/2
{
  "id":2,
  "userName":"劉德華",
  "reason":"中華人民"
}

不分詞查詢

GET /my_index/user/_search
{
  "query": {
    "match": {
      "userName.raw": "劉德華"
    }
  }
}


{
  "took": 0,
  "timed_out": false,
  "_shards": {
    "total": 5,
    "successful": 5,
    "skipped": 0,
    "failed": 0
  },
  "hits": {
    "total": 2,
    "max_score": 0.2876821,
    "hits": [
      {
        "_index": "my_index",
        "_type": "user",
        "_id": "2",
        "_score": 0.2876821,
        "_source": {
          "id": 2,
          "userName": "劉德華",
          "reason": "中華人民"
        }
      },
      {
        "_index": "my_index",
        "_type": "user",
        "_id": "1",
        "_score": 0.2876821,
        "_source": {
          "id": 1,
          "userName": "劉德華",
          "reason": "大帥哥"
        }
      }
    ]
  }
}

分詞查詢

GET /my_index/user/_search
{
  "query": {
    "match": {
      "userName": "劉"
    }
  }
}

{
  "took": 0,
  "timed_out": false,
  "_shards": {
    "total": 5,
    "successful": 5,
    "skipped": 0,
    "failed": 0
  },
  "hits": {
    "total": 2,
    "max_score": 0.31331712,
    "hits": [
      {
        "_index": "my_index",
        "_type": "user",
        "_id": "2",
        "_score": 0.31331712,
        "_source": {
          "id": 2,
          "userName": "劉德華",
          "reason": "中華人民"
        }
      },
      {
        "_index": "my_index",
        "_type": "user",
        "_id": "1",
        "_score": 0.31331712,
        "_source": {
          "id": 1,
          "userName": "劉德華",
          "reason": "大帥哥"
        }
      }
    ]
  }
}

拼音分詞

GET /my_index/user/_search
{
  "query": {
    "match": {
      "reason": "shuai"
    }
  }
}


{
  "took": 0,
  "timed_out": false,
  "_shards": {
    "total": 5,
    "successful": 5,
    "skipped": 0,
    "failed": 0
  },
  "hits": {
    "total": 1,
    "max_score": 3.4884284,
    "hits": [
      {
        "_index": "my_index",
        "_type": "user",
        "_id": "1",
        "_score": 3.4884284,
        "_source": {
          "id": 1,
          "userName": "劉德華",
          "reason": "大帥哥"
        }
      }
    ]
  }
}

分組聚合

GET /my_index/user/_search
{ 
  "size":2,
  "query": {
    "match": {
      "userName": "liu"
    }
  },
  "aggs": {
    "group_by_meetingType": {
      "terms": {
        "field": "userName.raw"
      }
    }
  }
}

{
  "took": 1,
  "timed_out": false,
  "_shards": {
    "total": 5,
    "successful": 5,
    "skipped": 0,
    "failed": 0
  },
  "hits": {
    "total": 2,
    "max_score": 3.133171,
    "hits": [
      {
        "_index": "my_index",
        "_type": "user",
        "_id": "2",
        "_score": 3.133171,
        "_source": {
          "id": 2,
          "userName": "劉德華",
          "reason": "中華人民"
        }
      },
      {
        "_index": "my_index",
        "_type": "user",
        "_id": "1",
        "_score": 3.133171,
        "_source": {
          "id": 1,
          "userName": "劉德華",
          "reason": "大帥哥"
        }
      }
    ]
  },
  "aggregations": {
    "group_by_meetingType": {
      "doc_count_error_upper_bound": 0,
      "sum_other_doc_count": 0,
      "buckets": [
        {
          "key": "劉德華",
          "doc_count": 2
        }
      ]
    }
  }
}

各位大神，以上都是個人理解，如果有不一樣的想法或建議，歡迎評論！

免責聲明！

本站轉載的文章為個人學習借鑒使用，本站對版權不負任何法律責任。如果侵犯了您的隱私權益，請聯系本站郵箱yoyou2525@163.com刪除。

猜您在找 elasticsearch - ik分詞器 elasticsearch之ik分詞器 Elasticsearch IK分詞器 ElasticSearch中文分詞器-IK分詞器的使用 ElasticSearch中文分詞器-IK分詞器的使用 Elasticsearch的分詞器，IK分詞器以及IK分詞器權限問題 Elasticsearch集成ik分詞器 elasticsearch安裝ik分詞器 Elasticsearch整合IK分詞器 elasticsearch擴展ik分詞器詞庫