Elasticsearch拼音和ik分詞器的結合應用


一、創建索引時,自定義拼音分詞和ik分詞

# Create the index with two custom analyzers that run an IK tokenizer
# followed by the pinyin token filter.
#   ik_pinyin_analyzer : custom analyzer name (referenced by the field mappings),
#                        uses the ik_smart tokenizer (coarse-grained segmentation)
#   pinyin_analyzer    : same filters, uses the ik_max_word tokenizer
#                        (fine-grained segmentation)
#
# Options of the "my_pinyin" filter (elasticsearch-analysis-pinyin plugin):
#   keep_separate_first_letter : keep every first letter as a separate term,
#                                e.g. 劉德華 > l,d,h. Default: false.
#                                NOTE: results may be too fuzzy because such
#                                single-letter terms are very frequent.
#   keep_full_pinyin           : keep the pinyin of each character,
#                                e.g. 劉德華 > [liu,de,hua]. Default: true.
#                                (The joined form [liudehua] is produced by the
#                                separate option keep_joined_full_pinyin.)
#   keep_original              : also keep the original input. Default: false.
#   limit_first_letter_length  : maximum length of the first_letter result.
#                                Default: 16.
#   lowercase                  : lowercase non-Chinese letters. Default: true.
#   remove_duplicated_term     : remove duplicated terms to save index space,
#                                e.g. de的 > de. Default: false.
#                                NOTE: position-related queries may be affected.
#
# The analysis configuration is wrapped in "settings", the documented request
# shape for the create-index API, so the request is accepted by all versions.
PUT /my_index
{
  "settings": {
    "index": {
      "analysis": {
        "analyzer": {
          "ik_pinyin_analyzer": {
            "type": "custom",
            "tokenizer": "ik_smart",
            "filter": ["my_pinyin", "word_delimiter"]
          },
          "pinyin_analyzer": {
            "type": "custom",
            "tokenizer": "ik_max_word",
            "filter": ["my_pinyin", "word_delimiter"]
          }
        },
        "filter": {
          "my_pinyin": {
            "type": "pinyin",
            "keep_separate_first_letter": false,
            "keep_full_pinyin": true,
            "keep_original": true,
            "limit_first_letter_length": 16,
            "lowercase": true,
            "remove_duplicated_term": true
          }
        }
      }
    }
  }
}

 

二、創建mapping時,設置字段分詞(注:相同索引下建不同的type時,相同字段名屬性必須設一樣)

# Define the mapping of type "user". Both text fields are analyzed with the
# custom analyzer "ik_pinyin_analyzer" declared in the index settings.
#   userName.analyzer : name of the custom analyzer to apply
#   userName.raw      : keyword sub-field; a keyword field is not analyzed, so
#                       it can be used for exact matching and aggregations
#   store             : when true, the field value is stored separately and a
#                       request for that stored field is served from the stored
#                       field instead of being loaded from _source. It is
#                       disabled here and written as the boolean false,
#                       because string forms such as "no" are only tolerated
#                       by older Elasticsearch versions.
POST /my_index/user/_mapping
{
  "user": {
    "properties": {
      "id": {
        "type": "integer"
      },
      "userName": {
        "type": "text",
        "store": false,
        "term_vector": "with_positions_offsets",
        "analyzer": "ik_pinyin_analyzer",
        "boost": 10,
        "fielddata": true,
        "fields": {
          "raw": {
            "type": "keyword"
          }
        }
      },
      "reason": {
        "type": "text",
        "store": false,
        "term_vector": "with_positions_offsets",
        "analyzer": "ik_pinyin_analyzer",
        "boost": 10
      }
    }
  }
}

 

 

測試

PUT /my_index/user/1
{
  "id":1,
  "userName":"劉德華",
  "reason":"大帥哥"
}

PUT /my_index/user/2
{
  "id":2,
  "userName":"劉德華",
  "reason":"中華人民"
}

不分詞查詢

GET /my_index/user/_search
{
  "query": {
    "match": {
      "userName.raw": "劉德華"
    }
  }
}


{
  "took": 0,
  "timed_out": false,
  "_shards": {
    "total": 5,
    "successful": 5,
    "skipped": 0,
    "failed": 0
  },
  "hits": {
    "total": 2,
    "max_score": 0.2876821,
    "hits": [
      {
        "_index": "my_index",
        "_type": "user",
        "_id": "2",
        "_score": 0.2876821,
        "_source": {
          "id": 2,
          "userName": "劉德華",
          "reason": "中華人民"
        }
      },
      {
        "_index": "my_index",
        "_type": "user",
        "_id": "1",
        "_score": 0.2876821,
        "_source": {
          "id": 1,
          "userName": "劉德華",
          "reason": "大帥哥"
        }
      }
    ]
  }
}

 

分詞查詢

GET /my_index/user/_search
{
  "query": {
    "match": {
      "userName": "劉"
    }
  }
}

{
  "took": 0,
  "timed_out": false,
  "_shards": {
    "total": 5,
    "successful": 5,
    "skipped": 0,
    "failed": 0
  },
  "hits": {
    "total": 2,
    "max_score": 0.31331712,
    "hits": [
      {
        "_index": "my_index",
        "_type": "user",
        "_id": "2",
        "_score": 0.31331712,
        "_source": {
          "id": 2,
          "userName": "劉德華",
          "reason": "中華人民"
        }
      },
      {
        "_index": "my_index",
        "_type": "user",
        "_id": "1",
        "_score": 0.31331712,
        "_source": {
          "id": 1,
          "userName": "劉德華",
          "reason": "大帥哥"
        }
      }
    ]
  }
}

 

拼音分詞

GET /my_index/user/_search
{
  "query": {
    "match": {
      "reason": "shuai"
    }
  }
}


{
  "took": 0,
  "timed_out": false,
  "_shards": {
    "total": 5,
    "successful": 5,
    "skipped": 0,
    "failed": 0
  },
  "hits": {
    "total": 1,
    "max_score": 3.4884284,
    "hits": [
      {
        "_index": "my_index",
        "_type": "user",
        "_id": "1",
        "_score": 3.4884284,
        "_source": {
          "id": 1,
          "userName": "劉德華",
          "reason": "大帥哥"
        }
      }
    ]
  }
}

 

分組聚合

GET /my_index/user/_search
{ 
  "size":2,
  "query": {
    "match": {
      "userName": "liu"
    }
  },
  "aggs": {
    "group_by_meetingType": {
      "terms": {
        "field": "userName.raw"
      }
    }
  }
}

{
  "took": 1,
  "timed_out": false,
  "_shards": {
    "total": 5,
    "successful": 5,
    "skipped": 0,
    "failed": 0
  },
  "hits": {
    "total": 2,
    "max_score": 3.133171,
    "hits": [
      {
        "_index": "my_index",
        "_type": "user",
        "_id": "2",
        "_score": 3.133171,
        "_source": {
          "id": 2,
          "userName": "劉德華",
          "reason": "中華人民"
        }
      },
      {
        "_index": "my_index",
        "_type": "user",
        "_id": "1",
        "_score": 3.133171,
        "_source": {
          "id": 1,
          "userName": "劉德華",
          "reason": "大帥哥"
        }
      }
    ]
  },
  "aggregations": {
    "group_by_meetingType": {
      "doc_count_error_upper_bound": 0,
      "sum_other_doc_count": 0,
      "buckets": [
        {
          "key": "劉德華",
          "doc_count": 2
        }
      ]
    }
  }
}

 

 

大神們,以上都是個人理解,如果有不一樣的想法或建議,歡迎評論!


免責聲明!

本站轉載的文章為個人學習借鑒使用,本站對版權不負任何法律責任。如果侵犯了您的隱私權益,請聯系本站郵箱yoyou2525@163.com刪除。



 
粵ICP備18138465號   © 2018-2025 CODEPRJ.COM