es之得分（加權）

本文轉載自查看原文 2017-05-22 23:34 1326 ELK（elasticsearch、logstash、kibana）

隨着應用程序的增長，提高搜索質量的需求也進一步增大。我們把它叫做搜索體驗。我們需要知道什么對用戶更重要，關注用戶如何使用搜索功能。這導致不同的結論，例如，有些文檔比其他的更重要，或特定查詢需強調一個字段而弱化其他字段。這就是可以用到加權的地方。

進一步說搜索體驗，我們更希望檢索出來的數據是最想得到的數據；

這個其實就是關於文檔的【相關性得分】

更具體地說：查詢命中的每個文檔，都會在內部計算一個相關性評分（score）；然後按照這個 score 從大到小排序，依次返回給客戶端。

如何計算評分？

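Elasticsearch 使用的評分公式是基於 TF-IDF 算法的實用評分函數（practical scoring function），如下：
$$\mathrm{score}(q,d) = \mathrm{coord}(q,d) \cdot \mathrm{queryNorm}(q) \cdot \sum_{t \in q} \Big( \mathrm{tf}(t \in d) \cdot \mathrm{idf}(t)^{2} \cdot \mathrm{boost}(t) \cdot \mathrm{norm}(t,d) \Big)$$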

TF：詞頻，詞在文檔中出現的頻度是多少？頻度越高，權重越高

IDF：逆向文檔頻率，詞在集合的所有文檔里出現的頻率是多少？頻率越高，權重越低

在我們實際的工作中，我們經常會控制boost來調整score（boost默認值是1）

創建索引和映射：

1）：創建索引

@Test
public void createIndex(){
    /**
     * 創建索引
     * */

    client.admin().indices().prepareCreate("blog").get();
}

2）：創建映射

/**
 * 創建映射
 */
@Test
public void testCreateIndexMapping_boost() throws Exception{
    /**
     * 格式：
     * "mappings" : {
     *     "document" : {
     *         "dynamic" : "false",
     *         "properties" :{
     *             "id" : { "type" : "string" },
     *             "content" : { "type" : "string" },
     *             "comment" : {"type" : "string"},
     *             "author" : { "type" : "string" }
     *         }
     *     }
     * }
     */
    //構建json的數據格式，創建映射
    XContentBuilder mappingBuilder = XContentFactory.jsonBuilder()
           .startObject()
           .startObject("document")
           .startObject("properties")
               .startObject("id").field("type","integer").field("store", "yes")
           .endObject()
           .startObject("title").field("type","string").field("store", "yes").field("analyzer" , "ik_max_word")
           .endObject()
           .startObject("content").field("type","string").field("store", "yes").field("analyzer" , "ik_max_word")
           .endObject()
           .startObject("comment").field("type","string").field("store", "yes").field("analyzer" , "ik_max_word")
           .endObject()
           .endObject()
           .endObject()
           .endObject();
    PutMappingRequest request = Requests.putMappingRequest("blog")
           .type("document")
           .source(mappingBuilder);
    client.admin().indices().putMapping(request).get();
}

3）：創建Document實體類

package com.elasticsearch.bean;

/**
 * Simple mutable bean with an id and three text fields (title, content, comment).
 * Every property starts out as {@code null} until its setter is called.
 *
 * Created by angel.
 */
public class Document {
    private Integer id;
    private String title;
    private String content;
    private String comment;

    /** @return the id, or {@code null} if none has been set */
    public Integer getId() {
        return this.id;
    }

    /** @param id the id to store */
    public void setId(Integer id) {
        this.id = id;
    }

    /** @return the title, or {@code null} if none has been set */
    public String getTitle() {
        return this.title;
    }

    /** @param title the title to store */
    public void setTitle(String title) {
        this.title = title;
    }

    /** @return the content, or {@code null} if none has been set */
    public String getContent() {
        return this.content;
    }

    /** @param content the content to store */
    public void setContent(String content) {
        this.content = content;
    }

    /** @return the comment, or {@code null} if none has been set */
    public String getComment() {
        return this.comment;
    }

    /** @param comment the comment to store */
    public void setComment(String comment) {
        this.comment = comment;
    }
}

4）：重新創建索引和映射，創建文檔

   /**
     * 創建文檔
     * */
    @Test
    public void createDocument() throws JsonProcessingException {
        Document document = new Document();


//   document.setId(1);
//   document.setTitle("搜索引擎服務器");
//   document.setContent("基於restful的數據風格");
//   document.setComment("我們學習Elasticsearch搜索引擎服務器");
//
//       document.setId(2);
//       document.setTitle("什么是Elasticsearch");
//       document.setContent("Elasticsearch搜索引擎服務器");
//       document.setComment("Elasticsearch封裝了lucene");
//
        document.setId(3);
        document.setTitle("Elasticsearch的用途");
        document.setContent("Elasticsearch可以用來進行海量數據的檢索");
        document.setComment("Elasticsearch真NB");

        ObjectMapper objectMapper = new ObjectMapper();
        String source = objectMapper.writeValueAsString(document);
        System.out.println("source:"+source);

        IndexResponse indexResponse = client.prepareIndex("blog", "document", document.getId().toString()).setSource(source).get();
        // 獲取響應的信息
        System.out.println("索引名稱："+indexResponse.getIndex());
        System.out.println("文檔類型："+indexResponse.getType());
        System.out.println("ID："+indexResponse.getId());
        System.out.println("版本："+indexResponse.getVersion());
        System.out.println("是否創建成功："+indexResponse.status());
        client.close();
   }

5）：測試：

//TODO 如何讓id2 在 id1前面
    @Test
    public void BoolQuery_boost(){
        SearchResponse searchResponse = client.prepareSearch("blog").setTypes("document")
               .setQuery(QueryBuilders.boolQuery()
                       .should(QueryBuilders.termQuery("title" ,  "搜索"))
                       .should(QueryBuilders.termQuery("content" ,  "搜索"))
                       .should(QueryBuilders.termQuery("comment" ,  "搜索"))

               ).get();
        SearchHits hits = searchResponse.getHits();//獲取數據的結果集對象，獲取命中次數
        // 顯示數據
        printSearch(hits);

   }

    public void printSearch(SearchHits hits){
        System.out.println("查詢的結果數量有"+hits.getTotalHits()+"條");
        System.out.println("結果中最高分："+hits.getMaxScore());
        // 遍歷每條數據
        Iterator<SearchHit> iterator = hits.iterator();
        while(iterator.hasNext()){
            SearchHit searchHit = iterator.next();
            System.out.println("所有的數據JSON的數據格式："+searchHit.getSourceAsString());
            System.out.println("每條得分："+searchHit.getScore());
            // 獲取每個字段的數據
            System.out.println("id:"+searchHit.getSource().get("id"));
            System.out.println("title:"+searchHit.getSource().get("title"));
            System.out.println("content:"+searchHit.getSource().get("content"));
            System.out.println("**********************************************");
            for(Iterator<SearchHitField> ite = searchHit.iterator(); ite.hasNext();){
                SearchHitField next = ite.next();
                System.out.println(next.getValues());
           }
       }
   }

免責聲明！

本站轉載的文章為個人學習借鑒使用，本站對版權不負任何法律責任。如果侵犯了您的隱私權益，請聯系本站郵箱yoyou2525@163.com刪除。

猜您在找 求出歌手的得分局部加權線性回歸 Nginx加權輪詢算法加權中位數指數加權移動平均 ArcGIS教程：加權疊加加權隨機算法局部加權線性回歸 CKA考試心得分享 es