elasticSearch 自定義多字段相關度配置


相關度研究記錄手稿

1.orderNum字段相關度 增強 score = math.sqrt(orderNum*0.001)

ScoreFunctionBuilder<?> dateFieldValueScoreFunction = ScoreFunctionBuilders.fieldValueFactorFunction("orderNum")
        .missing(1d)
        .modifier(FieldValueFactorFunction.Modifier.SQRT).factor(0.001f);

 

2.使用以下設置 如搜索個人所得稅 contents 字段包含個人所得稅所占相關度約為0.0004

MultiMatchQueryBuilder matchQueryBuilder = QueryBuilders
        .multiMatchQuery(text, BwbdType.PROPERTY_NUMBERS
                , BwbdType.PROPERTY_TITLES, BwbdType.PROPERTY_CONTENTS).analyzer("ik_smart")
        .field(BwbdType.PROPERTY_NUMBERS, 0.01f)
        .field(BwbdType.PROPERTY_TITLES, 0.1f)
        .field(BwbdType.PROPERTY_CONTENTS, 0.001f)
        .minimumShouldMatch(BwbdType.MATCH_LEVEL_THREE);

 

使用以上設置 兩條數據orderNum相差130 對相關度影響是 0.015233534

 

在不考慮檢索相關度情況下只看增強相關度 對最后相關度的影響。

ScoreFunctionBuilder<?> dataTypeFieldValueScoreFunction = ScoreFunctionBuilders.fieldValueFactorFunction("dataTypeRelation")
        .missing(10d)
        .modifier(FieldValueFactorFunction.Modifier.LN1P).factor(1f);

 

以上配置dataTypeRelation數據類型相關度相差10 相關度相差 0.2795849

增強score = math.log1p(dataTypeRelation*1)

 

FunctionScoreQueryBuilder.FilterFunctionBuilder[] filterFunctionBuilders = new FunctionScoreQueryBuilder.FilterFunctionBuilder[3];
// 時間相關
ScoreFunctionBuilder<?> dateFieldValueScoreFunction = ScoreFunctionBuilders.fieldValueFactorFunction("orderNum")
        .missing(1d)
        .modifier(FieldValueFactorFunction.Modifier.SQRT).factor(0.001f);
FunctionScoreQueryBuilder.FilterFunctionBuilder date = new FunctionScoreQueryBuilder.FilterFunctionBuilder(dateFieldValueScoreFunction);
filterFunctionBuilders[0] = date;
// 類型相關
ScoreFunctionBuilder<?> dataTypeFieldValueScoreFunction = ScoreFunctionBuilders.fieldValueFactorFunction("dataTypeRelation")
        .missing(10d)
        .modifier(FieldValueFactorFunction.Modifier.LN1P).factor(2f);
FunctionScoreQueryBuilder.FilterFunctionBuilder dataType = new FunctionScoreQueryBuilder.FilterFunctionBuilder(dataTypeFieldValueScoreFunction);
filterFunctionBuilders[1] = dataType;
// 來源相關
ScoreFunctionBuilder<?> originFieldValueScoreFunction = ScoreFunctionBuilders.fieldValueFactorFunction("originTypeRelation")
        .missing(10d)
        .modifier(FieldValueFactorFunction.Modifier.LN1P).factor(0.1f);
FunctionScoreQueryBuilder.FilterFunctionBuilder origin = new FunctionScoreQueryBuilder.FilterFunctionBuilder(originFieldValueScoreFunction);
filterFunctionBuilders[2] = origin;

 

FunctionScoreQueryBuilder query = QueryBuilders.functionScoreQuery(boolQueryBuilder,filterFunctionBuilders)
        .boostMode(CombineFunction.SUM)
        .scoreMode(FunctionScoreQuery.ScoreMode.SUM);

 

使用以上代碼 多字段配置相關度 最后對相關度的影響

Score = score(相關度)+score(增強相關度1)+score(增強相關度2)+score(增強相關度3)計算方式與

.boostMode(CombineFunction.SUM)
        .scoreMode(FunctionScoreQuery.ScoreMode.SUM);

配置有關

 

總結:多相關度優化方案 主要變更filed值讓相關度評分與function_score增強的評分達到一個最優解

 

另外 也要使用 modifier factor對單個相關度進行調整

 

最后 貼上該檢索方法源碼

 

    @Override
    public SearchDto improveSearch(SearchDto searchDto) {

        String text = searchDto.getTerm();
        String type = searchDto.getType();
        HighSearchParam searchParam = searchDto.getSearchParam();

        // 搜索請求對象
        SearchRequest searchRequest = new SearchRequest(BwbdType.ES_INDEX);
        // 指定類型
        searchRequest.types(BwbdType.ES_TYPE);
        // 搜索源構建對象
        SearchSourceBuilder searchSourceBuilder = new SearchSourceBuilder();
        // 搜索方式
        // 首先構造多關鍵字查詢條件
        BoolQueryBuilder boolQueryBuilder = QueryBuilders.boolQuery();
        if (StringUtils.isNotEmpty(text)) {
            text = QueryParser.escape(text);  // 主要就是這一句把特殊字符都轉義,那么lucene就可以識別
            MultiMatchQueryBuilder matchQueryBuilder = QueryBuilders
                    .multiMatchQuery(text, BwbdType.PROPERTY_NUMBERS
                            , BwbdType.PROPERTY_TITLES, BwbdType.PROPERTY_CONTENTS).analyzer("ik_smart")
                    .field(BwbdType.PROPERTY_NUMBERS, 0.01f)
                    .field(BwbdType.PROPERTY_TITLES, 0.1f)
                    .field(BwbdType.PROPERTY_CONTENTS, 0.001f)
                    .minimumShouldMatch(BwbdType.MATCH_LEVEL_THREE);
            // 添加條件到布爾查詢
            boolQueryBuilder.must(matchQueryBuilder);
        } else {
            if (null == searchDto.getSearchParam() && StringUtils.isEmpty(type)) {
                searchDto.setType(BwbdType.DATA_TYPE_FG);
            }
        }

//        // 通過布爾查詢來構造過濾查詢
//        boolQueryBuilder.filter(QueryBuilders.matchQuery("economics","L"));
        if (StringUtils.isNotEmpty(type)) {
            boolQueryBuilder.filter(QueryBuilders
                    .matchQuery(BwbdType.PROPERTY_DATA_TYPE, type));
        }

        addFilterProperties(text,searchParam, boolQueryBuilder, searchSourceBuilder);

        FunctionScoreQueryBuilder.FilterFunctionBuilder[] filterFunctionBuilders = buildFilterFunctionBuilders();

        FunctionScoreQueryBuilder query = QueryBuilders.functionScoreQuery(boolQueryBuilder,filterFunctionBuilders)
                .boostMode(CombineFunction.SUM)
                .scoreMode(FunctionScoreQuery.ScoreMode.SUM);

        // 將查詢條件封裝給查詢對象
        searchSourceBuilder.query(query);
        if (searchDto.getSize() > 20) {
            searchDto.setSize(20);
        }
        searchSourceBuilder.size(searchDto.getSize());
        searchSourceBuilder.from(searchDto.getPage() - 1);

        // ***********************

        // 高亮查詢
        HighlightBuilder highlightBuilder = new HighlightBuilder();
        highlightBuilder.preTags(CommonConstraint.LIGHT_TAG_START); // 高亮前綴
        highlightBuilder.postTags(CommonConstraint.LIGHT_TAG_END); // 高亮后綴
        List<HighlightBuilder.Field> fields = highlightBuilder.fields();
        fields.add(new HighlightBuilder
                .Field(BwbdType.PROPERTY_NUMBERS)); // 高亮字段
        fields.add(new HighlightBuilder
                .Field(BwbdType.PROPERTY_TITLES)); // 高亮字段
        fields.add(new HighlightBuilder
                .Field(BwbdType.PROPERTY_CONTENTS).fragmentSize(100000)); // 高亮字段
        // 添加高亮查詢條件到搜索源
        searchSourceBuilder.highlighter(highlightBuilder);

        // ***********************

//        // 設置源字段過慮,第一個參數結果集包括哪些字段,第二個參數表示結果集不包括哪些字段
//        searchSourceBuilder.fetchSource(new String[]{"name","studymodel","price","timestamp"},new String[]{});
        // 向搜索請求對象中設置搜索源
        searchRequest.source(searchSourceBuilder);
        // 執行搜索,向ES發起http請求
        SearchResponse searchResponse = null;
        try (RestHighLevelClient client = new RestHighLevelClient(restClientBuilder)) {
            searchResponse = client.search(searchRequest, RequestOptions.DEFAULT);
            obtainFgType(searchDto, searchResponse);
        } catch (IOException e) {
            e.printStackTrace();
        }
        return searchDto;
    }

    private FunctionScoreQueryBuilder.FilterFunctionBuilder[] buildFilterFunctionBuilders() {
        FunctionScoreQueryBuilder.FilterFunctionBuilder[] filterFunctionBuilders = new FunctionScoreQueryBuilder.FilterFunctionBuilder[3];
        // 時間相關
        ScoreFunctionBuilder<?> dateFieldValueScoreFunction = ScoreFunctionBuilders.fieldValueFactorFunction("orderNum")
                .missing(1d)
                .modifier(FieldValueFactorFunction.Modifier.SQRT).factor(0.001f);
        FunctionScoreQueryBuilder.FilterFunctionBuilder date = new FunctionScoreQueryBuilder.FilterFunctionBuilder(dateFieldValueScoreFunction);
        filterFunctionBuilders[0] = date;
        // 類型相關
        ScoreFunctionBuilder<?> dataTypeFieldValueScoreFunction = ScoreFunctionBuilders.fieldValueFactorFunction("dataTypeRelation")
                .missing(10d)
                .modifier(FieldValueFactorFunction.Modifier.LN1P).factor(2f);
        FunctionScoreQueryBuilder.FilterFunctionBuilder dataType = new FunctionScoreQueryBuilder.FilterFunctionBuilder(dataTypeFieldValueScoreFunction);
        filterFunctionBuilders[1] = dataType;
        // 來源相關
        ScoreFunctionBuilder<?> originFieldValueScoreFunction = ScoreFunctionBuilders.fieldValueFactorFunction("originTypeRelation")
                .missing(10d)
                .modifier(FieldValueFactorFunction.Modifier.LN1P).factor(0.1f);
        FunctionScoreQueryBuilder.FilterFunctionBuilder origin = new FunctionScoreQueryBuilder.FilterFunctionBuilder(originFieldValueScoreFunction);
        filterFunctionBuilders[2] = origin;
        return filterFunctionBuilders;
    }

    private void addFilterProperties(String text, HighSearchParam searchParam, BoolQueryBuilder boolQueryBuilder, SearchSourceBuilder searchSourceBuilder) {
        if (null != searchParam) {

            if (StringUtils.isNotEmpty(searchParam.getYearStr())) {
                boolQueryBuilder.filter(QueryBuilders.matchPhraseQuery(BwbdType.PROPERTY_YEARS, searchParam.getYearStr()));
            }
            if (StringUtils.isNotEmpty(searchParam.getReasonName())) {
                boolQueryBuilder.filter(QueryBuilders.matchPhraseQuery("reasonName", searchParam.getReasonName()));
            }
            if (StringUtils.isNotEmpty(searchParam.getCaseType())) {
                boolQueryBuilder.filter(QueryBuilders.matchPhraseQuery("caseType", searchParam.getCaseType()));
            }
            if (StringUtils.isNotEmpty(searchParam.getTrialRoundText())) {
                boolQueryBuilder.filter(QueryBuilders.matchPhraseQuery("trialRoundText", searchParam.getTrialRoundText()));
            }
            if (StringUtils.isNotEmpty(searchParam.getJudgementType())) {
                boolQueryBuilder.filter(QueryBuilders.matchPhraseQuery("judgementType", searchParam.getJudgementType()));
            }
            if (StringUtils.isNotEmpty(searchParam.getAreaCode())) {
                boolQueryBuilder.filter(QueryBuilders.matchPhraseQuery("areaId", searchParam.getAreaCode()));
            }
            if (StringUtils.isNotEmpty(searchParam.getIndustry())) {
                boolQueryBuilder.filter(QueryBuilders.matchPhraseQuery("economics", searchParam.getIndustry()));
            }
            if (StringUtils.isNotEmpty(searchParam.getTaxType())) {
                boolQueryBuilder.filter(QueryBuilders.matchPhraseQuery("stypes", searchParam.getTaxType()));
            }

            // 排序
            // 根據 years 降序排列
            if (BwbdType.ORDER_TYPE_DATE.equals(searchParam.getOrderType())) {
                searchSourceBuilder.sort(new FieldSortBuilder("contentDate").order(SortOrder.DESC));
            }
        }
        // 如果沒有檢索內容 默認時間排序
        if (StringUtils.isEmpty(text)) {
            searchSourceBuilder.sort(new FieldSortBuilder("contentDate").order(SortOrder.DESC));
        }
        // 根據分數 _score 降序排列 (默認行為)
//        searchSourceBuilder.sort(new ScoreSortBuilder().order(SortOrder.DESC));
    }

    private void obtainFgType(AbstractTxjDto searchDto, SearchResponse searchResponse) {

        // 搜索結果
        SearchHits hits = searchResponse.getHits();
        // 匹配到的總記錄數
        long totalHits = hits.getTotalHits();
        searchDto.setTotal(totalHits);
        // 得到匹配度高的文檔
        SearchHit[] searchHits = hits.getHits();

        List<BwbdType> bwbdTypes = new ArrayList<>();

        for (SearchHit hit : searchHits) {
            String content = hit.getSourceAsString();//使用ES的java接口將實體類對應的內容轉換為json字符串
            BwbdType bwbdType = JSONObject.parseObject(content, BwbdType.class); //生成pojo對象
            // 獲取高亮查詢的內容。如果存在,則替換原來的name
            Map<String, HighlightField> highlightFields = hit.getHighlightFields();
            if (highlightFields != null) {
                HighlightField nameField = highlightFields.get(bwbdType.PROPERTY_NUMBERS);
                if (nameField != null) {
                    Text[] fragments = nameField.getFragments();
                    StringBuffer stringBuffer = new StringBuffer();
                    for (Text str : fragments) {
                        stringBuffer.append(str.string());
                    }
                    String numbers = stringBuffer.toString();
                    bwbdType.setNumbers(numbers);
                }

                HighlightField titlesField = highlightFields.get(bwbdType.PROPERTY_TITLES);
                if (titlesField != null) {
                    Text[] fragments = titlesField.getFragments();
                    StringBuffer stringBuffer = new StringBuffer();
                    for (Text str : fragments) {
                        stringBuffer.append(str.string());
                    }
                    String titles = stringBuffer.toString();
                    bwbdType.setTitles(titles);
                }

                HighlightField contentsField = highlightFields.get(bwbdType.PROPERTY_CONTENTS);
                if (contentsField != null) {
                    Text[] fragments = contentsField.getFragments();
                    StringBuffer stringBuffer = new StringBuffer();
                    for (Text str : fragments) {
                        stringBuffer.append(str.string());
                    }
                    bwbdType.setContents(stringBuffer.toString());
                }
                // 處理內容
                handleResult(bwbdType);
            }
            bwbdTypes.add(bwbdType);
        }
        searchDto.setRows(bwbdTypes);
    }

 


免責聲明!

本站轉載的文章為個人學習借鑒使用,本站對版權不負任何法律責任。如果侵犯了您的隱私權益,請聯系本站郵箱yoyou2525@163.com刪除。



 
粵ICP備18138465號   © 2018-2025 CODEPRJ.COM