1.應用場景
作為購物網站,搜索功能支持以下幾種方式:底紋搜索,即後台配置的搜索框默認搜索信息;輸入搜索,即用戶通過輸入商品關鍵字進行模糊搜索;最近搜索,即保存用戶輸入並搜索過的商品記錄;熱門搜索,即後台配置的商品搜索信息。對於用戶來說,要搜索到想要的商品,需要盡可能輸入完整的商品名稱;而潛在的主流族裔用戶很多時候只是模糊地記住品牌名稱和商品類別,因此他們更依賴搜索聯想詞功能,來更快地搜索到想要的商品和推薦商品。
2.技術選型
Elasticsearch(下文索引設置中使用了 ik_max_word 與 pinyin 類型的分詞器,需預先安裝 IK 分詞插件與 pinyin 分詞插件)
3.創建索引
PUT keywords_index_test
{
  "settings": {
    "index": {
      "analysis": {
        "analyzer": {
          "default": {
            "tokenizer": "ik_max_word"
          },
          "pinyin_analyzer": {
            "tokenizer": "shopmall_pinyin"
          },
          "first_py_letter_analyzer": {
            "tokenizer": "first_py_letter"
          },
          "full_pinyin_letter_analyzer": {
            "tokenizer": "full_pinyin_letter"
          }
        },
        "tokenizer": {
          "shopmall_pinyin": {
            "keep_joined_full_pinyin": "true",
            "keep_first_letter": "true",
            "keep_separate_first_letter": "false",
            "lowercase": "true",
            "type": "pinyin",
            "limit_first_letter_length": "16",
            "keep_original": "true",
            "keep_full_pinyin": "true",
            "keep_none_chinese_in_joined_full_pinyin": "true"
          },
          "first_py_letter": {
            "type": "pinyin",
            "keep_first_letter": true,
            "keep_full_pinyin": false,
            "keep_original": false,
            "limit_first_letter_length": 16,
            "lowercase": true,
            "trim_whitespace": true,
            "keep_none_chinese_in_first_letter": false,
            "none_chinese_pinyin_tokenize": false,
            "keep_none_chinese": true,
            "keep_none_chinese_in_joined_full_pinyin": true
          },
          "full_pinyin_letter": {
            "type": "pinyin",
            "keep_separate_first_letter": false,
            "keep_full_pinyin": false,
            "keep_original": false,
            "limit_first_letter_length": 16,
            "lowercase": true,
            "keep_first_letter": false,
            "keep_none_chinese_in_first_letter": false,
            "none_chinese_pinyin_tokenize": false,
            "keep_none_chinese": true,
            "keep_joined_full_pinyin": true,
            "keep_none_chinese_in_joined_full_pinyin": true
          }
        }
      }
    }
  },
  "mappings": {
    "doc": {
      "_all": {
        "enabled": false
      },
      "properties": {
        "keywords": {
          "type": "completion",
          "fields": {
            "pinyin": {
              "type": "completion",
              "analyzer": "pinyin_analyzer"
            },
            "keyword_pinyin": {
              "type": "completion",
              "analyzer": "full_pinyin_letter_analyzer"
            },
            "keyword_first_py": {
              "type": "completion",
              "analyzer": "first_py_letter_analyzer"
            }
          }
        }
      }
    }
  }
}
4.初始化詞庫
初始詞庫取自網站近三個月內每天搜索量排名前 4000、且搜索有結果的搜索詞。
5.服務端接口開發
/**
 * Returns search suggestions (auto-complete words) for the text typed so far.
 *
 * @param token    value of the "token" request header; not read by this method
 * @param keywords text typed by the user; optional, defaults to an empty string
 * @return the suggestions produced by the search service, wrapped via BaseResponse.send
 */
@Override
public BaseResponse<List<String>> getSearchSug(
        @RequestHeader("token") String token,
        @RequestParam(required = false, value = "keywords", defaultValue = "") String keywords) {
    return BaseResponse.send(searchService.getSearchSuggestions(keywords));
}
/**
 * Returns auto-complete suggestions for the text the user has typed so far.
 *
 * <p>The input is sanitized (backslashes and double quotes removed, surrounding
 * whitespace trimmed). Input made up only of Chinese characters is completed
 * against the {@code keywords} field; everything else is completed against the
 * joined full-pinyin sub-field {@code keywords.keyword_pinyin}. If a letters-only
 * input yields nothing, the pinyin first-letter sub-field
 * {@code keywords.keyword_first_py} is tried as a fallback.
 *
 * @param keywords raw user input; may be {@code null}
 * @return the suggestions in the iteration order of the set returned by
 *         {@code getSuggestWord}; an empty list when the sanitized input is empty
 */
public List<String> getSearchSuggestions(String keywords) {
    // Strip the unwanted characters first and trim afterwards, so whitespace that
    // was hidden behind a quote (e.g. "\" foo\"") does not reach the suggester.
    String text = keywords == null
            ? ""
            : keywords.replace("\\", "").replace("\"", "").trim();
    if (text.isEmpty()) {
        // Nothing to complete.
        return new ArrayList<String>();
    }

    String index = "keywords_index_test";
    String type = "doc";
    QueryBuilder queryBuilder = QueryBuilders.matchAllQuery();

    // Pure Chinese -> original field; letters or mixed input -> joined full pinyin.
    String field = checkChinese(text) ? "keywords" : "keywords.keyword_pinyin";
    Set<String> results = getSuggestWord(index, type, field, text, queryBuilder);

    // No hit and the input is letters only: it may be pinyin initials, so retry
    // against the first-letter field.
    if (results.isEmpty() && checkLetter(text)) {
        results = getSuggestWord(index, type, "keywords.keyword_first_py", text, queryBuilder);
    }
    return new ArrayList<String>(results);
}
/**
 * Runs a completion-suggester query and returns the distinct suggestion texts.
 *
 * <p>Intended usage (as implemented by the caller in this file):
 * <ol>
 *   <li>Detect whether the search text is Chinese or pinyin.</li>
 *   <li>For Chinese, suggest directly on the {@code keywords} field.</li>
 *   <li>For pinyin (or pinyin mixed with Chinese), suggest on
 *       {@code keywords.keyword_pinyin} first and fall back to the first-letter
 *       field {@code keywords.keyword_first_py} when there is no result.</li>
 * </ol>
 *
 * <p>The request size is set to 0 so that no search hits are returned, and
 * {@code _source} fetching is disabled to keep the response small; only the
 * suggestion texts are read from the response.
 *
 * @param index        index to query
 * @param type         mapping type to query
 * @param field        completion field to suggest on
 * @param text         prefix text typed by the user
 * @param queryBuilder query attached to the search request
 * @return distinct suggestion texts in the order the response lists them;
 *         empty (never {@code null}) when the response carries no suggestion
 */
public Set<String> getSuggestWord(String index, String type, String field, String text, QueryBuilder queryBuilder) {
    final String suggestionName = "my-suggest-1";

    // Duplicates are removed on the client side (the original note says ES 5.2
    // cannot do it server-side, while 6.1+ offers skip_duplicates).
    // A LinkedHashSet dedupes while keeping the order in which the options were
    // returned; a sorted set would replace that ranking with alphabetical order.
    Set<String> results = new java.util.LinkedHashSet<String>();

    CompletionSuggestionBuilder suggestionBuilder = new CompletionSuggestionBuilder(field);
    suggestionBuilder.text(text);
    suggestionBuilder.size(20);
    SuggestBuilder suggestBuilder = new SuggestBuilder();
    suggestBuilder.addSuggestion(suggestionName, suggestionBuilder);

    SearchRequestBuilder searchRequestBuilder = client.prepareSearch(index).setTypes(type);
    searchRequestBuilder.setExplain(false);
    searchRequestBuilder.setSize(0); // suggestions only, no hits
    searchRequestBuilder.setQuery(queryBuilder);
    searchRequestBuilder.suggest(suggestBuilder);
    searchRequestBuilder.setFetchSource(false); // do not return _source

    SearchResponse resp = searchRequestBuilder.execute().actionGet();
    Suggest sugg = resp.getSuggest();
    if (sugg == null) {
        // The response carried no suggest section.
        return results;
    }
    CompletionSuggestion suggestion = sugg.getSuggestion(suggestionName);
    if (suggestion == null) {
        return results;
    }

    List<CompletionSuggestion.Entry> entries = suggestion.getEntries();
    for (CompletionSuggestion.Entry entry : entries) {
        List<? extends Suggest.Suggestion.Entry.Option> options = entry.getOptions();
        for (Suggest.Suggestion.Entry.Option option : options) {
            results.add(option.getText().toString());
        }
    }
    return results;
}
/**
 * Tests whether a string is non-empty and consists solely of the ASCII
 * letters A-Z / a-z.
 *
 * @param cardNum the text to test; must not be {@code null}
 * @return {@code true} if every character is an ASCII letter and the string
 *         is not empty, {@code false} otherwise
 */
public static boolean checkLetter(String cardNum) {
    return cardNum.matches("^[A-Za-z]+$");
}
/**
 * Tests whether a string is non-empty and consists solely of characters in
 * the range U+4E00 to U+9FA5.
 *
 * @param chinese the text to test; must not be {@code null}
 * @return {@code true} if every character lies in that range and the string
 *         is not empty, {@code false} otherwise
 */
public static boolean checkChinese(String chinese) {
    return chinese.matches("^[\u4E00-\u9FA5]+$");
}
