一、概述
需求:
最近在做一個新聞項目,有這樣一個需求:
- 用戶根據視頻內容手動創建標簽,標簽個數不限
- 在視頻詳情頁提供根據標簽推薦視頻功能,即按本視頻的標簽進行搜索,標簽匹配多的排在前面,匹配少的排在後面
經過分析、調研,決定以單一字段存儲標簽,並嘗試了下面幾種方案,這裡一併寫出
不可行方案:
- 字段為keyword類型,數據以數組存儲,未找到可實現此功能的檢索方式
- 字段為text類型,多個標簽以空格隔開或者數組存儲,使用match搜索,數據評分不準確
- 字段為text類型,多個標簽以空格隔開或者數組存儲,使用match結合match_phrase搜索,數據評分仍不準確
可行方案:
1. 字段為text類型,指定分詞器為whitespace,以空格分隔標簽
"mediaTag" : { "type" : "text", "analyzer": "whitespace" }
2. 字段為text類型,指定分詞器為pattern,指定標簽分隔字符,以逗號分隔
PUT /es_medias_test2
{
  "settings": {
    "analysis": {
      "analyzer": {
        "comma": { //自定義分詞器名稱
          "type": "pattern",
          "pattern": ","
        }
      }
    }
  },
  "mappings": {
    "esmedias": {
      "properties": {
        "mediaTag": {
          "type": "text",
          "analyzer": "comma"
        }
      }
    }
  }
}
二、可行方案測試(以可行方案一為例)
1. 創建索引
PUT /es_medias_test2
{
  "settings": {
    "index": {
      "number_of_shards": "1",
      "number_of_replicas": "0"
    }
  },
  "mappings": {
    "esmedias": {
      "properties": {
        "mediaTag": {
          "type": "text",
          "analyzer": "whitespace"
        }
      }
    }
  }
}
2. 添加數據
POST /es_medias_test2/_bulk
{"create":{"_index":"es_medias_test2","_type":"esmedias","_id":"o3kyp3YB_f4AQBwwbA7Q"}}
{"mediaTag":"美國 英國"}
{"create":{"_index":"es_medias_test2","_type":"esmedias","_id":"xXkyp3YB_f4AQBwwpw6Y"}}
{"mediaTag":"美國 英國"}
{"create":{"_index":"es_medias_test2","_type":"esmedias","_id":"lHk0p3YB_f4AQBwwvxBz"}}
{"mediaTag":"英國 美國"}
{"create":{"_index":"es_medias_test2","_type":"esmedias","_id":"-Xk1p3YB_f4AQBwwNRBt"}}
{"mediaTag":"美國 法國 英國"}
{"create":{"_index":"es_medias_test2","_type":"esmedias","_id":"AXlYp3YB_f4AQBww9zDT"}}
{"mediaTag":"china 美國 英國"}
{"create":{"_index":"es_medias_test2","_type":"esmedias","_id":"13k1p3YB_f4AQBwwBxDw"}}
{"mediaTag":"美國 英國 士大夫"}
{"create":{"_index":"es_medias_test2","_type":"esmedias","_id":"PXk1p3YB_f4AQBwwfxGI"}}
{"mediaTag":"美國"}
{"create":{"_index":"es_medias_test2","_type":"esmedias","_id":"G3k1p3YB_f4AQBwwahEM"}}
{"mediaTag":"英國 船"}
{"create":{"_index":"es_medias_test2","_type":"esmedias","_id":"G3lap3YB_f4AQBwwNTEX"}}
{"mediaTag":"china 美國"}
{"create":{"_index":"es_medias_test2","_type":"esmedias","_id":"FXlLp3YB_f4AQBwwUCRf"}}
{"mediaTag":"china 美國 法國"}
3. 測試
GET /es_medias_test2/_search
{
  "query": {
    "match": {
      "mediaTag": "美國 英國"
    }
  }
}
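返回結果符合預期(匹配標簽多的文檔大體排在前面)。注意:下面貼出的結果與上文的測試數據並不完全對應,例如只匹配一個標簽的「china 美國 法國」排在第二位,_id 為 AXlYp3YB_f4AQBww9zDT 的文檔內容也變成了「china,美國 英國」,疑似混入了其他測試時的數據,請以實際執行結果為準。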
{
"took" : 1,
"timed_out" : false,
"_shards" : {
"total" : 1,
"successful" : 1,
"skipped" : 0,
"failed" : 0
},
"hits" : {
"total" : 10,
"max_score" : 1.8475795,
"hits" : [
{
"_index" : "es_medias_test2",
"_type" : "esmedias",
"_id" : "-Xk1p3YB_f4AQBwwNRBt",
"_score" : 1.8475795,
"_source" : {
"mediaTag" : "美國 法國 英國"
}
},
{
"_index" : "es_medias_test2",
"_type" : "esmedias",
"_id" : "FXlLp3YB_f4AQBwwUCRf",
"_score" : 1.5141833,
"_source" : {
"mediaTag" : "china 美國 法國"
}
},
{
"_index" : "es_medias_test2",
"_type" : "esmedias",
"_id" : "o3kyp3YB_f4AQBwwbA7Q",
"_score" : 0.66557413,
"_source" : {
"mediaTag" : "美國 英國"
}
},
{
"_index" : "es_medias_test2",
"_type" : "esmedias",
"_id" : "xXkyp3YB_f4AQBwwpw6Y",
"_score" : 0.66557413,
"_source" : {
"mediaTag" : "美國 英國"
}
},
{
"_index" : "es_medias_test2",
"_type" : "esmedias",
"_id" : "lHk0p3YB_f4AQBwwvxBz",
"_score" : 0.66557413,
"_source" : {
"mediaTag" : "英國 美國"
}
},
{
"_index" : "es_medias_test2",
"_type" : "esmedias",
"_id" : "13k1p3YB_f4AQBwwBxDw",
"_score" : 0.5578373,
"_source" : {
"mediaTag" : "美國 英國 士大夫"
}
},
{
"_index" : "es_medias_test2",
"_type" : "esmedias",
"_id" : "AXlYp3YB_f4AQBww9zDT",
"_score" : 0.39778596,
"_source" : {
"mediaTag" : "china,美國 英國"
}
},
{
"_index" : "es_medias_test2",
"_type" : "esmedias",
"_id" : "G3k1p3YB_f4AQBwwahEM",
"_score" : 0.39778596,
"_source" : {
"mediaTag" : "英國 船"
}
},
{
"_index" : "es_medias_test2",
"_type" : "esmedias",
"_id" : "PXk1p3YB_f4AQBwwfxGI",
"_score" : 0.33188638,
"_source" : {
"mediaTag" : "美國"
}
},
{
"_index" : "es_medias_test2",
"_type" : "esmedias",
"_id" : "G3lap3YB_f4AQBwwNTEX",
"_score" : 0.26778817,
"_source" : {
"mediaTag" : "china 美國"
}
}
]
}
}