radic 是我用 Go 語言實現的搜索引擎索引部分,包括:
倒排索引
正排索引
分布式索引
使用舉例
首先需要執行 `go get github.com/Orisun/radic` 安裝 radic
package main
import (
"radic/types"
"bytes"
"encoding/gob"
"fmt"
"radic"
"time"
)
// Book is the sample document type; this example uses book search as its scenario.
type Book struct {
Id uint32 // passed as DocId when the book is indexed in main
Name string
Price float32 // passed as RankScore when the book is indexed in main
Chinese bool // whether this is the Chinese-language edition
}
// Serialize gob-encodes the book and returns the encoded bytes.
//
// Picking a serialization format is the caller's decision and is outside
// radic's responsibility; gob is used here because it ships with Go, but
// protobuf or any other codec would work as well.
//
// If the encoder reports an error, the error is printed to stdout and an
// empty (non-nil) slice is returned.
func (b *Book) Serialize() []byte {
	var buf bytes.Buffer
	if err := gob.NewEncoder(&buf).Encode(b); err != nil {
		fmt.Println(err)
		return []byte{}
	}
	return buf.Bytes()
}
// DeserializeBook decodes a gob-encoded Book from v.
//
// It returns a pointer to the decoded Book. If decoding fails, the error is
// printed to stdout and nil is returned, so callers must nil-check the result
// before using it.
func DeserializeBook(v []byte) *Book {
	book := new(Book)
	if err := gob.NewDecoder(bytes.NewReader(v)).Decode(book); err != nil {
		fmt.Println(err)
		return nil
	}
	return book
}
// main walks through the radic API end to end: it initialises an index,
// adds two books, searches by keyword, fetches a document by ID, removes a
// document, and finally iterates over everything left in the index.
//
// DeserializeBook returns nil when an entry cannot be decoded, so every place
// that decodes an entry checks for nil before touching the book; the decode
// error itself has already been printed by DeserializeBook.
func main() {
	// Initialise the index. An empty DistOpts selects a single-machine index.
	options := types.IndexerOpts{}
	engine := radic.IndexEngine{}
	engine.Init(options, types.DistOpts{}, "radic.log")
	defer engine.Destroy()

	// Add documents to the index.
	book1 := Book{Id: 1, Name: "工業機器學習算法詳解與實戰", Price: 100.0, Chinese: true}
	book2 := Book{Id: 2, Name: "effective go", Price: 200.0, Chinese: false}
	kw1 := &types.Keyword{Field: "field", Word: "算法"}
	kw2 := &types.Keyword{Field: "title", Word: "工業"}
	kw3 := &types.Keyword{Field: "title", Word: "機器學習"}
	kw4 := &types.Keyword{Field: "field", Word: "編程"}
	kw5 := &types.Keyword{Field: "title", Word: "go"}
	docInfo1 := types.DocInfo{
		DocId:            book1.Id,
		Keyword:          []*types.Keyword{kw1, kw2, kw3},
		RankScore:        book1.Price,
		Entry:            book1.Serialize(),
		CompositeFeature: 1, // in binary, the lowest bit is set
	}
	docInfo2 := types.DocInfo{
		DocId:            book2.Id,
		Keyword:          []*types.Keyword{kw4, kw5},
		RankScore:        book2.Price,
		Entry:            book2.Serialize(),
		CompositeFeature: 3, // in binary, the two lowest bits are set
	}
	engine.IndexDoc(docInfo1) // adding to the index happens asynchronously
	engine.IndexDoc(docInfo2)
	time.Sleep(100 * time.Millisecond) // give the asynchronous adds a moment to finish
	// Calling InvertIndexStatistics once after the index is built is
	// recommended because it helps search speed.
	engine.InvertIndexStatistics("")

	// Search by keyword.
	keyword := &types.Keyword{Field: "title", Word: "go"}
	request := types.SearchRequest{
		Must:          []*types.Keyword{keyword},
		Should:        nil,
		Not:           nil,
		OutputOffset:  0,
		OnFlag:        1, // require the lowest bit to be set
		OffFlag:       4, // require the third-lowest bit to be clear
		Orderless:     false,
		CountDocsOnly: false,
		Timeout:       200, // NOTE(review): unit is not visible here (presumably milliseconds) — confirm in radic
	}
	response := engine.Search(request)
	if !response.Timeout {
		fmt.Printf("一共有%d條搜索結果:\n", response.Total)
		for _, doc := range response.Docs {
			book := DeserializeBook(doc)
			if book == nil {
				continue // undecodable entry; the error was already printed
			}
			fmt.Printf("book name %s\n", book.Name)
		}
	}
	fmt.Println()

	// Fetch a document by ID. flag=1 retrieves only the detail (Entry);
	// flag=0 retrieves everything.
	// NOTE(review): if GetDoc can return nil for an unknown ID, doc must be
	// checked before use — confirm against radic.go.
	doc := engine.GetDoc(book1.Id, 1)
	if book := DeserializeBook(doc.Entry); book != nil {
		fmt.Printf("book name %s\n", book.Name)
	}
	fmt.Printf("book keywords %v\n", doc.Keyword) // empty here because flag=1 does not fetch keywords
	fmt.Println()

	// Remove a document.
	engine.RemoveDoc(book2.Id)
	fmt.Println()

	// Iterate over the documents in the index.
	engine.IterIndex(func(docId uint32, docInfo types.DocInfo) error {
		book := DeserializeBook(docInfo.Entry)
		if book == nil {
			return nil // skip undecodable entries rather than panicking
		}
		fmt.Printf("book name %s\n", book.Name)
		return nil
	})
}
更多API參見radic.go