自己動手寫搜索引擎


radic 是我用 Go 語言實現的搜索引擎索引部分,包括:
倒排索引
正排索引
分布式索引

使用舉例

首先需要執行 go get github.com/Orisun/radic 獲取 radic。

package main

import (
	"radic/types"
	"bytes"
	"encoding/gob"
	"fmt"
	"radic"
	"time"
)

// Book is the example document type; this walkthrough uses book search as its scenario.
type Book struct {
	Id      uint32
	Name    string
	Price   float32
	Chinese bool // whether this is the Chinese-language edition
}

// Serialize encodes the book with encoding/gob and returns the encoded bytes.
//
// How documents are serialized and deserialized is the caller's decision, not
// radic's responsibility; gob ships with Go, but protobuf or any other format
// would work as well.
//
// If encoding fails, the error is printed to standard output and an empty,
// non-nil slice is returned.
func (b *Book) Serialize() []byte {
	var buf bytes.Buffer
	if err := gob.NewEncoder(&buf).Encode(b); err != nil {
		fmt.Println(err)
		return []byte{}
	}
	return buf.Bytes()
}

// DeserializeBook decodes a gob-encoded Book from v.
// If decoding fails, the error is printed to standard output and nil is returned.
func DeserializeBook(v []byte) *Book {
	book := new(Book)
	if err := gob.NewDecoder(bytes.NewReader(v)).Decode(book); err != nil {
		fmt.Println(err)
		return nil
	}
	return book
}

// main walks through the radic API end to end: it initialises an index, adds
// two books, searches by keyword, fetches a document by ID, removes a document,
// and finally iterates over the index.
//
// DeserializeBook returns nil (after printing the error) when an entry cannot
// be decoded, so every call site below checks for nil before touching the book.
func main() {
	// Initialise the index.
	options := types.IndexerOpts{}
	engine := radic.IndexEngine{}
	engine.Init(options, types.DistOpts{}, "radic.log") // an empty DistOpts selects a single-machine index
	defer engine.Destroy()

	// Add documents to the index.
	book1 := Book{Id: 1, Name: "工業機器學習算法詳解與實戰", Price: 100.0, Chinese: true}
	book2 := Book{Id: 2, Name: "effective go", Price: 200.0, Chinese: false}
	kw1 := &types.Keyword{Field: "field", Word: "算法"}
	kw2 := &types.Keyword{Field: "title", Word: "工業"}
	kw3 := &types.Keyword{Field: "title", Word: "機器學習"}
	kw4 := &types.Keyword{Field: "field", Word: "編程"}
	kw5 := &types.Keyword{Field: "title", Word: "go"}
	docInfo1 := types.DocInfo{
		DocId:            uint32(book1.Id),
		Keyword:          []*types.Keyword{kw1, kw2, kw3},
		RankScore:        book1.Price,
		Entry:            book1.Serialize(),
		CompositeFeature: 1, // in binary, the lowest bit is 1
	}
	docInfo2 := types.DocInfo{
		DocId:            uint32(book2.Id),
		Keyword:          []*types.Keyword{kw4, kw5},
		RankScore:        book2.Price,
		Entry:            book2.Serialize(),
		CompositeFeature: 3, // in binary, the lowest two bits are both 1
	}
	engine.IndexDoc(docInfo1) // adding to the index happens asynchronously
	engine.IndexDoc(docInfo2)
	time.Sleep(100 * time.Millisecond) // wait a moment for the add operations to finish
	engine.InvertIndexStatistics("")   // calling InvertIndexStatistics once after building the index helps search speed

	// Search by keyword.
	keyword := &types.Keyword{Field: "title", Word: "go"}
	request := types.SearchRequest{
		Must:          []*types.Keyword{keyword},
		Should:        nil,
		Not:           nil,
		OutputOffset:  0,
		OnFlag:        1, // require the lowest bit to be 1
		OffFlag:       4, // require the third-lowest bit not to be 1
		Orderless:     false,
		CountDocsOnly: false,
		Timeout:       200,
	}
	response := engine.Search(request)
	if !response.Timeout {
		fmt.Printf("一共有%d條搜索結果:\n", response.Total)
		for _, doc := range response.Docs {
			book := DeserializeBook(doc)
			if book == nil {
				// The decode error was already printed; skip this result
				// instead of dereferencing a nil *Book.
				continue
			}
			fmt.Printf("book name %s\n", book.Name)
		}
	}
	fmt.Println()

	// Look up a document by doc ID.
	doc := engine.GetDoc(book1.Id, 1) // fetch one doc's details by ID; flag=1 fetches only the detail, flag=0 fetches everything
	if book := DeserializeBook(doc.Entry); book != nil {
		fmt.Printf("book name %s\n", book.Name)
	}
	fmt.Printf("book keywords %v\n", doc.Keyword) // Keyword is not populated because flag=1
	fmt.Println()

	// Remove a document.
	engine.RemoveDoc(book2.Id)
	fmt.Println()

	// Iterate over the data in the index.
	engine.IterIndex(func(docId uint32, docInfo types.DocInfo) error {
		if book := DeserializeBook(docInfo.Entry); book != nil {
			fmt.Printf("book name %s\n", book.Name)
		}
		return nil
	})
}

更多API參見radic.go


免責聲明!

本站轉載的文章為個人學習借鑒使用,本站對版權不負任何法律責任。如果侵犯了您的隱私權益,請聯繫本站郵箱yoyou2525@163.com刪除。



 
粵ICP備18138465號   © 2018-2025 CODEPRJ.COM