fasttext的使用,語料格式,調用方法


數據格式:分詞後的句子+\t__label__+標籤

fasttext_model.py

from fasttext import FastText
import numpy as np

def get_data_path(by_word=True, train=True):
    """Return the relative path of the corpus file to use.

    Args:
        by_word: Truthy selects the ``data_by_word_*`` files, falsy selects
            the ``data_*`` files.
        train: Truthy selects the ``*_train.txt`` file, falsy selects the
            ``*_test.txt`` file.

    Returns:
        One of the four hard-coded paths under ``./classify/``.
    """
    # Keyed on (by_word, train) after coercing both flags to bool, so any
    # truthy/falsy argument behaves as it would in an ``if`` test.
    corpus_files = {
        (True, True): "./classify/data_by_word_train.txt",
        (True, False): "./classify/data_by_word_test.txt",
        (False, True): "./classify/data_train.txt",
        (False, False): "./classify/data_test.txt",
    }
    return corpus_files[(bool(by_word), bool(train))]

def prepar_model():
    """Train a supervised fastText classifier and save it to disk.

    Trains on the by-word training corpus returned by ``get_data_path`` with
    ``dim=100``, ``epoch=20`` and ``wordNgrams=2``, then writes the model to
    ``./fasttext_model/classify_by_word_100_20_2.model``.
    """
    train_file = get_data_path(train=True, by_word=True)
    # The hyperparameter values are mirrored in the saved model's file name.
    hyper_params = {"dim": 100, "epoch": 20, "wordNgrams": 2}
    classifier = FastText.train_supervised(train_file, **hyper_params)
    classifier.save_model("./fasttext_model/classify_by_word_100_20_2.model")

def ceshi_model():
    """Evaluate the saved classifier on the by-word test set and print accuracy.

    Loads the model from ``./fasttext_model/classify_by_word_100_20_2.model``,
    reads the test corpus returned by ``get_data_path(by_word=True,
    train=False)``, and prints the fraction of sentences whose first predicted
    label equals the label stored in the file.

    Each usable line of the test file is ``<sentence>\\t<label>``; lines that
    do not split into exactly two tab-separated fields are skipped.
    """
    model = FastText.load_model("./fasttext_model/classify_by_word_100_20_2.model")
    test_data_path = get_data_path(by_word=True, train=False)

    sentences = []
    labels = []
    # The context manager guarantees the file is closed, and iterating the
    # handle streams lines instead of loading the whole file with readlines().
    with open(test_data_path, encoding="utf-8") as test_file:
        for line in test_file:
            fields = line.strip().split("\t")
            if len(fields) == 2:
                sentences.append(fields[0])
                labels.append(fields[1])

    if not labels:
        # Nothing to score: the mean of an empty list is nan. Print it
        # directly to avoid NumPy's "mean of empty slice" RuntimeWarning.
        print(float("nan"))
        return

    # predict() returns (labels, probabilities); keep only the labels and take
    # the first label of each per-sentence result.
    predictions = model.predict(sentences)[0]
    top_labels = [candidates[0] for candidates in predictions]
    acc = np.mean(
        [1 if expected == predicted else 0
         for expected, predicted in zip(labels, top_labels)]
    )
    print(acc)

if __name__ == '__main__':
    # Train and save the model first; ceshi_model() then loads that same
    # saved model file to evaluate it.
    prepar_model()
    ceshi_model()

  

 


免責聲明!

本站轉載的文章為個人學習借鑒使用,本站對版權不負任何法律責任。如果侵犯了您的隱私權益,請聯繫本站郵箱yoyou2525@163.com刪除。



 
粵ICP備18138465號   © 2018-2025 CODEPRJ.COM