import os

import jieba
import jieba.analyse
import jieba.posseg as pseg

# Text to analyse. `cleaned_comments` is not defined in this snippet; it is
# expected to be produced by an earlier comment-cleaning step.
# NOTE(review): presumably a single string, which is what jieba.lcut
# operates on -- confirm against the code that builds it.
data = cleaned_comments

# Baseline segmentation using only jieba's built-in dictionary.
seg = jieba.lcut(data)
print(seg)

# Load a custom user dictionary from the current working directory, then
# segment again so the result can be compared with the baseline above.
mydict = os.path.join(os.getcwd(), "mydict.txt")
jieba.load_userdict(mydict)
seg = jieba.lcut(data)
print(seg)

# Part-of-speech tagging: each item pairs a token with its POS flag.
posseg = pseg.lcut(data)
print(posseg)

# Keyword extraction. extract_tags ranks terms by TF-IDF weight (not by raw
# occurrence count) and returns the top 20; withWeight=False yields only the
# words, and an empty allowPOS applies no part-of-speech filter.
extracttext = jieba.analyse.extract_tags(
    data, topK=20, withWeight=False, allowPOS=()
)
print(extracttext)
# 待續…… (to be continued)