1 Analyzing English Text
from wordcloud import WordCloud
import os

cur_path = os.path.dirname(__file__)

with open(os.path.join(cur_path, 'love_en.txt')) as fp:
    txt = fp.read()
    # print(txt)

wordcloud = WordCloud().generate(txt)
image = wordcloud.to_image()
image.show()
Running this raises an error: OSError: cannot open resource.
Solution: pass a font file via font_path.
Corrected code:
from wordcloud import WordCloud
import os

cur_path = os.path.dirname(__file__)

with open(os.path.join(cur_path, 'love_en.txt')) as fp:
    txt = fp.read()
    # print(txt)

wordcloud = WordCloud(font_path='FZLTXIHK.TTF').generate(txt)
image = wordcloud.to_image()
image.show()
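Instead of popping up a viewer window with image.show(), the rendered cloud can be written straight to disk with WordCloud.to_file(). A minimal sketch, assuming the same input file; the output name love_en.png is just an example:

from wordcloud import WordCloud
import os

cur_path = os.path.dirname(__file__)

with open(os.path.join(cur_path, 'love_en.txt')) as fp:
    txt = fp.read()

# to_file() renders the cloud and saves it as a PNG next to the script
wordcloud = WordCloud(font_path='FZLTXIHK.TTF').generate(txt)
wordcloud.to_file(os.path.join(cur_path, 'love_en.png'))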
Further optimizing the code:
from wordcloud import WordCloud
import os

cur_path = os.path.dirname(__file__)

with open(os.path.join(cur_path, 'love_en.txt')) as fp:
    txt = fp.read()
    # print(txt)

wordcloud = WordCloud(font_path='FZLTXIHK.TTF',    # font file
                      background_color='black',    # background color
                      max_words=30,                # maximum number of words to show
                      max_font_size=60             # font size of the most frequent word
                      ).generate(txt)
image = wordcloud.to_image()
image.show()
Result:
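If the rendered image looks coarse, the canvas can be enlarged. width, height and scale are standard WordCloud parameters; the values below are arbitrary and only illustrate the idea:

from wordcloud import WordCloud
import os

cur_path = os.path.dirname(__file__)

with open(os.path.join(cur_path, 'love_en.txt')) as fp:
    txt = fp.read()

# a larger canvas plus a scale factor gives a sharper image
wordcloud = WordCloud(font_path='FZLTXIHK.TTF',
                      background_color='black',
                      max_words=30,
                      max_font_size=60,
                      width=800,    # canvas width in pixels (default 400)
                      height=400,   # canvas height in pixels (default 200)
                      scale=2       # upscaling factor applied when rendering
                      ).generate(txt)
wordcloud.to_file(os.path.join(cur_path, 'love_en_large.png'))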
2 Analyzing Chinese Text
import jieba
from wordcloud import WordCloud
import os

cur_path = os.path.dirname(__file__)

def chinese_jieba(txt):
    wordlist_jieba = jieba.cut(txt)        # segment the text; returns a generator of words
    txt_jieba = " ".join(wordlist_jieba)   # join the words into a space-separated string
    return txt_jieba

stopwords = {'這些', '那些', '因為', '所以'}  # noise words to filter out

with open(os.path.join(cur_path, '擇天記.txt')) as fp:
    txt = fp.read()
    txt = chinese_jieba(txt)
    # print(txt)

wordcloud = WordCloud(font_path='FZLTXIHK.TTF',    # font file (must support Chinese)
                      background_color='black',    # background color
                      max_words=30,                # maximum number of words to show
                      max_font_size=60,            # font size of the most frequent word
                      stopwords=stopwords          # filter out the noise words
                      ).generate(txt)
image = wordcloud.to_image()
image.show()
Result:
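To see what chinese_jieba() actually feeds into WordCloud, jieba.cut can be run on a short sample sentence. The sentence below is made up for illustration, and the exact segmentation depends on jieba's dictionary:

import jieba
from wordcloud import STOPWORDS

sample = '因為今天天氣很好,所以我們去公園散步'   # made-up sample sentence
print(' '.join(jieba.cut(sample)))              # space-separated words, ready for WordCloud

# the custom noise words can also be merged with wordcloud's built-in English STOPWORDS set
stopwords = set(STOPWORDS) | {'這些', '那些', '因為', '所以'}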
3 Further Improving the Visual Effect
import jieba
from wordcloud import WordCloud
import os
import numpy
import PIL.Image as Image

cur_path = os.path.dirname(__file__)

def chinese_jieba(txt):
    wordlist_jieba = jieba.cut(txt)        # segment the text; returns a generator of words
    txt_jieba = " ".join(wordlist_jieba)   # join the words into a space-separated string
    return txt_jieba

stopwords = {'這些', '那些', '因為', '所以'}  # noise words to filter out
mask_pic = numpy.array(Image.open(os.path.join(cur_path, 'love.jpg')))  # mask image as a numpy array

with open(os.path.join(cur_path, '擇天記.txt')) as fp:
    txt = fp.read()
    txt = chinese_jieba(txt)
    # print(txt)

wordcloud = WordCloud(font_path='FZLTXIHK.TTF',    # font file (must support Chinese)
                      background_color='white',    # background color
                      max_words=100,               # maximum number of words to show
                      max_font_size=60,            # font size of the most frequent word
                      stopwords=stopwords,         # filter out the noise words
                      mask=mask_pic                # shape the cloud to the mask image
                      ).generate(txt)
image = wordcloud.to_image()
image.show()
Result:
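One more refinement, not used in the code above, is to recolor the words with the colors of the mask image itself. ImageColorGenerator and WordCloud.recolor() are part of the wordcloud package; the file names reuse those from the script, and the output name love_colored.png is just an example:

import os
import numpy
import PIL.Image as Image
import jieba
from wordcloud import WordCloud, ImageColorGenerator

cur_path = os.path.dirname(__file__)
mask_pic = numpy.array(Image.open(os.path.join(cur_path, 'love.jpg')))  # same mask image as above
image_colors = ImageColorGenerator(mask_pic)   # picks colors from the mask's own pixels

with open(os.path.join(cur_path, '擇天記.txt')) as fp:
    txt = " ".join(jieba.cut(fp.read()))       # segment the novel as before

wordcloud = WordCloud(font_path='FZLTXIHK.TTF',
                      background_color='white',
                      mask=mask_pic
                      ).generate(txt)
wordcloud.recolor(color_func=image_colors)     # repaint each word with the mask's local color
wordcloud.to_file(os.path.join(cur_path, 'love_colored.png'))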