[python基礎] python生成wordcloud並保存


1.核心包

# jieba and pandas process the data; the source data is stored as .xls,
# so pandas is used to read it.
import jieba
from jieba import analyse
import pandas as pd

# scipy and wordcloud are used to build the word cloud.
from scipy.misc import imread
from wordcloud import ImageColorGenerator, WordCloud

# matplotlib displays and saves the generated word-cloud image.
import matplotlib.pyplot as plt

2.過程

import io
import sys

import jieba
from jieba import analyse
import matplotlib.pyplot as plt
import pandas as pd
from wordcloud import ImageColorGenerator
from wordcloud import WordCloud

try:
    from scipy.misc import imread
except ImportError:
    # scipy.misc.imread is gone from current SciPy releases; fall back to
    # matplotlib's reader so the name `imread` stays available to the
    # word-cloud step below.
    from matplotlib.pyplot import imread

if sys.version_info[0] == 2:
    # Python 2 only: keep the original process-wide default-encoding switch
    # so any other code relying on it behaves as before. The functions in
    # this section no longer depend on it (they open files with an explicit
    # encoding) and the calls do not exist on Python 3.
    reload(sys)  # noqa: F821
    sys.setdefaultencoding('utf-8')


# 1. Stop words
def stop_words():
    """Load the stop-word list from ``./百度停用詞列表.txt``.

    The file is read as UTF-8, one stop word per line; surrounding
    whitespace is stripped from every line.

    Returns:
        set: the stop words as text (unicode) strings.
    """
    with io.open(u'./百度停用詞列表.txt', 'r', encoding='utf-8') as f:
        return {line.strip() for line in f}


# 2. Segment the text and drop stop words
def remove_stopwords(stop_words):
    """Segment each row of ``./11.xls`` and write the result to ``./weibo.txt``.

    Reads the ``內容`` column, cuts every cell with ``jieba.cut``, removes
    tokens found in ``stop_words`` and duplicate tokens within the same
    cell, and writes the remaining tokens of each cell concatenated on one
    line of the UTF-8 output file.

    Tokens are kept in first-occurrence order so the output file is
    deterministic. No filtering of English words or emoji is performed here,
    only stop-word removal and de-duplication.

    Args:
        stop_words: collection of tokens to discard (normally the set
            returned by ``stop_words()``).
    """
    text_type = type(u'')
    source_data = pd.read_excel('./11.xls')
    content = source_data[u'內容']
    # `with` guarantees the output file is closed even if segmentation fails.
    with io.open('./weibo.txt', 'w', encoding='utf-8') as out:
        for line in content:
            if not isinstance(line, text_type):
                # Empty spreadsheet cells arrive as NaN (a float), which
                # jieba cannot segment; skip anything that is not text.
                continue
            seen = set()
            kept = []
            for token in jieba.cut(line):
                if token in stop_words or token in seen:
                    continue
                seen.add(token)
                kept.append(token)
            out.write(u''.join(kept) + u'\n')


# 3. Keyword weights
def get_frequency_words(file):
    """Return the top keywords of a UTF-8 text file with their weights.

    Args:
        file: path of the text file to analyse. (The name shadows the
            Python 2 builtin ``file``; it is kept so keyword callers still
            work.)

    Returns:
        dict: maps each keyword to its weight, built from the (word, weight)
        pairs returned by ``jieba.analyse.textrank`` with ``topK=400``.
    """
    with io.open(file, 'r', encoding='utf-8') as f:
        texts = f.read()
    # withWeight=True makes textrank return (word, weight) pairs.
    top_words = analyse.textrank(texts, topK=400, withWeight=True)
    return dict(top_words)
# 4. Generate the word-cloud image and save it
def generate_word_cloud(dict):
    """Render a word cloud from a word -> weight mapping and save it.

    Two files are written: ``word_cloud.jpg`` (the cloud with wordcloud's
    own colours, saved before recolouring) and ``./result2.png`` (the cloud
    recoloured from ``./background.jpg`` and rendered through matplotlib).

    Args:
        dict: mapping of word to weight, passed to
            ``WordCloud.generate_from_frequencies``. (The name shadows the
            builtin ``dict``; it is kept so existing keyword callers still
            work.)
    """
    color_mask = imread('./background.jpg')
    cloud = WordCloud(
        # Font file. Without an explicit font Chinese text comes out
        # garbled; the font file name must not contain Chinese characters.
        font_path="./static/chinese.msyh.ttf",
        # Background colour (the default is black).
        background_color='white',
        # Shape of the cloud. Canvas width/height settings have no effect
        # once a mask is given.
        mask=color_mask,
        # Maximum number of words drawn.
        max_words=400,
        # Largest font size; if unspecified, the image height is used.
        max_font_size=150,
        # Share of words laid out horizontally (default 0.9, i.e. 0.1 are
        # vertical).
        prefer_horizontal=0.8,
    )
    cloud.generate_from_frequencies(frequencies=dict)
    cloud.to_file('word_cloud.jpg')

    # Hide the axes.
    plt.axis('off')
    # Recolour the words from the background image, then draw and save.
    image_colors = ImageColorGenerator(color_mask)
    plt.imshow(cloud.recolor(color_func=image_colors))
    plt.savefig('./result2.png')


def main():
    """Run the pipeline: stop words -> segmentation -> weights -> image."""
    # Local name differs from the stop_words() function so the function is
    # not rebound to its own result.
    stop_word_set = stop_words()
    remove_stopwords(stop_words=stop_word_set)
    words_frequency = get_frequency_words('./weibo.txt')
    generate_word_cloud(words_frequency)


if __name__ == '__main__':
    main()

[注]:(1).WordCloud 實例的 generate_from_text(text=text) 可以直接由文本生成詞雲,但它是按空白和標點來切分單詞的,因此適用於英文等以空格分詞的文本;中文文本需先分詞(例如用 jieba)並以空格連接後再傳入。

  (2).wordcloud.generate_from_frequencies(frequencies=dict)由詞頻字典生成詞雲,詞頻越大則顯示該詞size越大
[結果]:

 


免責聲明!

本站轉載的文章為個人學習借鑒使用,本站對版權不負任何法律責任。如果侵犯了您的隱私權益,請聯系本站郵箱yoyou2525@163.com刪除。



 
粵ICP備18138465號   © 2018-2025 CODEPRJ.COM