Python統計文本單詞出現的頻率生成圖片顯示

本文轉載自查看原文 2018-03-01 15:07 1142 Python

import string
from matplotlib import pyplot as plt
import matplotlib.font_manager as fm

# Module-level placeholder; rebound to the word-count dict when run as a script.
hist = []

def process_line(line, hist):
    """Count the words of one line of text into ``hist``.

    Every whitespace-separated token is stripped of surrounding punctuation
    and whitespace, lower-cased, and its counter in ``hist`` is incremented.

    Args:
        line: One line of text.
        hist: Dict mapping word -> occurrence count; updated in place.

    Returns:
        None. The result is accumulated in ``hist``.
    """
    strip_chars = string.punctuation + string.whitespace
    for token in line.split():
        # str.lower() returns a new string, so the result has to be kept;
        # otherwise "The" and "the" end up under different keys.
        word = token.strip(strip_chars).lower()
        if not word:
            # The token consisted only of punctuation (e.g. "--"); it is not a word.
            continue
        hist[word] = hist.get(word, 0) + 1

def process_file(filename, encoding=None):
    """Build a word-frequency dict from a text file.

    The file is read line by line and each line is handed to
    ``process_line`` together with the accumulating dict.

    Args:
        filename: Path of the text file to read.
        encoding: Text encoding passed to ``open``. The default ``None``
            keeps the platform's default encoding; pass e.g. ``"utf-8"``
            to make decoding independent of the machine's locale.

    Returns:
        The dict filled in by ``process_line``.
    """
    res = {}
    with open(filename, 'r', encoding=encoding) as f:
        for line in f:
            process_line(line, res)
    return res

def most_word(hist, num):
    """Return the ``num`` highest-count entries of ``hist``.

    Args:
        hist: Dict mapping word -> count.
        num: Number of entries to keep.

    Returns:
        A list of ``[count, word]`` lists in descending order. The count
        comes first so that ordering is by count, with ties ordered by
        word, also descending.
    """
    ranked = sorted(
        ([count, word] for word, count in hist.items()),
        reverse=True,
    )
    return ranked[:num]

def showtable(data, font_path=r'C:\Windows\Fonts\simkai.ttf',
              out_path=r'D:\word.png'):
    """Draw a bar chart of word counts, save it as an image and show it.

    Args:
        data: Sequence of ``[count, word]`` pairs, as produced by
            ``most_word``.
        font_path: Font file used for the Chinese axis labels, title and
            legend. Defaults to the KaiTi font of a Windows installation.
        out_path: File the figure is saved to before it is displayed.
    """
    # Raw strings are used for the Windows paths above: sequences such as
    # "\W" or "\w" in a normal literal are invalid escape sequences.
    ZH = fm.FontProperties(fname=font_path)

    # One bar() call per word so that every bar gets its own colour; the
    # label gives the legend something to display.
    for count, word in data:
        plt.bar([word], [count], label=word)

    if data:
        # legend() without any labelled artist only emits a warning.
        plt.legend(prop=ZH)
    plt.xlabel(u'單詞', fontproperties=ZH)
    plt.ylabel(u'頻率', fontproperties=ZH)
    plt.title(u'統計單詞出現的頻率', fontproperties=ZH)

    # Resize the output: 30.5 x 18.5 inches at dpi=100 is 3050 x 1850 pixels.
    figure = plt.gcf()
    figure.set_size_inches(30.5, 18.5)
    figure.savefig(out_path, dpi=100)
    plt.show()


if __name__ == '__main__':
    # Count word frequencies in english.txt (path is relative to the working directory).
    hist = process_file("english.txt")
    # Keep the 30 highest-count [count, word] entries.
    data = most_word(hist,30)
    print(data)
    # Plot the selected entries.
    showtable(data)

免責聲明！

本站轉載的文章為個人學習借鑒使用，本站對版權不負任何法律責任。如果侵犯了您的隱私權益，請聯繫本站郵箱yoyou2525@163.com刪除。

猜您在找： 單詞統計_統計文章中每個單詞出現的頻率 ｜ 統計單詞，字母出現的次數和頻率 ｜ python生成圖片 ｜ java-讀取文件並統計文本中字母和單詞出現的頻率 ｜ 個人作業——統計多個文本文件中的單詞及詞組出現頻率 ｜ Python-統計txt文本中出現頻率最高的詞語 ｜ Python 基礎 - 統計文本裡單詞的個數以及出現的次數 ｜ python統計文本中每個單詞出現的次數 ｜ 統計文件中單詞出現頻率最高的10個以及他們出現的次數 ｜ python統計英文單詞出現次數並把結果生成字典