"""Report the names that occur most often in Romance of the Three Kingdoms.

The novel text is read from ``三國演義.txt``, segmented into words with jieba,
and the ten most frequent names are printed to the console and written to
``result.txt``.
"""

NOVEL_PATH = '三國演義.txt'
RESULT_PATH = 'result.txt'
TOP_N = 10

# Frequent words that are not character names and must not be ranked.
EXCLUDES = {
    "將軍", "卻說", "荊州", "二人", "不可", "不能", "如此", "商議", "如何", "主公",
    "軍士", "左右", "軍馬", "引兵", "次日", "大喜", "天下", "東吳", "於是", "今日",
    "不敢", "魏兵", "人馬", "陛下", "一人", "不知", "漢中", "只見", "眾將", "蜀兵",
    "丞相",
}

# Alternative spellings / segmentation artefacts folded into one canonical name.
ALIASES = {
    '孔明曰': '孔明',
    '諸葛亮': '孔明',
    '玄德曰': '劉備',
    '玄德': '劉備',
    '雲長': '關羽',
    '關公': '關羽',
}


def count_names(words, excludes=EXCLUDES):
    """Count word occurrences, merging aliases and dropping excluded words.

    Args:
        words: Iterable of segmented words.
        excludes: Words to remove from the tally after counting.

    Returns:
        A dict mapping each remaining word to its number of occurrences, in
        first-seen order.
    """
    counts = {}
    for word in words:
        if len(word) == 1:
            # Single characters are punctuation or lone characters, not names.
            continue
        name = ALIASES.get(word, word)
        counts[name] = counts.get(name, 0) + 1

    for word in excludes:
        # pop() with a default: an excluded word may never occur in the text,
        # in which case a plain ``del`` would raise KeyError.
        counts.pop(word, None)
    return counts


def top_entries(counts, limit=TOP_N):
    """Return up to *limit* ``(word, count)`` pairs, most frequent first.

    The sort is stable, so words with equal counts keep first-seen order.
    Slicing (rather than indexing) tolerates fewer than *limit* entries.
    """
    ranked = sorted(counts.items(), key=lambda item: item[1], reverse=True)
    return ranked[:limit]


def format_row(name, count):
    """Format one result row: name left-aligned in 10, count right-aligned in 5."""
    return '{:<10}{:>5}'.format(name, count)


def main():
    """Read the novel, rank the names and emit the top results."""
    # Imported here so the counting helpers above remain importable (and
    # testable) on machines where jieba is not installed.
    import jieba

    with open(NOVEL_PATH, 'r', encoding='utf-8') as novel:
        content = novel.read()

    print('--------------------------------------------------------------------')

    # Use the public segmentation API rather than the private jieba._lcut.
    words = jieba.lcut(content)
    ranking = top_entries(count_names(words, EXCLUDES), TOP_N)

    with open(RESULT_PATH, 'w', encoding='utf-8') as result:
        for name, count in ranking:
            row = format_row(name, count)
            print(row)  # echo to the console
            result.write(row + '\n')  # and persist to the result file


if __name__ == '__main__':
    main()
運行結果: