import jieba

fp1 = r'D:/python/a.txt'
outph = r'D:/python/out.txt'

# Segment the text and write one word per line to the output file.
with open(fp1, 'r', encoding='utf-8') as f:
    txt = f.read().strip()

words = jieba.lcut(txt)
with open(outph, 'w', encoding='utf-8') as f:
    for word in words:
        f.write(word)
        f.write('\n')

# Question 2: strip punctuation and count word frequencies.
bd = '[’!"#$%&\'()*+,-./:;<=>?@[\\]^_`{|}~]+,。!?“”《》:、. '
counts = {}
exlutes = {'作者', '之后'}
for i in bd:
    txt = txt.replace(i, '')            # remove punctuation by string replacement
words = jieba.lcut(txt)                 # re-segment the cleaned text
for word in words:
    if len(word) == 1:                  # skip single-character tokens
        continue
    counts[word] = counts.get(word, 0) + 1   # count every remaining word
for word in exlutes:
    counts.pop(word, None)              # drop excluded words (no error if absent)
items = list(counts.items())
items.sort(key=lambda x: x[1], reverse=True)
for i in range(min(15, len(items))):    # print the top 15 (or fewer) words
    word, count = items[i]
    print("{0:>10}---{1:<5}".format(word, count))
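
# A minimal alternative sketch, not part of the original script: the same
# frequency count can be expressed with collections.Counter. It assumes
# `txt` has already been cleaned of punctuation and that `exlutes` is the
# exclusion set defined above.
# from collections import Counter
#
# counts = Counter(w for w in jieba.lcut(txt) if len(w) > 1)
# for w in exlutes:
#     counts.pop(w, None)                 # drop excluded words if present
# for word, count in counts.most_common(15):
#     print("{0:>10}---{1:<5}".format(word, count))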