import re
import jieba

# Build a stop-word lookup (a set gives the same membership test as the
# original word->word dict, and is the idiomatic container for it)
stopwords = set()
with open('stop_words.txt', 'r', encoding='utf-8', errors='ignore') as fstop:
    for each_word in fstop:
        stopwords.add(each_word.strip())
# Read the raw corpus line by line, strip punctuation, segment with jieba,
# drop stop words, and write the remaining tokens to the output file
with open('all.txt', 'r', encoding='utf-8', errors='ignore') as f1, \
     open('allutf11.txt', 'w', encoding='utf-8') as f2:
    for line in f1:
        line = line.strip()  # remove leading/trailing whitespace
        # Remove digits plus ASCII and full-width punctuation.
        # The hyphen is escaped so it matches a literal '-' instead of
        # defining an unintended character range from ':' to '【'.
        line = re.sub(r"[0-9\s+\.\!\/_,$%^*()?;;:\-【】+\"\']+|"
                      r"[+——!,;:。?、~@#¥%……&*()]+", " ", line)
        seg_list = jieba.cut(line, cut_all=False)  # jieba precise-mode segmentation
        out_str = ""
        for word in seg_list:
            if word not in stopwords:
                out_str += word + " "
        # No newline is written, preserving the original behaviour of
        # emitting one continuous space-separated token stream
        f2.write(out_str)
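
# Quick sanity check of the segmentation step before processing the whole
# corpus. This is a minimal sketch: the sample sentence is an illustrative
# assumption, not a line from the original corpus, and the exact token
# boundaries depend on jieba's dictionary.
sample = "自然语言处理需要先对文本进行分词"
print("/".join(jieba.cut(sample, cut_all=False)))
# Precise mode typically prints something like:
# 自然语言/处理/需要/先/对/文本/进行/分词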
