将xml文件转为txt文件


import os
import re
import sys
import glob
import xml.etree.ElementTree as ET

# Matches opening/closing HTML-style tags embedded in element text.
_TAG_RE = re.compile(r'</?\w+[^>]*>')


def _strip_markup(text):
    """Remove HTML-style tags from *text*, replace "&nbsp;" with a space, and trim."""
    return _TAG_RE.sub('', text).replace("&nbsp;", " ").strip()


def xml_to_txt(indir, outdir):
    """Write the content/caption text of every ``*.xml`` file in *indir* to a ``.txt`` file.

    For each ``PostItem`` element of each XML file, every descendant whose tag
    contains ``content`` or ``caption`` triggers one output line holding the
    cleaned text of the item's ``content`` / ``caption`` child.  Cleaning
    strips HTML-style tags, turns ``&nbsp;`` into a space and trims
    surrounding whitespace.  Each line is also printed to stdout.

    Args:
        indir: Directory that is searched (non-recursively) for ``*.xml`` files.
        outdir: Directory receiving one ``<name>.txt`` per input file; it is
            created if missing.  A relative path is resolved against *indir*,
            which is what the earlier ``os.chdir(indir)``-based version did.

    Raises:
        FileNotFoundError: If *indir* is not an existing directory.
        xml.etree.ElementTree.ParseError: If an input file is not well-formed XML.
    """
    if not os.path.isdir(indir):
        raise FileNotFoundError("input directory not found: %r" % (indir,))

    # Resolve paths explicitly instead of os.chdir(), so the caller's working
    # directory is left untouched.
    out_root = os.path.join(indir, outdir)
    os.makedirs(out_root, exist_ok=True)

    # glob.escape() keeps characters such as "[" in indir from being treated
    # as wildcards.
    pattern = os.path.join(glob.escape(indir), '*.xml')

    for xml_path in sorted(glob.glob(pattern)):
        # splitext() only drops the final ".xml", so "a.b.xml" -> "a.b.txt".
        stem = os.path.splitext(os.path.basename(xml_path))[0]
        txt_path = os.path.join(out_root, stem + '.txt')

        root = ET.parse(xml_path).getroot()

        # The context manager guarantees the output file is flushed and closed.
        with open(txt_path, 'w', encoding="utf-8") as f_w:
            for obj in root.iter('PostItem'):
                for ele in obj.iter():
                    for field in ('content', 'caption'):
                        if field not in ele.tag:
                            continue
                        # Prefer the item's direct child, as before; fall back
                        # to the matched element when there is no such child
                        # (e.g. nested or namespaced tags) instead of crashing.
                        node = obj.find(field)
                        if node is None:
                            node = ele
                        if node.text:
                            cleaned = _strip_markup(node.text)
                            print(cleaned)
                            f_w.write(cleaned)
                            f_w.write("\n")

# Raw strings keep the Windows backslashes literal without relying on
# unrecognized escape sequences such as "\D" (a SyntaxWarning on newer Pythons).
indir = r'E:\Data\demo-xml'  # input directory containing the .xml files
outdir = r'E:\Data\demo-txt'  # output directory for the generated .txt files

xml_to_txt(indir, outdir)


免责声明!

本站转载的文章仅供个人学习借鉴使用，本站对版权不负任何法律责任。如果侵犯了您的隐私权益，请联系本站邮箱 yoyou2525@163.com 删除。



 
粤ICP备18138465号  © 2018-2025 CODEPRJ.COM