首先,讀取所有 xml 文件的完整路徑,寫入 train.txt 文本文檔中;
然後讀取該 TXT 文檔,逐行取得 xml 文檔的路徑並解析,同時建立文件夾,用於保存解析好的 TXT。寫入 TXT 時,只需要保存類別名和坐標信息即可,每個目標佔一行,中間用 Tab 分隔。
#!/usr/bin/env python
# coding:utf-8
"""Convert XML bounding-box annotations into tab-separated text files.

The script works in two passes:

1. Collect the full path of every annotation file matching ``XML_PATTERN``
   and write the paths, one per line, to ``LIST_PATH``.
2. Read that list back, parse each XML file and append one line per
   ``<object>`` element to ``<OUTPUT_DIR>/<image stem>.txt`` in the form
   ``name<TAB>xmin<TAB>ymin<TAB>xmax<TAB>ymax``.
"""
import glob
import os
import sys  # not used by this script; kept from the original import list

try:
    import xml.etree.cElementTree as ET
except ImportError:
    import xml.etree.ElementTree as ET

# Glob pattern selecting the annotation files to convert.
XML_PATTERN = 'F:/snow leopard/000_IMAGE_FRAME/000_B_XML/' + '*xml'
# Text file that receives the list of annotation paths (one per line).
LIST_PATH = 'train.txt'
# Directory that receives the converted per-image text files.
OUTPUT_DIR = 'F:/snow leopard/Data preprocessing/txt'

_BOX_TAGS = ('xmin', 'ymin', 'xmax', 'ymax')


def _write_file_list(xml_paths, list_path):
    """Write every path in *xml_paths* to *list_path*, one per line."""
    with open(list_path, 'w') as list_file:
        for xml_path in xml_paths:
            list_file.write(xml_path)
            list_file.write('\n')


def _read_file_list(list_path):
    """Return the non-blank lines of *list_path* without line endings."""
    with open(list_path) as list_file:
        # Strip only the line terminator so a final line without a newline
        # does not lose its last character.
        return [line.rstrip('\r\n') for line in list_file if line.strip()]


def convert_annotation(xml_path, output_dir):
    """Append the objects found in *xml_path* to a text file in *output_dir*.

    The output file is named after the ``<filename>`` element of the XML
    (extension removed); when that element is missing or empty, the name of
    the XML file itself is used. Each ``<object>`` becomes one line:
    ``name<TAB>xmin<TAB>ymin<TAB>xmax<TAB>ymax``. Objects lacking a name or
    any bounding-box coordinate are reported and skipped. No text file is
    created when the XML contains no usable object.

    Returns the number of lines written.
    """
    root = ET.parse(xml_path).getroot()
    print("*" * 10)

    image_name = root.findtext('filename') or os.path.basename(xml_path)
    # splitext copes with extensions of any length (".jpg", ".jpeg", ...).
    stem = os.path.splitext(image_name)[0]
    print(stem)

    if not os.path.isdir(output_dir):
        # makedirs also creates missing parent directories.
        os.makedirs(output_dir)

    for size in root.findall('size'):
        print(size.findtext('width'), size.findtext('height'))

    rows = []
    for obj in root.findall('object'):
        name = obj.findtext('name')
        bndbox = obj.find('bndbox')
        coords = [] if bndbox is None else [bndbox.findtext(tag) for tag in _BOX_TAGS]
        if name is None or len(coords) != len(_BOX_TAGS) or None in coords:
            print('skipping incomplete <object> in ' + xml_path)
            continue
        print(name)
        print(' '.join(coords))
        rows.append('\t'.join([name] + coords))

    if not rows:
        return 0

    # Append mode is kept so several XML files that name the same image
    # accumulate into one text file; note that re-running the script will
    # therefore add the same lines again.
    with open(os.path.join(output_dir, stem + '.txt'), 'a') as txt_file:
        for row in rows:
            # One object per line; without the newline consecutive records
            # would run into each other.
            txt_file.write(row + '\n')
    return len(rows)


def main(xml_pattern=XML_PATTERN, list_path=LIST_PATH, output_dir=OUTPUT_DIR):
    """List the annotation files matching *xml_pattern* and convert them all."""
    # Sorted so the generated list is reproducible between runs.
    _write_file_list(sorted(glob.glob(xml_pattern)), list_path)
    for xml_path in _read_file_list(list_path):
        convert_annotation(xml_path, output_dir)


if __name__ == '__main__':
    main()
參考: https://www.cnblogs.com/rainsoul/p/6283231.html