xml文件讀取與xml文件數據保存(使用YOLO算法的輔助函數)


目前使用YOLO系列算法較多,特別是今年YOLOv4的出現,使我們異常興奮。但某些數據集使用xml格式標注,未能直接轉換成訓練所需的train.txt文件。為此,我寫了一份xml文件讀取代碼,並將其轉換為YOLO等算法訓練所需要的格式,希望對讀者有些幫助與啟示。本博客僅展示代碼與結果圖片。

 

 

代碼如下:

 
        


def read_xml(path_xml):
    """Read the object annotations stored in one XML file.

    The file is expected to contain ``object`` elements, each with a ``name``
    child and a ``bndbox`` child holding ``xmin``/``ymin``/``xmax``/``ymax``.

    :param path_xml: path of the XML annotation file to parse
    :return: ``[labels, boxes]`` where ``labels`` is a flat list of class
        names and ``boxes`` is a list of ``[xmin, ymin, xmax, ymax]`` integer
        lists; ``labels[i]`` belongs to ``boxes[i]``.
    :raises ValueError: if a coordinate is not numeric
    """
    import xml.etree.ElementTree as ET

    # Hand the path to the parser so the file is read as bytes and the
    # encoding declared in the XML prolog is honoured, instead of decoding
    # the file with the platform's default text encoding first.
    root = ET.parse(path_xml).getroot()

    labels = []  # class name of every object in this image
    boxes = []   # matching [xmin, ymin, xmax, ymax] of every object
    for obj in root.findall('object'):
        class_name = obj.find('name').text
        bndbox = obj.find('bndbox')
        # Go through float() so a coordinate written as "48.0" does not make
        # int() raise ValueError; any fractional part is truncated.
        box = [
            int(float(bndbox.find(tag).text))
            for tag in ('xmin', 'ymin', 'xmax', 'ymax')
        ]
        labels.append(class_name)
        boxes.append(box)

    return [labels, boxes]



def result2txt(data, f):
    """Write the boxes of one image to an already-open text file.

    Each box is written as ``xmin,ymin,xmax,ymax,label`` followed by a tab.
    No newline is written. The labels and boxes are also printed to stdout.

    :param data: two-element sequence ``[labels, boxes]``; ``labels[i]`` is
        the label of ``boxes[i]`` and each box holds at least four values
    :param f: object with a ``write`` method that accepts ``str``
    """
    labels = data[0]
    boxes = data[1]
    print(labels)
    print(boxes)
    # Write inside the loop so every box is emitted and an empty label list
    # simply writes nothing.
    for i, label in enumerate(labels):
        box = boxes[i]
        fields = (box[0], box[1], box[2], box[3], label)
        f.write(','.join(str(v) for v in fields) + '\t')




def readtotxt(write_file, path_xml, img_path=None, classes=None):
    """Convert a folder of XML annotations into a single text file.

    One line is written per XML file. Each box becomes
    ``xmin,ymin,xmax,ymax,label`` followed by a tab; when ``img_path`` is
    given, the line starts with ``<img_path>/<xml stem>.jpg`` and a tab.
    An XML file without any ``object`` produces an empty line. Every written
    box string is also printed to stdout.

    :param write_file: path of the ``.txt`` file to create (overwritten)
    :param path_xml: folder containing the XML annotation files
    :param img_path: folder holding the images; when given it is used to
        build the image path that opens each line
    :param classes: optional sequence of class names; when given, each label
        is written as its index in this sequence instead of its name
    :raises ValueError: if ``classes`` is given and a label is not in it
    """
    import os

    # sorted() makes the output order reproducible (os.listdir order is
    # arbitrary); entries that are not *.xml files are skipped so stray
    # files or sub-folders do not abort the conversion.
    xml_names = sorted(
        name for name in os.listdir(path_xml)
        if name.lower().endswith('.xml')
        and os.path.isfile(os.path.join(path_xml, name))
    )

    # The context manager closes the output file even if parsing fails.
    with open(write_file, 'w') as f:
        for name in xml_names:
            result = read_xml(os.path.join(path_xml, name))
            labels = result[0]  # label list, aligned one-to-one with boxes
            boxes = result[1]
            for i, raw_label in enumerate(labels):
                if classes is not None:
                    label = classes.index(raw_label)
                else:
                    label = raw_label
                box = boxes[i]
                s = ','.join(
                    str(v) for v in (box[0], box[1], box[2], box[3], label)
                ) + '\t'
                # Only the first box of a file is prefixed with the image
                # path, so the path appears once at the start of the line.
                if img_path is not None and i == 0:
                    stem, _ = os.path.splitext(name)
                    s = str(img_path + '/' + stem + '.jpg') + '\t' + s
                print(s)
                f.write(s)
            f.write('\n')




if __name__ == "__main__":
    # Class names; passed to readtotxt as its ``classes`` argument.
    voc_classes = [
        'aeroplane', 'bicycle', 'bird', 'boat', 'bottle',
        'bus', 'car', 'cat', 'chair', 'cow',
        'diningtable', 'dog', 'horse', 'motorbike', 'person',
        'pottedplant', 'sheep', 'sofa', 'train', 'tvmonitor',
    ]
    xml_dir = './data/train_data/xml'  # folder of XML files to read
    out_txt = "./data/train_data/2007_train.txt"

    # img_path is the folder where the images are stored.
    readtotxt(
        out_txt,
        xml_dir,
        img_path='./data/train_data/img',
        classes=voc_classes,
    )


 

 

 

讀取與寫入結果如下:

 


免責聲明!

本站轉載的文章為個人學習借鑒使用,本站對版權不負任何法律責任。如果侵犯了您的隱私權益,請聯繫本站郵箱yoyou2525@163.com刪除。



 
粵ICP備18138465號   © 2018-2025 CODEPRJ.COM