pascalVOC 標注文件,解析為TXT


首先,讀取所有xml文件的完整路徑,逐行寫入 train.txt 文本文檔中。

然後讀取該TXT文檔,逐行取得xml文件路徑並解析;同時建立一個文件夾,用於保存解析好的TXT。寫入TXT時,只需要保存類別名和坐標信息即可,各欄位之間用Tab分隔。

 

#!/usr/bin/env python
# coding:utf-8 
import os
import glob

try: 
  import xml.etree.cElementTree as ET 
except ImportError: 
  import xml.etree.ElementTree as ET 
import sys 

# Collect the full path of every Pascal VOC annotation file in the source
# folder. The pattern is '*.xml' (with the dot) so that only files carrying
# the .xml extension match, not any name that merely ends in "xml".
# sorted() makes the content of train.txt reproducible between runs, since
# glob does not guarantee any particular order.
filename = sorted(glob.glob('F:/snow leopard/000_IMAGE_FRAME/000_B_XML/' + '*.xml'))

# Write one annotation path per line. The with-statement closes the file
# even if a write fails part-way through.
with open('train.txt', 'w') as fileObject:
    for ip in filename:
        fileObject.write(ip + '\n')


# Convert every annotation listed in train.txt into a tab-separated TXT file.
#
# For each XML path read from train.txt, the script parses the document,
# prints the image size and every object it finds, and appends one line per
# <object> to "<dir_name>/<image name without extension>.txt" in the form:
#     name<TAB>xmin<TAB>ymin<TAB>xmax<TAB>ymax
#
# NOTE: output files are opened in append mode, so running the script twice
# adds the same records again; clear the output folder before a re-run.

dir_name = 'F:/snow leopard/Data preprocessing/txt'
# Create the output folder once, before the loop. makedirs also creates any
# missing parent folders, which a plain mkdir would fail on.
if not os.path.isdir(dir_name):
    os.makedirs(dir_name)

with open("train.txt") as file_srx:  # holds the path of every XML file to process
    for line in file_srx:
        # Strip only the line terminator, so a final line without a trailing
        # newline does not lose its last character.
        f = line.rstrip('\r\n')
        if not f:
            continue  # ignore blank lines

        tree = ET.parse(f)      # open the XML document
        root = tree.getroot()   # get the root node
        print("*" * 10)

        # Use the image name stored in the annotation; when the <filename>
        # element is missing or empty, fall back to the XML file's own name.
        name_node = root.find('filename')
        if name_node is not None and name_node.text:
            image_name = name_node.text
        else:
            image_name = os.path.basename(f)
        # splitext drops the extension whatever its length (.jpg, .jpeg, none).
        filename = os.path.splitext(image_name)[0]
        print(filename)

        # Report the image dimensions recorded under each <size> node.
        for size in root.findall('size'):
            width = size.find('width').text
            height = size.find('height').text
            print(width, height)

        # Build one tab-separated record per <object> node under the root.
        records = []
        for obj in root.findall('object'):
            name = obj.find('name').text
            print(name)
            bndbox = obj.find('bndbox')
            xmin = bndbox.find('xmin').text
            ymin = bndbox.find('ymin').text
            xmax = bndbox.find('xmax').text
            ymax = bndbox.find('ymax').text
            # End each record with a newline so that consecutive objects do
            # not run together on a single line.
            records.append('\t'.join((name, xmin, ymin, xmax, ymax)) + '\n')
            print(xmin, ymin, xmax, ymax)

        # Open the output file once per annotation, and only when there is
        # something to write: an XML without objects produces no TXT file.
        if records:
            with open(dir_name + '/' + filename + ".txt", "a") as file_object_txt:
                file_object_txt.writelines(records)

 

 

參考: https://www.cnblogs.com/rainsoul/p/6283231.html


免責聲明!

本站轉載的文章為個人學習借鑒使用,本站對版權不負任何法律責任。如果侵犯了您的隱私權益,請聯系本站郵箱yoyou2525@163.com刪除。



 
粵ICP備18138465號   © 2018-2025 CODEPRJ.COM