CrowdHuman數據集標注格式轉換為YOLOv3可以使用的COCO格式


需要先了解兩種格式:CrowdHuman的標注格式odgt(每行是一個JSON對象),以及YOLOv3訓練所需的數據格式(不需要使用COCO的json文件,只需要一個記錄圖片路徑的列表文件,以及每張圖片對應的txt標注文件)

YOLOv3 github地址:https://github.com/eriklindernoren/PyTorch-YOLOv3

保存每一張圖片的位置信息(將下面的腳本保存為img2txt.py,因為後面的標注轉換腳本會通過import img2txt調用其中的load_file)

 1 import os
 2 import json
 3 
 4 
 5 def load_file(fpath):  # fpath是具體的文件 ,作用:#str to list
 6     assert os.path.exists(fpath)  # assert() raise-if-not
 7     with open(fpath, 'r') as fid:
 8         lines = fid.readlines()
 9     records = [json.loads(line.strip('\n')) for line in lines]  # str to list
10     return records
11 
12 
def img2txt(odgtpath, respath, image_dir="/datasets/crowdhuman/images/val/Image/"):
    """Write the image path of every annotated record to a list file.

    For each record in the .odgt file, one line of the form
    ``<image_dir><ID>.jpg`` is written to ``respath``.

    Args:
        odgtpath: Path to the .odgt annotation file.
        respath: Path of the text file to create (overwritten if present).
        image_dir: Prefix prepended to every image file name. It is joined
            by plain string concatenation, so it should end with a path
            separator. Defaults to the validation image directory, which
            was previously hard-coded.
    """
    records = load_file(odgtpath)
    # Shows the directory a relative ``respath`` will be resolved against.
    print(os.getcwd())
    with open(respath, 'w') as txt:
        txt.writelines(
            image_dir + record['ID'] + '.jpg' + '\n' for record in records
        )
23 
24 
if __name__ == '__main__':
    # Validation-split annotations in, list of image paths out.
    img2txt(
        odgtpath="/datasets/crowdhuman/annotation_val.odgt",
        respath="/datasets/crowdhuman/val_name.txt",
    )

保存每一張圖片標注信息中的全身框坐標fbox(格式為[x, y, w, h]),按圖片寬高歸一化後,寫入與圖片ID同名的txt標注文件

 1 import time
 2 import img2txt
 3 from PIL import Image
 4 
 5 
 6 def tonormlabel(odgtpath, storepath):
 7     records = img2txt.load_file(odgtpath)
 8     record_list = len(records)
 9     print(record_list)
10     categories = {}
11     # txt = open(respath, 'w')
12     for i in range(record_list):
13         txt_name = storepath + records[i]['ID'] + '.txt'
14         file_name = records[i]['ID'] + '.jpg'
15         #print(i)
16         im = Image.open("/datasets/crowdhuman/images/train_all/Image/" + file_name)
17         height = im.size[1]
18         width = im.size[0]
19         file = open(txt_name, 'w')
20         gt_box = records[i]['gtboxes']
21         gt_box_len = len(gt_box)  # 每一個字典gtboxes里,也有好幾個記錄,分別提取記錄。
22         for j in range(gt_box_len):
23             category = gt_box[j]['tag']
24             if category not in categories:  # 該類型不在categories,就添加上去
25                 new_id = len(categories) + 1  # ID遞增
26                 categories[category] = new_id
27             category_id = categories[category]  # 重新獲取它的類別ID
28             fbox = gt_box[j]['fbox']  # 獲得全身框
29             norm_x = fbox[0] / width
30             norm_y = fbox[1] / height
31             norm_w = fbox[2] / width
32             norm_h = fbox[3] / height
33             '''
34             norm_x = 0 if norm_x <= 0 else norm_x
35             norm_x = 1 if norm_x >= 1 else norm_x
36             norm_y = 0 if norm_y <= 0 else norm_y
37             norm_y = 1 if norm_y >= 1 else norm_y
38             norm_w = 0 if norm_w <= 0 else norm_w
39             norm_w = 1 if norm_w >= 1 else norm_w
40             norm_h = 0 if norm_h <= 0 else norm_h
41             norm_h = 1 if norm_h >= 1 else norm_h
42             '''
43             blank = ' '
44             if j == gt_box_len-1:
45                 file.write(str(category_id - 1) + blank + '{:.6f}'.format(norm_x) + blank + '{:.6f}'.format(norm_y) + blank
46                            + '{:.6f}'.format(norm_w) + blank + '{:.6f}'.format(norm_h))
47             else:
48                 file.write(str(category_id - 1) + blank + '{:.6f}'.format(norm_x) + blank + '{:.6f}'.format(norm_y) + blank
49                            + '{:.6f}'.format(norm_w) + blank + '{:.6f}'.format(norm_h) + '\n')
50 
51 
if __name__ == '__main__':
    odgtpath = "/datasets/crowdhuman/annotation_train.odgt"
    storepath = "/datasets/crowdhuman/labels/train_all/Image/"
    stamp_format = '%Y-%m-%d %H:%M:%S'

    # Print the local wall-clock time before and after the conversion,
    # followed by the elapsed seconds.
    print(time.strftime(stamp_format, time.localtime()))
    start = time.time()
    tonormlabel(odgtpath, storepath)
    end = time.time()
    print(time.strftime(stamp_format, time.localtime()))
    print('已完成轉換,共耗時{:.5f}s'.format(end - start))

 


免責聲明!

本站轉載的文章為個人學習借鑒使用,本站對版權不負任何法律責任。如果侵犯了您的隱私權益,請聯系本站郵箱yoyou2525@163.com刪除。



 
粵ICP備18138465號   © 2018-2025 CODEPRJ.COM