Creating and Converting Common Deep Learning Datasets
I. Creating datasets, using the popular tools LabelImg and Labelme as examples.
1. PASCAL VOC format datasets (commonly used for object detection).
a. Installing LabelImg
LabelImg is an open-source image annotation tool whose labels can be used for classification and object detection. It is written in Python, uses Qt for its graphical interface, and is simple to use. Annotations are saved as XML files in PASCAL VOC format.
# Python 3 + Qt5 (Recommended)
pip install labelImg
labelImg
LabelImg version used in this article: 1.8.3
b. The user interface. Detailed tutorial.

c. Working with PASCAL VOC data
import sys
import os
from xml.etree import ElementTree
from xml.etree.ElementTree import Element, SubElement
from lxml import etree
import codecs
import cv2
img_path = 'img/timg.png'
xml_path = 'img/timg.xml'
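For reference, a PASCAL VOC annotation file such as img/timg.xml written by LabelImg looks roughly like this (the image size and the object coordinates here are illustrative, chosen to match the dog box in the example output of step 01):
<annotation>
    <folder>img</folder>
    <filename>timg.png</filename>
    <size>
        <width>1000</width>
        <height>600</height>
        <depth>3</depth>
    </size>
    <segmented>0</segmented>
    <object>
        <name>dog</name>
        <pose>Unspecified</pose>
        <truncated>0</truncated>
        <difficult>0</difficult>
        <bndbox>
            <xmin>135</xmin>
            <ymin>117</ymin>
            <xmax>454</xmax>
            <ymax>556</ymax>
        </bndbox>
    </object>
</annotation>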
01. Reading the data
class PascalVocReader:
    def __init__(self, filepath):
        # shapes type:
        # [label, [xmin, xmax, ymin, ymax], color, color, difficult]
        self.shapes = []
        self.filepath = filepath
        self.verified = False
        self.XML_EXT = '.xml'
        self.ENCODE_METHOD = 'utf-8'
        try:
            self.parseXML()
        except:
            pass

    def getShapes(self):
        return self.shapes

    def addShape(self, label, bndbox, difficult):
        xmin = int(float(bndbox.find('xmin').text))
        ymin = int(float(bndbox.find('ymin').text))
        xmax = int(float(bndbox.find('xmax').text))
        ymax = int(float(bndbox.find('ymax').text))
        points = [xmin, xmax, ymin, ymax]
        self.shapes.append((label, points, None, None, difficult))

    def parseXML(self):
        assert self.filepath.endswith(self.XML_EXT), "Unsupported file format"
        parser = etree.XMLParser(encoding=self.ENCODE_METHOD)
        xmltree = ElementTree.parse(self.filepath, parser=parser).getroot()
        filename = xmltree.find('filename').text
        try:
            verified = xmltree.attrib['verified']
            if verified == 'yes':
                self.verified = True
        except KeyError:
            self.verified = False
        for object_iter in xmltree.findall('object'):
            bndbox = object_iter.find('bndbox')
            label = object_iter.find('name').text
            difficult = False
            if object_iter.find('difficult') is not None:
                difficult = bool(int(object_iter.find('difficult').text))
            self.addShape(label, bndbox, difficult)
        return True
reader = PascalVocReader(xml_path)
shapes = reader.getShapes()
print(shapes)
'''
# [label, [Xmin, Xmax, Ymin, Ymax], color, color, difficult]
[('dog', [135, 454, 117, 556], None, None, False), ('cat', [405, 918, 21, 546], None, None, False)]
'''
02. Visualizing the data
class PascalVocVisualizer:
    def __init__(self, imgpath, shapes):
        self.BOX_COLOR = (0, 0, 255)
        self.TEXT_COLOR = (255, 255, 255)
        self.shapes = shapes
        self.imgpath = imgpath

    def visualize_bbox(self, img, bbox, class_name, thickness=2):
        x_min, x_max, y_min, y_max = bbox
        cv2.rectangle(img, (x_min, y_min), (x_max, y_max), color=self.BOX_COLOR, thickness=thickness)
        ((text_width, text_height), _) = cv2.getTextSize(class_name, cv2.FONT_HERSHEY_SIMPLEX, 0.5, 2)
        if y_min < int(1.4 * text_height):
            y_min += int(1.4 * text_height)
        cv2.rectangle(img, (x_min, y_min - int(1.3 * text_height)), (x_min + text_width, y_min), self.BOX_COLOR, -1)
        cv2.putText(img, class_name, (x_min, y_min - int(0.3 * text_height)),
                    cv2.FONT_HERSHEY_SIMPLEX, 0.5, self.TEXT_COLOR, lineType=cv2.LINE_AA)
        return img

    def visualize(self):
        img = cv2.imread(self.imgpath)
        for idx, shape in enumerate(self.shapes):
            img = self.visualize_bbox(img, shape[1], shape[0])
        cv2.imshow('vis', img)
        cv2.waitKey(0)
visualizer = PascalVocVisualizer(img_path, shapes)
vis = visualizer.visualize()
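On a headless machine where cv2.imshow cannot open a window, a small variant is to write the result to disk instead (the output filename here is arbitrary):
# headless alternative: draw the boxes and save to a file instead of showing them
img = cv2.imread(img_path)
for shape in shapes:
    img = visualizer.visualize_bbox(img, shape[1], shape[0])
cv2.imwrite('img/timg_vis.png', img)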

03. Writing the data
class PascalVocWriter:
    def __init__(self, foldername, filename, imgSize, databaseSrc='Unknown', localImgPath=None):
        self.foldername = foldername
        self.filename = filename
        self.databaseSrc = databaseSrc
        self.imgSize = imgSize
        self.boxlist = []
        self.localImgPath = localImgPath
        self.verified = False
        self.XML_EXT = '.xml'
        self.ENCODE_METHOD = 'utf-8'

    def prettify(self, elem):
        """Return a pretty-printed XML string for the Element."""
        rough_string = ElementTree.tostring(elem, 'utf8')
        root = etree.fromstring(rough_string)
        return etree.tostring(root, pretty_print=True, encoding=self.ENCODE_METHOD).replace(" ".encode(), "\t".encode())

    def ustr(self, x):
        # on Python 2, decode byte strings and QStrings; on Python 3, strings are already unicode
        if sys.version_info < (3, 0, 0):
            from PyQt4.QtCore import QString
            if type(x) == str:
                return x.decode(self.ENCODE_METHOD)
            if type(x) == QString:
                return unicode(x.toUtf8(), self.ENCODE_METHOD, 'ignore')
            return x
        else:
            return x

    def genXML(self):
        """Return XML root"""
        # Check conditions
        if self.filename is None or \
                self.foldername is None or \
                self.imgSize is None:
            return None
        top = Element('annotation')
        if self.verified:
            top.set('verified', 'yes')
        folder = SubElement(top, 'folder')
        folder.text = self.foldername
        filename = SubElement(top, 'filename')
        filename.text = self.filename
        if self.localImgPath is not None:
            localImgPath = SubElement(top, 'path')
            localImgPath.text = self.localImgPath
        source = SubElement(top, 'source')
        database = SubElement(source, 'database')
        database.text = self.databaseSrc
        size_part = SubElement(top, 'size')
        width = SubElement(size_part, 'width')
        height = SubElement(size_part, 'height')
        depth = SubElement(size_part, 'depth')
        width.text = str(self.imgSize[1])
        height.text = str(self.imgSize[0])
        if len(self.imgSize) == 3:
            depth.text = str(self.imgSize[2])
        else:
            depth.text = '1'
        segmented = SubElement(top, 'segmented')
        segmented.text = '0'
        return top

    def addBndBox(self, shape):
        name = shape[0]
        # same point order as PascalVocReader: [xmin, xmax, ymin, ymax]
        xmin, xmax, ymin, ymax = shape[1]
        difficult = shape[4]
        bndbox = {'xmin': xmin, 'ymin': ymin, 'xmax': xmax, 'ymax': ymax}
        bndbox['name'] = name
        bndbox['difficult'] = difficult
        self.boxlist.append(bndbox)

    def appendObjects(self, top):
        for each_object in self.boxlist:
            object_item = SubElement(top, 'object')
            name = SubElement(object_item, 'name')
            name.text = self.ustr(each_object['name'])
            pose = SubElement(object_item, 'pose')
            pose.text = "Unspecified"
            truncated = SubElement(object_item, 'truncated')
            if int(float(each_object['ymax'])) == int(float(self.imgSize[0])) or (int(float(each_object['ymin'])) == 1):
                truncated.text = "1"  # max == height or min
            elif (int(float(each_object['xmax'])) == int(float(self.imgSize[1]))) or (int(float(each_object['xmin'])) == 1):
                truncated.text = "1"  # max == width or min
            else:
                truncated.text = "0"
            difficult = SubElement(object_item, 'difficult')
            difficult.text = str(bool(each_object['difficult']) & 1)
            bndbox = SubElement(object_item, 'bndbox')
            xmin = SubElement(bndbox, 'xmin')
            xmin.text = str(each_object['xmin'])
            ymin = SubElement(bndbox, 'ymin')
            ymin.text = str(each_object['ymin'])
            xmax = SubElement(bndbox, 'xmax')
            xmax.text = str(each_object['xmax'])
            ymax = SubElement(bndbox, 'ymax')
            ymax.text = str(each_object['ymax'])

    def save(self, targetFile=None):
        root = self.genXML()
        self.appendObjects(root)
        if targetFile is None:
            out_file = codecs.open(self.filename.split('.')[0] + self.XML_EXT, 'w', encoding=self.ENCODE_METHOD)
        else:
            out_file = codecs.open(os.path.join(self.foldername, targetFile), 'w', encoding=self.ENCODE_METHOD)
        prettifyResult = self.prettify(root)
        out_file.write(prettifyResult.decode('utf8'))
        out_file.close()
img = cv2.imread(img_path)
writer = PascalVocWriter(os.path.dirname(img_path), os.path.basename(img_path), img.shape, localImgPath=os.path.abspath(img_path))
for shape in shapes:
    writer.addBndBox(shape)
writer.save('new.xml')
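As a quick sanity check, the freshly written XML can be read back with the PascalVocReader from step 01; the recovered shapes should match the originals:
# round-trip check: new.xml was saved under the image's folder
new_shapes = PascalVocReader(os.path.join(os.path.dirname(img_path), 'new.xml')).getShapes()
print(new_shapes == shapes)  # expect True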

2. Segmentation datasets.
a. Installing labelme
Labelme is an open-source image/video annotation tool. Its labels can be used for object detection, segmentation, and classification; the supported annotation shapes include rectangles, polygons, circles, lines, and points. Annotations are saved as labelme JSON files.
# Python 3 + Qt5 (Recommended)
pip install labelme
labelme
Labelme version used in this article: 4.2.9
b. The user interface. Detailed tutorial.

c. Working with labelme JSON data
The labelme tool itself can convert a JSON file into a dataset (an image, a label mask, a visualization, and a label-name list):
labelme_json_to_dataset *.json
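The JSON file can also be inspected directly in Python; a minimal sketch (the file path is hypothetical, the field names are those of the labelme format):
import json

with open('img/timg.json', encoding='utf-8') as f:  # hypothetical labelme file
    data = json.load(f)
print(data['imageHeight'], data['imageWidth'])
for shape in data['shapes']:
    # each shape carries a label, a shape_type (polygon, rectangle, ...) and its points
    print(shape['label'], shape['shape_type'], shape['points'])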

II. Converting datasets.
1. Labelme JSON to COCO JSON
A labelme JSON file generally stores the annotations of a single image, unlike a COCO JSON file, which aggregates an entire dataset into one file.
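For orientation, the instance file assembled by the converter below has the following overall shape (all concrete values here are illustrative):
# illustrative skeleton of the COCO json the converter produces
coco_skeleton = {
    'info': 'instance segmentation',
    'license': ['license'],
    'images': [{'height': 600, 'width': 1000, 'id': 0, 'file_name': 'timg.png'}],
    'annotations': [{'id': 0, 'image_id': 0, 'category_id': 0,
                     'segmentation': [[135.0, 117.0, 454.0, 117.0, 454.0, 556.0]],
                     'bbox': [135.0, 117.0, 319.0, 439.0],  # [x, y, w, h]
                     'iscrowd': 0, 'area': 1.0}],
    'categories': [{'id': 0, 'name': 'dog'}],
}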
import os
import json
import glob
import base64
import io
import cv2
import time
import sys
import numpy as np
import PIL.Image
# labelme to coco
class Lableme2CoCo:
    def __init__(self, img_format):
        self.images = []
        self.annotations = []
        self.categories = []
        self.category_id = 0
        self.img_id = 0
        self.ann_id = 0
        self.ann_num = 0
        self.img_format = img_format

    def save_coco_json(self, instance, save_path):
        json.dump(instance, open(save_path, 'w', encoding='utf-8'), ensure_ascii=False, indent=4)
        print("\nsave instance json file to {}".format(save_path))

    def to_coco(self, json_path_list):
        for json_path in json_path_list:
            obj = self.read_jsonfile(json_path)
            self.images.append(self._image(obj, json_path))
            shapes = obj['shapes']
            for shape in shapes:
                annotation = self._annotation(shape)
                self.annotations.append(annotation)
                self.ann_id += 1
            self.img_id += 1
        instance = dict()
        instance['info'] = 'instance segmentation'
        instance['license'] = ['license']
        instance['images'] = self.images
        instance['annotations'] = self.annotations
        instance['categories'] = self.categories
        return instance

    def _init_categories(self, label):
        category = dict()
        if len(self.categories) == 0:
            category['id'] = self.category_id
            category['name'] = label
            self.categories.append(category)
            self.category_id += 1
        else:
            category_list = [c['name'] for c in self.categories]
            if label not in category_list:
                category['id'] = self.category_id
                category['name'] = label
                self.categories.append(category)
                self.category_id += 1

    def _image(self, obj, path):
        image = dict()
        from labelme import utils
        img_x = utils.img_b64_to_arr(obj['imageData'])
        if len(img_x.shape) == 3:
            h, w = img_x.shape[:-1]
        else:
            h, w = img_x.shape
        image['height'] = h
        image['width'] = w
        image['id'] = self.img_id
        image['file_name'] = os.path.basename(path).replace(".json", self.img_format)
        return image

    def _annotation(self, shape):
        label = shape['label']
        self._init_categories(label)
        points = shape['points']
        category = list(filter(lambda c: c['name'] == label, self.categories))[0]
        annotation = dict()
        annotation['id'] = self.ann_id
        annotation['image_id'] = self.img_id
        annotation['category_id'] = category['id']
        annotation['segmentation'] = [np.asarray(points).flatten().tolist()]
        annotation['bbox'] = self._get_box(points)
        annotation['iscrowd'] = 0
        annotation['area'] = 1.0
        return annotation

    def read_jsonfile(self, path):
        self.ann_num += 1
        sys.stdout.write("\rload json file: {}, number: {}".format(path, self.ann_num))
        sys.stdout.flush()
        try:
            with open(path, "r", encoding='utf-8') as f:
                return json.load(f)
        except:
            with open(path, "r", encoding='gbk') as f:
                return json.load(f)

    def _get_box(self, points):
        min_x = min_y = np.inf
        max_x = max_y = 0
        for x, y in points:
            min_x = min(min_x, x)
            min_y = min(min_y, y)
            max_x = max(max_x, x)
            max_y = max(max_y, y)
        return [min_x, min_y, max_x - min_x, max_y - min_y]
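One detail worth noting: _annotation stores a constant area of 1.0 rather than the true polygon area. If real areas are needed (e.g. for COCO-style evaluation), a small shoelace-formula helper, my addition rather than part of the original converter, could replace that constant:
def polygon_area(points):
    # shoelace formula for a simple polygon given as [[x, y], ...]
    area = 0.0
    n = len(points)
    for i in range(n):
        x1, y1 = points[i]
        x2, y2 = points[(i + 1) % n]
        area += x1 * y2 - x2 * y1
    return abs(area) / 2.0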
a. Converting the dataset
from labelme.utils import img_b64_to_arr

labelme_json_path = "./labelme_json"  # input folder of labelme json files (each must contain the imageData field)
save_coco_path = "./coco_dataset"     # output folder
annotations_path = os.path.join(save_coco_path, 'annotations')
image_path = os.path.join(save_coco_path, 'images')
if not os.path.exists(annotations_path):
    os.makedirs(annotations_path)
if not os.path.exists(image_path):
    os.makedirs(image_path)
json_list_path = glob.glob(os.path.join(labelme_json_path, '*.json'))
l2c_train = Lableme2CoCo(img_format='.png')
train_instance = l2c_train.to_coco(json_list_path)
l2c_train.save_coco_json(train_instance, os.path.join(annotations_path, 'trainval.json'))
print("Start creating images..")
for json_path in json_list_path:
    data_dict = json.load(open(json_path))
    imageData = data_dict['imageData']
    img = img_b64_to_arr(imageData)  # decode the embedded base64 image to a numpy array
    img_save_path = os.path.join(image_path, os.path.basename(json_path).split('.')[0] + l2c_train.img_format)
    PIL.Image.fromarray(img).save(img_save_path)
print('\nSave dataset to {}. end!'.format(save_coco_path))
b. Testing the dataset
from pycocotools.coco import COCO
ann_file = "./coco_dataset/annotations/trainval.json"  # the converted COCO json file
coco = COCO(annotation_file=ann_file)
print("coco\nimages.size [%05d]\t annotations.size [%05d]\t category.size [%05d]"
% (len(coco.imgs), len(coco.anns), len(coco.cats)))
if len(coco.imgs) < 1:
print('error!')
else:
print('success!')
'''
loading annotations into memory...
Done (t=0.00s)
creating index...
index created!
coco
images.size [00002] annotations.size [00002] category.size [00001]
success!
'''
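Beyond checking the sizes, pycocotools' standard accessors can be used to spot-check individual annotations (a minimal sketch reusing the coco object above):
# look up the annotations of the first image in the converted file
img_ids = coco.getImgIds()
ann_ids = coco.getAnnIds(imgIds=img_ids[:1])
for ann in coco.loadAnns(ann_ids):
    print(ann['image_id'], ann['category_id'], ann['bbox'])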