深度學習數據集准備——目標檢測、分割


深度學習中常用數據集的制作與轉換

一. 數據集的制作。以常用的LabelImg和Labelme為例。

1. PASCAL VOC格式數據集(常用於目標檢測)。

a. 安裝LabelImg。LabelImg是一款開源的圖像標注工具,標簽可用於分類和目標檢測,它是用 Python 編寫的,並使用Qt作為其圖形界面,簡單好用。注釋以 PASCAL VOC 格式保存為 XML 文件。
    # Python 3 + Qt5 (Recommended)
    pip install labelImg
    labelImg

本文LabelImg版本:1.8.3

b. 操作界面。詳細教程
c. PASCAL VOC數據操作
    import sys
    import os
    from xml.etree import ElementTree
    from xml.etree.ElementTree import Element, SubElement
    from lxml import etree
    import codecs
    import cv2

    img_path = 'img/timg.png'
    xml_path = 'img/timg.xml'
01. 數據讀取

class PascalVocReader:
    """Read a PASCAL VOC annotation XML file and collect bounding-box shapes.

    Shapes are stored as tuples:
        (label, [xmin, xmax, ymin, ymax], None, None, difficult)
    where the two ``None`` entries are color placeholders kept for
    LabelImg compatibility.
    """

    def __init__(self, filepath):
        # shapes type:
        # [label, [Xmin, Xmax, Ymin, Ymax], color, color, difficult]
        self.shapes = []
        self.filepath = filepath
        self.verified = False
        self.XML_EXT = '.xml'
        self.ENCODE_METHOD = 'utf-8'
        try:
            self.parseXML()
        except Exception:
            # Best-effort parse: a malformed or missing file leaves shapes
            # empty, but report the failure instead of hiding it entirely
            # (the original bare `except: pass` swallowed every error).
            print("PascalVocReader: failed to parse %s" % filepath, file=sys.stderr)

    def getShapes(self):
        """Return the list of parsed shapes."""
        return self.shapes

    def addShape(self, label, bndbox, difficult):
        """Append one bounding box parsed from a <bndbox> XML element."""
        # int(float(...)) tolerates coordinates written as e.g. "12.0".
        xmin = int(float(bndbox.find('xmin').text))
        ymin = int(float(bndbox.find('ymin').text))
        xmax = int(float(bndbox.find('xmax').text))
        ymax = int(float(bndbox.find('ymax').text))
        points = [xmin, xmax, ymin, ymax]
        self.shapes.append((label, points, None, None, difficult))

    def parseXML(self):
        """Parse the annotation file, filling self.shapes and self.verified."""
        assert self.filepath.endswith(self.XML_EXT), "Unsupported file format"
        parser = etree.XMLParser(encoding=self.ENCODE_METHOD)
        xmltree = ElementTree.parse(self.filepath, parser=parser).getroot()
        # The 'verified' root attribute is optional ("yes" once a human
        # has checked the annotation in LabelImg).
        self.verified = xmltree.attrib.get('verified') == 'yes'

        for object_iter in xmltree.findall('object'):
            bndbox = object_iter.find("bndbox")
            label = object_iter.find('name').text
            # <difficult> is optional; default to False when absent.
            difficult = False
            if object_iter.find('difficult') is not None:
                difficult = bool(int(object_iter.find('difficult').text))
            self.addShape(label, bndbox, difficult)
        return True

    # Parse the VOC XML and print the recovered shapes.
    reader = PascalVocReader(xml_path)
    shapes = reader.getShapes()
    print(shapes)
    '''
	# [labbel, [Xmin, Xmax, Ymin, Ymax], color, color, difficult]
	[('dog', [135, 454, 117, 556], None, None, False), ('cat', [405, 918, 21, 546], None, None, False)]
    '''
02. 數據可視化

class PascalVocVisualizer:
    """Draw labelled bounding boxes on an image with OpenCV.

    ``shapes`` follows the PascalVocReader layout:
        (label, [xmin, xmax, ymin, ymax], None, None, difficult)
    """

    def __init__(self, imgpath, shapes):
        self.BOX_COLOR = (0, 0, 255)       # BGR red for box and label banner
        self.TEXT_COLOR = (255, 255, 255)  # white label text
        self.shapes = shapes
        self.imgpath = imgpath

    def visualize_bbox(self, img, bbox, class_name, thickness=2):
        """Draw one box plus a filled label banner, returning the image."""
        # bbox is [xmin, xmax, ymin, ymax] — the reader's storage order.
        x_min, x_max, y_min, y_max = bbox
        cv2.rectangle(img, (x_min, y_min), (x_max, y_max), color=self.BOX_COLOR, thickness=thickness)
        ((text_width, text_height), _) = cv2.getTextSize(class_name, cv2.FONT_HERSHEY_SIMPLEX, 0.5, 2)
        # If the box touches the top edge, shift the banner down so the
        # label stays inside the image.
        if y_min < int(1.4 * text_height):
            y_min += int(1.4 * text_height)
        cv2.rectangle(img, (x_min, y_min - int(1.3 * text_height)), (x_min + text_width, y_min), self.BOX_COLOR, -1)
        cv2.putText(img, class_name, (x_min, y_min - int(0.3 * text_height)), cv2.FONT_HERSHEY_SIMPLEX, 0.5,
                    self.TEXT_COLOR, lineType=cv2.LINE_AA)
        return img

    def visualize(self):
        """Render every shape onto the image, display it, and return it.

        BUGFIX: previously returned None, so callers assigning the result
        (``vis = visualizer.visualize()``) always received None.
        """
        img = cv2.imread(self.imgpath)
        for shape in self.shapes:
            img = self.visualize_bbox(img, shape[1], shape[0])
        cv2.imshow('vis', img)
        cv2.waitKey(0)
        return img

    # Draw all parsed shapes onto the image and show the result window.
    visualizer = PascalVocVisualizer(img_path, shapes)
    vis = visualizer.visualize()
03. 數據寫入

class PascalVocWriter:
    """Build and save a PASCAL VOC annotation XML file.

    Boxes are queued with :meth:`addBndBox` (PascalVocReader shape tuples)
    and written with :meth:`save`.
    """

    def __init__(self, foldername, filename, imgSize, databaseSrc='Unknown', localImgPath=None):
        self.foldername = foldername
        self.filename = filename
        self.databaseSrc = databaseSrc
        self.imgSize = imgSize  # (height, width[, depth]) as from cv2 img.shape
        self.boxlist = []
        self.localImgPath = localImgPath
        self.verified = False
        self.XML_EXT = '.xml'
        self.ENCODE_METHOD = 'utf-8'
        # BUGFIX: ustr() referenced self.DEFAULT_ENCODING, which was never
        # defined and raised AttributeError on the Python 2 path.
        self.DEFAULT_ENCODING = 'utf-8'

    def prettify(self, elem):
        """Return a pretty-printed, tab-indented XML byte string for elem."""
        rough_string = ElementTree.tostring(elem, 'utf8')
        # Round-trip through lxml for pretty_print support.
        root = etree.fromstring(rough_string)
        return etree.tostring(root, pretty_print=True, encoding=self.ENCODE_METHOD).replace("  ".encode(), "\t".encode())

    def ustr(self, x):
        """Return a unicode string on Python 2; a no-op on Python 3."""
        if sys.version_info < (3, 0, 0):
            from PyQt4.QtCore import QString
            if type(x) == str:
                return x.decode(self.DEFAULT_ENCODING)
            if type(x) == QString:
                return unicode(x.toUtf8(), self.DEFAULT_ENCODING, 'ignore')
            return x
        else:
            return x

    def genXML(self):
        """Build and return the <annotation> XML root.

        Returns None when a required field (filename, foldername or
        imgSize) is missing.
        """
        if self.filename is None or \
                self.foldername is None or \
                self.imgSize is None:
            return None

        top = Element('annotation')
        if self.verified:
            top.set('verified', 'yes')

        folder = SubElement(top, 'folder')
        folder.text = self.foldername

        filename = SubElement(top, 'filename')
        filename.text = self.filename

        if self.localImgPath is not None:
            localImgPath = SubElement(top, 'path')
            localImgPath.text = self.localImgPath

        source = SubElement(top, 'source')
        database = SubElement(source, 'database')
        database.text = self.databaseSrc

        # imgSize is (height, width[, depth]); grayscale images get depth 1.
        size_part = SubElement(top, 'size')
        width = SubElement(size_part, 'width')
        height = SubElement(size_part, 'height')
        depth = SubElement(size_part, 'depth')
        width.text = str(self.imgSize[1])
        height.text = str(self.imgSize[0])
        depth.text = str(self.imgSize[2]) if len(self.imgSize) == 3 else '1'

        segmented = SubElement(top, 'segmented')
        segmented.text = '0'
        return top

    def addBndBox(self, shape):
        """Queue one shape tuple for writing.

        ``shape`` follows the PascalVocReader layout:
        (label, [xmin, xmax, ymin, ymax], color, color, difficult).
        """
        name = shape[0]
        # BUGFIX: reader shapes store the points as [xmin, xmax, ymin, ymax];
        # the previous unpacking (xmin, ymin, xmax, ymax) swapped coordinates.
        xmin, xmax, ymin, ymax = shape[1]
        difficult = shape[4]
        bndbox = {'xmin': xmin, 'ymin': ymin, 'xmax': xmax, 'ymax': ymax,
                  'name': name, 'difficult': difficult}
        self.boxlist.append(bndbox)

    def appendObjects(self, top):
        """Append one <object> element per queued bounding box to `top`."""
        for each_object in self.boxlist:
            object_item = SubElement(top, 'object')
            name = SubElement(object_item, 'name')
            name.text = self.ustr(each_object['name'])
            pose = SubElement(object_item, 'pose')
            pose.text = "Unspecified"
            truncated = SubElement(object_item, 'truncated')
            # A box is flagged truncated when it touches the image border.
            if int(float(each_object['ymax'])) == int(float(self.imgSize[0])) or (int(float(each_object['ymin'])) == 1):
                truncated.text = "1"  # max == height or min
            elif (int(float(each_object['xmax'])) == int(float(self.imgSize[1]))) or (int(float(each_object['xmin'])) == 1):
                truncated.text = "1"  # max == width or min
            else:
                truncated.text = "0"
            difficult = SubElement(object_item, 'difficult')
            # bool & 1 yields the int 0/1, serialized as "0"/"1".
            difficult.text = str(bool(each_object['difficult']) & 1)
            bndbox = SubElement(object_item, 'bndbox')
            SubElement(bndbox, 'xmin').text = str(each_object['xmin'])
            SubElement(bndbox, 'ymin').text = str(each_object['ymin'])
            SubElement(bndbox, 'xmax').text = str(each_object['xmax'])
            SubElement(bndbox, 'ymax').text = str(each_object['ymax'])

    def save(self, targetFile=None):
        """Serialize the annotation XML.

        Writes next to the source image when targetFile is None, otherwise
        to foldername/targetFile.
        """
        root = self.genXML()
        self.appendObjects(root)
        if targetFile is None:
            # BUGFIX: splitext keeps filenames containing extra dots intact
            # (the old split('.')[0] truncated e.g. "a.b.png" to "a").
            out_path = os.path.splitext(self.filename)[0] + self.XML_EXT
        else:
            out_path = os.path.join(self.foldername, targetFile)
        # Context manager guarantees the handle is closed even on error.
        with codecs.open(out_path, 'w', encoding=self.ENCODE_METHOD) as out_file:
            out_file.write(self.prettify(root).decode('utf8'))

    # Round-trip: rebuild a VOC XML ("new.xml") from the shapes read earlier.
    img = cv2.imread(img_path)
    writer = PascalVocWriter(os.path.dirname(img_path), os.path.basename(img_path), img.shape, localImgPath=os.path.abspath(img_path))
    for shape in shapes:
        writer.addBndBox(shape)
    writer.save('new.xml')

2. 分割數據集。

a. 安裝labelme。labelme是一款開源的圖像/視頻標注工具,標簽可用於目標檢測、分割和分類,支持的圖像標注組件有:矩形框、多邊形、圓、線、點等,標注結果保存為labelme json文件。
    # Python 3 + Qt5 (Recommended)
    pip install labelme
    labelme

本文Labelme版本:4.2.9

b. 操作界面。詳細教程
c. labelme json數據操作

可使用labelme工具轉換json文件為數據集

    labelme_json_to_dataset *.json

二. 數據集的轉換。

1. Labelme json 轉 COCO json

Labelme json文件一般只存儲單個圖片的標記信息,不同於COCO json.

    import os
    import json
    import glob
    import base64
    import io
    import cv2
    import time
    import sys
	
    import numpy as np
    import PIL.Image
labelme to coco

class Lableme2CoCo:
    """Convert a set of labelme JSON files into one COCO-style instance dict.

    NOTE(review): the class name keeps the original (misspelled) public
    spelling "Lableme2CoCo" so existing callers are unaffected.
    """

    def __init__(self, img_format):
        self.images = []        # COCO "images" entries
        self.annotations = []   # COCO "annotations" entries
        self.categories = []    # COCO "categories" entries
        self.category_id = 0    # next category id to assign
        self.img_id = 0         # id of the image currently being converted
        self.ann_id = 0         # running annotation id
        self.ann_num = 0        # number of JSON files read (progress counter)
        self.img_format = img_format  # e.g. '.png'; used to derive file_name

    def save_coco_json(self, instance, save_path):
        """Write the COCO dict to save_path as pretty-printed UTF-8 JSON."""
        # Context manager fixes the leaked file handle of the original
        # `json.dump(instance, open(...))`.
        with open(save_path, 'w', encoding='utf-8') as f:
            json.dump(instance, f, ensure_ascii=False, indent=4)
        print("\nsave instance json file to {}".format(save_path))

    def to_coco(self, json_path_list):
        """Convert every labelme json in json_path_list into one COCO dict."""
        for json_path in json_path_list:
            obj = self.read_jsonfile(json_path)
            self.images.append(self._image(obj, json_path))
            for shape in obj['shapes']:
                self.annotations.append(self._annotation(shape))
                self.ann_id += 1
            self.img_id += 1
        instance = dict()
        instance['info'] = 'instance segmentation'
        # NOTE(review): the COCO spec names this key "licenses"; kept as-is
        # so existing consumers of this converter see the same output.
        instance['license'] = ['license']
        instance['images'] = self.images
        instance['annotations'] = self.annotations
        instance['categories'] = self.categories
        return instance

    def _init_categories(self, label):
        """Register `label` as a new category if it has not been seen yet."""
        if label not in (c['name'] for c in self.categories):
            self.categories.append({'id': self.category_id, 'name': label})
            self.category_id += 1

    def _image(self, obj, path):
        """Build the COCO "image" entry; decodes imageData only to read h/w."""
        # Local import keeps labelme an optional dependency for callers that
        # only use the geometry helpers.
        from labelme import utils
        img_x = utils.img_b64_to_arr(obj['imageData'])
        # shape[:2] is (h, w) for both grayscale (2-D) and color (3-D) arrays.
        h, w = img_x.shape[:2]
        image = dict()
        image['height'] = h
        image['width'] = w
        image['id'] = self.img_id
        image['file_name'] = os.path.basename(path).replace(".json", self.img_format)
        return image

    def _annotation(self, shape):
        """Build one COCO "annotation" entry from a labelme shape dict."""
        label = shape['label']
        self._init_categories(label)
        points = shape['points']
        category = next(c for c in self.categories if c['name'] == label)
        annotation = dict()
        annotation['id'] = self.ann_id
        annotation['image_id'] = self.img_id
        annotation['category_id'] = category['id']
        annotation['segmentation'] = [np.asarray(points).flatten().tolist()]
        annotation['bbox'] = self._get_box(points)
        annotation['iscrowd'] = 0
        # NOTE(review): area is a fixed placeholder, not the polygon area.
        annotation['area'] = 1.0
        return annotation

    def read_jsonfile(self, path):
        """Load one labelme JSON file, trying UTF-8 then falling back to GBK."""
        self.ann_num += 1
        sys.stdout.write("\rload json file: {}, number: {}".format(path, self.ann_num))
        sys.stdout.flush()
        try:
            with open(path, "r", encoding='utf-8') as f:
                return json.load(f)
        except (UnicodeDecodeError, json.JSONDecodeError):
            # Files saved on Chinese-locale Windows may be GBK-encoded.
            # (Narrowed from a bare `except:` that hid unrelated errors.)
            with open(path, "r", encoding='gbk') as f:
                return json.load(f)

    def _get_box(self, points):
        """Return the COCO bbox [x, y, width, height] enclosing the points.

        BUGFIX: the old implementation seeded max_x/max_y with 0, producing
        wrong boxes for points with negative coordinates; an empty point
        list now yields [0, 0, 0, 0] instead of infinities.
        """
        if not points:
            return [0, 0, 0, 0]
        xs = [p[0] for p in points]
        ys = [p[1] for p in points]
        return [min(xs), min(ys), max(xs) - min(xs), max(ys) - min(ys)]

a. 數據集轉換
    labelme_json_path = "./labelme_json"  # input labelme json folder (files must contain imageData)
    save_coco_path = "./coco_dataset"     # output folder
    annotations_path = os.path.join(save_coco_path, 'annotations')
    image_path = os.path.join(save_coco_path, 'images')

    # exist_ok avoids the race between an exists() check and makedirs().
    os.makedirs(annotations_path, exist_ok=True)
    os.makedirs(image_path, exist_ok=True)

    # BUGFIX: glob from labelme_json_path — the original referenced an
    # undefined name `input_path` (NameError).
    json_list_path = glob.glob(os.path.join(labelme_json_path, '*.json'))

    # BUGFIX: img_format was undefined below; keep it in sync with the converter.
    img_format = '.png'
    l2c_train = Lableme2CoCo(img_format=img_format)
    train_instance = l2c_train.to_coco(json_list_path)
    l2c_train.save_coco_json(train_instance, os.path.join(annotations_path, 'trainval.json'))

    print("Start creating images..")
    from labelme import utils  # BUGFIX: img_b64_to_arr was never imported
    for json_path in json_list_path:
        with open(json_path, encoding='utf-8') as f:
            data_dict = json.load(f)
        imageData = data_dict['imageData']
        img = utils.img_b64_to_arr(imageData)  # numpy array (H, W[, C])
        # BUGFIX: os.path.basepath does not exist -> os.path.basename.
        img_save_path = os.path.join(image_path, os.path.basename(json_path).split('.')[0] + img_format)
        # BUGFIX: numpy arrays have no .save(); convert through PIL first.
        PIL.Image.fromarray(img).save(img_save_path)

    print('\nSave dataset to {}. end!'.format(save_coco_path))
b. 數據集測試
from pycocotools.coco import COCO

# Sanity-check the converted dataset by loading it with pycocotools.
ann_file = "./coco_dataset/annotations/trainval.json" # the converted COCO json file
coco = COCO(annotation_file=ann_file)

print("coco\nimages.size [%05d]\t annotations.size [%05d]\t category.size [%05d]"
      % (len(coco.imgs), len(coco.anns), len(coco.cats)))
# An empty image index means the conversion produced nothing usable.
if len(coco.imgs) < 1:
    print('error!')
else:
    print('success!')

'''
    loading annotations into memory...
    Done (t=0.00s)
    creating index...
    index created!
    coco
    images.size [00002]	 annotations.size [00002]	 category.size [00001]
    success!
'''


免責聲明!

本站轉載的文章為個人學習借鑒使用,本站對版權不負任何法律責任。如果侵犯了您的隱私權益,請聯系本站郵箱yoyou2525@163.com刪除。



 
粵ICP備18138465號   © 2018-2025 CODEPRJ.COM