Data Augmentation Methods for Object Detection


import random

import cv2
import numpy as np

# The helper functions used further down (clip_box, rotate_im, get_corners, rotate_box,
# get_enclosing_box, HorizontalFlip) are not defined in this post; they are assumed to
# come from a companion bounding-box utility module.

def letterbox_image(img, inp_dim):
    '''resize image with unchanged aspect ratio using padding
    
    Parameters
    ----------
    
    img : numpy.ndarray
        Image 
    
    inp_dim : int
        side length of the square canvas the image is letterboxed into
        
    Returns
    -------
    
    numpy.ndarray:
        Resized image
    
    '''

    inp_dim = (inp_dim, inp_dim)
    img_w, img_h = img.shape[1], img.shape[0]
    w, h = inp_dim
    new_w = int(img_w * min(w/img_w, h/img_h))
    new_h = int(img_h * min(w/img_w, h/img_h))
    resized_image = cv2.resize(img, (new_w,new_h))  # resize with scale = target size / longer side, then pad the empty area
    
    canvas = np.full((inp_dim[1], inp_dim[0], 3), 0)

    canvas[(h-new_h)//2:(h-new_h)//2 + new_h,(w-new_w)//2:(w-new_w)//2 + new_w,  :] = resized_image
    
    return canvas
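A minimal usage sketch (the input array below is a random placeholder; any BGR image loaded with cv2.imread behaves the same way):

# Letterbox a 300x500 (HxW) dummy image into a 416x416 canvas.
dummy = np.random.randint(0, 255, (300, 500, 3), dtype=np.uint8)
boxed = letterbox_image(dummy, 416)
print(boxed.shape)  # (416, 416, 3)
# scale = min(416/500, 416/300) = 0.832, so the content occupies 416x249 pixels, centred vertically.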

class Resize(object):
    """Resize the image in accordance to `image_letter_box` function in darknet 
    
    The aspect ratio is maintained. The longer side is resized to the input 
    size of the network, while the remaining space on the shorter side is filled 
    with black color. **This should be the last transform**
    
    
    Parameters
    ----------
    inp_dim : int
        side length of the square network input to which the image is letterboxed.
        
    Returns
    -------
    
    numpy.ndarray
        Resized image in the numpy format of shape `HxWxC`
    
    numpy.ndarray
        Resized bounding box co-ordinates of the format `n x 4` where n is 
        number of bounding boxes and 4 represents `x1,y1,x2,y2` of the box
        
    """
    
    def __init__(self, inp_dim):
        self.inp_dim = inp_dim
        
    def __call__(self, img, bboxes):
        w,h = img.shape[1], img.shape[0]
        img = letterbox_image(img, self.inp_dim)  # resize with scale = target size / longer side, then pad the empty area
    
    
        scale = min(self.inp_dim/h, self.inp_dim/w)
        bboxes[:,:4] *= (scale)
    
        new_w = scale*w
        new_h = scale*h
        inp_dim = self.inp_dim   
    
        del_h = (inp_dim - new_h)/2
        del_w = (inp_dim - new_w)/2
    
        add_matrix = np.array([[del_w, del_h, del_w, del_h]]).astype(int)
    
        bboxes[:,:4] += add_matrix  # shift the boxes by the padding offsets
    
        img = img.astype(np.uint8)
    
        return img, bboxes 
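A worked trace of the offset arithmetic above, with hypothetical numbers:

# Resize(416) on a 400x300 (WxH) image:
#   scale        = min(416/300, 416/400) = 1.04
#   new_w, new_h = 416, 312
#   del_w, del_h = 0, 52
#   -> bboxes[:, :4] *= 1.04, then += [0, 52, 0, 52]  (the boxes follow the vertical padding)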

class RandomHorizontalFlip(object):

    """Randomly horizontally flips the Image with the probability *p*

    Parameters
    ----------
    p: float
        The probability with which the image is flipped


    Returns
    -------

    numpy.ndarray
        Flipped image in the numpy format of shape `HxWxC`

    numpy.ndarray
        Transformed bounding box co-ordinates of the format `n x 4` where n is
        number of bounding boxes and 4 represents `x1,y1,x2,y2` of the box

    """

    def __init__(self, p=0.5):
        self.p = p

    def __call__(self, img, bboxes):
        img_center = np.array(img.shape[:2])[::-1]/2  # image centre as (x, y)
        img_center = np.hstack((img_center, img_center))
        if random.random() < self.p:
            img = img[:, ::-1, :]  # flip the image horizontally
            bboxes[:, [0, 2]] += 2*(img_center[[0, 2]] - bboxes[:, [0, 2]])  # mirror the x coordinates of (x1,y1,x2,y2) about the centre

            box_w = abs(bboxes[:, 0] - bboxes[:, 2])

            bboxes[:, 0] -= box_w  # after mirroring x1 > x2; swap them back so that x1 < x2
            bboxes[:, 2] += box_w

        return img, bboxes
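A worked check of the flip arithmetic, using a hypothetical 100-pixel-wide image and a single box:

cx = 100 / 2                                   # horizontal image centre
box = np.array([[10., 20., 50., 60.]])
box[:, [0, 2]] += 2 * (cx - box[:, [0, 2]])    # mirror: x1 -> 90, x2 -> 50 (now x1 > x2)
box_w = abs(box[:, 0] - box[:, 2])             # 40
box[:, 0] -= box_w
box[:, 2] += box_w
print(box)                                     # [[50. 20. 90. 60.]]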

class RandomScale(object):
    """Randomly scales an image    
    
    
    Bounding boxes with less than 25% of their area remaining in the
    transformed image are dropped. The resolution is maintained, and any
    remaining area is filled with black.
    
    Parameters
    ----------
    scale: float or tuple(float)
        if **float**, the image is scaled by a factor drawn 
        randomly from a range (1 - `scale` , 1 + `scale`). If **tuple**,
        the `scale` is drawn randomly from values specified by the 
        tuple
        
    Returns
    -------
    
    numpy.ndarray
        Scaled image in the numpy format of shape `HxWxC`
    
    numpy.ndarray
        Transformed bounding box co-ordinates of the format `n x 4` where n is 
        number of bounding boxes and 4 represents `x1,y1,x2,y2` of the box
        
    """

    def __init__(self, scale = 0.2, diff = False):
        self.scale = scale

        
        if type(self.scale) == tuple:
            assert len(self.scale) == 2, "Invalid range"
            assert self.scale[0] > -1, "Scale factor can't be less than -1"
            assert self.scale[1] > -1, "Scale factor can't be less than -1"
        else:
            assert self.scale > 0, "Please input a positive float"
            self.scale = (max(-1, -self.scale), self.scale)
        
        self.diff = diff

        

    def __call__(self, img, bboxes):
    
        
        # Choose a random scale factor
        
        img_shape = img.shape
        
        if self.diff:
            scale_x = random.uniform(*self.scale)
            scale_y = random.uniform(*self.scale)
        else:
            scale_x = random.uniform(*self.scale)
            scale_y = scale_x
            
    
        
        resize_scale_x = 1 + scale_x
        resize_scale_y = 1 + scale_y

        # The logic of the Scale transformation is fairly simple.
        # We use the OpenCV function cv2.resize to scale our image, and scale our bounding boxes by the scale factor(s).
        img = cv2.resize(img, None, fx = resize_scale_x, fy = resize_scale_y)
        
        bboxes[:,:4] *= [resize_scale_x, resize_scale_y, resize_scale_x, resize_scale_y]
        
        
        
        canvas = np.zeros(img_shape, dtype = np.uint8)  # canvas of the original image size
        
        y_lim = int(min(resize_scale_y,1)*img_shape[0])
        x_lim = int(min(resize_scale_x,1)*img_shape[1])
        
        
        canvas[:y_lim,:x_lim,:] =  img[:y_lim,:x_lim,:]  # if the image grew, keep only the top-left crop; if it shrank, the rest stays black
        
        img = canvas
        bboxes = clip_box(bboxes, [0, 0, img_shape[1], img_shape[0]], 0.25)  # clip boxes to the image border and drop those below the area threshold
    
    
        return img, bboxes
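clip_box itself is not defined in this post. A sketch that is consistent with how it is called here (clip boxes to a [xmin, ymin, xmax, ymax] window and, with alpha = 0.25, drop boxes that keep less than 25% of their original area); the real helper in the companion module may differ in detail:

def bbox_area(bbox):
    # areas of boxes stored as columns x1, y1, x2, y2
    return (bbox[:, 2] - bbox[:, 0]) * (bbox[:, 3] - bbox[:, 1])

def clip_box(bbox, clip_window, alpha):
    # clip boxes to clip_window = [xmin, ymin, xmax, ymax] and drop boxes whose
    # remaining area is less than `alpha` of their original area
    area_before = bbox_area(bbox)
    x_min = np.maximum(bbox[:, 0], clip_window[0]).reshape(-1, 1)
    y_min = np.maximum(bbox[:, 1], clip_window[1]).reshape(-1, 1)
    x_max = np.minimum(bbox[:, 2], clip_window[2]).reshape(-1, 1)
    y_max = np.minimum(bbox[:, 3], clip_window[3]).reshape(-1, 1)
    clipped = np.hstack((x_min, y_min, x_max, y_max, bbox[:, 4:]))
    # clamp widths/heights at zero so boxes pushed fully outside get zero area
    area_after = np.clip(x_max - x_min, 0, None) * np.clip(y_max - y_min, 0, None)
    keep = (area_after.reshape(-1) / (area_before + 1e-8)) >= alpha
    return clipped[keep]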

class RandomTranslate(object):  # random translation
    """Randomly Translates the image    
    
    
    Bounding boxes with less than 25% of their area remaining in the
    transformed image are dropped. The resolution is maintained, and any
    remaining area is filled with black.
    
    Parameters
    ----------
    translate: float or tuple(float)
        if **float**, the image is translated by a factor drawn 
        randomly from the range (-`translate`, `translate`). If **tuple**,
        `translate` is drawn randomly from values specified by the 
        tuple
        
    Returns
    -------
    
    numpy.ndarray
        Translated image in the numpy format of shape `HxWxC`
    
    numpy.ndarray
        Transformed bounding box co-ordinates of the format `n x 4` where n is 
        number of bounding boxes and 4 represents `x1,y1,x2,y2` of the box
        
    """

    def __init__(self, translate = 0.2, diff = False):
        self.translate = translate
        
        if type(self.translate) == tuple:
            assert len(self.translate) == 2, "Invalid range"
            assert 0 < self.translate[0] < 1, "Translate factor must lie in (0, 1)"
            assert 0 < self.translate[1] < 1, "Translate factor must lie in (0, 1)"

        else:
            assert 0 < self.translate < 1, "Translate factor must lie in (0, 1)"
            self.translate = (-self.translate, self.translate)  # the drawn offset then lies in (-translate, translate)
            
            
        self.diff = diff

    def __call__(self, img, bboxes):        
        # Choose random translation factors
        img_shape = img.shape
        
        #translate the image
        
        #percentage of the dimension of the image to translate
        translate_factor_x = random.uniform(*self.translate)
        translate_factor_y = random.uniform(*self.translate)
        
        if not self.diff:
            translate_factor_y = translate_factor_x
            
        canvas = np.zeros(img_shape).astype(np.uint8)
    
    
        corner_x = int(translate_factor_x*img.shape[1])
        corner_y = int(translate_factor_y*img.shape[0])

        # change the origin to the top-left corner of the translated box (a plain shift, with out-of-border regions handled by the clipping below)
        orig_box_cords =  [max(0,corner_y), max(corner_x,0), min(img_shape[0], corner_y + img.shape[0]), min(img_shape[1],corner_x + img.shape[1])]

        mask = img[max(-corner_y, 0):min(img.shape[0], -corner_y + img_shape[0]), max(-corner_x, 0):min(img.shape[1], -corner_x + img_shape[1]),:]
        canvas[orig_box_cords[0]:orig_box_cords[2], orig_box_cords[1]:orig_box_cords[3],:] = mask
        img = canvas
        
        bboxes[:,:4] += [corner_x, corner_y, corner_x, corner_y]  # shift the boxes by the same offsets
        
        
        bboxes = clip_box(bboxes, [0,0,img_shape[1], img_shape[0]], 0.25)
        
        return img, bboxes
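A worked trace of the slicing above, with hypothetical draws of +0.2 for both factors on a 100x100 image (so corner_x = corner_y = 20):

# orig_box_cords            = [20, 20, 100, 100]  -> paste region on the black canvas
# mask                      = img[0:80, 0:80, :]  -> the part of the source that stays visible
# canvas[20:100, 20:100, :] = mask                -> the top and left 20-pixel bands stay black
# bboxes[:, :4]            += [20, 20, 20, 20]    -> boxes shift along with the content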

class RandomRotate(object):
    """Randomly rotates an image    
    
    
    Bounding boxes with less than 25% of their area remaining in the
    transformed image are dropped. The resolution is maintained, and any
    remaining area is filled with black.
    
    Parameters
    ----------
    angle: float or tuple(float)
        if **float**, the image is rotated by a factor drawn 
        randomly from a range (-`angle`, `angle`). If **tuple**,
        the `angle` is drawn randomly from values specified by the 
        tuple
        
    Returns
    -------
    
    numpy.ndarray
        Rotated image in the numpy format of shape `HxWxC`
    
    numpy.ndarray
        Transformed bounding box co-ordinates of the format `n x 4` where n is 
        number of bounding boxes and 4 represents `x1,y1,x2,y2` of the box
        
    """

    def __init__(self, angle = 10):
        self.angle = angle
        
        if type(self.angle) == tuple:
            assert len(self.angle) == 2, "Invalid range"  
            
        else:
            self.angle = (-self.angle, self.angle)
            
    def __call__(self, img, bboxes):
    
        angle = random.uniform(*self.angle)
    
        w,h = img.shape[1], img.shape[0]
        cx, cy = w//2, h//2
    
        img = rotate_im(img, angle)  # rotate_im computes the affine matrix and warps the whole image, enlarging the output so no content is cut off
    
        corners = get_corners(bboxes)  # get the four corner points of every box
    
        corners = np.hstack((corners, bboxes[:,4:]))
    
    
        corners[:,:8] = rotate_box(corners[:,:8], angle, cx, cy, h, w)  # rotate the corner points with the same affine matrix
    
        new_bbox = get_enclosing_box(corners)  # find the tightest axis-aligned rectangle containing each tilted box
    
    
        scale_factor_x = img.shape[1] / w
    
        scale_factor_y = img.shape[0] / h
    
        img = cv2.resize(img, (w,h))  # shrink the enlarged rotated image back to the original size
    
        new_bbox[:,:4] /= [scale_factor_x, scale_factor_y, scale_factor_x, scale_factor_y] 
    
        bboxes  = new_bbox
    
        bboxes = clip_box(bboxes, [0,0,w, h], 0.25)
    
        return img, bboxes
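rotate_im, rotate_box, get_corners and get_enclosing_box come from the companion bounding-box module. For reference, a sketch of the two corner-handling helpers, consistent with how they are called above (n x 4+ boxes to an n x 8 corner array, and rotated corners back to the tightest axis-aligned n x 4 box); the real helpers may differ in detail:

def get_corners(bboxes):
    # n x 4+ boxes (x1, y1, x2, y2, ...) -> n x 8 corners (x1,y1, x2,y1, x1,y2, x2,y2)
    width = (bboxes[:, 2] - bboxes[:, 0]).reshape(-1, 1)
    height = (bboxes[:, 3] - bboxes[:, 1]).reshape(-1, 1)
    x1 = bboxes[:, 0].reshape(-1, 1)
    y1 = bboxes[:, 1].reshape(-1, 1)
    x2 = x1 + width
    y2 = y1 + height
    return np.hstack((x1, y1, x2, y1, x1, y2, x2, y2))

def get_enclosing_box(corners):
    # n x 8+ rotated corners -> tightest axis-aligned n x 4 box; extra columns are preserved
    x_ = corners[:, [0, 2, 4, 6]]
    y_ = corners[:, [1, 3, 5, 7]]
    xmin = np.min(x_, 1).reshape(-1, 1)
    ymin = np.min(y_, 1).reshape(-1, 1)
    xmax = np.max(x_, 1).reshape(-1, 1)
    ymax = np.max(y_, 1).reshape(-1, 1)
    return np.hstack((xmin, ymin, xmax, ymax, corners[:, 8:]))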

class RandomShear(object):  # a special case of rotation-style affine transforms
    """Randomly shears an image in horizontal direction   
    
    
    Bounding boxes with less than 25% of their area remaining in the
    transformed image are dropped. The resolution is maintained, and any
    remaining area is filled with black.
    
    Parameters
    ----------
    shear_factor: float or tuple(float)
        if **float**, the image is sheared horizontally by a factor drawn 
        randomly from a range (-`shear_factor`, `shear_factor`). If **tuple**,
        the `shear_factor` is drawn randomly from values specified by the 
        tuple
        
    Returns
    -------
    
    numpy.ndarray
        Sheared image in the numpy format of shape `HxWxC`
    
    numpy.ndarray
        Transformed bounding box co-ordinates of the format `n x 4` where n is 
        number of bounding boxes and 4 represents `x1,y1,x2,y2` of the box
        
    """

    def __init__(self, shear_factor = 0.2):
        self.shear_factor = shear_factor
        
        if type(self.shear_factor) == tuple:
            assert len(self.shear_factor) == 2, "Invalid range for shear factor"
        else:
            self.shear_factor = (-self.shear_factor, self.shear_factor)
        
        
    def __call__(self, img, bboxes):
    
        shear_factor = random.uniform(*self.shear_factor)
    
        w,h = img.shape[1], img.shape[0]
    
        if shear_factor < 0:
            img, bboxes = HorizontalFlip()(img, bboxes)  # trick: handle a negative shear by flipping, shearing with |shear_factor|, then flipping back
    
        M = np.array([[1, abs(shear_factor), 0],[0,1,0]])
    
        nW =  img.shape[1] + abs(shear_factor*img.shape[0])
    
        bboxes[:,[0,2]] += ((bboxes[:,[1,3]]) * abs(shear_factor) ).astype(int) 
    
    
        img = cv2.warpAffine(img, M, (int(nW), img.shape[0]))  # horizontal shear only
    
        if shear_factor < 0:
            img, bboxes = HorizontalFlip()(img, bboxes)
    
        img = cv2.resize(img, (w,h))
    
        scale_factor_x = nW / w
    
        bboxes[:,:4] /= [scale_factor_x, 1, scale_factor_x, 1] 
    
    
        return img, bboxes
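Putting the transforms together. A minimal composition sketch (the image path, box values and the input size 416 are placeholders); Resize is applied last, as its docstring requires:

img = cv2.imread("sample.jpg")                   # placeholder path
bboxes = np.array([[50., 60., 200., 220.]])      # placeholder box (x1, y1, x2, y2)

transforms = [
    RandomHorizontalFlip(p=0.5),
    RandomScale(scale=0.2, diff=True),
    RandomTranslate(translate=0.2, diff=True),
    RandomRotate(angle=10),
    RandomShear(shear_factor=0.2),
    Resize(416),                                 # letterbox to the network input size; keep this last
]

for t in transforms:
    img, bboxes = t(img, bboxes)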

Speeding up preprocessing with multiple worker processes:

def parse_data(data):
    # scale_size and crop_size are assumed to be module-level configuration values defined elsewhere
    img = np.array(cv2.imread(data))
    h, w, c = img.shape
    assert c == 3
    img = cv2.resize(img, (scale_size, scale_size))
    img = img.astype(np.float32)

    shift = (scale_size - crop_size) // 2
    img = img[shift: shift + crop_size, shift: shift + crop_size, :]
    # Flip image at random if flag is selected
    if np.random.random() < 0.5:  # self.horizontal_flip and
        img = cv2.flip(img, 1)
    img = (img - np.array(127.5)) / 127.5

    return img
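A minimal sketch of fanning parse_data out over a worker pool (the file names are placeholders, and scale_size / crop_size must be defined at module level so the worker processes can see them). On Windows, the pool creation should sit under an if __name__ == "__main__": guard:

import multiprocessing as mtp

image_paths = ["img_0001.jpg", "img_0002.jpg", "img_0003.jpg"]   # placeholder file list

pool = mtp.Pool(4)                                 # 4 worker processes
batch = np.stack(pool.map(parse_data, image_paths))
pool.close()
pool.join()
print(batch.shape)                                 # (3, crop_size, crop_size, 3)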


def parse_data_without_augmentation(data):
    img = np.array(cv2.imread(data))
    h, w, c = img.shape
    assert c == 3
    img = cv2.resize(img, (crop_size, crop_size))
    img = img.astype(np.float32)
    img = (img - np.array(127.5)) / 127.5
    return img

Below is a complete dataloader (dped_dataloader.py) that parses images in parallel with a multiprocessing worker pool:

#!/usr/bin/env python
# -*- coding: utf-8 -*-
# @Time    : 2019/3/10 11:15
# @Author  : Whu_DSP
# @File    : dped_dataloader.py

import multiprocessing as mtp
import os
import cv2
import numpy as np
from scipy import misc


def parse_data(filename):
    # note: scipy.misc.imread has been removed from recent SciPy releases; imageio.imread is a drop-in replacement
    I = np.asarray(misc.imread(filename))
    I = np.float16(I) / 255  # normalize to [0, 1]
    return I
class Dataloader:

    def __init__(self, dped_dir, type_phone, batch_size, is_training, im_shape):
        self.works = mtp.Pool(10)
        self.dped_dir = dped_dir
        self.phone_type = type_phone
        self.batch_size = batch_size
        self.is_training = is_training
        self.im_shape = im_shape
        self.image_list, self.dslr_list = self._get_data_file_list()
        self.num_images = len(self.image_list)
        self._cur = 0
        self._perm = None
        self._shuffle_index()  # init order

    def _get_data_file_list(self):
        if self.is_training:
            directory_phone = os.path.join(self.dped_dir, str(self.phone_type), 'training_data', str(self.phone_type))
            directory_dslr = os.path.join(self.dped_dir, str(self.phone_type), 'training_data', 'canon')
        else:
            directory_phone = os.path.join(self.dped_dir, str(self.phone_type), 'test_data', 'patches', str(self.phone_type))
            directory_dslr = os.path.join(self.dped_dir, str(self.phone_type), 'test_data', 'patches', 'canon')
        # num_images = len([name for name in os.listdir(directory_phone) if os.path.isfile(os.path.join(directory_phone, name))])
        image_list = [os.path.join(directory_phone, name) for name in os.listdir(directory_phone)]
        dslr_list = [os.path.join(directory_dslr, name) for name in os.listdir(directory_dslr)]
        return image_list, dslr_list

    def _shuffle_index(self):
        '''randomly permute the train order'''
        self._perm = np.random.permutation(np.arange(self.num_images))
        self._cur = 0

    def _get_next_minbatch_index(self):
        """return the indices for the next minibatch"""
        if self._cur + self.batch_size > self.num_images:
            self._shuffle_index()
        next_index = self._perm[self._cur:self._cur + self.batch_size]
        self._cur += self.batch_size
        return next_index

    def get_minibatch(self, minibatch_db):
        """return minibatch data for train/test"""
        # the same parser is used for training and test batches
        jobs = self.works.map(parse_data, minibatch_db)
        index = 0
        images_data = np.zeros([self.batch_size, self.im_shape[0], self.im_shape[1], 3])
        for index_job in range(len(jobs)):
            images_data[index, :, :, :] = jobs[index_job]
            index += 1
        return images_data

    def next_batch(self):
        """Get next batch images and labels"""
        db_index = self._get_next_minbatch_index()
        minibatch_db = []
        for i in range(len(db_index)):
            minibatch_db.append(self.image_list[db_index[i]])
        minibatch_db_t = []
        for i in range(len(db_index)):
            minibatch_db_t.append(self.dslr_list[db_index[i]])
        images_data = self.get_minibatch(minibatch_db)
        dslr_data = self.get_minibatch(minibatch_db_t)
        return images_data, dslr_data


if __name__ == "__main__":
    data_dir = "F:\\ranjiewen\\TF_EnhanceDPED\\data\\dped"
    train_loader = Dataloader(data_dir, "iphone", 32, True, [100, 100])
    test_loader = Dataloader(data_dir, "iphone", 32, False, [100, 100])
    for i in range(10):
        image_batch, label_batch = train_loader.next_batch()
        print(image_batch.shape, label_batch.shape)
        print("-------------------------------------------")
        image_batch, label_batch = test_loader.next_batch()
        print(image_batch.shape, label_batch.shape)

 

