py-faster-rcnn代碼閱讀3-roidb.py


roidb是比較復雜的數據結構,存放了數據集的roi信息。原始的roidb來自數據集,在train.py的get_training_roidb(imdb)函數進行了水平翻轉擴充數量,然后prepare_roidb(imdb)【定義在roidb.py】為roidb添加了一些說明性的屬性。

在這里暫時記錄下roidb的結構信息,后面繼續看的時候可能會有些修正:

roidb是由字典組成的list,roidb[img_index]包含了該圖片索引所對應的roi信息,下面以roidb[img_index]為例說明:

roidb[img_index]包含的key, value
boxes box位置信息,box_num*4的np array
gt_overlaps 所有box在不同類別的得分(即與gt的重疊度),box_num*class_num矩陣(以稀疏矩陣存放,使用前需先用.toarray()轉為稠密數組)
gt_classes 所有box的真實類別,box_num長度的list
flipped 是否翻轉
 image 該圖片的路徑,字符串
width 圖片的寬
height  圖片的高
max_overlaps 每個box的在所有類別的得分最大值,box_num長度
max_classes 每個box的得分最高所對應的類,box_num長度
bbox_targets 每個box的類別,以及與最接近的gt-box的4個方位偏移,box_num*5的np array(第0列為類別,后4列為偏移)

 參考iamzhangzhuping的博客,感謝!更多信息請移步iamzhangzhuping的博客

 

下面是代碼

roidb.py

import numpy as np
from fast_rcnn.config import cfg
from fast_rcnn.bbox_transform import bbox_transform
from utils.cython_bbox import bbox_overlaps
import PIL

def prepare_roidb(imdb):
    """Enrich the imdb's roidb with derived quantities useful for training.

    Every entry ``imdb.roidb[i]`` is updated in place with:

    * ``'image'``        -- the value of ``imdb.image_path_at(i)``
    * ``'width'``        -- first element of the image size reported by PIL
    * ``'height'``       -- second element of the image size reported by PIL
    * ``'max_overlaps'`` -- per-ROI maximum of ``gt_overlaps`` over classes
    * ``'max_classes'``  -- per-ROI column index (class) attaining that maximum

    Args:
        imdb: dataset object exposing ``roidb``, ``image_index``,
            ``num_images`` and ``image_path_at(i)``.

    Raises:
        AssertionError: if an ROI whose max overlap is 0 is not assigned
            class 0, or an ROI with positive max overlap is assigned class 0.
    """
    # Import the submodule explicitly: a bare ``import PIL`` does not
    # guarantee that ``PIL.Image`` has been loaded.
    from PIL import Image

    # Read all image sizes first so that a missing/unreadable image fails
    # before any roidb entry has been modified.
    sizes = [Image.open(imdb.image_path_at(i)).size
             for i in range(imdb.num_images)]

    # NOTE(review): according to the original author's note, in the
    # "Stage 2 Fast R-CNN" phase the roidb comes from rpn_roidb() and holds
    # RPN proposal boxes in addition to the gt boxes -- confirm in the caller.
    roidb = imdb.roidb
    for i in range(len(imdb.image_index)):
        entry = roidb[i]
        entry['image'] = imdb.image_path_at(i)
        entry['width'], entry['height'] = sizes[i][0], sizes[i][1]

        # gt_overlaps is stored sparse; argmax/max need a dense array
        # (one row per box, one column per class).
        dense_overlaps = entry['gt_overlaps'].toarray()
        # Max overlap with gt over classes (columns), one value per box.
        max_overlaps = dense_overlaps.max(axis=1)
        # Class (column index) that had the max overlap, one value per box.
        max_classes = dense_overlaps.argmax(axis=1)
        entry['max_classes'] = max_classes
        entry['max_overlaps'] = max_overlaps

        # Sanity checks.
        # Max overlap of 0 => class should be zero (background).
        assert all(max_classes[max_overlaps == 0] == 0)
        # Max overlap > 0 => class should not be zero (must be a fg class).
        assert all(max_classes[max_overlaps > 0] != 0)

def add_bbox_regression_targets(roidb):
    """Add information needed to train bounding-box regressors.

    For every image entry a ``'bbox_targets'`` array is computed with
    ``_compute_targets`` and stored in the entry.  Per-class means and
    standard deviations of the four target columns are then either taken
    from the config (``cfg.TRAIN.BBOX_NORMALIZE_TARGETS_PRECOMPUTED``) or
    estimated from the data.  If ``cfg.TRAIN.BBOX_NORMALIZE_TARGETS`` is set,
    the stored targets are normalized in place with those statistics.

    Args:
        roidb: non-empty list of per-image dicts that already contain
            ``'boxes'``, ``'gt_overlaps'``, ``'max_overlaps'`` and
            ``'max_classes'`` (i.e. ``prepare_roidb`` has been run).

    Returns:
        Tuple ``(means, stds)``, each flattened to 1-D from a
        ``(num_classes, 4)`` array.  They are needed at prediction time to
        un-normalize and un-center the regressor outputs.

    Raises:
        AssertionError: if ``roidb`` is empty or ``'max_classes'`` is missing
            from the first entry.
    """
    assert len(roidb) > 0
    assert 'max_classes' in roidb[0], 'Did you call prepare_roidb first?'

    # Infer number of classes from the number of columns in gt_overlaps.
    num_classes = roidb[0]['gt_overlaps'].shape[1]

    # bbox_targets: per box, the class label followed by 4 regression
    # offsets towards its assigned ground-truth box.
    for entry in roidb:
        rois = entry['boxes']
        max_overlaps = entry['max_overlaps']
        max_classes = entry['max_classes']
        entry['bbox_targets'] = \
                _compute_targets(rois, max_overlaps, max_classes)

    if cfg.TRAIN.BBOX_NORMALIZE_TARGETS_PRECOMPUTED:
        # Use fixed / precomputed "means" and "stds" instead of empirical
        # values; the same row is repeated for every class.
        means = np.tile(
                np.array(cfg.TRAIN.BBOX_NORMALIZE_MEANS), (num_classes, 1))
        stds = np.tile(
                np.array(cfg.TRAIN.BBOX_NORMALIZE_STDS), (num_classes, 1))
    else:
        # Compute values needed for means and stds using
        # var(x) = E(x^2) - E(x)^2.
        # Per-class box counts; cfg.EPS avoids division by zero for classes
        # that never occur.
        class_counts = np.zeros((num_classes, 1)) + cfg.EPS
        # Per-class running sum of the 4 target columns.
        sums = np.zeros((num_classes, 4))
        # Per-class running sum of the squared 4 target columns.
        squared_sums = np.zeros((num_classes, 4))
        for entry in roidb:
            targets = entry['bbox_targets']
            # Class 0 (background) is skipped.
            for cls in range(1, num_classes):
                cls_inds = np.where(targets[:, 0] == cls)[0]
                if cls_inds.size > 0:
                    class_counts[cls] += cls_inds.size
                    sums[cls, :] += targets[cls_inds, 1:].sum(axis=0)
                    squared_sums[cls, :] += \
                            (targets[cls_inds, 1:] ** 2).sum(axis=0)

        means = sums / class_counts
        stds = np.sqrt(squared_sums / class_counts - means ** 2)

    # Single-argument print(...) produces the same output on Python 2 and 3.
    print('bbox target means:')
    print(means)
    print(means[1:, :].mean(axis=0))  # ignore bg class
    print('bbox target stdevs:')
    print(stds)
    print(stds[1:, :].mean(axis=0))  # ignore bg class

    # Normalize targets of every box with its class' statistics.
    if cfg.TRAIN.BBOX_NORMALIZE_TARGETS:
        print("Normalizing targets")
        for entry in roidb:
            targets = entry['bbox_targets']
            for cls in range(1, num_classes):
                cls_inds = np.where(targets[:, 0] == cls)[0]
                # In-place update of the array stored in the roidb entry.
                targets[cls_inds, 1:] -= means[cls, :]
                targets[cls_inds, 1:] /= stds[cls, :]
    else:
        print("NOT normalizing targets")

    # These values will be needed for making predictions
    # (the predictions will need to be unnormalized and uncentered).
    # ravel() flattens the (num_classes, 4) arrays to 1-D.
    return means.ravel(), stds.ravel()

def _compute_targets(rois, overlaps, labels):
    """Compute bounding-box regression targets for an image.

    Args:
        rois: array of boxes belonging to a single image, one row per box.
        overlaps: per-box max overlap; boxes whose value equals 1 are
            treated as the ground-truth ROIs.
        labels: per-box class label.

    Returns:
        ``float32`` array of shape ``(rois.shape[0], 5)``.  For boxes whose
        overlap is at least ``cfg.TRAIN.BBOX_THRESH``, column 0 holds the
        label and columns 1-4 hold ``bbox_transform(ex_roi, gt_roi)`` for the
        ground-truth box it overlaps most.  All other rows stay zero; if the
        image has no ground-truth ROI the whole array is zero.
    """
    # Indices of ground-truth ROIs.
    gt_inds = np.where(overlaps == 1)[0]
    if len(gt_inds) == 0:
        # Bail if the image has no ground-truth ROIs: all-zero targets.
        return np.zeros((rois.shape[0], 5), dtype=np.float32)
    # Indices of examples for which we try to make predictions: only ROIs
    # overlapping a gt box by at least the threshold are used as
    # regression training samples.
    ex_inds = np.where(overlaps >= cfg.TRAIN.BBOX_THRESH)[0]

    # Get IoU overlap between each ex ROI and gt ROI.
    # np.float64 replaces the removed alias np.float (same dtype).
    ex_gt_overlaps = bbox_overlaps(
        np.ascontiguousarray(rois[ex_inds, :], dtype=np.float64),
        np.ascontiguousarray(rois[gt_inds, :], dtype=np.float64))

    # Find which gt ROI each ex ROI has max overlap with:
    # this will be the ex ROI's gt target.
    # Rows index ex ROIs, columns index gt ROIs, values are their IoU.
    gt_assignment = ex_gt_overlaps.argmax(axis=1)
    # One gt box per ex ROI, so gt_rois and ex_rois have the same length.
    gt_rois = rois[gt_inds[gt_assignment], :]
    ex_rois = rois[ex_inds, :]

    targets = np.zeros((rois.shape[0], 5), dtype=np.float32)
    # Column 0 is the label.
    targets[ex_inds, 0] = labels[ex_inds]
    # Columns 1-4 are the offsets from the ex box to its gt box.
    targets[ex_inds, 1:] = bbox_transform(ex_rois, gt_rois)
    return targets

 


免責聲明!

本站轉載的文章為個人學習借鑒使用,本站對版權不負任何法律責任。如果侵犯了您的隱私權益,請聯系本站郵箱yoyou2525@163.com刪除。



 
粵ICP備18138465號   © 2018-2025 CODEPRJ.COM