Tensorflow版Faster RCNN源碼解析(TFFRCNN) (20) datasets/pascal_voc.py


本blog為github上CharlesShang/TFFRCNN版源碼解析系列代碼筆記

---------------個人學習筆記---------------

----------------本文作者疆--------------

------點擊此處鏈接至博客園原文------

 

定義了pascal_voc類,繼承自imdb類,類中定義了18個函數

1.__init__(self,image_set,year,devkit_path=None)構造函數,初始化部分變量

這里面部分變量未在本腳本中被更新如self._num_classes和self._roidb

# pascal_voc類繼承自imdb類
class pascal_voc(imdb):
    # image_set(如trainval等),如voc_2007_tainval
    def __init__(self, image_set, year, devkit_path=None):
        """Set up a PASCAL VOC image database for one year and split.

        Args:
            image_set: Name of the split, e.g. 'trainval'.  It is also the
                stem of the list file read by _load_image_set_index().
            year: Dataset year as a string, e.g. '2007'.
            devkit_path: Root directory of the VOC devkit.  When None, the
                value returned by _get_default_path() is used.

        Raises:
            AssertionError: If the devkit directory or the per-year data
                directory does not exist.
        """
        # The base class receives the database name, e.g. 'voc_2007_trainval'.
        imdb.__init__(self, 'voc_' + year + '_' + image_set)
        self._year = year
        self._image_set = image_set

        # Devkit root, e.g. .../data/VOCdevkit2007.
        if devkit_path is None:
            self._devkit_path = self._get_default_path()
        else:
            self._devkit_path = devkit_path
        # Per-year data directory, e.g. .../data/VOCdevkit2007/VOC2007.
        self._data_path = os.path.join(self._devkit_path, 'VOC' + self._year)

        # PASCAL specific config options
        self.config = {'cleanup'     : True,
                       'use_salt'    : True,
                       'use_diff'    : False,
                       'matlab_eval' : False,
                       'rpn_file'    : None,
                       'min_size'    : 2}
        # Random UUID string; _get_comp_id() appends it to the competition id
        # when config['use_salt'] is set.
        self._salt = str(uuid.uuid4())
        self._comp_id = 'comp4'
        self._image_ext = '.jpg'

        self._classes = ('__background__', # always index 0
                         'aeroplane', 'bicycle', 'bird', 'boat',
                         'bottle', 'bus', 'car', 'cat', 'chair',
                         'cow', 'diningtable', 'dog', 'horse',
                         'motorbike', 'person', 'pottedplant',
                         'sheep', 'sofa', 'train', 'tvmonitor')
        # Map each class name to its integer index,
        # e.g. {'__background__': 0, 'aeroplane': 1, ...}.
        class_indices = xrange(self.num_classes)
        self._class_to_ind = dict(zip(self.classes, class_indices))

        # Extension-less image names of this split, read from the split's
        # list file (e.g. trainval.txt).
        self._image_index = self._load_image_set_index()

        # Default roidb handler: the bound method itself is stored, it is not
        # called here.
        self._roidb_handler = self.gt_roidb

        assert os.path.exists(self._devkit_path), \
                'VOCdevkit path does not exist: {}'.format(self._devkit_path)
        assert os.path.exists(self._data_path), \
                'Path does not exist: {}'.format(self._data_path)

2.image_path_at(self,i)獲取數據集第i張圖像的絕對路徑,未見調用

    # 獲取數據集第i張圖像的絕對路徑
    def image_path_at(self, i):
        """Return the path to image i in the image sequence.

        The i-th entry of self._image_index (an extension-less image name)
        is resolved through image_path_from_index().
        """
        image_name = self._image_index[i]
        return self.image_path_from_index(image_name)

3.image_path_from_index(self,index)根據圖像不含后綴的名稱(如000001)獲取該圖像絕對路徑,被image_path_at(...)調用

    # 根據圖像不含后綴的名稱獲取圖像絕對路徑
    def image_path_from_index(self, index):
        """Build the path of an image from its "index" identifier.

        Args:
            index: Image name without extension, e.g. '000001'.

        Returns:
            <data_path>/JPEGImages/<index><image_ext>.

        Raises:
            AssertionError: If no file exists at the resulting path.
        """
        # self._image_ext is '.jpg' for this dataset.
        file_name = index + self._image_ext
        image_path = os.path.join(self._data_path, 'JPEGImages', file_name)
        assert os.path.exists(image_path), \
                'Path does not exist: {}'.format(image_path)
        return image_path

4._load_image_set_index(self)

獲得數據集圖像名稱構成的列表,如trainval數據集從trainval.txt中取出[000001, 000002, ...]圖像名稱列表,被__init__(...)調用

    # 獲得數據集圖像名稱構成的列表,如[000001, 000003, ...]
    def _load_image_set_index(self):   
        """
        Load the indexes listed in this dataset's image set file.
        """
        # Example path to image set file:
        # 如E:\TFFRCNN\data\VOCdevkit2007\VOC2007\ImageSets\Main\trainval.txt
        # 該路勁下存儲相關圖像名稱信息,如000001 000002...
        image_set_file = os.path.join(self._data_path, 'ImageSets', 'Main',
                                      self._image_set + '.txt')
        assert os.path.exists(image_set_file), \
                'Path does not exist: {}'.format(image_set_file)
        with open(image_set_file) as f:
            # readlines()函數一行一行讀取
            # x.strip(rm)函數 刪除x字符串中開頭結尾處rm刪除序列的字符
            image_index = [x.strip() for x in f.readlines()]                                                
        return image_index

5._get_default_path(self)

獲取數據集文件夾默認路徑,如E:\TFFRCNN\data\VOCdevkit2007,被__init__(...)調用

    # 獲取pascal voc數據集文件夾默認路徑
    def _get_default_path(self):
        """Return the default location where PASCAL VOC is expected.

        The result is cfg.DATA_DIR joined with 'VOCdevkit<year>'.
        """
        # NOTE(review): cfg.DATA_DIR is defined outside this file; it is
        # presumably <project root>/data -- confirm against the config module.
        devkit_dir = 'VOCdevkit' + self._year
        return os.path.join(cfg.DATA_DIR, devkit_dir)

6.gt_roidb(self)

從/向cache文件夾中(如E:\TFFRCNN\data\cache\voc_2007_trainval_gt_roidb.pkl)cPickle序列化讀/寫gt roi相關信息。若路徑已存在表明曾經創建過,則讀,否則則寫供下次讀。在__init__(...)中以不加()的形式賦值(self._roidb_handler = self.gt_roidb),即保存的是gt_roidb函數本身而非其返回值,但未見該值被調用。該函數通過調用_load_pascal_annotation(...)得到由各圖像gt roi信息字典組成的列表gt_roidb。實際上還被selective_search_roidb(...)、rpn_roidb(...)調用,但未使用SS產生roi。

 # 從/向cache文件夾中cPickle序列化讀/寫groundtruth roi相關信息
    # cache路徑存在則讀,否則則寫
    def gt_roidb(self):
        """Return the database of ground-truth regions of interest.

        Results are cached in <cache_path>/<name>_gt_roidb.pkl: when that file
        exists it is unpickled and returned, otherwise the annotations of
        every image are loaded, pickled to the cache file and returned.

        Returns:
            List with one _load_pascal_annotation() dict per image (or
            whatever the cache file contains, when it exists).
        """
        cache_file = os.path.join(self.cache_path, self.name + '_gt_roidb.pkl')

        if not os.path.exists(cache_file):
            # Cache miss: parse the XML annotation of every image, then
            # store the result for later calls.
            roidb = [self._load_pascal_annotation(index)
                     for index in self.image_index]
            with open(cache_file, 'wb') as fid:
                cPickle.dump(roidb, fid, cPickle.HIGHEST_PROTOCOL)
            print('wrote gt roidb to {}'.format(cache_file))
            return roidb

        # Cache hit: reuse the roidb written by an earlier call.
        with open(cache_file, 'rb') as fid:
            roidb = cPickle.load(fid)
        print('{} gt roidb loaded from {}'.format(self.name, cache_file))
        return roidb

7.selective_search_roidb(self)

與SS算法相關,未使用,類似於gt_roidb(...),從/向cache文件夾中(如E:\TFFRCNN\data\cache\voc_2007_trainval_selective_search_roidb.pkl)cPickle序列化讀/寫roi相關信息(含gt roi與SS產生的roi),未見調用。

該函數表明(VOC2007數據集上,可見if判斷語句)self.roidb既包含了gt roi也包含了(由SS)產生的roi

 # 與SS算法有關,未使用
    def selective_search_roidb(self):
        """Return the database of selective search regions of interest.

        Ground-truth ROIs are merged in unless this is a non-2007 'test'
        split.  Results are cached in
        <cache_path>/<name>_selective_search_roidb.pkl; an existing cache
        file is unpickled and returned as is.
        """
        cache_name = self.name + '_selective_search_roidb.pkl'
        cache_file = os.path.join(self.cache_path, cache_name)

        if os.path.exists(cache_file):
            with open(cache_file, 'rb') as fid:
                cached = cPickle.load(fid)
            print('{} ss roidb loaded from {}'.format(self.name, cache_file))
            return cached

        if int(self._year) != 2007 and self._image_set == 'test':
            # No ground truth is used for this split: proposals only.
            roidb = self._load_selective_search_roidb(None)
        else:
            # Combine ground-truth entries with the selective search ones.
            gt_roidb = self.gt_roidb()
            ss_roidb = self._load_selective_search_roidb(gt_roidb)
            roidb = imdb.merge_roidbs(gt_roidb, ss_roidb)

        with open(cache_file, 'wb') as fid:
            cPickle.dump(roidb, fid, cPickle.HIGHEST_PROTOCOL)
        print('wrote ss roidb to {}'.format(cache_file))
        return roidb

8.rpn_roidb(self)

返回rpn_roidb,從該函數同樣看出(int(self._year) == 2007 or self._image_set != 'test')時self.roidb來源包括gt_roidb和rpn_roidb,未見調用(應該在某個地方被調用了!)

    def rpn_roidb(self):
        """Return the roidb built from RPN proposals.

        For a non-2007 'test' split only the RPN entries are returned; in
        every other case the ground-truth roidb is merged with the RPN roidb
        through imdb.merge_roidbs().
        """
        if int(self._year) != 2007 and self._image_set == 'test':
            return self._load_rpn_roidb(None)

        gt_roidb = self.gt_roidb()
        rpn_roidb = self._load_rpn_roidb(gt_roidb)
        return imdb.merge_roidbs(gt_roidb, rpn_roidb)

9._load_rpn_roidb(self,gt_roidb)

調用imdb.py中的create_roidb_from_box_list(...)函數,由RPN產生的box列表構建rpn_roidb列表(各圖像rpn roi信息字典組成的列表)。傳入gt_roidb是為了得到‘gt_overlaps’;其他字段中‘gt_classes’全0且不更新、‘flipped’為False、‘seg_areas’全0且不更新,此處0表明該roi非gt roi而是由RPN產生的roi(可見create_roidb_from_box_list(...)函數)。該函數被rpn_roidb(...)調用。應注意這里self.config['rpn_file']表示rpn_roidb序列化內容的存儲路徑,在__init__()構造函數中初值為None,在調用該函數之前self.config應在某處更新。

注意:rpn_roidb為各張圖像產生roi相關信息構成的字典組成的列表,字典內容見如下create_roidb_from_box_list(...)函數

    def _load_rpn_roidb(self, gt_roidb):
        """Unpickle the RPN box list and convert it into a roidb.

        Args:
            gt_roidb: Ground-truth roidb forwarded to
                create_roidb_from_box_list(), or None.

        Raises:
            AssertionError: If the file named by config['rpn_file'] does not
                exist.
        """
        # config['rpn_file'] is None after __init__, so it has to be set to
        # the pickled proposal file before this method is called.
        rpn_path = self.config['rpn_file']
        print('loading {}'.format(rpn_path))
        assert os.path.exists(rpn_path), \
               'rpn data not found at: {}'.format(rpn_path)
        with open(rpn_path, 'rb') as f:
            proposals = cPickle.load(f)
        roidb = self.create_roidb_from_box_list(proposals, gt_roidb)
        return roidb

----------------注意以下函數中gt_classes為全0表明:對應的roi不是gt roi,這也解釋了test.py中的遺留的問題----------------------

對於各張圖像中由RPN產生的roi,與gt_roi計算IoU值,最大值對應的gt_roi作為gt,因此overlaps僅對應類別位置有>0的IoU值,其余位置全0,但是這里並沒有更新‘gt_classes’字段為gt_roi對應的類別,而是設置為全0,同時‘seg_areas’也被設置為全0

    def create_roidb_from_box_list(self, box_list, gt_roidb):
        """Build roidb entries for externally generated proposal boxes.

        Args:
            box_list: Sequence with one box array per image; its length must
                equal self.num_images.  Each array is read through
                ``.shape[0]`` (number of boxes).
            gt_roidb: Ground-truth roidb whose i-th entry belongs to the same
                image as box_list[i], or None when no ground truth is
                available.

        Returns:
            List with one dict per image holding 'boxes' (the input array),
            'gt_classes' (all zeros), 'gt_overlaps' (CSR matrix of shape
            (num_boxes, num_classes)), 'flipped' (False) and 'seg_areas'
            (all zeros).

        Raises:
            AssertionError: If len(box_list) differs from self.num_images.
        """
        assert len(box_list) == self.num_images, \
                'Number of boxes must match number of ground-truth images'
        roidb = []
        for i, boxes in enumerate(box_list):
            num_boxes = boxes.shape[0]
            overlaps = np.zeros((num_boxes, self.num_classes), dtype=np.float32)

            if gt_roidb is not None and gt_roidb[i]['boxes'].size > 0:
                gt_boxes = gt_roidb[i]['boxes']
                gt_classes = gt_roidb[i]['gt_classes']
                # np.float64 is the dtype the removed alias np.float used to
                # resolve to, so the values passed on are unchanged.
                gt_overlaps = bbox_overlaps(boxes.astype(np.float64),
                                            gt_boxes.astype(np.float64))
                # For every proposal keep only its best-matching ground-truth
                # box: the overlap value is written into the column of that
                # box's class, all other columns stay 0.
                argmaxes = gt_overlaps.argmax(axis=1)
                maxes = gt_overlaps.max(axis=1)
                I = np.where(maxes > 0)[0]
                overlaps[I, gt_classes[argmaxes[I]]] = maxes[I]

            overlaps = scipy.sparse.csr_matrix(overlaps)
            # 'gt_classes' and 'seg_areas' are deliberately left at zero for
            # these boxes; only 'gt_overlaps' carries class information.
            roidb.append({
                'boxes' : boxes,
                'gt_classes' : np.zeros((num_boxes,), dtype=np.int32),
                'gt_overlaps' : overlaps,
                'flipped' : False,
                'seg_areas' : np.zeros((num_boxes,), dtype=np.float32),
            })
        return roidb

10._load_selective_search_roidb(self,gt_roidb)

類似於_load_rpn_roidb(self,gt_roidb),返回由SS算法得到的roidb數據,由於算法中未使用,不過多解釋,被selective_search_roidb(...)調用

    def _load_selective_search_roidb(self, gt_roidb):
        """Load precomputed selective search boxes and convert them to a roidb.

        The boxes are read from
        <cfg.DATA_DIR>/selective_search_data/<name>.mat (key 'boxes').

        Args:
            gt_roidb: Ground-truth roidb forwarded to
                create_roidb_from_box_list(), or None.

        Raises:
            AssertionError: If the .mat file does not exist.
        """
        mat_path = os.path.abspath(os.path.join(cfg.DATA_DIR,
                                                'selective_search_data',
                                                self.name + '.mat'))
        assert os.path.exists(mat_path), \
               'Selective search data not found at: {}'.format(mat_path)
        # ravel() flattens the loaded array so it can be walked entry by entry.
        raw_data = sio.loadmat(mat_path)['boxes'].ravel()

        box_list = []
        for entry in raw_data:
            # Swap columns 0<->1 and 2<->3 and shift every value by -1.
            # NOTE(review): presumably converts 1-based (y1, x1, y2, x2) boxes
            # to 0-based (x1, y1, x2, y2) -- confirm against the .mat producer.
            boxes = entry[:, (1, 0, 3, 2)] - 1
            # Keep the rows selected by the ds_utils helpers: first the
            # unique boxes, then those passing the config['min_size'] filter.
            boxes = boxes[ds_utils.unique_boxes(boxes), :]
            selected = ds_utils.filter_small_boxes(boxes,
                                                   self.config['min_size'])
            box_list.append(boxes[selected, :])
        return self.create_roidb_from_box_list(box_list, gt_roidb)

11._load_pascal_annotation(self, index)

根據不含后綴的圖像名稱(如index為000001)讀取相應xml文件,獲得該圖像gt roi相關信息構成的字典。字典包含以下字段:'boxes'(shape為(None,4),存儲該圖像所有gt roi坐標信息)、'gt_classes'(shape為(None,),存儲該圖像所有gt roi類別索引信息)、'gt_ishard'(shape為(None,),存儲該圖像所有gt roi是否為難例)、'gt_overlaps'(稀疏矩陣未壓縮前shape為(None,21),存儲該圖像所有gt roi IOU值,對應gt類別位置其值為1.0,其他全0)、'flipped'(為False)、'seg_areas'(shape為(None,),存儲該圖像所有gt roi面積)。數據集全部圖像的gt roi信息字典組成的列表為gt_roidb,該函數被gt_roidb(...)調用,可以看到gt_roidb與rpn_roidb在結構上是一致的。關於overlaps = scipy.sparse.csr_matrix(overlaps)的壓縮存儲形式未查到相關資料,見下文測試腳本。

    # 根據不含后綴的圖像名稱加載圖片,讀取xml文件獲取groundtruth roi相關信息
    def _load_pascal_annotation(self, index):
        """
        Load image and bounding boxes info from XML file in the PASCAL VOC
        format.
        """
        filename = os.path.join(self._data_path, 'Annotations', index + '.xml')
        tree = ET.parse(filename)
        objs = tree.findall('object')
        # if not self.config['use_diff']:
        #     # Exclude the samples labeled as difficult
        #     non_diff_objs = [
        #         obj for obj in objs if int(obj.find('difficult').text) == 0]
        #     # if len(non_diff_objs) != len(objs):
        #     #     print 'Removed {} difficult objects'.format(
        #     #         len(objs) - len(non_diff_objs))
        #     objs = non_diff_objs
        num_objs = len(objs)
        # 初始化boxes,建立一個shape為(num_objs, 4)的全0數組,4列表示某個object gt bbox坐標
        boxes = np.zeros((num_objs, 4), dtype=np.uint16)
        # 初始化gt_classes,建立一個shape為(num_objs)的向量,pascal voc數據集對應值為1--21中的任一個
        gt_classes = np.zeros((num_objs), dtype=np.int32)
        # 初始化overlaps,建立一個shape為(num_objs, self.num_classes)的全0數組,gt roi對應類別所在列為1,其余全0
        overlaps = np.zeros((num_objs, self.num_classes), dtype=np.float32)
        # "Seg" area for pascal is just the box area
        # 存儲gt roi面積
        seg_areas = np.zeros((num_objs), dtype=np.float32)
        # 存儲是否為難例(0或1,1表示hard )
        ishards = np.zeros((num_objs), dtype=np.int32)

        # Load object bounding boxes into a data frame.
        # 對該圖像所有的obj循環處理,存儲相應值
        for ix, obj in enumerate(objs):
            bbox = obj.find('bndbox')
            # Make pixel indexes 0-based
            # 記錄gt roi位置信息,這里為何要減1
            x1 = float(bbox.find('xmin').text) - 1
            y1 = float(bbox.find('ymin').text) - 1
            x2 = float(bbox.find('xmax').text) - 1
            y2 = float(bbox.find('ymax').text) - 1

            diffc = obj.find('difficult')
            difficult = 0 if diffc == None else int(diffc.text)
            ishards[ix] = difficult
            # self._class_to_ind中存放的是{'__background__':0,'craft':1  ...}key-value 字典
            # 取出類別名對應的index
            cls = self._class_to_ind[obj.find('name').text.lower().strip()]
            boxes[ix, :] = [x1, y1, x2, y2]
            gt_classes[ix] = cls
            # 生成類似於one-hot編碼[[0,0,0,0,1,0,0,0,...][0,0,0,0,1,0,0,0,...]]
            overlaps[ix, cls] = 1.0
            seg_areas[ix] = (x2 - x1 + 1) * (y2 - y1 + 1)
        # 對於那些零元素數目遠遠多於非零元素數目,並且非零元素的分布沒有規律的矩陣稱為稀疏矩陣、存儲和計算更為高效
        # 將overlaps稀疏矩陣壓縮!!!
        # 如(0,0) 1.0 (1,2) 1.0等???未查到相關內容
        overlaps = scipy.sparse.csr_matrix(overlaps)
        # 該圖像gt roi信息構成的字典
        return {'boxes' : boxes,               # (None,4)
                'gt_classes': gt_classes,      # (None,1)
                'gt_ishard': ishards,          # (None,1)
                'gt_overlaps' : overlaps,      # 壓縮前為(None,21)  壓縮后的,形式見上
                'flipped' : False,             # 1
                'seg_areas' : seg_areas}       # (None,1)

xml文件示例

<annotation>
    <folder>VOC2007</folder>
    <filename>000001.jpg</filename>
    <source>
        <database>My Database</database>
        <annotation>VOC2007</annotation>
        <image>flickr</image>
        <flickrid>NULL</flickrid>
    </source>
    <owner>
        <flickrid>NULL</flickrid>
        <name>sunyifeng</name>
    </owner>
    <size>
        <width>1920</width>
        <height>1080</height>
        <depth>3</depth>
    </size>
    <segmented>0</segmented>
    <object>
        <name>craft</name>
        <pose>Unspecified</pose>
        <truncated>0</truncated>
        <difficult>0</difficult>
        <bndbox>
            <xmin>963</xmin>
            <ymin>696</ymin>
            <xmax>1038</xmax>
            <ymax>739</ymax>
        </bndbox>
    </object>
</annotation>
# -*- coding:utf-8 -*-
# Author: WUJiang
# 測試功能,未查到相關內容

import scipy.sparse.csr
import numpy as np

# pascal voc數據集,該值none行21列
# Dense 2 x 21 overlap matrix: row 0 has its 1.0 in column 0, row 1 in column 2.
overlaps = np.zeros((2, 21))
overlaps[0, 0] = 1.0
overlaps[1, 2] = 1.0

# Printing the CSR form lists only the non-zero entries as "(row, col) value":
#   (0, 0) 1.0
#   (1, 2) 1.0
print(scipy.sparse.csr_matrix(overlaps))
View Code

12._get_comp_id(self)

返回comp_id(use_salt為True時為'comp4_'加上隨機uuid,否則為'comp4'),被_get_voc_results_file_template(...)和_do_matlab_eval(...)調用

    def _get_comp_id(self):   
        # 其中self._salt = str(uuid.uuid4())
        # self._comp_id = 'comp4'  use_salt=True
        comp_id = (self._comp_id + '_' + self._salt if self.config['use_salt']  
            else self._comp_id)
        return comp_id

13._get_voc_results_file_template(self)

返回PASCAL VOC各個類別self._image_set(如test 檢測)結果存儲路徑模板,如/TFFRCNN/data/VOCdevkit2007/results/VOC2007/Main/<comp_id>_det_test_bus.txt,被_write_voc_results_file(...)調用,針對各類保存的檢測結果,將在evaluate_detections(...)中被刪除

    # 返回PASCAL VOC各個類別self._image_set(如test 檢測)結果存儲路徑模板
    def _get_voc_results_file_template(self): 
        # .../results/VOC2007/Main/<comp_id>_det_test_xxxxxxxxx.txt
        filename = self._get_comp_id() + '_det_' + self._image_set + '_{:s}.txt'
        filedir = os.path.join(self._devkit_path, 'results', 'VOC' + self._year, 'Main')
        if not os.path.exists(filedir):
            os.makedirs(filedir)
        path = os.path.join(filedir, filename)
        return path

14._write_voc_results_file(self,all_boxes)

為各類檢測結果寫一個txt文件,如..VOCdevkit/results/VOC2007/Main/<comp_id>_det_test_aeroplane.txt,其中每行分別為不含后綴的圖像名、該圖像某box的置信得分、該box的4維坐標,被evaluate_detections(...)調用

    # 為每類檢測結果寫一個txt文件
    # 注意傳入參數all_boxes
    def _write_voc_results_file(self, all_boxes):
        for cls_ind, cls in enumerate(self.classes):
            if cls == '__background__':
                continue
            print 'Writing {} VOC results file'.format(cls)
            # 如VOCdevkit/results/VOC2007/Main/<comp_id>_det_test_aeroplane.txt
            filename = self._get_voc_results_file_template().format(cls)
            with open(filename, 'wt') as f:
                # self.image_index 不含后綴的圖像名組成的列表
                for im_ind, index in enumerate(self.image_index):  
                    # 遍歷每一張圖像,取出對應圖像某類的檢測結果
                    dets = all_boxes[cls_ind][im_ind]
                    if dets == []:
                        continue
                    # the VOCdevkit expects 1-based indices  索引
                    # 逐行寫入:不含后綴圖像名  該圖像某box置信得分  該box四維坐標+1
                    for k in xrange(dets.shape[0]):
                        f.write('{:s} {:.3f} {:.1f} {:.1f} {:.1f} {:.1f}\n'.
                                format(index, dets[k, -1],                    
                                       dets[k, 0] + 1, dets[k, 1] + 1,
                                       dets[k, 2] + 1, dets[k, 3] + 1))

15.362

 


免責聲明!

本站轉載的文章為個人學習借鑒使用,本站對版權不負任何法律責任。如果侵犯了您的隱私權益,請聯系本站郵箱yoyou2525@163.com刪除。



 
粵ICP備18138465號   © 2018-2025 CODEPRJ.COM