本blog為github上CharlesShang/TFFRCNN版源碼解析系列代碼筆記
---------------個人學習筆記---------------
----------------本文作者疆--------------
------點擊此處鏈接至博客園原文------
定義了pascal_voc類,繼承自imdb類,類中定義了18個函數
1.__init__(self,image_set,year,devkit_path=None)構造函數,初始化部分變量
這里面部分變量未在本腳本中被更新,如self._num_classes和self._roidb
class pascal_voc(imdb):
    """PASCAL VOC dataset description; derives from the project's ``imdb`` base class."""

    def __init__(self, image_set, year, devkit_path=None):
        """Record the split/year, resolve dataset paths and set VOC defaults.

        Args:
            image_set: split name such as 'trainval'.
            year: dataset year as a string such as '2007'.
            devkit_path: VOCdevkit root; ``None`` selects the path returned
                by ``_get_default_path()``.

        Raises:
            AssertionError: if the devkit directory or the per-year data
                directory does not exist.
        """
        # The base class is initialised with the dataset name, e.g. 'voc_2007_trainval'.
        # NOTE(review): the defaults the base class assigns (roidb, classes,
        # image index, config) are not visible here -- confirm in imdb.py.
        dataset_name = 'voc_' + year + '_' + image_set
        imdb.__init__(self, dataset_name)
        self._year = year
        self._image_set = image_set

        # Devkit root, e.g. E:\TFFRCNN\data\VOCdevkit2007
        if devkit_path is None:
            self._devkit_path = self._get_default_path()
        else:
            self._devkit_path = devkit_path
        # Per-year data directory, e.g. E:\TFFRCNN\data\VOCdevkit2007\VOC2007
        self._data_path = os.path.join(self._devkit_path, 'VOC' + self._year)

        self._classes = (
            '__background__',  # always index 0
            'aeroplane', 'bicycle', 'bird', 'boat',
            'bottle', 'bus', 'car', 'cat', 'chair',
            'cow', 'diningtable', 'dog', 'horse',
            'motorbike', 'person', 'pottedplant',
            'sheep', 'sofa', 'train', 'tvmonitor',
        )
        # The author's single-class experiment (2018.1.30) used
        # ('__background__', 'craft') in place of the tuple above.

        # Map class name -> integer index by pairing the names with
        # 0 .. num_classes - 1, e.g. {'__background__': 0, 'aeroplane': 1, ...}.
        class_names = self.classes
        self._class_to_ind = dict(zip(class_names, xrange(self.num_classes)))

        self._image_ext = '.jpg'
        # Extension-less image names of this split, read from the image-set file.
        self._image_index = self._load_image_set_index()

        # Default roidb handler: the ground-truth loader. The bound method is
        # stored, not called; selective_search_roidb is the alternative handler.
        self._roidb_handler = self.gt_roidb

        # Random unique suffix, consumed by _get_comp_id().
        self._salt = str(uuid.uuid4())
        self._comp_id = 'comp4'

        # PASCAL specific config options
        self.config = {
            'cleanup': True,
            'use_salt': True,
            'use_diff': False,
            'matlab_eval': False,
            'rpn_file': None,
            'min_size': 2,
        }

        assert os.path.exists(self._devkit_path), \
            'VOCdevkit path does not exist: {}'.format(self._devkit_path)
        assert os.path.exists(self._data_path), \
            'Path does not exist: {}'.format(self._data_path)
2.image_path_at(self,i)獲取數據集第i張圖像的絕對路徑,未見調用
def image_path_at(self, i):
    """Resolve the i-th image of the split to its absolute file path."""
    # self._image_index holds the extension-less image names of this split.
    image_id = self._image_index[i]
    return self.image_path_from_index(image_id)
3.image_path_from_index(self,index)根據圖像不含后綴的名稱(如000001)獲取該圖像絕對路徑,被image_path_at(...)調用
def image_path_from_index(self, index):
    """Build the image path for an extension-less image name such as '000001'.

    The file is looked up under ``<data_path>/JPEGImages``; an
    AssertionError is raised when it does not exist.
    """
    file_name = index + self._image_ext
    image_path = os.path.join(self._data_path, 'JPEGImages', file_name)
    assert os.path.exists(image_path), \
        'Path does not exist: {}'.format(image_path)
    return image_path
4._load_image_set_index(self)
獲得數據集圖像名稱構成的列表,如trainval數據集從trainval.txt中取出[000001, 000002, ...]圖像名稱列表,被__init__(...)調用
# 獲得數據集圖像名稱構成的列表,如[000001, 000003, ...] def _load_image_set_index(self): """ Load the indexes listed in this dataset's image set file. """ # Example path to image set file: # 如E:\TFFRCNN\data\VOCdevkit2007\VOC2007\ImageSets\Main\trainval.txt # 該路勁下存儲相關圖像名稱信息,如000001 000002... image_set_file = os.path.join(self._data_path, 'ImageSets', 'Main', self._image_set + '.txt') assert os.path.exists(image_set_file), \ 'Path does not exist: {}'.format(image_set_file) with open(image_set_file) as f: # readlines()函數一行一行讀取 # x.strip(rm)函數 刪除x字符串中開頭結尾處rm刪除序列的字符 image_index = [x.strip() for x in f.readlines()] return image_index
5._get_default_path(self)
獲取數據集文件夾默認路徑,如E:\TFFRCNN\data\VOCdevkit2007,被__init__(...)調用
def _get_default_path(self):
    """Return the directory where the PASCAL VOC devkit is expected by default.

    The result is ``<cfg.DATA_DIR>/VOCdevkit<year>``, for example
    E:/TFFRCNN/data/VOCdevkit2007.
    """
    # NOTE(review): cfg.DATA_DIR is defined in the project config (not shown
    # here); presumably it is the 'data' folder under the repository root.
    devkit_dir = 'VOCdevkit' + self._year
    return os.path.join(cfg.DATA_DIR, devkit_dir)
6.gt_roidb(self)
從/向cache文件夾中(如E:\TFFRCNN\data\cache\voc_2007_trainval_gt_roidb.pkl)cPickle序列化讀/寫gt roi相關信息。若路徑已存在表明曾經創建過,則讀,否則則寫供下次讀。被__init__(...)不加()調用,表明加載gt roi相關信息,返回gt_roidb的地址(self._roidb_handler = self.gt_roidb,但該值未見調用)該函數是通過調用_load_pascal_annotation(...)得到由各圖像gt roi信息字典組成的列表gt_roidb。實際上還被selective_search_roidb(...)、rpn_roidb(...)調用,但未使用SS產生roi。
def gt_roidb(self):
    """Return the ground-truth roidb, backed by an on-disk pickle cache.

    The cache file is ``<cache_path>/<name>_gt_roidb.pkl``. When it exists
    it is loaded and returned; otherwise the annotations of every image
    are parsed, written to the cache for future calls, and returned.
    """
    cache_file = os.path.join(self.cache_path, self.name + '_gt_roidb.pkl')

    if not os.path.exists(cache_file):
        # Cache miss: build one annotation dict per image, then persist.
        entries = []
        for index in self.image_index:
            entries.append(self._load_pascal_annotation(index))
        with open(cache_file, 'wb') as fid:
            cPickle.dump(entries, fid, cPickle.HIGHEST_PROTOCOL)
        print('wrote gt roidb to {}'.format(cache_file))
        return entries

    # Cache hit: deserialize the previously written list.
    with open(cache_file, 'rb') as fid:
        roidb = cPickle.load(fid)
    print('{} gt roidb loaded from {}'.format(self.name, cache_file))
    return roidb
7.selective_search_roidb(self)
與SS算法相關,未使用,類似於gt_roidb(...),從/向cache文件夾中(如E:\TFFRCNN\data\cache\voc_2007_trainval_selective_search_roidb.pkl)cPickle序列化讀/寫SS roi相關信息(滿足下述if條件時已與gt roi合並),未見調用。
該函數表明(VOC2007數據集上,可見if判斷語句)self.roidb中既包含了gt roi也包含了(由SS)產生的roi
def selective_search_roidb(self):
    """Return the selective-search roidb, backed by an on-disk pickle cache.

    For the 2007 dataset, or for any split other than 'test', the
    ground-truth ROIs are merged into the result. The cache file is
    ``<cache_path>/<name>_selective_search_roidb.pkl``.
    """
    cache_file = os.path.join(self.cache_path,
                              self.name + '_selective_search_roidb.pkl')

    if os.path.exists(cache_file):
        with open(cache_file, 'rb') as fid:
            cached = cPickle.load(fid)
        print('{} ss roidb loaded from {}'.format(self.name, cache_file))
        return cached

    has_ground_truth = int(self._year) == 2007 or self._image_set != 'test'
    if has_ground_truth:
        gt_roidb = self.gt_roidb()
        ss_roidb = self._load_selective_search_roidb(gt_roidb)
        # Combine ground-truth and selective-search entries per image.
        roidb = imdb.merge_roidbs(gt_roidb, ss_roidb)
    else:
        roidb = self._load_selective_search_roidb(None)

    with open(cache_file, 'wb') as fid:
        cPickle.dump(roidb, fid, cPickle.HIGHEST_PROTOCOL)
    print('wrote ss roidb to {}'.format(cache_file))
    return roidb
8.rpn_roidb(self)
返回rpn_roidb,從該函數同樣看出(int(self._year) == 2007 or self._image_set != 'test')時self.roidb來源包括gt_roidb和rpn_roidb,未見調用(應該在某個地方被調用了!)
def rpn_roidb(self):
    """Return the roidb built from RPN proposals.

    For the 2007 dataset, or for any split other than 'test', the
    ground-truth roidb is loaded and merged with the RPN roidb;
    otherwise only the RPN roidb is returned.
    """
    # Guard clause: no ground truth is used for a non-2007 'test' split.
    if int(self._year) != 2007 and self._image_set == 'test':
        return self._load_rpn_roidb(None)

    gt_entries = self.gt_roidb()
    rpn_entries = self._load_rpn_roidb(gt_entries)
    # Merge ground-truth and RPN entries.
    return imdb.merge_roidbs(gt_entries, rpn_entries)
9._load_rpn_roidb(self,gt_roidb)
調用(imdb.py中)create_roidb_from_box_list(...)函數更新由RPN產生的rpn_roidb列表中(各圖像rpn_roi信息構成的)字典內容(傳入gt_roidb是為了得到‘gt_overlaps’,其他字段‘gt_classes’全0且不更新、‘flipped’為False、‘seg_areas’全0且不更新,此處0表明該roi非gt roi而是由RPN產生的roi,可見create_roidb_from_box_list(...)函數),被rpn_roidb(...)調用,應注意這里self.config['rpn_file']表示rpn_roidb序列化內容的存儲路徑,在__init__()構造函數中初值為None,在調用該函數之前self.config應在某處更新!
注意:rpn_roidb為各張圖像產生roi相關信息構成的字典組成的列表,字典內容見如下create_roidb_from_box_list(...)函數
def _load_rpn_roidb(self, gt_roidb): # __init__()構造函數中該字段初值為None,在本句之前self.config應在某處更新! # 該字段為rpn_roidb序列化內容的存儲路徑 filename = self.config['rpn_file'] print 'loading {}'.format(filename) assert os.path.exists(filename), \ 'rpn data not found at: {}'.format(filename) with open(filename, 'rb') as f: box_list = cPickle.load(f) return self.create_roidb_from_box_list(box_list, gt_roidb)
----------------注意以下函數中gt_classes為全0表明:對應的roi不是gt roi,這也解釋了test.py中的遺留的問題----------------------
對於各張圖像中由RPN產生的roi,與gt_roi計算IoU值,最大值對應的gt_roi作為gt,因此overlaps僅對應類別位置有>0的IoU值,其余位置全0,但是這里並沒有更新‘gt_classes’字段為gt_roi對應的類別,而是設置為全0,同時‘seg_areas’也被設置為全0
def create_roidb_from_box_list(self, box_list, gt_roidb):
    """Build a roidb (one dict per image) from a list of proposal boxes.

    Args:
        box_list: one array of boxes per image; ``boxes.shape[0]`` is the
            number of proposals for that image.
        gt_roidb: ground-truth roidb used to fill 'gt_overlaps', or None.

    Returns:
        list of dict with keys 'boxes', 'gt_classes', 'gt_overlaps',
        'flipped' and 'seg_areas'. 'gt_classes' and 'seg_areas' are all
        zeros for these entries.

    Raises:
        AssertionError: if ``len(box_list)`` differs from ``self.num_images``.
    """
    assert len(box_list) == self.num_images, \
        'Number of boxes must match number of ground-truth images'
    roidb = []
    for i, boxes in enumerate(box_list):
        num_boxes = boxes.shape[0]
        overlaps = np.zeros((num_boxes, self.num_classes), dtype=np.float32)

        if gt_roidb is not None and gt_roidb[i]['boxes'].size > 0:
            gt_boxes = gt_roidb[i]['boxes']
            gt_classes = gt_roidb[i]['gt_classes']
            # np.float64 is the dtype the removed np.float alias stood for.
            gt_overlaps = bbox_overlaps(boxes.astype(np.float64),
                                        gt_boxes.astype(np.float64))
            argmaxes = gt_overlaps.argmax(axis=1)
            maxes = gt_overlaps.max(axis=1)
            I = np.where(maxes > 0)[0]
            # For each proposal with a positive best overlap, store that
            # overlap in the column of the best-matching gt box's class;
            # every other column stays 0.
            overlaps[I, gt_classes[argmaxes[I]]] = maxes[I]

        overlaps = scipy.sparse.csr_matrix(overlaps)
        # 'gt_classes' is deliberately left all-zero here rather than set
        # to the matched class.
        roidb.append({
            'boxes': boxes,
            'gt_classes': np.zeros((num_boxes,), dtype=np.int32),
            'gt_overlaps': overlaps,
            'flipped': False,
            'seg_areas': np.zeros((num_boxes,), dtype=np.float32),
        })
    return roidb
10._load_selective_search_roidb(self,gt_roidb)
類似於_load_rpn_roidb(self,gt_roidb),返回由SS算法得到的roidb數據,由於算法中未使用,不過多解釋,被selective_search_roidb(...)調用
def _load_selective_search_roidb(self, gt_roidb):
    """Build a roidb from precomputed selective-search boxes.

    The boxes are read from
    ``<cfg.DATA_DIR>/selective_search_data/<name>.mat``; per image,
    duplicate and too-small boxes are dropped before the list is handed
    to ``create_roidb_from_box_list``.

    Raises:
        AssertionError: if the .mat file does not exist.
    """
    mat_path = os.path.join(cfg.DATA_DIR, 'selective_search_data',
                            self.name + '.mat')
    filename = os.path.abspath(mat_path)
    assert os.path.exists(filename), \
        'Selective search data not found at: {}'.format(filename)

    # sio is scipy.io in the original file's imports (not shown here);
    # ravel() flattens the loaded 'boxes' entry to one item per image.
    raw_data = sio.loadmat(filename)['boxes'].ravel()

    box_list = []
    for raw_boxes in raw_data:
        # Swap column pairs and shift by one.
        # NOTE(review): presumably converts MATLAB 1-based (y1, x1, y2, x2)
        # rows into 0-based (x1, y1, x2, y2) -- confirm against the data.
        boxes = raw_boxes[:, (1, 0, 3, 2)] - 1
        unique_rows = ds_utils.unique_boxes(boxes)
        boxes = boxes[unique_rows, :]
        large_enough = ds_utils.filter_small_boxes(boxes, self.config['min_size'])
        box_list.append(boxes[large_enough, :])

    return self.create_roidb_from_box_list(box_list, gt_roidb)
11._load_pascal_annotation(self, index)
根據不含后綴的圖像名稱(如index為000001)讀取相應xml文件,獲得該圖像gt roi相關信息構成的字典,字典包含'boxes'(shape為(None,4),存儲該圖像所有gt roi坐標信息)、'gt_classes'(shape為(None,),存儲該圖像所有gt roi類別索引信息)、'gt_ishard'(shape為(None,),存儲該圖像所有gt roi是否為難例)、'gt_overlaps'(稀疏矩陣未壓縮前shape為(None,21),存儲該圖像所有gt roi IOU值,對應gt類別位置其值為1.0,其他全0)、'flipped'(為False)、'seg_areas'(shape為(None,),存儲該圖像所有gt roi面積)字段,數據集全部圖像的gt roi信息字典組成的列表為gt_roidb,該函數被gt_roidb(...)調用,可以看到gt_roidb與rpn_roidb在結構上基本一致(rpn_roidb的字典中沒有'gt_ishard'字段)。關於overlaps = scipy.sparse.csr_matrix(overlaps)一句未查到相關資料,其作用是將稠密的overlaps數組轉換為CSR(按行壓縮)稀疏矩陣,只存儲非零元素,見下文測試腳本。
# 根據不含后綴的圖像名稱加載圖片,讀取xml文件獲取groundtruth roi相關信息 def _load_pascal_annotation(self, index): """ Load image and bounding boxes info from XML file in the PASCAL VOC format. """ filename = os.path.join(self._data_path, 'Annotations', index + '.xml') tree = ET.parse(filename) objs = tree.findall('object') # if not self.config['use_diff']: # # Exclude the samples labeled as difficult # non_diff_objs = [ # obj for obj in objs if int(obj.find('difficult').text) == 0] # # if len(non_diff_objs) != len(objs): # # print 'Removed {} difficult objects'.format( # # len(objs) - len(non_diff_objs)) # objs = non_diff_objs num_objs = len(objs) # 初始化boxes,建立一個shape為(num_objs, 4)的全0數組,4列表示某個object gt bbox坐標 boxes = np.zeros((num_objs, 4), dtype=np.uint16) # 初始化gt_classes,建立一個shape為(num_objs)的向量,pascal voc數據集對應值為1--21中的任一個 gt_classes = np.zeros((num_objs), dtype=np.int32) # 初始化overlaps,建立一個shape為(num_objs, self.num_classes)的全0數組,gt roi對應類別所在列為1,其余全0 overlaps = np.zeros((num_objs, self.num_classes), dtype=np.float32) # "Seg" area for pascal is just the box area # 存儲gt roi面積 seg_areas = np.zeros((num_objs), dtype=np.float32) # 存儲是否為難例(0或1,1表示hard ) ishards = np.zeros((num_objs), dtype=np.int32) # Load object bounding boxes into a data frame. 
# 對該圖像所有的obj循環處理,存儲相應值 for ix, obj in enumerate(objs): bbox = obj.find('bndbox') # Make pixel indexes 0-based # 記錄gt roi位置信息,這里為何要減1 x1 = float(bbox.find('xmin').text) - 1 y1 = float(bbox.find('ymin').text) - 1 x2 = float(bbox.find('xmax').text) - 1 y2 = float(bbox.find('ymax').text) - 1 diffc = obj.find('difficult') difficult = 0 if diffc == None else int(diffc.text) ishards[ix] = difficult # self._class_to_ind中存放的是{'__background__':0,'craft':1 ...}key-value 字典 # 取出類別名對應的index cls = self._class_to_ind[obj.find('name').text.lower().strip()] boxes[ix, :] = [x1, y1, x2, y2] gt_classes[ix] = cls # 生成類似於one-hot編碼[[0,0,0,0,1,0,0,0,...][0,0,0,0,1,0,0,0,...]] overlaps[ix, cls] = 1.0 seg_areas[ix] = (x2 - x1 + 1) * (y2 - y1 + 1) # 對於那些零元素數目遠遠多於非零元素數目,並且非零元素的分布沒有規律的矩陣稱為稀疏矩陣、存儲和計算更為高效 # 將overlaps稀疏矩陣壓縮!!! # 如(0,0) 1.0 (1,2) 1.0等???未查到相關內容 overlaps = scipy.sparse.csr_matrix(overlaps) # 該圖像gt roi信息構成的字典 return {'boxes' : boxes, # (None,4) 'gt_classes': gt_classes, # (None,1) 'gt_ishard': ishards, # (None,1) 'gt_overlaps' : overlaps, # 壓縮前為(None,21) 壓縮后的,形式見上 'flipped' : False, # 1 'seg_areas' : seg_areas} # (None,1)
xml文件示例
<annotation> <folder>VOC2007</folder> <filename>000001.jpg</filename> <source> <database>My Database</database> <annotation>VOC2007</annotation> <image>flickr</image> <flickrid>NULL</flickrid> </source> <owner> <flickrid>NULL</flickrid> <name>sunyifeng</name> </owner> <size> <width>1920</width> <height>1080</height> <depth>3</depth> </size> <segmented>0</segmented> <object> <name>craft</name> <pose>Unspecified</pose> <truncated>0</truncated> <difficult>0</difficult> <bndbox> <xmin>963</xmin> <ymin>696</ymin> <xmax>1038</xmax> <ymax>739</ymax> </bndbox> </object> </annotation>

# -*- coding:utf-8 -*-
# Author: WUJiang
# Small demo of what scipy.sparse.csr_matrix does to a dense one-hot matrix.
import numpy as np
import scipy.sparse

# Stand-in for a PASCAL VOC 'gt_overlaps' array: one row per object and
# 21 columns (one per class).
overlaps = np.array([
    [1.0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
    [0, 0, 1.0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
])

# Only the non-zero entries are stored, so the printout lists
#   (0, 0)  1.0
#   (1, 2)  1.0
print(scipy.sparse.csr_matrix(overlaps))
12._get_comp_id(self)
返回comp_id:self.config['use_salt']為True時為self._comp_id加'_'再加隨機uuid(self._salt),否則僅為self._comp_id(即'comp4'),被_get_voc_results_file_template(...)和_do_matlab_eval(...)調用
def _get_comp_id(self): # 其中self._salt = str(uuid.uuid4()) # self._comp_id = 'comp4' use_salt=True comp_id = (self._comp_id + '_' + self._salt if self.config['use_salt'] else self._comp_id) return comp_id
13._get_voc_results_file_template(self)
返回PASCAL VOC各個類別self._image_set(如test 檢測)結果存儲路徑模板,如/TFFRCNN/data/VOCdevkit2007/results/VOC2007/Main/<comp_id>_det_test_bus.txt,被_write_voc_results_file(...)調用,針對各類保存的檢測結果,將在evaluate_detections(...)中被刪除
# 返回PASCAL VOC各個類別self._image_set(如test 檢測)結果存儲路徑模板 def _get_voc_results_file_template(self): # .../results/VOC2007/Main/<comp_id>_det_test_xxxxxxxxx.txt filename = self._get_comp_id() + '_det_' + self._image_set + '_{:s}.txt' filedir = os.path.join(self._devkit_path, 'results', 'VOC' + self._year, 'Main') if not os.path.exists(filedir): os.makedirs(filedir) path = os.path.join(filedir, filename) return path
14._write_voc_results_file(self,all_boxes)
為各類檢測結果寫一個txt文件,如..VOCdevkit/results/VOC2007/Main/<comp_id>_det_test_aeroplane.txt,其中每行分別為不含后綴的圖像名、該圖像某box的置信得分、該box的4維坐標,被evaluate_detections(...)調用
# 為每類檢測結果寫一個txt文件 # 注意傳入參數all_boxes def _write_voc_results_file(self, all_boxes): for cls_ind, cls in enumerate(self.classes): if cls == '__background__': continue print 'Writing {} VOC results file'.format(cls) # 如VOCdevkit/results/VOC2007/Main/<comp_id>_det_test_aeroplane.txt filename = self._get_voc_results_file_template().format(cls) with open(filename, 'wt') as f: # self.image_index 不含后綴的圖像名組成的列表 for im_ind, index in enumerate(self.image_index): # 遍歷每一張圖像,取出對應圖像某類的檢測結果 dets = all_boxes[cls_ind][im_ind] if dets == []: continue # the VOCdevkit expects 1-based indices 索引 # 逐行寫入:不含后綴圖像名 該圖像某box置信得分 該box四維坐標+1 for k in xrange(dets.shape[0]): f.write('{:s} {:.3f} {:.1f} {:.1f} {:.1f} {:.1f}\n'. format(index, dets[k, -1], dets[k, 0] + 1, dets[k, 1] + 1, dets[k, 2] + 1, dets[k, 3] + 1))
15.362