Computer Vision: Building Custom Object Detectors


1. Template Matching

Run command: python template_matching.py --source 3.jpg --template 2.jpg

import argparse
import cv2

ap = argparse.ArgumentParser()
ap.add_argument("-s", "--source", required=True, help="Path to the source image")
ap.add_argument("-t", "--template", required=True, help="Path to the template image")
args = vars(ap.parse_args())
  
source = cv2.imread(args["source"])
template = cv2.imread(args["template"])
(tempH, tempW) = template.shape[:2]
 
result = cv2.matchTemplate(source, template, cv2.TM_CCOEFF)  # args: source image, template image, matching method
(minVal, maxVal, minLoc, (x, y)) = cv2.minMaxLoc(result)  # grab the (x, y) coordinates of the best match

cv2.rectangle(source, (x, y), (x + tempW, y + tempH), (0, 255, 0), 2)  # draw a bounding box around the match on the source image
cv2.imshow("Source", source)  # display the result
cv2.waitKey(0)

 

2. Training Your Own Object Detector

Purpose: using the CALTECH-101 dataset together with its .mat annotation files, train an object detector and save it as a dlib linear-SVM (.svm) file.

train_detector.py

Run command: python train_detector.py --class stop_sign_images --annotations stop_sign_annotations \
    --output output/stop_sign_detector.svm

from __future__ import print_function
from imutils import paths
from scipy.io import loadmat
from skimage import io
import argparse
import dlib
 
ap = argparse.ArgumentParser()
ap.add_argument("-c", "--class", required=True,
    help="Path to the CALTECH-101 class images")#要訓練一個對象檢測器的具體CALTECH-101(數據集)類的路徑
ap.add_argument("-a", "--annotations", required=True,
    help="Path to the CALTECH-101 class annotations")#指定我們正在訓練的特定類的邊界框的路徑(caltech101數據集中對應的.mat文件夾)
ap.add_argument("-o", "--output", required=True,
    help="Path to the output detector")#輸出分類器的路徑
args = vars(ap.parse_args())

print("[INFO] gathering images and bounding boxes...")
options = dlib.simple_object_detector_training_options()
images = []
boxes = []
 
for imagePath in paths.list_images(args["class"]):  # loop over the images we are training on
    imageID = imagePath[imagePath.rfind("/") + 1:].split("_")[1]
    imageID = imageID.replace(".jpg", "")
    p = "{}/annotation_{}.mat".format(args["annotations"], imageID)
    annotations = loadmat(p)["box_coord"]  # extract the image ID from the path, then load the matching annotation (bounding box) from disk

    bb = [dlib.rectangle(left=int(x), top=int(y), right=int(w), bottom=int(h))
            for (y, h, x, w) in annotations]  # build dlib.rectangle objects to represent the bounding boxes

    boxes.append(bb)
    images.append(io.imread(imagePath))  # update the lists of bounding boxes and images; dlib needs both to train the detector

print("[INFO] training detector...")
detector = dlib.train_simple_object_detector(images, boxes, options)

print("[INFO] dumping detector to file...")
detector.save(args["output"])

test_detector.py

Run command: python test_detector.py --detector output/stop_sign_detector.svm --testing stop_sign_testing

Purpose: test how well the custom object detector works.

from imutils import paths
import argparse
import dlib
import cv2
 
ap = argparse.ArgumentParser()
ap.add_argument("-d", "--detector", required=True, help="Path to trained object detector")#訓練出的SVM線性檢測器
ap.add_argument("-t", "--testing", required=True, help="Path to directory of testing images")#包含停止標志圖像進行測試的目錄的路徑
args = vars(ap.parse_args())
 
detector = dlib.simple_object_detector(args["detector"])

for testingPath in paths.list_images(args["testing"]):  # loop over the testing images
    image = cv2.imread(testingPath)
    boxes = detector(cv2.cvtColor(image, cv2.COLOR_BGR2RGB))
 
    for b in boxes:
        (startX, startY, endX, endY) = (b.left(), b.top(), b.right(), b.bottom())
        cv2.rectangle(image, (startX, startY), (endX, endY), (0, 255, 0), 2)
 
    cv2.imshow("Image", image)
    cv2.waitKey(0)

 

3.1 Image Pyramids

Purpose: repeatedly resize an image by a fixed scale factor, yielding each successively smaller layer.

Key point: the yield keyword returns a value without ending the function; think of it as a lazy, deferred way of producing results.
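A toy illustration of yield (a hypothetical example, not part of the project code): the generator suspends at each yield and resumes on the next iteration.

def countdown(n):  # toy generator: yields n, n-1, ..., 1, pausing at each yield
    while n > 0:
        yield n
        n -= 1

for value in countdown(3):
    print(value)  # prints 3, 2, 1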

helper.py:

import imutils

# custom image pyramid generator
def pyramid(image, scale=1.5, minSize=(30, 30)):  # args: source image, per-step downscale factor, minimum layer size
    yield image  # the first layer of the pyramid is the original image

    while True:
        w = int(image.shape[1] / scale)
        image = imutils.resize(image, width=w)  # resize while preserving the aspect ratio

        if image.shape[0] < minSize[1] or image.shape[1] < minSize[0]:  # stop once the layer is smaller than the minimum size
            break

        yield image

# sliding window generator
def sliding_window(image, stepSize, windowSize):  # args: image to scan, pixel step between windows, window (width, height)
    for y in xrange(0, image.shape[0], stepSize):
        for x in xrange(0, image.shape[1], stepSize):
            yield (x, y, image[y:y + windowSize[1], x:x + windowSize[0]])

test_pyramid.py

Example: python test_pyramid.py --image florida_trip.png --scale 1.5

# exercising the pyramid function
from pyimagesearch.object_detection.helpers import pyramid
import argparse
import cv2

ap = argparse.ArgumentParser()
ap.add_argument("-i", "--image", required=True, help="path to the input image")
ap.add_argument("-s", "--scale", type=float, default=1.5, help="scale factor size")  # how much the image shrinks at each layer
args = vars(ap.parse_args())

image = cv2.imread(args["image"])

for (i, layer) in enumerate(pyramid(image, scale=args["scale"])):
    cv2.imshow("Layer {}".format(i + 1), layer)
    cv2.waitKey(0)

3.2 Sliding Windows

test_sliding_window.py

Purpose: the image pyramid and the sliding window used together.

Run command: python test_sliding_window.py --image florida_trip.png --width 64 --height 64

from pyimagesearch.object_detection.helpers import sliding_window
from pyimagesearch.object_detection.helpers import pyramid
import argparse
import time
import cv2
 
ap = argparse.ArgumentParser()
ap.add_argument("-i", "--image", required=True, help="path to the input image")#需要處理的圖像
ap.add_argument("-w", "--width", type=int, help="width of sliding window")#滑動窗口的寬度
ap.add_argument("-t", "--height", type=int, help="height of sliding window")#滑動窗口的高度
ap.add_argument("-s", "--scale", type=float, default=1.5, help="scale factor size")#圖像金字塔的調整大小因子
args = vars(ap.parse_args())
 
image = cv2.imread(args["image"])
(winW, winH) = (args["width"], args["height"])

for layer in pyramid(image, scale=args["scale"]):
    for (x, y, window) in sliding_window(layer, stepSize=32, windowSize=(winW, winH)):
        
        if window.shape[0] != winH or window.shape[1] != winW:
            continue

        clone = layer.copy()
        cv2.rectangle(clone, (x, y), (x + winW, y + winH), (0, 255, 0), 2)
        cv2.imshow("Window", clone)
 
        cv2.waitKey(1)
        time.sleep(0.025)

4.1 Six Steps for Building a Custom Detection Framework

The problem with Haar cascades (the Viola-Jones detector): when detecting faces/people/objects/anything with OpenCV, you end up spending a lot of time tuning the parameters of cv2.detectMultiScale.

The Viola-Jones detector is not our only option for object detection. We can also use keypoint-based object detection, local invariant descriptors, and the bag-of-visual-words model.

The six-step framework:

Step 1: Sample p positive examples from the training data of the object you want to detect and extract HOG descriptors from these samples. Using the supplied bounding boxes of the training images, extract the object ROI and compute HOG features over it; these features become the positive examples.

Step 2: Build a negative training set that contains no instances of the object to detect, and extract HOG descriptors from these samples as well. In practice the negative set is far larger than the positive one.

Step 3: Train a linear SVM on the positive and negative samples.

Step 4: Apply hard-negative mining. For every image in the negative training set, and for every possible scale of each image (i.e., the image pyramid), slide the window across the image and record every window the classifier wrongly reports as positive. This reduces the number of false positives in the final detector.

Step 5: Take the false-positive samples found during hard-negative mining, sort them by their confidence (i.e., probability), and retrain the classifier using these hard negatives.

Step 6: The classifier is now trained and can be applied to the test dataset. Again, as in Step 4, for every image in the test set and every scale of the image, apply the sliding window technique. At each window, extract the HOG descriptor and apply the classifier. If the classifier detects the object with sufficiently high probability, record the window's bounding box. After scanning the whole image, apply non-maximum suppression to remove redundant, overlapping boxes.
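Steps 4 and 6 share the same scanning loop. As a rough sketch only (reusing the pyramid and sliding_window helpers from sections 3.1/3.2; rescale_to_original is a hypothetical helper that maps window coordinates back to the original image, not a function defined in this project):

# hedged sketch of the detection loop used in steps 4 and 6
boxes = []
for layer in pyramid(image, scale=pyramidScale):              # every scale of the image
    for (x, y, window) in sliding_window(layer, winStep, winDim):
        features = hog.describe(window)                        # HOG descriptor of the window
        prob = model.predict_proba([features])[0][1]           # classifier confidence
        if prob > minProb:                                     # keep confident detections
            boxes.append(rescale_to_original(x, y, winDim, layer, image))

boxes = non_max_suppression(boxes)                             # step 6 only: collapse overlapping boxes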

Extensions and alternative approaches:

The HOG + Linear SVM approach to object detection is simple and easy to understand. dlib's implementation, however, differs slightly from the standard six-step framework above.

 

The first change concerns the HOG sliding window and negative mining. Instead of extracting features from separate positive and negative datasets, dlib optimizes the HOG sliding window so that the number of mistakes on each training image is minimized. This means the entire training image is used both to (1) extract positive examples and (2) extract negative samples from all other regions of the image. This completely removes the need for a separate negative training set and for explicit hard-negative mining, and it is one of the reasons the Max-Margin Object Detection method is so fast.

 

Second, dlib also takes non-maximum suppression into account during the actual training phase. We normally apply NMS only to obtain the final bounding boxes, but here NMS can be applied while training. This helps reduce false positives substantially and, again, removes the need for hard-negative mining.

 

Finally, dlib uses a very accurate solver to find the optimal hyperplane separating the two image classes. The method achieves higher accuracy (with a lower false-positive rate) than many other state-of-the-art object detectors.

5. Preparing the Experiment and Training Data

The complete directory structure of the framework (at the same level as the pyimagesearch package there is also a conf directory holding the JSON files and a datasets directory holding the datasets).

 

Experiment configuration: a JSON configuration file.

Advantages of a JSON configuration file:

1. We do not have to define a never-ending list of command-line arguments; all we need to supply is the path to the configuration file.

2. The configuration file gathers all relevant parameters in a single place.

3. It ensures we cannot forget which command-line options each Python script takes; every option is defined in the configuration file.

4. It lets us create one configuration file per object detector we want to build. This is a huge advantage: we can define a new object detector just by modifying a single file.

cars.json:

{
    #######
    # DATASET PATHS
    #######
    "image_dataset": "datasets/caltech101/101_ObjectCategories/car_side",  # path to our "positive" example images, the base training data
    "image_annotations": "datasets/caltech101/Annotations/car_side",  # directory of bounding boxes associated with each image in image_dataset
    "image_distractions": "datasets/sceneclass13"  # "negative" examples that contain no instance of the object we want to detect
}

explore_dims.py

Purpose: read the .mat annotation files of the CALTECH-101 data, loop over the bounding boxes of all images, and derive an appropriate sliding-window size.

Knowledge points: 1. How to work with the CALTECH-101 dataset and read its .mat annotation files.

2. How to walk the files in a directory with glob.glob().

Run command: python explore_dims.py --conf conf/cars.json

from __future__ import print_function
from pyimagesearch.utils import Conf
from scipy import io
import numpy as np
import argparse
import glob
 

ap = argparse.ArgumentParser()
ap.add_argument("-c", "--conf", required=True, help="path to the configuration file")
args = vars(ap.parse_args())

conf = Conf(args["conf"])#加載配置文件
widths = []#初始化檢測對象的寬度
heights = []#初始化檢測對象的高度
 
for p in glob.glob(conf["image_annotations"] + "/*.mat"):#循環檢測對象的注釋文件
    (y, h, x, w) = io.loadmat(p)["box_coord"][0]
    widths.append(w - x)
    heights.append(h - y)#加載每個檢測對象的注釋文件相關聯的邊界框,並更新相應的寬度和高度列表。
 
#計算平均寬度和高度
(avgWidth, avgHeight) = (np.mean(widths), np.mean(heights))
print("[INFO] avg. width: {:.2f}".format(avgWidth))
print("[INFO] avg. height: {:.2f}".format(avgHeight))
print("[INFO] aspect ratio: {:.2f}".format(avgWidth / avgHeight))

conf.py: parsing the configuration file

Purpose: the class that parses cars.json.

The role of Python's built-in __getitem__: when a class defines a __getitem__() method, an instance (say P) can be indexed as P[key]; evaluating P[key] invokes the class's __getitem__() method.

import commentjson as json
 
class Conf:
    def __init__(self, confPath):
        conf = json.loads(open(confPath).read())
        self.__dict__.update(conf)
 
    def __getitem__(self, k):
        return self.__dict__.get(k, None)    
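A quick usage sketch (hypothetical path) showing how __getitem__ lets a Conf instance be indexed like a dictionary:

conf = Conf("conf/cars.json")     # parse the JSON file into the instance __dict__
print(conf["image_dataset"])      # __getitem__ makes conf[key] behave like a dict lookup
print(conf["missing_key"])        # unknown keys return None instead of raising KeyError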

6. Building the HOG Descriptor

cars.json:

{
    #######
    # DATASET PATHS
    #######
    "image_dataset": "datasets/caltech101/101_ObjectCategories/car_side",
    "image_annotations": "datasets/caltech101/Annotations/car_side",
    "image_distractions": "datasets/sceneclass13",
 
    #######
    # FEATURE EXTRACTION
    #######
    "features_path": "output/cars/car_features.hdf5",
    "percent_gt_images": 0.5,
    "offset": 5,
    "use_flip": true,
    "num_distraction_images": 500,
    "num_distractions_per_image": 10,
 
    #######
    # HISTOGRAM OF ORIENTED GRADIENTS DESCRIPTOR
    #######
    "orientations": 9,
    "pixels_per_cell": [4, 4],  #能被滑動窗口尺寸整除
    "cells_per_block": [2, 2],
    "normalize": true,
 
    #######
    # OBJECT DETECTOR (sliding-window parameters)
    #######
    "window_step": 4,
    "overlap_thresh": 0.3,
    "pyramid_scale": 1.5,
    "window_dim": [96, 32],
    "min_probability": 0.7
}

dataset.py

Purpose: helper methods for the h5py (HDF5) database.

Knowledge points: 1. Working with an HDF5 database through h5py.

The arguments of create_dataset(): the name of the dataset, its dimensions, and its data type.

Background: an HDF5 file is a container for two kinds of objects: datasets and groups. A dataset is an array-like collection of data, much like a NumPy array. A group is a folder-like container, comparable to a Python dictionary with keys and values; groups can hold datasets or other groups, and the "keys" are the names of the members.

import numpy as np
import h5py
 
# write feature vectors and labels to an HDF5 dataset on disk
def dump_dataset(data, labels, path, datasetName, writeMethod="w"):  # args: feature vectors to write, one label per feature vector, where the HDF5 file lives on disk, the dataset name inside the file, the write mode
    db = h5py.File(path, writeMethod)
    dataset = db.create_dataset(datasetName, (len(data), len(data[0]) + 1), dtype="float")
    dataset[0:len(data)] = np.c_[labels, data]
    db.close()
 
def load_dataset(path, datasetName):  # load the feature vectors and labels stored under datasetName
    db = h5py.File(path, "r")
    (labels, data) = (db[datasetName][:, 0], db[datasetName][:, 1:])
    db.close()
    return (data, labels)
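A minimal round-trip sketch of the two helpers (hypothetical file name and toy data):

import numpy as np

data = np.random.rand(5, 8).tolist()   # five toy 8-dimensional feature vectors
labels = [1, 1, -1, -1, 1]             # one label per feature vector
dump_dataset(data, labels, "toy.hdf5", "features")  # stores labels and features side by side
(loadedData, loadedLabels) = load_dataset("toy.hdf5", "features")
print(loadedData.shape)                # (5, 8)
print(loadedLabels)                    # [ 1.  1. -1. -1.  1.]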

helpers.py:

Purpose: crop and return the ROI of each image (its bounding box plus padding, resized to a fixed size).

import imutils
import cv2
 
def crop_ct101_bb(image, bb, padding=10, dstSize=(32, 32)):
    (y, h, x, w) = bb
    (x, y) = (max(x - padding, 0), max(y - padding, 0))
    roi = image[y:h + padding, x:w + padding]
 
    roi = cv2.resize(roi, dstSize, interpolation=cv2.INTER_AREA)
 
    return roi

extract_features.py:

Purpose: extract HOG feature vectors from the images, providing the training data for the SVM classifier.

Knowledge points: 1. Walking the files in a directory with the paths module from imutils.

Notes: 1. What the progressbar module does:

    creates a progress-bar display object (see the sketch after this list of widgets).

    Meaning of the optional widgets:

    'Progress: ': the text shown in front of the bar

    Percentage(): shows the percentage completed

    Bar('#'): sets the shape of the bar

    ETA(): shows the estimated time remaining

    Timer(): shows the elapsed time
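A minimal progressbar sketch (toy loop standing in for real work):

import time
import progressbar

widgets = ["Progress: ", progressbar.Percentage(), " ", progressbar.Bar("#"), " ", progressbar.ETA()]
pbar = progressbar.ProgressBar(maxval=100, widgets=widgets).start()

for i in range(100):
    time.sleep(0.01)   # stand-in for real work
    pbar.update(i)     # advance the bar

pbar.finish()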

2. HOG explained in detail:

https://blog.csdn.net/zhazhiqiang/article/details/20221143

https://baike.baidu.com/item/HOG/9738560?fr=aladdin

3. What does random.sample do?

sample(seq, n) picks n random, independent elements from the sequence seq.

4. What does random.choice do?

choice(seq) returns a random element from the sequence seq.

More of the random module:

1) random() returns a random float n with 0 <= n < 1;

2) getrandbits(n) returns n random bits as a long integer;

3) shuffle(seq[, random]) shuffles the sequence seq in place (see the sketch below);
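A quick illustration of sample, choice, and shuffle:

import random

seq = [1, 2, 3, 4, 5]
print(random.sample(seq, 2))   # two distinct random elements; the original list is untouched
print(random.choice(seq))      # one random element
random.shuffle(seq)            # shuffles the list in place
print(seq)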

5. What does extract_patches_2d from sklearn.feature_extraction.image do? It samples patches of a fixed size from an image, up to max_patches random patches per call (see the sketch below).

6) Warning message: "Default value of `block_norm`==`L1` is deprecated and will be changed to `L2-Hys` in v0.15" (raised by scikit-image's hog(); fewer features than expected?)
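A small sketch of extract_patches_2d (toy array; the real script passes a grayscale image and the conf values):

import numpy as np
from sklearn.feature_extraction.image import extract_patches_2d

image = np.arange(100, dtype="float").reshape(10, 10)   # toy 10x10 "image"
patches = extract_patches_2d(image, (4, 4), max_patches=3, random_state=42)
print(patches.shape)   # (3, 4, 4): three random 4x4 patches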

Run command: python extract_features.py --conf conf/cars.json

# import the necessary packages
from __future__ import print_function
from sklearn.feature_extraction.image import extract_patches_2d
from pyimagesearch.object_detection import helpers
from pyimagesearch.descriptors import HOG
from pyimagesearch.utils import dataset
from pyimagesearch.utils import Conf
from imutils import paths
from scipy import io
import numpy as np
import progressbar
import argparse
import random
import cv2

ap = argparse.ArgumentParser()
ap.add_argument("-c", "--conf", required=True, help="path to the configuration file")
args = vars(ap.parse_args())
 
conf = Conf(args["conf"])#加載配置文件

#調用函數初始化HOG描述符
hog = HOG(orientations=conf["orientations"], pixelsPerCell=tuple(conf["pixels_per_cell"]),
    cellsPerBlock=tuple(conf["cells_per_block"]), normalize=conf["normalize"])
data = []
labels = []


# randomly sample a fraction of the ground-truth (positive) car images
trnPaths = list(paths.list_images(conf["image_dataset"]))
trnPaths = random.sample(trnPaths, int(len(trnPaths) * conf["percent_gt_images"]))
print("[INFO] describing training ROIs...")

widgets = ["Extracting: ", progressbar.Percentage(), " ", progressbar.Bar(), " ", progressbar.ETA()]
pbar = progressbar.ProgressBar(maxval=len(trnPaths), widgets=widgets).start()
# loop over the selected training images
for (i, trnPath) in enumerate(trnPaths):
    image = cv2.imread(trnPath)
    image = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    imageID = trnPath[trnPath.rfind("_") + 1:].replace(".jpg", "")  # extract the image ID from the filename

    p = "{}/annotation_{}.mat".format(conf["image_annotations"], imageID)
    bb = io.loadmat(p)["box_coord"][0]
    roi = helpers.crop_ct101_bb(image, bb, padding=conf["offset"], dstSize=tuple(conf["window_dim"]))
    # decide whether the horizontal flip of the ROI should be used as extra training data
    rois = (roi, cv2.flip(roi, 1)) if conf["use_flip"] else (roi,)
    # extract HOG features from each ROI and update the data and labels lists
    for roi in rois:
        features = hog.describe(roi)
        data.append(features)
        labels.append(1)

    pbar.update(i)

pbar.finish()
dstPaths = list(paths.list_images(conf["image_distractions"]))
pbar = progressbar.ProgressBar(maxval=conf["num_distraction_images"], widgets=widgets).start()
print("[INFO] describing distraction ROIs...")
 
# sample patches from the negative (distraction) images
for i in np.arange(0, conf["num_distraction_images"]):
    image = cv2.imread(random.choice(dstPaths))
    image = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    patches = extract_patches_2d(image, tuple(conf["window_dim"]),
        max_patches=conf["num_distractions_per_image"])
 
    for patch in patches:
        features = hog.describe(patch)
        data.append(features)
        labels.append(-1)
 
    pbar.update(i)

pbar.finish()
print("[INFO] dumping features and labels to file...")
dataset.dump_dataset(data, labels, conf["features_path"], "features")

7. The Initial Training Phase

cars.json:

{
    #######
    # DATASET PATHS
    #######
    "image_dataset": "datasets/caltech101/101_ObjectCategories/car_side",
    "image_annotations": "datasets/caltech101/Annotations/car_side",
    "image_distractions": "datasets/sceneclass13",
 
    #######
    # FEATURE EXTRACTION
    #######
    "features_path": "output/cars/car_features.hdf5",
    "percent_gt_images": 0.5,
    "offset": 5,
    "use_flip": true,
    "num_distraction_images": 500,
    "num_distractions_per_image": 10,
 
    #######
    # HISTOGRAM OF ORIENTED GRADIENTS DESCRIPTOR
    #######
    "orientations": 9,
    "pixels_per_cell": [4, 4],
    "cells_per_block": [2, 2],
    "normalize": true,
 
    #######
    # OBJECT DETECTOR
    #######
    "window_step": 4,
    "overlap_thresh": 0.3,
    "pyramid_scale": 1.5,
    "window_dim": [96, 32],
    "min_probability": 0.7,
 
    #######
    # LINEAR SVM
    #######
    "classifier_path": "output/cars/model.cpickle",#分類器被儲存的位置
    "C": 0.01,
}

train_model.py

Purpose: train a linear SVM (SVC) on the extracted HOG feature vectors.

Notes: 1. See the sklearn documentation for details of the SVC module.

2. What does the args["hard_negatives"] flag do? When it is greater than zero, the hard-negative features mined in step 9 are loaded from the HDF5 file and stacked onto the training data before fitting.

3. What numpy.stack() does:

joins a sequence of arrays along a new axis.

Arg 1: the sequence of arrays; arg 2: the axis along which to stack.

4. What numpy.hstack() does:

stacks arrays horizontally (column-wise); vstack() does the opposite, stacking vertically.

The tup argument can be a tuple, a list, or a numpy array; the result is a numpy array. A sketch of all three calls follows below.
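A small numpy sketch of the stacking calls (toy arrays):

import numpy as np

a = np.array([[1, 2], [3, 4]])
b = np.array([[5, 6], [7, 8]])
print(np.vstack([a, b]).shape)        # (4, 2): rows stacked vertically, as with the feature matrices
print(np.hstack([[1, 1], [-1, -1]]))  # [ 1  1 -1 -1]: labels joined end to end
print(np.stack([a, b]).shape)         # (2, 2, 2): joined along a brand-new axis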

Run command: python train_model.py --conf conf/cars.json

from __future__ import print_function
from pyimagesearch.utils import dataset
from pyimagesearch.utils import Conf
from sklearn.svm import SVC
import numpy as np
import argparse
import cPickle
 
ap = argparse.ArgumentParser()
ap.add_argument("-c", "--conf", required=True,
    help="path to the configuration file")
ap.add_argument("-n", "--hard-negatives", type=int, default=-1,
    help="flag indicating whether or not hard negatives should be used")
args = vars(ap.parse_args())

print("[INFO] loading dataset...")
conf = Conf(args["conf"])
(data, labels) = dataset.load_dataset(conf["features_path"], "features")  # grab the extracted feature vectors and labels
 
if args["hard_negatives"] > 0:
    print("[INFO] loading hard negatives...")
    (hardData, hardLabels) = dataset.load_dataset(conf["features_path"], "hard_negatives")
    data = np.vstack([data, hardData])
    labels = np.hstack([labels, hardLabels])

print("[INFO] training classifier...")
model = SVC(kernel="linear", C=conf["C"], probability=True, random_state=42)
model.fit(data, labels)
 
print("[INFO] dumping classifier...")
f = open(conf["classifier_path"], "w")
f.write(cPickle.dumps(model))#將分類器轉儲成檔
f.close()

objectdetector.py:

Purpose: slide the window across every pyramid layer of the image and return the list of bounding boxes whose classifier probability passes the threshold.

Open question: changing the probability parameter did not change the number of returned boxes. (Most likely the windows that fire do so with probabilities well above both thresholds tried, so the filter in detect() never discriminates between them.)

The pyramid function (3.1) and sliding_window function (3.2) are written into helpers.py.

import helpers
 
class ObjectDetector:
    def __init__(self, model, desc):
        self.model = model
        self.desc = desc

    def detect(self, image, winDim, winStep=4, pyramidScale=1.5, minProb=0.7):  # image: the image to scan; winDim: the sliding-window dimensions
        boxes = []
        probs = []
 
        for layer in helpers.pyramid(image, scale=pyramidScale, minSize=winDim):  # loop over the layers of the image pyramid
            scale = image.shape[0] / float(layer.shape[0])
 
            for (x, y, window) in helpers.sliding_window(layer, winStep, winDim):
                (winH, winW) = window.shape[:2]
 
                if winH == winDim[1] and winW == winDim[0]:
                    features = self.desc.describe(window).reshape(1, -1)
                    prob = self.model.predict_proba(features)[0][1]
 
                    if prob > minProb:
                        (startX, startY) = (int(scale * x), int(scale * y))
                        endX = int(startX + (scale * winW))
                        endY = int(startY + (scale * winH))
 
                        boxes.append((startX, startY, endX, endY))
                        probs.append(prob)
 
        return (boxes, probs)

test_model_no_nms.py (at the same directory level as the pyimagesearch package)

Purpose: check that the detector finds the correct object boxes (no NMS applied yet).

Notes:

1. SVC from sklearn.svm in detail:

Parameter reference: https://blog.csdn.net/szlcw1/article/details/52336824

2. Warning message: Default value of `block_norm`==`L1` is deprecated and will be changed to `L2-Hys` in v0.15

Run command: python test_model_no_nms.py --conf conf/cars.json --image datasets/caltech101/101_ObjectCategories/car_side/image_0004.jpg

from pyimagesearch.object_detection import ObjectDetector
from pyimagesearch.descriptors import HOG
from pyimagesearch.utils import Conf
import imutils
import argparse
import cPickle
import cv2
 
ap = argparse.ArgumentParser()
ap.add_argument("-c", "--conf", required=True, help="path to the configuration file")
ap.add_argument("-i", "--image", required=True, help="path to the image to be classified")
args = vars(ap.parse_args())
 
conf = Conf(args["conf"])

model = cPickle.loads(open(conf["classifier_path"], "rb").read())  # the trained linear SVM
hog = HOG(orientations=conf["orientations"], pixelsPerCell=tuple(conf["pixels_per_cell"]),
    cellsPerBlock=tuple(conf["cells_per_block"]), normalize=conf["normalize"])  # the HOG extractor, configured as at training time
od = ObjectDetector(model, hog)

image = cv2.imread(args["image"])
image = imutils.resize(image, width=min(260, image.shape[1]))
gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
 
(boxes, probs) = od.detect(gray, conf["window_dim"], winStep=conf["window_step"],
    pyramidScale=conf["pyramid_scale"], minProb=conf["min_probability"])
 

for (startX, startY, endX, endY) in boxes:
    cv2.rectangle(image, (startX, startY), (endX, endY), (0, 0, 255), 2)
 
cv2.imshow("Image", image)
cv2.waitKey(0)

Question 1: modifying the minimum probability still returns the same regions of interest? (See the note under objectdetector.py above.)

8. Non-Maximum Suppression

Purpose: resolve overlapping bounding boxes and keep the single best match.

nms.py (in the object_detection directory):

Notes: 1. What numpy's argsort() does:

  returns the indices that would sort the array from smallest to largest.

2. What numpy's concatenate() does:

  joins arrays together.

3. The logic the while loop applies to idxs (see the sketch below):

Worked example: https://blog.csdn.net/scut_salmon/article/details/79318387
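A tiny sketch of the three numpy calls the loop below relies on (using probabilities as a stand-in for the overlap ratios):

import numpy as np

probs = np.array([0.9, 0.2, 0.7])
idxs = np.argsort(probs)                  # [1 2 0]: indices from lowest to highest probability
last = len(idxs) - 1                      # idxs[last] points at the highest-scoring box
suppress = np.concatenate(([last], np.where(probs[idxs[:last]] > 0.5)[0]))
idxs = np.delete(idxs, suppress)          # drop the picked box and the heavily "overlapping" ones
print(idxs)                               # [1]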

nms.py

import numpy as np
def non_max_suppression(boxes, probs, overlapThresh):  # args: list of bounding boxes, probability of each box, overlap threshold
    if len(boxes) == 0:  # if there are no boxes, return an empty list
        return []
 
    if boxes.dtype.kind == "i":
        boxes = boxes.astype("float")  # convert the boxes from integers to floats
    # grab the coordinates of each corner of the bounding boxes
    pick = []
    x1 = boxes[:, 0]
    y1 = boxes[:, 1]
    x2 = boxes[:, 2]
    y2 = boxes[:, 3]

    # compute the area of the bounding boxes and sort the indices by probability
    area = (x2 - x1 + 1) * (y2 - y1 + 1)
    idxs = np.argsort(probs)

    # keep looping while indices remain in the list
    while len(idxs) > 0:
        last = len(idxs) - 1
        i = idxs[last]
        pick.append(i)

        # find the largest (x, y) start coordinates and the smallest (x, y) end coordinates of the overlap regions
        xx1 = np.maximum(x1[i], x1[idxs[:last]])
        yy1 = np.maximum(y1[i], y1[idxs[:last]])
        xx2 = np.minimum(x2[i], x2[idxs[:last]])
        yy2 = np.minimum(y2[i], y2[idxs[:last]])

        w = np.maximum(0, xx2 - xx1 + 1)
        h = np.maximum(0, yy2 - yy1 + 1)

        # compute the overlap ratio
        overlap = (w * h) / area[idxs[:last]]

        # delete the picked index and every index whose overlap exceeds the threshold
        idxs = np.delete(idxs, np.concatenate(([last],
            np.where(overlap > overlapThresh)[0])))
 
    return boxes[pick].astype("int") 

test_model.py (at the same directory level as the pyimagesearch package):

Purpose: verify that overlapping boxes are collapsed by NMS.

Run command: python test_model.py --conf conf/cars.json --image datasets/caltech101/101_ObjectCategories/car_side/image_0004.jpg

from pyimagesearch.object_detection import non_max_suppression
from pyimagesearch.object_detection import ObjectDetector
from pyimagesearch.descriptors import HOG
from pyimagesearch.utils import Conf
import numpy as np
import imutils
import argparse
import cPickle
import cv2
 
ap = argparse.ArgumentParser()
ap.add_argument("-c", "--conf", required=True, help="path to the configuration file")
ap.add_argument("-i", "--image", required=True, help="path to the image to be classified")
args = vars(ap.parse_args())
 
conf = Conf(args["conf"])
 
model = cPickle.loads(open(conf["classifier_path"], "rb").read())
hog = HOG(orientations=conf["orientations"], pixelsPerCell=tuple(conf["pixels_per_cell"]),
    cellsPerBlock=tuple(conf["cells_per_block"]), normalize=conf["normalize"])
od = ObjectDetector(model, hog)

image = cv2.imread(args["image"])
image = imutils.resize(image, width=min(260, image.shape[1]))
gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
 
(boxes, probs) = od.detect(gray, conf["window_dim"], winStep=conf["window_step"],
    pyramidScale=conf["pyramid_scale"], minProb=conf["min_probability"])
pick = non_max_suppression(np.array(boxes), probs, conf["overlap_thresh"])
orig = image.copy()
 
for (startX, startY, endX, endY) in boxes:
    cv2.rectangle(orig, (startX, startY), (endX, endY), (0, 0, 255), 2)
 
for (startX, startY, endX, endY) in pick:
    cv2.rectangle(image, (startX, startY), (endX, endY), (0, 255, 0), 2)
 
cv2.imshow("Original", orig)
cv2.imshow("Image", image)
cv2.waitKey(0)

9. Hard-Negative Mining

Purpose: mine features from images guaranteed not to contain the object, typically background scenes, which is why the sceneclass13 dataset is used. Training on these hard negatives reduces false detections. The mined negative features are written into the same HDF5 database.

Run command: python hard_negative_mine.py --conf conf/cars.json

hard_negative_mine.py

from __future__ import print_function
from pyimagesearch.object_detection.objectdetector import ObjectDetector
from pyimagesearch.descriptors.hog import HOG
from pyimagesearch.utils import dataset
from pyimagesearch.utils.conf import Conf
from imutils import paths
import numpy as np
import progressbar
import argparse
import cPickle
import random
import cv2

ap = argparse.ArgumentParser()
ap.add_argument("-c", "--conf", required = True, help = "path to the configuration file")
args = vars(ap.parse_args())

conf = Conf(args["conf"])
data = []

model = cPickle.loads(open(conf["classifier_path"], "rb").read())
hog = HOG(orientations=conf["orientations"], pixelsPerCell=tuple(conf["pixels_per_cell"]),
    cellsPerBlock=tuple(conf["cells_per_block"]), normalize=conf["normalize"])


od = ObjectDetector(model, hog)

dstPaths = list(paths.list_images(conf["image_distractions"]))
dstPaths = random.sample(dstPaths, conf["hn_num_distraction_images"])

widgets = ["Mining: ", progressbar.Percentage(), " ", progressbar.Bar(), " ", progressbar.ETA()]
pbar = progressbar.ProgressBar(maxval=len(dstPaths), widgets=widgets).start()
for (i, imagePath) in enumerate(dstPaths):
    image = cv2.imread(imagePath)
    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    # run the detector over the distraction image; every detection here is a false positive
    (boxes, probs) = od.detect(gray, conf["window_dim"], winStep=conf["hn_window_step"],
        pyramidScale=conf["hn_pyramid_scale"], minProb=conf["hn_min_probability"])

    for (prob, (startX, startY, endX, endY)) in zip(probs, boxes):
        # extract the ROI, resize it to the window size, and describe it with HOG
        roi = cv2.resize(gray[startY:endY, startX:endX], tuple(conf["window_dim"]),
            interpolation=cv2.INTER_AREA)
        features = hog.describe(roi)
        data.append(np.hstack([[prob], features]))  # store the probability alongside the features
    pbar.update(i)

pbar.finish()
print("[INFO] sorting by probability...")
data = np.array(data)
data = data[data[:, 0].argsort()[::-1]]  # highest-probability false positives first

print("[INFO] dumping hard negatives to file...")
dataset.dump_dataset(data[:, 1:], [-1] * len(data), conf["features_path"], "hard_negatives", writeMethod="a")

10. Retraining the Object Detector

Purpose: add the hard negatives to the SVM training data to reduce spurious detections.

Run command: python train_model.py --conf conf/cars.json --hard-negatives 1

train_model.py

from __future__ import print_function
from pyimagesearch.utils import dataset
from pyimagesearch.utils.conf import Conf
from sklearn.svm import SVC
import argparse
import pickle
import numpy as np

ap = argparse.ArgumentParser()
ap.add_argument("-c", "--conf", required=True, help="path to the configuration file")
ap.add_argument("-n", "--hard-negatives", type=int, default=-1,
    help="flag indicating whether or not hard negatives should be used")
args = vars(ap.parse_args())

print("[INFO] loading dataset...")
conf = Conf(args["conf"])
(data, labels) = dataset.load_dataset(conf["features_path"], "features")

if args["hard_negatives"] > 0:
    print("[INFO] loading hard negatives...")
    (hardData,hardLabels) = dataset.load_dataset(conf["features_path"], "hard_negatives")
    data = np.vstack([data, hardData])
    labels = np.hstack([labels, hardLabels])

print("[INFO] training classifier...")
model = SVC(kernel = "linear", C = conf["C"], probability = True, random_state = 42)
model.fit(data, labels)

print("[INFO] dumping classifier...")
f = open(conf["classifier_path"], "wb")
f.write(pickle.dumps(model))
f.close()

 

11. Using imglab

Preparation: create an XML file with the imglab tool and hand-select the object regions.

Step 1: run imglab -c <image folder> <output XML path> to generate the XML file. Step 2: run imglab <XML file> and manually draw boxes around the objects.

Purpose: take the labeled bounding boxes from the images and train a dlib SVM detector on them.

Run command: python train_detector.py --xml face_detector/faces_annotations.xml --detector face_detector/detector.svm

from __future__ import print_function
import argparse
import dlib

ap = argparse.ArgumentParser()
ap.add_argument("-x", "--xml", required=True, help="path to input XML file")
ap.add_argument("-d", "--detector", required=True, help="path to output detector")
args = vars(ap.parse_args())

print("[INFO] training detector...")
options = dlib.simple_object_detector_training_options()
options.C = 1.0
options.num_threads = 4
options.be_verbose = True
dlib.train_simple_object_detector(args["xml"], args["detector"], options)

print("[INFO] training accuracy:{}".format(dlib.test_simple_object_detector(args["xml"], args["detector"])))

detector = dlib.simple_object_detector(args["detector"])
win = dlib.image_window()
win.set_image(detector)  # visualize the learned HOG filter
dlib.hit_enter_to_continue()

 

test_detector.py

Purpose: test the trained SVM detector.

Run command: python test_detector.py --detector face_detector/detector.svm --testing face_detector/testing

from imutils import paths
import argparse
import dlib
import cv2

ap = argparse.ArgumentParser()
ap.add_argument("-d", "--detector", required=True, help="Path to trained object detector")
ap.add_argument("-t", "--testing", required=True, help="Path to directory of testing images")
args = vars(ap.parse_args())

detector = dlib.simple_object_detector(args["detector"])

for testingPath in paths.list_images(args["testing"]):
    image = cv2.imread(testingPath)
    boxes = detector(cv2.cvtColor(image, cv2.COLOR_BGR2RGB))

    for b in boxes:
        (startX, startY, endX, endY) = (b.left(), b.top(), b.right(), b.bottom())
        cv2.rectangle(image, (startX, startY), (endX, endY), (0, 255, 0), 2)

    cv2.imshow("Image", image)
    cv2.waitKey(0)

 

