py4CV Example 2: Car Detection with the SVM Algorithm


1. What is the car detection dataset?
The UIUC Image Database for Car Detection (University of Illinois) contains roughly 1,000 training images with and without cars (the copy used below reports 549 positive and 499 negative samples), with the car positions annotated, plus a separate set of test images.
2. What is the SVM algorithm?
SVM (Support Vector Machine) is a common discriminative method. In machine learning it is a supervised model, typically used for pattern recognition, classification, and regression analysis.
Its core idea is to find a separating hyperplane that maximizes the margin between the classes.
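To make the hyperplane idea concrete, here is a minimal sketch (toy, made-up 2D points, unrelated to the car dataset) that trains OpenCV's SVM, the same API used throughout this post, and classifies a new point by which side of the learned hyperplane it falls on:

import cv2
import numpy as np

# toy 2D samples: class +1 clustered near (3, 3), class -1 near (-3, -3)
samples = np.array([[3, 3], [4, 3], [3, 4],
                    [-3, -3], [-4, -3], [-3, -4]], dtype=np.float32)
labels = np.array([1, 1, 1, -1, -1, -1], dtype=np.int32)

svm = cv2.ml.SVM_create()
svm.setType(cv2.ml.SVM_C_SVC)
svm.setKernel(cv2.ml.SVM_LINEAR)   # linear kernel: a single separating hyperplane
svm.train(samples, cv2.ml.ROW_SAMPLE, labels)

# predicting a new point amounts to checking which side of the hyperplane it lies on
_, pred = svm.predict(np.array([[2.5, 2.0]], dtype=np.float32))
print(pred)   # expected: [[1.]]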
3. Single-car detection
import cv2
import numpy as np
from os.path import join

datapath = "E:/dl4cv/datesets/CarData/TrainImages/"

def path(cls, i):
    return "%s/%s%d.pgm" % (datapath, cls, i + 1)

pos, neg = "pos-", "neg-"

# create the SIFT keypoint detector and descriptor extractor
detect = cv2.xfeatures2d.SIFT_create()
extract = cv2.xfeatures2d.SIFT_create()

# create a FLANN-based matcher
flann_params = dict(algorithm=1, trees=5)
matcher = cv2.FlannBasedMatcher(flann_params, {})

# create the bag-of-words (BOW) trainer and descriptor extractor
bow_kmeans_trainer = cv2.BOWKMeansTrainer(40)
extract_bow = cv2.BOWImgDescriptorExtractor(extract, matcher)

def extract_sift(fn):
    im = cv2.imread(fn, 0)
    return extract.compute(im, detect.detect(im))[1]

for i in range(8):
    bow_kmeans_trainer.add(extract_sift(path(pos, i)))
    bow_kmeans_trainer.add(extract_sift(path(neg, i)))

voc = bow_kmeans_trainer.cluster()
extract_bow.setVocabulary(voc)

def bow_features(fn):
    im = cv2.imread(fn, 0)
    return extract_bow.compute(im, detect.detect(im))

traindata, trainlabels = [], []
for i in range(20):
    traindata.extend(bow_features(path(pos, i))); trainlabels.append(1)
    traindata.extend(bow_features(path(neg, i))); trainlabels.append(-1)

# create the SVM model
svm = cv2.ml.SVM_create()
svm.train(np.array(traindata), cv2.ml.ROW_SAMPLE, np.array(trainlabels))

def predict(fn):
    f = bow_features(fn)
    p = svm.predict(f)
    print(fn, "\t", p[1][0][0])
    return p

# test on two images
car, notcar = "E:/sandbox/car3.jpg", "E:/sandbox/car4.jpg"
car_img = cv2.imread(car)
notcar_img = cv2.imread(notcar)
car_predict = predict(car)
not_car_predict = predict(notcar)

font = cv2.FONT_HERSHEY_SIMPLEX

if car_predict[1][0][0] == 1.0:
    cv2.putText(car_img, 'Car Detected', (10, 30), font, 1, (0, 255, 0), 2, cv2.LINE_AA)

if not_car_predict[1][0][0] == -1.0:
    cv2.putText(notcar_img, 'Car Not Detected', (10, 30), font, 1, (0, 0, 255), 2, cv2.LINE_AA)

cv2.imshow('BOW + SVM Success', car_img)
cv2.imshow('BOW + SVM Failure', notcar_img)
cv2.waitKey(0)
cv2.destroyAllWindows()

The result is only a yes/no check of whether the image contains a car. The key parts are:

flann_params = dict(algorithm=1, trees=5)
matcher = cv2.FlannBasedMatcher(flann_params, {})

Create a FLANN-based matcher.

bow_kmeans_trainer = cv2.BOWKMeansTrainer(40)

Create the BOW trainer with 40 clusters.

def extract_sift(fn):
    im = cv2.imread(fn, 0)
    return extract.compute(im, detect.detect(im))[1]

Read the image in grayscale, extract its SIFT descriptors, and return them.

for i in range(8):
    bow_kmeans_trainer.add(extract_sift(path(pos, i)))
    bow_kmeans_trainer.add(extract_sift(path(neg, i)))

Feed positive and negative samples into the vocabulary trainer.

voc = bow_kmeans_trainer.cluster()
extract_bow.setVocabulary(voc)

The cluster function runs k-means clustering and returns the visual vocabulary, which is then assigned to extract_bow so that it can compute BOW descriptors.
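With 40 clusters over 128-dimensional SIFT descriptors, the vocabulary returned by cluster() should be one row per visual word; a quick sanity check (assuming the code above has just run) is:

# one row per visual word (cluster centre), one column per SIFT dimension
print(voc.shape, voc.dtype)   # expected: (40, 128) float32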
def bow_features(fn):
    im = cv2.imread(fn, 0)
    return extract_bow.compute(im, detect.detect(im))

Return the descriptor computed by the BOW descriptor extractor.
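Each image is thereby reduced to a fixed-length histogram over the 40 visual words, which is exactly the feature vector the SVM will consume; a quick check (again assuming the pipeline above) could be:

f = bow_features(path(pos, 0))
print(f.shape)   # expected: (1, 40) -- one normalized visual-word histogram per image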
traindata, trainlabels = [], []
for i in range(20):
    traindata.extend(bow_features(path(pos, i))); trainlabels.append(1)
    traindata.extend(bow_features(path(neg, i))); trainlabels.append(-1)

Build the two arrays holding the training samples and the positive/negative labels required by the SVM model.

svm = cv2.ml.SVM_create()
svm.train(np.array(traindata), cv2.ml.ROW_SAMPLE, np.array(trainlabels))

Create and train an SVM model.
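SVM_create() is used here with its default parameters; the same training step can also spell the main parameters out explicitly, as the sliding-window section does later. A minimal, illustrative (untuned) configuration might look like:

# the same training call with the main parameters made explicit (values are illustrative)
svm = cv2.ml.SVM_create()
svm.setType(cv2.ml.SVM_C_SVC)      # classification
svm.setKernel(cv2.ml.SVM_LINEAR)   # linear kernel: a single separating hyperplane
svm.setC(1.0)                      # soft-margin penalty
svm.train(np.array(traindata), cv2.ml.ROW_SAMPLE, np.array(trainlabels))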
def predict(fn):
    f = bow_features(fn)
    p = svm.predict(f)
    print(fn, "\t", p[1][0][0])
    return p

Return the prediction result. Without any rigorous measurement, the SIFT + BOW + SVM combination appears to perform well; the few images I tried were all classified correctly. At this point it becomes necessary to evaluate it on a more principled test set.
4. Validating with a held-out test split
import cv2
import numpy as np
from os.path import join
import os
import math

# build an SVM model on CarData and test it on a held-out split;
# RATIO is the fraction of the data held out for testing (ratio = 1 would use all data for testing)
RATIO = 0.2
datapath = "E:/dl4cv/datesets/CarData/TrainImages/"

def path(cls, i):
    return "%s/%s%d.pgm" % (datapath, cls, i + 1)

# collect image paths and labels, and split them into training and test sets according to the ratio
def get_files(file_dir, ratio):
    '''
    Args:
        file_dir: file directory
    Returns:
        lists of training/validation images and labels
    '''
    pos = []
    label_pos = []
    neg = []
    label_neg = []
    for file in os.listdir(file_dir):
        name = file.split(sep='-')
        if name[0] == 'pos':
            pos.append(file_dir + file)
            label_pos.append(1)
        else:
            neg.append(file_dir + file)
            label_neg.append(-1)
    print('The dataset contains %d pos\nand %d neg' % (len(pos), len(neg)))
    # image list and label list
    # hstack stacks the arrays horizontally (column-wise)
    image_list = np.hstack((pos, neg))
    label_list = np.hstack((label_pos, label_neg))
    temp = np.array([image_list, label_list])
    temp = temp.transpose()
    # shuffle so that positive and negative samples are mixed,
    # which lets us simply take the first portion for training
    np.random.shuffle(temp)
    all_image_list = temp[:, 0]
    all_label_list = temp[:, 1]
    n_sample = len(all_label_list)
    # determine the number of training and validation samples from the ratio
    n_val = math.ceil(n_sample * ratio)   # number of validation samples
    n_train = n_sample - n_val            # number of training samples
    # the first n_train entries become training images, the rest validation images
    tra_images = all_image_list[:n_train]
    tra_labels = all_label_list[:n_train]
    tra_labels = [int(float(i)) for i in tra_labels]

    val_images = all_image_list[n_train:]
    val_labels = all_label_list[n_train:]
    val_labels = [int(float(i)) for i in val_labels]
    return tra_images, tra_labels, val_images, val_labels

pos, neg = "pos-", "neg-"
# create the SIFT detector and extractor
detect = cv2.xfeatures2d.SIFT_create()
extract = cv2.xfeatures2d.SIFT_create()
# create a FLANN-based matcher
flann_params = dict(algorithm=1, trees=5)
matcher = cv2.FlannBasedMatcher(flann_params, {})
# create the BOW trainer
bow_kmeans_trainer = cv2.BOWKMeansTrainer(40)
extract_bow = cv2.BOWImgDescriptorExtractor(extract, matcher)

# read the image in grayscale, extract SIFT descriptors, and return them
def extract_sift(fn):
    im = cv2.imread(fn, 0)
    return extract.compute(im, detect.detect(im))[1]

# feed positive and negative samples into the vocabulary trainer
for i in range(8):
    bow_kmeans_trainer.add(extract_sift(path(pos, i)))
    bow_kmeans_trainer.add(extract_sift(path(neg, i)))

# cluster() runs k-means and returns the vocabulary, which extract_bow then uses
voc = bow_kmeans_trainer.cluster()
extract_bow.setVocabulary(voc)

# return the descriptor computed by the BOW descriptor extractor
def bow_features(fn):
    im = cv2.imread(fn, 0)
    return extract_bow.compute(im, detect.detect(im))

# obtain the dataset
train_images, train_labels, val_images, val_labels = get_files(datapath, RATIO)
traindata, trainlabels = [], []
# a larger sample count is not necessarily better
#for i in range(400):
#    traindata.extend(bow_features(path(pos, i))); trainlabels.append(1)
#    traindata.extend(bow_features(path(neg, i))); trainlabels.append(-1)

# with a larger training set, the predictions on the two sample images were clearly wrong
for i in range(len(train_images)):
    traindata.extend(bow_features(train_images[i]))
    trainlabels.append(train_labels[i])

# create and train an SVM model
# preliminary finding: enlarging the training set did not improve the SVM's accuracy
svm = cv2.ml.SVM_create()
svm.train(np.array(traindata), cv2.ml.ROW_SAMPLE, np.array(trainlabels))

# return the prediction result
def predict(fn):
    f = bow_features(fn)
    p = svm.predict(f)
    print(fn, "\t", p[1][0][0])
    return p

# evaluate on the validation set
result = []
for i in range(len(val_images)):
    f = bow_features(val_images[i])
    p = svm.predict(f)
    result.append(p[1][0][0])

np_val_labels = np.array(val_labels)[:, np.newaxis]
np_result = np.array(result)[:, np.newaxis]
matches = np_result == np_val_labels

correct = np.count_nonzero(matches)
accuracy = correct * 100.0 / len(result)
print(accuracy)

# test on two images
car, notcar = "E:/sandbox/car3.jpg", "E:/sandbox/car4.jpg"
car_img = cv2.imread(car)
notcar_img = cv2.imread(notcar)
car_predict = predict(car)
not_car_predict = predict(notcar)

font = cv2.FONT_HERSHEY_SIMPLEX

if car_predict[1][0][0] == 1.0:
    cv2.putText(car_img, 'Car Detected', (10, 30), font, 1, (0, 255, 0), 2, cv2.LINE_AA)

if not_car_predict[1][0][0] == -1.0:
    cv2.putText(notcar_img, 'Car Not Detected', (10, 30), font, 1, (0, 0, 255), 2, cv2.LINE_AA)

cv2.imshow('BOW + SVM Success', car_img)
cv2.imshow('BOW + SVM Failure', notcar_img)
cv2.waitKey(0)
cv2.destroyAllWindows()

Evaluating on the held-out split, with a 20/80 test/train ratio, the output is:
The dataset contains 549 pos
and 499 neg
84.90566037735849
E:/sandbox/car3.jpg     -1.0
E:/sandbox/car4.jpg     -1.0

5. Detecting multiple cars with a sliding window
The application proceeds as follows:
1. Obtain the dataset.
2. Create a BOW trainer and build the visual vocabulary.
3. Train an SVM on the vocabulary-based features.
4. Slide a window over an image pyramid built from the test image.
5. Filter overlapping rectangles with non-maximum suppression.
6. Output the result.
The individual functions are listed below.
def resize(img, scaleFactor):
    return cv2.resize(img, (int(img.shape[1] * (1 / scaleFactor)),
                            int(img.shape[0] * (1 / scaleFactor))),
                      interpolation=cv2.INTER_AREA)
Rescales the image by the given factor.
def pyramid(image, scale=1.5, minSize=(200, 80)):
    yield image

    while True:
        image = resize(image, scale)
        if image.shape[0] < minSize[1] or image.shape[1] < minSize[0]:
            break

        yield image

Uses yield to produce the levels of the image pyramid one at a time, stopping once the image becomes smaller than minSize.
def sliding_window(image, step, window_size):
    for y in range(0, image.shape[0], step):
        for x in range(0, image.shape[1], step):
            yield (x, y, image[y:y + window_size[1], x:x + window_size[0]])

Also uses yield, producing each window's top-left corner and the corresponding image patch.
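Putting the two generators together, an illustrative driver (assuming cv2 is imported and the two functions above are defined; the image path is the one used in the full script later) that visits every 100x40 window on every pyramid level looks like this:

img = cv2.imread("E:/sandbox/car1.png")
for resized in pyramid(img, scale=1.25):
    for (x, y, roi) in sliding_window(resized, 20, (100, 40)):
        if roi.shape[0] != 40 or roi.shape[1] != 100:
            continue   # skip partial windows at the image border
        # each roi is a candidate region to classify with the BOW + SVM model
        print(resized.shape, x, y)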
# import the necessary packages
import numpy as np

# Malisiewicz et al.
# Python port by Adrian Rosebrock
def non_max_suppression_fast(boxes, overlapThresh):
    # if there are no boxes, return an empty list
    if len(boxes) == 0:
        return []

    # if the bounding boxes are integers, convert them to floats --
    # this is important since we'll be doing a bunch of divisions
    if boxes.dtype.kind == "i":
        boxes = boxes.astype("float")

    # initialize the list of picked indexes
    pick = []

    # grab the coordinates of the bounding boxes
    x1 = boxes[:, 0]
    y1 = boxes[:, 1]
    x2 = boxes[:, 2]
    y2 = boxes[:, 3]
    scores = boxes[:, 4]

    # compute the area of the bounding boxes and sort the bounding
    # boxes by the score/probability of the bounding box
    # (ascending, so the highest score is processed first via the last index)
    area = (x2 - x1 + 1) * (y2 - y1 + 1)
    idxs = np.argsort(scores)

    # keep looping while some indexes still remain in the indexes list
    while len(idxs) > 0:
        # grab the last index in the indexes list and add the
        # index value to the list of picked indexes
        last = len(idxs) - 1
        i = idxs[last]
        pick.append(i)

        # find the largest (x, y) coordinates for the start of
        # the bounding box and the smallest (x, y) coordinates
        # for the end of the bounding box
        xx1 = np.maximum(x1[i], x1[idxs[:last]])
        yy1 = np.maximum(y1[i], y1[idxs[:last]])
        xx2 = np.minimum(x2[i], x2[idxs[:last]])
        yy2 = np.minimum(y2[i], y2[idxs[:last]])

        # compute the width and height of the overlap region
        w = np.maximum(0, xx2 - xx1 + 1)
        h = np.maximum(0, yy2 - yy1 + 1)

        # compute the ratio of overlap
        overlap = (w * h) / area[idxs[:last]]

        # delete the picked index and all indexes whose overlap exceeds the threshold
        idxs = np.delete(idxs, np.concatenate(([last],
            np.where(overlap > overlapThresh)[0])))

    # return only the bounding boxes that were picked using the
    # integer data type
    return boxes[pick].astype("int")

This function takes a set of rectangles and sorts them by score. Starting from the highest-scoring rectangle, it discards every rectangle whose overlap with it exceeds the given threshold.
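As a quick sanity check, the function can be exercised on a few hand-made boxes in the [x1, y1, x2, y2, score] format it expects (the coordinates and scores below are made up for illustration):

import numpy as np

boxes = np.array([
    [ 10,  10, 110,  50, 1.8],   # two heavily overlapping candidates...
    [ 14,  12, 114,  52, 2.5],   # ...should collapse into a single pick
    [200, 100, 300, 140, 1.2],   # an isolated candidate that should survive
])
picked = non_max_suppression_fast(boxes, 0.25)
print(picked)   # expect two boxes: one from the overlapping pair, plus the isolated one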
datapath = "E:/py4cv/CarData/TrainImages"
SAMPLES = 400

def path( cls, i):
return " %s / %s%d .pgm" % (datapath, cls,i+ 1)
獲得系列路徑下地址
def get_flann_matcher():
flann_params = dict( algorithm = 1, trees = 5)
return cv2.FlannBasedMatcher(flann_params, {})
獲得flann的matcher
def get_bow_extractor( extract, match):
return cv2.BOWImgDescriptorExtractor(extract, match)
獲得bow的image描述
def get_extract_detect():
return cv2.xfeatures2d.SIFT_create(), cv2.xfeatures2d.SIFT_create()
獲得sift描述
def extract_sift( fn, extractor, detector):
im = cv2.imread(fn, 0)
return extractor.compute(im, detector.detect(im))[ 1]
及其計算值
def bow_features( img, extractor_bow, detector):
return extractor_bow.compute(img, detector.detect(img))
獲得bow features
import cv2
import numpy as np

datapath = "E:/py4cv/CarData/TrainImages"
SAMPLES = 400

def path(cls, i):
    return "%s/%s%d.pgm" % (datapath, cls, i + 1)

def get_flann_matcher():
    flann_params = dict(algorithm=1, trees=5)
    return cv2.FlannBasedMatcher(flann_params, {})

def get_bow_extractor(extract, match):
    return cv2.BOWImgDescriptorExtractor(extract, match)

def get_extract_detect():
    return cv2.xfeatures2d.SIFT_create(), cv2.xfeatures2d.SIFT_create()

def extract_sift(fn, extractor, detector):
    im = cv2.imread(fn, 0)
    return extractor.compute(im, detector.detect(im))[1]

def bow_features(img, extractor_bow, detector):
    return extractor_bow.compute(img, detector.detect(img))

def car_detector():
    pos, neg = "pos-", "neg-"
    # obtain the two SIFT objects (detector and extractor)
    detect, extract = get_extract_detect()
    # obtain the FLANN matcher
    matcher = get_flann_matcher()
    # create the BOW descriptor extractor
    print("building BOWKMeansTrainer...")
    bow_kmeans_trainer = cv2.BOWKMeansTrainer(12)
    extract_bow = cv2.BOWImgDescriptorExtractor(extract, matcher)
    # first build the BOW vocabulary
    print("adding features to trainer...")
    for i in range(SAMPLES):
        print(i)
        bow_kmeans_trainer.add(extract_sift(path(pos, i), extract, detect))
        # note: negative samples are not added to the vocabulary here
        #bow_kmeans_trainer.add(extract_sift(path(neg, i), extract, detect))
    # cluster and return the vocabulary
    vocabulary = bow_kmeans_trainer.cluster()
    extract_bow.setVocabulary(vocabulary)
    # train the SVM on the vocabulary-based features
    traindata, trainlabels = [], []
    print("adding to train data")
    for i in range(SAMPLES):
        print(i)
        # 1 means a car is present, -1 means no car
        traindata.extend(bow_features(cv2.imread(path(pos, i), 0), extract_bow, detect))
        trainlabels.append(1)
        traindata.extend(bow_features(cv2.imread(path(neg, i), 0), extract_bow, detect))
        trainlabels.append(-1)

    svm = cv2.ml.SVM_create()
    svm.setType(cv2.ml.SVM_C_SVC)
    svm.setGamma(1)
    svm.setC(35)   # larger C penalizes training errors more heavily (risking overfitting); smaller C tolerates more misclassification for a wider margin
    svm.setKernel(cv2.ml.SVM_RBF)   # SVM_LINEAR: a single separating hyperplane; SVM_RBF: Gaussian kernel, allows a non-linear decision boundary

    svm.train(np.array(traindata), cv2.ml.ROW_SAMPLE, np.array(trainlabels))
    return svm, extract_bow

Sliding-window vehicle detection
import cv2
import numpy as np
from car_detector.detector import car_detector, bow_features
from car_detector.pyramid import pyramid
from car_detector.non_maximum import non_max_suppression_fast as nms
from car_detector.sliding_window import sliding_window
import urllib

def in_range(number, test, thresh=0.2):
    return abs(number - test) < thresh

img_path = "E:/sandbox/car1.png"

# train the SVM model; note that no sliding window is involved during training
svm, extractor = car_detector()
detect = cv2.xfeatures2d.SIFT_create()

w, h = 100, 40
img = cv2.imread(img_path)

rectangles = []
counter = 1
scaleFactor = 1.25
scale = 1
font = cv2.FONT_HERSHEY_PLAIN

for resized in pyramid(img, scaleFactor):
    scale = float(img.shape[1]) / float(resized.shape[1])
    for (x, y, roi) in sliding_window(resized, 20, (100, 40)):   # window size 100x40
        if roi.shape[1] != w or roi.shape[0] != h:
            continue

        try:
            bf = bow_features(roi, extractor, detect)
            _, result = svm.predict(bf)
            a, res = svm.predict(bf, flags=cv2.ml.STAT_MODEL_RAW_OUTPUT | cv2.ml.STAT_MODEL_UPDATE_MODEL)
            print("Class: %d, Score: %f, a: %s" % (result[0][0], res[0][0], res))
            score = res[0][0]
            if result[0][0] == 1:
                if score < -1.0:
                    rx, ry, rx2, ry2 = int(x * scale), int(y * scale), int((x + w) * scale), int((y + h) * scale)
                    rectangles.append([rx, ry, rx2, ry2, abs(score)])
        except:
            pass

        counter += 1

windows = np.array(rectangles)
# apply non-maximum suppression
boxes = nms(windows, 0.25)

for (x, y, x2, y2, score) in boxes:
    print(x, y, x2, y2, score)
    cv2.rectangle(img, (int(x), int(y)), (int(x2), int(y2)), (0, 255, 0), 1)
    cv2.putText(img, "%f" % score, (int(x), int(y)), font, 1, (0, 255, 0))

cv2.imshow("img", img)
cv2.waitKey(0)

Summary and takeaways:
1. Python, as a language of this era, really is very well suited to writing application-level algorithms for machine learning and image processing.
2. SVM is an algorithm that has been around for a long time; accuracy around 90% is probably close to its ceiling here, even with SIFT + BOW feature extraction.
3. To improve further, move on to MLPs, CNNs, and ultimately deep learning as soon as possible.
4. Finding real-world application scenarios for machine learning is, beyond mastering the methods themselves, the hardest and most meaningful part.
5. Today is the best time to start learning machine learning; time waits for no one.
Thank you for reading this far; I hope it has been helpful.




