1、The UIUC Image Database for Car Detection


It contains more than 10,000 images with and without cars, with the car positions precisely annotated, and it also comes with a test set of more than 1,000 images.
2、What is the SVM algorithm?
SVM (Support Vector Machine) is a common discriminative method. In machine learning it is a supervised learning model, typically used for pattern recognition, classification and regression analysis. Its core idea is to find a separating hyperplane between the classes.
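As a minimal illustration of the "find a hyperplane" idea, here is a toy sketch with made-up 2D points (it is not part of the car-detection code, just the same cv2.ml API on the simplest possible data):

# Toy sketch: a linear SVM separating two made-up clusters of 2D points.
import cv2
import numpy as np

points = np.array([[1, 1], [2, 1], [1, 2],      # class -1
                   [6, 6], [7, 5], [6, 7]],     # class +1
                  dtype=np.float32)
labels = np.array([-1, -1, -1, 1, 1, 1], dtype=np.int32)

svm = cv2.ml.SVM_create()
svm.setType(cv2.ml.SVM_C_SVC)
svm.setKernel(cv2.ml.SVM_LINEAR)                # a linear kernel learns a separating hyperplane
svm.train(points, cv2.ml.ROW_SAMPLE, labels)

# samples on either side of the learned hyperplane get different labels
print(svm.predict(np.float32([[1.5, 1.5]]))[1])   # [[-1.]]
print(svm.predict(np.float32([[6.5, 6.5]]))[1])   # [[1.]]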


3、Single-car detection
import cv2
import numpy as np
from os.path import join

datapath = "E:/dl4cv/datesets/CarData/TrainImages/"

def path(cls, i):
    return "%s/%s%d.pgm" % (datapath, cls, i+1)

pos, neg = "pos-", "neg-"

# create the SIFT feature detector and extractor
detect = cv2.xfeatures2d.SIFT_create()
extract = cv2.xfeatures2d.SIFT_create()

# create the FLANN-based matcher
flann_params = dict(algorithm=1, trees=5)
matcher = cv2.FlannBasedMatcher(flann_params, {})

# create the BOW trainer and the bag-of-words model
bow_kmeans_trainer = cv2.BOWKMeansTrainer(40)
extract_bow = cv2.BOWImgDescriptorExtractor(extract, matcher)

def extract_sift(fn):
    im = cv2.imread(fn, 0)
    return extract.compute(im, detect.detect(im))[1]

for i in range(8):
    bow_kmeans_trainer.add(extract_sift(path(pos, i)))
    bow_kmeans_trainer.add(extract_sift(path(neg, i)))

voc = bow_kmeans_trainer.cluster()
extract_bow.setVocabulary(voc)

def bow_features(fn):
    im = cv2.imread(fn, 0)
    return extract_bow.compute(im, detect.detect(im))

traindata, trainlabels = [], []
for i in range(20):
    traindata.extend(bow_features(path(pos, i))); trainlabels.append(1)
    traindata.extend(bow_features(path(neg, i))); trainlabels.append(-1)

# create and train the SVM model
svm = cv2.ml.SVM_create()
svm.train(np.array(traindata), cv2.ml.ROW_SAMPLE, np.array(trainlabels))

def predict(fn):
    f = bow_features(fn)
    p = svm.predict(f)
    print(fn, "\t", p[1][0][0])
    return p

# test on two images
car, notcar = "E:/sandbox/car3.jpg", "E:/sandbox/car4.jpg"
car_img = cv2.imread(car)
notcar_img = cv2.imread(notcar)
car_predict = predict(car)
not_car_predict = predict(notcar)

font = cv2.FONT_HERSHEY_SIMPLEX

if (car_predict[1][0][0] == 1.0):
    cv2.putText(car_img, 'Car Detected', (10, 30), font, 1, (0, 255, 0), 2, cv2.LINE_AA)

if (not_car_predict[1][0][0] == -1.0):
    cv2.putText(notcar_img, 'Car Not Detected', (10, 30), font, 1, (0, 0, 255), 2, cv2.LINE_AA)

cv2.imshow('BOW + SVM Success', car_img)
cv2.imshow('BOW + SVM Failure', notcar_img)
cv2.waitKey(0)
cv2.destroyAllWindows()

The result merely tells whether the image contains a car at all. The key parts are:
flann_params = dict(algorithm=1, trees=5)
matcher = cv2.FlannBasedMatcher(flann_params, {})
Create the FLANN-based matcher.

bow_kmeans_trainer = cv2.BOWKMeansTrainer(40)
Create the BOW trainer.

def extract_sift(fn):
    im = cv2.imread(fn, 0)
    return extract.compute(im, detect.detect(im))[1]
Read the image in grayscale, extract its SIFT descriptors and return them.

for i in range(8):
    bow_kmeans_trainer.add(extract_sift(path(pos, i)))
    bow_kmeans_trainer.add(extract_sift(path(neg, i)))
Feed positive and negative samples into the trainer.

voc = bow_kmeans_trainer.cluster()
extract_bow.setVocabulary(voc)
The cluster function runs k-means clustering and returns the vocabulary, which is then assigned to extract_bow so that it can compute BOW descriptors.
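For intuition about the shapes involved, a quick inspection sketch (it reuses the voc, extract_bow, detect, path and pos names from the listing above):

# Quick inspection sketch, reusing names from the listing above.
print(voc.shape)                                           # (40, 128): 40 visual words, each a 128-dim SIFT cluster centre
im = cv2.imread(path(pos, 0), 0)
print(extract_bow.compute(im, detect.detect(im)).shape)    # (1, 40): one histogram over the 40 visual words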
def bow_features(fn):
    im = cv2.imread(fn, 0)
    return extract_bow.compute(im, detect.detect(im))
Return the descriptor computed by the BOW descriptor extractor.

traindata, trainlabels = [], []
for i in range(20):
    traindata.extend(bow_features(path(pos, i))); trainlabels.append(1)
    traindata.extend(bow_features(path(neg, i))); trainlabels.append(-1)
Build the two arrays of positive and negative samples and labels that the SVM model needs.

svm = cv2.ml.SVM_create()
svm.train(np.array(traindata), cv2.ml.ROW_SAMPLE, np.array(trainlabels))
Create and train an SVM model.

def predict(fn):
    f = bow_features(fn)
    p = svm.predict(f)
    print(fn, "\t", p[1][0][0])
    return p
Return the prediction result. Without any rigorous measurement, the SIFT + BOW + SVM combination performs well: the few images I tried were all classified correctly. At that point it becomes necessary to test against a more systematic dataset.
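A practical note not covered above: the trained cv2.ml SVM can be saved and reloaded, so the clustering and training do not have to be repeated on every run. A sketch (the file name is arbitrary; cv2.ml.SVM_load requires OpenCV 3.1 or newer):

# Persist the trained model (file name is arbitrary).
svm.save("car_svm.xml")
# Later, or in another script:
svm2 = cv2.ml.SVM_load("car_svm.xml")
print(svm2.predict(bow_features(path(pos, 0)))[1])

The vocabulary voc would need to be stored as well (for example with np.save), because extract_bow must be given the same vocabulary to produce compatible descriptors.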
4、Cross-validation testing
import cv2
import numpy as np
from os.path import join
import os
import math

# build the SVM model on CarData and run a k_fold-style test; RATIO = 1 means all data is used for testing
RATIO = 0.2
datapath = "E:/dl4cv/datesets/CarData/TrainImages/"

def path(cls, i):
    return "%s/%s%d.pgm" % (datapath, cls, i+1)

# get the image paths and labels of the training and test sets according to the ratio
def get_files(file_dir, ratio):
    '''
    Args:
        file_dir: file directory
    Returns:
        list of images and labels
    '''
    pos = []
    label_pos = []
    neg = []
    label_neg = []
    for file in os.listdir(file_dir):
        name = file.split(sep='-')
        if name[0] == 'pos':
            pos.append(file_dir + file)
            label_pos.append(1)
        else:
            neg.append(file_dir + file)
            label_neg.append(-1)
    print('The dataset contains %d pos\nand %d neg samples' % (len(pos), len(neg)))
    # image list and label list
    # hstack stacks the arrays horizontally (column-wise)
    image_list = np.hstack((pos, neg))
    label_list = np.hstack((label_pos, label_neg))
    temp = np.array([image_list, label_list])
    temp = temp.transpose()
    # shuffle so that positive and negative samples are mixed; a percentage slice can then be taken directly
    np.random.shuffle(temp)
    all_image_list = temp[:, 0]
    all_label_list = temp[:, 1]
    n_sample = len(all_label_list)
    # determine the number of training and validation samples from the ratio
    n_val = math.ceil(n_sample * ratio)   # number of validation samples
    n_train = n_sample - n_val            # number of training samples
    tra_images = []
    val_images = []
    # the first n_train entries become tra_images, the remainder become val_images
    tra_images = all_image_list[:n_train]
    tra_labels = all_label_list[:n_train]
    tra_labels = [int(float(i)) for i in tra_labels]
    val_images = all_image_list[n_train:]
    val_labels = all_label_list[n_train:]
    val_labels = [int(float(i)) for i in val_labels]
    return tra_images, tra_labels, val_images, val_labels

pos, neg = "pos-", "neg-"

# create the SIFT feature detector and extractor
detect = cv2.xfeatures2d.SIFT_create()
extract = cv2.xfeatures2d.SIFT_create()

# create the FLANN-based matcher
flann_params = dict(algorithm=1, trees=5)
matcher = cv2.FlannBasedMatcher(flann_params, {})

# create the BOW trainer
bow_kmeans_trainer = cv2.BOWKMeansTrainer(40)
extract_bow = cv2.BOWImgDescriptorExtractor(extract, matcher)

# read the image in grayscale, extract SIFT descriptors and return them
def extract_sift(fn):
    im = cv2.imread(fn, 0)
    return extract.compute(im, detect.detect(im))[1]

# feed positive and negative samples into the trainer
for i in range(8):
    bow_kmeans_trainer.add(extract_sift(path(pos, i)))
    bow_kmeans_trainer.add(extract_sift(path(neg, i)))

# cluster runs k-means and returns the vocabulary, which extract_bow then uses to compute descriptors
voc = bow_kmeans_trainer.cluster()
extract_bow.setVocabulary(voc)

# return the descriptor computed by the BOW descriptor extractor
def bow_features(fn):
    im = cv2.imread(fn, 0)
    return extract_bow.compute(im, detect.detect(im))

# get the datasets
train_images, train_labels, val_images, val_labels = get_files(datapath, RATIO)
traindata, trainlabels = [], []

# the earlier parameter of 20 samples is not a case of "the bigger the better"
#for i in range(400):
#    traindata.extend(bow_features(path(pos, i))); trainlabels.append(1)
#    traindata.extend(bow_features(path(neg, i))); trainlabels.append(-1)
# with a much larger training set the predictions were clearly wrong
for i in range(len(train_images)):
    traindata.extend(bow_features(train_images[i]))
    trainlabels.append(train_labels[i])

# create and train an SVM model
# preliminary impression: enlarging the dataset did not improve the SVM's recognition rate
svm = cv2.ml.SVM_create()
svm.train(np.array(traindata), cv2.ml.ROW_SAMPLE, np.array(trainlabels))

# return the prediction result
def predict(fn):
    f = bow_features(fn)
    p = svm.predict(f)
    print(fn, "\t", p[1][0][0])
    return p

# evaluate on the validation set
result = []
for i in range(len(val_images)):
    f = bow_features(val_images[i])
    p = svm.predict(f)
    result.append(p[1][0][0])
np_val_labels = np.array(val_labels)[:, np.newaxis]
np_result = np.array(result)[:, np.newaxis]
matches = np_result == np_val_labels
correct = np.count_nonzero(matches)
accuracy = correct * 100.0 / len(result)
print(accuracy)

# test on two images
car, notcar = "E:/sandbox/car3.jpg", "E:/sandbox/car4.jpg"
car_img = cv2.imread(car)
notcar_img = cv2.imread(notcar)
car_predict = predict(car)
not_car_predict = predict(notcar)

font = cv2.FONT_HERSHEY_SIMPLEX

if (car_predict[1][0][0] == 1.0):
    cv2.putText(car_img, 'Car Detected', (10, 30), font, 1, (0, 255, 0), 2, cv2.LINE_AA)

if (not_car_predict[1][0][0] == -1.0):
    cv2.putText(notcar_img, 'Car Not Detected', (10, 30), font, 1, (0, 0, 255), 2, cv2.LINE_AA)

cv2.imshow('BOW + SVM Success', car_img)
cv2.imshow('BOW + SVM Failure', notcar_img)
cv2.waitKey(0)
cv2.destroyAllWindows()
Validating against the dataset with a 20/80 split gives the recognition rate printed by the script above.
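The listing above uses a single hold-out split rather than true k-fold cross-validation. The same pipeline can be wrapped in a loop over folds; a minimal sketch, assuming the bow_features helper from the listing above plus two parallel lists of image paths and +1/-1 labels such as the ones assembled inside get_files (the fold count of 5 is arbitrary):

# Hypothetical k-fold helper; relies on bow_features defined above.
def k_fold_accuracy(image_list, label_list, k=5):
    idx = np.arange(len(image_list))
    folds = np.array_split(idx, k)
    accuracies = []
    for f in range(k):
        val_idx = folds[f]
        train_idx = np.hstack([folds[j] for j in range(k) if j != f])
        traindata = np.array([bow_features(image_list[i])[0] for i in train_idx], dtype=np.float32)
        trainlabels = np.array([int(label_list[i]) for i in train_idx], dtype=np.int32)
        fold_svm = cv2.ml.SVM_create()
        fold_svm.train(traindata, cv2.ml.ROW_SAMPLE, trainlabels)
        predictions = [fold_svm.predict(bow_features(image_list[i]))[1][0][0] for i in val_idx]
        correct = sum(1 for p, i in zip(predictions, val_idx) if int(p) == int(label_list[i]))
        accuracies.append(100.0 * correct / len(val_idx))
    return sum(accuracies) / k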
5、Detecting multiple cars with a sliding window
The application proceeds as follows:
1、Obtain the dataset.
2、Create the BOW trainer and obtain the visual vocabulary.
3、Train an SVM on the vocabulary.
4、Run a sliding window over an image pyramid of the test image.
5、Filter overlapping rectangles with non-maximum suppression.
6、Read off the result.
The individual functions are as follows.
def resize(img, scaleFactor):
    return cv2.resize(img, (int(img.shape[1] * (1 / scaleFactor)),
                            int(img.shape[0] * (1 / scaleFactor))),
                      interpolation=cv2.INTER_AREA)
Resize the image by the given factor.
def pyramid(image, scale=1.5, minSize=(200, 80)):
    yield image
    while True:
        image = resize(image, scale)
        if image.shape[0] < minSize[1] or image.shape[1] < minSize[0]:
            break
        yield image
Use yield to produce the pyramid levels one at a time.
def sliding_window(image, step, window_size):
    for y in range(0, image.shape[0], step):
        for x in range(0, image.shape[1], step):
            yield (x, y, image[y:y + window_size[1], x:x + window_size[0]])
Again use yield to produce the sliding-window regions.
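Together, pyramid and sliding_window produce every candidate region the classifier will later see. A small usage sketch that just sizes the levels and counts the windows (the test-image path, window size 100x40 and step 20 match the detection script further below):

# Usage sketch: walk the pyramid of a test image and count the 100x40 candidate windows.
img = cv2.imread("E:/sandbox/car1.png")
count = 0
for level, resized in enumerate(pyramid(img, 1.25)):
    print("level %d: %dx%d" % (level, resized.shape[1], resized.shape[0]))
    for (x, y, roi) in sliding_window(resized, 20, (100, 40)):
        if roi.shape[1] == 100 and roi.shape[0] == 40:   # skip partial windows at the image borders
            count += 1
print(count, "windows to classify")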
# import the necessary packages
import numpy as np

# Malisiewicz et al.
# Python port by Adrian Rosebrock
def non_max_suppression_fast(boxes, overlapThresh):
    # if there are no boxes, return an empty list
    if len(boxes) == 0:
        return []
    # if the bounding boxes are integers, convert them to floats --
    # this is important since we'll be doing a bunch of divisions
    if boxes.dtype.kind == "i":
        boxes = boxes.astype("float")
    # initialize the list of picked indexes
    pick = []
    # grab the coordinates of the bounding boxes
    x1 = boxes[:, 0]
    y1 = boxes[:, 1]
    x2 = boxes[:, 2]
    y2 = boxes[:, 3]
    scores = boxes[:, 4]
    # compute the area of the bounding boxes and sort the bounding
    # boxes by score (ascending, so the highest-scoring box is picked first below)
    area = (x2 - x1 + 1) * (y2 - y1 + 1)
    idxs = np.argsort(scores)
    # keep looping while some indexes still remain in the indexes list
    while len(idxs) > 0:
        # grab the last index in the indexes list (the highest remaining score)
        # and add the index value to the list of picked indexes
        last = len(idxs) - 1
        i = idxs[last]
        pick.append(i)
        # find the largest (x, y) coordinates for the start of
        # the overlap region and the smallest (x, y) coordinates
        # for the end of the overlap region
        xx1 = np.maximum(x1[i], x1[idxs[:last]])
        yy1 = np.maximum(y1[i], y1[idxs[:last]])
        xx2 = np.minimum(x2[i], x2[idxs[:last]])
        yy2 = np.minimum(y2[i], y2[idxs[:last]])
        # compute the width and height of the overlap region
        w = np.maximum(0, xx2 - xx1 + 1)
        h = np.maximum(0, yy2 - yy1 + 1)
        # compute the ratio of overlap
        overlap = (w * h) / area[idxs[:last]]
        # delete the picked index and all indexes whose overlap exceeds the threshold
        idxs = np.delete(idxs, np.concatenate(([last],
            np.where(overlap > overlapThresh)[0])))
    # return only the bounding boxes that were picked, using the
    # integer data type
    return boxes[pick].astype("int")
This function takes a set of scored rectangles and sorts them by score. Starting from the highest-scoring rectangle, it discards every remaining rectangle that overlaps it by more than the given threshold.
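A toy check of the behaviour (coordinates and scores are made up; each row is [x1, y1, x2, y2, score]):

# Two heavily overlapping boxes plus one separate box.
boxes = np.array([
    [ 10, 10, 110,  60, 3.0],
    [ 12, 12, 112,  62, 1.5],   # overlaps the first box with a lower score -> suppressed
    [200, 50, 300, 100, 2.0],
])
print(non_max_suppression_fast(boxes, 0.25))   # keeps the boxes scored 3.0 and 2.0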
The remaining helper functions:

datapath = "E:/py4cv/CarData/TrainImages"
SAMPLES = 400

def path(cls, i):
    return "%s/%s%d.pgm" % (datapath, cls, i+1)
Build the path of a sample file under the data directory.

def get_flann_matcher():
    flann_params = dict(algorithm=1, trees=5)
    return cv2.FlannBasedMatcher(flann_params, {})
Get the FLANN-based matcher.

def get_bow_extractor(extract, match):
    return cv2.BOWImgDescriptorExtractor(extract, match)
Get the BOW image descriptor extractor.

def get_extract_detect():
    return cv2.xfeatures2d.SIFT_create(), cv2.xfeatures2d.SIFT_create()
Get the SIFT extractor and detector.

def extract_sift(fn, extractor, detector):
    im = cv2.imread(fn, 0)
    return extractor.compute(im, detector.detect(im))[1]
Read the image in grayscale and compute its SIFT descriptors.

def bow_features(img, extractor_bow, detector):
    return extractor_bow.compute(img, detector.detect(img))
Get the BOW features.
Putting these together, the complete detector module (car_detector.detector):

import cv2
import numpy as np

datapath = "E:/py4cv/CarData/TrainImages"
SAMPLES = 400

def path(cls, i):
    return "%s/%s%d.pgm" % (datapath, cls, i+1)

def get_flann_matcher():
    flann_params = dict(algorithm=1, trees=5)
    return cv2.FlannBasedMatcher(flann_params, {})

def get_bow_extractor(extract, match):
    return cv2.BOWImgDescriptorExtractor(extract, match)

def get_extract_detect():
    return cv2.xfeatures2d.SIFT_create(), cv2.xfeatures2d.SIFT_create()

def extract_sift(fn, extractor, detector):
    im = cv2.imread(fn, 0)
    return extractor.compute(im, detector.detect(im))[1]

def bow_features(img, extractor_bow, detector):
    return extractor_bow.compute(img, detector.detect(img))

def car_detector():
    pos, neg = "pos-", "neg-"
    # get the two SIFT instances (detector and extractor)
    detect, extract = get_extract_detect()
    # get the FLANN matcher
    matcher = get_flann_matcher()
    # create the BOW trainer and descriptor extractor
    print("building BOWKMeansTrainer...")
    bow_kmeans_trainer = cv2.BOWKMeansTrainer(12)
    extract_bow = cv2.BOWImgDescriptorExtractor(extract, matcher)
    # first build the BOW vocabulary
    print("adding features to trainer...")
    for i in range(SAMPLES):
        print(i)
        bow_kmeans_trainer.add(extract_sift(path(pos, i), extract, detect))
        # note: no negative samples are added here
        #bow_kmeans_trainer.add(extract_sift(path(neg, i), extract, detect))
    # cluster and return the vocabulary
    vocabulary = bow_kmeans_trainer.cluster()
    extract_bow.setVocabulary(vocabulary)
    # train the SVM on the vocabulary-based features
    traindata, trainlabels = [], []
    print("adding to train data")
    for i in range(SAMPLES):
        print(i)
        # 1 means a car is present, -1 means no car
        traindata.extend(bow_features(cv2.imread(path(pos, i), 0), extract_bow, detect))
        trainlabels.append(1)
        traindata.extend(bow_features(cv2.imread(path(neg, i), 0), extract_bow, detect))
        trainlabels.append(-1)
    svm = cv2.ml.SVM_create()
    svm.setType(cv2.ml.SVM_C_SVC)
    svm.setGamma(1)
    svm.setC(35)
    # a larger C penalizes misclassified training samples more heavily, which can overfit;
    # a smaller C allows a softer margin
    svm.setKernel(cv2.ml.SVM_RBF)
    # SVM_LINEAR separates two classes with a hyperplane; SVM_RBF uses a Gaussian kernel
    # and can handle data that is not linearly separable
    svm.train(np.array(traindata), cv2.ml.ROW_SAMPLE, np.array(trainlabels))
    return svm, extract_bow
Sliding-window vehicle detection:
import cv2
import numpy as np
from car_detector.detector import car_detector, bow_features
from car_detector.pyramid import pyramid
from car_detector.non_maximum import non_max_suppression_fast as nms
from car_detector.sliding_window import sliding_window
import urllib

def in_range(number, test, thresh=0.2):
    return abs(number - test) < thresh

img_path = "E:/sandbox/car1.png"

# get the trained SVM model; note that no sliding window is used during training
svm, extractor = car_detector()
detect = cv2.xfeatures2d.SIFT_create()

w, h = 100, 40
img = cv2.imread(img_path)

rectangles = []
counter = 1
scaleFactor = 1.25
scale = 1
font = cv2.FONT_HERSHEY_PLAIN

for resized in pyramid(img, scaleFactor):
    scale = float(img.shape[1]) / float(resized.shape[1])
    for (x, y, roi) in sliding_window(resized, 20, (100, 40)):
        # window size is 100x40
        if roi.shape[1] != w or roi.shape[0] != h:
            continue
        try:
            bf = bow_features(roi, extractor, detect)
            _, result = svm.predict(bf)
            a, res = svm.predict(bf, flags=cv2.ml.STAT_MODEL_RAW_OUTPUT | cv2.ml.STAT_MODEL_UPDATE_MODEL)
            print(("Class: %d, Score: %f, a: %s") % (result[0][0], res[0][0], res))
            score = res[0][0]
            if result[0][0] == 1:
                if score < -1.0:
                    rx, ry, rx2, ry2 = int(x * scale), int(y * scale), int((x+w) * scale), int((y+h) * scale)
                    rectangles.append([rx, ry, rx2, ry2, abs(score)])
        except:
            pass
        counter += 1

windows = np.array(rectangles)
# apply non-maximum suppression
boxes = nms(windows, 0.25)

for (x, y, x2, y2, score) in boxes:
    print(x, y, x2, y2, score)
    cv2.rectangle(img, (int(x), int(y)), (int(x2), int(y2)), (0, 255, 0), 1)
    cv2.putText(img, "%f" % score, (int(x), int(y)), font, 1, (0, 255, 0))

cv2.imshow("img", img)
cv2.waitKey(0)

Summary and takeaways:
1、Python, as a language of this era, really is well suited to writing application-level algorithms for machine learning and image processing.
2、SVM is an algorithm that has been developed for a long time; around 90% is probably its ceiling here, even with SIFT + BOW feature extraction.
3、To push accuracy further, move on to MLPs, CNNs and ultimately deep learning as soon as possible.
4、Discovering real-world application scenarios for machine learning is, beyond mastering the methods themselves, the hardest and most meaningful task.
5、Today is the best time to learn machine learning; time waits for no one.
Thanks for reading this far; I hope it helps.