This project comes from the book OpenCV 3 Computer Vision with Python (《OpenCV 3 計算機視覺 Python 語言實現》).
The overall workflow is as follows:
1) Obtain a training dataset.
2) Create a BOW trainer and build the visual vocabulary.
3) Train an SVM on BOW descriptors built from that vocabulary.
4) Run a sliding-window detector over an image pyramid of the test image.
5) Apply non-maximum suppression to the overlapping rectangles.
6) Output the results.
The project is structured as follows:
|-----car_detector
| |--detector.py
| |--__init__.py
| |--non_maximum.py
| |--pyramid.py
| |--sliding_window.py
|-----car_sliding_windows.py
The samples and code are available via the link.
The main program is car_sliding_windows.py; all the helper tools live in the car_detector folder. Since Python 2.7 is used, the folder must also contain an __init__.py file so the car_detector detection module can be imported as a package.
The car_detector module consists of four files:
- the SVM training model (detector.py)
- the non-maximum suppression function (non_maximum.py)
- the image pyramid (pyramid.py)
- the sliding window function (sliding_window.py)
Image pyramid: pyramid.py
```python
# coding=utf-8
import cv2

"""
Function: scale an image
Input:    image, scale factor
Output:   the resized image
"""
def resize(img, scaleFactor):
    return cv2.resize(img, (int(img.shape[1] * (1 / scaleFactor)),
                            int(img.shape[0] * (1 / scaleFactor))),
                      interpolation=cv2.INTER_AREA)

"""
Function: build an image pyramid
Input:    image, scale factor, minimum size
Output:   the image pyramid (one level per iteration)
"""
def pyramid(image, scale=1.5, minSize=(200, 80)):
    yield image
    # yield turns this function into a generator: calling pyramid() does not run
    # the body but returns an iterable object. Each time the caller iterates,
    # execution resumes right after the last yield with all local variables
    # intact, runs until the next yield, and hands back the next (smaller)
    # pyramid level.
    while True:
        image = resize(image, scale)
        if image.shape[0] < minSize[1] or image.shape[1] < minSize[0]:
            break
        yield image
```
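To see what the generator produces, here is a minimal sketch of iterating over the pyramid; the file name "test.jpg" is only a placeholder, not part of the project:

```python
# coding=utf-8
import cv2
from car_detector.pyramid import pyramid

img = cv2.imread("test.jpg")  # placeholder path
for level, resized in enumerate(pyramid(img, scale=1.5, minSize=(200, 80))):
    # level 0 is the original image; every further level is 1/1.5 the previous size
    print level, resized.shape
```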
Sliding window function: sliding_window.py
```python
# coding=utf-8

"""
Function: sliding window
Input:    image, step size, window size
Output:   one (x, y, window) tuple per iteration
"""
def sliding_window(image, step, window_size):
    for y in xrange(0, image.shape[0], step):
        for x in xrange(0, image.shape[1], step):
            yield (x, y, image[y:y + window_size[1], x:x + window_size[0]])
```
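A minimal sketch of how the two generators compose; the step and window size are the same values used later in the main script, and the image path is again a placeholder:

```python
# coding=utf-8
import cv2
from car_detector.pyramid import pyramid
from car_detector.sliding_window import sliding_window

img = cv2.imread("test.jpg")  # placeholder path
for resized in pyramid(img, scale=1.25):
    for (x, y, roi) in sliding_window(resized, step=20, window_size=(100, 40)):
        # windows clipped at the image border are smaller than 100x40; skip them
        if roi.shape[1] != 100 or roi.shape[0] != 40:
            continue
        # roi is a 40x100 patch of the current pyramid level, anchored at (x, y)
        print x, y, roi.shape
```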
Non-maximum suppression: non_maximum.py
Given a list of candidate bounding boxes, this function sorts them by classifier score. In each round it keeps the highest-scoring rectangle and removes every remaining rectangle whose overlap with it exceeds the threshold.
```python
# coding=utf-8
# import the necessary packages
import numpy as np

# Malisiewicz et al.
# Python port by Adrian Rosebrock
"""
Function: non-maximum suppression
Input:    bounding boxes (with scores), overlap threshold
Output:   the surviving bounding boxes
"""
def non_max_suppression_fast(boxes, overlapThresh):
    # if the list of boxes is empty, return an empty list
    if len(boxes) == 0:
        return []

    # if the box coordinates are integers, convert them to floats --
    # this is important because of a series of divisions below
    if boxes.dtype.kind == "i":
        boxes = boxes.astype("float")

    # initialize the list of picked indexes
    pick = []

    # grab the coordinates of the bounding boxes
    x1 = boxes[:, 0]
    y1 = boxes[:, 1]
    x2 = boxes[:, 2]
    y2 = boxes[:, 3]
    scores = boxes[:, 4]

    # compute the area of every box and sort the indexes by score
    # (ascending, so the highest-scoring box sits at the end)
    area = (x2 - x1 + 1) * (y2 - y1 + 1)
    idxs = np.argsort(scores)

    # keep looping while some indexes still remain in the indexes list
    while len(idxs) > 0:
        # grab the index of the highest-scoring box and add it to the picks
        last = len(idxs) - 1
        i = idxs[last]
        pick.append(i)

        # find the largest starting and smallest ending coordinates between
        # the highest-scoring box and every remaining box
        xx1 = np.maximum(x1[i], x1[idxs[:last]])
        yy1 = np.maximum(y1[i], y1[idxs[:last]])
        xx2 = np.minimum(x2[i], x2[idxs[:last]])
        yy2 = np.minimum(y2[i], y2[idxs[:last]])

        # compute the width and height of the intersection
        w = np.maximum(0, xx2 - xx1 + 1)
        h = np.maximum(0, yy2 - yy1 + 1)

        # compute the overlap of every remaining box with the picked box
        overlap = (w * h) / area[idxs[:last]]

        # delete the picked index (already saved in pick) and every index
        # whose overlap exceeds the threshold
        idxs = np.delete(idxs, np.concatenate(([last],
                         np.where(overlap > overlapThresh)[0])))

    # return only the bounding boxes that were picked using the
    # integer data type
    return boxes[pick].astype("int")
```
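A quick sanity check with made-up numbers: two heavily overlapping boxes and one separate box. The lower-scoring member of the overlapping pair should be suppressed.

```python
# coding=utf-8
import numpy as np
from car_detector.non_maximum import non_max_suppression_fast

# each row is [x1, y1, x2, y2, score]; values are made up for illustration
boxes = np.array([
    [ 10,  10, 110,  50, 2.0],   # high-scoring box
    [ 14,  12, 114,  52, 1.2],   # overlaps the first box heavily -> suppressed
    [200, 100, 300, 140, 1.5],   # far away -> kept
])
print non_max_suppression_fast(boxes, overlapThresh=0.25)
# expected: the first and third boxes survive
```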
Detection function: detector.py
This file contains the SIFT feature extraction, the BOW image descriptor computation, and the SVM classifier training.
```python
# coding=utf-8
import cv2
import numpy as np

datapath = "./CarData/TrainImages/"
SAMPLES = 400

def path(cls, i):
    return "%s/%s%d.pgm" % (datapath, cls, i + 1)

# create a FLANN matcher
def get_flann_matcher():
    flann_params = dict(algorithm=1, trees=5)
    return cv2.FlannBasedMatcher(flann_params, {})

def get_bow_extractor(extract, match):
    return cv2.BOWImgDescriptorExtractor(extract, match)

# create the SIFT feature detector and extractor
def get_extract_detect():
    return cv2.xfeatures2d.SIFT_create(), cv2.xfeatures2d.SIFT_create()

def extract_sift(fn, extractor, detector):
    im = cv2.imread(fn, 0)
    return extractor.compute(im, detector.detect(im))[1]

# compute the BOW descriptor of an image
def bow_features(img, extractor_bow, detector):
    return extractor_bow.compute(img, detector.detect(img))

def car_detector():
    pos, neg = "pos-", "neg-"
    detect, extract = get_extract_detect()
    matcher = get_flann_matcher()

    print "building BOWKMeansTrainer..."
    bow_kmeans_trainer = cv2.BOWKMeansTrainer(12)
    extract_bow = cv2.BOWImgDescriptorExtractor(extract, matcher)

    print "adding features to trainer"
    for i in range(8):
        print i
        bow_kmeans_trainer.add(extract_sift(path(pos, i), extract, detect))
        bow_kmeans_trainer.add(extract_sift(path(neg, i), extract, detect))

    # cluster the SIFT descriptors into the visual vocabulary
    vocabulary = bow_kmeans_trainer.cluster()
    extract_bow.setVocabulary(vocabulary)

    traindata, trainlabels = [], []
    print "adding to train data"
    for i in range(SAMPLES):
        print i
        traindata.extend(bow_features(cv2.imread(path(pos, i), 0), extract_bow, detect))
        trainlabels.append(1)
        traindata.extend(bow_features(cv2.imread(path(neg, i), 0), extract_bow, detect))
        trainlabels.append(-1)

    # create the SVM classifier
    svm = cv2.ml.SVM_create()
    svm.setType(cv2.ml.SVM_C_SVC)
    svm.setGamma(1)
    svm.setC(35)                   # trades off training error against generalization
    svm.setKernel(cv2.ml.SVM_RBF)  # kernel function

    # train
    svm.train(np.array(traindata), cv2.ml.ROW_SAMPLE, np.array(trainlabels))
    return svm, extract_bow
```
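A minimal sketch of using the trained detector on a single image patch, assuming the CarData training set is present at datapath; "some_window.png" is a placeholder for a roughly 100x40 grayscale patch:

```python
# coding=utf-8
import cv2
from car_detector.detector import car_detector, bow_features

svm, extract_bow = car_detector()          # trains the SVM (slow on first run)
sift = cv2.xfeatures2d.SIFT_create()

patch = cv2.imread("some_window.png", 0)   # placeholder path for a test patch
# compute() returns a 1 x 12 visual-word histogram
# (None if no SIFT keypoints are found in the patch)
descriptor = bow_features(patch, extract_bow, sift)
_, result = svm.predict(descriptor)
print "predicted label:", result[0][0]     # 1 = car, -1 = not a car
```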
Main program: car_sliding_windows.py
```python
# coding=utf-8
import cv2
import numpy as np
import urllib

from car_detector.detector import car_detector, bow_features
from car_detector.pyramid import pyramid
from car_detector.non_maximum import non_max_suppression_fast as nms
from car_detector.sliding_window import sliding_window

def in_range(number, test, thresh=0.2):
    return abs(number - test) < thresh

test_image = "../images/cars.jpg"   # path to the test sample
img_path = "../images/test.jpg"
urllib.urlretrieve(test_image, img_path)   # fetch the test image to img_path

svm, extractor = car_detector()     # extract features and train the classifier
detect = cv2.xfeatures2d.SIFT_create()

w, h = 100, 40                      # sliding-window size, used below
img = cv2.imread(img_path)
# img = cv2.imread(test_image)

rectangles = []
counter = 1
scaleFactor = 1.25
scale = 1
font = cv2.FONT_HERSHEY_PLAIN

for resized in pyramid(img, scaleFactor):
    scale = float(img.shape[1]) / float(resized.shape[1])
    for (x, y, roi) in sliding_window(resized, 20, (100, 40)):
        if roi.shape[1] != w or roi.shape[0] != h:
            continue
        try:
            bf = bow_features(roi, extractor, detect)
            _, result = svm.predict(bf)
            a, res = svm.predict(bf, flags=cv2.ml.STAT_MODEL_RAW_OUTPUT | cv2.ml.STAT_MODEL_UPDATE_MODEL)
            print "Class: %d, Score: %f, a: %s" % (result[0][0], res[0][0], res)
            score = res[0][0]
            if result[0][0] == 1:
                if score < -1.0:
                    rx, ry, rx2, ry2 = int(x * scale), int(y * scale), int((x + w) * scale), int((y + h) * scale)
                    rectangles.append([rx, ry, rx2, ry2, abs(score)])
        except:
            pass   # skip windows where feature extraction fails
        counter += 1

windows = np.array(rectangles)
boxes = nms(windows, 0.25)

for (x, y, x2, y2, score) in boxes:
    print x, y, x2, y2, score
    cv2.rectangle(img, (int(x), int(y)), (int(x2), int(y2)), (0, 255, 0), 1)
    cv2.putText(img, "%f" % score, (int(x), int(y)), font, 1, (0, 255, 0))

cv2.imshow("img", img)
cv2.waitKey(0)
```