Caffe in Practice: Two Simple Examples (Object Classification and Face Detection)


1. Object Classification

This uses the example bundled with the official Caffe release; my contribution here is mainly the line-by-line annotation of the code.

First, import the necessary libraries:

import sys
caffe_root = 'E:\\caffe\\caffe-master\\'
sys.path.insert(0, caffe_root + 'python')  # make pycaffe importable before `import caffe`

import caffe
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
plt.rcParams['figure.figsize'] = (10, 10)        # maximum display size; with the default savefig dpi of 100 (see plt.rcParams['savefig.dpi']) this caps figures at 1000*1000
plt.rcParams['image.interpolation'] = 'nearest'  # nearest-neighbour interpolation
plt.rcParams['image.cmap'] = 'gray'              # show single-channel images in grayscale rather than a colour map
import os
caffe.set_mode_cpu()  # CPU mode
model_def = caffe_root + 'models/bvlc_reference_caffenet/deploy.prototxt'  # network definition
model_weights = caffe_root + 'models/bvlc_reference_caffenet/bvlc_reference_caffenet.caffemodel'  # trained weights
net = caffe.Net(model_def, model_weights, caffe.TEST)  # TEST mode: forward passes only, no training
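Before feeding in any data it can help to see what the network expects; a quick sanity-check sketch using standard pycaffe attributes:

for layer_name, blob in net.blobs.iteritems():  # net.blobs is an OrderedDict of layer outputs
    print layer_name, blob.data.shape           # (batch, channels, height, width)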
# Preprocessing
# Caffe works in BGR channel order while matplotlib uses RGB, and Caffe expects pixel values
# in [0,255] while matplotlib loads images into [0,1]; both conventions have to be converted.
# Load the ImageNet mean image; subtracting it from the input reduces noise (and acts as a rough form of feature scaling).
mu = np.load(caffe_root + 'python/caffe/imagenet/ilsvrc_2012_mean.npy')
mu = mu.mean(1).mean(1)  # average over the two spatial dimensions, leaving one mean value per channel
print 'mean-subtracted values:', zip('BGR', mu)  # per-channel B, G, R mean pixel values
transformer = caffe.io.Transformer({'data': net.blobs['data'].data.shape})  # transformer matched to the 'data' blob's shape
transformer.set_transpose('data', (2, 0, 1))     # Python loads images as H*W*C; Caffe wants C*H*W
transformer.set_mean('data', mu)                 # subtract the per-channel mean
transformer.set_raw_scale('data', 255)           # rescale from [0,1] (Python side) to [0,255] (Caffe side)
transformer.set_channel_swap('data', (2, 1, 0))  # swap RGB to BGR
net.blobs['data'].reshape(50, 3, 227, 227)       # batch size 50, 3 channels, 227*227 images
image = caffe.io.load_image(caffe_root + 'examples/images/cat.jpg')
transformed_image = transformer.preprocess('data', image)  # run the preprocessing above; the assignment a few lines down loads the result into the data blob
plt.imshow(image)  # display the image
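For intuition, the transformer's work can be written out by hand. A minimal sketch that should reproduce transformed_image, applying the same steps in the order Transformer.preprocess uses:

resized = caffe.io.resize_image(image, (227, 227))  # match the data blob's spatial size
manual = resized.transpose(2, 0, 1)                 # H*W*C -> C*H*W
manual = manual[[2, 1, 0], :, :]                    # RGB -> BGR
manual = manual * 255.0                             # [0,1] -> [0,255]
manual = manual - mu[:, np.newaxis, np.newaxis]     # subtract the per-channel mean
print np.allclose(manual, transformed_image)        # expect True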
net.blobs['data'].data[...] = transformed_image
output = net.forward()  # one forward pass
output_prob = output['prob'][0]  # probabilities over the 1000 classes for the first image in the batch, as a 1-D array; 'prob' is defined in deploy.prototxt as:
'''
layer {
  name: "prob"
  type: "Softmax"
  bottom: "fc8"
  top: "prob"
}
'''
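In other words, 'prob' is just a softmax over the 1000-way fc8 scores, which is easy to verify by hand:

fc8 = net.blobs['fc8'].data[0]         # raw class scores for the first image
manual_prob = np.exp(fc8 - fc8.max())  # subtract the max for numerical stability
manual_prob /= manual_prob.sum()
print np.allclose(manual_prob, output_prob)  # expect True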
print 'predicted class is:', output_prob.argmax()  # index of the highest-probability class

out:

predicted class is: 281
labels_file = caffe_root + 'data/ilsvrc12/synset_words.txt'
labels = np.loadtxt(labels_file, str, delimiter='\t')  # read the labels line by line, tab-delimited
print 'output label:' , labels[output_prob.argmax()]  

out:

output label: n02123045 tabby, tabby cat
#print the five most probable classes
top_inds = output_prob.argsort()[::-1][:5]  # argsort is ascending, so reverse it and take the first five indices, i.e. the five highest probabilities
print 'probabilities and labels:'
zip(output_prob[top_inds] , labels[top_inds])

out:

probabilities and labels:
[(0.3124361, 'n02123045 tabby, tabby cat'),
 (0.23797169, 'n02123159 tiger cat'),
 (0.12387215, 'n02124075 Egyptian cat'),
 (0.10075664, 'n02119022 red fox, Vulpes vulpes'),
 (0.070956953, 'n02127052 lynx, catamount')]
2. Face Detection
The code comes from a tutorial video; I post it here with my annotations.
import caffe
%matplotlib inline
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.cbook as cbook
#import Image
import sys
import os
from math import pow
from PIL import Image, ImageDraw, ImageFont
import cv2
import math
import random
caffe_root = 'E:\\caffe\\caffe-master\\'

sys.path.insert(0, caffe_root + 'python')
os.environ['GLOG_minloglevel'] = '2'  # silence Caffe's INFO logging (warnings and errors only)

caffe.set_mode_cpu()
#non-maximum suppression (NMS)
class Point(object):
    def __init__(self, x, y):
        self.x = x
        self.y = y
def calculateDistance(x1,y1,x2,y2):  # Euclidean distance between two points, e.g. the diagonal of a face box
    dist = math.sqrt((x2 - x1)**2 + (y2 - y1)**2)
    return dist

def range_overlap(a_min, a_max, b_min, b_max):

    return (a_min <= b_max) and (b_min <= a_max)  # True when the 1-D intervals [a_min, a_max] and [b_min, b_max] overlap

def rect_overlaps(r1,r2):
    return range_overlap(r1.left, r1.right, r2.left, r2.right) and range_overlap(r1.bottom, r1.top, r2.bottom, r2.top)

def rect_merge(r1,r2, mergeThresh):

    if rect_overlaps(r1,r2):
        # dist = calculateDistance((r1.left + r1.right)/2, (r1.top + r1.bottom)/2, (r2.left + r2.right)/2, (r2.top + r2.bottom)/2)
        SI = abs(min(r1.right, r2.right) - max(r1.left, r2.left)) * abs(max(r1.bottom, r2.bottom) - min(r1.top, r2.top))  # intersection area
        SA = abs(r1.right - r1.left) * abs(r1.bottom - r1.top)  # area of r1
        SB = abs(r2.right - r2.left) * abs(r2.bottom - r2.top)  # area of r2
        S = SA + SB - SI  # union area
        ratio = float(SI) / float(S)  # intersection over union (IoU)
        if ratio > mergeThresh:
            return 1
    return 0
class Rect(object):
    def __init__(self, p1, p2):  # p1 and p2 are two opposite corners, in either order
        '''Store the top, bottom, left and right values for points
               p1 and p2 are the (corners) in either order
        '''
        self.left   = min(p1.x, p2.x)  # the smaller x is the left edge
        self.right  = max(p1.x, p2.x)
        self.bottom = min(p1.y, p2.y)  # note that "bottom" here is the smaller y
        self.top    = max(p1.y, p2.y)

    def __str__(self):
        return "Rect[%d, %d, %d, %d]" % ( self.left, self.top, self.right, self.bottom )
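The ratio computed in rect_merge is the intersection-over-union (IoU) of the two boxes. A quick worked example with hypothetical coordinates:

r1 = Rect(Point(0, 0), Point(100, 100))
r2 = Rect(Point(50, 50), Point(150, 150))
# intersection SI = 50*50 = 2500, areas SA = SB = 10000, union S = 17500
# IoU = 2500/17500 ~ 0.143, below 0.2, so these two boxes would not be merged
print rect_merge(r1, r2, 0.2)  # 0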
def nms_average(boxes, groupThresh=2, overlapThresh=0.2):  
    rects = []
    temp_boxes = []
    weightslist = []
    new_rects = []
    for i in range(len(boxes)):
        if boxes[i][4] > 0.2:  # keep only candidates whose face probability exceeds 0.2
            rects.append([boxes[i,0], boxes[i,1], boxes[i,2]-boxes[i,0], boxes[i,3]-boxes[i,1]])  # convert [x1,y1,x2,y2] to [x,y,w,h]


    rects, weights = cv2.groupRectangles(rects, groupThresh, overlapThresh)  # cluster similar rectangles; see http://blog.csdn.net/nongfu_spring/article/details/38977833 for an explanation of the function

    rectangles = []
    for i in range(len(rects)):

        testRect = Rect( Point(rects[i,0], rects[i,1]), Point(rects[i,0]+rects[i,2], rects[i,1]+rects[i,3]))  # convert [x,y,w,h] back into a corner-based Rect
        rectangles.append(testRect)
    clusters = []
    for rect in rectangles:
        matched = 0
        for cluster in clusters:
            if (rect_merge( rect, cluster , 0.2) ):
                matched=1
                cluster.left   = (cluster.left   + rect.left  ) / 2  # average the matched box into the cluster
                cluster.right  = (cluster.right  + rect.right ) / 2
                cluster.top    = (cluster.top    + rect.top   ) / 2
                cluster.bottom = (cluster.bottom + rect.bottom) / 2

        if ( not matched ):
            clusters.append( rect )
    result_boxes = []
    for i in range(len(clusters)):

        result_boxes.append([clusters[i].left, clusters[i].bottom, clusters[i].right, clusters[i].top, 1])

    return result_boxes
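Note that cv2.groupRectangles drops any cluster with groupThresh or fewer members, so isolated detections disappear entirely. A small standalone check with made-up boxes:

cand = [[10, 10, 100, 100], [12, 11, 98, 102], [300, 300, 50, 50]]  # two similar boxes plus an outlier
grouped, weights = cv2.groupRectangles(cand, 1, 0.2)
print grouped  # one averaged box, roughly [11, 10, 99, 101]; the singleton at (300, 300) is rejected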
def generateBoundingBox(featureMap, scale): #the image was scaled before the forward pass, so coordinates must be mapped back through 1/scale
    boundingBox = [] #candidate boxes together with their face probabilities
    stride = 32 #effective stride (receptive-field step) of the fully-convolutional network; a value to tune by hand
    cellSize = 227 #size of the face window: whenever a feature-map cell's probability exceeds 95%, the corresponding point in the original image becomes the top-left corner of a 227*227 candidate box (this tends to produce many overlapping candidates)
    #scan the feature map and add a box for every cell whose face probability is at least 95%
    for (x,y), prob in np.ndenumerate(featureMap):
        if(prob >= 0.95):
            boundingBox.append([float(stride * y)/ scale, 
                                float(x * stride)/scale, 
                                float(stride * y + cellSize - 1)/scale, 
                                float(stride * x + cellSize - 1)/scale, prob])

    return boundingBox
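A tiny worked example of that coordinate mapping, on a hypothetical 2*2 probability map at scale 0.5:

fmap = np.array([[0.10, 0.99],
                 [0.20, 0.50]])
print generateBoundingBox(fmap, 0.5)
# only cell (x=0, y=1) clears 0.95; its box is
# [32*1/0.5, 32*0/0.5, (32*1+226)/0.5, (32*0+226)/0.5, 0.99] = [64.0, 0.0, 516.0, 452.0, 0.99]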
def face_detection(imgFile):
    net_full_conv = caffe.Net(os.path.join(caffe_root, 'faceDetect', 'deploy_full_conv.prototxt'),
                              os.path.join(caffe_root, 'faceDetect', 'alexnet_iter_50000_full_conv.caffemodel'),
                              caffe.TEST) #fully-convolutional network (trained weights plus deploy definition)
    randNum = random.randint(1,10000) #a random number between 1 and 10000 (unused below)



    scales = []  #the scales that make up the image pyramid
    factor = 0.793700526  #shrink factor between pyramid levels (an empirical value)


    img = cv2.imread(imgFile) #read the test image


    largest = min(2, 4000/max(img.shape[0:2])) #largest scale to try (note: integer division in Python 2)
    scale = largest
    minD = largest*min(img.shape[0:2]) #length of the shorter side at the largest scale
    while minD >= 227: #keep adding pyramid levels, multiplying by factor each time, while the shorter side stays at least 227 pixels
        scales.append(scale)
        scale *= factor
        minD *= factor

    total_boxes = []  #all candidate boxes, across all scales
    #multi-scale face detection
    for scale in scales:

        scale_img = cv2.resize(img, (int(img.shape[1] * scale), int(img.shape[0] * scale)))  #resize the image; cv2.resize takes the target size as (width, height)
        cv2.imwrite('E:\\caffe\\caffe-master\\faceDetect\\scale\\scale_img.jpg',scale_img) #save the rescaled image
        #preprocessing
        im = caffe.io.load_image('E:\\caffe\\caffe-master\\faceDetect\\scale\\scale_img.jpg') #reload it as float values in [0,1]
        net_full_conv.blobs['data'].reshape(1,3,scale_img.shape[0],scale_img.shape[1]) #blobs['data'] is the data layer; the image size changed, so the input blob must be reshaped to (1, 3, H, W)
        transformer = caffe.io.Transformer({'data': net_full_conv.blobs['data'].data.shape}) #transformer matched to the new data shape
        transformer.set_mean('data', np.load(caffe_root +
                                             'python\\caffe\\imagenet\\ilsvrc_2012_mean.npy').mean(1).mean(1)) #subtract the per-channel mean
        transformer.set_transpose('data', (2,0,1))  #move image channels to outermost dimension
        transformer.set_channel_swap('data', (2,1,0)) #swap channels from RGB to BGR
        transformer.set_raw_scale('data', 255.0) #rescale from [0,1] to [0,255]
        
        out = net_full_conv.forward_all(data=np.asarray([transformer.preprocess('data', im)])) #one forward pass; out['prob'] is a two-channel probability map over spatial positions
        
        boxes = generateBoundingBox(out['prob'][0,1], scale)  #channel 1 holds the face probability; threshold it into candidate boxes
        if(boxes):
            total_boxes.extend(boxes)  #accumulate this pyramid level's candidates into total_boxes

    boxes_nms = np.array(total_boxes)
    true_boxes = nms_average(boxes_nms, 1, 0.2)  #non-maximum suppression keeps the most probable face box
    if true_boxes:
        (x1, y1, x2, y2) = true_boxes[0][:-1]
        print (x1, y1, x2, y2)
        cv2.rectangle(img, (int(x1),int(y1)), (int(x2),int(y2)), (0,0,255),thickness = 5)
        cv2.imwrite('E:\\caffe\\caffe-master\\faceDetect\\scale\\result.jpg',img)
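The pyramid construction is worth unpacking: factor = 0.793700526 is roughly 2^(-1/3), so every three levels halve the image. A standalone sketch of the resulting scale list for a hypothetical 800*1000 image:

img_h, img_w = 800, 1000  #hypothetical image size
factor = 0.793700526
scale = min(2, 4000 / max(img_h, img_w))  #Python 2 integer division: min(2, 4) = 2
minD, demo_scales = scale * min(img_h, img_w), []
while minD >= 227:  #stop once the shorter side would drop below the 227-pixel window
    demo_scales.append(scale)
    scale *= factor
    minD *= factor
print demo_scales  #roughly [2, 1.587, 1.26, 1.0, 0.794, 0.63, 0.5, 0.397, 0.315]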
imgFile = 'E:\\caffe\\caffe-master\\data\\imageset_2\\tangyudi\\tmp9055.jpg'
#image_file = cbook.get_sample_data(imgFile)
img = plt.imread(imgFile)
plt.imshow(img)
plt.show()

face_detection(imgFile)

out:

(581, 147, 818, 384)
imgFile = 'E:\\caffe\\caffe-master\\faceDetect\\scale\\result.jpg'
img = plt.imread(imgFile)
plt.imshow(img)
plt.show()