Deep-Learning Face Recognition with Dlib and OpenCV
Face recognition accuracy on the LFW (Labeled Faces in the Wild) dataset is now reported above 99.7%, which is indeed very high, but what is the accuracy in real-world environments? I have no hard data on that, but I am fairly sure it is nowhere near as optimistic. There are already commercial applications such as employee face-recognition attendance systems, customs identity verification, and even face recognition at banks, but think about it: in an attendance system or at a customs checkpoint, the identity-verification function is not as critical as vendors make it sound. If your face scan fails when you clock in, you simply scan again; if it still misidentifies you or cannot recognize you at all, you fall back to swiping a card or signing in some other way, grumble that the machine failed to recognize someone as handsome as you, and move on. As for face recognition at a bank ATM, notice that it never dares to let you transfer money by face alone without entering your password, and the machine still works perfectly well with face and fingerprint recognition switched off. So only the vendors themselves know what the recognition rate really is under real-world conditions: varying lighting, aging, heavy makeup (the internet-celebrity factor), and even deliberate spoofing. I am sure every vendor optimizes for these cases, for example with auxiliary hardware or scenario-specific constraints, and those optimizations do improve the overall experience of their products.
That digression aside, the goal of this article is the simplest practical face-recognition application: capture live faces with a camera, then compare them against 128D face features already stored in a database and recognize the corresponding person (name and so on). The project is built with VS2015 and a simple MFC dialog; the code lives at: http://git.oschina.net/wjiang/face_recognition
In this system I pre-stored the 128D face features of the frontal portraits of a few celebrities; you can of course store and import more faces.
After face detection, face image processing, and face recognition, the system identifies the corresponding person. The recognition results are shown below (to spare you my own face I used celebrity photos rather than real faces, and no liveness detection is performed).
This is of course only a simple demo. A real production system also needs liveness detection and similar techniques to keep photos or videos played on a phone from fooling the recognizer. For applications with higher security requirements such as payments and transfers, liveness detection alone may still not be safe enough, and face recognition can be combined with password verification to strengthen security.
Face data import: when the system starts up, it loads my face database, i.e. the celebrity portraits mentioned above. At the beginning of loading, the faces in the static images have to be detected, so we first need dlib's face detector, obtained with get_frontal_face_detector(). Then the 68-point face landmark model is loaded into shape_predictor sp, whose purpose is to align each face to a standard pose, and after that the DNN model is loaded. For each face photo we compute its feature, put the feature together with the name and other related information into a FACE_DESC structure, and finally push each face's structure into the face_desc_vec container. Here I only loaded the face information of 9 celebrities.
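The function below uses a few declarations that live elsewhere in the project and are not shown in this post: the shape_predictor sp, the network net, the FACE_DESC structure, and the face_desc_vec container. As a rough guide, here is a minimal sketch of what those declarations could look like; the network type is the anet_type from dlib's dnn_face_recognition_ex.cpp example, while the struct and class layout are my assumption based on how the code below uses them.

#include <dlib/dnn.h>
#include <dlib/image_processing.h>
#include <dlib/image_processing/frontal_face_detector.h>
#include <string>
#include <vector>

using namespace dlib;

// ResNet definition taken from dlib's dnn_face_recognition_ex.cpp example.
template <template <int,template<typename>class,int,typename> class block, int N, template<typename>class BN, typename SUBNET>
using residual = add_prev1<block<N,BN,1,tag1<SUBNET>>>;

template <template <int,template<typename>class,int,typename> class block, int N, template<typename>class BN, typename SUBNET>
using residual_down = add_prev2<avg_pool<2,2,2,2,skip1<tag2<block<N,BN,2,tag1<SUBNET>>>>>>;

template <int N, template <typename> class BN, int stride, typename SUBNET>
using block  = BN<con<N,3,3,1,1,relu<BN<con<N,3,3,stride,stride,SUBNET>>>>>;

template <int N, typename SUBNET> using ares      = relu<residual<block,N,affine,SUBNET>>;
template <int N, typename SUBNET> using ares_down = relu<residual_down<block,N,affine,SUBNET>>;

template <typename SUBNET> using alevel0 = ares_down<256,SUBNET>;
template <typename SUBNET> using alevel1 = ares<256,ares<256,ares_down<256,SUBNET>>>;
template <typename SUBNET> using alevel2 = ares<128,ares<128,ares_down<128,SUBNET>>>;
template <typename SUBNET> using alevel3 = ares<64,ares<64,ares<64,ares_down<64,SUBNET>>>>;
template <typename SUBNET> using alevel4 = ares<32,ares<32,ares<32,SUBNET>>>;

using anet_type = loss_metric<fc_no_bias<128,avg_pool_everything<
                            alevel0<
                            alevel1<
                            alevel2<
                            alevel3<
                            alevel4<
                            max_pool<3,3,2,2,relu<affine<con<32,7,7,2,2,
                            input_rgb_image_sized<150>
                            >>>>>>>>>>>>;

// One database entry: the aligned 150x150 face chip, its 128D feature and the person's name.
struct FACE_DESC
{
    matrix<rgb_pixel>   face_chip;
    matrix<float, 0, 1> face_feature;
    std::string         name;
};

// Hypothetical container for the models and the in-memory face database.
class FACE_RECOGNITION
{
public:
    int load_db_faces(void);               // implemented below
    shape_predictor        sp;             // 68-point landmark model
    anet_type              net;            // 128D face-embedding network
    std::vector<FACE_DESC> face_desc_vec;  // loaded database entries
};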
int FACE_RECOGNITION::load_db_faces(void)
{
    int rc = -1;
    long hFile = 0;
    struct _finddata_t fileinfo;
    frontal_face_detector detector = get_frontal_face_detector();
    // We will also use a face landmarking model to align faces to a standard pose:
    // (see face_landmark_detection_ex.cpp for an introduction)
    deserialize("shape_predictor_68_face_landmarks.dat") >> sp;
    // And finally we load the DNN responsible for face recognition.
    deserialize("dlib_face_recognition_resnet_model_v1.dat") >> net;
    if ((hFile = _findfirst(".\\faces\\*.jpg", &fileinfo)) != -1)
    {
        do
        {
            if ((fileinfo.attrib & _A_ARCH))
            {
                if (strcmp(fileinfo.name, ".") != 0 && strcmp(fileinfo.name, "..") != 0)
                {
                    if (!strcmp(strstr(fileinfo.name, ".") + 1, "jpg"))
                    {
                        cout << "This file is an image file!" << fileinfo.name << endl;
                        matrix<rgb_pixel> img;
                        char path[260];
                        sprintf_s(path, ".\\faces\\%s", fileinfo.name);
                        load_image(img, path);
                        image_window win(img);
                        for (auto face : detector(img))
                        {
                            auto shape = sp(img, face);
                            matrix<rgb_pixel> face_chip;
                            extract_image_chip(img, get_face_chip_details(shape, 150, 0.25), face_chip);
                            // Record all of this face's information
                            FACE_DESC sigle_face;
                            sigle_face.face_chip = face_chip;
                            sigle_face.name = fileinfo.name;
                            std::vector<matrix<rgb_pixel>> face_chip_vec;
                            std::vector<matrix<float, 0, 1>> face_all;
                            face_chip_vec.push_back(move(face_chip));
                            // Ask the DNN to convert each face image in faces into a 128D vector
                            face_all = net(face_chip_vec);
                            // Get the feature of this person
                            std::vector<matrix<float, 0, 1>>::iterator iter_begin = face_all.begin(),
                                iter_end = face_all.end();
                            if (face_all.size() > 1) break;
                            sigle_face.face_feature = *iter_begin;
                            // Put the whole person description into the vector
                            face_desc_vec.push_back(sigle_face);
                            win.add_overlay(face);
                        }
                    }
                    else
                    {
                        cout << "This file is not image file!" << fileinfo.name << endl;
                    }
                }
            }
            else
            {
                //files.push_back(p.assign(path).append("\\").append(fileinfo.name));
            }
        } while (_findnext(hFile, &fileinfo) == 0);
        _findclose(hFile);
    }
    return rc;
}
Face detection is, in my view, a crucial step in a face-recognition system, because the quality of detection directly affects the final recognition rate; doing as well as possible at the detection stage gives the whole system a sizeable boost. Below is the face detection implementation (it is rather crude, please bear with it). I tried dlib's face detector, OpenCV's face detector, and Shiqi Yu's libfacedetection, and found libfacedetection to be the best of the three for face detection: it is fast and the detection results are also very good. (For comparison, a minimal OpenCV cascade sketch follows this listing.)
int capture_face(Mat frame, Mat &out)
{
    Mat gray;
    Mat face;
    int rc = -1;
    if (frame.empty() || !frame.data) return -1;
    cvtColor(frame, gray, CV_BGR2GRAY);
    int *pResults = NULL;
    unsigned char *pBuffer = (unsigned char *)malloc(DETECT_BUFFER_SIZE);
    if (!pBuffer)
    {
        fprintf(stderr, "Can not alloc buffer.\n");
        return -1;
    }
    //pResults = facedetect_frontal_tmp((unsigned char*)(gray.ptr(0)), gray.cols, gray.rows, gray.step,
    //    1.2f, 5, 24);
    pResults = facedetect_multiview_reinforce(pBuffer, (unsigned char*)(gray.ptr(0)), gray.cols, gray.rows, (int)gray.step,
        1.2f, 2, 48, 0, 1);
    //printf("%d faces detected.\n", (pResults ? *pResults : 0)); // prints repeatedly
    // print the detection results
    if (pResults != NULL)
    {
        for (int i = 0; i < (pResults ? *pResults : 0); i++)
        {
            short *p = ((short*)(pResults + 1)) + 6 * i;
            int x = p[0];
            int y = p[1];
            int w = p[2];
            int h = p[3];
            int neighbors = p[4];
            Rect_<float> face_rect = Rect_<float>(x, y, w, h);
            face = frame(face_rect);
            printf("face_rect=[%d, %d, %d, %d], neighbors=%d\n", x, y, w, h, neighbors);
            Point left(x, y);
            Point right(x + w, y + h);
            cv::rectangle(frame, left, right, Scalar(230, 255, 0), 4);
        }
        //imshow("frame", frame);
        if (face.empty() || !face.data)
        {
            face_detect_count = 0;
            free(pBuffer);    // release the detector's working buffer before returning
            return -1;
        }
        if (face_detect_count++ > 30)
        {
            imshow("face", face);
            out = face.clone();
            free(pBuffer);
            return 0;
        }
    }
    else
    {
        // face is moving, reset the detect count
        face_detect_count = 0;
    }
    free(pBuffer);
    return rc;
}
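For comparison, this is roughly what the OpenCV cascade-based detection mentioned above looks like. It is only a minimal sketch: the cascade file name and the parameters are illustrative, and the function is not part of the project above.

#include <opencv2/opencv.hpp>
using namespace cv;

// Minimal Haar-cascade face detection, for comparison with the libfacedetection version above.
// Returns 0 and fills 'out' with the first detected face, -1 otherwise.
int capture_face_opencv(Mat frame, Mat& out)
{
    // The cascade XML ships with OpenCV; adjust the path to your installation.
    static CascadeClassifier cascade("haarcascade_frontalface_alt2.xml");
    if (frame.empty() || cascade.empty()) return -1;

    Mat gray;
    cvtColor(frame, gray, COLOR_BGR2GRAY);
    equalizeHist(gray, gray);

    std::vector<Rect> faces;
    cascade.detectMultiScale(gray, faces, 1.2, 3, 0, Size(48, 48));
    if (faces.empty()) return -1;

    rectangle(frame, faces[0], Scalar(230, 255, 0), 4);
    out = frame(faces[0]).clone();
    return 0;
}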
The face produced by capture_face() is temporarily saved as cap.jpg in the project directory. get_face_chip_details() normalizes the detected face to 150*150 pixels, rotating and centering it; extract_image_chip() takes a copy of that region and stores it in face_chip; the resulting face_chip is pushed into the vect_faces container and fed to the deep neural network net, which yields the 128D feature vector of the captured face. Finally, we search the previously loaded face database for the face whose feature is closest to this one, which gives us the corresponding person's information.
This kind of application is what we call a 1:N application, and 1:N searches really tax a system's computing power. For example, Alipay must already have accounts on the order of hundreds of millions of users; if you chose to pay by face at a restaurant, the server might still not have found your face after half an hour, which would be rather sad. In a real deployment you would probably also be asked to enter your name first, which speeds things up enormously: across the whole country there are at most a few thousand or tens of thousands of people sharing your name, so a name lookup followed by face recognition as verification is enough.
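To make the 1:N versus 1:1 point concrete, here is a small hypothetical sketch of such a name-first lookup. None of this exists in the project above; it only illustrates how entering a name turns the problem into a handful of 1:1 comparisons.

#include <dlib/matrix.h>
#include <string>
#include <unordered_map>
#include <vector>

// Hypothetical 1:1-style lookup: the user first enters a name, and only the faces
// enrolled under that name are compared against the captured 128D feature.
std::unordered_map<std::string, std::vector<dlib::matrix<float, 0, 1>>> faces_by_name;

bool verify_by_name(const std::string& name,
                    const dlib::matrix<float, 0, 1>& captured_feature,
                    double threshold = 0.45)
{
    auto it = faces_by_name.find(name);
    if (it == faces_by_name.end()) return false;
    for (const auto& feature : it->second)
        if (dlib::length(captured_feature - feature) < threshold)
            return true;   // at least one enrolled sample matches
    return false;
}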
And that is before considering safety factors: identical twins, makeup (the era of internet celebrities), aging, and environmental factors such as lighting and pose, all of which can cause false matches or failures to recognize. A failure to recognize is tolerable, but a false match is a disaster for applications such as payment, where the security requirements are extremely strict. So face recognition still has significant limitations. Well, I have wandered off topic again.
matrix<rgb_pixel> face_cap;
// save the capture in the project directory
load_image(face_cap, ".\\cap.jpg");
// Display the raw image on the screen
image_window win1(face_cap);
frontal_face_detector detector = get_frontal_face_detector();
std::vector<matrix<rgb_pixel>> vect_faces;
for (auto face : detector(face_cap))
{
    auto shape = face_recognize.sp(face_cap, face);
    matrix<rgb_pixel> face_chip;
    extract_image_chip(face_cap, get_face_chip_details(shape, 150, 0.25), face_chip);
    vect_faces.push_back(move(face_chip));
    win1.add_overlay(face);
}
if (vect_faces.size() != 1)
{
    cout << "Capture face error! face number " << vect_faces.size() << endl;
    cap.release();
    goto CAPTURE;
}
// Use DNN and get the capture face's feature with 128D vector
std::vector<matrix<float, 0, 1>> face_cap_desc = face_recognize.net(vect_faces);
// Browse the face feature from the database, and find the match one
std::pair<double, std::string> candidate_face;
std::vector<double> len_vec;
std::vector<std::pair<double, std::string>> candi_face_vec;
candi_face_vec.reserve(256);
for (size_t i = 0; i < face_recognize.face_desc_vec.size(); ++i)
{
    auto len = length(face_cap_desc[0] - face_recognize.face_desc_vec[i].face_feature);
    if (len < 0.45)
    {
        len_vec.push_back(len);
        candidate_face.first = len;
        candidate_face.second = face_recognize.face_desc_vec[i].name.c_str();
        candi_face_vec.push_back(candidate_face);
#ifdef _FACE_RECOGNIZE_DEBUG
        char buffer[256] = { 0 };
        sprintf_s(buffer, "Candidate face %s Euclid length %f",
                  face_recognize.face_desc_vec[i].name.c_str(),
                  len);
        MessageBox(CString(buffer), NULL, MB_YESNO);
#endif
    }
    else
    {
        cout << "This face from database is not match the capture face, continue!" << endl;
    }
}
// Find the most similar face
if (len_vec.size() != 0)
{
    shellSort(len_vec);
    int i(0);
    for (i = 0; i != len_vec.size(); i++)
    {
        if (len_vec[0] == candi_face_vec[i].first)
            break;
    }
    char buffer[256] = { 0 };
    sprintf_s(buffer, "The face is %s -- Euclid length %f",
              candi_face_vec[i].second.c_str(), candi_face_vec[i].first);
    if (MessageBox(CString(buffer), NULL, MB_YESNO) == IDNO)
    {
        face_record();
    }
}
else
{
    if (MessageBox(CString("Not the similar face been found"), NULL, MB_YESNO) == IDYES)
    {
        face_record();
    }
}
face_detect_count = 0;
frame.release();
face.release();
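As a side note, the shellSort plus linear scan above can be folded into a single pass. The sketch below does the same nearest-match selection with std::min_element, keeping the 0.45 threshold; FACE_DESC is the structure sketched earlier, so treat this as an illustration rather than code from the project.

#include <algorithm>
#include <vector>
#include <dlib/matrix.h>

// Find the database entry closest to the captured 128D feature in one pass.
// Returns an iterator into face_desc_vec, or end() if nothing is within the threshold.
std::vector<FACE_DESC>::const_iterator
find_best_match(const dlib::matrix<float, 0, 1>& captured,
                const std::vector<FACE_DESC>& face_desc_vec,
                double threshold = 0.45)
{
    auto best = std::min_element(face_desc_vec.begin(), face_desc_vec.end(),
        [&](const FACE_DESC& a, const FACE_DESC& b)
        {
            return dlib::length(captured - a.face_feature)
                 < dlib::length(captured - b.face_feature);
        });
    if (best == face_desc_vec.end() ||
        dlib::length(captured - best->face_feature) >= threshold)
        return face_desc_vec.end();   // no sufficiently similar face
    return best;
}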
When a face or an object moves quickly in front of the camera, the system throws an exception, with an error message like the one discussed below.
To deal with this, we can first use C++'s exception-handling facilities, try and catch, to capture the exception:
Mat frame;
Mat face;
VideoCapture cap(0);
if (!cap.isOpened()) {
    AfxMessageBox(_T("Please check your USB camera's interface num."));
}
try
{
    while (1)
    {
        check_close(cap);
        cap >> frame;
        if (!frame.empty())
        {
            if (capture_face(frame, face) == 0)
            {
                // convert to IplImage format and then save with .jpg format
                IplImage face_Img;
                face_Img = IplImage(face);
                // save the captured face to the project directory
                cvSaveImage("./cap.jpg", &face_Img);
                break;
            }
            imshow("view", frame);
        }
        int c = waitKey(10);
        if ((char)c == 'c') { break; }
    }
}
catch (exception& e)
{
    cout << "\nexception thrown!" << endl;
    cout << e.what() << endl;
#ifdef _CAPTURE_DEBUG
    MessageBox(CString(e.what()), NULL, MB_YESNO);
#endif
    goto CAPTURE;
}
In the catch block we print the captured exception information.
As you can see, probably because the camera cannot deliver frames fast enough, the frame obtained from cap >> frame is malformed. The error dialog shows error (-215) 0 <= roi.x, meaning the x coordinate of OpenCV's region of interest is negative: a negative value appears where a non-negative one is required, so OpenCV throws the exception.
That is fine: we simply ignore this bad frame. By suppressing the dialog display in the catch block where the exception is handled, image capture runs smoothly; ignoring the bad frame just means dropping it.
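Another option, instead of dropping the whole frame, would be to clamp the detected rectangle to the image bounds inside capture_face() before taking the ROI, so that the assertion can never fire. This is only a sketch of that idea, using the x, y, w, h values from the detection loop shown earlier; it is not part of the original code.

// Clamp the detected rectangle to the frame before cropping, so that
// frame(face_rect) can never trigger the "0 <= roi.x" assertion.
cv::Rect face_rect(x, y, w, h);
face_rect &= cv::Rect(0, 0, frame.cols, frame.rows);   // intersect with the image
if (face_rect.width > 0 && face_rect.height > 0)
    face = frame(face_rect);
else
    face_detect_count = 0;   // treat it like a missed detection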