Machine Learning: Face Recognition with SVM


Load the face dataset that ships with sklearn

from sklearn.datasets import fetch_lfw_people
faces = fetch_lfw_people()

Running the second line above makes Python download the Labeled Faces in the Wild (LFW) dataset, which is roughly 200 MB. Because of the firewall, the download is very slow and often fails.

Instead, you can download the dataset from Baidu Cloud; it comes as a .tgz archive.

Link: https://pan.baidu.com/s/1eySjV_1K2XYD5YYKCxiVEw
Extraction code: 3wut

Put the downloaded archive into C:\Users\Tim\scikit_learn_data\lfw_home (where Tim is my username), then run faces = fetch_lfw_people() again. This time it succeeds, and the output in the Jupyter notebook looks like this:

from sklearn.datasets import fetch_lfw_people
faces = fetch_lfw_people(min_faces_per_person=60)
print(faces.target_names)
print(faces.images.shape)
['Donald Rumsfeld' 'George W Bush' 'Gerhard Schroeder' 'Junichiro Koizumi'
 'Tony Blair']
(964, 62, 47)
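As a side note, if you would rather keep the archive somewhere other than the default scikit_learn_data folder, fetch_lfw_people also accepts a data_home argument. A minimal sketch (the path below is only an example, not part of the original setup):

from sklearn.datasets import fetch_lfw_people

# Hypothetical path: scikit-learn will look for (or create) an lfw_home
# folder under this directory instead of the default user cache folder.
faces = fetch_lfw_people(min_faces_per_person=60, data_home=r"D:\datasets")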
# After the previous step, we can also take a look at what the images look like
import matplotlib.pyplot as plt
%matplotlib inline
fig, ax = plt.subplots(3, 5)
for i, axi in enumerate(ax.flat):
    axi.imshow(faces.images[i], cmap='bone')
    axi.set(xticks=[], yticks=[],
            xlabel=faces.target_names[faces.target[i]])

Solving face recognition (in Jupyter)

Face recognition is a classification problem, and since SVM holds a dominant position in classical machine learning (deep learning aside), we use an SVM to train on the images.

# SVC: support vector classifier, used for the classification problem
from sklearn.svm import SVC
# The image dimensionality is too high, so reduce it with PCA first
from sklearn.decomposition import PCA
# Pipeline
from sklearn.pipeline import make_pipeline

pca = PCA(n_components=150, whiten=True, random_state=42)
svc = SVC(kernel='rbf', class_weight='balanced')
model = make_pipeline(pca, svc)
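make_pipeline names each step after its class in lowercase, so the two steps are called 'pca' and 'svc'; this is why the grid-search parameters later are written as svc__C and svc__gamma. A quick way to check the available names (a small sketch, not in the original post):

# The auto-generated step names: 'pca' and 'svc'.
print(model.named_steps)
# All tunable parameters, including the prefixed 'svc__C' and 'svc__gamma'.
print(sorted(model.get_params().keys()))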

As in the previous step, load the data and take a look at it at the same time.

from sklearn.datasets import fetch_lfw_people
faces = fetch_lfw_people(min_faces_per_person=60)
print(faces.target_names)
print(faces.images.shape)
import matplotlib.pyplot as plt
%matplotlib inline
fig, ax = plt.subplots(3, 5)
for i, axi in enumerate(ax.flat):
    axi.imshow(faces.images[i], cmap='bone')
    axi.set(xticks=[], yticks=[],
            xlabel=faces.target_names[faces.target[i]])

Split into training and test sets

from sklearn.model_selection import train_test_split
Xtrain, Xtest, ytrain, ytest = train_test_split(faces.data, faces.target,
                                                random_state=40)
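train_test_split holds out 25% of the samples by default, which is where the 241 test images in the classification report below come from. A quick sanity check (a sketch):

# Roughly 3/4 of the 964 images go to training, 1/4 to testing;
# each image is flattened to 62 * 47 = 2914 pixels.
print(Xtrain.shape, Xtest.shape)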
# Train the model
from sklearn.model_selection import GridSearchCV
param_grid = {'svc__C': [1, 5, 10], 'svc__gamma': [0.0001, 0.0005, 0.001]}
grid = GridSearchCV(model, param_grid)
%time grid.fit(Xtrain, ytrain)
GridSearchCV(cv='warn', error_score='raise-deprecating',
             estimator=Pipeline(memory=None,
                                steps=[('pca',
                                        PCA(copy=True, iterated_power='auto',
                                            n_components=150, random_state=42,
                                            svd_solver='auto', tol=0.0,
                                            whiten=True)),
                                       ('svc',
                                        SVC(C=1.0, cache_size=200,
                                            class_weight='balanced', coef0=0.0,
                                            decision_function_shape='ovr',
                                            degree=3, gamma='auto_deprecated',
                                            kernel='rbf', max_iter=-1,
                                            probability=False,
                                            random_state=None, shrinking=True,
                                            tol=0.001, verbose=False))],
                                verbose=False),
             iid='warn', n_jobs=None,
             param_grid={'svc__C': [1, 5, 10],
                         'svc__gamma': [0.0001, 0.0005, 0.001]},
             pre_dispatch='2*n_jobs', refit=True, return_train_score=False,
             scoring=None, verbose=0)
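The cv='warn' and iid='warn' entries in the output come from an older scikit-learn release, where the default number of cross-validation folds was in the process of changing from 3 to 5. If you want to silence the warning, you can pass cv explicitly; a sketch (not part of the original post):

from sklearn.model_selection import GridSearchCV

# Explicit 5-fold cross-validation; matches the default in newer
# scikit-learn versions and avoids the deprecation warning.
grid = GridSearchCV(model, param_grid, cv=5)
%time grid.fit(Xtrain, ytrain)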

print(grid.best_params_)

{'svc__C': 10, 'svc__gamma': 0.0001}
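Besides the best parameter combination, the fitted grid also records the mean cross-validated accuracy of that combination (a sketch):

# Mean cross-validated score achieved by svc__C=10, svc__gamma=0.0001.
print(grid.best_score_)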

Check the results on the test set

model = grid.best_estimator_
yfit = model.predict(Xtest)
yfit.shape
import matplotlib as mpl
# Prevent font errors when rendering Chinese characters in the figure
mpl.rcParams["font.sans-serif"] = ["SimHei"]
mpl.rcParams["axes.unicode_minus"] = False
# Plot the test images
fig, ax = plt.subplots(4, 6)
for i, axi in enumerate(ax.flat):
    # Reshape the flat pixel vector back to a 62x47 image
    axi.imshow(Xtest[i].reshape(62, 47), cmap='bone')
    axi.set(xticks=[], yticks=[])
    # Take the last word of the target name
    axi.set_ylabel(faces.target_names[yfit[i]].split()[-1],
                   color='black' if yfit[i] == ytest[i] else 'red')
fig.suptitle('預測錯誤的名字被紅色標注', size=14);  # "Incorrectly predicted names are marked in red"
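The figure only shows 24 of the test images, with incorrect predictions highlighted in red; to count how many predictions are wrong on the whole test set, compare yfit with ytest directly (a sketch):

import numpy as np

# Number of misclassified faces out of the 241 test images.
print(np.sum(yfit != ytest))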

 

We can see that four of the displayed faces are predicted incorrectly, so the accuracy is not ideal. Below we print the classification report.

from sklearn.metrics import classification_report
print(classification_report(ytest, yfit,
                            target_names=faces.target_names))
                   precision    recall  f1-score   support

  Donald Rumsfeld       0.75      0.87      0.81        31
    George W Bush       0.97      0.92      0.94       124
Gerhard Schroeder       0.80      0.83      0.81        29
Junichiro Koizumi       1.00      1.00      1.00        16
       Tony Blair       0.85      0.85      0.85        41

         accuracy                           0.90       241
        macro avg       0.87      0.89      0.88       241
     weighted avg       0.90      0.90      0.90       241
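The 0.90 accuracy in the report can also be computed directly (a sketch):

from sklearn.metrics import accuracy_score

# Should match the 'accuracy' row of the classification report (about 0.90).
print(accuracy_score(ytest, yfit))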

Finally, plot the confusion matrix with seaborn's heatmap.

import seaborn as sns
from sklearn.metrics import confusion_matrix
# Confusion matrix
mat = confusion_matrix(ytest, yfit)
# Note how the confusion matrix is plotted here: it is transposed first
sns.heatmap(mat.T, square=True, annot=True, fmt='d', cbar=False,
            xticklabels=faces.target_names,
            yticklabels=faces.target_names)
plt.xlabel('true label')
plt.ylabel('predicted label');
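Because the matrix is transposed before plotting, each column corresponds to a true label and each row to a prediction. Normalizing by the number of test samples in each true class turns the counts into per-class recall, which can be easier to read; a sketch (not in the original post):

# Divide each row of the (untransposed) confusion matrix by that class's
# total number of test samples, giving per-class recall.
mat_norm = mat / mat.sum(axis=1, keepdims=True)
sns.heatmap(mat_norm.T, square=True, annot=True, fmt='.2f', cbar=False,
            xticklabels=faces.target_names,
            yticklabels=faces.target_names)
plt.xlabel('true label')
plt.ylabel('predicted label');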

 

If you are not familiar with SVM, you can go back and take a look at the post on SVM principles (svm原理).

