加載sklearn中的人臉數據集

# Load the LFW people face dataset through scikit-learn's dataset loader.
# The first call tries to download the archive from the network.
from sklearn.datasets import fetch_lfw_people
faces = fetch_lfw_people()

執行上面的第二行程序時，Python會從網上下載Labeled Faces in the Wild（LFW）people數據集。這個數據集大概200M，因為牆的原因下載很慢，而且容易失敗。

使用百度雲下載該數據集，是個.tgz的壓縮包

鏈接：https://pan.baidu.com/s/1eySjV_1K2XYD5YYKCxiVEw
提取碼：3wut

把下載好的壓縮包放入C:\Users\Tim\scikit_learn_data\lfw_home，其中Tim是我的用戶名，再次運行faces = fetch_lfw_people()，成功，jupyter notebook中的輸出如下：

from sklearn.datasets import fetch_lfw_people
# Keep only the people who have at least 60 pictures in the dataset.
faces = fetch_lfw_people(min_faces_per_person=60)
# Names of the people that remain (these are the class labels).
print(faces.target_names)
# Shape of the image array: (number of images, height, width).
print(faces.images.shape)

['Donald Rumsfeld' 'George W Bush' 'Gerhard Schroeder' 'Junichiro Koizumi'
 'Tony Blair']
(964, 62, 47)

# After the previous step, take a look at what the images look like.
import matplotlib.pyplot as plt
%matplotlib inline
# Show the first 15 faces in a 3x5 grid of axes.
fig, ax = plt.subplots(3, 5)
for i, axi in enumerate(ax.flat):
    axi.imshow(faces.images[i], cmap='bone')
    # Hide the ticks and label each face with the name of the person shown.
    axi.set(xticks=[], yticks=[],
            xlabel=faces.target_names[faces.target[i]])

解決人臉識別（jupyter）

人臉識別是一個分類問題，因為機器學習中SVM（支持向量機）屬於王霸地位（深度學習不算），所以使用SVM對圖像進行訓練。

# SVC: support vector classifier, used to solve the classification problem
from sklearn.svm import SVC
# The images have too many dimensions, so reduce the dimensionality first
from sklearn.decomposition import PCA
# Pipeline helper
from sklearn.pipeline import make_pipeline

# Reduce each image to 150 whitened principal components; the fixed
# random_state keeps the decomposition reproducible.
pca = PCA(n_components=150, whiten=True, random_state=42)
# RBF-kernel SVM; class_weight='balanced' compensates for the uneven number
# of pictures per person.
svc = SVC(kernel='rbf', class_weight='balanced')
# Chain the two steps so PCA runs before the classifier.
model = make_pipeline(pca, svc)

和上一步一樣看看我們數據的同時加載數據

from sklearn.datasets import fetch_lfw_people
# Keep only the people who have at least 60 pictures in the dataset.
faces = fetch_lfw_people(min_faces_per_person=60)
# Print the class names and the (images, height, width) shape of the data.
print(faces.target_names)
print(faces.images.shape)
import matplotlib.pyplot as plt
%matplotlib inline
# Show the first 15 faces in a 3x5 grid of axes.
fig, ax = plt.subplots(3, 5)
for i, axi in enumerate(ax.flat):
    axi.imshow(faces.images[i], cmap='bone')
    # Hide the ticks and label each face with the name of the person shown.
    axi.set(xticks=[], yticks=[],
            xlabel=faces.target_names[faces.target[i]])

切分訓練集和測試集

from sklearn.model_selection import train_test_split
# Split the flattened images (faces.data) and their labels into training and
# test sets; the fixed random_state makes the split reproducible.
Xtrain, Xtest, ytrain, ytest = train_test_split(faces.data, faces.target,
                                                random_state=40)

# Train the model
from sklearn.model_selection import GridSearchCV
# Candidate hyperparameters; the 'svc__' prefix routes each one to the
# 'svc' step of the pipeline.
param_grid = {'svc__C': [1, 5, 10],
              'svc__gamma': [0.0001, 0.0005, 0.001]}
grid = GridSearchCV(model, param_grid)

# Fit every combination in the grid; IPython's %time reports how long it takes.
%time grid.fit(Xtrain, ytrain)

GridSearchCV(cv='warn', error_score='raise-deprecating',
             estimator=Pipeline(memory=None,
                                steps=[('pca',
                                        PCA(copy=True, iterated_power='auto',
                                            n_components=150, random_state=42,
                                            svd_solver='auto', tol=0.0,
                                            whiten=True)),
                                       ('svc',
                                        SVC(C=1.0, cache_size=200,
                                            class_weight='balanced', coef0=0.0,
                                            decision_function_shape='ovr',
                                            degree=3, gamma='auto_deprecated',
                                            kernel='rbf', max_iter=-1,
                                            probability=False,
                                            random_state=None, shrinking=True,
                                            tol=0.001, verbose=False))],
                                verbose=False),
             iid='warn', n_jobs=None,
             param_grid={'svc__C': [1, 5, 10],
                         'svc__gamma': [0.0001, 0.0005, 0.001]},
             pre_dispatch='2*n_jobs', refit=True, return_train_score=False,
             scoring=None, verbose=0)

# Show the hyperparameter combination the grid search selected.
print(grid.best_params_)

{'svc__C': 10, 'svc__gamma': 0.0001}

查看測試集的測試結果

# Use the best pipeline found by the grid search to predict the test set.
model = grid.best_estimator_
yfit = model.predict(Xtest)
yfit.shape
import matplotlib as mpl
# Use a font that contains Chinese glyphs so the Chinese title renders
mpl.rcParams["font.sans-serif"] = ["SimHei"]
mpl.rcParams["axes.unicode_minus"] = False
# Each row of Xtest is a flattened image. Take the 2-D image size from the
# dataset itself instead of hard-coding 62x47, so the plot keeps working if
# the dataset is loaded at a different size.
image_shape = faces.images.shape[1:]
# Plot the first 24 test faces in a 4x6 grid
fig, ax = plt.subplots(4, 6)
for i, axi in enumerate(ax.flat):
    # Restore the flattened pixels to a (height, width) image
    axi.imshow(Xtest[i].reshape(image_shape), cmap='bone')
    axi.set(xticks=[], yticks=[])
    # Label with the last word of the predicted name; red marks a wrong
    # prediction, black a correct one
    axi.set_ylabel(faces.target_names[yfit[i]].split()[-1],
                   color='black' if yfit[i] == ytest[i] else 'red')
fig.suptitle('預測錯誤的名字被紅色標注', size=14);

可以看到預測錯誤了四個，准確率欠佳，下面打印分類報告

from sklearn.metrics import classification_report
# Per-person precision, recall, F1 and support on the test set, with the
# rows labelled by name.
print(classification_report(ytest, yfit,
                            target_names=faces.target_names))

                   precision    recall  f1-score   support

  Donald Rumsfeld       0.75      0.87      0.81        31
    George W Bush       0.97      0.92      0.94       124
Gerhard Schroeder       0.80      0.83      0.81        29
Junichiro Koizumi       1.00      1.00      1.00        16
       Tony Blair       0.85      0.85      0.85        41

         accuracy                           0.90       241
        macro avg       0.87      0.89      0.88       241
     weighted avg       0.90      0.90      0.90       241

最后使用seaborn的heatmap打印混淆矩陣

import seaborn as sns
from sklearn.metrics import confusion_matrix
# Confusion matrix
mat = confusion_matrix(ytest, yfit)
# Note how the confusion matrix is drawn: it is transposed so that the true
# labels run along the x-axis and the predicted labels along the y-axis,
# matching the axis labels set below.
sns.heatmap(mat.T, square=True, annot=True, fmt='d', cbar=False,
            xticklabels=faces.target_names,
            yticklabels=faces.target_names)
plt.xlabel('true label')
plt.ylabel('predicted label');

對於SVM不懂的，可以回頭看一下SVM原理。

免責聲明！

本站轉載的文章為個人學習借鑒使用，本站對版權不負任何法律責任。如果侵犯了您的隱私權益，請聯系本站郵箱yoyou2525@163.com刪除。

猜您在找 [機器學習][face recognition] 一個視頻人臉識別實現機器學習Python實現 SVD 分解初識機器學習-人臉識別 [python] 機器學習卷積神經網絡用遷移學習實現人臉識別 Python實現人臉識別功能，face_recognition的使用 | 機器學習基於機器學習人臉識別face recognition具體的算法和原理機器學習練習之人臉識別(SVM) 矩陣SVD在機器學習中的應用機器學習相關——SVD分解 AI識別照片是誰，人臉識別face_recognition開源項目安裝使用 | 機器學習