代碼:
# -- coding: gbk -- from sklearn.datasets import load_breast_cancer from pylab import * from sklearn.model_selection import train_test_split from sklearn.linear_model import LogisticRegression from sklearn.svm import LinearSVC from sklearn.datasets import make_blobs import mglearn def LogisticRegression二分類線性模型(): cancer = load_breast_cancer() X_train, X_test, y_train, y_test = train_test_split( cancer.data, cancer.target, stratify=cancer.target, random_state=42) '''構建模型''' logreg = LogisticRegression().fit(X_train, y_train) #print(logreg.predict()) '''評測''' print("Training set score: {:.3f}".format(logreg.score(X_train, y_train))) print("Test set score: {:.3f}".format(logreg.score(X_test, y_test))) '''增加C擬合靈活度——————更高訓練集精度''' logreg100 = LogisticRegression(C=100).fit(X_train, y_train) print("Training set score: {:.3f}".format(logreg100.score(X_train, y_train))) print("Test set score: {:.3f}".format(logreg100.score(X_test, y_test))) def LinearSVC一對其余分類器(): X, y = make_blobs(random_state=42) linear_svm = LinearSVC().fit(X, y) ''' coef_的形狀是(3, 2),說明coef_每行包含三個類別之一的系數向量, 每列包含某個特征(這個數據集有2個特征)對應的系數值。 現在intercept_是一維數組,保存每個類別的截距。 ''' print(linear_svm.coef_) # 特征 print(linear_svm.intercept_) # 截距 mglearn.discrete_scatter(X[:, 0], X[:, 1], y) line = np.linspace(-15, 15) print(line) for coef, intercept, color in zip(linear_svm.coef_, linear_svm.intercept_, ['b', 'r', 'g']): plt.plot(line, -(line * coef[0] + intercept) / coef[1], c=color) plt.ylim(-10, 15) plt.xlim(-10, 8) plt.xlabel("Feature 0") plt.ylabel("Feature 1") plt.legend(['Class 0', 'Class 1', 'Class 2', 'Line class 0', 'Line class 1', 'Line class 2'], loc=(1.01, 0.3)) plt.show() if __name__ =='__main__': cancer = load_breast_cancer() X_train, X_test, y_train, y_test = train_test_split( cancer.data, cancer.target, stratify=cancer.target, random_state=42) logreg = LogisticRegression().fit(X_train, y_train) y_pred=logreg.predict(X_test) print(np.mean(y_pred==y_test))