1. Logistic Regression
# Read the data
import pandas as pd

default = pd.read_csv('Default.csv')
print(default)

## Data preprocessing
## Encode the categorical columns as 0/1
for item in ['student', 'default']:
    default[item] = default[item].replace({'No': 0, 'Yes': 1})
print(default)

## Min-Max normalization
default[['balance', 'income']] = default[['balance', 'income']].apply(
    lambda x: (x - x.min()) / (x.max() - x.min()))
print(default)

## Split into training and test sets
X = default.drop('default', axis=1)
y = default['default']
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.15, random_state=10)

# Build the model
from sklearn.linear_model import LogisticRegression
model_LR = LogisticRegression(class_weight='balanced', random_state=10)

# Train the model
model_LR.fit(X_train, y_train)

# Evaluate the model (mean accuracy on the test set)
model_eval = model_LR.score(X_test, y_test)
print(model_eval)
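The script above reports accuracy alone, but class_weight='balanced' hints that defaults are rare in Default.csv, and accuracy can look deceptively high on imbalanced data. Below is a minimal evaluation sketch, assuming model_LR, X_test and y_test from the script above are still in scope; roc_auc_score and confusion_matrix are standard sklearn.metrics functions, not part of the original exercise.

# Evaluation sketch (assumption: model_LR, X_test, y_test defined as above)
from sklearn.metrics import roc_auc_score, confusion_matrix

# Predicted probability of the positive class (default = 1)
proba = model_LR.predict_proba(X_test)[:, 1]

# ROC AUC summarizes ranking quality and is less affected by class imbalance than accuracy
print('ROC AUC:', roc_auc_score(y_test, proba))

# Confusion matrix at the default 0.5 decision threshold
print(confusion_matrix(y_test, model_LR.predict(X_test)))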
# Import the required packages
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report
import seaborn as sns

# Data handling
data = pd.read_csv('caesarian.csv')
y = data['Caesarian']
X = data.drop('Caesarian', axis=1)

# Split into training and test sets
x_train, x_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=10)

# Build the logistic regression model
# (note: the 'sag' solver converges faster when the features are standardized)
clf = LogisticRegression(class_weight='balanced', random_state=10, solver='sag')
clf.fit(x_train, y_train)
y_pred = clf.predict(x_test)

# Classification accuracy
score = clf.score(x_test, y_test)
print(score)

# Classification report (precision, recall, F1 per class)
report = classification_report(y_test, y_pred)
print(report)
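Because logistic regression is a linear model, the fitted coefficients give a rough sense of each feature's direction of influence. A small inspection sketch, assuming clf and X from the script above; since the features here are not scaled, the coefficient magnitudes are not directly comparable across columns.

# Coefficient-inspection sketch (assumption: clf and X defined as above)
import pandas as pd

# Positive coefficients push predictions toward Caesarian = 1, negative ones toward 0
coef_table = pd.Series(clf.coef_[0], index=X.columns).sort_values()
print(coef_table)
print('intercept:', clf.intercept_[0])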
2. Naive Bayes
# Import the required packages
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import MultinomialNB

# Data handling
data = pd.read_csv('caesarian.csv')
y = data['Caesarian']
X = data.drop('Caesarian', axis=1)

# Split into training and test sets
x_train, x_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=10)

# Build the model (multinomial naive Bayes with smoothing parameter alpha=0.01)
MNB = MultinomialNB(alpha=0.01)
clf = MNB.fit(x_train, y_train)

# Print the accuracy
score = clf.score(x_test, y_test)
print(score)
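MultinomialNB assumes count-like, non-negative features, while a column such as Age is closer to a continuous measurement. As a sketch (not part of the original exercise), GaussianNB can be fitted on the same split for comparison:

# Comparison sketch (assumption: x_train, x_test, y_train, y_test defined as above)
from sklearn.naive_bayes import GaussianNB

# GaussianNB models each feature as a per-class normal distribution,
# which may fit continuous columns such as Age better than a count model
gnb = GaussianNB().fit(x_train, y_train)
print(gnb.score(x_test, y_test))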
3. Support Vector Machine
# Import the required packages
import pandas as pd
import numpy as np

# Data handling
data = pd.read_csv('caesarian.csv')
y = data['Caesarian']
X = data[['Age', 'Delivery Number', 'Delivery time', 'Blood of Pressure', 'Heart Problem']]

# Split into training and test sets
from sklearn.model_selection import train_test_split
x_train, x_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=10)

# Compare kernel functions in order: linear, polynomial, sigmoid, RBF (Gaussian)
from sklearn.svm import SVC
kernels = ['linear', 'poly', 'sigmoid', 'rbf']
kernel_scores = []
for kernel in kernels:
    kernel_score = SVC(kernel=kernel, random_state=10).fit(x_train, y_train).score(x_test, y_test)
    kernel_scores.append(kernel_score)
print(kernel_scores)

# Import the required packages
import pandas as pd
import numpy as np

# Data handling
data = pd.read_csv('caesarian.csv')
caesarian = data['Caesarian']
feature = ['Age', 'Delivery Number', 'Delivery time', 'Blood of Pressure', 'Heart Problem']
traindata = data[feature]
print(traindata.head(4))

# Split into training and test sets
from sklearn.model_selection import train_test_split
x_train, x_test, y_train, y_test = train_test_split(traindata, caesarian, train_size=0.8, random_state=10)

# Load the model class
from sklearn.svm import LinearSVC

# Build the model
lsvm = LinearSVC(C=0.68, random_state=10)

# Train the model
lsvm.fit(x_train, y_train)

# Evaluate the model
score = lsvm.score(x_test, y_test)
print(score)
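SVMs are sensitive to feature scale, and the caesarian features mix an age column with near-binary indicators. Below is a hedged sketch of standardizing before LinearSVC via a Pipeline, reusing x_train, x_test, y_train and y_test from the LinearSVC script above; the pipeline itself is an addition, not from the original exercise.

# Scaling sketch (assumption: x_train, x_test, y_train, y_test defined as above)
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.svm import LinearSVC

# Standardize each feature before fitting the linear SVM
scaled_lsvm = make_pipeline(StandardScaler(), LinearSVC(C=0.68, random_state=10))
scaled_lsvm.fit(x_train, y_train)
print(scaled_lsvm.score(x_test, y_test))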
# Import the required packages
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC

# Data handling
data = pd.read_csv('caesarian.csv')
y = data['Caesarian']
X = data.drop('Caesarian', axis=1)

# Split into training and test sets
x_train, x_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=10)

# Configure the SVM parameters
clf = SVC(kernel='linear', verbose=True)
clf.fit(x_train, y_train)

# Classification accuracy
score = clf.score(x_test, y_test)
print(score)

# Support vectors
SV = clf.support_vectors_
print('\n\n', SV)

# Indices of the support vectors of the positive and negative classes
S = clf.support_
print('\n\n', S)

# Number of support vectors in each class
NS = clf.n_support_
print('\n\n', NS)

# Coefficients of the separating hyperplane (only defined for the linear kernel)
C = clf.coef_
print('\n\n', C)
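Rather than fixing the kernel and C by hand, a small grid search over both can be run with cross-validation on the training set. A sketch under the same variable names as the script above; the parameter grid here is an arbitrary example, not from the original.

# Parameter-search sketch (assumption: x_train, y_train, x_test, y_test defined as above)
from sklearn.model_selection import GridSearchCV
from sklearn.svm import SVC

param_grid = {'C': [0.1, 1, 10], 'kernel': ['linear', 'rbf']}
search = GridSearchCV(SVC(random_state=10), param_grid, cv=5)
search.fit(x_train, y_train)
print(search.best_params_, search.best_score_)
print(search.score(x_test, y_test))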
4. K-Nearest Neighbors
# Import the required packages
import pandas as pd
import numpy as np

# Data handling
data = pd.read_csv('caesarian.csv')
caesarian = data['Caesarian']
feature = ['Age', 'Delivery Number', 'Delivery time', 'Blood of Pressure', 'Heart Problem']
traindata = data[feature]

# Split into training and test sets
from sklearn.model_selection import train_test_split
x_train, x_test, y_train, y_test = train_test_split(traindata, caesarian, train_size=0.8, random_state=10)

# Import the model
from sklearn.neighbors import KNeighborsClassifier

# Print the classification accuracy for k = 2, 4, 6, 8
K_scores = [KNeighborsClassifier(n_neighbors=k).fit(x_train, y_train).score(x_test, y_test)
            for k in range(2, 10, 2)]
print(K_scores)
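Choosing k from a single train/test split can be noisy on a data set of this size; the same sweep can be cross-validated instead. A sketch reusing traindata and caesarian from the script above (the 5-fold setting is an assumption, not from the original).

# k-selection sketch (assumption: traindata and caesarian defined as above)
from sklearn.model_selection import cross_val_score
from sklearn.neighbors import KNeighborsClassifier

for k in range(2, 10, 2):
    scores = cross_val_score(KNeighborsClassifier(n_neighbors=k), traindata, caesarian, cv=5)
    print(k, scores.mean())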
5. Decision Tree
# Import the required packages
import pandas as pd

# Data handling
data = pd.read_csv('caesarian.csv')
caesarian = data['Caesarian']
feature = ['Age', 'Delivery Number', 'Delivery time', 'Blood of Pressure', 'Heart Problem']
traindata = data[feature]

# Split into training and test sets
from sklearn.model_selection import train_test_split
x_train, x_test, y_train, y_test = train_test_split(traindata, caesarian, test_size=0.2, random_state=10)

# Train the model
from sklearn.tree import DecisionTreeClassifier
DF_model = DecisionTreeClassifier(random_state=10)
DF_model.fit(x_train, y_train)

# Model prediction and test-set accuracy
y_pred = DF_model.predict(x_test)
result = round(DF_model.score(x_test, y_test), 4)
print(result)

# Inspect the feature importances and plot them
ax = pd.Series(DF_model.feature_importances_, index=x_train.columns).sort_values().plot(
    kind='barh', title='Feature importance')
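Beyond the importance bar chart, the fitted tree's rules can be printed directly; a sketch using sklearn's export_text, assuming DF_model and x_train from the script above. Limiting max_depth is a common (assumed, not from the original) way to rein in overfitting on a small data set.

# Tree-inspection sketch (assumption: DF_model and x_train defined as above)
from sklearn.tree import export_text

# Print the learned splitting rules as plain text
print(export_text(DF_model, feature_names=list(x_train.columns)))

# Optional: refit with a depth limit to reduce overfitting (assumption, not in the original)
# DF_model = DecisionTreeClassifier(max_depth=3, random_state=10).fit(x_train, y_train)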