# -*- coding: utf-8 -*-
"""Logistic regression with automatic feature selection.

Reads the bank-loan spreadsheet, treats the first ``N_FEATURES`` columns as
features and the following column as the label, selects a subset of the
features, and then fits a plain logistic regression on the selected features.
The accuracy reported at the end is measured on the training data itself.
"""
import numpy as np
import pandas as pd
from sklearn.linear_model import LogisticRegression as LR

try:
    # Deprecated in scikit-learn 0.19 and removed in 0.21.
    from sklearn.linear_model import RandomizedLogisticRegression as RLR
except ImportError:
    RLR = None

# Parameter initialisation.
filename = '../data/bankloan.xls'
N_FEATURES = 8  # columns [0, 8) are features, column 8 is the label


def build_selector():
    """Return an unfitted feature selector exposing ``fit`` and ``get_support``.

    Uses ``RandomizedLogisticRegression`` when the installed scikit-learn
    still ships it. Otherwise falls back to an L1-penalised logistic
    regression wrapped in ``SelectFromModel``.

    NOTE(review): the fallback is a different selection method, so the
    features it keeps may differ from those of the randomized model.
    """
    if RLR is not None:
        return RLR()
    from sklearn.feature_selection import SelectFromModel
    return SelectFromModel(LR(penalty='l1', solver='liblinear'))


def main(path=filename):
    """Run feature selection and logistic regression on the file at *path*."""
    # Reading through pandas means the header row needs no special handling.
    data = pd.read_excel(path)
    # ``.values`` replaces ``as_matrix()``, which was removed in pandas 1.0.
    x = data.iloc[:, :N_FEATURES].values
    y = data.iloc[:, N_FEATURES].values

    # Fit the selector used for feature selection / variable screening.
    rlr = build_selector()
    rlr.fit(x, y)

    # Build a mask over *all* columns: selected features are True, every
    # remaining column (including the label) stays False, so the mask can
    # index ``data.columns`` directly.
    egeList = np.zeros(len(data.columns), dtype=bool)
    egeList[:N_FEATURES] = rlr.get_support()
    print("rlr.get_support():")
    print(egeList)
    print(u'隨機邏輯回歸模型特征選擇結束!!!')
    print(u'有效特征為:%s' % ','.join(data.columns[egeList]))

    # Keep only the selected feature columns.
    x = data[data.columns[egeList]].values

    # Fit the logistic regression on the selected features.
    lr = LR()
    lr.fit(x, y)
    print(u'邏輯回歸訓練模型結束!!!')
    # Mean accuracy on the training data (81.4% in this example).
    print(u'模型的平均正確率:%s' % lr.score(x, y))


if __name__ == '__main__':
    main()
D:\Download\python3\python3.exe "D:\Program Files\JetBrains\PyCharm 2017.3.3\helpers\pydev\pydev_run_in_console.py" 56033 56034 "E:/A正在學習/python data dig/chapter5/demo/code/5-1_logistic_regression.py" Running E:/A正在學習/python data dig/chapter5/demo/code/5-1_logistic_regression.py import sys; print('Python %s on %s' % (sys.version, sys.platform)) sys.path.extend(['E:\\A正在學習\\python data dig', 'E:/A正在學習/python data dig/chapter5/demo/code']) C:\Users\Snow\AppData\Roaming\Python\Python35\site-packages\sklearn\utils\deprecation.py:58: DeprecationWarning: Class RandomizedLogisticRegression is deprecated; The class RandomizedLogisticRegression is deprecated in 0.19 and will be removed in 0.21. warnings.warn(msg, category=DeprecationWarning) rlr.get_support(): [False False True True False True True False False] 隨機邏輯回歸模型特征選擇結束!!! 有效特征為:工齡,地址,負債率,信用卡負債 邏輯回歸訓練模型結束!!! 模型的平均正確率:0.8142857142857143 PyDev console: starting. Python 3.5.4 (v3.5.4:3f56838, Aug 8 2017, 02:17:05) [MSC v.1900 64 bit (AMD64)] on win32