随机森林特征选择


import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from sklearn.ensemble import RandomForestClassifier

# Random-forest feature selection.
#
# Inputs (defined elsewhere, not in this snippet):
#   X -- feature table; must expose `.columns` and `.shape` (e.g. a pandas
#        DataFrame), because selected features are reported by column name.
#   y -- target values passed straight to `RandomForestClassifier.fit`.
#
# Outputs:
#   selected_feat_names -- set of column names whose importance reached
#                          MIN_IMPORTANCE in at least one of the fits.
#   imp_fea             -- pandas Series (column name -> importance) for the
#                          features kept by the LAST fit only, ordered from
#                          most to least important.

N_RUNS = 10  # number of independent forests to fit
MIN_IMPORTANCE = 0.0001  # keep a feature when its importance is >= this

selected_feat_names = set()
for _ in range(N_RUNS):
    # No random_state is passed, so each fit may rank features differently;
    # that is why the selection is repeated and accumulated.
    rfc = RandomForestClassifier(n_jobs=-1)
    rfc.fit(X, y)

    importances = rfc.feature_importances_
    indices = np.argsort(importances)[::-1]  # positions sorted by descending importance

    # Dict insertion order follows `indices`, i.e. most important first.
    kept = {
        X.columns[idx]: importances[idx]
        for idx in indices
        if importances[idx] >= MIN_IMPORTANCE
    }

    # NOTE(review): the original comment described this as taking the
    # intersection across runs, but `|=` accumulates the UNION. The union is
    # kept here because it is what the code actually did (an intersection
    # seeded with an empty set would always stay empty).
    selected_feat_names |= set(kept)

# Only the final fit's importances are retained, as in the original script.
imp_fea = pd.Series(kept)

print(len(selected_feat_names), "features are selected")


免责声明!

本站转载的文章为个人学习借鉴使用,本站对版权不负任何法律责任。如果侵犯了您的隐私权益,请联系本站邮箱yoyou2525@163.com删除。



 
粤ICP备18138465号  © 2018-2025 CODEPRJ.COM