`make_classification` 用於創建分類任務的數據集,詳見 scikit-learn 官方文檔。
例子:
### Create the model
def create_model():
    """Generate a synthetic dataset, split it, and fit a logistic regression.

    The train/validation/test splits (``x_train``, ``x_valid``, ``x_test``,
    ``y_train``, ``y_valid``, ``y_test``) and the fitted ``model`` are stored
    as module-level globals, which the other functions in this script read.
    """
    global x_train, x_valid, x_test, y_train, y_valid, y_test
    global model

    # Generate the data
    from sklearn.datasets import make_classification
    X, y = make_classification(
        n_samples=10000,         # number of samples
        n_features=25,           # total number of features
        n_informative=3,         # number of informative features
        n_redundant=2,           # redundant features (random combinations of informative ones)
        n_repeated=0,            # repeated features (drawn from informative and redundant ones)
        n_classes=3,             # number of classes
        n_clusters_per_class=1,  # number of clusters per class
        random_state=0,
    )
    print("原始特征維度", X.shape)

    # Load the data
    # NOTE: the data is synthetic, so nothing is read from disk here; a real
    # dataset could be loaded at this point (e.g. with pandas.read_csv).
    print("讀取數據")

    # Split the data: hold out 25% as the test set, then hold out 25% of the
    # remaining samples as the validation set.
    print("數據划分")
    from sklearn.model_selection import train_test_split
    x_train, x_test, y_train, y_test = train_test_split(
        X, y, random_state=33, test_size=0.25)
    x_train, x_valid, y_train, y_valid = train_test_split(
        x_train, y_train, random_state=33, test_size=0.25)

    # Create and fit the model
    print("創建模型")
    from sklearn.linear_model import LogisticRegression
    model = LogisticRegression(penalty='l2').fit(x_train, y_train)


### Save the model
def save_model():
    """Serialize the global ``model`` to ``model.pkl`` in the working directory.

    ``create_model()`` must have been called first so that ``model`` exists.
    """
    print("保存模型")
    # ``sklearn.externals.joblib`` was deprecated in scikit-learn 0.21 and
    # removed in 0.23; prefer the standalone ``joblib`` package and fall back
    # to the legacy location only for old environments.
    try:
        import joblib
    except ImportError:
        from sklearn.externals import joblib
    joblib.dump(model, 'model.pkl')


### Validate the model
def validate_model():
    """Print the global ``model``'s score on the validation split."""
    print("模型驗證")
    print(model.score(x_valid, y_valid))


### Predict with the model
def predict_model():
    """Predict class probabilities for the test split and print them.

    The result is also stored in the module-level global ``pred``.
    """
    global pred
    print("模型預測")
    pred = model.predict_proba(x_test)
    print(pred)


if __name__ == "__main__":
    create_model()
    save_model()
    validate_model()
    predict_model()