make_classification 創建用於分類的數據集,詳見 scikit-learn 官方文檔。
例子:
### Create model
def create_model():
    """Generate a synthetic dataset, split it, and fit a logistic regression.

    The train/validation/test partitions and the fitted classifier are
    published as module-level globals (``x_train``, ``x_valid``, ``x_test``,
    ``y_train``, ``y_valid``, ``y_test`` and ``model``) so that the other
    functions in this file can use them.
    """
    global x_train, x_valid, x_test, y_train, y_valid, y_test
    global model

    # Generate data
    from sklearn.datasets import make_classification
    dataset_options = dict(
        n_samples=10000,         # number of samples
        n_features=25,           # number of features
        n_informative=3,         # number of informative features
        n_redundant=2,           # redundant features (random combinations of informative ones)
        n_repeated=0,            # repeated features (taken from informative and redundant ones)
        n_classes=3,             # number of classes
        n_clusters_per_class=1,  # number of clusters per class
        random_state=0,
    )
    features, labels = make_classification(**dataset_options)
    print("原始特征維度", features.shape)

    # Read data (placeholder: nothing is loaded from disk here; the synthetic
    # data generated above is what gets used)
    print("讀取數據")

    # Split data: hold out 25% as the test set, then carve 25% of the
    # remainder off as the validation set.
    print("數據划分")
    from sklearn.model_selection import train_test_split
    remainder_x, x_test, remainder_y, y_test = train_test_split(
        features, labels, random_state=33, test_size=0.25
    )
    x_train, x_valid, y_train, y_valid = train_test_split(
        remainder_x, remainder_y, random_state=33, test_size=0.25
    )

    # Create model
    print("創建模型")
    from sklearn.linear_model import LogisticRegression
    classifier = LogisticRegression(penalty='l2')
    # fit() returns the estimator itself, so ``model`` is the fitted classifier
    model = classifier.fit(x_train, y_train)
### Save model
def save_model():
    """Serialize the global ``model`` to ``model.pkl`` in the working directory.

    Reads the module-level ``model`` that ``create_model()`` assigns, so
    ``create_model()`` must have been called first.
    """
    print("保存模型")
    # ``sklearn.externals.joblib`` was deprecated in scikit-learn 0.21 and
    # removed in 0.23. Prefer the standalone ``joblib`` package and only fall
    # back to the vendored copy on old installs that lack it.
    try:
        import joblib
    except ImportError:
        from sklearn.externals import joblib
    joblib.dump(model, 'model.pkl')
### Validate model
def validate_model():
    """Print the global ``model``'s score on the validation split.

    Relies on the ``model``, ``x_valid`` and ``y_valid`` globals assigned by
    ``create_model()``.
    """
    print("模型驗證")
    validation_score = model.score(x_valid, y_valid)
    print(validation_score)
### Model prediction
def predict_model():
    """Compute predicted probabilities for the test split and print them.

    Relies on the ``model`` and ``x_test`` globals assigned by
    ``create_model()``; the result is stored in the module-level ``pred``.
    """
    global pred
    print("模型預測")
    probabilities = model.predict_proba(x_test)
    pred = probabilities
    print(pred)
if __name__ == "__main__":
    # Run the pipeline stages in order; each later stage reads the globals
    # that create_model() assigns.
    for pipeline_step in (create_model, save_model, validate_model, predict_model):
        pipeline_step()
