from sklearn.datasets import make_classification:創建分類數據集


        make_classification 用於創建分類任務的數據集,詳見 scikit-learn 官方文檔。

例子:

### 創建模型
def create_model():
    """Generate a synthetic dataset, split it, and fit a logistic regression.

    Nothing is returned. Results are published through module-level globals:
    ``x_train``/``y_train``, ``x_valid``/``y_valid``, ``x_test``/``y_test``
    (the three data splits) and ``model`` (the fitted classifier).
    """
    global x_train, x_valid, x_test, y_train, y_valid, y_test, model

    from sklearn.datasets import make_classification
    from sklearn.linear_model import LogisticRegression
    from sklearn.model_selection import train_test_split

    # Generate the data.
    dataset_options = {
        "n_samples": 10000,         # number of samples
        "n_features": 25,           # total number of features
        "n_informative": 3,         # number of informative features
        "n_redundant": 2,           # redundant features (random linear
                                    # combinations of the informative ones)
        "n_repeated": 0,            # duplicated features (drawn at random from
                                    # the informative and redundant ones)
        "n_classes": 3,             # number of target classes
        "n_clusters_per_class": 1,  # clusters per class
        "random_state": 0,          # fixed seed for reproducible data
    }
    X, y = make_classification(**dataset_options)

    print("原始特征維度",X.shape)

    # "Read data" step: only the banner is printed; no file is loaded because
    # the dataset above is synthetic.
    print("讀取數據")

    # Split the data: hold out 25% as the test set, then 25% of the remainder
    # as the validation set.
    print("數據划分")
    x_rest, x_test, y_rest, y_test = train_test_split(
        X, y, test_size=0.25, random_state=33
    )
    x_train, x_valid, y_train, y_valid = train_test_split(
        x_rest, y_rest, test_size=0.25, random_state=33
    )

    # Build and fit the model.
    print("創建模型")
    model = LogisticRegression(penalty='l2')
    model.fit(x_train, y_train)

### 保存模型    
def save_model():
    """Serialize the global ``model`` to ``model.pkl`` in the working directory.

    Reads the module-level ``model`` variable, so it must have been assigned
    (e.g. by ``create_model``) before this function is called.
    """
    print("保存模型")
    # Recent scikit-learn releases no longer bundle joblib under
    # ``sklearn.externals``; prefer the standalone package (a scikit-learn
    # dependency) and fall back to the legacy location on old installs.
    try:
        import joblib
    except ImportError:
        from sklearn.externals import joblib
    joblib.dump(model, 'model.pkl')

### 模型驗證   
def validate_model():
    """Print the global ``model``'s score on the validation split.

    Reads the module-level ``model``, ``x_valid`` and ``y_valid`` variables and
    prints whatever ``model.score`` returns for them.
    """
    print("模型驗證")
    validation_score = model.score(x_valid, y_valid)
    print(validation_score)
    
### 模型預測
def predict_model():
    """Predict class probabilities for the test split and print them.

    Reads the module-level ``model`` and ``x_test`` variables and stores the
    result of ``model.predict_proba`` in the module-level ``pred`` variable.
    """
    global pred

    print("模型預測")
    probabilities = model.predict_proba(x_test)
    pred = probabilities
    print(probabilities)
    
if __name__ == "__main__":
    # Run the pipeline in order; each later step relies on the globals that
    # create_model() assigns.
    for pipeline_step in (create_model, save_model, validate_model, predict_model):
        pipeline_step()
    

  


免責聲明!

本站轉載的文章為個人學習借鑒使用,本站對版權不負任何法律責任。如果侵犯了您的隱私權益,請聯系本站郵箱yoyou2525@163.com刪除。



 
粵ICP備18138465號   © 2018-2025 CODEPRJ.COM