Keras人工神經網絡多分類(SGD)


import numpy as np
import pandas as pd
from keras.models import Sequential
from keras.layers import Dense, Dropout
from keras.wrappers.scikit_learn import KerasClassifier
from keras.utils import np_utils
from sklearn.model_selection import train_test_split, KFold, cross_val_score
from sklearn.preprocessing import LabelEncoder
from keras.optimizers import SGD
from keras.layers import LSTM

# Load the dataset. The CSV has no header row; columns 0-18 are read as the
# 19 input features and column 19 as the class label.
dataframe = pd.read_csv("./data/iris1.csv", header=None)
dataset = dataframe.values
X = dataset[:, 0:19].astype(float)
# Cast the labels to int so they can be used as array indices even when the
# CSV is parsed into a float (or object) array.
dummy_y1 = dataset[:, 19].astype(int)

# One-hot encode the labels into 6 columns: a non-zero label k sets column
# k - 1, and label 0 sets the last column (index 5).
# The row count is taken from the data rather than hard-coded, so the encoded
# matrix always has exactly one row per row of X.
m, n = len(dummy_y1), 6
dum_imax = np.zeros((m, n))
label_columns = np.where(dummy_y1 != 0, dummy_y1 - 1, 5)
dum_imax[np.arange(m), label_columns] = 1
dummy_y = dum_imax
print(dummy_y)
print(type(dummy_y[0][0]))

def baseline_model():
    """Build and compile the feed-forward classifier used by the estimator.

    Architecture: 19 inputs -> Dense(50, relu) -> Dropout(0.4) ->
    Dense(6, softmax).

    Returns:
        A compiled ``Sequential`` model that uses categorical cross-entropy
        loss, SGD with Nesterov momentum, and reports accuracy.
    """
    model = Sequential()
    # The layer width is passed positionally: the keyword was renamed from
    # ``output_dim`` (Keras 1) to ``units`` (Keras 2), while the first
    # positional argument carries the same meaning in both.
    model.add(Dense(50, input_dim=19, activation='relu'))
    model.add(Dropout(0.4))
    # Only the first layer needs an explicit input size; this layer takes its
    # input from the preceding layer's output.
    model.add(Dense(6, activation='softmax'))
    # Compile the model. This is a multi-class problem (6 softmax outputs
    # trained against one-hot targets), so the loss is
    # categorical_crossentropy and the optimizer is SGD with momentum.
    sgd = SGD(lr=0.01, decay=1e-6, momentum=0.9, nesterov=True)
    model.compile(loss='categorical_crossentropy', optimizer=sgd, metrics=['accuracy'])
    return model
# Wrap the model factory in KerasClassifier so it can be handed to
# scikit-learn's cross_val_score further down. nb_epoch and batch_size are
# supplied here as the wrapper's training parameters.
estimator = KerasClassifier(build_fn=baseline_model, nb_epoch=40, batch_size=256)
print(estimator)

# Split the data into a training set and a test set. Because random_state is
# set to an integer, the resulting split is the same on every run.
X_train, X_test, Y_train, Y_test = train_test_split(X, dummy_y, test_size=0.2, random_state=0)  # randomly hold out 20% of the samples as test data
# Print the number of feature columns and the number of one-hot label columns.
print(len(X_train[0]))
print(len(Y_train[0]))
estimator.fit(X_train, Y_train,nb_epoch = 100)  # train the model; nb_epoch=100 is passed for this fit call

# Make predictions on the held-out test set and print both the inputs and
# the predicted classes.
print(X_test)
pred = estimator.predict(X_test)
print(pred)

# Disabled: map the numeric predictions back to the original categorical
# labels. NOTE(review): `encoder` is not defined anywhere in the visible
# code, so these lines cannot simply be re-enabled.
# init_lables = encoder.inverse_transform(pred)
# print(init_lables)

# k-fold cross-validation
# seed = 42
# np.random.seed(seed)
# The bare string below is the original author's note on the KFold
# parameters n_splits (the K in K-fold, minimum 2), shuffle (shuffles the
# data) and random_state (random seed).
# NOTE(review): the defaults it quotes may be stale for the installed
# scikit-learn version - confirm before relying on them.
'''
n_splits : 默認3,最小為2;K折驗證的K值
shuffle : 默認False;shuffle會對數據產生隨機攪動(洗牌)
random_state :默認None,隨機種子
'''
# No random_state is passed and the seed lines above are commented out, so
# presumably the folds differ between runs.
kfold = KFold(n_splits=5, shuffle=True)  # 5 folds; shuffle the data before splitting it

results = cross_val_score(estimator, X, dummy_y, cv=kfold)  # evaluate the estimator on the full dataset with k-fold cross-validation
print("baseline:%.2f%%(%.2f%%)"%(results.mean()*100,results.std()*100))  # results holds one score per fold (5 here, per n_splits=5); print the mean and standard deviation as percentages

 


免責聲明!

本站轉載的文章為個人學習借鑒使用,本站對版權不負任何法律責任。如果侵犯了您的隱私權益,請聯繫本站郵箱yoyou2525@163.com刪除。



 
粵ICP備18138465號   © 2018-2025 CODEPRJ.COM