鳶尾花數據集的導入及查看:
①鳶尾花數據集的導入:
from sklearn.datasets import load_iris
②查看鳶尾花數據集:
# Load the iris dataset object that the following prints inspect.
iris = load_iris()

# Print the whole dataset object, then its description text.
print("鳶尾花數據集:\n", iris)
print("查看數據集描述:\n", iris.DESCR)

# Print the feature names, then the feature data together with its shape.
print("查看特征值的名字:\n", iris.feature_names)
print(
    "查看特征數據:\n",
    iris.data,
    iris.data.shape,
)

# Print the target names, then the target data.
print("查看目標值名字:\n", iris.target_names)
print("查看目標數據:\n", iris.target)
划分數據集:
①導入train_test_split函數:
from sklearn.model_selection import train_test_split
②划分數據集:數據集划分為訓練集和測試集
# Split features and targets into train/test parts; test_size=0.2 is the
# proportion assigned to the test set.
x_train, x_test, y_train, y_test = train_test_split(
    iris.data,
    iris.target,
    test_size=0.2,
)
注:iris.data為數據集的特征值,iris.target為數據集的目標值,test_size為測試集的划分比例(可省,默認為0.25)。返回值依次為:
x_train:訓練集的特征值
x_test:測試集的特征值
y_train:訓練集的目標值
y_test:測試集的目標值
完整代碼:
from sklearn.datasets import load_iris  # dataset loader
from sklearn.model_selection import train_test_split


def datatest():
    """Load the iris dataset, print its contents, then print a train/test split.

    The function takes no arguments and returns None; all results are written
    to standard output. The split uses test_size=0.2, and no random_state is
    passed to train_test_split.
    """
    # Load the dataset
    iris = load_iris()
    print("鳶尾花數據集:\n", iris)
    print("查看數據集描述:\n", iris.DESCR)
    print("查看特征值的名字:\n", iris.feature_names)
    print("查看特征數據:\n", iris.data, iris.data.shape)
    print("查看目標值名字:\n", iris.target_names)
    print("查看目標數據:\n", iris.target)

    # Split the dataset into training and test parts
    x_train, x_test, y_train, y_test = train_test_split(
        iris.data, iris.target, test_size=0.2
    )

    # Show each part together with its shape
    print(x_train, x_train.shape)
    print(x_test, x_test.shape)
    print(y_train, y_train.shape)
    print(y_test, y_test.shape)


if __name__ == '__main__':
    datatest()