數據集官網下載;
jupyter notebook 實現;
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

# Load the iris data file, summarise it, and plot sepal length against
# sepal width for all samples.

fname = 'E:\\pythonwork\\project\\Deeplearning\\Task\\data\\iris.data'

# Column names: four measurements followed by the class label.
names = ['slength', 'swidth', 'plength', 'pwidth', 'name']

# The file is comma-separated with no header row. read_csv opens it
# read-only, skips blank lines (the source of the stray empty row the
# manual parser produced) and does not depend on a trailing newline.
iris = pd.read_csv(fname, header=None, names=names, encoding='utf-8')

# Guard against any remaining incomplete rows.
iris.dropna(axis=0, how='any', inplace=True)

# Make the four measurement columns numeric BEFORE slicing per class, so
# the per-class frames below hold floats rather than raw strings.
# astype raises if a non-numeric value slipped into the file.
feature_cols = names[:4]
iris[feature_cols] = iris[feature_cols].astype(float)

# Three classes; the positional slices rely on the file listing
# 50 samples per class, in order.
seto = iris.iloc[0:50, :]
vers = iris.iloc[50:100, :]
virg = iris.iloc[100:150, :]
print(seto.shape)
print(vers.shape)

# Count how many samples each class has.
print(iris['name'].value_counts())

# Scatter plot of slength against swidth for all samples.
plt.scatter(x=iris['slength'], y=iris['swidth'])
plt.show()
#-------------------
# Plot slength against swidth with one colour/marker per class.
# Each class uses its OWN swidth column for y (previously the second and
# third calls paired their slength with seto's swidth).
# Values are coerced to float so the axes are numeric even if the
# per-class frames still hold the raw strings read from the file.
plt.scatter(
    x=seto['slength'].astype(float),
    y=seto['swidth'].astype(float),
    color='red',
)
plt.scatter(
    x=vers['slength'].astype(float),
    y=vers['swidth'].astype(float),
    color='blue',
    marker="+",
)
plt.scatter(
    x=virg['slength'].astype(float),
    y=virg['swidth'].astype(float),
    color='green',
    marker='*',
)
plt.xlabel('s length')
plt.ylabel('s width')
plt.show()