1:統計直方圖:先設定分箱區間(bin_list),然後讀入數據,即可繪製頻數分布直方圖
import pandas as pd

# Bin edges for the frequency table: fine 0.2-wide steps up to 4.0, then
# progressively wider bins out to 50000 to cover the long right tail.
bin_list = [
    0, 0.2, 0.4, 0.6, 0.8, 1.0, 1.2, 1.4, 1.6, 1.8,
    2.0, 2.2, 2.4, 2.6, 2.8, 3.0, 3.2, 3.4, 3.6, 3.8,
    4.0, 4.5, 5.0, 5.5, 6.0, 6.5, 7.0, 7.5, 8.0,
    10.0, 15.0, 20.0, 30.0, 50.0, 100.0, 500.0, 1000.0, 5000.0, 50000.0,
]


def make_bin_labels(bin_edges):
    """Return one "low~high" label for each pair of consecutive bin edges.

    Args:
        bin_edges: Sequence of bin edges in ascending order.

    Returns:
        A list with ``len(bin_edges) - 1`` labels (empty when fewer than two
        edges are given).
    """
    return [f"{low}~{high}" for low, high in zip(bin_edges, bin_edges[1:])]


def build_frequency_table(values, bin_edges):
    """Count how many values fall into each bin.

    Bins are right-closed, ``(low, high]``, except the first one, which also
    includes its lower edge so that values equal to ``bin_edges[0]`` (e.g. 0)
    are counted instead of being silently dropped. Values outside
    ``[bin_edges[0], bin_edges[-1]]`` are not counted.

    Args:
        values: Iterable of numbers to bin.
        bin_edges: Sequence of bin edges in ascending order.

    Returns:
        A DataFrame with a ``section`` column (bin labels, in bin order) and
        a ``frequency`` column (count per bin, 0 for empty bins).
    """
    labels = make_bin_labels(bin_edges)
    binned = pd.cut(
        pd.Series(values), bin_edges, labels=labels, include_lowest=True
    )
    # value_counts() orders by count; sort_index() restores the bin order.
    counts = binned.value_counts().sort_index()
    return pd.DataFrame({'section': counts.index, 'frequency': counts.values})


def plot_frequency_chart(freq_data):
    """Draw the frequency table as an annotated bar chart and display it.

    Args:
        freq_data: DataFrame with ``section`` and ``frequency`` columns, as
            returned by ``build_frequency_table``.
    """
    # Imported here so the binning helpers above stay usable without a
    # plotting stack installed.
    import matplotlib.pyplot as plt
    import seaborn as sns

    # SimHei is needed to render the Chinese title.
    plt.rcParams["font.sans-serif"] = ["SimHei"]

    # NOTE(review): 300x100 in at 150 dpi is a 45000x15000-pixel canvas; kept
    # as in the original - reduce it if rendering is slow or exhausts memory.
    ax = plt.figure(figsize=(300, 100), dpi=150).add_subplot(111)
    sns.barplot(x="section", y="frequency", data=freq_data, palette="Set3", ax=ax)

    # Leave about 20% headroom above the tallest bar for the count labels;
    # fall back to 1 so an all-empty table does not produce a 0..0 axis.
    tallest = freq_data["frequency"].max()
    ax.set_ylim(0, max(int(tallest + 0.2 * tallest), 1))
    ax.set_title('總體正確頻數分布', size=20)
    # Optional horizontal reference line:
    #   plt.axhline(y=2000, ls=":", c="red", lw=4, label='2000分界線')
    #   plt.legend()
    plt.xticks(fontsize=4)
    plt.yticks(fontsize=10)

    # Print each bar's count just above it (bars sit at x = 0, 1, 2, ...).
    for position, count in enumerate(freq_data["frequency"]):
        ax.text(position, count, '%d' % count, ha='center', va='bottom',
                fontsize=8, color='black')
    plt.show()


def run_histogram():
    """Load the Ar_tpm table, print summary statistics, and plot the histogram."""
    # Alternative input used previously: column 'test' of
    # "./data/0-10test_result.xlsx", loaded with pd.read_excel.
    rawdata = pd.read_csv("./data/Ar_tpm.tsv", sep='\t')
    print(rawdata.describe())
    plot_frequency_chart(build_frequency_table(rawdata['Ar_tpm'], bin_list))


if __name__ == "__main__":
    run_histogram()
2:統計散點圖:輸入Excel,格式為index、test和result三列,即可繪製散點圖
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np

# Scatter plot comparing ground-truth and predicted values per sample.
# The sheet is read for three columns: 'index', 'test' and 'result'.
results = pd.read_excel("./data/test_result.xlsx")
# SimHei is needed to render the Chinese title.
plt.rcParams["font.sans-serif"] = ["SimHei"]
print(results)

sample_index = results['index']  # used as the x coordinate of every point
truth_values = results['test']
predicted_values = results['result']

# Fixed y-axis range with a tick every 0.1.
y_upper = 0.75
plt.ylim(0, y_upper)
plt.yticks(np.arange(0, y_upper, 0.1))

# Ground truth gets a higher zorder so its points are drawn on top of the
# predicted ones; the legend labels below follow this drawing order.
series_to_plot = (
    (truth_values, {"zorder": 3}),
    (predicted_values, {}),
)
for values, extra_style in series_to_plot:
    plt.scatter(sample_index, values, marker='.', **extra_style)

plt.legend(['groundtruth', 'predict'])
plt.title("總體測試集預測結果分布圖")
plt.show()