1:统计直方图:先设置分箱区间(bin_list),再输入数据即可
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns

# Frequency-distribution bar chart: bucket one numeric column into the
# hand-picked bins below, count rows per bin, and draw the counts as bars.

# Presumably selected so the Chinese title renders; verify the font is installed.
plt.rcParams["font.sans-serif"] = ["SimHei"]

# Alternative input source kept from the original script:
# rawdata = pd.read_excel("./data/0-10test_result.xlsx")
# print(rawdata.describe())
# rawdata = rawdata['test']
rawdata = pd.read_csv("./data/Ar_tpm.tsv", sep='\t')
print(rawdata.describe())
rawdata = rawdata['Ar_tpm']

# Bin edges (non-uniform: fine steps near zero, coarse steps for the tail).
bin_list = [
    0, 0.2, 0.4, 0.6, 0.8, 1.0, 1.2, 1.4, 1.6, 1.8,
    2.0, 2.2, 2.4, 2.6, 2.8, 3.0, 3.2, 3.4, 3.6, 3.8,
    4.0, 4.5, 5.0, 5.5, 6.0, 6.5, 7.0, 7.5, 8.0,
    10.0, 15.0, 20.0, 30.0, 50.0, 100.0, 500.0, 1000.0, 5000.0, 50000.0,
]

# One "low~high" label per pair of adjacent edges.
split_str = [f"{low}~{high}" for low, high in zip(bin_list, bin_list[1:])]

# pd.cut bins are right-closed by default, so without include_lowest a value
# exactly equal to the first edge (0) lands in no bin and is silently dropped.
data_split = pd.cut(rawdata, bin_list, labels=split_str, include_lowest=True)

# Count rows per bin, then restore the label (bin) order.
freq_chart = data_split.value_counts().sort_index()

# Two-column table (bin label, count) in the shape seaborn expects.
freq_data = pd.DataFrame(
    {'section': freq_chart.index, 'frequency': freq_chart.values}
)

# NOTE(review): 300x100 inches at 150 dpi is a 45000x15000 pixel canvas;
# confirm this size is intended.
ax = plt.figure(figsize=(300, 100), dpi=150).add_subplot(111)
sns.barplot(x="section", y="frequency", data=freq_data, palette="Set3", ax=ax)

# Leave roughly 20% headroom above the tallest bar for the data labels.
# Skipped when every bin is empty, to avoid setting identical y-limits.
max_y = max(freq_data["frequency"])
if max_y > 0:
    ax.set_ylim(0, int(max_y + 0.2 * max_y))
ax.set_title('总体正确频数分布', size=20)

# Optional horizontal reference line (lw = line width):
# plt.axhline(y=2000, ls=":", c="red", lw=4, label='2000分界线')
# plt.legend()

plt.xticks(fontsize=4)
plt.yticks(fontsize=10)

# Write each bar's count just above it; bar i sits at x position i.
for x, y in enumerate(freq_data["frequency"]):
    ax.text(x, y, f"{int(y)}", ha='center', va='bottom', fontsize=8, color='black')

plt.show()
2:统计散点图:输入 Excel 文件,需包含 index、test 和 result 三列,即可绘制散点图
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np

# Scatter plot comparing the 'test' and 'result' columns of an Excel sheet,
# both drawn against the sheet's 'index' column.

Y_UPPER = 0.75  # upper y-axis limit, also the (exclusive) stop for the ticks

results = pd.read_excel("./data/test_result.xlsx")
# Presumably selected so the Chinese title renders; verify the font is installed.
plt.rcParams["font.sans-serif"] = ["SimHei"]
print(results)

actual = results['test']
predicted = results['result']
positions = results['index']  # used as the x coordinate of every point

# Fixed y range with a tick every 0.1.
plt.ylim((0, Y_UPPER))
plt.yticks(np.arange(0, Y_UPPER, 0.1))

# The first series gets a higher zorder so it is drawn on top of the second.
plt.scatter(positions, actual, marker='.', zorder=3)
plt.scatter(positions, predicted, marker='.')

# Legend entries follow the order in which the series were plotted.
plt.legend(['groundtruth', 'predict'])
plt.title("总体测试集预测结果分布图")
plt.show()
