Python回归的统计方法(散点图和柱状图)


1:统计直方图:先设定分箱区间(bin_list),再读入数据,即可绘制频数分布柱状图

import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns

# Build a frequency-distribution table for one numeric column and draw it
# as a bar chart with a count label above every bar.
plt.rcParams["font.sans-serif"] = ["SimHei"]  # font that can render the Chinese title
# Alternative input used previously: the 'test' column of
# "./data/0-10test_result.xlsx" loaded with pd.read_excel.
rawdata = pd.read_csv("./data/Ar_tpm.tsv", sep='\t')
print(rawdata.describe())
rawdata = rawdata['Ar_tpm']

# Bin edges: fine-grained near zero, progressively coarser for large values.
bin_list = [0, 0.2, 0.4, 0.6, 0.8, 1.0, 1.2, 1.4, 1.6, 1.8, 2.0, 2.2, 2.4, 2.6, 2.8, 3.0, 3.2, 3.4,
            3.6, 3.8, 4.0, 4.5, 5.0, 5.5, 6.0, 6.5, 7.0, 7.5, 8.0, 10.0, 15.0, 20.0, 30.0, 50.0,
            100.0, 500.0, 1000.0, 5000.0, 50000.0]
# One "lo~hi" label per pair of adjacent edges (len(bin_list) - 1 labels).
split_str = [f"{lo}~{hi}" for lo, hi in zip(bin_list, bin_list[1:])]

# include_lowest=True keeps values exactly equal to the first edge (0) in the
# first bin; with the pd.cut defaults they would become NaN and be dropped
# from the counts without any warning.
data_split = pd.cut(rawdata, bin_list, labels=split_str, include_lowest=True)

# Count the values per bin, then restore the bin order (value_counts sorts
# by count, sort_index puts the categorical labels back in edge order).
freq_chart = data_split.value_counts().sort_index()
# Keep the table both as a dict and as a DataFrame for plotting.
freq_dict = {'section': freq_chart.index, 'frequency': freq_chart.values}
freq_data = pd.DataFrame(freq_dict)

# NOTE(review): 300x100 inches at 150 dpi is a 45000x15000 pixel canvas;
# kept as in the original, but confirm this size is really intended.
ax = plt.figure(figsize=(300, 100), dpi=150).add_subplot(111)
# Draw on the axes created above explicitly; palette selects the bar colours.
sns.barplot(x="section", y="frequency", data=freq_data, palette="Set3", ax=ax)

# Leave about 20% headroom above the tallest bar for the count labels.
# The lower bound of 1 avoids a degenerate (0, 0) range when all bins are empty.
max_y = freq_data["frequency"].max()
ax.set_ylim(0, max(int(max_y + 0.2 * max_y), 1))
ax.set_title('总体正确频数分布', size=20)
# Optional reference line:
#   plt.axhline(y=2000, ls=":", c="red", lw=4, label=...) followed by plt.legend()
plt.xticks(fontsize=4)
plt.yticks(fontsize=10)

# Write the count above each bar; bars sit at integer x positions 0..n-1.
for x, y in enumerate(freq_data["frequency"]):
    ax.text(x, y, '%d' % y, ha='center', va='bottom', fontsize=8, color='black')
plt.show()

2:统计散点图:输入Excel文件,需包含test(真实值)和result(预测值)两列(代码还会读取index列作为横轴),即可绘制散点图

import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
# Scatter plot comparing ground-truth values ('test' column) with predicted
# values ('result' column) for every row of the spreadsheet.
rawdata = pd.read_excel("./data/test_result.xlsx")
plt.rcParams["font.sans-serif"] = ["SimHei"]  # font that can render the Chinese title
print(rawdata)
groundtruth = rawdata['test']
predict = rawdata['result']
# Horizontal position of every sample (named `y` for historical reasons).
# Use the 'index' column when the sheet has one; otherwise fall back to the
# DataFrame row index so a sheet with only 'test' and 'result' still plots
# instead of raising KeyError.
y = rawdata['index'] if 'index' in rawdata.columns else rawdata.index

# Fixed y-axis range with a tick every 0.1.
plt.ylim((0, 0.75))
plt.yticks(np.arange(0, 0.75, 0.1))

# zorder=3 draws the ground-truth points on top of the predictions.
# Labels are attached to each series so the legend cannot get out of sync
# with the plotting order.
plt.scatter(y, groundtruth, marker='.', zorder=3, label='groundtruth')
plt.scatter(y, predict, marker='.', label='predict')
plt.legend()
plt.title("总体测试集预测结果分布图")
plt.show()

 


免责声明!

本站转载的文章为个人学习借鉴使用,本站对版权不负任何法律责任。如果侵犯了您的隐私权益,请联系本站邮箱yoyou2525@163.com删除。



 
粤ICP备18138465号  © 2018-2025 CODEPRJ.COM