直方圖可以大致查看數據分布是否為正態。通常還需要將正態分布的曲線疊加在上面。
導入需要的包
import numpy as np
from scipy import stats
import matplotlib.pyplot as plt
定義函數:給定均值mu與標準差sigma,計算正態分布在x處對應的概率密度(y值)
def norm(x, mu, sigma):
    """Return the normal probability density evaluated at ``x``.

    Args:
        x: Scalar or NumPy array of points at which to evaluate the density.
        mu: Mean of the distribution.
        sigma: Standard deviation of the distribution (not the variance).

    Returns:
        The density ``exp(-0.5 * ((x - mu) / sigma)**2) / (sqrt(2*pi) * sigma)``,
        with the same shape as ``x``.
    """
    # Standardise first so the exponent reads as the usual z-score form.
    z = (x - mu) / sigma
    return np.exp(-0.5 * z**2) / (np.sqrt(2 * np.pi) * sigma)
生成測試數據
# Draw 100,000 samples from the standard normal distribution as test data.
data = np.random.normal(0.0, 1.0, 100000)
# Sample mean and standard deviation, used to parameterise the fitted curve.
mu, sigma = data.mean(), data.std()
# Number of histogram bins.
num_bins = 100
繪製直方圖
fig, ax = plt.subplots()
# Histogram of the samples; density=True puts the bar heights on the same
# scale as the probability density curve plotted over it.
hist_result = ax.hist(data, bins=num_bins, density=True, color='#2792C3')
n, bins, patches = hist_result
添加正態分布的曲線
# Evaluate the fitted normal density at the values in `bins` and overlay it
# on the histogram as a dashed line.
y = norm(bins, mu, sigma)
ax.plot(bins, y, color='#EE827C', linestyle='--')
添加95%分位數線
# 95% quantile of the standard normal, rescaled by sigma and shifted by mu
# to give the 95% quantile of the fitted distribution.
tmp_thr = stats.norm.ppf(0.95)
thr = mu + sigma * tmp_thr
# Height of the fitted density at the threshold, so the marker stops at the curve.
y_thr = norm(thr, mu, sigma)
# Vertical dashed line from the x-axis up to the curve at the threshold.
ax.plot((thr, thr), (0, y_thr), color='#EE827C', linestyle='--')
用顏色填充對應的區域。其中,zorder用來指定相應組塊的層數,即調整與其他組塊的重疊關係。
# 100 evenly spaced x values from the 95% threshold to the largest sample.
tmp_x = np.linspace(thr, data.max(), 100)
tmp_y = norm(tmp_x, mu, sigma)
# Shade the area under the curve to the right of the threshold.
ax.fill_between(tmp_x, 0, tmp_y, facecolor='#EE827C', alpha=0.5, zorder=2)
設置坐標軸標籤並保存圖片
# Label both axes in one call, then write the figure to demo.pdf.
ax.set(xlabel='Data', ylabel='Density')
plt.savefig('demo.pdf', bbox_inches='tight')
示例圖
完整代碼
import numpy as np
from scipy import stats
import matplotlib.pyplot as plt
def norm(x, mu, sigma):
    """Return the normal probability density evaluated at ``x``.

    Args:
        x: Scalar or NumPy array of points at which to evaluate the density.
        mu: Mean of the distribution.
        sigma: Standard deviation of the distribution (not the variance).

    Returns:
        The density ``exp(-0.5 * ((x - mu) / sigma)**2) / (sqrt(2*pi) * sigma)``,
        with the same shape as ``x``.
    """
    # Standardise first so the exponent reads as the usual z-score form.
    z = (x - mu) / sigma
    return np.exp(-0.5 * z**2) / (np.sqrt(2 * np.pi) * sigma)
# Generate the test data: 100,000 draws from the standard normal distribution.
data = np.random.normal(0.0, 1.0, size=100000)
# Sample mean and standard deviation of the draws.
mu, sigma = data.mean(), data.std()
# Number of histogram bins.
num_bins = 100
fig, ax = plt.subplots()
# Histogram of the samples, normalised (density=True) so it is comparable
# with the probability density curve.
hist_result = ax.hist(data, bins=num_bins, density=True, color='#2792C3')
n, bins, patches = hist_result
# Normal distribution line: the fitted density evaluated at the values in
# `bins`, drawn dashed on top of the histogram.
y = norm(bins, mu, sigma)
ax.plot(bins, y, color='#EE827C', linestyle='--')
# Add a line at the 95% quantile: take the standard-normal quantile, then
# rescale by sigma and shift by mu to match the fitted distribution.
tmp_thr = stats.norm.ppf(0.95)
thr = mu + sigma * tmp_thr
# Density at the threshold, used as the top of the vertical marker.
y_thr = norm(thr, mu, sigma)
ax.plot((thr, thr), (0, y_thr), color='#EE827C', linestyle='--')
# Fill the area under the curve beyond the 95% quantile with colour,
# sampling 100 points from the threshold to the largest observation.
tmp_x = np.linspace(thr, data.max(), 100)
tmp_y = norm(tmp_x, mu, sigma)
ax.fill_between(tmp_x, 0, tmp_y, facecolor='#EE827C', alpha=0.5, zorder=2)
# Axis labels.
ax.set(xlabel='Data', ylabel='Density')
# Save the figure to demo.pdf.
plt.savefig('demo.pdf', bbox_inches='tight')