一、關於體溫、性別、心率的臨床數據
對男性體溫進行抽樣,計算總體均值的95%置信區間。轉自:https://www.jianshu.com/p/a3efca8371eb
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt

# Load the data set: whitespace-separated values without a header row.
# The three columns are named body temperature, sex and heart rate.
df = pd.read_csv(
    'http://jse.amstat.org/datasets/normtemp.dat.txt',
    header=None,
    sep=r'\s+',  # raw string: '\s' is an invalid escape in a plain literal
    names=['體溫', '性別', '心率'],
)

# Seed NumPy's global random state before any sampling is done.
np.random.seed(42)
# df.describe()

# Draw a sample of 90 rows and look at the first few of them.
df_sam = df.sample(90)
df_sam.head()

# Mean body temperature of the men (rows with 性別 == 1) in that sample.
df3 = df_sam.loc[df_sam['性別'] == 1]
df3['體溫'].mean()

# Repeatedly draw samples of 90 rows with replacement and record the mean
# body temperature of the men in each one; the collected means approximate
# the sampling distribution of that mean.
boot_means = []
for _ in range(10000):
    bootsample = df.sample(90, replace=True)
    male_mean = bootsample[bootsample['性別'] == 1]['體溫'].mean()
    boot_means.append(male_mean)

# NOTE: the original notes mention plotting the sampling distribution of
# the men's mean body temperature at this point, but no plotting code is
# present in the source.

# 95% confidence interval for the population mean: the 2.5th and 97.5th
# percentiles of the resampled means.
np.percentile(boot_means, 2.5), np.percentile(boot_means, 97.5)
二、python實現一個總體均值的置信區間
轉自:https://blog.csdn.net/qq_39284106/article/details/103707239
def mean_interval(mean=None, std=None, sig=None, n=None, confidence=0.95):
    """Build a two-sided confidence interval for a population mean.

    The critical value is chosen as follows:

    * ``sig`` given: normal (z) critical value, for any sample size.
    * ``sig`` not given and ``n >= 30``: z critical value with ``std``.
    * ``sig`` not given and ``n < 30``: Student t critical value with
      ``n - 1`` degrees of freedom and ``std``.

    Args:
        mean: Sample mean.
        std: Sample standard deviation; used only when ``sig`` is None.
        sig: Known population standard deviation. It enters the margin of
            error as ``z * sig / sqrt(n)``, i.e. it is a standard deviation,
            not a variance.
        n: Sample size.
        confidence: Confidence level, e.g. ``0.95``.

    Returns:
        Tuple ``(lower_limit, upper_limit)``, each rounded to 3 decimals.

    Raises:
        ValueError: If ``mean`` or ``n`` is missing, ``n`` is too small, or
            neither ``std`` nor ``sig`` is supplied.
    """
    # Local imports keep the function usable even when the enclosing file
    # has not imported scipy at module level.
    import numpy as np
    from scipy import stats

    if mean is None or n is None:
        raise ValueError("mean and n are required")
    if n < 1:
        raise ValueError("n must be at least 1")
    if sig is None and std is None:
        raise ValueError("either std or sig must be provided")

    alpha = 1 - confidence

    if sig is not None:
        # Known population standard deviation: z interval regardless of n.
        # (Previously n < 30 with sig given matched no branch and the
        # function crashed at the return statement.)
        critical = stats.norm.isf(alpha / 2)
        spread = sig
    elif n >= 30:
        # Large sample: z critical value with the sample standard deviation.
        critical = stats.norm.isf(alpha / 2)
        spread = std
    else:
        # Small sample, unknown sigma: Student t with n - 1 degrees of freedom.
        if n < 2:
            raise ValueError("n must be at least 2 for a t interval")
        critical = stats.t.isf(alpha / 2, df=n - 1)
        spread = std

    margin = critical * spread / np.sqrt(n)  # margin of error
    return (round(mean - margin, 3), round(mean + margin, 3))


mean_interval(mean=8900, std=None, sig=500, n=35, confidence=0.95)
mean_interval(mean=8900, std=500, sig=None, n=35, confidence=0.90)
mean_interval(mean=8900, std=500, sig=None, n=35, confidence=0.99)
三、實現一個總體方差的置信區間
# Exercise. Given the following sample results:
#   (1) sample mean 21,  sample standard deviation 2,    sample size 50;
#   (2) sample mean 1.3, sample standard deviation 0.02, sample size 15;
#   (3) sample mean 167, sample standard deviation 31,   sample size 22;
# Question 1: compute the 90% confidence interval of the population variance.
# Question 2: compute the 90% confidence interval of the population
#             standard deviation.
def std_interval(mean=None, std=None, n=None, confidence=0.95, para="總體標准差"):
    """Build a confidence interval for a population variance or std. dev.

    The interval is based on the chi-square distribution with ``n - 1``
    degrees of freedom: ``(n - 1) * s**2 / chi2_upper`` up to
    ``(n - 1) * s**2 / chi2_lower``.

    Args:
        mean: Sample mean. Accepted for interface symmetry; not used in the
            computation.
        std: Sample standard deviation.
        n: Sample size.
        confidence: Confidence level, e.g. ``0.90``.
        para: Which parameter to estimate: ``"總體標准差"`` (population
            standard deviation, the default) or ``"總體方差"`` (population
            variance).

    Returns:
        Tuple ``(lower_limit, upper_limit)``, each rounded to 2 decimals.

    Raises:
        ValueError: If ``para`` is not one of the two supported values, or
            ``std`` / ``n`` is missing, or ``n < 2``.
    """
    # Local imports keep the function usable even when the enclosing file
    # has not imported scipy at module level.
    import numpy as np
    from scipy import stats

    if para not in ("總體標准差", "總體方差"):
        # Previously an unknown value fell through both branches and the
        # function crashed at the return statement.
        raise ValueError(f"unsupported para: {para!r}")
    if std is None or n is None:
        raise ValueError("std and n are required")
    if n < 2:
        raise ValueError("n must be at least 2")

    variance = np.power(std, 2)
    alpha = 1 - confidence
    dof = n - 1
    chi_upper = stats.chi2.isf(alpha / 2, df=dof)      # upper-tail critical value
    chi_lower = stats.chi2.isf(1 - alpha / 2, df=dof)  # lower-tail critical value

    # Interval for the variance; dividing by the larger critical value
    # gives the lower limit.
    lower_limit = dof * variance / chi_upper
    upper_limit = dof * variance / chi_lower

    if para == "總體標准差":
        lower_limit = np.sqrt(lower_limit)
        upper_limit = np.sqrt(upper_limit)

    return (round(lower_limit, 2), round(upper_limit, 2))


std_interval(mean=21, std=2, n=50, confidence=0.90)
std_interval(mean=1.3, std=0.02, n=15, confidence=0.90)
std_interval(mean=167, std=31, n=22, confidence=0.90)
四、實現兩個總體方差比的置信區間
data1 = [3.45, 3.22, 3.90, 3.20, 2.98, 3.70, 3.22, 3.75, 3.28, 3.50, 3.38,
         3.35, 2.95, 3.45, 3.20, 3.16, 3.48, 3.12, 3.20, 3.18, 3.25]
data2 = [3.22, 3.28, 3.35, 3.38, 3.19, 3.30, 3.30, 3.20, 3.05, 3.30, 3.29,
         3.33, 3.34, 3.35, 3.27, 3.28, 3.16, 3.28, 3.30, 3.34, 3.25]


def two_std_interval(d1, d2, confidence=0.95, para="兩個總體方差比"):
    """Build a confidence interval for the ratio of two population variances.

    Uses the F distribution with ``(len(d1) - 1, len(d2) - 1)`` degrees of
    freedom applied to the ratio of the two sample variances.

    Args:
        d1: First data sample (sequence of numbers).
        d2: Second data sample (sequence of numbers).
        confidence: Confidence level, e.g. ``0.95``.
        para: Which parameter to estimate: ``"兩個總體方差比"`` (ratio of the
            population variances, the default) or ``"兩個總體標准差比"``
            (ratio of the population standard deviations).

    Returns:
        Tuple ``(lower_limit, upper_limit)``, each rounded to 2 decimals.

    Raises:
        ValueError: If ``para`` is not one of the two supported values, or
            either sample has fewer than two observations.
    """
    # Local imports keep the function usable even when the enclosing file
    # has not imported scipy at module level.
    import numpy as np
    from scipy import stats

    if para not in ("兩個總體方差比", "兩個總體標准差比"):
        # Previously an unknown value fell through both branches and the
        # function crashed at the return statement.
        raise ValueError(f"unsupported para: {para!r}")

    n1 = len(d1)
    n2 = len(d2)
    if n1 < 2 or n2 < 2:
        raise ValueError("each sample needs at least two observations")

    var1 = np.var(d1, ddof=1)  # ddof=1 -> sample variance
    var2 = np.var(d2, ddof=1)
    ratio = var1 / var2

    alpha = 1 - confidence
    f_upper = stats.f.isf(alpha / 2, dfn=n1 - 1, dfd=n2 - 1)      # upper-tail critical value
    f_lower = stats.f.isf(1 - alpha / 2, dfn=n1 - 1, dfd=n2 - 1)  # lower-tail critical value

    # Interval for the variance ratio; dividing by the larger critical
    # value gives the lower limit.
    lower_limit = ratio / f_upper
    upper_limit = ratio / f_lower

    if para == "兩個總體標准差比":
        # The original referenced an undefined name (f_score01) in this
        # branch; the standard-deviation ratio is the square root of the
        # variance-ratio limits.
        lower_limit = np.sqrt(lower_limit)
        upper_limit = np.sqrt(upper_limit)

    return (round(lower_limit, 2), round(upper_limit, 2))


two_std_interval(data1, data2, confidence=0.95, para="兩個總體方差比")