Tukey HSD檢驗法/W法

本文轉載自查看原文 2017-05-27 10:06 6725 Tukey HSD/ 統計statistics/ 統計

python金融風控評分卡模型和數據分析微專業課（博主親自錄制視頻）：http://dwz.date/b9vv

項目聯系QQ：231469242

python 2.7

# -*- coding: utf-8 -*-
from statsmodels.stats.multicomp import (pairwise_tukeyhsd,
                                         MultiComparison)
                                         
# Import standard packages
import numpy as np
from scipy import stats
import pandas as pd      
import variance_check

                                                                    
# Name of the Excel workbook that holds the sample data.
excel = "sample.xlsx"
# Load the data; the columns used below are "StressReduction" and "Treatment".
df = pd.read_excel(excel)
# Collect the StressReduction values of each treatment group as plain lists.
group_mentaln = list(df.StressReduction[df.Treatment == "mental"])
group_physical = list(df.StressReduction[df.Treatment == "physical"])
group_medical = list(df.StressReduction[df.Treatment == "medical"])
list_groups = [group_mentaln, group_physical, group_medical]
list_total = group_mentaln + group_physical + group_medical

# Single-argument print(...) behaves the same on Python 2 and Python 3.
print("equal test-----------------------------------------------------")
# Check whether every group has the same number of samples; the original
# author notes that unequal groups are not suitable for Tukey-style methods.
equal_lenth = variance_check.Equal_lenth(list_groups)
if not equal_lenth:
    print("the length of groups are not equal")

multiComp = MultiComparison(df['StressReduction'], df['Treatment'])
# Run the Tukey HSD procedure once and reuse the result for both the summary
# table and the critical q value (the original computed it twice).
tukey = multiComp.tukeyhsd()
summary = tukey.summary()
print(summary)

q = tukey.q_crit
# %-formatting keeps the output a plain line on Python 2 as well, where
# print("a", b) would print a tuple.
print("q values: %s" % (q,))
# q value from the original run:
#   Out[41]: 3.5057698487864877

# Summary table from the original run:
#
# Multiple Comparison of Means - Tukey HSD,FWER=0.05
# ===============================================
#  group1  group2  meandiff  lower  upper  reject
# -----------------------------------------------
# medical  mental    1.5     0.3217 2.6783  True
# medical physical   1.0    -0.1783 2.1783 False
#  mental physical   -0.5   -1.6783 0.6783 False
# -----------------------------------------------

print("data details: %s" % (summary.data,))
# summary.data from the original run:
# [['group1', 'group2', 'meandiff', 'lower', 'upper', 'reject'],
#  [u'medical', u'mental', 1.5, 0.32169999999999999, 2.6783000000000001, True],
#  [u'medical', u'physical', 1.0, -0.17829999999999999, 2.1783000000000001, False],
#  [u'mental', u'physical', -0.5, -1.6782999999999999, 0.67830000000000001, False]]

variance_check.py

# -*- coding: utf-8 -*-
'''
Helpers for checking test assumptions:
homogeneity of variance,
normality,
and equal group sizes.
'''
import scipy,math
from scipy.stats import f
import numpy as np
import matplotlib.pyplot as plt
import scipy.stats as stats
# additional packages
from statsmodels.stats.diagnostic import lillifors
#多重比較
from statsmodels.sandbox.stats.multicomp import multipletests
#用於排列組合
import itertools
'''
#測試數據
group1=[2,3,7,2,6]
group2=[10,8,7,5,10]
group3=[10,13,14,13,15]
list_groups=[group1,group2,group3]
list_total=group1+group2+group3
'''
# Significance level. NOTE(review): the functions visible in this module do
# not read this name; they carry their own 0.05 threshold.
a=0.05

# Normality test
def check_normality(testData, alpha=0.05):
    '''Report whether ``testData`` is compatible with a normal distribution.

    The test is picked from the sample size ``n = len(testData)``:

    * ``n <= 20``        -> ``stats.shapiro``
    * ``20 < n < 50``    -> ``stats.normaltest``
    * ``50 <= n <= 300`` -> ``lillifors`` (imported from statsmodels)
    * ``n > 300``        -> ``stats.kstest`` against a normal distribution
      whose mean and standard deviation are estimated from the sample

    The name of the chosen test and the verdict are printed.

    Parameters
    ----------
    testData : sequence of numbers
        The sample to test.
    alpha : float, optional
        Significance level; normality is rejected when ``p < alpha``.
        Defaults to 0.05, the threshold the function always used.

    Returns
    -------
    bool
        ``False`` when normality is rejected, ``True`` otherwise.

    Any exception raised by the underlying test (for example for samples
    that are too small) is propagated unchanged.
    '''
    size = len(testData)
    if size <= 20:
        label = "use shapiro:"
        p_value = stats.shapiro(testData)[1]
    elif size < 50:
        label = "use normaltest"
        p_value = stats.normaltest(testData)[1]
    elif size <= 300:
        label = "use lillifors:"
        p_value = lillifors(testData)[1]
    else:
        label = "use kstest:"
        # kstest(x, 'norm') alone compares against the *standard* normal
        # N(0, 1), which rejects any sample that is not already standardised.
        # Fit location and scale from the data so the check is about the shape
        # of the distribution. The p-value is approximate because the
        # parameters are estimated from the same sample.
        values = np.asarray(testData, dtype=float)
        p_value = stats.kstest(
            values, 'norm', args=(values.mean(), values.std(ddof=1)))[1]

    # Single-argument print(...) works on both Python 2 and Python 3.
    print(label)
    if p_value < alpha:
        print("data are not normal distributed")
        return False
    print("data are normal distributed")
    return True
 
 
def NormalTest(list_groups):
    '''Run check_normality on every sample group.

    Groups are checked in order and checking stops at the first group for
    which check_normality returns False.

    Returns False if any group fails the check, True otherwise (including
    when ``list_groups`` is empty).
    '''
    # "== False" is kept on purpose so the comparison matches the original
    # exactly for whatever value check_normality returns.
    return not any(check_normality(samples) == False for samples in list_groups)
             
# Pairwise combinations
def Combination(list_groups):
    '''Return every unordered pair of groups as a list of 2-tuples.

    Pairs are produced in the order of ``itertools.combinations``. The
    previous implementation built the combinations of every size and then
    picked the size-2 entry, which did exponential work and raised
    IndexError when fewer than three groups were given. Only the pairs are
    generated now; fewer than two groups yields an empty list.

    Example
    -------
    Combination([[2, 3, 7, 2, 6], [10, 8, 7, 5, 10], [10, 13, 14, 13, 15]])
    returns
    [([2, 3, 7, 2, 6], [10, 8, 7, 5, 10]),
     ([2, 3, 7, 2, 6], [10, 13, 14, 13, 15]),
     ([10, 8, 7, 5, 10], [10, 13, 14, 13, 15])]
    '''
    return list(itertools.combinations(list_groups, 2))


# Homogeneity-of-variance test
def Levene_test(group1, group2, group3, alpha=0.05):
    '''Run ``scipy.stats.levene`` on three groups and report the verdict.

    Prints "levene test:" followed by the verdict.

    Parameters
    ----------
    group1, group2, group3 : sequences of numbers
        The samples to compare.
    alpha : float, optional
        Significance level; equal variances are rejected when ``p < alpha``.
        Defaults to 0.05, the threshold the function always used.

    Returns
    -------
    bool
        ``False`` when the variances differ significantly, ``True`` otherwise.

    Example
    -------
    For [2,3,7,2,6], [10,8,7,5,10], [10,13,14,13,15] the test gives
    LeveneResult(statistic=0.24561403508771934, pvalue=0.7860617221429711),
    so H0 holds (no significant difference between the three variances) and
    the function returns True.
    '''
    p = scipy.stats.levene(group1, group2, group3)[1]
    # Single-argument print(...) works on both Python 2 and Python 3.
    print("levene test:")
    if p < alpha:
        print("variances of groups are not equal")
        return False
    print("variances of groups are equal")
    return True

# Check whether all groups hold the same number of valid samples; the original
# author notes that groups of unequal size are not suitable for Tukey-style
# methods. The original was flagged by its author as unable to exclude NaN
# reliably, so missing values are now detected numerically instead of by
# comparing str(x).
def _is_missing(value):
    '''Return True for NaN or -inf, the two markers Equal_lenth ignores.'''
    try:
        number = float(value)
    except (TypeError, ValueError, OverflowError):
        # Entries that are not numeric are counted as ordinary samples.
        return False
    return math.isnan(number) or number == float('-inf')


def Equal_lenth(list_groups):
    '''Tell whether every group has the same number of valid samples.

    NaN and -inf entries are not counted. Any number of groups is accepted;
    the previous version read exactly the first three groups, so it raised
    IndexError for fewer and ignored any beyond the third.

    Parameters
    ----------
    list_groups : sequence of sequences
        The sample groups to compare.

    Returns
    -------
    bool
        ``True`` when all groups have the same valid-sample count (also for
        zero or one group), ``False`` otherwise.
    '''
    valid_counts = set()
    for group in list_groups:
        valid_counts.add(sum(1 for x in group if not _is_missing(x)))
    return len(valid_counts) <= 1


'''
#返回True or false 
normality=NormalTest(list_groups)   
leveneResult=Levene_test(list_groups[0],list_groups[1],list_groups[2])  
'''

數據sample.xlsx

https://en.wikipedia.org/wiki/Tukey's_range_test

Tukey's range test, also called Tukey method, Tukey's honest significance test, Tukey's HSD (Honestly Significant Difference) test

老鼠試驗數據

公式

D_tukey表示平均數差值的關鍵值，任何大於 D_tukey值的平均數差值都是顯著的

第四組和第五組平均數差值是不顯著的，其它組的差值是顯著的

Studentized Range q Table

a=0.05

http://www.real-statistics.com/statistics-tables/studentized-range-q-table/

q值

q值是學生化極差（studentized range）統計量的查表臨界值；由平均數的數量（組數）和組內自由度共同決定

a表示分類組數，df表示所有數量自由度，a_fw表示0.05犯錯概率

MS_S/A 表示 within group的方差

s_m值

s_m是一個標准誤

n表示每組的樣本數

結果

Tukey's range test, also known as the Tukey's test, Tukey method, Tukey's honest significance test, Tukey's HSD (honest significant difference) test,^[1] or the Tukey–Kramer method, is a single-step multiple comparison procedure and statistical test. It can be used on raw data or in conjunction with an ANOVA (post-hoc analysis) to find means that are significantly different from each other. Named after John Tukey,^[2] it compares all possible pairs of means, and is based on a studentized range distribution (q) (this distribution is similar to the distribution of t from the t-test. See below).^[3] The Tukey HSD tests should not be confused with the Tukey Mean Difference tests (also known as the Bland–Altman diagram).

Tukey's test compares the means of every treatment to the means of every other treatment; that is, it applies simultaneously to the set of all pairwise comparisons

\mu_i-\mu_j \,

and identifies any difference between two means that is greater than the expected standard error. The confidence coefficient for the set, when all sample sizes are equal, is exactly 1 − α. For unequal sample sizes, the confidence coefficient is greater than 1 − α. In other words, the Tukey method is conservative when there are unequal sample sizes.

Assumptions of Tukey's test

前提條件：

樣本獨立性+樣本正態分布+所有組方差齊性

The observations being tested are independent within and among the groups.
The groups associated with each mean in the test are normally distributed.
There is equal within-group variance across the groups associated with each mean in the test (homogeneity of variance).

The test statistic

Tukey's test is based on a formula very similar to that of the t-test. In fact, Tukey's test is essentially a t-test, except that it corrects for family-wise error rate (when there are multiple comparisons being made, the probability of making a Type I error within at least one of the comparisons, increases — Tukey's test corrects for that, and is thus more suitable for multiple comparisons than a number of t-tests would be).^[3]

The formula for Tukey's test is:

q_s = \frac{Y_A - Y_B}{SE},

where Y_A is the larger of the two means being compared, Y_B is the smaller of the two means being compared, and SE is the standard error of the data in question.

This q_s value can then be compared to a q value from the studentized range distribution. If the q_s value is larger than the q_critical value obtained from the distribution, the two means are said to be significantly different.^[3]

Since the null hypothesis for Tukey's test states that all means being compared are from the same population (i.e. μ₁ = μ₂ = μ₃ = ... = μ_k), the means should be normally distributed (according to the central limit theorem). This gives rise to the normality assumption of Tukey's test.

The studentized range (q) distribution

The Tukey method uses the studentized range distribution. Suppose that we take a sample of size n from each of k populations with the same normal distribution N(μ, σ) and suppose that $\bar{y}_{\min}$ is the smallest of these sample means and $\bar{y}_{\max}$ is the largest of these sample means, and suppose S² is the pooled sample variance from these samples. Then the following random variable has a Studentized range distribution.

q={\frac {({\overline {y}}_{\max }-{\overline {y}}_{\min })}{S{\sqrt {2/n}}}}

This value of q is the basis of the critical value of q, based on three factors:

α (the Type I error rate, or the probability of rejecting a true null hypothesis)
k (the number of populations)
df (the number of degrees of freedom (N-k) where N is the total number of observations)

The distribution of q has been tabulated and appears in many textbooks on statistics. In some tables the distribution of q has been tabulated without the $\sqrt{2}$ factor.

Confidence limits

The Tukey confidence limits for all pairwise comparisons with confidence coefficient of at least 1 − α are

\bar{y}_{i\bullet}-\bar{y}_{j\bullet} \pm \frac{q_{\alpha;k;N-k}}{\sqrt{2}}\widehat{\sigma}_\varepsilon \sqrt{\frac{2}{n}} \qquad i,j=1,\ldots,k\quad i\neq j.

Notice that the point estimator and the estimated variance are the same as those for a single pairwise comparison. The only difference between the confidence limits for simultaneous comparisons and those for a single comparison is the multiple of the estimated standard deviation.

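Also note that the sample sizes must be equal when using the studentized range approach. $\widehat{\sigma}_\varepsilon$ is the standard deviation of the entire design, not just that of the two groups being compared. For unequal sample sizes, the estimated standard deviation is calculated for each pairwise comparison (the Tukey–Kramer method):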

\bar{y}_{i\bullet}-\bar{y}_{j\bullet} \pm \frac{q_{\alpha;k;N-k}}{\sqrt{2}}\widehat{\sigma}_\varepsilon \sqrt{\frac{1}{n_i} + \frac{1}{n_j}}

where n_i and n_j are the sizes of groups i and j respectively. The degrees of freedom for the whole design is also applied.

Advantages and disadvantages

When doing all pairwise comparisons, this method is considered the best available when confidence intervals are needed or sample sizes are not equal. When sample sizes are equal and confidence intervals are not needed, Tukey’s test is slightly less powerful than the stepdown procedures, but if they are not available Tukey’s is the next-best choice, and unless the number of groups is large, the loss in power will be slight. In the general case when many or all contrasts might be of interest, Scheffé's method tends to give narrower confidence limits and is therefore the preferred method.

https://github.com/thomas-haslwanter/statsintro_python/tree/master/ISP/Code_Quantlets/08_TestsMeanValues/multipleTesting