# -*- coding: utf-8 -*-
"""Determine the best ARIMA order (p, d, q) for one series by minimum BIC.

The script reads ``../data/discdata_processed.xls``, drops the last five rows,
takes the ``CWXT_DB:184:D:\\`` column, fits ARIMA(p, 1, q) for every p and q in
``0..len(series)/10`` and reports the (p, q) pair whose fitted model has the
smallest BIC.
"""
import pandas as pd


def load_arima():
    """Return the statsmodels ARIMA class, preferring the current module path.

    NOTE(review): ``statsmodels.tsa.arima_model.ARIMA`` (the import used by the
    original script) is, as far as I know, deprecated in statsmodels 0.12 and
    reduced to a stub that raises ``NotImplementedError`` in 0.13+ -- confirm
    against the statsmodels release notes. The legacy path is kept only as a
    fallback for installations that lack the new module.
    """
    try:
        from statsmodels.tsa.arima.model import ARIMA
    except ImportError:
        from statsmodels.tsa.arima_model import ARIMA
    return ARIMA


def bic_grid(series, pmax, qmax, model_cls, d=1):
    """Fit ``model_cls(series, order=(p, d, q))`` over a grid and collect BICs.

    Args:
        series: The observations handed unchanged to ``model_cls``.
        pmax: Largest AR order to try (inclusive).
        qmax: Largest MA order to try (inclusive).
        model_cls: Callable taking ``(series, order=(p, d, q))`` and returning
            an object whose ``fit()`` result exposes a ``bic`` attribute.
        d: Differencing order placed in the middle of every ``order`` tuple.

    Returns:
        A list of ``pmax + 1`` rows, each holding ``qmax + 1`` entries; entry
        ``[p][q]`` is the BIC of that fit, or ``None`` when the fit raised.
    """
    rows = []
    for p in range(pmax + 1):
        row = []
        for q in range(qmax + 1):
            # Some orders fail to fit, so those cells are skipped.  Only
            # ``Exception`` is caught: Ctrl-C and SystemExit must still be able
            # to stop a long grid search.
            try:
                # ``order`` is passed by keyword because the second positional
                # parameter of the newer ARIMA class is ``exog``, not ``order``.
                row.append(model_cls(series, order=(p, d, q)).fit().bic)
            except Exception:
                row.append(None)
        rows.append(row)
    return rows


def best_order(bic_matrix):
    """Return the ``(p, q)`` position of the smallest BIC in *bic_matrix*.

    Args:
        bic_matrix: A list of rows or a DataFrame of BIC values indexed as
            ``[p][q]``; failed fits may be ``None`` or NaN.

    Returns:
        A ``(p, q)`` tuple of ints.

    Raises:
        ValueError: If the matrix contains no usable BIC value at all.
    """
    # Flatten with stack() so that idxmin() yields the (row, column) label of
    # the minimum; missing cells are dropped explicitly so the result does not
    # depend on whether stack() itself discards them.
    flat = pd.DataFrame(bic_matrix).stack().astype('float64').dropna()
    if flat.empty:
        raise ValueError('no ARIMA model could be fitted; the BIC matrix is empty')
    p, q = flat.idxmin()
    return int(p), int(q)


if __name__ == "__main__":
    # Parameter initialisation.
    discfile = '../data/discdata_processed.xls'
    data = pd.read_excel(discfile, index_col='COLLECTTIME')
    data = data.iloc[: len(data) - 5]  # the last 5 observations are not used
    xdata = data['CWXT_DB:184:D:\\']

    ARIMA = load_arima()

    # Order determination: as a rule of thumb the order does not exceed
    # length / 10.
    pmax = int(len(xdata) / 10)
    qmax = int(len(xdata) / 10)

    bic_matrix = bic_grid(xdata, pmax, qmax, ARIMA)  # BIC matrix
    print(bic_matrix)
    # Example output recorded by the original author (values may differ with
    # another statsmodels version):
    # [[1275.6868239439104, 1273.190434524266, 1273.5749982328914, 1274.4669152438114, None],
    #  [1276.7491283595593, 1271.8999324285992, None, None, None],
    #  [1279.6942963992901, 1277.5553412371614, None, 1280.0924824267408, None],
    #  [1278.0659994468958, 1278.9885944429066, 1282.782534558853, 1285.943493708969, None],
    #  [1281.220790614283, 1282.6999920212124, 1286.2975191780365, 1290.1950373803218, None]]

    bic_matrix = pd.DataFrame(bic_matrix)  # the minimum can be located from this
    print(bic_matrix)
    #              0            1            2            3     4
    # 0  1275.686824  1273.190435  1273.574998  1274.466915  None
    # 1  1276.749128  1271.899932          NaN          NaN  None
    # 2  1279.694296  1277.555341          NaN  1280.092482  None
    # 3  1278.065999  1278.988594  1282.782535  1285.943494  None
    # 4  1281.220791  1282.699992  1286.297519  1290.195037  None

    print(bic_matrix.stack())
    # 0  0    1275.69
    #    1    1273.19
    #    2    1273.57
    #    3    1274.47
    # 1  0    1276.75
    #    1     1271.9
    # 2  0    1279.69
    #    1    1277.56
    #    3    1280.09
    # 3  0    1278.07
    #    1    1278.99
    #    2    1282.78
    #    3    1285.94
    # 4  0    1281.22
    #    1     1282.7
    #    2     1286.3
    #    3     1290.2

    # Flatten with stack(), then use idxmin() to find where the minimum sits.
    p, q = best_order(bic_matrix)
    print(u'BIC最小的p值和q值为:%s、%s' % (p, q))