"""Batch market-model event study over a folder of Excel workbooks.

For every workbook in ``path`` the script:

1. reads the sheet of daily returns (one column per security plus the
   ``'000300'`` market-index column);
2. fits, per security, an ordinary-least-squares regression of the
   security's return on the market return over the estimation window
   (the first ``estimation_rows`` rows);
3. predicts the expected return for the remaining rows (the event window)
   and derives the abnormal return as ``actual - expected``;
4. writes the expected and abnormal returns to workbooks of the same name
   in ``expected_dir`` and ``abnormal_dir``.

The output directories must already exist.
"""
import os

import pandas as pd
from sklearn import linear_model

# Raw strings: the backslashes are Windows path separators, not escapes.
path = r'D:\新數據\每日收益率'            # input: daily returns
expected_dir = r'D:\新數據\日預期收益率'   # output: daily expected returns
abnormal_dir = r'D:\新數據\日超額收益率'   # output: daily abnormal returns

market_col = '000300'   # CSI 300 index, used as the market factor
estimation_rows = 110   # rows [0, 110) form the estimation window

filenames = os.listdir(path)
for filename in filenames:
    print(filename)

for i in filenames:
    # Close the handle as soon as the sheet is loaded; read_excel pulls the
    # whole sheet into memory, so nothing below needs the file open.
    with open(os.path.join(path, i), 'rb') as f:
        data = pd.read_excel(f)

    # Keep the original index as a 'time' column, then switch to a plain
    # positional index so the iloc split below is unambiguous.
    data['time'] = data.index
    data = data.reset_index(drop=True)

    data1 = data.iloc[:estimation_rows]   # estimation window, actual returns
    data2 = data.iloc[estimation_rows:]   # event window, actual returns

    # Every column except the time stamp and the market index is a security.
    feature = data.columns.tolist()
    feature.remove('time')
    feature.remove(market_col)

    # The regressor is the same for every security: build it once per file.
    market_est = data1[market_col].values.reshape(-1, 1)
    market_evt = data2[market_col].values.reshape(-1, 1)

    dfR = pd.DataFrame(data2['time'])    # expected returns (event window)
    dfAR = pd.DataFrame(data2['time'])   # abnormal returns (event window)

    for m in feature:
        regr = linear_model.LinearRegression()
        # 1-D target -> 1-D predictions, which map directly onto a column.
        regr.fit(market_est, data1[m].values)
        y_pred1 = regr.predict(market_evt)       # expected return
        dfR[m] = y_pred1
        dfAR[m] = data2[m].values - y_pred1      # actual - expected

    dfR.to_excel(os.path.join(expected_dir, i), index=False)
    dfAR.to_excel(os.path.join(abnormal_dir, i), index=False)