import os
import pandas as pd
from sklearn import linear_model
# Market-model event study over every Excel workbook found in `path`.
#
# For each workbook:
#   1. the first ESTIMATION_ROWS rows form the estimation window and the
#      remaining rows form the event window;
#   2. every column other than 'time' and the market column is regressed
#      (ordinary least squares) on the market column over the estimation
#      window;
#   3. the fitted model predicts the expected return over the event window,
#      and the abnormal return is computed as actual - expected;
#   4. expected and abnormal returns are written to two output workbooks that
#      reuse the input file's name.

# Raw strings: the backslashes in these Windows paths must not be parsed as
# escape sequences ('\新', '\每', '\日' are unrecognised escapes and trigger a
# SyntaxWarning on recent Python versions).
path = r'D:\新數據\每日收益率'
expected_dir = r'D:\新數據\日預期收益率'
abnormal_dir = r'D:\新數據\日超額收益率'

MARKET_COLUMN = '000300'  # CSI 300 index
ESTIMATION_ROWS = 110     # number of leading rows used to fit the model

filenames = os.listdir(path)
for filename in filenames:
    print(filename)

for i in filenames:
    excel_path = os.path.join(path, i)
    # Use a context manager so the handle is closed as soon as the workbook
    # has been parsed, instead of leaking one open file per iteration.
    with open(excel_path, 'rb') as f:
        data = pd.read_excel(f)

    # Preserve the index as a 'time' column before resetting it.
    # NOTE(review): read_excel is called without index_col, so this is the
    # default positional index rather than a date column - confirm intended.
    data['time'] = data.index
    data = data.reset_index(drop=True)

    data1 = data.iloc[:ESTIMATION_ROWS]  # actual returns, estimation window
    data2 = data.iloc[ESTIMATION_ROWS:]  # actual returns, event window

    # Columns to model: everything except the time stamp and the market index.
    feature = data.columns.tolist()
    feature.remove('time')
    feature.remove(MARKET_COLUMN)

    dfR = pd.DataFrame(data2['time'])
    dfAR = pd.DataFrame(data2['time'])

    # The regressor (market return) is identical for every column, so build
    # the (n_samples, 1) arrays once per workbook rather than once per column.
    market_estimation = data1[MARKET_COLUMN].values.reshape(-1, 1)
    market_event = data2[MARKET_COLUMN].values.reshape(-1, 1)

    for m in feature:
        regr = linear_model.LinearRegression()
        regr.fit(market_estimation, data1[m].values.reshape(-1, 1))
        # Expected return over the event window.
        y_pred1 = regr.predict(market_event)
        # Abnormal return = actual return - expected return (event window).
        AR = data2[m].values.reshape(-1, 1) - y_pred1
        # Flatten the (n, 1) arrays to 1-D before storing them as columns.
        dfR[m] = y_pred1.ravel()
        dfAR[m] = AR.ravel()

    save_path1 = os.path.join(expected_dir, i)
    save_path2 = os.path.join(abnormal_dir, i)
    dfR.to_excel(save_path1, index=False)
    dfAR.to_excel(save_path2, index=False)