數據平滑
數據的平滑處理通常包含降噪、擬合等操作。降噪的目的在於去除額外的影響因素;擬合的目的在於將數據數學模型化,從而可以通過更多的數學方法識別曲線特徵。
案例:繪製兩支股票的收益率曲線。收益率 = (後一天收盤價 - 前一天收盤價) / 前一天收盤價
使用卷積完成數據降噪。
# Data smoothing: denoise daily stock returns with a convolution kernel.
import datetime as dt

import numpy as np

# Length of the Hanning smoothing window (number of samples).
KERNEL_SIZE = 8


def dmy2ymd(dmy):
    """Convert a day-month-year date into a year-month-day string.

    :param dmy: date formatted as ``DD-MM-YYYY``. May be UTF-8 ``bytes``
        (what ``np.loadtxt`` passes to converters in its legacy
        ``encoding='bytes'`` mode) or ``str`` (what it passes otherwise).
    :return: the same date formatted as ``YYYY-MM-DD``.
    :raises ValueError: if the text does not match ``DD-MM-YYYY``.
    """
    # str(x, encoding=...) raises TypeError when x is already a str, so
    # decode only when bytes were actually received.
    if isinstance(dmy, (bytes, bytearray)):
        dmy = dmy.decode('utf-8')
    parsed = dt.datetime.strptime(dmy, '%d-%m-%Y')
    return parsed.date().strftime('%Y-%m-%d')


def _daily_returns(closing_prices):
    """Return (next close - previous close) / previous close per day."""
    return np.diff(closing_prices) / closing_prices[:-1]


def _hanning_smooth(values, size):
    """Convolve *values* with a unit-sum Hanning window ('valid' mode)."""
    kernel = np.hanning(size)
    kernel /= kernel.sum()  # normalise so the window weights sum to 1
    return np.convolve(values, kernel, 'valid')


def main():
    """Plot raw and convolution-smoothed daily returns of BHP and VALE."""
    # Imported here so the helpers above can be imported (and tested)
    # without a plotting backend being available.
    import matplotlib.dates as md
    import matplotlib.pyplot as mp

    # Column 1 is read as the date (day-month-year, converted to
    # year-month-day); column 6 is read as the closing price.
    dates, bhp_closing_prices = np.loadtxt(
        'bhp.csv', delimiter=',', usecols=(1, 6), unpack=True,
        dtype='M8[D],f8', converters={1: dmy2ymd})
    # The dates are the same for both stocks, so they are not read again.
    vale_closing_prices = np.loadtxt(
        'vale.csv', delimiter=',', usecols=(6,), unpack=True)

    # NOTE(review): the title 'APPL' and y-label 'Price' are kept from the
    # original although the curves drawn are BHP/VALE returns.
    mp.figure('APPL', facecolor='lightgray')
    mp.title('APPL', fontsize=18)
    mp.xlabel('Date', fontsize=14)
    mp.ylabel('Price', fontsize=14)
    mp.grid(linestyle=":")

    # Tick locators: one major tick every Monday, one minor tick per day.
    ax = mp.gca()
    ax.xaxis.set_major_locator(md.WeekdayLocator(byweekday=md.MO))
    ax.xaxis.set_major_formatter(md.DateFormatter('%Y-%m-%d'))
    ax.xaxis.set_minor_locator(md.DayLocator())

    # Turn the datetime64 array into an object array of Python dates.
    dates = dates.astype(dt.datetime)

    # Daily returns of both stocks; there is one fewer return than dates.
    bhp_returns = _daily_returns(bhp_closing_prices)
    vale_returns = _daily_returns(vale_closing_prices)
    mp.plot(dates[1:], bhp_returns, color='red', alpha=0.1,
            label='bhp returns')
    mp.plot(dates[1:], vale_returns, color='blue', alpha=0.1,
            label='vale returns')

    # Convolution denoising. 'valid' mode yields
    # len(returns) - KERNEL_SIZE + 1 == len(dates) - KERNEL_SIZE samples,
    # which is exactly the length of dates[KERNEL_SIZE:].
    bhp_convolved = _hanning_smooth(bhp_returns, KERNEL_SIZE)
    vale_convolved = _hanning_smooth(vale_returns, KERNEL_SIZE)
    mp.plot(dates[KERNEL_SIZE:], bhp_convolved, color='dodgerblue',
            alpha=0.8, label='bhp convalved')
    mp.plot(dates[KERNEL_SIZE:], vale_convolved, color='orangered',
            alpha=0.8, label='vale convalved')

    mp.legend()
    mp.gcf().autofmt_xdate()
    mp.show()


if __name__ == '__main__':
    main()
對處理過的股票收益率做多項式擬合。
# Data smoothing: convolution denoising followed by a polynomial fit.
import datetime as dt

import numpy as np

# Length of the Hanning smoothing window (number of samples).
KERNEL_SIZE = 8
# Degree of the polynomial fitted to the smoothed returns.
FIT_DEGREE = 3


def dmy2ymd(dmy):
    """Convert a day-month-year date into a year-month-day string.

    :param dmy: date formatted as ``DD-MM-YYYY``. May be UTF-8 ``bytes``
        (what ``np.loadtxt`` passes to converters in its legacy
        ``encoding='bytes'`` mode) or ``str`` (what it passes otherwise).
    :return: the same date formatted as ``YYYY-MM-DD``.
    :raises ValueError: if the text does not match ``DD-MM-YYYY``.
    """
    # str(x, encoding=...) raises TypeError when x is already a str, so
    # decode only when bytes were actually received.
    if isinstance(dmy, (bytes, bytearray)):
        dmy = dmy.decode('utf-8')
    parsed = dt.datetime.strptime(dmy, '%d-%m-%Y')
    return parsed.date().strftime('%Y-%m-%d')


def _daily_returns(closing_prices):
    """Return (next close - previous close) / previous close per day."""
    return np.diff(closing_prices) / closing_prices[:-1]


def _hanning_smooth(values, size):
    """Convolve *values* with a unit-sum Hanning window ('valid' mode)."""
    kernel = np.hanning(size)
    kernel /= kernel.sum()  # normalise so the window weights sum to 1
    return np.convolve(values, kernel, 'valid')


def main():
    """Plot raw, smoothed and polynomial-fitted returns of BHP and VALE."""
    # Imported here so the helpers above can be imported (and tested)
    # without a plotting backend being available.
    import matplotlib.dates as md
    import matplotlib.pyplot as mp

    # Column 1 is read as the date (day-month-year, converted to
    # year-month-day); column 6 is read as the closing price.
    dates, bhp_closing_prices = np.loadtxt(
        'bhp.csv', delimiter=',', usecols=(1, 6), unpack=True,
        dtype='M8[D],f8', converters={1: dmy2ymd})
    # The dates are the same for both stocks, so they are not read again.
    vale_closing_prices = np.loadtxt(
        'vale.csv', delimiter=',', usecols=(6,), unpack=True)

    # NOTE(review): the title 'APPL' and y-label 'Price' are kept from the
    # original although the curves drawn are BHP/VALE returns.
    mp.figure('APPL', facecolor='lightgray')
    mp.title('APPL', fontsize=18)
    mp.xlabel('Date', fontsize=14)
    mp.ylabel('Price', fontsize=14)
    mp.grid(linestyle=":")

    # Tick locators: one major tick every Monday, one minor tick per day.
    ax = mp.gca()
    ax.xaxis.set_major_locator(md.WeekdayLocator(byweekday=md.MO))
    ax.xaxis.set_major_formatter(md.DateFormatter('%Y-%m-%d'))
    ax.xaxis.set_minor_locator(md.DayLocator())

    # Turn the datetime64 array into an object array of Python dates.
    dates = dates.astype(dt.datetime)

    # Daily returns of both stocks; there is one fewer return than dates.
    bhp_returns = _daily_returns(bhp_closing_prices)
    vale_returns = _daily_returns(vale_closing_prices)
    mp.plot(dates[1:], bhp_returns, color='red', alpha=0.1,
            label='bhp returns')
    mp.plot(dates[1:], vale_returns, color='blue', alpha=0.1,
            label='vale returns')

    # Convolution denoising. 'valid' mode yields
    # len(returns) - KERNEL_SIZE + 1 == len(dates) - KERNEL_SIZE samples,
    # which is exactly the length of dates[KERNEL_SIZE:].
    smoothed_dates = dates[KERNEL_SIZE:]
    bhp_convolved = _hanning_smooth(bhp_returns, KERNEL_SIZE)
    vale_convolved = _hanning_smooth(vale_returns, KERNEL_SIZE)
    mp.plot(smoothed_dates, bhp_convolved, color='dodgerblue', alpha=0.1,
            label='bhp convalved')
    mp.plot(smoothed_dates, vale_convolved, color='orangered', alpha=0.1,
            label='vale convalved')

    # Polynomial fit: the x values are the dates expressed as integer day
    # counts (datetime64[D] reinterpreted as int32).
    days = smoothed_dates.astype('M8[D]').astype('i4')
    bhp_p = np.polyfit(days, bhp_convolved, FIT_DEGREE)
    bhp_val = np.polyval(bhp_p, days)
    vale_p = np.polyfit(days, vale_convolved, FIT_DEGREE)
    vale_val = np.polyval(vale_p, days)
    mp.plot(smoothed_dates, bhp_val, color='orangered', label='bhp polyval')
    mp.plot(smoothed_dates, vale_val, color='blue', label='vale polyval')

    mp.legend()
    mp.gcf().autofmt_xdate()
    mp.show()


if __name__ == '__main__':
    main()
通過求兩個多項式函數的交點,可以分析兩支股票的投資收益比。
# Data smoothing: convolution denoising, polynomial fit, and the
# intersection points of the two fitted curves.
import datetime as dt

import numpy as np

# Length of the Hanning smoothing window (number of samples).
KERNEL_SIZE = 8
# Degree of the polynomial fitted to the smoothed returns.
FIT_DEGREE = 3


def dmy2ymd(dmy):
    """Convert a day-month-year date into a year-month-day string.

    :param dmy: date formatted as ``DD-MM-YYYY``. May be UTF-8 ``bytes``
        (what ``np.loadtxt`` passes to converters in its legacy
        ``encoding='bytes'`` mode) or ``str`` (what it passes otherwise).
    :return: the same date formatted as ``YYYY-MM-DD``.
    :raises ValueError: if the text does not match ``DD-MM-YYYY``.
    """
    # str(x, encoding=...) raises TypeError when x is already a str, so
    # decode only when bytes were actually received.
    if isinstance(dmy, (bytes, bytearray)):
        dmy = dmy.decode('utf-8')
    parsed = dt.datetime.strptime(dmy, '%d-%m-%Y')
    return parsed.date().strftime('%Y-%m-%d')


def _daily_returns(closing_prices):
    """Return (next close - previous close) / previous close per day."""
    return np.diff(closing_prices) / closing_prices[:-1]


def _hanning_smooth(values, size):
    """Convolve *values* with a unit-sum Hanning window ('valid' mode)."""
    kernel = np.hanning(size)
    kernel /= kernel.sum()  # normalise so the window weights sum to 1
    return np.convolve(values, kernel, 'valid')


def _real_intersections(p, q):
    """Return the real x values where polynomials *p* and *q* are equal."""
    roots = np.roots(np.polysub(p, q))
    # The difference polynomial can have complex roots, which do not
    # correspond to any date; keep only the real ones.
    return roots[np.isreal(roots)].real


def main():
    """Plot smoothed/fitted BHP and VALE returns and print crossings."""
    # Imported here so the helpers above can be imported (and tested)
    # without a plotting backend being available.
    import matplotlib.dates as md
    import matplotlib.pyplot as mp

    # Column 1 is read as the date (day-month-year, converted to
    # year-month-day); column 6 is read as the closing price.
    dates, bhp_closing_prices = np.loadtxt(
        'bhp.csv', delimiter=',', usecols=(1, 6), unpack=True,
        dtype='M8[D],f8', converters={1: dmy2ymd})
    # The dates are the same for both stocks, so they are not read again.
    vale_closing_prices = np.loadtxt(
        'vale.csv', delimiter=',', usecols=(6,), unpack=True)

    # NOTE(review): the title 'APPL' and y-label 'Price' are kept from the
    # original although the curves drawn are BHP/VALE returns.
    mp.figure('APPL', facecolor='lightgray')
    mp.title('APPL', fontsize=18)
    mp.xlabel('Date', fontsize=14)
    mp.ylabel('Price', fontsize=14)
    mp.grid(linestyle=":")

    # Tick locators: one major tick every Monday, one minor tick per day.
    ax = mp.gca()
    ax.xaxis.set_major_locator(md.WeekdayLocator(byweekday=md.MO))
    ax.xaxis.set_major_formatter(md.DateFormatter('%Y-%m-%d'))
    ax.xaxis.set_minor_locator(md.DayLocator())

    # Turn the datetime64 array into an object array of Python dates.
    dates = dates.astype(dt.datetime)

    # Daily returns of both stocks; there is one fewer return than dates.
    bhp_returns = _daily_returns(bhp_closing_prices)
    vale_returns = _daily_returns(vale_closing_prices)
    mp.plot(dates[1:], bhp_returns, color='red', alpha=0.1,
            label='bhp returns')
    mp.plot(dates[1:], vale_returns, color='blue', alpha=0.1,
            label='vale returns')

    # Convolution denoising. 'valid' mode yields
    # len(returns) - KERNEL_SIZE + 1 == len(dates) - KERNEL_SIZE samples,
    # which is exactly the length of dates[KERNEL_SIZE:].
    smoothed_dates = dates[KERNEL_SIZE:]
    bhp_convolved = _hanning_smooth(bhp_returns, KERNEL_SIZE)
    vale_convolved = _hanning_smooth(vale_returns, KERNEL_SIZE)
    mp.plot(smoothed_dates, bhp_convolved, color='dodgerblue', alpha=0.1,
            label='bhp convalved')
    mp.plot(smoothed_dates, vale_convolved, color='orangered', alpha=0.1,
            label='vale convalved')

    # Polynomial fit: the x values are the dates expressed as integer day
    # counts (datetime64[D] reinterpreted as int32).
    days = smoothed_dates.astype('M8[D]').astype('i4')
    bhp_p = np.polyfit(days, bhp_convolved, FIT_DEGREE)
    bhp_val = np.polyval(bhp_p, days)
    vale_p = np.polyfit(days, vale_convolved, FIT_DEGREE)
    vale_val = np.polyval(vale_p, days)
    mp.plot(smoothed_dates, bhp_val, color='orangered', label='bhp polyval')
    mp.plot(smoothed_dates, vale_val, color='blue', label='vale polyval')

    # Intersections of the two fitted polynomials, printed as dates.
    # The original listing recorded this output for its data set:
    # ['2011-03-23' '2011-03-11' '2011-02-21']
    xs = _real_intersections(bhp_p, vale_p)
    print(xs.astype('M8[D]'))

    mp.legend()
    mp.gcf().autofmt_xdate()
    mp.show()


if __name__ == '__main__':
    main()