使用TuShare下載歷史逐筆成交數據並生成1分鐘線


使用如下代碼從TuShare下載每隻股票的歷史逐筆成交記錄，並按股票、日期保存到本地，主要是為了以後查詢方便快速。（代碼中獲取滬深300成份股的調用已被註釋掉，實際是讀取本地 stock_basics.csv 中的全部股票。）

#-*- coding: utf-8 -*-
import numpy as np
import pandas as pd
import tushare as ts
import datetime
import time
import tushare as ts
import os
 
data_dir = '/home/vnpy/share/'  # directory where the downloaded data is stored
 
# ts.get_sz50s() would fetch the SSE 50 constituents; it returns a DataFrame with columns code (stock code) and name (stock name)
 
# cal_dates = ts.trade_cal() would return the exchange calendar as a DataFrame with columns calendarDate and isOpen
# The calendar is read from a local CSV file under data_dir instead.
cal_dates = pd.read_csv(data_dir+'trade_cal.csv') 

def is_open_day(date):
    """Return whether the market was open on ``date`` per the local calendar.

    The lookup uses the module-level ``cal_dates`` DataFrame and selects the
    ``isOpen`` flag by column name, so it does not depend on where that
    column happens to sit in the CSV file.

    Args:
        date: Date string in the same format as the ``calendarDate`` column,
            e.g. ``'2017-11-23'``.

    Returns:
        bool: True when the calendar has a row for ``date`` whose ``isOpen``
        value is 1; False when the day is flagged closed or is not listed.
    """
    flags = cal_dates.loc[cal_dates['calendarDate'] == date, 'isOpen']
    if flags.empty:
        return False
    # Only the first matching row is consulted, as before.
    return bool(flags.iloc[0] == 1)
 
 
# Seconds the download loop should pause after a request. It is adapted by
# get_save_tick_data(): doubled after a failure (capped at 128), halved after
# a success (never below 2). It must live at module level so the value
# survives between calls.
sleep_time = 2


def get_save_tick_data(symbol, date):
    """Download one day of tick data from TuShare and save it as a CSV file.

    The file is written to
    ``<data_dir>/<symbol>/<year>/<month>/<symbol>_<date>.csv``. Nothing is
    downloaded when ``date`` is not a trading day or the file already exists.

    Args:
        symbol: Stock code as a string, e.g. ``'600030'``.
        date: ``datetime.date`` of the trading day to fetch.

    Returns:
        bool: True when a download was attempted (whether it succeeded or
        raised ``IOError``), which tells the caller to pause for
        ``sleep_time`` seconds; False when no request was made.
    """
    global sleep_time
    str_date = str(date)
    if not is_open_day(str_date):
        return False

    target_dir = os.path.join(data_dir, symbol, str(date.year), str(date.month))
    tick_file = os.path.join(target_dir, symbol + '_' + str_date + '.csv')
    if not os.path.exists(target_dir):
        os.makedirs(target_dir)
    if os.path.exists(tick_file):
        print("Data already downloaded before, skip " + tick_file)
        return False

    try:
        ticks = ts.get_tick_data(symbol, str_date, pause=0.1)
    except IOError as err:
        print(err)
        # Back off: every failed download doubles the pause, up to 128 s.
        sleep_time = min(sleep_time * 2, 128)
        print('Get tick data error: symbol: ' + symbol + ', date: ' + str_date
              + ', sleep time is: ' + str(sleep_time))
        return True

    ticks.to_csv(tick_file)
    # Recover: every successful download halves the pause, but never below
    # 2 s. Floor division keeps the value an integer on Python 2 and 3.
    sleep_time = max(sleep_time // 2, 2)
    print("Successfully download and save file: " + tick_file
          + ', sleep time is: ' + str(sleep_time))
    return True
 
def get_date_list(begin_date, end_date):
    """Return every date from ``begin_date`` to ``end_date``, both inclusive.

    The dates are one day apart and in ascending order. The list is empty
    when ``begin_date`` is later than ``end_date``.
    """
    whole_days = (end_date - begin_date).days
    return [begin_date + datetime.timedelta(days=offset)
            for offset in range(whole_days + 1)]
 
def get_all_stock_id():
    """Return the codes of all stocks of interest.

    The codes are the ``code`` column of ``stock_basics.csv`` under
    ``data_dir``, returned as the column's underlying array of values. The
    TuShare call ``ts.get_hs300s()`` is the disabled alternative source.
    """
    basics_path = data_dir + 'stock_basics.csv'
    return pd.read_csv(basics_path)['code'].values

def getSixDigitalStockCode(code):
    """Left-pad a stock code with zeros to six characters.

    Args:
        code: Stock code as an int or a string.

    Returns:
        str: ``str(code)`` padded on the left with ``'0'`` to a width of six;
        values already six characters or longer come back unchanged.
    """
    return str(code).rjust(6, '0')

# Download the historical tick data of every stock of interest from TuShare
# and keep it locally; get_save_tick_data writes one CSV file per stock and day.
# A shorter range used earlier:
#   get_date_list(datetime.date(2017, 11, 6), datetime.date(2017, 11, 12))
dates = get_date_list(datetime.date(2018, 1, 1), datetime.date(2018, 7, 9))
stocks = get_all_stock_id()
for stock in stocks:
    for date in dates:
        needs_pause = get_save_tick_data(getSixDigitalStockCode(stock), date)
        if needs_pause:
            # sleep_time is re-read every time because get_save_tick_data
            # updates it.
            time.sleep(sleep_time)

  

因為TuShare並沒有提供1分鐘線的信息，所以需要根據下載到的每日逐筆成交信息生成1分鐘線信息。

代碼如下。其實不必用 for 逐列處理再合併，直接對 df 做 resample 並聚合（價格取 ohlc，成交量和成交額取 sum）得到 newdf，只要保持輸出的列頭一致就好了。

#-*- coding: utf-8 -*-
import pandas as pd
import datetime
import os

#根據分筆成交數據生成1分鍾線
def gen_min_line(symbol, date):
    """Build the 1-minute bar file for one stock and day from its tick CSV.

    Reads ``<data_dir>/<symbol>/<year>/<month>/<symbol>_<date>.csv`` and
    writes ``..._1m.csv`` next to it with the columns open, high, low, close,
    volume and amount, indexed by minute. Minutes without any tick are left
    out. Nothing is done when the tick file is missing or the minute file
    already exists.

    Args:
        symbol: Stock code, int or string; it is zero-padded to six
            characters so that it matches the directory layout of the
            downloaded tick files.
        date: ``datetime.date`` of the trading day.

    Returns:
        0 when the tick file has fewer than 10 rows and is skipped,
        otherwise None.
    """
    global data_dir
    # Kept from the original: the function (re)binds the module-level
    # data_dir to this fixed location.
    data_dir = '/home/vnpy/share/'
    # The loop below passes codes straight from get_all_stock_id(); they may
    # be integers or lack leading zeros, so normalise them before building
    # paths.
    symbol = str(symbol).rjust(6, '0')
    str_date = str(date)
    day_dir = data_dir + symbol + '/' + str(date.year) + '/' + str(date.month)
    tickfile = day_dir + '/' + symbol + '_' + str_date + '.csv'
    minfile = day_dir + '/' + symbol + '_' + str_date + '_1m.csv'
    print(tickfile + ' ' + minfile)
    if not os.path.exists(tickfile) or os.path.exists(minfile):
        return None

    ticks = pd.read_csv(tickfile)
    print("Successfully read tick file: " + tickfile)
    if ticks.shape[0] < 10:
        # TuShare returns tick data even while a stock is suspended, but then
        # only three rows of bogus data; the row count filters those files out.
        print("No tick data read from tick file, skip generating 1min line")
        return 0

    # The tick file stores only the time of day; prepend the date to get
    # full timestamps for resampling.
    ticks['time'] = pd.to_datetime(str_date + ' ' + ticks['time'])
    ticks = ticks.set_index('time')
    bars = ticks['price'].resample('1min').ohlc()
    bars['volume'] = ticks['volume'].resample('1min').sum()
    bars['amount'] = ticks['amount'].resample('1min').sum()
    # Minutes without ticks have NaN prices; drop them.
    bars = bars.dropna()
    bars.to_csv(minfile)
    print("Successfully write to minute file: " + minfile)
    return None


dates = get_date_list(datetime.date(2018, 1, 1), datetime.date(2018, 7, 9))
stocks = get_all_stock_id()
for stock in stocks:
    for date in dates:
        gen_min_line(stock, date)

  

  refer to:https://blog.csdn.net/wqfhenanxc/article/details/78525730


免責聲明!

本站轉載的文章為個人學習借鑒使用,本站對版權不負任何法律責任。如果侵犯了您的隱私權益,請聯系本站郵箱yoyou2525@163.com刪除。



 
粵ICP備18138465號   © 2018-2025 CODEPRJ.COM