時隔多年，開盤啦 APP 龍虎榜席位標籤爬蟲再度上路，代碼如下。本人非專業開發，水平很業餘，數據解析與存儲環節還有很多不到位的地方，歡迎留言交流：
# -*- coding:utf-8 -*-
"""Crawl seat (sales-department) labels from the Kaipanla APP billboard API.

For each (trade date, stock code) pair read from ``dwd_stock_special_lhb``
the script posts a request to the Kaipanla endpoint, flattens the returned
``SellList`` and ``BuyList`` entries into one row per seat, and appends the
rows to the ``ods_basic_department_info`` table.
"""
import datetime
import json
import logging

import akshare as ak  # noqa: F401  (unused here; kept from the original script)
import pandas as pd
import pymysql  # noqa: F401  (driver named in the mysql+pymysql:// URLs below)
import requests
from sqlalchemy import create_engine

logger = logging.getLogger(__name__)

# The original literal ended with " HTTP/1.1", a leftover from a captured
# request line that was being sent as part of the ``VerSion`` query value.
# NOTE(review): confirm the endpoint behaves the same without that suffix.
LHB_API_URL = (
    'https://lhb.kaipanla.com/w1/api/index.php'
    '?apiv=w28&PhoneOSNew=1&VerSion=5.2.0.1'
)
REQUEST_HEADERS = {
    'User-Agent': 'Mozilla/5.0 (Linux; Android 7.1.2; VOG-AL00 Build/N2G48H; wv) AppleWebKit/537.36 (KHTML, like Gecko) Version/4.0 Chrome/68.0.3440.70 Mobile Safari/537.36;kaipanla 5.2.0.1',
}
DEVICE_ID = 'ffffffff-f916-2186-0000-00000cdf9093'
REQUEST_TIMEOUT = 10  # seconds; keeps one stalled request from hanging the crawl
TARGET_TABLE = 'ods_basic_department_info'

# API field name -> destination column name, in output column order.
# The column names are runtime identifiers and must stay byte-identical.
SEAT_FIELD_TO_COLUMN = {
    'ID': '營業部ID',
    'Name': '營業部名稱',
    'YouZiIcon': '營業部標簽',
    'GroupID': '資金席位ID',
    'GroupIcon': '資金席位名稱',
}


def _format_trade_date(value):
    """Return *value* as ``YYYY-MM-DD`` when it is a date-like object.

    ``datetime.date`` also covers ``datetime.datetime`` and ``pd.Timestamp``.
    Any other value (for example an already formatted string) is returned
    unchanged.
    """
    # presumably the API expects YYYY-MM-DD, the format the original script
    # used when formatting the current day -- confirm against the API.
    if isinstance(value, datetime.date):
        return value.strftime('%Y-%m-%d')
    return value


def _to_cell(value):
    """Convert one API field value into something storable in a single cell."""
    # NOTE(review): the response schema is not documented in this file.
    # Container values are flattened so that every seat yields exactly one
    # row; confirm this matches the destination table's expectations.
    if isinstance(value, (list, tuple)):
        return ','.join(str(item) for item in value)
    if isinstance(value, dict):
        return json.dumps(value, ensure_ascii=False)
    return value


def _fetch_lhb_record(date, code):
    """POST the query and return the first entry of ``List``, or ``None``."""
    payload = {
        'c': 'Stock',
        'a': 'GetNewOneStockInfo',
        'Type': 0,
        'Time': _format_trade_date(date),
        'StockID': code,
        'DeviceID': DEVICE_ID,
    }
    response = requests.post(
        url=LHB_API_URL,
        data=payload,
        headers=REQUEST_HEADERS,
        timeout=REQUEST_TIMEOUT,
    )
    response.raise_for_status()
    # Parse as JSON instead of eval(): the body is untrusted remote input,
    # and eval() also fails on the JSON literals null / true / false.
    body = json.loads(response.text)
    records = body.get('List') or []
    return records[0] if records else None


def _seat_rows(record):
    """Flatten ``SellList`` then ``BuyList`` of *record* into row dicts."""
    rows = []
    for side in ('SellList', 'BuyList'):
        for seat in record.get(side) or []:
            if not isinstance(seat, dict):
                continue
            rows.append({
                column: _to_cell(seat.get(field))
                for field, column in SEAT_FIELD_TO_COLUMN.items()
            })
    return rows


def spider_lhb_sales_department(date, code, engine=None):
    """Fetch the seats for one stock on one day and append them to MySQL.

    Args:
        date: Trade date; date-like objects are sent as ``YYYY-MM-DD``,
            anything else is sent as given.
        code: Stock code sent as ``StockID``.
        engine: SQLAlchemy engine for the destination database. When
            omitted, the module-level ``engine1`` is used, as the original
            implementation did.

    Returns:
        None. Failures are logged and swallowed so that one bad
        (date, code) pair does not abort the surrounding crawl loop.
    """
    if engine is None:
        engine = globals().get('engine1')
    if engine is None:
        logger.error('no database engine available; skipping %s %s', date, code)
        return

    try:
        record = _fetch_lhb_record(date, code)
        if record is None:
            logger.info('no billboard data for %s %s', date, code)
            return

        rows = _seat_rows(record)
        if not rows:
            logger.info('no seats listed for %s %s', date, code)
            return

        # Build the frame once from all rows; DataFrame.append no longer
        # exists in pandas 2.x and a dict of scalars cannot form a frame.
        df = pd.DataFrame(rows, columns=list(SEAT_FIELD_TO_COLUMN.values()))
        print(df)
        df.to_sql(TARGET_TABLE, con=engine, if_exists='append', index=False)
    except Exception:
        # Best-effort per item, but leave a traceback instead of hiding it.
        logger.exception('failed to crawl seats for %s %s', date, code)


if __name__ == '__main__':
    logging.basicConfig(level=logging.INFO)

    # engine
    # NOTE(review): credentials are hard-coded; consider moving them to
    # configuration or environment variables.
    engine1 = create_engine('mysql+pymysql://root:123456@localhost/stock_ods_db?charset=utf8')
    engine2 = create_engine('mysql+pymysql://root:123456@localhost/stock_dwd_db?charset=utf8')

    # Load the distinct (trade date, stock code) pairs on the billboard.
    lhb_df = pd.read_sql('select distinct t_date,v_code from dwd_stock_special_lhb', con=engine2)

    # Walk the pairs row by row; nesting two loops over the columns would
    # request every date with every code instead of the listed pairs.
    for date, code in zip(lhb_df['t_date'], lhb_df['v_code']):
        print('開始')
        spider_lhb_sales_department(date, code, engine=engine1)
        print('結束')