Python之爬取東方財富網站中不同板塊的股票信息(僅供學習)


代碼:

主要是爬取行情中心的不同板塊的股票數據:

import requests import json import re import pandas as pd # 存儲相關信息
def getMessage(getCount):
    """Return one of the scraper's static lookup tables.

    Args:
        getCount: Selector for the table to return.
            1 -- dict mapping each board's display name to an identifier
                 string (only the keys are used by the visible code);
            2 -- base URL of the list endpoint, ending in ``?``;
            3 -- list of query strings, one per board, positionally aligned
                 with the keys of the dict returned for selector 1;
            4 -- dict mapping ``f``-field keys to spreadsheet column titles.

    Returns:
        The selected table, or ``None`` for any other selector value.
    """
    if getCount == 1:
        stockPlateDict = {
            '滬深京A股': 'hs_a_board',
            '上證A股': 'sh_a_board',
            '深證A股': 'sz_a_board',
            '北證A股': 'bj_a_board',
            '新股': 'newshares',
            '創業板': 'gem_board',
            '科創板': 'kcb_board',
            '滬股通': 'sh_hk_board',
            '深股通': 'sz_hk_board',
            'B股': 'b_board',
            '上證AB股比價': 'ab_comparison_sh',
            '深證AB股比價': 'ab_comparison_sz',
            '風險警示板': 'st_board',
            '兩網及退市': 'staq_net_board',
        }
        return stockPlateDict
    elif getCount == 2:
        webUrl = "https://23.push2.eastmoney.com/api/qt/clist/get?"
        return webUrl
    elif getCount == 3:
        urlSuf = [  # query string of each board, in board order
            "cb=jQuery1124011309990273440462_1650110291355&pn=1&pz=20&po=1&np=1&ut=bd1d9ddb04089700cf9c27f6f7426281&fltt=2&invt=2&fid=f3&fs=m:0+t:6,m:0+t:80,m:1+t:2,m:1+t:23,m:0+t:81+s:2048&fields=f1,f2,f3,f4,f5,f6,f7,f8,f9,f10,f12,f13,f14,f15,f16,f17,f18,f20,f21,f23,f24,f25,f22,f11,f62,f128,f136,f115,f152&_=1650110291377",
            "cb=jQuery1124011309990273440462_1650110291355&pn=1&pz=20&po=1&np=1&ut=bd1d9ddb04089700cf9c27f6f7426281&fltt=2&invt=2&fid=f3&fs=m:1+t:2,m:1+t:23&fields=f1,f2,f3,f4,f5,f6,f7,f8,f9,f10,f12,f13,f14,f15,f16,f17,f18,f20,f21,f23,f24,f25,f22,f11,f62,f128,f136,f115,f152&_=1650110291388",
            "cb=jQuery1124011309990273440462_1650110291355&pn=1&pz=20&po=1&np=1&ut=bd1d9ddb04089700cf9c27f6f7426281&fltt=2&invt=2&fid=f3&fs=m:0+t:6,m:0+t:80&fields=f1,f2,f3,f4,f5,f6,f7,f8,f9,f10,f12,f13,f14,f15,f16,f17,f18,f20,f21,f23,f24,f25,f22,f11,f62,f128,f136,f115,f152&_=1650110291392",
            "cb=jQuery1124011309990273440462_1650110291355&pn=1&pz=20&po=1&np=1&ut=bd1d9ddb04089700cf9c27f6f7426281&fltt=2&invt=2&fid=f3&fs=m:0+t:81+s:2048&fields=f1,f2,f3,f4,f5,f6,f7,f8,f9,f10,f12,f13,f14,f15,f16,f17,f18,f20,f21,f23,f24,f25,f22,f11,f62,f128,f136,f115,f152&_=1650110291396",
            "cb=jQuery1124011309990273440462_1650110291355&pn=1&pz=20&po=1&np=1&ut=bd1d9ddb04089700cf9c27f6f7426281&fltt=2&invt=2&fid=f26&fs=m:0+f:8,m:1+f:8&fields=f1,f2,f3,f4,f5,f6,f7,f8,f9,f10,f12,f13,f14,f15,f16,f17,f18,f20,f21,f23,f24,f25,f26,f22,f11,f62,f128,f136,f115,f152&_=1650110291400",
            "cb=jQuery1124011309990273440462_1650110291355&pn=1&pz=20&po=1&np=1&ut=bd1d9ddb04089700cf9c27f6f7426281&fltt=2&invt=2&fid=f3&fs=m:0+t:80&fields=f1,f2,f3,f4,f5,f6,f7,f8,f9,f10,f12,f13,f14,f15,f16,f17,f18,f20,f21,f23,f24,f25,f22,f11,f62,f128,f136,f115,f152&_=1650110291405",
            "cb=jQuery1124011309990273440462_1650110291355&pn=1&pz=20&po=1&np=1&ut=bd1d9ddb04089700cf9c27f6f7426281&fltt=2&invt=2&fid=f3&fs=m:1+t:23&fields=f1,f2,f3,f4,f5,f6,f7,f8,f9,f10,f12,f13,f14,f15,f16,f17,f18,f20,f21,f23,f24,f25,f22,f11,f62,f128,f136,f115,f152&_=1650110291408",
            "cb=jQuery1124011309990273440462_1650110291351&pn=1&pz=20&po=1&np=1&ut=bd1d9ddb04089700cf9c27f6f7426281&fltt=2&invt=2&fid=f26&fs=b:BK0707&fields=f1,f2,f3,f4,f5,f6,f7,f8,f9,f10,f12,f13,f14,f15,f16,f17,f18,f20,f21,f23,f24,f25,f26,f22,f11,f62,f128,f136,f115,f152&_=1650110291412",
            "cb=jQuery1124011309990273440462_1650110291351&pn=1&pz=20&po=1&np=1&ut=bd1d9ddb04089700cf9c27f6f7426281&fltt=2&invt=2&fid=f26&fs=b:BK0804&fields=f1,f2,f3,f4,f5,f6,f7,f8,f9,f10,f12,f13,f14,f15,f16,f17,f18,f20,f21,f23,f24,f25,f26,f22,f11,f62,f128,f136,f115,f152&_=1650110291416",
            "cb=jQuery1124011309990273440462_1650110291351&pn=1&pz=20&po=1&np=1&ut=bd1d9ddb04089700cf9c27f6f7426281&fltt=2&invt=2&fid=f3&fs=m:0+t:7,m:1+t:3&fields=f1,f2,f3,f4,f5,f6,f7,f8,f9,f10,f12,f13,f14,f15,f16,f17,f18,f20,f21,f23,f24,f25,f22,f11,f62,f128,f136,f115,f152&_=1650110291420",
            "cb=jQuery1124011309990273440462_1650110291351&pn=1&pz=20&po=1&np=1&ut=bd1d9ddb04089700cf9c27f6f7426281&fltt=2&invt=2&fid=f199&fs=m:1+b:BK0498&fields=f1,f2,f3,f4,f5,f6,f7,f8,f9,f10,f12,f13,f14,f15,f16,f17,f18,f20,f21,f23,f24,f25,f22,f11,f62,f128,f136,f115,f152,f201,f202,f203,f196,f197,f199,f195,f200&_=1650110291424",
            "cb=jQuery1124011309990273440462_1650110291351&pn=1&pz=20&po=1&np=1&ut=bd1d9ddb04089700cf9c27f6f7426281&fltt=2&invt=2&fid=f199&fs=m:0+b:BK0498&fields=f1,f2,f3,f4,f5,f6,f7,f8,f9,f10,f12,f13,f14,f15,f16,f17,f18,f20,f21,f23,f24,f25,f22,f11,f62,f128,f136,f115,f152,f201,f202,f203,f196,f197,f199,f195,f200&_=1650110291427",
            "cb=jQuery1124031292793882810255_1650114519858&pn=1&pz=20&po=1&np=1&ut=bd1d9ddb04089700cf9c27f6f7426281&fltt=2&invt=2&fid=f3&fs=m:0+f:4,m:1+f:4&fields=f1,f2,f3,f4,f5,f6,f7,f8,f9,f10,f12,f13,f14,f15,f16,f17,f18,f20,f21,f23,f24,f25,f22,f11,f62,f128,f136,f115,f152&_=1650114519867",
            "cb=jQuery1124031292793882810255_1650114519858&pn=1&pz=20&po=1&np=1&ut=bd1d9ddb04089700cf9c27f6f7426281&fltt=2&invt=2&fid=f3&fs=m:0+s:3&fields=f1,f2,f3,f4,f5,f6,f7,f8,f9,f10,f12,f13,f14,f15,f16,f17,f18,f20,f21,f23,f24,f25,f26,f22,f33,f11,f62,f128,f136,f115,f152&_=1650114519872",
        ]
        return urlSuf
    elif getCount == 4:
        # NOTE: in this feed f2 is the latest price and f15 is the day's high;
        # the two titles were previously swapped, mislabelling both columns.
        stockMessage = {
            'f12': '股票代碼',
            'f14': '名稱',
            'f2': '最新價',
            'f3': '漲跌幅%',
            'f4': '漲跌額',
            'f5': '成交量(手)',
            'f6': '成交額',
            'f7': '振幅%',
            'f8': '換手率%',
            'f9': '市盈率(動態)',
            'f10': '量比',
            'f15': '最高',
            'f16': '最低價',
            'f17': '今開',
            'f18': '昨收',
            'f23': '市凈率',
        }
        return stockMessage
    # Unknown selector: keep the historical behaviour of returning None.
    return None


# Get the board names.
def getPlateName():
    """Return the board display names as a list.

    The names are the keys of the dict returned by ``getMessage(1)``, in that
    dict's iteration order.
    """
    return list(getMessage(1))


# Build the URL list for a board.
def getPlateUrlList(inputPlateName, pageCount):
    """Build the request URLs for pages 1..pageCount of one board.

    Args:
        inputPlateName: Board display name, compared against the names
            returned by ``getPlateName()``.
        pageCount: Number of pages; one URL is produced for each page number
            from 1 through ``pageCount``.

    Returns:
        A list of URL strings, one per page. The list is empty when no board
        name matches or when ``pageCount`` is less than 1.
    """
    base = getMessage(2)  # endpoint address every URL starts with
    # Each getSplitURL() entry is a list whose first element is the
    # (text before the page number, text after the page number) pair.
    pieces = [(base + found[0][0], found[0][1]) for found in getSplitURL()]

    urls = []
    for position, name in enumerate(getPlateName()):
        if name != inputPlateName:
            continue
        for page in range(1, pageCount + 1):
            head, tail = pieces[position]
            urls.append(head + str(page) + tail)
    return urls  # URLs for every requested page


# Prepare the URLs (without a page number).
def getSplitURL():
    """Split every board query string around its page number.

    Returns:
        One entry per query string from ``getMessage(3)``. Each entry is the
        ``findall`` result for that string after everything from the first
        ``"fields"`` onward has been cut off: a list of
        ``(text up to and including "pn=", text from the following "&")``
        tuples.
    """
    splitter = re.compile("(.*pn=)1(&.*)")  # captures both sides of the page number
    result = []
    for query in getMessage(3):
        kept = query.split("fields")[0]  # drop the field list and what follows
        result.append(splitter.findall(kept))
    return result


# Request the pages and get the JSON data.
def requestURL(plateName, requestCount):
    """Download the requested pages of a board and extract the raw records.

    Args:
        plateName: Board display name, passed on to ``getPlateUrlList``.
        requestCount: Number of pages to request.

    Returns:
        A list with one entry per URL built by ``getPlateUrlList``; each entry
        is the list of record substrings (text starting at ``{"f1"`` and
        ending at the next ``}``) found in that response body. The list is
        empty when ``getPlateUrlList`` produced no URLs.

    Raises:
        requests.exceptions.RequestException: if a request fails or does not
            answer within the timeout.
    """
    urls = getPlateUrlList(plateName, requestCount)
    # Compiled once instead of on every iteration.
    record_pattern = re.compile("({\"f1\".*?})")
    pages = []
    for url in urls:
        # A timeout keeps a stalled connection from blocking the script forever.
        response = requests.get(url, timeout=10)
        pages.append(record_pattern.findall(response.text))
    return pages


# Menu choice made at program start.
def getChoose(choose):
    """Translate a 1-based menu number into the matching board name.

    Args:
        choose: Menu number; 1 selects the first name from ``getPlateName()``.

    Returns:
        The board name at that position, or ``None`` when ``choose`` matches
        no position.
    """
    for number, name in enumerate(getPlateName(), start=1):
        if number == choose:
            return name
    return None


# Process the f-field records.
def getStockMsg(chPlateNmae, pageCount):
    """Fetch a board's pages and decode every record into a dict.

    Args:
        chPlateNmae: Board display name, passed on to ``requestURL``.
        pageCount: Number of pages to fetch.

    Returns:
        A two-level list: the outer level has one entry per page, the inner
        level holds the ``json.loads`` result of each record on that page.
    """
    raw_pages = requestURL(chPlateNmae, pageCount)
    return [[json.loads(record) for record in page] for page in raw_pages]


def getStockFF():
    """Return the f-field keys of ``getMessage(4)`` as a list, in dict order."""
    return list(getMessage(4))


# Get the stock values.
def getStockValue(dicMsg):
    """Pick the wanted f-field values out of every decoded record.

    Args:
        dicMsg: Two-level list (pages, then records) of dicts.

    Returns:
        A three-level list: pages, then records, then the record's values for
        each key from ``getStockFF()`` in that order. A key missing from a
        record yields ``None``.
    """
    wanted = getStockFF()
    return [
        [[record.get(key) for key in wanted] for record in page]
        for page in dicMsg
    ]


# Build the table.
def makeDataDrame(stockValueList):
    """Assemble all pages of stock values into a single DataFrame.

    Args:
        stockValueList: Three-level list (pages, then stocks, then values)
            as produced by ``getStockValue``.

    Returns:
        A ``pandas.DataFrame`` with one row per stock across all pages, the
        column titles looked up in ``getMessage(4)`` for each key from
        ``getStockFF()``, and a row index counting from 1.
    """
    titles = getMessage(4)
    columns = [titles.get(key) for key in getStockFF()]
    # Flatten pages -> rows so the frame is two-dimensional.
    rows = [row for page in stockValueList for row in page]
    index = list(range(1, len(rows) + 1))  # 1-based row numbers
    return pd.DataFrame(rows, columns=columns, index=index)


# Save.
def toSave(df):
    """Write the table to the hard-coded Excel file on drive D: and return True.

    Args:
        df: Object providing ``to_excel`` (a pandas DataFrame in this script).

    Returns:
        ``True`` once ``to_excel`` has returned.
    """
    # Raw string: the path is unchanged, but "\d" is no longer parsed as an
    # (invalid) escape sequence, which newer Python versions warn about.
    df.to_excel(r'D:\df.xlsx')
    return True


if __name__ == '__main__':
    plateNmae = getPlateName()
    print("----------------- 請選擇板塊 ---------------------")
    for number, name in enumerate(plateNmae, start=1):
        print(str(number) + name)
    print("----------------- 請選擇以上板塊-------------------------------")
    chPlateNmae = getChoose(int(input("請輸入您的選擇:")))
    if chPlateNmae is None:
        # getChoose returns None for a number outside the menu; stop here
        # rather than passing None into the scraper.
        raise SystemExit("無效的選擇")
    pageCount = int(input("請輸入您要爬取的頁數:"))
    dicMsg = getStockMsg(chPlateNmae, pageCount)
    stockValueList = getStockValue(dicMsg)
    df = makeDataDrame(stockValueList)
    isOk = toSave(df)
    print(isOk)

保存結果:

 


免責聲明!

本站轉載的文章為個人學習借鑒使用,本站對版權不負任何法律責任。如果侵犯了您的隱私權益,請聯繫本站郵箱yoyou2525@163.com刪除。



 
粵ICP備18138465號   © 2018-2025 CODEPRJ.COM