python實例:從excel讀取股票代碼,爬取股票信息寫到代碼后面的單元格中


關鍵詞:爬蟲、python、requests、接口、excel處理、正則

思路:

1、首先准備好excel文檔,把股票代碼事先編輯進去。

2、腳本讀取文檔,依次讀出股票代碼到指定站點發起請求獲取股票信息

3、將獲取的股票信息簡單處理,依次寫入到指定的文檔單元格中,完成整個實例過程

用到的python庫:xlrd(讀取excel)、requests(獲取網頁數據)、lxml(處理網頁數據)、openpyxl(對excel進行寫入編輯)、re(正則)

具體步驟:

一,導入相關庫

import xlrd  #引入讀取excel庫
import requests   #倒入requests庫
from lxml import etree  #倒入lxml 庫(沒有這個庫,pip install lxml安裝)
import os
import sys
import openpyxl
import re

二,讀取excel內的股票代碼,寫入數組(供后面的函數調用)

#讀取excel文檔內的股票代碼
def code():
    """Read the stock codes from the first column of ``stock.xlsx``.

    The workbook is looked up in the directory held by the module-level
    ``path`` variable (defined in the full listing of this script). Codes are
    taken from the first column of the sheet named ``Sheet1``; the first row
    is treated as a header and skipped.

    Returns:
        A list of 6-character, zero-padded stock-code strings in sheet order.
    """
    # os.path.join instead of a hard-coded '\\' so the file is also found on
    # platforms whose path separator is not a backslash.
    # NOTE(review): xlrd 2.0+ reportedly reads only .xls files - confirm the
    # installed xlrd version can still open .xlsx.
    wb = xlrd.open_workbook(os.path.join(path, 'stock.xlsx'))
    sheet = wb.sheet_by_name('Sheet1')
    codes = []
    for cell in sheet.col_values(0)[1:]:  # [1:] skips the header row
        if isinstance(cell, str) and not cell.strip():
            # A blank cell would make int() raise. Treat it as the end of the
            # code list so the codes stay contiguous with the sheet rows that
            # the results are later written back to.
            break
        # Numeric cells lose their leading zeros; pad back to 6 digits.
        codes.append("%06d" % int(cell))
    return codes
# Run the reader once; this rebinds the name `code` from the function to the
# list of stock codes it returned, which the later steps consume.
code=code()

三、循環讀取股票代碼查詢股票信息,寫入同一類數據的數組內(供后面寫入excel)

#code函數獲取的代碼,循環爬取代碼對應的股票數據,將股票數據寫入對應的數組(同一類)中
def get(code, timeout=10):
    """Scrape the analysis figures for each stock from stockpage.10jqka.com.cn.

    Args:
        code: Iterable of stock-code strings; each one is inserted into the
            page URL.
        timeout: Seconds to wait for each HTTP response. Optional; without a
            timeout a stalled connection would block the whole run.

    Returns:
        A 9-tuple of parallel lists with one entry per input code, in this
        order: stock name, overall score, short-term trend, mid-term trend,
        long-term trend, overall verdict, 5-day change, 3-month change,
        1-year change. A field that is not found on the page is stored as
        ``''`` so that all lists keep exactly one entry per code.
    """
    def first(nodes, default=''):
        # XPath/regex results are lists; an empty list means "not on the page".
        return nodes[0] if nodes else default

    # The headers do not depend on the stock code, so build them once.
    headers = {
        'Accept-Encoding': 'gzip, deflate',
        'Accept-Language': 'zh-CN,zh;q=0.9',
        'Upgrade-Insecure-Requests': '1',
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/63.0.3239.132 Safari/537.36',
        'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8',
        'Referer': 'http://doctor.10jqka.com.cn/603160/',
        'Connection': 'keep-alive',
        'Cache-Control': 'max-age=0',
        }
    # Compiled once: the table row holding the period changes, and its cells.
    row_re = re.compile('<tr class="even hot_cont">(.*?)</tr', re.S)
    cell_re = re.compile('<td.*?>(.*?)</td', re.S)

    list_name = []           # stock name
    list_score = []          # overall score
    list_short = []          # short-term trend
    list_mid = []            # mid-term trend
    list_long = []           # long-term trend
    list_comprehensive = []  # overall verdict
    list_day = []            # 5-day change
    list_month = []          # 3-month change
    list_year = []           # 1-year change

    for num in code:
        url = 'http://stockpage.10jqka.com.cn/' + num + '/'
        response = requests.get(url, headers=headers, timeout=timeout).text
        html = etree.HTML(response)

        list_name.append(first(html.xpath('//h1[@class = "m_logo fl"]/a/strong/text()')))
        list_score.append(first(html.xpath('//span[@class = "analyze-tips mt7"]/text()')))
        list_short.append(first(html.xpath('//div[@class = "analyze-txt fr"]/div/div[2]/text()')))
        list_mid.append(first(html.xpath('//div[@class = "analyze-txt fr"]/div[2]/div[2]/text()')))
        list_long.append(first(html.xpath('//div[@class = "analyze-txt fr"]/div[3]/div[2]/text()')))

        # The verdict sentence is split around a <strong> element:
        # leading text + emphasised text + second text node.
        list_comprehensive.append(
            first(html.xpath('//div[@class = "txt-phra"]/text()'))
            + first(html.xpath('//div[@class = "txt-phra"]/strong/text()'))
            + first(html.xpath('//div[@class = "txt-phra"]/text()[2]'))
        )

        # The period changes are pulled from the raw HTML with regexes;
        # cells 1..3 of the matched row are the 5-day, 3-month and 1-year values.
        cells = cell_re.findall(first(row_re.findall(response)))
        day, month, year = (cells[k] if len(cells) > k else '' for k in (1, 2, 3))
        list_day.append(day)
        list_month.append(month)
        list_year.append(year)

    return (list_name, list_score, list_short, list_mid,
            list_long, list_comprehensive, list_day, list_month, list_year)
# Run the scraper once for all codes; this rebinds the name `get` from the
# function to the 9-tuple of lists it returned.
get=get(code)

四、將寫入數組的股票數據,依次寫入到對應股票代碼后的單元格中

# Write the data collected by get() (and by baidu(), which is defined in the
# full listing of this script) back into stock.xlsx.
xlsx_path = os.path.join(path, 'stock.xlsx')  # portable, no hard-coded '\\'
xfile = openpyxl.load_workbook(xlsx_path)  # load the workbook
sheet1 = xfile.worksheets[0]
# Target column number -> values to write there. Data starts at B2, i.e.
# column 2, row 2; row 1 holds the headers and column 1 the stock codes.
columns = (
    (2, get[0]),     # stock name
    (3, baidu[0]),   # current price
    (4, baidu[1]),   # current market value
    (5, get[1]),     # overall score
    (6, get[2]),     # short-term trend
    (7, get[3]),     # mid-term trend
    (8, get[4]),     # long-term trend
    (9, get[5]),     # overall verdict
    (10, get[6]),    # 5-day change
    (11, get[7]),    # 3-month change
    (12, get[8]),    # 1-year change
)
for column, values in columns:
    for row, value in enumerate(values, start=2):
        sheet1.cell(row, column).value = value
xfile.save(xlsx_path)

執行后的文檔內容

 

全部代碼

#本腳本主要實現循環爬取數據后:
# 1、同一類數據統一寫入到同一個數組中,
# 2、讀取數組數據寫入指定的excel列中,實現最終數據爬取
import xlrd  #引入讀取excel庫
import requests   #倒入requests庫
from lxml import etree  #倒入lxml 庫(沒有這個庫,pip install lxml安裝)
import os
import sys
import openpyxl
import re

# Absolute directory of the running script (derived from sys.argv[0]);
# stock.xlsx is read from and saved to this directory.
path = os.path.abspath(os.path.dirname(sys.argv[0]))

#讀取excel文檔內的股票代碼
def code():
    """Read the stock codes from the first column of ``stock.xlsx``.

    The workbook is looked up in the directory held by the module-level
    ``path`` variable. Codes are taken from the first column of the sheet
    named ``Sheet1``; the first row is treated as a header and skipped.

    Returns:
        A list of 6-character, zero-padded stock-code strings in sheet order.
    """
    # os.path.join instead of a hard-coded '\\' so the file is also found on
    # platforms whose path separator is not a backslash.
    # NOTE(review): xlrd 2.0+ reportedly reads only .xls files - confirm the
    # installed xlrd version can still open .xlsx.
    wb = xlrd.open_workbook(os.path.join(path, 'stock.xlsx'))
    sheet = wb.sheet_by_name('Sheet1')
    codes = []
    for cell in sheet.col_values(0)[1:]:  # [1:] skips the header row
        if isinstance(cell, str) and not cell.strip():
            # A blank cell would make int() raise. Treat it as the end of the
            # code list so the codes stay contiguous with the sheet rows that
            # the results are later written back to.
            break
        # Numeric cells lose their leading zeros; pad back to 6 digits.
        codes.append("%06d" % int(cell))
    return codes
# Run the reader once; this rebinds the name `code` from the function to the
# list of stock codes it returned, which get() and baidu() consume below.
code=code()

#code函數獲取的代碼,循環爬取代碼對應的股票數據,將股票數據寫入對應的數組(同一類)中
def get(code, timeout=10):
    """Scrape the analysis figures for each stock from stockpage.10jqka.com.cn.

    Args:
        code: Iterable of stock-code strings; each one is inserted into the
            page URL.
        timeout: Seconds to wait for each HTTP response. Optional; without a
            timeout a stalled connection would block the whole run.

    Returns:
        A 9-tuple of parallel lists with one entry per input code, in this
        order: stock name, overall score, short-term trend, mid-term trend,
        long-term trend, overall verdict, 5-day change, 3-month change,
        1-year change. A field that is not found on the page is stored as
        ``''`` so that all lists keep exactly one entry per code.
    """
    def first(nodes, default=''):
        # XPath/regex results are lists; an empty list means "not on the page".
        return nodes[0] if nodes else default

    # The headers do not depend on the stock code, so build them once.
    headers = {
        'Accept-Encoding': 'gzip, deflate',
        'Accept-Language': 'zh-CN,zh;q=0.9',
        'Upgrade-Insecure-Requests': '1',
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/63.0.3239.132 Safari/537.36',
        'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8',
        'Referer': 'http://doctor.10jqka.com.cn/603160/',
        'Connection': 'keep-alive',
        'Cache-Control': 'max-age=0',
        }
    # Compiled once: the table row holding the period changes, and its cells.
    row_re = re.compile('<tr class="even hot_cont">(.*?)</tr', re.S)
    cell_re = re.compile('<td.*?>(.*?)</td', re.S)

    list_name = []           # stock name
    list_score = []          # overall score
    list_short = []          # short-term trend
    list_mid = []            # mid-term trend
    list_long = []           # long-term trend
    list_comprehensive = []  # overall verdict
    list_day = []            # 5-day change
    list_month = []          # 3-month change
    list_year = []           # 1-year change

    for num in code:
        url = 'http://stockpage.10jqka.com.cn/' + num + '/'
        response = requests.get(url, headers=headers, timeout=timeout).text
        html = etree.HTML(response)

        list_name.append(first(html.xpath('//h1[@class = "m_logo fl"]/a/strong/text()')))
        list_score.append(first(html.xpath('//span[@class = "analyze-tips mt7"]/text()')))
        list_short.append(first(html.xpath('//div[@class = "analyze-txt fr"]/div/div[2]/text()')))
        list_mid.append(first(html.xpath('//div[@class = "analyze-txt fr"]/div[2]/div[2]/text()')))
        list_long.append(first(html.xpath('//div[@class = "analyze-txt fr"]/div[3]/div[2]/text()')))

        # The verdict sentence is split around a <strong> element:
        # leading text + emphasised text + second text node.
        list_comprehensive.append(
            first(html.xpath('//div[@class = "txt-phra"]/text()'))
            + first(html.xpath('//div[@class = "txt-phra"]/strong/text()'))
            + first(html.xpath('//div[@class = "txt-phra"]/text()[2]'))
        )

        # The period changes are pulled from the raw HTML with regexes;
        # cells 1..3 of the matched row are the 5-day, 3-month and 1-year values.
        cells = cell_re.findall(first(row_re.findall(response)))
        day, month, year = (cells[k] if len(cells) > k else '' for k in (1, 2, 3))
        list_day.append(day)
        list_month.append(month)
        list_year.append(year)

    return (list_name, list_score, list_short, list_mid,
            list_long, list_comprehensive, list_day, list_month, list_year)
# Run the scraper once for all codes; this rebinds the name `get` from the
# function to the 9-tuple of lists it returned.
get=get(code)

def baidu(code, timeout=10):
    """Look up the current price and market value of each stock via Baidu search.

    Each code is sent as the search term to ``https://www.baidu.com/s`` and
    the two figures are read from the stock widget in the returned HTML.

    Args:
        code: Iterable of stock-code strings.
        timeout: Seconds to wait for each HTTP response. Optional; without a
            timeout a stalled connection would block the whole run.

    Returns:
        A 2-tuple of parallel lists ``(prices, market_values)`` with one entry
        per input code. A value that is not found on the page is stored as
        ``''`` so both lists keep exactly one entry per code.
    """
    def first(nodes, default=''):
        # XPath results are lists; an empty list means "not on the page".
        return nodes[0] if nodes else default

    # The cookies do not depend on the stock code, so build them once.
    # NOTE(review): these look like values captured from a logged-in browser
    # session (BDUSS in particular). They will presumably expire and should
    # not live in source control - confirm and move them to configuration.
    cookies = {
        'BIDUPSID': '90EF3BD78F53BC8C96DF84CD3854CA2D',
        'PSTM': '1578233930',
        'BD_UPN': '12314753',
        'BAIDUID': '885754C8E6BD7B1A771802631815CC6D:FG=1',
        'BDORZ': 'B490B5EBF6F3CD402E515D22BCDA1598',
        'BDUSS': 'mxYdVpwOEx0eGJsT3VUYTJXbkZJYWhKSGpQWnlqaVBwMlExTWNNRkR4cWtabHRlSVFBQUFBJCQAAAAAAAAAAAEAAACRJsY-cGlwacnxu7AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAKTZM16k2TNeV',
        'COOKIE_SESSION': '7_0_5_3_11_3_0_0_4_2_1_0_73199_0_169_0_1580456363_0_1580456194%7C9%23622712_32_1580376248%7C6',
        'cflag': '13%3A3',
        'BD_HOME': '1',
        'BDRCVFR[feWj1Vr5u3D]': 'I67x6TjHwwYf0',
        'delPer': '0',
        'BD_CK_SAM': '1',
        'PSINO': '3',
        'H_PS_PSSID': '1438_21104_26350',
        'H_PS_645EC': '29b8ZVy4WP7OUTz6%2FjeON9IexqLhOnMXkLTzhD5NfPu4fH%2FPZmThFknleY0LwzNQZ8j8',
        'BDSVRTM': '121',
        'WWW_ST': '1580466352318',
        }

    list_price = []   # current price
    list_market = []  # current market value

    for num in code:
        # Headers and query parameters embed the stock code, so they are
        # rebuilt for every request.
        headers = {
            'is_xhr': '1',
            'Accept-Encoding': 'gzip, deflate, br',
            'Accept-Language': 'zh-CN,zh;q=0.9',
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/63.0.3239.132 Safari/537.36',
            'is_pbs': num,
            'Accept': '*/*',
            'Referer': 'https://www.baidu.com/s?wd='+num+'&rsv_spt=1&rsv_iqid=0xa5a17c8700013159&issp=1&f=8&rsv_bp=1&rsv_idx=2&ie=utf-8&rqlang=cn&tn=baiduhome_pg&rsv_enter=0&rsv_dl=tb&oq='+num+'&rsv_t=29b8ZVy4WP7OUTz6%2FjeON9IexqLhOnMXkLTzhD5NfPu4fH%2FPZmThFknleY0LwzNQZ8j8&rsv_pq=b379448d00013935',
            'X-Requested-With': 'XMLHttpRequest',
            'Connection': 'keep-alive',
            'is_referer': 'https://www.baidu.com/s?wd='+num+'&rsv_spt=1&rsv_iqid=0xa5a17c8700013159&issp=1&f=8&rsv_bp=1&rsv_idx=2&ie=utf-8&tn=baiduhome_pg&rsv_enter=1&rsv_dl=tb&rsv_n=2&rsv_sug3=1&rsv_sug1=1&rsv_sug7=100&rsv_sug2=0&inputT=359&rsv_sug4=359',
            }

        params = (
            ('ie', ['utf-8', 'utf-8']),
            ('newi', '1'),
            ('mod', '1'),
            ('isbd', '1'),
            ('isid', 'b379448d00013935'),
            ('wd', num),
            ('rsv_spt', '1'),
            ('rsv_iqid', '0xa5a17c8700013159'),
            ('issp', '1'),
            ('f', '8'),
            ('rsv_bp', '1'),
            ('rsv_idx', '2'),
            ('rqlang', 'cn'),
            ('tn', 'baiduhome_pg'),
            ('rsv_enter', '0'),
            ('rsv_dl', 'tb'),
            ('oq', num),
            ('rsv_t', '29b8ZVy4WP7OUTz6/jeON9IexqLhOnMXkLTzhD5NfPu4fH/PZmThFknleY0LwzNQZ8j8'),
            ('rsv_pq', 'b379448d00013935'),
            ('bs', num),
            ('rsv_sid', '1438_21104_26350'),
            ('_ss', '1'),
            ('clist', ''),
            ('hsug', ''),
            ('f4s', '1'),
            ('csor', '6'),
            ('_cr1', '29647'),
            )

        response = requests.get(
            'https://www.baidu.com/s',
            headers=headers, params=params, cookies=cookies, timeout=timeout,
        ).text
        html = etree.HTML(response)

        price = html.xpath('//span[@class = "op-stockdynamic-moretab-cur-num c-gap-right-small"]/text()')
        market = html.xpath('//ul[@class = "op-stockdynamic-moretab-info"]/li[8]/span[2]/text()')
        list_price.append(first(price))    # current price
        list_market.append(first(market))  # current market value

    return (list_price, list_market)

# Run the Baidu lookup once for all codes; this rebinds the name `baidu` from
# the function to the (prices, market values) tuple it returned.
baidu=baidu(code)

# Write the data collected by get() and baidu() back into stock.xlsx.
xlsx_path = os.path.join(path, 'stock.xlsx')  # portable, no hard-coded '\\'
xfile = openpyxl.load_workbook(xlsx_path)  # load the workbook
sheet1 = xfile.worksheets[0]
# Target column number -> values to write there. Data starts at B2, i.e.
# column 2, row 2; row 1 holds the headers and column 1 the stock codes.
columns = (
    (2, get[0]),     # stock name
    (3, baidu[0]),   # current price
    (4, baidu[1]),   # current market value
    (5, get[1]),     # overall score
    (6, get[2]),     # short-term trend
    (7, get[3]),     # mid-term trend
    (8, get[4]),     # long-term trend
    (9, get[5]),     # overall verdict
    (10, get[6]),    # 5-day change
    (11, get[7]),    # 3-month change
    (12, get[8]),    # 1-year change
)
for column, values in columns:
    for row, value in enumerate(values, start=2):
        sheet1.cell(row, column).value = value
xfile.save(xlsx_path)

print("爬取完成")

 

 

 
 


免責聲明!

本站轉載的文章為個人學習借鑒使用,本站對版權不負任何法律責任。如果侵犯了您的隱私權益,請聯系本站郵箱yoyou2525@163.com刪除。



 
粵ICP備18138465號   © 2018-2025 CODEPRJ.COM