Python 簡單抓取網頁並寫入 Excel 實例


# -*- coding: UTF-8 -*-
import requests
from bs4 import BeautifulSoup  
import xlwt  
import time 
 
 
def get_one_page(url, timeout=10):
    """Download a page and return its body as text.

    Args:
        url: Address of the page to fetch.
        timeout: Seconds to wait for the server before giving up. Without
            a timeout ``requests.get`` may block indefinitely on an
            unresponsive host.

    Returns:
        The response body as text when the server answers with HTTP 200,
        otherwise ``None``.

    Raises:
        requests.RequestException: If the request fails at the network
            level (connection error, timeout, ...).
    """
    headers = {
        # Browser-like User-Agent sent with every request.
        'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/64.0.3282.140 Safari/537.36',
    }
    response = requests.get(url, headers=headers, timeout=timeout)
    if response.status_code == 200:
        return response.text
    return None
 
def parse_one_page(html):
    """Parse one result page and yield a dict per table row.

    Args:
        html: Page markup as text. ``None`` or an empty string (for
            example when the download did not succeed) yields nothing
            instead of raising.

    Yields:
        dict: One record per row with the keys ``time``, ``issue``,
        ``digits``, ``ten_digits``, ``hundred_digits``,
        ``single_selection``, ``group_selection_3``,
        ``group_selection_6``, ``sales`` and ``return_rates``. All values
        are the raw cell text.

    Raises:
        IndexError: If a row has fewer ``<td>`` or ``<em>`` elements than
            the fields read below.
    """
    # Nothing to parse when the page could not be downloaded.
    if not html:
        return

    soup = BeautifulSoup(html, 'lxml')
    # The first two <tr> rows and the last one are skipped; presumably
    # table headers and a trailing pager row -- confirm against the page.
    for row in soup.select('tr')[2:-1]:
        # Query each row once instead of once per field.
        cells = row.select('td')
        digits = row.select('td em')
        yield {
            'time': cells[0].text,
            'issue': cells[1].text,
            # The three <em> elements carry the drawn digits; cells[2]
            # is therefore not read directly.
            'digits': digits[0].text,
            'ten_digits': digits[1].text,
            'hundred_digits': digits[2].text,
            'single_selection': cells[3].text,
            'group_selection_3': cells[4].text,
            'group_selection_6': cells[5].text,
            'sales': cells[6].text,
            'return_rates': cells[7].text,
        }
 
 
def write_to_excel(first_page=1, last_page=246, filename='3D.xls'):
    """Scrape the result pages and store every row in an Excel workbook.

    Args:
        first_page: Number of the first list page to fetch (inclusive).
        last_page: Number of the last list page to fetch (inclusive).
        filename: Path of the ``.xls`` file to write.

    Pages for which ``get_one_page`` returns ``None`` are skipped with a
    message, so one failed page no longer aborts the run before the
    workbook is saved.
    """
    workbook = xlwt.Workbook()
    sheet = workbook.add_sheet('3D', cell_overwrite_ok=True)

    # Header row. The sixth column holds ``single_selection``, hence
    # "單選" (the original "單數" did not match the field it labels).
    header = ["開獎日期", "期號", "個位數", "十位數", "百位數", "單選", "組選3", "組選6", "銷售額", "返獎比例"]
    for col, title in enumerate(header):
        sheet.write(0, col, title)

    # Keys of the dicts produced by parse_one_page, in column order.
    fields = (
        'time',
        'issue',
        'digits',
        'ten_digits',
        'hundred_digits',
        'single_selection',
        'group_selection_3',
        'group_selection_6',
        'sales',
        'return_rates',
    )

    # Row 0 is the header, so data starts at row 1 and keeps growing
    # across pages.
    row = 1
    for page in range(first_page, last_page + 1):
        url = 'http://kaijiang.zhcw.com/zhcw/html/3d/list_%s.html' % page
        html = get_one_page(url)
        if html is None:
            print('第%d頁獲取失敗,已跳過。' % page)
            continue
        print('正在保存第%d頁。' % page)
        for item in parse_one_page(html):
            for col, key in enumerate(fields):
                sheet.write(row, col, item[key])
            row += 1

    workbook.save(filename)
 
def main():
    """Script entry point: run the scrape-and-export routine."""
    write_to_excel()


if __name__ == "__main__":
    main()

 


免責聲明!

本站轉載的文章為個人學習借鑒使用,本站對版權不負任何法律責任。如果侵犯了您的隱私權益,請聯繫本站郵箱 yoyou2525@163.com 刪除。



 
粵ICP備18138465號   © 2018-2025 CODEPRJ.COM