將爬蟲數據導出CSV格式


流程:尋找需要爬的網頁(小編的爬取網頁:http://www.gaosan.com/gaokao/239012.html)→打開Spyder→輸入爬蟲代碼→查看爬取結果→寫入到CSV文件中

輸出CSV格式核心代碼:

def writercsv(save_road, num, title, rows=None):
    """Write the first *num* scraped rows to a CSV file.

    The header *title* is written only when the file does not exist yet;
    otherwise rows are appended after the existing content.

    Args:
        save_road: destination CSV file path.
        num: number of rows (from the start of the data) to write.
        title: header row, written only for a newly created file.
        rows: row source; defaults to the module-level ``allUniv`` list.
    """
    data = allUniv if rows is None else rows
    is_new = not os.path.isfile(save_road)
    # newline='' stops the csv module from emitting blank lines on Windows.
    with open(save_road, 'w' if is_new else 'a', newline='') as f:
        writer = csv.writer(f, dialect='excel')
        if is_new:
            writer.writerow(title)
        # Slice instead of indexing so num > len(data) cannot raise IndexError.
        writer.writerows(data[:num])
                
# CSV header matching the four columns scraped for each university row.
title = ["排名", "學校名稱", "綜合得分", "省份"]


def main():
    """Scrape the ranking page, print the top 250 rows, and save them to CSV."""
    url = 'http://www.gaosan.com/gaokao/239012.html'
    html = getHTMLText(url)
    soup = BeautifulSoup(html, "html.parser")
    fillUnivList(soup)
    printUnivList(250)
    # Output path, row count and header. Backslash is doubled: the original
    # '\爬' is an unrecognized escape sequence (SyntaxWarning on Python 3.12+).
    writercsv('E:\\python\\爬蟲數據.csv', 250, title)

  完整爬蟲代碼:

import codecs
import csv
import os

import requests
from bs4 import BeautifulSoup
# Accumulator filled by fillUnivList(): one list of <td> strings per table row
# (presumably [rank, name, score, province] — the columns printUnivList prints).
allUniv=[]
def getHTMLText(url):
    """Fetch *url* and return the decoded page text, or "" on any request error.

    NOTE(review): the original definition was named ``getHtmlText`` while both
    call sites invoke ``getHTMLText`` — renamed to match the callers, otherwise
    the script dies with NameError before scraping anything.
    """
    try:
        r = requests.get(url, timeout=30)
        r.raise_for_status()
        # Guess the real encoding from the body instead of trusting the header.
        r.encoding = r.apparent_encoding
        return r.text
    except requests.RequestException:
        # Best-effort fetch: callers treat "" as "page unavailable".
        return ""
def fillUnivList(soup):
    """Collect every <td>-bearing table row from *soup* into ``allUniv``.

    Rows without any <td> cells (e.g. header rows) are skipped.
    """
    for row in soup.find_all('tr'):
        cells = row.find_all('td')
        if not cells:
            continue
        allUniv.append([cell.string for cell in cells])
def printUnivList(num):
    """Print the first *num* rows of ``allUniv`` as an aligned table."""
    layout = "{:^10}{:^10}{:^5}{:^8}"
    print(layout.format("排名", "學校名稱", "綜合得分", "省份"))
    for idx in range(num):
        entry = allUniv[idx]
        print(layout.format(entry[0], entry[1], entry[2], entry[3]))
def writercsv(save_road, num, title, rows=None):
    """Write the first *num* scraped rows to a CSV file.

    The header *title* is written only when the file does not exist yet;
    otherwise rows are appended after the existing content.

    Args:
        save_road: destination CSV file path.
        num: number of rows (from the start of the data) to write.
        title: header row, written only for a newly created file.
        rows: row source; defaults to the module-level ``allUniv`` list.
    """
    data = allUniv if rows is None else rows
    is_new = not os.path.isfile(save_road)
    # newline='' stops the csv module from emitting blank lines on Windows.
    with open(save_road, 'w' if is_new else 'a', newline='') as f:
        writer = csv.writer(f, dialect='excel')
        if is_new:
            writer.writerow(title)
        # Slice instead of indexing so num > len(data) cannot raise IndexError.
        writer.writerows(data[:num])
                
# CSV header matching the four columns scraped for each university row.
title = ["排名", "學校名稱", "綜合得分", "省份"]


def main():
    """Scrape the ranking page, print the top 250 rows, and save them to CSV."""
    url = 'http://www.gaosan.com/gaokao/239012.html'
    html = getHTMLText(url)
    soup = BeautifulSoup(html, "html.parser")
    fillUnivList(soup)
    printUnivList(250)
    # Output path, row count and header. Backslash is doubled: the original
    # '\爬' is an unrecognized escape sequence (SyntaxWarning on Python 3.12+).
    writercsv('E:\\python\\爬蟲數據.csv', 250, title)


# Guard the entry point so importing this module does not trigger the scrape.
if __name__ == "__main__":
    main()

  爬取結果,如下圖:

在相對應的路徑下生成CSV文件

文件打開效果圖:

 


免責聲明!

本站轉載的文章為個人學習借鑒使用,本站對版權不負任何法律責任。如果侵犯了您的隱私權益,請聯系本站郵箱yoyou2525@163.com刪除。



 
粵ICP備18138465號   © 2018-2025 CODEPRJ.COM