将爬虫数据导出CSV格式


流程:寻找需要爬的网页(小编的爬取网页:http://www.gaosan.com/gaokao/239012.html)→打开Spyder→输入爬虫代码→查看爬取结果→写入到CSV文件中

输出CSV格式核心代码:

def writercsv(save_road, num, title, rows=None):
    """Append the first *num* scraped rows to a CSV file.

    Writes *title* as a header row only when the file does not already
    exist, so repeated runs keep appending data under a single header.

    Args:
        save_road: destination CSV path.
        num: number of rows (from the start of *rows*) to write.
        title: header row, written once on file creation.
        rows: row source; defaults to the module-level allUniv list.
    """
    if rows is None:
        rows = allUniv
    # Decide before opening whether a header is needed: a fresh file is
    # opened in 'w' mode (header + data), an existing one in 'a' (data only).
    new_file = not os.path.isfile(save_road)
    with open(save_road, 'w' if new_file else 'a', newline='') as f:
        writer = csv.writer(f, dialect='excel')
        if new_file:
            writer.writerow(title)
        # Slicing guards against num exceeding the number of scraped rows.
        writer.writerows(rows[:num])
                
# CSV header: rank, school name, composite score, province.
title = ["排名", "学校名称", "综合得分", "省份"]


def main():
    """Scrape the ranking page, print the rows, and export them to CSV."""
    url = 'http://www.gaosan.com/gaokao/239012.html'
    # Bug fix: the helper is defined as getHtmlText, not getHTMLText.
    html = getHtmlText(url)
    soup = BeautifulSoup(html, "html.parser")
    fillUnivList(soup)
    printUnivList(250)
    # 定义输出路径和行数,以及标题
    # Raw string avoids the invalid '\爬' escape sequence in the Windows path.
    writercsv(r'E:\python\爬虫数据.csv', 250, title)

  完整爬虫代码:

import codecs
import csv
import os

import requests
from bs4 import BeautifulSoup
# Accumulated scrape results: one [rank, name, score, province] list per university.
allUniv=[]
def getHtmlText(url):
    """Fetch *url* and return the decoded body, or "" on any request failure.

    The response encoding is replaced with the content-sniffed one so the
    Chinese page text decodes correctly even without a declared charset.
    """
    try:
        r = requests.get(url, timeout=30)
        r.raise_for_status()
        r.encoding = r.apparent_encoding
        return r.text
    except requests.RequestException:
        # Narrowed from a bare except: network/HTTP errors degrade to an
        # empty page instead of silently swallowing e.g. KeyboardInterrupt.
        return ""
def fillUnivList(soup):
    """Collect one list of cell strings per table row into the global allUniv."""
    for row in soup.find_all('tr'):
        cells = row.find_all('td')
        if not cells:
            # Header/separator rows carry no <td> cells; skip them.
            continue
        allUniv.append([cell.string for cell in cells])
def printUnivList(num):
    """Pretty-print the first *num* scraped rows as an aligned table.

    Args:
        num: maximum number of rows to print.
    """
    # Single shared format string keeps the header and rows aligned.
    fmt = "{:^10}{:^10}{:^5}{:^8}"
    print(fmt.format("排名", "学校名称", "综合得分", "省份"))
    # Slicing guards against num exceeding the number of scraped rows
    # (the original allUniv[i] indexing raised IndexError in that case).
    for u in allUniv[:num]:
        print(fmt.format(u[0], u[1], u[2], u[3]))
def writercsv(save_road, num, title, rows=None):
    """Append the first *num* scraped rows to a CSV file.

    Writes *title* as a header row only when the file does not already
    exist, so repeated runs keep appending data under a single header.

    Args:
        save_road: destination CSV path.
        num: number of rows (from the start of *rows*) to write.
        title: header row, written once on file creation.
        rows: row source; defaults to the module-level allUniv list.
    """
    if rows is None:
        rows = allUniv
    # Decide before opening whether a header is needed: a fresh file is
    # opened in 'w' mode (header + data), an existing one in 'a' (data only).
    new_file = not os.path.isfile(save_road)
    with open(save_road, 'w' if new_file else 'a', newline='') as f:
        writer = csv.writer(f, dialect='excel')
        if new_file:
            writer.writerow(title)
        # Slicing guards against num exceeding the number of scraped rows.
        writer.writerows(rows[:num])
                
# CSV header: rank, school name, composite score, province.
title = ["排名", "学校名称", "综合得分", "省份"]


def main():
    """Scrape the ranking page, print the rows, and export them to CSV."""
    url = 'http://www.gaosan.com/gaokao/239012.html'
    # Bug fix: the helper is defined as getHtmlText, not getHTMLText.
    html = getHtmlText(url)
    soup = BeautifulSoup(html, "html.parser")
    fillUnivList(soup)
    printUnivList(250)
    # 定义输出路径和行数,以及标题
    # Raw string avoids the invalid '\爬' escape sequence in the Windows path.
    writercsv(r'E:\python\爬虫数据.csv', 250, title)


if __name__ == "__main__":
    main()

  爬取结果,如下图:

在相对应的路径下生成CSV文件

文件打开效果图:

 


免责声明!

本站转载的文章为个人学习借鉴使用,本站对版权不负任何法律责任。如果侵犯了您的隐私权益,请联系本站邮箱yoyou2525@163.com删除。



 
粤ICP备18138465号  © 2018-2025 CODEPRJ.COM