爬蟲初學——爬取中國大學排名並存為csv文件


鏈接:軟科中國最好大學排名2016

代碼:

 1 # -*- coding: utf-8 -*-
 2 """
 3 Created on Mon May 27 21:10:59 2019
 4 
 5 @author: Benny
 6 """
 7 
 8 import csv
 9 import os
10 import requests
11 import pandas
12 from bs4 import BeautifulSoup
# Shared accumulator: fillUnivList() appends one list of cell values per
# table row here, and writercsv() reads its rows back out when saving.
allUniv = list()
def getHTMLText(url):
    """Download ``url`` and return its body decoded as UTF-8.

    Args:
        url: Address of the page to fetch.

    Returns:
        The response text, or an empty string when the request fails
        (connection problem, timeout after 30 seconds, malformed URL, or
        an HTTP error status).
    """
    try:
        r = requests.get(url, timeout=30)
        # Convert 4xx/5xx responses into exceptions so they take the same
        # failure path as network errors.
        r.raise_for_status()
    except requests.RequestException:
        # Best-effort fetch: callers treat "" as "no page available".
        # Only request-related errors are swallowed, so Ctrl-C and genuine
        # programming errors still surface.
        return ""
    # Force UTF-8 rather than relying on the encoding guessed from headers.
    r.encoding = 'utf-8'
    return r.text
def fillUnivList(soup, univ_list=None):
    """Append the cell values of every data row in ``soup`` to a list.

    Args:
        soup: Parsed document. It must provide ``find_all('tr')``, and each
            returned row must provide ``find_all('td')``.
        univ_list: List that receives one ``list`` of cell values per row.
            Defaults to the module-level ``allUniv`` accumulator.

    Returns:
        The list that was appended to (``univ_list`` or ``allUniv``).
    """
    if univ_list is None:
        univ_list = allUniv
    for tr in soup.find_all('tr'):
        cells = tr.find_all('td')
        if not cells:
            # Rows without any <td> (for example a header row) carry no data.
            continue
        # ``td.string`` is kept as-is; it can be None for a cell, in which
        # case csv.writer later emits an empty field.
        univ_list.append([td.string for td in cells])
    return univ_list
def writercsv(save_road, num, title, rows=None, encoding=None):
    """Write up to ``num`` rows to the CSV file at ``save_road``.

    If the file does not exist yet it is created and ``title`` is written
    as the header row first; if it already exists the rows are appended
    and no header is written.

    Args:
        save_road: Path of the CSV file.
        num: Maximum number of rows to write. When fewer rows are
            available, all of them are written instead of raising
            ``IndexError``; values below zero write nothing.
        title: Sequence of column names for the header row.
        rows: Sequence of rows to take from. Defaults to the module-level
            ``allUniv`` list.
        encoding: Text encoding passed to ``open``. ``None`` keeps the
            platform default, as before.

    Raises:
        OSError: If the file cannot be opened for writing.
    """
    if rows is None:
        rows = allUniv
    # Slicing tolerates num > len(rows); max() keeps a negative num from
    # being interpreted as "all but the last n".
    selected = rows[:max(num, 0)]
    # Decide about the header before opening: append mode creates the file.
    needs_header = not os.path.isfile(save_road)
    with open(save_road, 'a', newline='', encoding=encoding) as f:
        csv_write = csv.writer(f, dialect='excel')
        if needs_header:
            csv_write.writerow(title)
        csv_write.writerows(selected)
46  
# Header row written once at the top of a newly created CSV file.
title = [
    "排名", "學校名稱", "省市", "總分", "生源質量", "培養結果", "科研規模",
    "科研質量", "頂尖成果", "頂尖人才", "科技服務", "產學研究合作", "成果轉化",
]
# Destination CSV file. Every backslash is escaped: the previous literal
# contained "\s", an invalid escape sequence that only happened to yield
# the same path and triggers a warning on recent Python versions.
save_road = "C:\\Users\\Benny\\Desktop\\Python\\Python練習\\sqlit_test02.csv"
def main():
    """Fetch the 2016 ranking page and save its first ten rows as CSV.

    Downloads the page, collects the table rows into ``allUniv`` and writes
    at most ten of them to ``save_road``. Nothing is written when the page
    could not be retrieved.
    """
    url = 'http://www.zuihaodaxue.cn/zuihaodaxuepaiming2016.html'
    html = getHTMLText(url)
    if not html:
        # getHTMLText returns "" on failure; stop here rather than
        # producing a CSV from an empty document.
        print("No content retrieved from " + url + "; nothing written.")
        return
    soup = BeautifulSoup(html, "html.parser")
    fillUnivList(soup)
    # Never request more rows than were actually scraped.
    writercsv(save_road, min(10, len(allUniv)), title)


# Run only when executed as a script, not when the module is imported.
if __name__ == "__main__":
    main()

文件截屏如下:(這裡只保存了前十名的數據,可以通過更改 num 來保存更多)

 


免責聲明!

本站轉載的文章為個人學習借鑒使用,本站對版權不負任何法律責任。如果侵犯了您的隱私權益,請聯繫本站郵箱yoyou2525@163.com刪除。



 
粵ICP備18138465號   © 2018-2025 CODEPRJ.COM