爬取最新疫情數據


題目:

編程爬取每日最新的疫情統計數據。

並將爬取結果導入到數據庫中。

將可視化結果與統計數據結合,實時顯示當前最新數據。

這次的作業可以與上週的可視化作業整合成一份完整的代碼,只需要在這次加上 Python 爬取數據的部分即可。

本次爬取的是丁香醫生網站的數據,網址為:https://ncov.dxy.cn/ncovh5/view/pneumonia

爬取的代碼如下:

import json
import re
import time
from _ast import Try
from os import path

import pymysql
import requests
from bs4 import BeautifulSoup
#import numpy as np

# Scrape the latest epidemic statistics from the DXY page and store one
# snapshot (province rows + city rows) in the MySQL table ``info_copy``.

url = 'https://ncov.dxy.cn/ncovh5/view/pneumonia?from=timeline&isappinstalled=0'  # request address
headers = {'user-agent':'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/74.0.3729.131 Safari/537.36'}  # browser-like request headers

# <script id="..."> element that carries the per-province statistics.
# The world totals sit in the script with id "getListByCountryTypeService2"
# and can be read with the same read_script_json() helper if they are needed.
AREA_SCRIPT_ID = "getAreaStat"

# Every value, including the snapshot timestamp, is bound as a parameter
# instead of being concatenated into the SQL text.
SQL_PROVINCE = "insert into info_copy (Province,Confirmed_num,Cured_num,Dead_num,Code,Date) values (%s,%s,%s,%s,%s,%s)"
SQL_CITY = "insert into info_copy (Province,City,Confirmed_num,Cured_num,Dead_num,Code,Date) values (%s,%s,%s,%s,%s,%s,%s)"

# The page embeds its data as ``try { window.<name> = <JSON> }catch(e){}``.
_ASSIGNMENT_RE = re.compile(r"window\.\w+\s*=\s*")


def extract_json_payload(script_text):
    """Return the JSON value assigned to ``window.<name>`` in *script_text*.

    The payload is located by the assignment itself and decoded with
    ``raw_decode``, which stops at the end of the JSON value, so the
    trailing ``}catch(e){}`` wrapper (whatever its length) is ignored.

    Raises:
        ValueError: if no ``window.<name> = `` assignment is present or the
            text that follows it is not valid JSON.
    """
    match = _ASSIGNMENT_RE.search(script_text or "")
    if match is None:
        raise ValueError("no 'window.<name> = ...' assignment found in script text")
    payload, _end = json.JSONDecoder().raw_decode(script_text, match.end())
    return payload


def read_script_json(soup, script_id):
    """Decode the JSON payload of the ``<script id=script_id>`` tag in *soup*.

    Raises:
        ValueError: if the tag is missing or carries no decodable payload.
    """
    tag = soup.find(name='script', attrs={"id": script_id})
    if tag is None:
        raise ValueError("script tag with id %r not found in page" % script_id)
    return extract_json_payload(str(tag))


def fetch_page(page_url=url, request_headers=headers, timeout=10):
    """Download the statistics page and return its UTF-8 decoded source.

    A timeout keeps the script from hanging forever on a stalled connection,
    and HTTP error statuses raise instead of being parsed as data.
    """
    response = requests.get(page_url, headers=request_headers, timeout=timeout)
    response.raise_for_status()
    return response.content.decode('utf-8')


def build_rows(provinces, stamp):
    """Flatten the province list into insertable row tuples.

    Args:
        provinces: decoded ``getAreaStat`` payload, a list of dicts; each dict
            may carry a ``cities`` list of dicts.
        stamp: snapshot timestamp string stored in the ``Date`` column.

    Returns:
        ``(province_rows, city_rows)`` whose tuples match the placeholder
        order of ``SQL_PROVINCE`` and ``SQL_CITY`` respectively.
    """
    province_rows = []
    city_rows = []
    for province in provinces:
        province_name = province.get('provinceName')
        province_rows.append((
            province_name,
            province.get('confirmedCount'),
            province.get('curedCount'),
            province.get('deadCount'),
            province.get('locationId'),
            stamp,
        ))
        # A province without a 'cities' key (or with null) has no city rows.
        for city in province.get('cities') or []:
            city_rows.append((
                province_name,
                city.get('cityName'),
                city.get('confirmedCount'),
                city.get('curedCount'),
                city.get('deadCount'),
                city.get('locationId'),
                stamp,
            ))
    return province_rows, city_rows


def save_rows(province_rows, city_rows):
    """Insert both row sets in a single transaction.

    Returns:
        True when the transaction was committed, False when it was rolled back.
    """
    # Keyword arguments: positional connect() arguments are rejected by
    # PyMySQL 1.0 and later.
    db = pymysql.connect(host="localhost", user="root", password="123456",
                         database="payiqing", charset='utf8')
    try:
        with db.cursor() as cursor:
            cursor.executemany(SQL_PROVINCE, province_rows)
            cursor.executemany(SQL_CITY, city_rows)
        db.commit()
    except pymysql.MySQLError as exc:
        print('執行失敗,進入回調4')
        print(exc)  # show the actual database error instead of hiding it
        db.rollback()
        return False
    finally:
        # Always release the connection, even if rollback() itself fails.
        db.close()
    return True


def main():
    """Fetch the page, extract the statistics and store one snapshot."""
    content = fetch_page()
    soup = BeautifulSoup(content, 'html.parser')
    provinces = read_script_json(soup, AREA_SCRIPT_ID)

    stamp = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime())
    print(stamp)

    province_rows, city_rows = build_rows(provinces, stamp)
    save_rows(province_rows, city_rows)


if __name__ == "__main__":
    main()

加上上次的代碼,效果如下圖所示:

 

 

 

 PSP 表格

 

缺陷記錄日志

 


免責聲明!

本站轉載的文章為個人學習借鑒使用,本站對版權不負任何法律責任。如果侵犯了您的隱私權益,請聯繫本站郵箱yoyou2525@163.com刪除。



 
粵ICP備18138465號   © 2018-2025 CODEPRJ.COM