題目:
編程爬取每日最新的疫情統計數據。
並將爬取結果導入到數據庫中。
將可視化結果與統計數據結合,實時顯示當前最新數據。
這次的作業可以與上周的可視化作業整合成一份完整的代碼,只需要在這次加上用 Python 爬取數據的部分即可。
本次爬取的是丁香醫生網站的數據,網址為:https://ncov.dxy.cn/ncovh5/view/pneumonia
爬取的代碼如下:
"""Scrape the DXY COVID-19 page and load province/city counts into MySQL.

The page embeds its statistics as a JSON array inside
``<script id="getAreaStat">``. This script downloads the page, decodes that
array, flattens it into one row per province and one row per city, and
inserts all rows into the ``info_copy`` table stamped with the local time of
the run.
"""
import json
import time

# Request address.
URL = 'https://ncov.dxy.cn/ncovh5/view/pneumonia?from=timeline&isappinstalled=0'
# Browser-like header sent with the request.
HEADERS = {'user-agent':'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/74.0.3729.131 Safari/537.36'}
# Seconds to wait for the site before giving up instead of hanging forever.
REQUEST_TIMEOUT = 30
# id of the <script> element that carries the per-province statistics.
AREA_SCRIPT_ID = 'getAreaStat'

# Connection settings, passed as keyword arguments because PyMySQL >= 1.0
# no longer accepts positional ones.
# NOTE(review): credentials are hard-coded; consider loading them from config.
DB_CONFIG = {
    'host': 'localhost',
    'user': 'root',
    'password': '123456',
    'database': 'payiqing',
    'charset': 'utf8',
}

# The timestamp is bound as the last parameter rather than spliced into the
# SQL text.
SQL_PROVINCE = (
    "insert into info_copy "
    "(Province,Confirmed_num,Cured_num,Dead_num,Code,Date) "
    "values (%s,%s,%s,%s,%s,%s)"
)
SQL_CITY = (
    "insert into info_copy "
    "(Province,City,Confirmed_num,Cured_num,Dead_num,Code,Date) "
    "values (%s,%s,%s,%s,%s,%s,%s)"
)


def extract_json_array(script_text):
    """Return the first JSON array embedded in *script_text*.

    The text is expected to look like
    ``try { window.getAreaStat = [...]}catch(e){}``; decoding starts at the
    first ``[`` and stops where the array ends, so the surrounding JavaScript
    does not have to be trimmed with fixed offsets.

    Raises:
        ValueError: if the text contains no ``[`` or the array is not valid
            JSON (``json.JSONDecodeError`` is a ``ValueError`` subclass).
    """
    start = script_text.find('[')
    if start == -1:
        raise ValueError('no JSON array found in script text')
    data, _end = json.JSONDecoder().raw_decode(script_text, start)
    return data


def build_rows(provinces, timestamp):
    """Flatten the decoded statistics into insert-ready tuples.

    Args:
        provinces: list of province dicts as decoded from the page; each may
            carry a ``cities`` list of city dicts.
        timestamp: value stored in the ``Date`` column of every row.

    Returns:
        ``(province_rows, city_rows)`` where each province row is
        ``(provinceName, confirmedCount, curedCount, deadCount, locationId,
        timestamp)`` and each city row is ``(provinceName, cityName,
        confirmedCount, curedCount, deadCount, locationId, timestamp)``.
        Missing keys yield ``None`` in the corresponding position.
    """
    province_rows = []
    city_rows = []
    for province in provinces:
        province_name = province.get('provinceName')
        province_rows.append((
            province_name,
            province.get('confirmedCount'),
            province.get('curedCount'),
            province.get('deadCount'),
            province.get('locationId'),
            timestamp,
        ))
        # A province may have no city breakdown (key missing or null).
        for city in province.get('cities') or []:
            city_rows.append((
                province_name,
                city.get('cityName'),
                city.get('confirmedCount'),
                city.get('curedCount'),
                city.get('deadCount'),
                city.get('locationId'),
                timestamp,
            ))
    return province_rows, city_rows


def fetch_area_stats():
    """Download the page and return the decoded per-province statistics.

    Raises:
        requests.RequestException: on network failure, timeout or a non-2xx
            HTTP status.
        ValueError: if the statistics script is absent, empty or malformed.
    """
    # Imported here so the pure helpers above stay importable (and testable)
    # on machines without the scraping dependencies installed.
    import requests
    from bs4 import BeautifulSoup

    response = requests.get(URL, headers=HEADERS, timeout=REQUEST_TIMEOUT)
    response.raise_for_status()
    soup = BeautifulSoup(response.content.decode('utf-8'), 'html.parser')
    tag = soup.find(name='script', attrs={'id': AREA_SCRIPT_ID})
    if tag is None or tag.string is None:
        raise ValueError('script #%s not found in page' % AREA_SCRIPT_ID)
    return extract_json_array(str(tag.string))


def save_rows(province_rows, city_rows):
    """Insert all rows in one transaction; roll back if the database fails.

    A database error is reported on stdout and the transaction is rolled
    back; the connection is closed in every case.
    """
    # Imported here for the same reason as the scraping libraries above.
    import pymysql

    db = pymysql.connect(**DB_CONFIG)
    try:
        with db.cursor() as cursor:
            cursor.executemany(SQL_PROVINCE, province_rows)
            cursor.executemany(SQL_CITY, city_rows)
        db.commit()
    except pymysql.MySQLError as exc:
        print('執行失敗,進入回調4', exc)
        db.rollback()
    finally:
        db.close()


def main():
    """Fetch the latest statistics and store them with the current time."""
    provinces = fetch_area_stats()
    timestamp = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime())
    print(timestamp)
    province_rows, city_rows = build_rows(provinces, timestamp)
    save_rows(province_rows, city_rows)


if __name__ == '__main__':
    main()
加上上次的代碼,效果如下圖所示:


psp表格

缺陷記錄日志

