Python實現爬取全國疫情數據


國難當頭,我們雖然被困在了家里,也要好好學習不是嗎,趁此機會學習一下利用Python如何實現爬取某網站的一些數據

一切從零開始,Python前期的環境安裝和IDE安裝,以及一系列pip install XXX略過......

1.獲取URL

https://ncov.dxy.cn/ncovh5/view/pneumonia?from=timeline&isappinstalled=0

 

 

2.為了避免反爬,偽裝成瀏覽器:

headers = {'user-agent':'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/74.0.3729.131 Safari/537.36'}#

3.最關鍵的一步,分析url,找到數據存放的規律

 

 4.完成代碼,並將數據存入數據庫

from os import path
import requests
from bs4 import BeautifulSoup
import json
import pymysql
import numpy as np

# Fetch the DXY epidemic page and extract the per-province / per-city stats
# that are embedded in the page as a JSON array inside a <script> tag.
url = 'https://ncov.dxy.cn/ncovh5/view/pneumonia?from=timeline&isappinstalled=0'  # request URL
headers = {'user-agent':'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/74.0.3729.131 Safari/537.36'}# spoof a browser UA to dodge basic anti-scraping
response = requests.get(url, headers=headers)  # send the HTTP request
content = response.content.decode('utf-8')

soup = BeautifulSoup(content, 'html.parser')
listA = soup.find_all(name='script', attrs={"id": "getAreaStat"})
# NOTE(review): listB (per-country data) is fetched but never used downstream.
listB = soup.find_all(name='script', attrs={"id": "getListByCountryTypeService2"})

# The script body looks like: try { window.getAreaStat = [ ... ] } catch(e) {}
# Locate the outermost JSON brackets instead of the original hard-coded slice
# account[52:-21], which silently breaks whenever the wrapper text changes.
script_text = str(listA)
start = script_text.index('[', 1)            # skip the '[' of the ResultSet repr itself
end = script_text.rindex(']', 0, len(script_text) - 1)  # skip the trailing ']' of the repr
messages = script_text[start:end + 1]
messages_json = json.loads(messages)

valuesList = []  # one tuple per province, matching the province_map columns
cityList = []    # one tuple per city, matching the city_map columns

for province in messages_json:
    # Province-level row (11 columns). .get() returns None for missing keys,
    # which pymysql will later store as NULL.
    valuesList.append((
        province.get('provinceName'),
        province.get('provinceShortName'),
        province.get('currentConfirmedCount'),
        province.get('confirmedCount'),
        province.get('suspectedCount'),
        province.get('curedCount'),
        province.get('deadCount'),
        province.get('comment'),
        province.get('locationId'),
        province.get('statisticsData'),
        province.get('CreateTime'),
    ))
    # City-level rows; `or []` guards against a province without a 'cities'
    # key (the original crashed on len(None) in that case).
    for city in province.get('cities') or []:
        cityList.append((
            city.get('cityName'),
            city.get('currentConfirmedCount'),
            city.get('confirmedCount'),
            city.get('suspectedCount'),
            city.get('curedCount'),
            city.get('deadCount'),
            city.get('locationId'),
            province.get('provinceShortName'),  # denormalized province name per city row
        ))
    #cityList.append(cityValue)
# Persist the scraped rows into the local MySQL database "payiqing".
# NOTE(review): credentials are hard-coded; move them to config/env in real use.
# Keyword arguments replace the deprecated positional form of pymysql.connect.
db = pymysql.connect(host="localhost", user="root", password="root",
                     database="payiqing", charset='utf8')
cursor = db.cursor()

sql_clean_city = "TRUNCATE TABLE city_map"
sql_clean_province = "TRUNCATE TABLE province_map"
# Parameterized INSERTs — values are bound by the driver, not string-formatted.
sql1 = "INSERT INTO city_map values (%s,%s,%s,%s,%s,%s,%s,%s)"
##全國各省
sql = "INSERT INTO province_map values (%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s) "

value_tuple = tuple(valuesList)
cityTuple = tuple(cityList)
# BUG FIX: the original built `worldTuple = tuple(worldList)` from a name that
# is never defined anywhere in the script, so execution always died with a
# NameError before reaching the database. The world data was never collected
# or inserted, so that line is removed entirely.

try:
    try:
        cursor.execute(sql_clean_province)
        db.commit()
    except pymysql.MySQLError as exc:  # narrow except: don't swallow KeyboardInterrupt etc.
        print('執行失敗,進入回調1', exc)
        db.rollback()

    try:
        cursor.execute(sql_clean_city)
        db.commit()  # the original omitted this commit; added for consistency with the branch above
    except pymysql.MySQLError as exc:
        print('執行失敗,進入回調2', exc)
        db.rollback()

    try:
        cursor.executemany(sql, value_tuple)
        db.commit()
    except pymysql.MySQLError as exc:
        print('執行失敗,進入回調3', exc)
        db.rollback()

    try:
        cursor.executemany(sql1, cityTuple)
        db.commit()
    except pymysql.MySQLError as exc:
        print('執行失敗,進入回調4', exc)
        db.rollback()
finally:
    # Always release the connection, even if an unexpected error escapes above.
    db.close()

 

 

 

 5.結果

 

 

 

 


免責聲明!

本站轉載的文章為個人學習借鑒使用,本站對版權不負任何法律責任。如果侵犯了您的隱私權益,請聯系本站郵箱yoyou2525@163.com刪除。



 
粵ICP備18138465號   © 2018-2025 CODEPRJ.COM