Python爬取中國疫情的實時數據


一、中國疫情數據的實時爬取

1、表結構(MySQL)

 

 

2、代碼部分(數據:丁香醫生)

 

import requests
from bs4 import BeautifulSoup
import json
import time
from pymysql import *

def mes():
    """Fetch the current province/city figures from DXY and store them.

    Downloads the DXY pneumonia page, extracts the JSON array embedded in the
    ``<script id="getAreaStat">`` tag, builds one row per province and one row
    per city, and hands both lists to ``insert``.

    Each row is an 8-tuple matching the placeholders used by ``insert``:
    ``(id, time, provinceName, cityName, confirmedCount, suspectedCount,
    curedCount, deadCount)``. ``cityName`` is ``None`` for province rows.

    Ids continue from the current row count reported by ``total()``: the
    provinces take the next ``len(provinces)`` ids and the cities follow.

    Returns:
        None. The function returns early, without inserting anything, when
        the script tag or its JSON array cannot be found or when ``total()``
        reports a failure.
    """
    url = 'https://ncov.dxy.cn/ncovh5/view/pneumonia?from=timeline&isappinstalled=0'  # request URL
    headers = {'user-agent':'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/76.0.3809.87 Safari/537.36 SLBrowser/6.0.1.6181'}  # browser-like request header
    # A bounded timeout keeps a stalled connection from hanging the script.
    resp = requests.get(url, headers=headers, timeout=30)
    content = resp.content.decode('utf-8')
    soup = BeautifulSoup(content, 'html.parser')

    script_tag = soup.find(name='script', attrs={"id": "getAreaStat"})
    if script_tag is None:
        print('未找到 getAreaStat 腳本標籤')
        return

    # The tag body looks like ``try { window.getAreaStat = [...]}catch(e){}``.
    # Slice from the first '[' to the last ']' instead of stripping an exact
    # prefix/suffix, so whitespace changes in the wrapper do not break parsing.
    raw = str(script_tag)
    start = raw.find('[')
    end = raw.rfind(']')
    if start == -1 or end < start:
        print('getAreaStat 腳本中未找到 JSON 數據')
        return
    messages_json = json.loads(raw[start:end + 1])
    print(messages_json)

    times = time.strftime('%Y-%m-%d %H:%M:%S')
    print(times)

    existing = total()
    if existing is None:
        # total() has already printed its failure message; without a row
        # count no ids can be assigned, so stop here.
        return

    provinceList = []
    cityList = []
    province_id = existing
    # Rows already in the table + today's province rows = where city ids start.
    city_id = existing + len(messages_json)
    for province in messages_json:
        province_id += 1
        province_name = province['provinceName']
        provinceList.append((
            province_id,
            times,
            province_name,
            None,
            province['confirmedCount'],
            province['suspectedCount'],
            province['curedCount'],
            province['deadCount'],
        ))
        for city in province['cities']:
            city_id += 1
            cityList.append((
                city_id,
                times,
                province_name,
                city['cityName'],
                city['confirmedCount'],
                # Use the city's own figure; fall back to the province value
                # only if the city entry carries no suspectedCount.
                city.get('suspectedCount', province['suspectedCount']),
                city['curedCount'],
                city['deadCount'],
            ))
    insert(provinceList, cityList)


def insert(provinceList, cityList):
    """Insert the province rows and then the city rows into the ``China`` table.

    Each batch is written with ``executemany`` and committed on its own, so a
    failure in one batch is rolled back without undoing the other.

    Args:
        provinceList: iterable of 8-value rows for provinces.
        cityList: iterable of 8-value rows for cities.

    Returns:
        None. Errors are printed and the failed batch is rolled back; they
        are not re-raised.
    """
    sql = "insert into China values (%s,%s,%s,%s,%s,%s,%s,%s) "
    cursor = db.cursor()
    try:
        for rows in (tuple(provinceList), tuple(cityList)):
            try:
                cursor.executemany(sql, rows)
                db.commit()
                # Report success only once the commit has gone through.
                print("插入成功")
            except Exception as e:
                print(e)
                db.rollback()
    finally:
        # Release the cursor even if rollback itself raises.
        cursor.close()
def total():
    """Return the number of rows currently in the ``China`` table.

    The count is computed by the database (``count(*)``) rather than by
    fetching every row and measuring the result in Python.

    Returns:
        The row count, or ``None`` when the query fails (a message is printed
        and the transaction is rolled back in that case).
    """
    sql = "select count(*) from China"
    cursor = db.cursor()
    try:
        cursor.execute(sql)
        # NOTE(review): indexes the row positionally, which assumes the
        # connection uses PyMySQL's default tuple cursor — confirm if the
        # cursor class is ever changed.
        lenth = cursor.fetchone()[0]
        db.commit()
        return lenth
    except Exception:
        print('執行失敗,進入回調1')
        db.rollback()
        return None
    finally:
        cursor.close()

# Open the database connection
def connectDB():
    """Connect to the local MySQL database ``yiqing``.

    Returns:
        The connection object on success, or ``None`` when connecting fails
        (the error is printed, not raised).
    """
    # NOTE(review): host and credentials are hard-coded; consider loading
    # them from configuration or the environment.
    try:
        db = connect(host='localhost', port=3306, user='root', password='123456', db='yiqing',charset='utf8')
        print("數據庫連接成功")
        return db
    except Exception as e:
        print(e)
    # ``NULL`` is not a Python name; ``None`` is the null value.
    return None
if __name__ == '__main__':
    # ``db`` is intentionally a module-level name: mes(), total() and
    # insert() all read it as a global.
    db = connectDB()
    if db is not None:
        try:
            mes()
        finally:
            # Always release the connection, even if the scrape fails.
            db.close()
China.py

 


免責聲明!

本站轉載的文章為個人學習借鑒使用,本站對版權不負任何法律責任。如果侵犯了您的隱私權益,請聯繫本站郵箱yoyou2525@163.com刪除。



 
粵ICP備18138465號   © 2018-2025 CODEPRJ.COM