Python 爬取丁香园疫情数据


丁香园页面中的数据是由 JS 加载出来的,因此需要用正则表达式从页面源码中截取数据,再转成 JSON 或其他数据形式进行处理。

爬取代码:

import pymysql
import requests
import pprint
import re
import json


def write_to_file(item, path='yiqing.json'):
    """Serialize ``item`` as pretty-printed JSON and write it to ``path``.

    The target file is opened in ``'w'`` mode, so any existing content is
    overwritten. Output is UTF-8 with a 4-space indent, and non-ASCII
    characters are written as-is rather than as ``\\uXXXX`` escapes.

    Args:
        item: Any object ``json.dumps`` can serialize.
        path: Destination file path; defaults to ``'yiqing.json'`` in the
            current working directory (the original hard-coded location).

    Raises:
        TypeError: If ``item`` is not JSON-serializable.
        OSError: If the file cannot be opened or written.
    """
    # Serialize before opening the file so a serialization error does not
    # truncate an existing file.
    payload = json.dumps(item, indent=4, ensure_ascii=False)
    # The ``with`` block closes the file; no explicit close() is needed.
    with open(path, 'w', encoding='utf-8') as f:
        f.write(payload)

def mysql_():
    """Open a MySQL connection and return it together with a new cursor.

    The connection targets database ``test`` on ``127.0.0.1:3306`` as user
    ``root`` and is configured with ``pymysql.cursors.DictCursor``.

    Returns:
        tuple: ``(connection, cursor)``. The caller is responsible for
        committing and for closing both objects.
    """
    # NOTE(review): credentials are hard-coded here; consider loading them
    # from configuration or the environment instead.
    settings = {
        'host': '127.0.0.1',
        'port': 3306,
        'user': 'root',
        'passwd': 'yuanpeng0',
        'db': 'test',
        'charset': 'utf8',
        'cursorclass': pymysql.cursors.DictCursor,
    }
    connection = pymysql.connect(**settings)
    return connection, connection.cursor()

# Page whose inline JavaScript carries the statistics to scrape.
PAGE_URL = 'https://ncov.dxy.cn/ncovh5/view/pneumonia?scene=2&clicktime=1579583352&enterid=1579583352&from=timeline&isappinstalled=0'

# Counters copied from the statistics object into the database row, in the
# same order as the placeholders of the UPDATE statement below.
COUNT_FIELDS = (
    'currentConfirmedCount',
    'suspectedCount',
    'seriousCount',
    'confirmedCount',
    'deadCount',
    'curedCount',
)


def _capture(pattern, page, label):
    """Return group 1 of ``pattern`` searched in ``page`` (DOTALL mode).

    Raises:
        ValueError: If the pattern does not match; ``label`` names the
            missing piece in the error message.
    """
    match = re.search(pattern, page, re.S)
    if match is None:
        raise ValueError(
            'could not find %s in the page; the page layout may have changed' % label
        )
    return match.group(1)


# Download the page. A timeout keeps the script from hanging forever, and
# raise_for_status() stops early on an HTTP error instead of trying to
# parse an error page.
response = requests.get(PAGE_URL, timeout=10)
response.raise_for_status()
page_text = response.content.decode()

# Per-area data: the text assigned to ``window.getAreaStat``, captured up to
# and including the closing ``}]`` that precedes ``}catch``. It is parsed as
# JSON and dumped to disk.
area_json = _capture(
    r'window\.getAreaStat = (.*?\}\])\}catch',
    page_text,
    'window.getAreaStat',
)
area_stat = json.loads(area_json)
# pprint.pprint(area_stat)
write_to_file(area_stat)

# Overall counters: the text assigned to ``window.getStatisticsService`` is
# cut at the first occurrence of the sentinel text and then closed with
# '"}' so that the truncated fragment parses as JSON. This assumes the
# sentinel sits inside a string value of the top-level object and that all
# counters appear before it.
stats_fragment = _capture(
    r' window\.getStatisticsService = (.*?该字段已替换为说明1)',
    page_text,
    'window.getStatisticsService',
)
stats = json.loads(stats_fragment + '"}')

row = tuple(stats[field] for field in COUNT_FIELDS)
list_result = [row]
print(list_result)

# Store the counters in the single summary row (id = 1). The values are
# passed as query parameters rather than formatted into the SQL string.
update_sql = "update yiqingcount set currentConfirmedCount=%s,suspectedCount=%s,seriousCount=%s,confirmedCount=%s,deadCount=%s,curedCount=%s where id = 1"
conn, cur = mysql_()
try:
    # Only one row is written, so execute() is sufficient.
    cur.execute(update_sql, row)
    conn.commit()
finally:
    # Release the cursor and connection even if the update fails.
    cur.close()
    conn.close()

爬取数据形式如下:

 


免责声明!

本站转载的文章为个人学习借鉴使用,本站对版权不负任何法律责任。如果侵犯了您的隐私权益,请联系本站邮箱yoyou2525@163.com删除。



 
粤ICP备18138465号  © 2018-2025 CODEPRJ.COM