需要添加和調用的庫
具體代碼
import json

import openpyxl
import requests
from lxml import etree


# Generic scraper setup: target page and a browser-like User-Agent header.
url = 'https://voice.baidu.com/act/newpneumonia/newpneumonia'
headers = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/92.0.4515.159 Safari/537.36"
}

# Send the request and keep the page body as text.
# A timeout is set because requests waits indefinitely by default, and the
# status is checked so an HTTP error page is never parsed as if it were data.
http_response = requests.get(url=url, headers=headers, timeout=10)
http_response.raise_for_status()
response = http_response.text

# XPath queries need a parsed element tree, not a raw string.
html = etree.HTML(response)

# Pull the text of the JSON <script> element embedded in the page.
json_text = html.xpath('//script[@type="application/json"]/text()')
if not json_text:
    # Same exception type as the bare [0] lookup would raise, but with a
    # message that says what was actually missing.
    raise IndexError(
        'no <script type="application/json"> element found in the page'
    )
json_text = json_text[0]
print(json_text)
# Decode the embedded JSON text with the standard-library json module.
page_data = json.loads(json_text)

# The wanted data lives under the "component" key; the "caseList" of its
# first entry is what the original comments call the current domestic data.
result = page_data["component"][0]['caseList']
print(result)
# Create a workbook and write into its default (active) worksheet.
wb = openpyxl.Workbook()
ws = wb.active
# Worksheet title.
ws.title = "國內疫情"
# Header row: province, cumulative confirmed, deaths, cured.
ws.append(["省份","累計確診","死亡","治愈"])

# Write one row per entry in `result`.
for line in result:
    # NOTE(review): the key is spelled "crued" here; presumably this matches
    # the upstream payload - verify before "correcting" it.
    line_name = [line["area"], line["confirmed"], line["died"], line["crued"]]
    # Replace empty-string fields with 0. Rebinding the loop variable inside
    # a `for` loop does not modify the list, so the row is rebuilt instead.
    line_name = [0 if ele == '' else ele for ele in line_name]
    ws.append(line_name)

# Save the workbook to an Excel file in the current directory.
wb.save('./國內疫情數據.xlsx')
爬取的數據