# 本案例主要是為了獲取某城市的歷史天氣數據,字段包含日期、最低氣溫、最高氣溫、風向、風力、天氣狀況、空氣質量指標值、空氣質量等級和空氣質量說明。
#導入第三方包
import requests
import time
import random
import pandas as pd
import re
# Request headers sent with every page fetch so the crawler looks like a
# regular browser. Header names must be spelled exactly: a misspelled name is
# transmitted as an unknown header and the intended one is never sent.
headers = {
    'Accept': '*/*',
    'Accept-Encoding': 'gzip,deflate',
    'Accept-Language': 'zh-CN,zh;q=0.9',
    'Connection': 'keep-alive',
    'User-Agent': 'Mozilla/5.0 (windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML,like Gecko) Chrome/63.0.3236.0 Safari/537.36',
}
# Build the list of every monthly history page to fetch (one URL per month,
# 2012-01 through 2018-12).
urls = []
for year in range(2012, 2019):
    for month in range(1, 13):
        if year <= 2016:
            # Up to 2016 the file name is "<station>_<year><month>.js" with
            # the month NOT zero-padded.
            urls.append('http://tianqi.2345.com/t/wea_history/js/58362_%s%s.js'
                        % (year, month))
        else:
            # From 2017 on the pages live in a "<yyyymm>/" sub-directory and
            # the month is zero-padded in both places, which is why four
            # values are substituted. %02d covers months 1-9 and 10-12 alike.
            # NOTE(review): URL layout reconstructed from the four-argument
            # tuple and the month<10 special case - confirm against the site.
            urls.append('http://tianqi.2345.com/t/wea_history/js/%s%02d/58362_%s%02d.js'
                        % (year, month, year, month))
# Fetch every monthly page and pull each field out of the response text with
# a regular expression; one DataFrame per month is collected in `info`.
info = []
for url in urls:
    # Random 3-6 second pause used at the end of each iteration to avoid
    # hammering the server.
    seconds = random.randint(3, 6)
    # Send the request and keep the response body as text. The timeout keeps
    # a single stalled connection from hanging the whole crawl.
    response = requests.get(url, headers=headers, timeout=30).text
    ymd = re.findall(r"ymd:'(.*?)',", response)  # date
    high = re.findall(r"bWendu:'(.*?)',", response)  # highest temperature
    low = re.findall(r"yWendu:'(.*?)',", response)  # lowest temperature
    tianqi = re.findall(r"tianqi:'(.*?)',", response)  # weather condition
    fengxiang = re.findall(r"fengxiang:'(.*?)',", response)  # wind direction
    fengli = re.findall(r"fengli:'(.*?)',", response)  # wind force
    aqi = re.findall(r"aqi:'(.*?)',", response)  # air quality index
    aqiInfo = re.findall(r"aqiInfo:'(.*?)',", response)  # air quality description
    # Air quality level. Unlike the other patterns this one does not demand a
    # trailing comma after the closing quote.
    # NOTE(review): aqiLevel looks like the last key of each record, so it is
    # presumably followed by "}" rather than ","; requiring "'," would then
    # make the lazy match spill into the next record and yield a different
    # number of values than the other columns - confirm against a live page.
    aqiLevel = re.findall(r"aqiLevel:'(.*?)'", response)
    # Months from 2012-2015 carry no air-quality data, so fill those three
    # columns with None instead of empty lists (which would not line up with
    # the other columns).
    if not aqi:
        aqi = aqiInfo = aqiLevel = None
    info.append(pd.DataFrame({'ymd': ymd, 'high': high, 'low': low, 'tianqi': tianqi,
                              'fengxiang': fengxiang, 'fengli': fengli, 'aqi': aqi,
                              'aqiInfo': aqiInfo, 'aqiLevel': aqiLevel}))
    time.sleep(seconds)  # pause a few seconds after every request
# Stack the per-month frames collected in `info` into a single table.
weather = pd.concat(objs=info)
# Write the combined table to disk, leaving out the row index.
weather.to_csv(path_or_buf='weather.csv', index=False)
#注:本例運行時報錯,提示 aqiLevel 的數據個數與其他字段不相同,導致構造 DataFrame 失敗;本人在使用時先屏蔽了這項參數。先學習大概用法,後面再糾正,或者有調試好的朋友可以指導一下。