# Crawler example: scrape the Yuyao-area forecast page on weather.com.cn
import requests
from lxml import etree


class WeatherSpider:
    """Download one weather.com.cn forecast page and extract a single day.

    The spider reads the second ``<li>`` of the forecast list, which the
    original author treats as "tomorrow".
    """

    # Location of the forecast entry inside the page.
    _TOMORROW_XPATH = (
        "//div[contains(@class,'con') and contains(@class,'today')]"
        "//div[@class='c7d']/ul/li[2]"
    )

    # Output key -> XPath evaluated relative to the forecast entry.
    _FIELD_XPATHS = {
        "date": "./h1/text()",
        "weather": "./p[@class='wea']/@title",
        "temperature_max": "./p[@class='tem']/span/text()",
        "temperature_min": "./p[@class='tem']/i/text()",
        "air_speed": "./p[@class='win']/i/text()",
    }

    def __init__(self, timeout=10):
        """Set up the target URL, request headers and network timeout.

        Args:
            timeout: Seconds to wait for the server before giving up.
        """
        self.url = "http://www.weather.com.cn/weather/101210404.shtml"
        self.headers = {
            "User-Agent": "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/72.0.3626.119 Safari/537.36"}
        self.timeout = timeout

    def get_url_content(self):
        """Fetch ``self.url`` and return the response body as text.

        Returns:
            The page body decoded with the default codec (UTF-8).

        Raises:
            requests.RequestException: On connection failure, timeout, or a
                4xx/5xx status code.
        """
        # Without a timeout, requests waits indefinitely on a stalled server.
        response = requests.get(
            self.url, headers=self.headers, timeout=self.timeout
        )
        # Fail here instead of handing an error page to the parser.
        response.raise_for_status()
        return response.content.decode()

    def get_weather_data(self, html):
        """Parse the page HTML and return the selected day's forecast.

        Args:
            html: Page source as returned by ``get_url_content``.

        Returns:
            A dict with the keys ``date``, ``weather``, ``temperature_max``,
            ``temperature_min`` and ``air_speed``. A field whose element is
            absent from the page is ``None``.

        Raises:
            IndexError: If the forecast entry itself cannot be found, e.g.
                because the page layout changed.
        """
        tree = etree.HTML(html)
        entries = tree.xpath(self._TOMORROW_XPATH)
        if not entries:
            raise IndexError(
                "forecast entry not found in page; the layout may have changed"
            )
        entry = entries[0]

        weather_data = {}
        for key, path in self._FIELD_XPATHS.items():
            matches = entry.xpath(path)
            # A single missing element should not abort the whole extraction.
            weather_data[key] = matches[0] if matches else None
        return weather_data

    def run(self):
        """Fetch the page, extract the forecast and print it."""
        # Download the page content
        content_html = self.get_url_content()
        # Extract the weather data from the page content
        data = self.get_weather_data(content_html)
        # Print the scraped weather data
        print(data)


if __name__ == '__main__':
    spider = WeatherSpider()
    spider.run()
爬取結果
自己剛學爬蟲不久，利用爬蟲爬取天氣網，每次可以抓取第二天的天氣狀況，算是一個小 demo。