Python 爬取煎蛋 ooxx 妹子圖


煎蛋網妹子圖首頁(http://jandan.net/ooxx),這個鏈接看起來怎麼那麼邪惡呢?經分析,網站隱藏了圖片地址。心一橫,採取曲線路線,成功爬取大量妹子圖~

源碼如下:

 1 import requests
 2 import re
 3 import os
 4 import base64
 5 from urllib.request import urlretrieve
 6 
 7 
 8 class JianDan:
 9     def __init__(self):
10         self.url_temp = "http://jandan.net/ooxx/page-{}#comments"
11         self.header = {
12             'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/60.0.3112.113 Safari/537.36'}
13 
14     def get_url_list(self):
15         url_list = [self.url_temp.format(page) for page in range(1, 51)]
16         return url_list
17 
18     def parse_url(self, url):
19         try:
20             response = requests.get(url, headers=self.header)
21             html_str = response.content.decode()
22             img_base_urls = re.findall(
23                 r'<span class="img-hash">(.*?)</span>', html_str)
24             img_urls = list(
25                 map(lambda base_url: "http:" + base64.b64decode(base_url).decode('utf-8'), img_base_urls))
26             return img_urls
27         except Exception as e:
28             print(f"請求目標網站異常:{e}")
29 
30     def make_file(self):
31         dir_name = '煎蛋IMG'
32         get_path = os.getcwd()
33         path_dir = get_path + "/" + dir_name
34         if not os.path.isdir(path_dir):
35             print(f"創建煎{dir_name}文件夾成功")
36             os.mkdir(path_dir)
37         else:
38             print(f"{dir_name}G文件夾已存在創建失敗")
39         return path_dir
40 
41     def download(self, img_url, file_path):
42         file_name = "/" + img_url.split('/')[-1]
43         print(f"###### 正在保存 -> {file_name} ")
44         try:
45             urlretrieve(img_url, file_path + file_name)
46             print(f"###### 保存成功 -> {file_name} ")
47         except Exception as e:
48             print(f'下載圖片失敗:{file_name}')
49 
50     def run(self):
51         url_list = self.get_url_list()
52         file_path = self.make_file()
53         for url in url_list:
54             print("#### 獲取第{}頁圖片 ####".format(url_list.index(url) + 1))
55             img_urls = self.parse_url(url)
56             for img_url in img_urls:
57                 self.download(img_url, file_path)
58 
59         print("end...")
60 
61 
if __name__ == '__main__':
    # Script entry point: build the crawler and run the full crawl.
    JianDan().run()

執行結果:

初學 Python 與爬蟲,要學習的還很多。煎蛋網以後還會嘗試用更高效的方式來爬取測試的~


免責聲明!

本站轉載的文章為個人學習借鑒使用,本站對版權不負任何法律責任。如果侵犯了您的隱私權益,請聯繫本站郵箱yoyou2525@163.com刪除。



 
粵ICP備18138465號   © 2018-2025 CODEPRJ.COM