python3爬取全站美眉圖片


爬取網站:https://www.169tp.com/xingganmeinv

該網站美眉圖片有數百頁,每頁24張,共上萬張圖片,全部爬取下來

 1 import urllib.request
 2 import re
 3 import os
 4 from bs4 import BeautifulSoup
 5 
 6 page_flag = 0
 7 base_url = "https://www.169tp.com/xingganmeinv/"
 8 first_url ="https://www.169tp.com/xingganmeinv/list_1_1.html"
 9 Imgnums = 0
10 
def get_html(url):
    """Fetch *url* and return its body decoded as GB18030 text.

    Args:
        url: Address to open with ``urllib.request.urlopen``.

    Returns:
        The response body as a ``str``.

    Raises:
        urllib.error.URLError: If the URL cannot be opened.
        UnicodeDecodeError: If the body is not valid GB18030.
    """
    # The context manager closes the connection even when read/decode fails,
    # so a long crawl does not leak one open response per page.
    with urllib.request.urlopen(url) as response:
        return response.read().decode('gb18030')
15 
def get_Imgurl_list(html):
    """Return every quoted ``src`` value in *html* that ends in ``.jpg``.

    Args:
        html: Page markup as a string.

    Returns:
        A list of the matched ``src`` values, in document order (empty when
        nothing matches).
    """
    # Raw string: "\." in a normal literal is an invalid escape sequence.
    # The capture excludes quote characters so a match can never run past the
    # end of one attribute into a later one (e.g. src="a.png" ... href="b.jpg").
    img_urllist = re.findall(r"""src=["']([^"']+\.jpg)["']""", html)
    return img_urllist
19 
20 
def Download(img_urllist,page_flag,final_path,Imgnums):
    """Save every URL in *img_urllist* to disk and return the updated total.

    Each file is written to ``<final_path><page_flag>_<n>.jpg`` where ``n``
    starts at 1 for the first URL in the list. *final_path* is used as a plain
    string prefix, so it must already end with a path separator.

    Args:
        img_urllist: Iterable of image URLs to retrieve.
        page_flag: Page index placed in each file name.
        final_path: Destination prefix (directory path ending in a separator).
        Imgnums: Number of images downloaded before this call.

    Returns:
        ``Imgnums`` plus the number of images saved by this call. Integers are
        immutable, so incrementing the parameter alone would never reach the
        caller; the new total is returned instead.

    Raises:
        Whatever ``urllib.request.urlretrieve`` raises for a URL that cannot
        be fetched or a file that cannot be written.
    """
    saved = 0
    for num, imgurl in enumerate(img_urllist, start=1):
        imgname = "{}{}{}{}.jpg".format(final_path, page_flag, '_', num)
        urllib.request.urlretrieve(imgurl, imgname)
        print("已經爬取圖片名:", imgname)
        saved += 1
    return Imgnums + saved
29 
def makedir(path):
    """Create the directory *path* (and missing parents) unless it exists.

    Surrounding whitespace is stripped from *path* before it is used.

    Returns:
        True when the directory was created, False when the path was already
        present.
    """
    target = path.strip()

    # Guard clause: nothing to create when the path is already there.
    if os.path.exists(target):
        print("路徑為 ",target," 的文件夾已經存在")
        return False

    print("創建了路徑為 ",target," 的文件夾")
    os.makedirs(target)
    return True
40 
41 
def _find_next_link(html):
    """Return the <a> tag inside the '下一頁' pager item of *html*, or None.

    Each lookup is checked separately so that a page without a pager div or
    without a next-page item yields None instead of raising AttributeError.
    """
    pager = BeautifulSoup(html, 'html.parser').find('div', attrs={'class': 'page'})
    if pager is None:
        return None
    # NOTE(review): the label is matched literally; confirm the live site
    # really renders this exact text for its next-page item.
    item = pager.find('li', text='下一頁')
    if item is None:
        return None
    return item.find('a')


# Ask the user where the images should be stored.
filepath = input("請輸入保持圖片的文件夾路徑:")
print(filepath)
name = input("請輸入保存圖片的文件夾名:")
print(name)
# os.path.join inserts the separator when the base path lacks one; the inputs
# are stripped so the path used for saving matches the one makedir creates.
finalpath = os.path.join(filepath.strip(), name.strip())
makedir(finalpath)
# Download() uses this value as a raw string prefix, so it must end with the
# platform's path separator.
finalpath = os.path.join(finalpath, '')
print(f"圖片保存路徑: {finalpath}")

# Walk the listing pages, fetching each page once and reusing its HTML for
# both image extraction and next-page discovery.
new_url = first_url
while True:
    page_html = get_html(new_url)
    page_img_urls = get_Imgurl_list(page_html)
    Download(page_img_urls, page_flag, finalpath, Imgnums)
    # Download raises on any failed image, so reaching this line means every
    # URL of the page was saved; keep the running total here.
    Imgnums += len(page_img_urls)

    next_page = _find_next_link(page_html)
    href = next_page.get('href') if next_page is not None else None
    if not href:
        break  # no further page: crawl finished
    new_url = base_url + href
    page_flag += 1
print(f"下載完成,共下載了 {Imgnums} 張圖片!")

運行截圖:

圖片名命名規則:存儲路徑+頁碼+下劃線+圖片號+.jpg

圖片文件夾截圖:

 

 


免責聲明!

本站轉載的文章為個人學習借鑒使用,本站對版權不負任何法律責任。如果侵犯了您的隱私權益,請聯繫本站郵箱yoyou2525@163.com刪除。



 
粵ICP備18138465號   © 2018-2025 CODEPRJ.COM