python3爬取全站美眉图片


爬取网站:https://www.169tp.com/xingganmeinv

该网站美眉图片有数百页,每页24张,共上万张图片,全部爬取下来

 1 import urllib.request
 2 import re
 3 import os
 4 from bs4 import BeautifulSoup
 5 
# Index of the listing page being crawled; it becomes the prefix of saved image file names.
page_flag = 0
# Directory URL that each "next page" href is appended to.
base_url = "https://www.169tp.com/xingganmeinv/"
# First listing page, where the crawl starts.
first_url ="https://www.169tp.com/xingganmeinv/list_1_1.html"
# Running total of downloaded images, reported at the end of the run.
Imgnums = 0
10 
def get_html(url):
    """Fetch *url* and return its body decoded as GB18030 text.

    Args:
        url: Address of the page to download.

    Returns:
        The decoded page content as a ``str``.

    Raises:
        urllib.error.URLError: If the request fails.
        UnicodeDecodeError: If the body is not valid GB18030.
    """
    # The context manager closes the connection even if read/decode fails.
    with urllib.request.urlopen(url) as response:
        return response.read().decode('gb18030')
15 
def get_Imgurl_list(html):
    """Return every ``.jpg`` URL found in a quoted ``src`` attribute of *html*.

    Args:
        html: Page markup as a string.

    Returns:
        A list of URL strings in document order (empty if none match).
    """
    # Raw string avoids the invalid "\." escape warning. Excluding quotes from
    # the captured URL keeps a match from running past the end of one
    # attribute into a later ``.jpg`` elsewhere on the page.
    return re.findall(r"""src=["']([^"']+?\.jpg)["']""", html)
19 
20 
def Download(img_urllist, page_flag, final_path, Imgnums):
    """Download every image in *img_urllist* into *final_path*.

    Files are named ``<final_path><page_flag>_<n>.jpg`` where ``n`` starts at 1
    for each call.

    Args:
        img_urllist: Iterable of image URLs to download.
        page_flag: Page index used as the file-name prefix.
        final_path: Destination prefix; must already end with a path separator.
        Imgnums: Number of images downloaded before this call.

    Returns:
        ``Imgnums`` plus the number of images downloaded by this call. Integers
        are immutable, so incrementing the parameter cannot update the caller's
        variable; callers that want the running total must use this value.

    Raises:
        urllib.error.URLError: If an image cannot be retrieved.
    """
    for num, imgurl in enumerate(img_urllist, start=1):
        imgname = f"{final_path}{page_flag}_{num}.jpg"
        urllib.request.urlretrieve(imgurl, imgname)
        print("已经爬取图片名:", imgname)
        Imgnums += 1
    return Imgnums
29 
def makedir(path):
    """Create the directory *path* (and missing parents) unless it exists.

    Surrounding whitespace in *path* is ignored.

    Returns:
        True if the directory was created, False if the path already existed.
    """
    target = path.strip()
    if os.path.exists(target):
        print("路径为 ",target," 的文件夹已经存在")
        return False

    print("创建了路径为 ",target," 的文件夹")
    os.makedirs(target)
    return True
40 
41 
def _find_next_page_link(html):
    """Return the <a> tag inside the "下一页" pager entry of *html*, or None."""
    soup = BeautifulSoup(html, 'html.parser')
    pager = soup.find('div', attrs={'class': 'page'})
    if pager is None:
        return None
    next_item = pager.find('li', string='下一页')
    if next_item is None:
        return None
    return next_item.find('a')


# Trim the answers so the directory that gets created and the prefix used for
# downloads are guaranteed to be the same path.
filepath = input("请输入保持图片的文件夹路径:").strip()
print(filepath)
name = input("请输入保存图片的文件夹名:").strip()
print(name)
# os.path.join inserts a separator only when one is missing; os.sep keeps the
# trailing separator portable instead of hard-coding the Windows backslash.
finalpath = os.path.join(filepath, name)
makedir(finalpath)
finalpath += os.sep
print(f"图片保存路径: {finalpath}")

current_url = first_url
visited = set()
while current_url not in visited:  # stop if the pager ever links back to a seen page
    visited.add(current_url)
    # Fetch each page once and reuse the markup for both images and pager.
    html = get_html(current_url)
    img_urls = get_Imgurl_list(html)
    Download(img_urls, page_flag, finalpath, Imgnums)
    # Download cannot update this module-level counter through its int
    # parameter, so the total is maintained here.
    Imgnums += len(img_urls)

    next_page = _find_next_page_link(html)
    if next_page is None or not next_page.get('href'):
        break  # last page: no further "下一页" link
    current_url = base_url + next_page['href']
    page_flag += 1
print(f"下载完成,共下载了 {Imgnums} 张图片!")

运行截图:

图片命名规则:存储路径 + 页码 + 下划线 + 图片序号 + .jpg

图片文件夹截图:

 

 


免责声明!

本站转载的文章为个人学习借鉴使用,本站对版权不负任何法律责任。如果侵犯了您的隐私权益,请联系本站邮箱yoyou2525@163.com删除。



 
粤ICP备18138465号  © 2018-2025 CODEPRJ.COM