python爬虫-妹子图

本文转载自查看原文 2020-08-12 14:01 879 python

python爬虫之妹子图

懂的人都懂！

import urllib.request
import os
import re
import time

# Reference on using the re module: https://www.cnblogs.com/shenjianping/p/11647473.html

def url_open(url):
    """Fetch ``url`` and return the raw response body.

    The request is sent with a browser-like ``User-Agent`` header.

    Args:
        url: Address to request.

    Returns:
        The undecoded response body as ``bytes``.

    Raises:
        urllib.error.URLError: Propagated from ``urlopen`` when the request
            fails.
    """
    header = {"User-Agent": "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/63.0.3239.132 Safari/537.36 QIHU 360SE"}
    req = urllib.request.Request(url, headers=header)
    # Use the response as a context manager so the underlying connection is
    # closed as soon as the body has been read, instead of leaking it.
    with urllib.request.urlopen(req) as response:
        return response.read()

def find_resource(url):
    """Return the image ``src`` values found on the page at ``url``.

    The page is fetched with ``url_open``, decoded as UTF-8, and scanned for
    ``<img alt="..." src="..." />`` tags; the captured ``src`` attribute of
    each match is returned in document order.
    """
    page_text = url_open(url).decode("utf-8")
    # The original author marked this pattern as the place to adjust.
    img_tag = '''<img alt=".*?" src="(.*?)" />'''
    return [found.group(1) for found in re.finditer(img_tag, page_text)]

def save_imgs(floder, img_addrs, base_url="https://www.xiuaa.com"):
    """Download each image in ``img_addrs`` into the current working directory.

    Args:
        floder: Not used by this function; kept so existing callers keep
            working. Files are written relative to the current working
            directory.
        img_addrs: Iterable of image addresses. Absolute URLs are used as-is;
            anything else is resolved against ``base_url``.
        base_url: Site root used to complete relative image addresses.
    """
    from urllib.parse import urljoin

    for each in img_addrs:
        filename = each.split('/')[-1]
        if not filename:
            # An address ending in "/" has no file name to save under.
            continue
        time.sleep(0.3)  # pause between requests
        # urljoin leaves absolute URLs untouched and also handles
        # protocol-relative ("//host/x") and slash-less relative addresses.
        img = url_open(urljoin(base_url, each))
        # Download first, then open with 'wb': a failed request leaves no
        # empty file behind, and a re-run overwrites instead of appending a
        # second copy of the bytes to an existing image.
        with open(filename, 'wb') as f:
            f.write(img)

def main(floder='download'):
    """Crawl ten gallery pages and save their images under ``floder``.

    The process changes its working directory to ``floder`` (creating it if
    needed), then for page numbers 0 through 9 collects the image addresses
    with ``find_resource`` and downloads them with ``save_imgs``.

    Args:
        floder: Name of the directory to download into.
    """
    # Create the target directory on first run; chdir alone would raise
    # FileNotFoundError when it does not exist yet.
    os.makedirs(floder, exist_ok=True)
    os.chdir(floder)

    for i in range(0, 10):  # iterate over the pages
        url = "https://www.xiuaa.com/xgmn/4492_" + str(i) + ".html"
        img_addrs = find_resource(url)
        save_imgs(floder, img_addrs)
        print("第", i + 1, "张..")

    print("爬取完毕！")

# Run the crawler with its default arguments when executed as a script.
if __name__ == '__main__':
    main()

2020.8.12 实测，代码可以正常运行。

免责声明！

本站转载的文章为个人学习借鉴使用，本站对版权不负任何法律责任。如果侵犯了您的隐私权益，请联系本站邮箱yoyou2525@163.com删除。

猜您在找： Python爬虫之——爬取妹子图片 Python 爬虫：煎蛋网妹子图 Python 爬虫入门(二)——爬取妹子图 java爬虫-妹子图 python、scrapy下编写妹子图爬虫程序 python爬虫——抓取今日头条的街拍的妹子图 python爬虫–爬取煎蛋网妹子图片 python 爬虫爬取煎蛋网妹子图 一个爬虫的练习（妹子图） 项目: python爬虫福利煎蛋网妹子图