福利爬mzitu

本文轉載自查看原文 2019-11-27 18:16 403 python

導入庫

import os
import requests
from bs4 import BeautifulSoup
import time

生成請求headers

def res_headers():
    """Build the HTTP headers attached to outgoing requests.

    Returns:
        dict: A newly created mapping with the ``User-Agent`` and
        ``Referer`` header values.
    """
    user_agent = 'Mozilla/5.0 '
    # NOTE(review): presumably the image host checks the Referer before
    # serving files -- confirm against the site.
    referer = 'https://i5.meizitu.net/pfiles/style.css?091102'
    return {'User-Agent': user_agent, 'Referer': referer}

網站請求

def get_page(url, timeout=10):
    """Fetch *url* and return the response body as text.

    Args:
        url: Address of the page to download.
        timeout: Seconds to wait for the server before giving up. Without
            a timeout a stalled connection would block the crawl forever.

    Returns:
        str: The decoded response body (``Response.text``).

    Raises:
        requests.exceptions.RequestException: If the request fails or the
            timeout expires.
    """
    headers = res_headers()
    # Using the session as a context manager closes it (and its pooled
    # connections) as soon as the request is done. The previous
    # ``s.keep_alive = False`` assignment was dropped: it only set an
    # attribute on the session that requests does not appear to read.
    with requests.Session() as s:
        res = s.get(url, headers=headers, timeout=timeout)
        return res.text

獲取頁面中所有 girl 的詳情頁 url

def get_all_girls(url):
    """Collect the links listed on the index page at *url*.

    Args:
        url: Address of the index page.

    Returns:
        list: The ``href`` value of every ``<a>`` tag found inside the
        first element whose class is ``archives``, in document order.
    """
    page_soup = BeautifulSoup(get_page(url), 'lxml')
    # All links of interest live inside the 'archives' container.
    archive_section = page_soup.find(class_='archives')
    return [anchor['href'] for anchor in archive_section.find_all('a')]

獲取girl的所有圖片url

def get_girl_all_page(url):
    """Gather one image URL from each page of an album, then download them.

    The first page at *url* supplies the page count and the album title.
    Each numbered sub-page (``url/1`` .. ``url/N``) is then fetched and the
    ``src`` of its first ``<img>`` tag is recorded. The collected list is
    handed to ``download_Pic``.

    Args:
        url: Address of the album's first page.
    """
    first_soup = BeautifulSoup(get_page(url), 'lxml')
    # The second-to-last link in the 'pagenavi' bar holds the page count
    # inside its <span>.
    nav_links = first_soup.find(class_='pagenavi').find_all('a')
    last_page_text = nav_links[-2].find('span').string
    title = first_soup.find(class_='main-title').string

    image_urls = []
    for page_no in range(1, int(last_page_text) + 1):
        page_soup = BeautifulSoup(get_page(url + "/%s" % page_no), 'lxml')
        # Only the first <img> on each sub-page is taken.
        image_urls.append(page_soup.find('img').get('src'))
        # Short pause between page requests.
        time.sleep(0.1)
    download_Pic(title, image_urls)

下載圖片，以標題為文件夾名

def download_Pic(title, pic_url_list, pause=100):
    """Download every image in *pic_url_list* into a folder named *title*.

    Files are saved as ``<title>/1.jpg``, ``<title>/2.jpg``, ... in list
    order, and a progress line is printed for each one.

    Args:
        title: Album title; used as the name of the target directory.
        pic_url_list: Iterable of image URLs to download.
        pause: Seconds to sleep after the whole album has been downloaded.
            Defaults to the previously hard-coded 100 seconds.
    """
    # exist_ok lets the script be re-run (e.g. after an interruption)
    # without crashing on an album folder that already exists.
    os.makedirs(title, exist_ok=True)
    headers = res_headers()
    for index, pic_url in enumerate(pic_url_list, start=1):
        filename = '%s/%s.jpg' % (title, index)
        print('downloading....%s : NO.%s' % (title, index))
        # Download first, open the file second: a failed request then
        # raises before an empty .jpg is created on disk.
        img = requests.get(pic_url, headers=headers).content
        with open(filename, 'wb') as f:
            f.write(img)
    time.sleep(pause)

　　主程序

if __name__ == '__main__':
    # Entry point: read the index page, then process each listed album in
    # the order it appears.
    index_url = "https://www.mzitu.com/all"
    album_urls = get_all_girls(index_url)
    for album_url in album_urls:
        get_girl_all_page(album_url)

*本文根據崔老師的視頻及自己的實際測試整理而成，目前仍存在請求方面的問題，有待後續改進。

免責聲明！

本站轉載的文章為個人學習借鑒使用，本站對版權不負任何法律責任。如果侵犯了您的隱私權益，請聯系本站郵箱yoyou2525@163.com刪除。

猜您在找 [Python]爬取mzitu網站 20200311_最新爬取mzitu 利用python3 爬蟲定制版妹子圖mzitu爬取爬取妹子網，重點是加入開頭的'Referer':'http://www.mzitu.com/' 3、爬取干貨集中營的福利圖片爬蟲實戰--基於requests和beautifulsoup的妹子網圖片爬取（福利哦！） Python爬取視頻(其實是一篇福利) Python協程爬取妹子圖(內有福利，你懂得~) （死宅福利）python爬蟲腳本爬取兔玩君分享計划千套寫真 python全棧爬取妹子圖網，l老司機福利