爬蟲 爬取妹子圖


功能寫得很差,只是簡單練手。

#!/usr/bin/env python
# -*- coding:utf-8 -*-


import hashlib
import re
import time

import requests  # pip3 install requests

# Directory that downloaded images are written into; get_movie() combines it
# with a generated file name. The value is a Windows-style absolute path.
movie_path = r'D:\爬蟲學習\爬蟲\妹子圖'


def get_index_page(url):
    """Fetch a listing page and return its HTML text.

    Args:
        url: Address of the index page to download.

    Returns:
        The response body as text when the server answers with HTTP 200,
        otherwise ``None`` (non-200 status or a failed request).
    """
    try:
        # Without a timeout, requests waits indefinitely on a stalled server.
        response = requests.get(url, timeout=10)
    except requests.RequestException as exc:
        # Best-effort crawl: report the failure and let the caller move on.
        print('%s 請求失敗: %s' % (url, exc))
        return None
    if response.status_code == 200:
        return response.text
    return None


def parse_index(index_page):
    """Yield the detail-page URLs found in a listing page.

    Every ``href`` that follows an ``li>`` marker is considered. A link is
    yielded only when it contains a ``/`` and the text after its last ``/``
    is non-empty, which filters out directory-style links ending in ``/``
    and slash-less targets such as ``#``.

    Args:
        index_page: HTML text of the listing page; ``None`` or an empty
            string produces no results.

    Yields:
        Each qualifying link exactly as it appears in the ``href`` attribute.
    """
    if not index_page:
        # A failed page fetch hands us None; there is nothing to scan.
        return
    for detail_url in re.findall(r'li>.*?<a href="(.*?)"', index_page, re.S):
        # rpartition never raises, unlike indexing the result of rsplit when
        # the link holds no '/' at all.
        _, slash, last_segment = detail_url.rpartition('/')
        if slash and last_segment:
            yield detail_url

def get_parge_url(detail_url):
    """Fetch a detail page and return its HTML text.

    The request carries a fixed ``Referer`` and a desktop browser
    ``User-Agent`` header.

    Args:
        detail_url: Address of the detail page to download.

    Returns:
        The response body as text when the server answers with HTTP 200,
        otherwise ``None`` (non-200 status or a failed request).
    """
    headers = {
        "Referer": "www.mzitu.com",
        'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/63.0.3239.132 Safari/537.36',
    }
    try:
        # Without a timeout, requests waits indefinitely on a stalled server.
        response = requests.get(detail_url, headers=headers, timeout=10)
    except requests.RequestException as exc:
        # Best-effort crawl: report the failure and let the caller move on.
        print('%s 請求失敗: %s' % (detail_url, exc))
        return None
    if response.status_code == 200:
        return response.text
    return None


def parse_detail(detail):
    """Return the ``src`` of the first matching ``<img>`` tag in a detail page.

    Only tags written as ``<img src="..." `` (closing quote followed by a
    space) are recognised.

    Args:
        detail: HTML text of the detail page. Anything that is not a string
            (for example ``None`` after a failed fetch) is treated as a page
            without images.

    Returns:
        The first matching ``src`` value, or ``None`` when there is none.
    """
    if not isinstance(detail, str):
        return None
    # search() stops at the first hit, which is the element findall()[0]
    # would have produced, without collecting every match on the page.
    match = re.search(r'<img src="(.*?)" ', detail, re.S)
    if match is None:
        return None
    return match.group(1)


def get_movie(url,page_url):
    """Download one image and store it as a ``.jpg`` file under ``movie_path``.

    The file name is the MD5 hex digest of the current timestamp combined
    with the image URL, so repeated downloads do not overwrite each other.

    Args:
        url: Address of the image to download. A falsy value (``None`` or an
            empty string) makes the call a no-op.
        page_url: Address of the page the image was found on; it is sent as
            the ``Referer`` header to get past hotlink protection.

    Returns:
        ``None`` in every case. Failures are reported on stdout and do not
        raise.
    """
    import os  # local import: the file's import header does not provide os

    if not url:
        # No image URL was extracted for this page; nothing to fetch.
        return

    headers = {
        "Referer": page_url,   # works around the site's hotlink protection
        'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/63.0.3239.132 Safari/537.36',
    }
    try:
        # Without a timeout, requests waits indefinitely on a stalled server.
        response = requests.get(url, headers=headers, timeout=10)
    except requests.RequestException as exc:
        print('%s 下載失敗: %s' % (url, exc))
        return
    if response.status_code != 200:
        return

    digest = hashlib.md5()
    digest.update(str(time.time()).encode('utf-8'))
    digest.update(url.encode('utf-8'))
    # os.path.join replaces the '%s\%s.jpg' template, whose '\%' is an
    # invalid escape sequence in a normal string literal.
    filepath = os.path.join(movie_path, '%s.jpg' % digest.hexdigest())
    try:
        # Create the target directory on first use instead of failing silently.
        os.makedirs(movie_path, exist_ok=True)
        with open(filepath, 'wb') as f:
            f.write(response.content)
    except OSError as exc:
        print('%s 保存失敗: %s' % (url, exc))
        return
    print('%s 下載成功' % url)


def main():
    """Crawl the listing pages and download the first image of each detail page.

    For every listing page the detail links are extracted, each detail page
    is fetched, its first image URL is located and the image is downloaded
    with the detail page as referer. Any step that produces nothing is
    skipped so one bad page does not stop the crawl.
    """
    base_url = 'http://www.mzitu.com/xinggan/page/{0}/'
    # NOTE(review): range(5) requests pages 0 through 4 - confirm that page 0
    # is valid on the site; pagination may start at 1.
    for i in range(5):
        url = base_url.format(i)
        text = get_index_page(url)
        if not text:
            # Fetch failed or returned a non-200 status; nothing to parse.
            continue
        for detail_url in parse_index(text):
            detail_text = get_parge_url(detail_url)
            if not detail_text:
                continue
            detail = parse_detail(detail_text)
            if not detail:
                # No image found on this detail page.
                continue
            get_movie(detail, detail_url)
   


# Start the crawl only when this file is executed as a script, not on import.
if __name__ == '__main__':
    main()

結果:

結果

 


免責聲明!

本站轉載的文章為個人學習借鑒使用,本站對版權不負任何法律責任。如果侵犯了您的隱私權益,請聯繫本站郵箱yoyou2525@163.com刪除。



 
粵ICP備18138465號   © 2018-2025 CODEPRJ.COM