Python 爬蟲之爬取漫畫(一)


爬取“快看漫畫”《百怪夜譚》

import requests
from bs4 import BeautifulSoup
# Browser-like request headers passed to requests.get() when fetching the page.
# The User-Agent is assembled from adjacent string literals: a backslash
# continuation inside a single literal would pull the next line's indentation
# into the header value.
headers = {
    'User-Agent': (
        'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 '
        '(KHTML, like Gecko) Chrome/70.0.3538.102 Safari/537.36'
    ),
}


# Request the web page
def open_url(url, timeout=10):
    """Fetch ``url`` and return the response body as text.

    The request is sent with the module-level ``headers`` and the body is
    decoded as UTF-8 regardless of what the server declares.

    Args:
        url: Address of the page to download.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout a stalled connection would block forever.

    Returns:
        The decoded response body.

    Raises:
        requests.HTTPError: if the server answers with a 4xx/5xx status.
        requests.RequestException: on connection errors or timeout.
    """
    response = requests.get(url, headers=headers, timeout=timeout)
    # Surface HTTP errors here instead of handing an error page to the parser.
    response.raise_for_status()
    response.encoding = 'utf-8'
    return response.text


# Extract the title
def get_title(html):
    """Return the title parsed from the page HTML.

    Finds the first ``<i class="ico">`` element, splits its text on
    whitespace and returns the third piece.

    NOTE(review): the original code called ``split('')``; ``str.split``
    rejects an empty separator with ``ValueError``, so that call could never
    succeed. The intended separator was presumably lost when the code was
    copied. Whitespace splitting is assumed here -- confirm against the
    actual page markup.

    Args:
        html: Page source as a string.

    Returns:
        The third whitespace-separated piece of the element's text.

    Raises:
        ValueError: if the element is missing or its text has fewer than
            three whitespace-separated pieces.
    """
    soup = BeautifulSoup(html, 'lxml')
    title_tag = soup.find('i', class_='ico')
    if title_tag is None:
        raise ValueError('title element <i class="ico"> not found')
    parts = title_tag.text.split()
    if len(parts) < 3:
        raise ValueError(
            'unexpected title text: {!r}'.format(title_tag.text)
        )
    return parts[2]


# Extract the image links
def get_img_url(html):
    """Return all ``<img>`` tags that carry the ``kklazy`` CSS class.

    NOTE(review): the original query was ``class_="kklazy "`` with a trailing
    space. Whether that matches anything depends on how the installed
    BeautifulSoup version tokenises whitespace in the ``class`` attribute;
    matching on the bare class name does not -- confirm the result set is the
    intended one on the live page.

    Args:
        html: Page source as a string.

    Returns:
        The result of ``find_all``: the matching ``<img>`` tag objects (not
        URL strings), in document order.
    """
    soup = BeautifulSoup(html, 'lxml')
    return soup.find_all('img', class_='kklazy')


# Download the images
def save_imgs(filename, img_urls, timeout=10):
    """Download each image and write it to ``'<filename> 第N頁.jpg'``.

    ``N`` is the 1-based position of the tag in ``img_urls``. Files are
    written relative to the current working directory, and a progress line is
    printed after each one.

    Args:
        filename: Prefix used for every output file name.
        img_urls: Iterable of tag-like objects whose ``get('data-kksrc')``
            yields the image URL.
        timeout: Seconds to wait for each image request.

    Raises:
        requests.HTTPError: if an image request returns a 4xx/5xx status.
        requests.RequestException: on connection errors or timeout.
        OSError: if an output file cannot be written.
    """
    for num, img_tag in enumerate(img_urls, start=1):
        src = img_tag.get('data-kksrc')
        if not src:
            # No lazy-load URL on this tag; requesting None would only raise.
            # The page number is still consumed so numbering follows position.
            continue
        response = requests.get(src, timeout=timeout)
        # Fail loudly rather than saving an HTTP error page as a .jpg.
        response.raise_for_status()
        with open(filename + ' 第{}頁'.format(str(num)) + '.jpg', 'wb') as file:
            file.write(response.content)
        print(filename + '第{}頁'.format(str(num)) + '下載完成!')


# Main program flow
def main(url='https://www.kuaikanmanhua.com/web/comic/12759/'):
    """Download every image of one comic page.

    Fetches the page, derives the output file-name prefix from its title,
    collects the image tags and saves each image to disk.

    Args:
        url: Address of the comic page to scrape. Defaults to the page the
            script was originally hard-coded for.
    """
    html = open_url(url)
    filename = get_title(html)
    img_urls = get_img_url(html)
    save_imgs(filename, img_urls)


# Run the scraper only when this file is executed as a script, not on import.
if __name__ == '__main__':
    main()


免責聲明!

本站轉載的文章為個人學習借鑒使用,本站對版權不負任何法律責任。如果侵犯了您的隱私權益,請聯繫本站郵箱 yoyou2525@163.com 刪除。



 
粵ICP備18138465號   © 2018-2025 CODEPRJ.COM