# 爬取“快看漫畫”《百怪夜譚》
import requests
from bs4 import BeautifulSoup

# Browser-like User-Agent sent with every request. The value is assembled from
# adjacent string literals so that no stray backslash or indentation whitespace
# ends up inside the header.
headers = {
    'User-Agent': (
        'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 '
        '(KHTML, like Gecko) Chrome/70.0.3538.102 Safari/537.36'
    ),
}

# Seconds to wait on the server before giving up; without an explicit timeout
# requests will wait indefinitely.
REQUEST_TIMEOUT = 30


def open_url(url, timeout=REQUEST_TIMEOUT):
    """Fetch *url* and return the response body decoded as UTF-8.

    Args:
        url: Address of the page to download.
        timeout: Seconds to wait for the server before aborting.

    Returns:
        The response text.

    Raises:
        requests.RequestException: On connection failure, timeout, or a
            4xx/5xx HTTP status.
    """
    response = requests.get(url, headers=headers, timeout=timeout)
    # Fail here rather than handing an error page to the HTML parsers.
    response.raise_for_status()
    response.encoding = 'utf-8'
    return response.text


def get_title(html):
    """Extract the title from the page HTML.

    The title is the third '-'-separated field of the text inside the first
    ``<i class="ico">`` element.

    Args:
        html: Page source as returned by ``open_url``.

    Returns:
        The third '-'-separated field of the element text, unmodified.

    Raises:
        ValueError: If the element is missing or its text has fewer than
            three '-'-separated fields.
    """
    soup = BeautifulSoup(html, 'lxml')
    title_tag = soup.find('i', class_='ico')
    if title_tag is None:
        raise ValueError("title element <i class='ico'> not found in page")
    parts = title_tag.text.split('-')
    if len(parts) < 3:
        raise ValueError(
            'unexpected title format: {!r}'.format(title_tag.text)
        )
    return parts[2]


def get_img_url(html):
    """Collect the ``<img>`` tags of the comic pages.

    Args:
        html: Page source as returned by ``open_url``.

    Returns:
        The matching ``<img>`` tags (not URL strings); the image address is
        read from each tag's ``data-kksrc`` attribute by ``save_imgs``.
    """
    soup = BeautifulSoup(html, 'lxml')
    # NOTE(review): the class value carries a trailing space. Depending on the
    # BeautifulSoup version this may not match tags whose class list is just
    # "kklazy" -- confirm against the live page before changing it.
    return soup.find_all('img', class_="kklazy ")


def save_imgs(filename, img_urls):
    """Download every image and write it to the current directory.

    Files are named ``"<filename> 第N頁.jpg"`` with N counting from 1.

    Args:
        filename: Prefix used for every saved file (the comic title).
        img_urls: Iterable of ``<img>`` tags as returned by ``get_img_url``.

    Raises:
        requests.RequestException: If an image download fails.
        OSError: If a file cannot be written.
    """
    num = 1
    for img_tag in img_urls:
        src = img_tag.get('data-kksrc')
        if not src:
            # No source address on this tag; nothing to download.
            continue
        res = requests.get(src, headers=headers, timeout=REQUEST_TIMEOUT)
        # Do not write an HTTP error body to disk as if it were an image.
        res.raise_for_status()
        page = '第{}頁'.format(num)
        with open(filename + ' ' + page + '.jpg', 'wb') as file:
            file.write(res.content)
        print(filename + page + '下載完成!')
        num += 1


def main(url='https://www.kuaikanmanhua.com/web/comic/12759/'):
    """Download all page images of the comic chapter at *url*."""
    html = open_url(url)
    filename = get_title(html)
    img_urls = get_img_url(html)
    save_imgs(filename, img_urls)


if __name__ == '__main__':
    main()