# 爬取“快看漫画”《百怪夜谭》 — scrape the comic 《百怪夜谭》 from Kuaikan Manhua (快看漫画)
"""Download every page image of one Kuaikan Manhua comic chapter.

The chapter page is fetched once, the chapter title is read from it and
used as the file-name prefix, and each lazily-loaded page image is saved
to the current working directory as ``<title> 第<n>页.jpg``.
"""
import requests
from bs4 import BeautifulSoup

# Browser-like User-Agent sent with every request.  The literal is split
# with implicit string concatenation so no stray backslash or padding
# ends up inside the header value.
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 '
                  '(KHTML, like Gecko) Chrome/70.0.3538.102 Safari/537.36'
}

# Seconds to wait on a request before giving up; without a timeout
# ``requests`` can block forever on a stalled connection.
REQUEST_TIMEOUT = 10


# Fetch the page
def open_url(url, timeout=REQUEST_TIMEOUT):
    """Fetch *url* and return its body decoded as UTF-8.

    Args:
        url: Address of the page to download.
        timeout: Seconds to wait for the server before aborting.

    Returns:
        The response body as text.

    Raises:
        requests.RequestException: On connection failure, timeout, or a
            4xx/5xx status code.
    """
    response = requests.get(url, headers=headers, timeout=timeout)
    # Fail here rather than handing an error page to the parsers.
    response.raise_for_status()
    response.encoding = 'utf-8'
    return response.text


# Extract the title
def get_title(html):
    """Return the chapter title found in the page markup.

    The title is the third ``-``-separated field of the text inside the
    first ``<i class="ico">`` element.

    Args:
        html: Markup of the chapter page.

    Returns:
        The title field, exactly as it appears between the separators.

    Raises:
        ValueError: If the element is missing or its text has fewer than
            three ``-``-separated fields.
    """
    soup = BeautifulSoup(html, 'lxml')
    title_tag = soup.find('i', class_='ico')
    if title_tag is None:
        raise ValueError("title element <i class='ico'> not found in page")
    fields = title_tag.text.split('-')
    if len(fields) < 3:
        raise ValueError(
            'unexpected title format: {!r}'.format(title_tag.text))
    return fields[2]


# Extract the image links
def get_img_url(html):
    """Return the ``<img>`` tags of the page that carry the ``kklazy`` class.

    Args:
        html: Markup of the chapter page.

    Returns:
        The matching tags in document order (an empty result if none).
    """
    soup = BeautifulSoup(html, 'lxml')
    # Match on the bare class name.  The previous literal had a trailing
    # space, which makes the lookup an exact-string comparison against the
    # class attribute instead of a per-class match.
    return soup.find_all('img', class_='kklazy')


# Download the images
def save_imgs(filename, img_urls):
    """Download each image and write it to ``<filename> 第<n>页.jpg``.

    Args:
        filename: Prefix for the output files (the chapter title).
        img_urls: Iterable of ``<img>`` tags; the image address is read
            from each tag's ``data-kksrc`` attribute.

    Raises:
        requests.RequestException: If an image request fails.
        OSError: If an output file cannot be written.
    """
    num = 1
    for img_tag in img_urls:
        src = img_tag.get('data-kksrc')
        if not src:
            # No lazy-load source on this tag: nothing to download, and it
            # must not consume a page number.
            continue
        response = requests.get(src, headers=headers, timeout=REQUEST_TIMEOUT)
        response.raise_for_status()
        with open('{} 第{}页.jpg'.format(filename, num), 'wb') as file:
            file.write(response.content)
        print(filename + '第{}页'.format(num) + '下载完成!')
        num += 1


# Main program
def main():
    """Download all page images of the hard-coded chapter URL."""
    url = 'https://www.kuaikanmanhua.com/web/comic/12759/'
    html = open_url(url)
    filename = get_title(html)
    img_urls = get_img_url(html)
    save_imgs(filename, img_urls)


if __name__ == '__main__':
    main()