# Scrape the novel "盗墓笔记" (Daomu Biji).
"""Interactive downloader for single chapters of the novel "盗墓笔记".

The script asks for a chapter number, fetches the matching page from
taiuu.com, extracts the chapter title and body text, and appends both to a
local UTF-8 text file. It keeps asking until the user declines to continue.
"""
import requests
from bs4 import BeautifulSoup

# Browser-like User-Agent sent with every request.
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.102 Safari/537.36'
}

# The page id used in the URL is the chapter number plus this fixed offset.
CHAPTER_ID_OFFSET = 78209
BASE_URL = 'http://www.taiuu.com/0/67/'
OUTPUT_FILE = '盗墓笔记.txt'
# Seconds to wait for the server before giving up; without a timeout
# requests.get() may block indefinitely.
REQUEST_TIMEOUT = 10


def open_url(url, timeout=REQUEST_TIMEOUT):
    """Fetch *url* and return the response body decoded as GBK text.

    Args:
        url: Address of the page to download.
        timeout: Seconds to wait for the server before aborting.

    Returns:
        The decoded HTML of the page.

    Raises:
        requests.RequestException: On connection problems, timeouts, or an
            HTTP error status (4xx/5xx).
    """
    response = requests.get(url, headers=headers, timeout=timeout)
    # Fail loudly on 4xx/5xx instead of parsing an error page as a chapter.
    response.raise_for_status()
    # The pages are decoded as GBK; override whatever requests guessed.
    response.encoding = 'gbk'
    return response.text


def _require_tag(tag, message):
    """Return *tag*, or raise ValueError(message) if the lookup found nothing."""
    if tag is None:
        raise ValueError(message)
    return tag


def get_title(html):
    """Return the chapter title found in *html*, followed by a newline.

    The title is the text of the ``<div class="h1title">`` element.

    Raises:
        ValueError: If the page contains no such element.
    """
    soup = BeautifulSoup(html, 'lxml')
    title_tag = _require_tag(
        soup.find('div', class_='h1title'),
        '页面中未找到章节标题',
    )
    return title_tag.text + '\n'


def get_text(html):
    """Return the chapter body text found in *html*.

    The body is the text of the ``<div id="htmlContent">`` element.

    Raises:
        ValueError: If the page contains no such element.
    """
    soup = BeautifulSoup(html, 'lxml')
    text_tag = _require_tag(
        soup.find('div', id='htmlContent'),
        '页面中未找到章节正文',
    )
    return text_tag.text


def save(title, text):
    """Append *title* and *text* to the output file and report completion."""
    # Append mode so successive chapters accumulate in one file.
    with open(OUTPUT_FILE, 'a', encoding='utf-8') as file:
        file.write(title)
        file.write(text)
    print('下载完成!')


def main():
    """Run the interactive download loop until the user declines to continue."""
    while True:
        answer = input('请输入你想要下载第几章:')
        try:
            chapter = int(answer)
        except ValueError:
            # Re-prompt instead of crashing on non-numeric input.
            print('请输入有效的章节数字!')
            continue

        url = '{}{}.html'.format(BASE_URL, chapter + CHAPTER_ID_OFFSET)
        try:
            html = open_url(url)
            title = get_title(html)
            text = get_text(html)
        except (requests.RequestException, ValueError) as err:
            # A failed chapter should not end the whole session.
            print('下载失败:{}'.format(err))
        else:
            save(title, text)

        repeat = input('请问还要继续下载吗?(y/n)')
        if repeat.strip().lower() != 'y':
            break
    print('已退出!')


if __name__ == '__main__':
    main()