Python crawler: search for a novel and download it


# coding: utf-8
import os

import requests
from bs4 import BeautifulSoup
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.by import By


class downloader():

    def __init__(self):
        self.urls = []  # chapter links
        self.name = []  # chapter names
        self.url = 'https://so.biqusoso.com/s.php?ie=utf-8&siteid=biqugex.com&q='

    def Get_url(self):
        """Ask for the novel name, search for it, and return the link of the first result."""
        # configure Chrome to run in headless (no-window) mode
        chrome_options = Options()
        chrome_options.add_argument('--headless')
        browser = webdriver.Chrome(options=chrome_options)
        browser.get(self.url)
        c = input('Please enter the full name of the novel: ')
        # fill in the search box and click the search button
        browser.find_element(By.XPATH, '//*[@id="wrapper"]/div[1]/div[2]/form/input[3]').send_keys(c)
        browser.find_element(By.XPATH, '//*[@id="wrapper"]/div[1]/div[2]/form/input[4]').click()
        new_url = browser.current_url
        # quit() closes the window and shuts down the chromedriver process
        browser.quit()
        print('Browser closed')
        # print(new_url)
        response = requests.get(new_url)
        response.encoding = 'utf-8'
        soup = BeautifulSoup(response.text, 'lxml')
        # each result title sits in a <span class="s2">; take the first <a> link
        spans = soup.find_all('span', class_='s2')
        new_name = BeautifulSoup(str(spans), 'lxml').find('a')
        # print(new_name.string)  # title of the first result
        self.href = new_name.attrs['href']
        print(self.href)
        return self.href

    def Response(self):
        """Fetch the book page and collect the chapter names and links."""
        response = requests.get(self.href)
        response.encoding = 'gbk'  # the site is GBK-encoded; set it explicitly to avoid mojibake
        self.soup = BeautifulSoup(response.text, 'lxml')  # parse the book page
        div = self.soup.find_all('div', class_='listmain')  # the chapter list lives in class="listmain"
        soup1 = BeautifulSoup(str(div), 'lxml')  # re-parse the stringified result so we can search inside it
        h = soup1.find_all('a')  # every <a> under the chapter list
        for i in h:
            self.name.append(i.string)  # the tag text is the chapter name
            self.urls.append('https://www.biqugex.com%s' % i.get('href'))  # the href is a relative chapter link

    def file(self):
        """Read the novel title, create a folder with the same name, and save each chapter."""
        div1 = self.soup.select('body > div.book > div.info > h2')
        a = BeautifulSoup(str(div1), 'lxml')
        b = a.find('h2').string  # novel title
        c = 'C:\\Users\\Administrator\\Desktop\\%s' % b
        if not os.path.exists(c):
            os.mkdir(c)

        # walk through the chapter links and fetch each chapter body
        i = 0
        while i < len(self.urls):
            response1 = requests.get(url=self.urls[i])
            response1.encoding = 'gbk'
            soup2 = BeautifulSoup(response1.text, 'lxml')
            content = soup2.find('div', id='content')  # the chapter text is in <div id="content">
            # build the file name from the chapter name
            filename = c + '/' + self.name[i] + '.txt'
            print(filename)
            # write the chapter text to its own file
            with open(filename, 'w', encoding='utf-8') as f:
                f.write(content.text)
            i += 1

    def Main(self):
        """Run the whole flow; if the search fails, print a message instead of crashing."""
        try:
            self.Get_url()
        except Exception:
            print('Novel not found')
        else:
            self.Response()
            self.file()


if __name__ == '__main__':
    # url = input('Please enter the URL: ')
    # url = 'https://www.biqugex.com/book_104027/'
    a = downloader()
    a.Main()
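
As a side note, the search URL used above already carries the keyword in its q query parameter, so the headless-Chrome step could in principle be replaced by a plain GET request. The sketch below is only an assumption-based illustration: the helper name search_first_result is not part of the original script, and whether the endpoint accepts the keyword this way is not verified here.

# A minimal sketch, assuming the search endpoint accepts the keyword directly
# in the "q" parameter (the URL in the script suggests this, but it is not verified).
import requests
from bs4 import BeautifulSoup

def search_first_result(keyword):
    # hypothetical helper, not part of the original script
    params = {'ie': 'utf-8', 'siteid': 'biqugex.com', 'q': keyword}
    r = requests.get('https://so.biqusoso.com/s.php', params=params)
    r.encoding = 'utf-8'
    soup = BeautifulSoup(r.text, 'lxml')
    span = soup.find('span', class_='s2')        # first result row
    link = span.find('a') if span else None      # title link inside it
    return link.get('href') if link else None

If the endpoint behaves as assumed, the returned href could be assigned to self.href and fed straight into Response(), with no browser involved.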

 

