Python 爬蟲:分章節保存小說


#coding:utf-8
import requests,os
from bs4 import BeautifulSoup

class downloader():
    """Scrape a novel from biqugex.com and save each chapter as its own file.

    Typical use is ``downloader(url).Try()``: ``Response`` reads the novel's
    index page and collects the chapter names and links, then ``file``
    downloads every chapter into a folder named after the novel.

    Attributes:
        url: Index-page URL passed to the constructor, or None.
        save_root: Directory under which the novel's folder is created.
        urls: Absolute chapter links, in page order.
        name: Chapter titles, parallel to ``urls``.
        soup: Parsed index page; None until ``Response`` has run.
    """

    BASE_URL = 'https://www.biqugex.com'  # prefix prepended to every chapter href
    ENCODING = 'gbk'  # forced response encoding; avoids garbled text
    TIMEOUT = 30  # seconds; keeps one stalled request from hanging the run

    # Characters Windows rejects in file names, plus line breaks and tabs.
    _ILLEGAL_FILENAME_CHARS = str.maketrans(
        dict.fromkeys('\\/:*?"<>|\r\n\t', '_'))

    def __init__(self, url=None,
                 save_root='C:\\Users\\Administrator\\Desktop'):
        """Create an empty downloader.

        Args:
            url: Optional index-page URL of the novel.
            save_root: Directory in which the novel's folder is created.
        """
        self.url = url
        self.save_root = save_root
        self.urls = []  # chapter links
        self.name = []  # chapter titles
        self.soup = None  # parsed index page, filled in by Response()

    @staticmethod
    def _safe_filename(name):
        """Return ``name`` with characters that are illegal in file names replaced."""
        cleaned = str(name or '').translate(downloader._ILLEGAL_FILENAME_CHARS)
        # Windows also rejects names that end in a space or a dot.
        cleaned = cleaned.strip().rstrip(' .')
        return cleaned or 'untitled'

    def Response(self, url=None):
        """Fetch the index page and collect every chapter name and link.

        Args:
            url: Index-page URL. When omitted, the URL given to the
                constructor is used; failing that, a module-level ``url``
                variable is looked up, which is how this method originally
                received its input.

        Returns:
            The URL that was fetched.

        Raises:
            ValueError: If no URL is available.
            requests.RequestException: If the page cannot be fetched.
        """
        if url is None:
            url = self.url if self.url is not None else globals().get('url')
        if not url:
            raise ValueError('no index URL was provided')

        response = requests.get(url, timeout=self.TIMEOUT)
        response.encoding = self.ENCODING
        self.soup = BeautifulSoup(response.text, 'lxml')

        # Start from scratch so calling Response() twice does not duplicate chapters.
        self.urls = []
        self.name = []
        for listing in self.soup.find_all('div', class_='listmain'):
            for link in listing.find_all('a'):
                href = link.get('href')
                if not href:
                    continue  # an anchor without a target is not a chapter
                # get_text() also works when the anchor wraps other tags,
                # where .string would be None.
                self.name.append(link.get_text(strip=True))
                self.urls.append('%s%s' % (self.BASE_URL, href))
        return url

    def file(self):
        """Create a folder named after the novel and write one file per chapter.

        Must be called after ``Response``. Each chapter page is downloaded and
        the text of its ``<div id="content">`` is written, UTF-8 encoded, to
        ``<chapter name>.txt`` inside the novel's folder. A chapter page that
        has no such element produces an empty file.

        Raises:
            RuntimeError: If ``Response`` has not been called yet.
            ValueError: If the index page carries no novel title, i.e. it is
                not a novel index page.
            requests.RequestException: If a chapter cannot be fetched.
            OSError: If the folder or a chapter file cannot be written.
        """
        if self.soup is None:
            raise RuntimeError('Response() must be called before file()')

        headings = self.soup.select('body > div.book > div.info > h2')
        if not headings:
            raise ValueError('novel title not found on the index page')
        title = headings[0].get_text(strip=True)

        folder = os.path.join(self.save_root, self._safe_filename(title))
        os.makedirs(folder, exist_ok=True)

        # Walk names and links in lockstep: exactly one file per chapter.
        for chapter_name, chapter_url in zip(self.name, self.urls):
            response = requests.get(chapter_url, timeout=self.TIMEOUT)
            response.encoding = self.ENCODING
            page = BeautifulSoup(response.text, 'lxml')

            filename = os.path.join(
                folder, self._safe_filename(chapter_name) + '.txt')
            print(filename)

            # Read the text straight from the tag; round-tripping the result
            # list through str() would leak its '[' and ']' into the file.
            content = page.find('div', id='content')
            text = content.get_text() if content is not None else ''
            with open(filename, 'w', encoding='utf-8') as f:
                f.write(text)

    def Try(self, url=None):
        """Run the whole download; ask for a valid address if it cannot start.

        Args:
            url: Optional index-page URL, resolved as described in ``Response``.
        """
        try:
            self.Response(url)
            self.file()
        except (requests.RequestException, ValueError):
            # Malformed or unreachable address, or a page that is not a
            # Biquge novel index: prompt for a correct Biquge URL.
            print('請輸入正確的筆趣閣網址')


if __name__ == '__main__':
    # Ask for the novel's index page, e.g. https://www.biqugex.com/book_104027/
    # `url` is deliberately a module-level name: downloader.Response() picks
    # the address up from this global.
    url = input('請輸入網址:')
    downloader().Try()

 


免責聲明!

本站轉載的文章為個人學習借鑒使用,本站對版權不負任何法律責任。如果侵犯了您的隱私權益,請聯繫本站郵箱yoyou2525@163.com刪除。



 
粵ICP備18138465號   © 2018-2025 CODEPRJ.COM