c#爬取笔趣阁小说（附源码）

本文转载自查看原文 2021-12-07 10:29 2605

我的代码，小白直接复制也能实现效果。

目标网站：https://www.biqugeu.net/

进入网站后我们搜索小说名称

按 F12 打开开发者工具，可以看到第一个调用的接口很明显就是我们刚刚搜索时请求的接口，然后我们打开当前页面的源代码。

可以看出，源代码的这个地方对应的是页面上查询到的第一条结果。我们就爬取这第一条结果：点击它的 href 链接进入下一个页面。

然后我们就进入到了上面图片的页面，很明显这些就是我们要爬取的小说章节，我们先打开页面源代码，如下

到这，我们就应该找到小说的第一章开始爬取。第一个 dt 下面很明显对应的是页面上的最新章节，所以我们得从第二个 dt 后面的第一个 dd 开始爬取。我们先点击链接进入。

这个页面应该就是我们真正要爬取的小说内容了，我们打开页面源代码

到这我们可以看到我们需要爬取的东西都在这，小说题目，章节名称，章节内容以及下一章节的链接，然后我们开始写代码进行爬取。
以下是效果图以及手机看的效果

以下为完整代码

using System;
using System.IO;
using System.Net;
using System.Text;
using System.Text.RegularExpressions;

namespace ConsoleApp3
{
    /// <summary>
    /// Console crawler for https://www.biqugeu.net/: searches for a novel by name,
    /// opens the first search hit, then follows the "next chapter" links from the
    /// first chapter onwards, appending each chapter to "{book title}.txt" in the
    /// application's base directory.
    /// </summary>
    class Program
    {
        private const string BaseUrl = "https://www.biqugeu.net/";
        private const string SearchUrlTemplate = "https://www.biqugeu.net/searchbook.php?keyword=<<bookname>>";

        // User agent sent with the search and chapter-index requests.
        private const string BrowserUserAgent = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/96.0.4664.45 Safari/537.36";

        // User agent sent with the chapter-page requests.
        private const string ChapterUserAgent = "Mozilla/4.0 (compatible; MSIE 8.0; Windows NT 6.1; Trident/4.0)";

        // Upper bound on attempts per chapter so a permanent failure cannot loop forever.
        private const int MaxChapterAttempts = 5;
        private const int RetryDelayMilliseconds = 2000;

        private static readonly Uri SiteRoot = new Uri(BaseUrl);

        // Chapter heading on a chapter page.
        private static readonly Regex ChapterNameRegex = new Regex("<h1>(?<bookname>.*?)</h1>");

        // Navigation bar of a chapter page; captures the href of the "next chapter" link.
        private static readonly Regex NextChapterRegex = new Regex("<a href=\"/.*?\" target=\"_top\" class=\"pre\">上一章</a> &larr; <a href=\"/.*?/\" target=\"_top\" title=\"\" class=\"back\">章节列表</a> &rarr; <a href=\"(?<nextChapter>.*?)\" target=\"_top\" class=\"next\"");

        // Script variable on a chapter page that holds the book title.
        private static readonly Regex BookTitleRegex = new Regex("booktitle = \"(?<booktitle>.*?)\";");

        // One paragraph of chapter text: everything on a line up to "<br/><br/>".
        private static readonly Regex ParagraphRegex = new Regex("(?<data>.*?)<br/><br/>");

        // Search result entry; captures the book's site-relative path (leading '/' excluded).
        private static readonly Regex SearchResultRegex = new Regex("<div class=\"image\">\\s*<a href=\"/(?<bookurl>.*?)\"");

        // First <dd> link after a <dt> heading on the index page. It is applied to HTML
        // with all spaces removed, which is why the pattern reads "<ahref=".
        private static readonly Regex FirstChapterRegex = new Regex("<dt>.*?</dt><dd><ahref=\"/(?<bookfirst>.*?)\">.*?</a></dd>");

        /// <summary>
        /// Prompts for a novel name, locates it on the site and downloads every chapter.
        /// </summary>
        /// <param name="args">Command-line arguments (unused).</param>
        static void Main(string[] args)
        {
            Console.WriteLine("请输入需要爬取的小说！");

            string novelName = Console.ReadLine();
            if (string.IsNullOrWhiteSpace(novelName))
            {
                Console.WriteLine("小说名称不能为空！");
                return;
            }
            novelName = novelName.Trim();

            // Step 1: search and take the first hit. The keyword is escaped so that
            // characters such as '&', '#' or '+' cannot corrupt the query string.
            string searchUrl = SearchUrlTemplate.Replace("<<bookname>>", Uri.EscapeDataString(novelName));
            string html;
            try
            {
                html = DownloadHtml(searchUrl, BrowserUserAgent);
            }
            catch (WebException we)
            {
                Console.WriteLine(we.Message);
                return;
            }

            string bookPath = SearchResultRegex.Match(StripLineBreaks(html)).Groups["bookurl"].Value;
            if (bookPath.Length == 0)
            {
                // Stop here; continuing would request the site root and fail later.
                Console.WriteLine("没有找到该小说！");
                return;
            }

            // Step 2: open the chapter index and locate the first real chapter.
            try
            {
                html = DownloadHtml(ToAbsoluteUrl(bookPath), BrowserUserAgent);
            }
            catch (WebException we)
            {
                Console.WriteLine(we.Message);
                return;
            }

            // The first <dt> section lists the latest chapters; the second <dt> starts
            // the full chapter list, so the second match is chapter one.
            MatchCollection sectionStarts = FirstChapterRegex.Matches(StripLineBreaks(html).Replace(" ", ""));
            if (sectionStarts.Count < 2)
            {
                Console.WriteLine("没有找到小说章节！");
                return;
            }

            // Step 3: walk the "next chapter" links until one no longer points at a chapter page.
            string chapterUrl = ToAbsoluteUrl(sectionStarts[1].Groups["bookfirst"].Value);
            while (chapterUrl != null)
            {
                html = DownloadChapterWithRetry(chapterUrl);
                if (string.IsNullOrEmpty(html))
                {
                    // Retries exhausted or empty body: there is no next link to follow.
                    Console.WriteLine("章节下载失败，已停止爬取：" + chapterUrl);
                    return;
                }

                string nextChapter = NextChapterRegex.Match(html).Groups["nextChapter"].Value;
                string chapterName = ChapterNameRegex.Match(html).Groups["bookname"].Value;
                string bookTitle = BookTitleRegex.Match(html).Groups["booktitle"].Value;
                if (bookTitle.Length == 0)
                {
                    // Fall back to what the user typed so the output is not written to ".txt".
                    bookTitle = novelName;
                }

                StringBuilder chapterContent = new StringBuilder();
                chapterContent.Append("\r\n").Append(chapterName).Append("\r\n");
                foreach (Match paragraph in ParagraphRegex.Matches(html))
                {
                    chapterContent.Append(paragraph.Groups["data"].Value.Trim());
                }

                AddBookToTXT(chapterContent.ToString(), bookTitle);
                Console.WriteLine(chapterName + "-------下载完毕！");

                // The last chapter's "next" link does not contain "html"; that ends the crawl.
                chapterUrl = nextChapter.Contains("html") ? ToAbsoluteUrl(nextChapter) : null;
            }
        }

        /// <summary>
        /// Performs a GET request and returns the response body as text.
        /// </summary>
        /// <param name="url">Absolute URL to fetch.</param>
        /// <param name="userAgent">Value for the User-Agent header.</param>
        /// <returns>The response body decoded by <see cref="StreamReader"/>'s default encoding detection.</returns>
        /// <exception cref="WebException">The request failed.</exception>
        private static string DownloadHtml(string url, string userAgent)
        {
            HttpWebRequest request = (HttpWebRequest)WebRequest.Create(url);
            request.Method = "GET";
            request.Accept = "text/html";
            request.AllowAutoRedirect = true;
            request.UserAgent = userAgent;

            // Dispose the response as well as the reader so the connection is always released.
            using (HttpWebResponse response = (HttpWebResponse)request.GetResponse())
            using (Stream body = response.GetResponseStream())
            using (StreamReader reader = new StreamReader(body))
            {
                return reader.ReadToEnd();
            }
        }

        /// <summary>
        /// Downloads a chapter page, retrying a bounded number of times on network errors.
        /// </summary>
        /// <param name="url">Absolute URL of the chapter page.</param>
        /// <returns>The page HTML, or null when every attempt failed.</returns>
        private static string DownloadChapterWithRetry(string url)
        {
            Exception lastError = null;
            for (int attempt = 1; attempt <= MaxChapterAttempts; attempt++)
            {
                try
                {
                    return DownloadHtml(url, ChapterUserAgent);
                }
                catch (WebException we)
                {
                    lastError = we;
                }
                catch (IOException ioe)
                {
                    // A connection reset while reading the body surfaces as IOException.
                    lastError = ioe;
                }

                if (attempt < MaxChapterAttempts)
                {
                    Console.WriteLine("远程主机强迫关闭了一个现有的连接,重新爬取当前章节。。。");
                    // Short pause so a struggling server is not hammered by immediate retries.
                    System.Threading.Thread.Sleep(RetryDelayMilliseconds);
                }
            }

            if (lastError != null)
            {
                Console.WriteLine(lastError.Message);
            }
            return null;
        }

        /// <summary>
        /// Resolves a site-relative href (with or without a leading '/') against the site root.
        /// </summary>
        /// <param name="href">Relative or absolute link taken from a page.</param>
        /// <returns>The absolute URL.</returns>
        private static string ToAbsoluteUrl(string href)
        {
            Uri absolute;
            if (Uri.TryCreate(SiteRoot, href, out absolute))
            {
                return absolute.AbsoluteUri;
            }
            return BaseUrl + href.TrimStart('/');
        }

        /// <summary>
        /// Removes line feeds, tabs and carriage returns so patterns can match across source lines.
        /// </summary>
        /// <param name="html">Raw page HTML.</param>
        /// <returns>The HTML on a single line.</returns>
        private static string StripLineBreaks(string html)
        {
            return html.Replace("\n", "").Replace("\t", "").Replace("\r", "");
        }

        /// <summary>
        /// Appends content to "{pathName}.txt" in the application's base directory,
        /// creating the file when it does not exist yet.
        /// </summary>
        /// <param name="logstring">Text to append; a line terminator is written after it.</param>
        /// <param name="pathName">Book title used as the file name (without extension).
        /// Characters that are not valid in a file name are replaced with '_'.</param>
        public static void AddBookToTXT(string logstring, string pathName)
        {
            string fileName = pathName ?? string.Empty;
            // Scraped titles may contain characters the file system rejects.
            foreach (char invalid in Path.GetInvalidFileNameChars())
            {
                fileName = fileName.Replace(invalid, '_');
            }

            string path = Path.Combine(AppDomain.CurrentDomain.BaseDirectory, fileName + ".txt");

            // Append mode creates the file on first use, so no separate create step is needed.
            using (StreamWriter writer = new StreamWriter(path, true))
            {
                writer.WriteLine(logstring);
            }
        }
    }
}

免责声明！

本站转载的文章为个人学习借鉴使用，本站对版权不负任何法律责任。如果侵犯了您的隐私权益，请联系本站邮箱yoyou2525@163.com删除。

猜您在找 爬虫大作业之爬取笔趣阁小说爬虫学习：request+xpath爬取笔趣阁小说 Python爬虫入门教程02：笔趣阁小说爬取 java多线程爬取笔趣阁所有小说（请准备够大的硬盘） python爬虫学习---记录爬取笔趣阁的经历（python3.6） python3 爬虫继续爬笔趣阁 ,,,,,,, Python3网络爬虫--爬取有声小说（附源码） python爬虫之小说爬取怎么用Python爬取抖音小视频? 资深程序员都这样爬取的(附源码) 初次尝试python爬虫，爬取小说网站的小说。